summary | refs | log | tree | commit | diff
path: root/purify_html
diff options
context:
space:
mode:
Diffstat (limited to 'purify_html')
-rwxr-xr-x purify_html 42
1 files changed, 33 insertions, 9 deletions
diff --git a/purify_html b/purify_html
index 4cf582af..9c3a7862 100755
--- a/purify_html
+++ b/purify_html
@@ -1,15 +1,39 @@
#!/bin/sh
-# Mangle the rendered files to cause fewer differernces upon re-rendering.
+# Mangle the rendered files to cause fewer differences after re-rendering.
-# Written by Thomas Schwinge <tschwinge@gnu.org>.
+# Written by Thomas Schwinge <thomas@schwinge.name>.
# Un-mangle mailto links: convert HTML character entities to real characters.
find ./ -name \*.html -print0 \
- | xargs -0 \
- perl -p -i -l -e \
- 'BEGIN { $replacing = 0; }
- # The replacing-toggling logic is a bit rough, but so is life.
- $replacing = 1 if /<a href="mailto:/;
- s%\&#(x?)([^;]*);%chr(length($1) ? hex($2) : $2)%eg if $replacing;
- $replacing = 0 if /<\/a>/;'
+ | xargs -0 --no-run-if-empty -n 1 \
+ perl -e \
+ 'BEGIN {
+ $file = $ARGV[0];
+ $discard = 1;
+ $replacing = 0;
+
+ # TODO: could use a proper temporary file.
+ open(OUT, ">$file.new") or die "open: $file: $!";
+ select(OUT) or die "select: $file: $!";
+ }
+
+ while (<>) {
+ # The replacing-toggling logic is a bit rough, but so is life.
+ $replacing = 1 if /<a href="mailto:/;
+ s%\&#(x?)([^;]*);%$discard = 0; chr(length($1) ? hex($2) : $2);%eg if $replacing;
+ $replacing = 0 if /<\/a>/;
+ } continue {
+ print or die "print: $file: $!";
+ }
+
+ END {
+ if ($discard) {
+ unlink("$file.new") or die "unlink: $file: $!";
+ } else {
+ rename("$file.new", $file) or die "rename: $file: $!";
+ }
+ }'
+
+# Compared to using ``perl -p -i -l'', this solution maintains the files'
+# original timestamps unless they're actually modified.