diff options
-rwxr-xr-x | purify_html | 56 |
1 files changed, 47 insertions, 9 deletions
diff --git a/purify_html b/purify_html
--- a/purify_html
+++ b/purify_html
@@ -1,15 +1,53 @@
 #!/bin/sh
 
-# Mangle the rendered files to cause fewer differernces upon re-rendering.
+# Mangle the rendered files to cause fewer differences after re-rendering.
 
-# Written by Thomas Schwinge <tschwinge@gnu.org>.
+# Written by Thomas Schwinge <thomas@schwinge.name>.
 
 # Un-mangle mailto links: convert HTML character entities to real characters.
 find ./ -name \*.html -print0 \
-  | xargs -0 \
-    perl -p -i -l -e \
-      'BEGIN { $replacing = 0; }
-       # The replacing-toggling logic is a bit rough, but so is life.
-       $replacing = 1 if /<a href="mailto:/;
-       s%\&#(x?)([^;]*);%chr(length($1) ? hex($2) : $2)%eg if $replacing;
-       $replacing = 0 if /<\/a>/;'
+  | xargs -0 --no-run-if-empty -n 1 \
+    perl -e \
+      'use strict;
+       use warnings;
+
+       # Exactly one file per invocation (xargs -n 1).
+       my $file = $ARGV[0];
+       my $new = "$file.new";
+       my $changed = 0;
+       my $replacing = 0;
+       my $cleanup = 0;
+
+       # On any failure, remove the partial output instead of installing it.
+       END { unlink($new) if $cleanup; }
+
+       open(my $in, "<", $file) or die "open: $file: $!";
+       my @st = stat($in) or die "stat: $file: $!";
+       # TODO: could use a proper temporary file.
+       open(my $out, ">", $new) or die "open: $new: $!";
+       $cleanup = 1;
+
+       while (<$in>) {
+         # The replacing-toggling logic is a bit rough, but so is life.
+         $replacing = 1 if /<a href="mailto:/;
+         # Decode only well-formed numeric character references.
+         $changed = 1 if $replacing
+           and s%&#(?:[xX]([0-9a-fA-F]+)|([0-9]+));%chr(defined($1) ? hex($1) : $2)%eg;
+         $replacing = 0 if /<\/a>/;
+         print {$out} $_ or die "print: $new: $!";
+       }
+
+       # Buffered write errors only surface when the handle is closed.
+       close($out) or die "close: $new: $!";
+
+       if ($changed) {
+         # Carry over the permission bits of the file being replaced.
+         chmod($st[2] & 07777, $new) or die "chmod: $new: $!";
+         rename($new, $file) or die "rename: $file: $!";
+       } else {
+         unlink($new) or die "unlink: $new: $!";
+       }
+       $cleanup = 0;'
+
+# Compared to using ``perl -p -i -l'', this solution maintains the files'
+# original timestamps unless they're actually modified.