diff options
author | Thomas Schwinge <thomas@schwinge.name> | 2010-12-13 17:11:51 +0100 |
---|---|---|
committer | Thomas Schwinge <thomas@schwinge.name> | 2010-12-13 17:11:51 +0100 |
commit | 2d75167da62e3486836e5f1773e5f1ab06e43fe8 (patch) | |
tree | e44fc83e0b1419836d1b21652ad1d38b8d0af2c4 /purify_html | |
parent | 217998d56f5b6424a685f8c87f2c0e924d1c89da (diff) | |
parent | 5c5c16e265d8ef56b71f319885f32bf144bdea23 (diff) |
Merge branch 'master' into external_pager_mechanism
Conflicts:
microkernel/mach/external_pager_mechanism.mdwn
Diffstat (limited to 'purify_html')
-rwxr-xr-x | purify_html | 39 |
1 files changed, 39 insertions, 0 deletions
#!/bin/sh

# Mangle the rendered files to cause fewer differences after re-rendering.

# Written by Thomas Schwinge <thomas@schwinge.name>.

# Un-mangle mailto links: convert numeric HTML character references to real
# characters.
#
# Every regular *.html file below the current directory is handed to its own
# perl invocation (xargs -n 1).  The perl program writes the converted text to
# FILE.new and renames that over FILE only if at least one reference was
# actually converted; otherwise FILE.new is removed again.  On any error the
# partial FILE.new is removed and FILE is left untouched.
find ./ -type f -name \*.html -print0 \
  | xargs -0 --no-run-if-empty -n 1 \
    perl -e \
      'use strict;
       use warnings;

       my $file = $ARGV[0];
       defined $file or die "usage: perl -e ... FILE\n";

       # TODO: could use a proper temporary file.
       my $tmp = "$file.new";

       # Set as soon as one character reference has been converted.
       my $changed = 0;
       # True while we are inside a mailto link (possibly spanning lines).
       my $replacing = 0;

       # Remove the partial temporary file and abort.  The original file is
       # never replaced once something has gone wrong.
       sub bail {
         my ($message) = @_;
         unlink($tmp);
         die "$message\n";
       }

       # Convert one numeric character reference.  Only printable ASCII is
       # converted: other code points are returned unchanged, because writing
       # them as raw characters would need knowledge of the file encoding.
       sub decode_entity {
         my ($entity, $hex, $dec) = @_;
         my $code = defined $hex ? hex($hex) : $dec;
         return $entity if $code < 0x20 || $code > 0x7e;
         $changed = 1;
         return chr($code);
       }

       open(my $in, "<", $file) or die "open: $file: $!\n";
       open(my $out, ">", $tmp) or die "open: $tmp: $!\n";

       while (my $line = <$in>) {
         # The replacing-toggling logic is a bit rough, but so is life.
         $replacing = 1 if $line =~ /<a href="mailto:/;
         if ($replacing) {
           # The digit counts are bounded so that hex() cannot overflow;
           # anything that is not a well-formed reference is left alone.
           $line =~ s/(&#(?:[xX]([0-9A-Fa-f]{1,6})|([0-9]{1,7}));)/decode_entity($1, $2, $3)/ge;
         }
         $replacing = 0 if $line =~ /<\/a>/;
         print {$out} $line or bail("print: $tmp: $!");
       }

       # close() also reports read errors and deferred (buffered) write errors.
       close($in) or bail("read: $file: $!");
       close($out) or bail("close: $tmp: $!");

       if ($changed) {
         rename($tmp, $file) or bail("rename: $file: $!");
       } else {
         unlink($tmp) or die "unlink: $tmp: $!\n";
       }'

# Compared to using ``perl -p -i -l'', this solution maintains the files'
# original timestamps unless they're actually modified.