summaryrefslogtreecommitdiff
path: root/purify_html
diff options
context:
space:
mode:
authorThomas Schwinge <thomas@schwinge.name>2010-12-13 17:11:51 +0100
committerThomas Schwinge <thomas@schwinge.name>2010-12-13 17:11:51 +0100
commit2d75167da62e3486836e5f1773e5f1ab06e43fe8 (patch)
treee44fc83e0b1419836d1b21652ad1d38b8d0af2c4 /purify_html
parent217998d56f5b6424a685f8c87f2c0e924d1c89da (diff)
parent5c5c16e265d8ef56b71f319885f32bf144bdea23 (diff)
Merge branch 'master' into external_pager_mechanism
Conflicts: microkernel/mach/external_pager_mechanism.mdwn
Diffstat (limited to 'purify_html')
-rwxr-xr-xpurify_html39
1 files changed, 39 insertions, 0 deletions
diff --git a/purify_html b/purify_html
new file mode 100755
index 00000000..9c3a7862
--- /dev/null
+++ b/purify_html
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+# Mangle the rendered files to cause fewer differences after re-rendering.
+
+# Written by Thomas Schwinge <thomas@schwinge.name>.
+
+# Un-mangle mailto links: convert HTML character entities to real characters.
+#
+# Every *.html file below the current directory is handed to its own perl
+# process.  The filtered text is written to FILE.new, which replaces FILE only
+# if at least one entity was decoded and no error occurred; otherwise FILE.new
+# is removed and FILE is left alone.
+find ./ -name \*.html -print0 \
+  | xargs -0 --no-run-if-empty -n 1 \
+    perl -e \
+    'BEGIN {
+        $file = $ARGV[0];
+        $discard = 1;    # Cleared as soon as one entity has been decoded.
+        $replacing = 0;  # True while inside a mailto link.
+
+        # TODO: could use a proper temporary file.
+        open(OUT, ">", "$file.new") or die "open: $file: $!";
+        select(OUT) or die "select: $file: $!";
+    }
+
+    while (<>) {
+        # The replacing-toggling logic is a bit rough, but so is life.
+        $replacing = 1 if /<a href="mailto:/;
+        # Decode only well-formed numeric character references (decimal, or
+        # hexadecimal with an x/X prefix).  Anything else is left untouched
+        # instead of being turned into a NUL character by chr().
+        s%\&#(?:[xX]([0-9a-fA-F]+)|([0-9]+));%$discard = 0; chr(defined($1) ? hex($1) : $2);%eg if $replacing;
+        $replacing = 0 if /<\/a>/;
+    } continue {
+        print or die "print: $file: $!";
+    }
+
+    END {
+        # Errors on buffered output may only show up when the handle is
+        # closed, so close it explicitly and check the result.
+        unless (close(OUT)) {
+            warn "close: $file: $!\n";
+            $? ||= 1;
+        }
+        # END also runs when perl is exiting because of a die above; $? is
+        # non-zero then, and partial output must never replace the original.
+        if ($discard or $?) {
+            unlink("$file.new") or die "unlink: $file: $!";
+        } else {
+            rename("$file.new", $file) or die "rename: $file: $!";
+        }
+    }'
+
+# Compared to using ``perl -p -i -l'', this solution maintains the files'
+# original timestamps unless they're actually modified.