From ddf5b37e218dd2b8e923a95aeab444f34283f88f Mon Sep 17 00:00:00 2001
From: =?utf8?q?Th=C3=A9r=C3=A8se=20Godefroy?=
Date: Wed, 23 Jul 2014 18:31:28 +0200
Subject: [PATCH] fr: script to reformat the HTML pages (remove indentation,
single out paragraphs, etc.)
---
fr/kitchen/page-reformat | 104 +++++++++++++++++++++++++++++++++++++++
1 file changed, 104 insertions(+)
create mode 100755 fr/kitchen/page-reformat
diff --git a/fr/kitchen/page-reformat b/fr/kitchen/page-reformat
new file mode 100755
index 0000000..cb2caf9
--- /dev/null
+++ b/fr/kitchen/page-reformat
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+# page-reformat: reformat the original pages of emailselfdefense.fsf.org
+
+# cd to the directory where the script is located, then:
+# $ ./page-reformat [path to the HTML file]
+# You can also drag-and-drop the file into the terminal.
+# The result has extension .html.html to avoid writing over the original page.
+
+# If you want to wrap the text, uncomment line 97 and comment out line 94.
+
+# Work on a scratch copy; "$1" is quoted so paths containing spaces survive.
+cp -- "$1" tmp
+
+# Remove javascript, which shouldn't be reformatted.
+sed -i '/jquery-1.11.0.min.js/,$d' tmp
+
+# Remove leading and trailing spaces/tabs.
+sed -i 's,\t, ,g' tmp
+sed -i 's,^ *,,' tmp
+sed -i 's, *$,,' tmp
+
+# Remove LF after </a>.
+sed -i '/<\/a>$/ {N; s,<\/a>\n\([^<]\),<\/a>\1,}' tmp
+
+# One string per paragraph, header or list item.
+for tag in li p strong a h3; do
+ sed -i "/<$tag[^>]*>$/ {N; s,\\n, ,}" tmp
+done
+for tag in a strong; do
+ sed -i "/<\\/$tag>$/ {N; s,\\n, ,}" tmp
+done
+# This command may need to be repeated. Adjust the number of repeats. This
+# could be done by looping back to a sed marker, but a while loop seems
+# quicker.
+i=0
+while (( i < 2 )); do
+ sed -i '/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}' tmp
+ let i=i+1
+done
+
+sed -i '/ \/>$/ {N; s,\( \/>\)\n,\1 ,}' tmp
+sed -i '/ ]*>$/ {N; s,\(]*>\)\n\([^<]\),\1 \2,}' tmp
+
+# Make sure there is only one paragraph per string. This command may need to
+# be repeated. Adjust the number of repeats.
+i=0
+while (( i < 2 )); do
+ sed -i 's,
\(.\+\)$,\n\1,' tmp
+ let i=i+1
+done
+
+# Single out the tags which include p (will also work for pre).
+sed -i 's,\(.\) <$tag,>\n<$tag," tmp
+done
+# Remove leading and trailing spaces, double spaces and blank lines.
+sed -i 's,^ *,,' tmp
+sed -i 's, *$,,' tmp
+sed -i 's, , ,g' tmp
+sed -i '/^$/d' tmp
+
+# Fuse comment with </p>.
+sed -i '/<\/p>$/ {N;s,\n\(