From 7a9ee056e49855a97a7ca29752f93f8d3611ef15 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Th=C3=A9r=C3=A8se=20Godefroy?= Date: Fri, 15 Aug 2014 01:18:29 +0200 Subject: [PATCH] fr/kitchen/page-reformat: rename to reformat-html and modify for use with color-wdiff. --- fr/kitchen/page-reformat | 104 --------------------------- fr/kitchen/reformat-html | 147 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 104 deletions(-) delete mode 100755 fr/kitchen/page-reformat create mode 100755 fr/kitchen/reformat-html diff --git a/fr/kitchen/page-reformat b/fr/kitchen/page-reformat deleted file mode 100755 index cb2caf9..0000000 --- a/fr/kitchen/page-reformat +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash - -# page-reformat: reformat the original pages of emailselfdefense.fsf.org - -# cd to the directory where the script is located, then: -# $ ./page-reformat [path to the HTML file] -# You can also drag-and-drop the file into the terminal. -# The result has extension .html.html to avoid writing over the original page. - -# If you want to wrap the text, uncomment line 97 and comment out line 94. - - -cp $1 tmp - -# Remove javascript, which shouldn't be reformated. -sed -i '/jquery-1.11.0.min.js/,$d' tmp - -# Remove leading and trailing spaces/tabs. -sed -i 's,\t, ,g' tmp -sed -i 's,^ *,,' tmp -sed -i 's, *$,,' tmp - -# Remove LF after . -sed -i '/<\/a>$/ {N; s,<\/a>\n\([^<]\),<\/a>\1,}' tmp - -# One string per paragraph, header or list item. -for tag in li p strong a h3; do - sed -i "/<$tag[^>]*>$/ {N; s,\\n, ,}" tmp -done -for tag in a strong; do - sed -i "/<\\/$tag>$/ {N; s,\\n, ,}" tmp -done -# This command may need to be repeated. Adjust the number of repeats. This -# could be done by looping back to a sed marker, but a while loop seems -# quicker. -i=0 -while (( i < 2 )); do - sed -i '/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}' tmp - let i=i+1 -done - -sed -i '/ \/>$/ {N; s,\( \/>\)\n,\1 ,}' tmp -sed -i '/ ]*>$/ {N; s,\(]*>\)\n\([^<]\),\1 \2,}' tmp - -# Make sure there is only one paragraph per string. This command may need to -# be repeated. Adjust the number of repeats. -i=0 -while (( i < 2 )); do - sed -i 's,

\(.\+\)$,

\n\1,' tmp - let i=i+1 -done - -# Single out the tags which include p (will also work for pre). -sed -i 's,\(.\) <$tag,>\n<$tag," tmp -done -# Remove leading and trailing spaces, double spaces and blank lines. -sed -i 's,^ *,,' tmp -sed -i 's, *$,,' tmp -sed -i 's, , ,g' tmp -sed -i '/^$/d' tmp - -# Fuse comment with

. -sed -i '/<\/p>$/ {N;s,\n\(