#!/bin/bash

# page-reformat: reformat the original pages of emailselfdefense.fsf.org
#
# Usage: cd to the directory where the script is located, then:
#   $ ./page-reformat [path to the HTML file]
# You can also drag-and-drop the file into the terminal.
# The result has extension .html.html to avoid writing over the original page.
#
# If you want to wrap the text, uncomment line 97 and comment out line 94.
# NOTE(review): those line numbers refer to the script's original layout;
# re-check them against the current file before following this advice.
#
# Requires GNU sed (bare `-i`, and `\t`, `\n`, `\+` inside expressions).
#
# Every edit below is applied in place to a scratch copy named "tmp" in the
# current directory; none of the commands in this section touch the input file.

# Refuse to run without an input file: otherwise the sed commands below would
# silently rework whatever stale "tmp" a previous run left behind.
if (( $# < 1 )); then
  printf 'Usage: %s [path to the HTML file]\n' "${0##*/}" >&2
  exit 2
fi

# Quoted so that paths containing spaces (e.g. from drag-and-drop) work;
# "--" protects against file names starting with a dash.
cp -- "$1" tmp || exit 1

# Remove javascript, which shouldn't be reformatted: delete everything from
# the first line mentioning the jQuery include through the end of the file.
sed -i '/jquery-1.11.0.min.js/,$d' tmp

# Remove leading and trailing spaces/tabs.
sed -i 's,\t, ,g' tmp
sed -i 's,^ *,,' tmp
sed -i 's, *$,,' tmp

# Remove LF after </a>.
# NOTE(review): the tag name in this comment was missing in the damaged copy;
# "</a>" is taken from the sed expression below.
sed -i '/<\/a>$/ {N; s,<\/a>\n\([^<]\),<\/a>\1,}' tmp

# One string per paragraph, header or list item.
# First join the line after an opening tag that ends a line ...
for tag in li p strong a h3; do
  sed -i "/<${tag}[^>]*>$/ {N; s,\\n, ,}" tmp
done
# ... then join the line after a closing tag that ends a line.
for tag in a strong; do
  sed -i "/<\\/${tag}>$/ {N; s,\\n, ,}" tmp
done

# This command may need to be repeated. Adjust the number of repeats. This
# could be done by looping back to a sed marker, but a counted loop seems
# quicker.
# Joins a line that does not end in "<" or ">" with the line that follows it.
for (( i = 0; i < 2; i++ )); do
  sed -i '/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}' tmp
done

# Join the line after a self-closing tag (" />") that ends a line.
sed -i '/ \/>$/ {N; s,\( \/>\)\n,\1 ,}' tmp

# NOTE(review): the expression below looks damaged (a tag name and "[^>" seem
# to be missing before each "]*>"). It is kept exactly as found; restore it
# from a pristine copy of the script.
sed -i '/ ]*>$/ {N; s,\(]*>\)\n\([^<]\),\1 \2,}' tmp

# Make sure there is only one paragraph per string. This command may need to
# be repeated. Adjust the number of repeats.
# NOTE(review): the tag in this expression was lost in the damaged copy;
# "</p>" is reconstructed from the comment above and should be confirmed
# against a pristine copy.
for (( i = 0; i < 2; i++ )); do
  sed -i 's,</p>\(.\+\)$,</p>\n\1,' tmp
done

# Single out the tags which include p (will also work for pre).
# FIXME(review): the command(s) for this step are damaged in this copy: the
# text between "\(.\)" and " <$tag" is missing, apparently including the
# header of a "for tag in ...; do" loop. The surviving fragment is kept
# verbatim below, commented out so that the rest of the script still parses.
# Restore this step from a pristine copy of the script.
#   sed -i 's,\(.\) <$tag,>\n<$tag," tmp done

# Remove leading and trailing spaces, double spaces and blank lines.
sed -i 's,^ *,,' tmp
sed -i 's, *$,,' tmp
# Two spaces -> one (a single-space pattern here would be a no-op).
sed -i 's,  , ,g' tmp
sed -i '/^$/d' tmp

# Fuse comment with </p>.
# NOTE(review): "</p>" in this comment is inferred from the address of the
# sed command below, which is cut off here and kept exactly as found.
sed -i '/<\/p>$/ {N;s,\n\(