#!/bin/bash

# NAME
#    reformat-html - reformat HTML files from emailselfdefense.fsf.org
#
# SYNOPSIS
#    reformat-html /PATH/TO/NAME.html
#
# GRAPHIC INTERFACE HOWTO
#    * Launch the script by double-clicking on it; a terminal will open.
#    * At the prompt, drag and drop the input file into the terminal.
#
#    Alternatively (in Gnome, KDE, XFCE, etc.)
#    * create a launcher for the application menu;
#    * launch the script from the contextual menu of the HTML file.
#
# The reformatted file is created in the directory where the input file
# resides, and its name is NAME-r.html.

#==============================================================================

set -e

# Test whether the script is called from color-wdiff: compare the PID(s)
# reported by pidof with our parent PID. pidof exits non-zero when nothing
# matches, hence the "|| true" to survive "set -e".
p=$(pidof -x color-wdiff) || true
if [[ "$p" == "$PPID" ]]; then
  called_from_color_wdiff=1
fi

#######################################
# Terminate the script, keeping the terminal open long enough for the user
# to read the messages unless the script was started by color-wdiff.
# Globals:   called_from_color_wdiff (read), input (read)
# Arguments: $1 - exit status (defaults to 0); status 1 also prints the
#                 "doesn't exist or is not an HTML" diagnostic on stderr.
# Outputs:   prompt on stdout, diagnostic on stderr (interactive mode only)
# Returns:   never returns; always exits with the requested status.
#######################################
close_or_exit() {
  local code=${1:-0}
  local reply

  # Called from color-wdiff: no interactivity, let the terminal close normally.
  if [[ "$called_from_color_wdiff" == "1" ]]; then
    exit "$code"
  fi

  if [[ "$code" == "1" ]]; then
    # printf '%s' keeps backslashes in the file name literal.
    printf "\n!!! %s doesn't exist or is not an HTML.\n" "$input" >&2
  fi
  printf '\n*** Close the terminal window or press Return.\n'

  # Whatever the user types (or EOF), leave with the requested status rather
  # than falling through with an unrelated one.
  read -r reply || true
  exit "$code"
}

# Get a valid HTML as input.
input=$1
if [[ ! -f "$input" || ! -s "$input" ]]; then
  printf '\n*** reformat-html - Please enter the HTML file.\n'
  # "-r" is left off on purpose: without it, backslash-escaped characters in
  # a pasted path are unescaped (presumably useful for terminals that escape
  # dropped file names instead of quoting them -- TODO confirm).
  # shellcheck disable=SC2162
  read input || true
  # Strip the single quotes that surround a dragged-and-dropped path.
  input=${input%\'}
  input=${input#\'}
  # The file must exist and its name must end in ".html".
  [[ -f "$input" && "${input%.html}" != "$input" ]] || close_or_exit 1
fi

# Define the output file.
if [[ "$called_from_color_wdiff" == "1" ]]; then
  output=$2
else
  output=${input%.html}-r.html
fi

tmp=$(mktemp -t ref.XXXXXX) || close_or_exit 1
# Single quotes: the path is expanded (and quoted) when the trap fires.
trap 'rm -f -- "$tmp"' EXIT
# Quoted so that paths containing spaces are copied correctly.
cp -- "$input" "$tmp"

# Remove javascript, which shouldn't be reformatted, leading and trailing
# spaces/tabs, multiple spaces, LF after and
  • . sed -i -e '/jquery-1.11.0.min.js/,$d' \ -e 's,\t, ,g' \ -e 's,^ *,,' \ -e 's, *, ,g' \ -e 's, *$,,' $tmp sed -i -e '/<\/a>$/ {N; s,<\/a>\n<,<\/a> <,}' $tmp sed -i -e '/^
  • \n ]*>$/ {N; s,\\n, ,}" $tmp done for tag in a strong; do sed -i "/<\\/$tag>$/ {N; s,\\n, ,}" $tmp done # This command may need to be repeated. Adjust the number of repeats. This # could be done by looping back to a sed marker, but a while loop seems # quicker. i=0 while (( i < 2 )); do sed -i '/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}' $tmp let i=i+1 done sed -i -e '/ \/>$/ {N; s,\( \/>\)\n,\1 ,}' \ -e '/ ]*>$/ {N; s,\(]*>\)\n\([^<]\),\1 \2,}' $tmp # Make sure there is only one paragraph per string. This command may need to # be repeated. Adjust the number of repeats. i=0 while (( i < 2 )); do sed -i 's,

    \(.\+\)$,

    \n\1,' $tmp let i=i+1 done # Single out the tags which include p (will also work for pre). sed -i 's,\(.\) <$tag,>\n<$tag," $tmp done # Remove leading and trailing spaces, double spaces and blank lines. # Fuse comment with

    ; separate truncated "~~~" comment from fused tag. sed -i -e 's,^ *,,' \ -e 's, *$,,' \ -e 's, , ,g' \ -e '/^$/d' \ -e '/<\/p>$/ {N;s,\n\(