4 # reformat-html - reformat HTML files from emailselfdefense.fsf.org
7 # reformat-html /PATH/TO/NAME.html
9 # GRAPHIC INTERFACE HOWTO
10 # * Launch the script by double-clicking on it; a terminal will open.
11 # * At the prompt, drag and drop the input file into the terminal.
13 # Alternatively (in Gnome, KDE, XFCE, etc.)
14 # * create a launcher for the application menu;
15 # * launch the script from the contextual menu of the HTML file.
17 # The reformatted file is created in the directory where the input file
18 # resides, and its name is NAME-r.html.
20 #==============================================================================
# Test whether the script is called from color-wdiff.
# pidof prints every matching PID on a single line, so each PID is compared
# with our parent PID separately; comparing the whole list only matched when
# exactly one color-wdiff process was running.
# "|| true": pidof returns non-zero when nothing matches, which is not an
# error here.
p=$(pidof -x color-wdiff) || true
# $p is deliberately unquoted: it is a whitespace-separated list of PIDs.
for pid in $p; do
  if test "$pid" == "$PPID"; then
    called_from_color_wdiff=1
    break
  fi
done
28 function sleep_or_exit
() {
29 # turns off interactivity and lets the terminal close normally if the script
30 # is called from color-wdiff.
32 if test "$called_from_color_wdiff" == "1"; then
35 if test "$1" == "1"; then
36 echo -e 1>&2 "\n!!! $input doesn't exist or is not HTML."
43 # Get a valid HTML as input.
45 if test ! -f "$input" -o ! -s "$input"; then
46 echo -e "\n*** reformat-html - Please enter the HTML file."
48 input
=${input%\'}; input
=${input#\'}
50 test -f "$input" -a "${input%.html}" != "$input" || sleep_or_exit
1
52 # Define the output file.
53 if test "$called_from_color_wdiff" == "1"; then
56 output
=${input%.html}-r.html
# Create the three temporary work files.
# The cleanup trap is installed before the first mktemp call so that files
# already created are still removed if a later mktemp fails and the script
# exits. Names that are still empty are left out of the rm command.
tmp='' tmp1='' tmp2=''
trap 'rm -f -- ${tmp:+"$tmp"} ${tmp1:+"$tmp1"} ${tmp2:+"$tmp2"}' EXIT
tmp=$(mktemp -t ref.XXXXXX) || exit 1
tmp1=$(mktemp -t ref.XXXXXX) || exit 1
tmp2=$(mktemp -t ref.XXXXXX) || exit 1
# Don't touch the scripts: everything from the first line matching <script
# to the end of the file is set aside in $tmp1; only the part before it goes
# to $tmp for reformatting.
# The expansions are quoted because the input path may contain spaces
# (e.g. when it is dragged and dropped into the terminal).
sed -n '/<script/,$p' -- "$input" > "$tmp1"
sed '/<script/,$d' -- "$input" > "$tmp"
68 # Clean up extra spaces and tabs; remove blank lines.
69 sed -i -e 's,[[:space:]]\+, ,g' \
# For the language list: fix the commented-out items (broken by po4a).
# Each command is a separate pass over the file: N pulls the next line into
# the pattern space, so merging the passes into one sed script would change
# which line pairs get examined.
# 1. A line ending in </a> followed by a line starting with "<": join them.
sed -i -e '/<\/a>$/ {N; s,<\/a>\n<,<\/a><,}' "$tmp"
# 2. A line starting with <li whose ">" is followed by a line starting with
#    "<a ": join them.
sed -i -e '/^<li/ {N; s,>\n<a ,><a ,}' "$tmp"
# </p> at the end of the line (much better to do it by hand).
# When a line ends in ">" or "." (or, for </p>, in a double quote) and the
# next line starts with the closing tag, pull the closing tag up onto it.
# The passes are kept separate because each one uses N to read ahead.
sed -i -e '/[>.]$/ {N;s,\n</strong,</strong,}' "$tmp"
sed -i -e '/[>.]$/ {N;s,\n</p,</p,}' "$tmp"
sed -i -e '/"$/ {N;s,\n</p,</p,}' "$tmp"
# <p> and its attributes on the same line: a line ending in a bare "<p" is
# joined to the following line with a single space.
sed -i -e '/<p$/ {N;s,\n, ,}' "$tmp"
87 # Remove LF after opening tags.
88 for tag
in li p strong a h3
; do
89 sed -i "/<$tag[^>]*>$/ {N; s,\\n,,}" $tmp
# Single out paragraphs (fused with <noscript> in one instance), main
# sections, and image links (e.g. infographic) when followed by text.
# Separate truncated "~~~" comment from fused tag.
# Fold img tags; add a line after footer.
# None of the substitutions carries the g flag, so each one acts only on the
# first match in a line.
sed -i -e 's,\([^t]>\)<p,\1\n<p,' \
       -e 's,><noscript,>\n<noscript,' \
       -e 's,<!-- ~~,\n<!-- ~~,' \
       -e 's,/></a> \([[:alnum:]]\),/></a>\n\1,' \
       -e 's,~~~[ ]\?[-]\?[-]\?[ ]\?<,~~~\n<,' \
       -e 's, src=",\nsrc=",' \
       -e 's, alt=",\nalt=",' \
       -e 's,<!-- End #footer -->,&\n,' "$tmp"
105 # Fuse header, section and footer with the corresponding div.
106 for tag
in header section footer
; do
107 sed -i "/^<$tag/ {N; s,\\(<$tag[^>]*>\\)\\n<div>,\\1<div>,}" $tmp
108 sed -i "/^<\\/div>$/ {N; s,<\\/div>\\n\\(<\\/$tag>\\),</div>\\1,}" $tmp
# Make the text more readable.
# Put a line break after a <link ...> or <meta ...> tag that is immediately
# followed by another tag (first occurrence on each line only).
sed -i 's,\(<link[^>]*>\)<,\1\n<,' "$tmp"
sed -i 's,\(<meta[^>]*>\)<,\1\n<,' "$tmp"
115 for tag
in p
dd li h1 h2 h3 form
; do
116 sed -i "/<\\/$tag>$/s,$,\\n," $tmp
118 for tag
in p dl ul h1 h2 h3 h4 form body
; do
119 sed -i "/^<$tag/s,^,\\n," $tmp
# When an empty line is directly followed by </dl> or </ul>, move the empty
# line so that it comes after the closing tag instead of before it.
sed -i '/^$/ {N; s,^\n</dl>,</dl>\n,}' "$tmp"
sed -i '/^$/ {N; s,^\n</ul>,</ul>\n,}' "$tmp"
# Remove blank lines in menus and image blocks.
# Drop an empty line that precedes "<li><img".
sed -i '/^$/ {N; s,^\n<li><img,<li><img,}' "$tmp"
# Drop an empty line that precedes "<li ...><a "; note that the replacement
# writes a plain <li>, so any attributes on that <li> are discarded as well.
sed -i '/^$/ {N; s,^\n<li[^>]*><a ,<li><a ,}' "$tmp"
# After a line containing "JS enabled -->", read two more lines and collapse
# the first doubled newline into a single one.
sed -i '/JS enabled -->/ {N;N; s,\n\n,\n,}' "$tmp"
129 # Unwrap the last item of the language list for easier
# When a line containing "<li><a" is followed by a line that contains
# "Translation_Guide", join the two lines with a single space.
sed -i '/<li><a/ {N; s,\n\(.*Translation_Guide\), \1,}' "$tmp"
# Wrap at 80 columns into $tmp2; with -s, fmt only splits long lines and
# never joins short ones.
fmt -s -w 80 "$tmp" > "$tmp2"
# Suppress repeated empty lines.
# The reformatted part ($tmp2) and the script part set aside in $tmp1 are
# concatenated into the output file; cat -s squeezes runs of empty lines
# across both. "$output" is quoted because it is derived from the input
# path, which may contain spaces.
cat -s "$tmp2" "$tmp1" > "$output"