1d344b9988863165262b06282f921bb314141e8e
[enc.git] / fr / kitchen / reformat-html
1 #!/bin/bash
2
3 # NAME
4 # reformat-html - reformat HTML files from emailselfdefense.fsf.org
5
6 # SYNOPSIS
7 # reformat-html /PATH/TO/NAME.html
8
9 # GRAPHIC INTERFACE HOWTO
10 # * Launch the script by double-clicking on it; a terminal will open.
11 # * At the prompt, drag and drop the input file into the terminal.
12 #
13 # Alternatively (in Gnome, KDE, XFCE, etc.)
14 # * create a launcher for the application menu;
15 # * launch the script from the contextual menu of the HTML file.
16 #
17 # The reformatted file is created in the directory where the input file
18 # resides, and its name is NAME-r.html.
19
20 #==============================================================================
21
set -e

# usage: pid_in_list PID [CANDIDATE...]
# Succeeds if PID is equal to one of the CANDIDATEs, fails otherwise
# (including when no candidate is given).
pid_in_list() {
  local wanted=$1 candidate
  shift
  for candidate in "$@"; do
    if [[ $candidate == "$wanted" ]]; then
      return 0
    fi
  done
  return 1
}

# Test whether the script is called from color-wdiff.
# pidof prints all matching PIDs on a single line, separated by spaces, so the
# parent PID is looked up in that list instead of being compared to the whole
# string; a plain string comparison fails as soon as several instances of
# color-wdiff are running.
# pidof returns a non-zero status when nothing matches; that is not an error.
p=$(pidof -x color-wdiff) || true
# $p is left unquoted on purpose: it must be split into one word per PID.
# shellcheck disable=SC2086
if pid_in_list "$PPID" $p; then
  called_from_color_wdiff=1
fi
27
# usage: close_or_exit STATUS
#
# Terminates the script with exit status STATUS (0 if omitted).
#
# When the script is called from color-wdiff (called_from_color_wdiff is "1"),
# interactivity is turned off and the script exits at once, so that the
# terminal can close normally.
#
# Otherwise, if STATUS is 1, an error message about $input is written to
# stderr; then the user is asked to close the terminal window or press Return,
# and the script exits once a line has been read (or the input is closed).
#
# Globals read: called_from_color_wdiff, input
close_or_exit() {
  local code=${1:-0} reply

  if [[ ${called_from_color_wdiff:-} == "1" ]]; then
    exit "$code"
  fi

  if [[ $code == "1" ]]; then
    printf '\n!!! %s doesn'\''t exist or is not an HTML.\n' "${input:-}" >&2
  fi
  printf '\n*** Close the terminal window or press Return.\n'

  # Whatever the user typed is irrelevant. A failed read (end of input) must
  # not abort the script under `set -e` with the wrong status, and typing some
  # text before Return must not make the function return to its caller.
  read -r reply || true
  exit "$code"
}
42
# Get a valid HTML as input.
# The first argument is used if it names an existing, non-empty regular file;
# otherwise the user is asked for the path.
input=${1:-}
if [[ ! -f $input || ! -s $input ]]; then
  printf '\n*** reformat-html - Please enter the HTML file.\n'
  # No -r here on purpose: without it, read treats a backslash as an escape
  # character, so a path typed or dropped with backslash-escaped characters
  # is still read correctly.
  # The `|| true` keeps a failed read (end of input) from aborting the script
  # silently under `set -e`; the empty path is rejected just below instead.
  # shellcheck disable=SC2162
  read input || true
  # Strip one single quote from each end of the path (presumably added by the
  # terminal around a dragged-and-dropped file name).
  input=${input%\'}
  input=${input#\'}
  # The file must exist and its name must end in ".html".
  if [[ ! -f $input || $input != *.html ]]; then
    close_or_exit 1
  fi
fi

# Define the output file.
# When called from color-wdiff the output path is the second argument;
# otherwise it is NAME-r.html next to the input file.
if [[ ${called_from_color_wdiff:-} == "1" ]]; then
  output=${2:-}
  if [[ -z $output ]]; then
    # Fail right away rather than after all the processing, when the
    # redirection to an empty file name would fail anyway.
    printf '!!! reformat-html: no output file given as second argument.\n' >&2
    exit 1
  fi
else
  output=${input%.html}-r.html
fi
58
# All the editing is done in place on a temporary copy of the input file.
tmp=$(mktemp -t ref.XXXXXX) || close_or_exit 1
# Single quotes: $tmp is expanded (and quoted) when the trap fires, not when
# it is defined, so the temporary file is removed on exit even if its path
# contains spaces or other special characters.
trap 'rm -f -- "$tmp"' EXIT

# Quoted operands: the input path may contain spaces.
cp -- "$input" "$tmp"
63
# usage: reformat_in_place FILE
#
# Reformats the HTML in FILE in place with a fixed sequence of sed passes.
# The passes are order-dependent: each one works on the result of the previous
# one, so they must not be merged or reordered.
# Relies on GNU sed (-i, "\n" in replacements, "\+" and "\?").
reformat_in_place() {
  local file=$1
  local tag pass
  # Number of times the "may need to be repeated" passes are run; adjust if
  # some lines are still not joined or split as expected.
  local -r join_passes=2 split_passes=2

  # Remove javascript, which shouldn't be reformatted, leading and trailing
  # spaces/tabs, multiple spaces, LF after </a> and <li>.
  # Spaces are written as [ ] where their number matters, so that the
  # expressions survive whitespace mangling.
  sed -i -e '/jquery-1.11.0.min.js/,$d' \
         -e 's,\t, ,g' \
         -e 's,^ *,,' \
         -e 's,[ ][ ]*, ,g' \
         -e 's, *$,,' "$file"
  sed -i -e '/<\/a>$/ {N; s,<\/a>\n<,<\/a> <,}' "$file"
  sed -i -e '/^<li/ {N; s,>\n<a ,> <a ,}' "$file"

  # One string per paragraph, header or list item.
  for tag in li p strong a h3; do
    sed -i "/<$tag[^>]*>$/ {N; s,\\n, ,}" "$file"
  done
  for tag in a strong; do
    sed -i "/<\\/$tag>$/ {N; s,\\n, ,}" "$file"
  done
  # This command may need to be repeated. This could be done by looping back
  # to a sed marker, but a shell loop seems quicker.
  for (( pass = 0; pass < join_passes; pass++ )); do
    sed -i '/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}' "$file"
  done
  sed -i -e '/ \/>$/ {N; s,\( \/>\)\n,\1 ,}' \
         -e '/ <a[^>]*>$/ {N; s,\(<a[^>]*>\)\n\([^<]\),\1 \2,}' "$file"

  # Make sure there is only one paragraph per string. This command may need
  # to be repeated.
  for (( pass = 0; pass < split_passes; pass++ )); do
    sed -i 's,</p>\(.\+\)$,</p>\n\1,' "$file"
  done

  # Single out the tags which include p (will also work for pre).
  sed -i 's,\(.\)<p,\1\n<p,' "$file"

  # Single-out input meta and link.
  # "link" is listed twice; each pass only splits the first match on a line
  # (no g flag), so this presumably handles two link tags on the same line.
  for tag in input meta link link; do
    sed -i "s,> <$tag,>\n<$tag," "$file"
  done

  # Remove leading and trailing spaces, double spaces and blank lines.
  # Fuse comment with </p>; separate truncated "~~~" comment from fused tag.
  sed -i -e 's,^ *,,' \
         -e 's, *$,,' \
         -e 's,[ ][ ], ,g' \
         -e '/^$/d' \
         -e '/<\/p>$/ {N;s,\n\(<!-- [^~]\),\1,}' \
         -e 's,~~~[ ]\?[-]\?[-]\?[ ]\?<,~~~\n<,' "$file"

  # Fuse header, section and footer with the corresponding div.
  for tag in header section footer; do
    sed -i "/^<$tag/ {N; s,\\(<$tag[^>]*>\\)\\n<div>,\\1<div>,}" "$file"
    sed -i "/^<\\/div>$/ {N; s,<\\/div>\\n\\(<\\/$tag>\\),</div>\\1,}" "$file"
  done

  # Add LF before main sections and commented-out parts.
  sed -i 's,<!-- ~~,\n<!-- ~~,' "$file"
  sed -i '/COMMENTED OUT/ s,^,\n,' "$file"

  # Make the text more readable: blank line before some opening tags and
  # after some closing tags.
  for tag in p h1 h2 h3 h4 dl title form; do
    sed -i "s,<$tag,\\n&," "$file"
  done
  for tag in p dl ul h1 h2 h3 h4 title head footer form script; do
    sed -i "/<\\/$tag>/s,$,\\n," "$file"
  done
  sed -i '/<\/dd>/ {N; s,</dd>\n<dt,</dd>\n\n<dt,}' "$file"
  sed -i '/<\/dt>/ {N; s,</dt>\n<dd,</dt>\n\n<dd,}' "$file"
  sed -i -e 's,</p></span>$,</p>\n</span>,' \
         -e 's, alt=,\nalt=,g' \
         -e 's, | , |\n,g' "$file"

  # Remove extra LFs, if any.
  sed -i ':a /^$/ {N; s,\n$,,; ba}' "$file"
  sed -i ':a /^\n*$/ {$d; N; ba}' "$file"
}

reformat_in_place "$tmp"

# Wrap the text.
fmt -s -w 95 "$tmp" > "$output"

close_or_exit 0