Commit | Line | Data |
---|---|---|
7a9ee056 TG |
1 | #!/bin/bash |
2 | ||
3 | # NAME | |
4 | # reformat-html - reformat HTML files from emailselfdefense.fsf.org | |
5 | ||
6 | # SYNOPSIS | |
7 | # reformat-html /PATH/TO/NAME.html | |
8 | ||
9 | # GRAPHIC INTERFACE HOWTO | |
10 | # * Launch the script by double-clicking on it; a terminal will open. | |
11 | # * At the prompt, drag and drop the input file into the terminal. | |
12 | # | |
13 | # Alternatively (in Gnome, KDE, XFCE, etc.) | |
14 | # * create a launcher for the application menu; | |
15 | # * launch the script from the contextual menu of the HTML file. | |
16 | # | |
17 | # The reformatted file is created in the directory where the input file | |
18 | # resides, and its name is NAME-r.html. | |
19 | ||
20 | #============================================================================== | |
21 | ||
set -e

# Test whether the script is called from color-wdiff.
#
# pidof prints every matching PID on a single line, separated by spaces, so
# the parent PID is compared against each of them in turn rather than against
# the whole string. pidof exits non-zero when nothing matches (or when it is
# not installed); that is an expected outcome here, hence the "|| true" that
# keeps "set -e" from aborting the script.
#
# The flag is reset first so that a value inherited from the environment can
# never switch the script into non-interactive mode by accident.
called_from_color_wdiff=
p=$(pidof -x color-wdiff) || true
# shellcheck disable=SC2086 # word splitting on the PID list is intended
for color_wdiff_pid in $p; do
  if [[ "$color_wdiff_pid" == "$PPID" ]]; then
    called_from_color_wdiff=1
    break
  fi
done
27 | ||
#######################################
# Terminate the script, pausing first when it runs interactively.
#
# When the script is called from color-wdiff, interactivity is turned off and
# the script exits immediately so that the terminal can close normally.
# Otherwise the user is asked to close the terminal window or press Return,
# and the script exits once a line (or end of input) has been read.
#
# Globals:
#   called_from_color_wdiff (read) - "1" when the parent is color-wdiff
#   input                   (read) - path shown in the error message
# Arguments:
#   $1 - exit status; 1 additionally prints the "not an HTML" error message.
#        Defaults to 0 when omitted.
# Outputs:
#   Error message on stderr (status 1 only), prompt on stdout.
# Returns:
#   Never returns; always exits with status $1.
#######################################
function close_or_exit () {
  local status=${1:-0}

  if [[ "$called_from_color_wdiff" == "1" ]]; then
    exit "$status"
  fi

  if [[ "$status" == "1" ]]; then
    # printf with %s prints the path verbatim, even if it contains backslashes.
    printf '\n!!! %s doesn'\''t exist or is not an HTML.\n' "$input" >&2
  fi
  printf '\n*** Close the terminal window or press Return.\n'

  # Whatever the user types before Return is irrelevant, and end of input must
  # not be treated as an error: in every case the script ends here with the
  # requested status instead of falling through to the caller.
  read -r || true
  exit "$status"
}
42 | ||
# Get a valid HTML as input: either the first command-line argument, or, if
# that is missing, not a regular file or empty, a path entered at the prompt.
input=${1:-}
if [[ ! -f "$input" || ! -s "$input" ]]; then
  printf '\n*** reformat-html - Please enter the HTML file.\n'
  # No -r here: read then turns backslash-escaped characters (e.g. "\ ") in
  # the entered path into the plain characters.
  # NOTE(review): presumably some terminals escape dropped paths that way --
  # confirm before switching to "read -r".
  # End of input leaves $input empty and is reported like any other invalid
  # answer instead of aborting silently under "set -e".
  read input || true
  # Strip the single quotes that may surround a dragged-and-dropped path.
  input=${input%\'}; input=${input#\'}
  # Same requirements as for the command-line argument (regular, non-empty
  # file), plus the .html extension.
  if [[ ! -f "$input" || ! -s "$input" || "$input" != *.html ]]; then
    close_or_exit 1
  fi
fi

# Define the output file: color-wdiff passes it as the second argument;
# otherwise (or if that argument is missing) it is NAME-r.html next to the
# input file.
if [[ "$called_from_color_wdiff" == "1" && -n "${2:-}" ]]; then
  output=$2
else
  output=${input%.html}-r.html
fi
58 | ||
#######################################
# Reformat an HTML file in place with a fixed sequence of GNU sed passes.
#
# Each pass rewrites the file with "sed -i"; the order of the passes matters
# because later passes rely on the line layout produced by earlier ones.
#
# Arguments:
#   $1 - path of the file to rewrite (a scratch copy, not the original)
# Returns:
#   Status of the last sed command.
#######################################
function reformat_html_file () {
  local file=$1
  local tag pass
  # Number of times the "may need to be repeated" passes are run. Adjust if
  # the input needs more iterations.
  local -r repeats=2

  # Remove javascript, which shouldn't be reformatted, leading and trailing
  # spaces/tabs, multiple spaces, LF after </a> and <li>.
  # The squeeze pattern needs two spaces before the star: with a single one
  # it would also match the empty string and insert a space between every
  # pair of characters.
  sed -i -e '/jquery-1.11.0.min.js/,$d' \
    -e 's,\t, ,g' \
    -e 's,^ *,,' \
    -e 's,  *, ,g' \
    -e 's, *$,,' "$file"
  sed -i -e '/<\/a>$/ {N; s,<\/a>\n<,<\/a> <,}' "$file"
  sed -i -e '/^<li/ {N; s,>\n<a ,> <a ,}' "$file"

  # One string per paragraph, header or list item.
  for tag in li p strong a h3; do
    sed -i "/<${tag}[^>]*>$/ {N; s,\\n, ,}" "$file"
  done
  for tag in a strong; do
    sed -i "/<\\/${tag}>$/ {N; s,\\n, ,}" "$file"
  done
  # This command may need to be repeated. This could be done by looping back
  # to a sed marker, but a shell loop seems quicker.
  for (( pass = 0; pass < repeats; pass++ )); do
    sed -i '/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}' "$file"
  done
  sed -i -e '/ \/>$/ {N; s,\( \/>\)\n,\1 ,}' \
    -e '/ <a[^>]*>$/ {N; s,\(<a[^>]*>\)\n\([^<]\),\1 \2,}' "$file"

  # Make sure there is only one paragraph per string. This command may need
  # to be repeated.
  for (( pass = 0; pass < repeats; pass++ )); do
    sed -i 's,</p>\(.\+\)$,</p>\n\1,' "$file"
  done

  # Single out the tags which include p (will also work for pre).
  sed -i 's,\(.\)<p,\1\n<p,' "$file"

  # Single-out input meta and link. Each pass only splits the first match on
  # a line, so "link" is listed twice to run that pass a second time.
  for tag in input meta link link; do
    sed -i "s,> <${tag},>\n<${tag}," "$file"
  done

  # Remove leading and trailing spaces, double spaces and blank lines.
  # Fuse comment with </p>; separate truncated "~~~" comment from fused tag.
  sed -i -e 's,^ *,,' \
    -e 's, *$,,' \
    -e 's,  *, ,g' \
    -e '/^$/d' \
    -e '/<\/p>$/ {N;s,\n\(<!-- [^~]\),\1,}' \
    -e 's,~~~[ ]\?[-]\?[-]\?[ ]\?<,~~~\n<,' "$file"

  # Fuse header, section and footer with the corresponding div.
  for tag in header section footer; do
    sed -i "/^<${tag}/ {N; s,\\(<${tag}[^>]*>\\)\\n<div>,\\1<div>,}" "$file"
    sed -i "/^<\\/div>$/ {N; s,<\\/div>\\n\\(<\\/${tag}>\\),</div>\\1,}" "$file"
  done

  # Add LF before main sections and commented-out parts.
  sed -i 's,<!-- ~~,\n<!-- ~~,' "$file"
  sed -i '/COMMENTED OUT/ s,^,\n,' "$file"

  # Make the text more readable: blank line before these opening tags ...
  for tag in p h1 h2 h3 h4 dl title form; do
    sed -i "s,<${tag},\\n&," "$file"
  done
  # ... and after the lines containing these closing tags.
  for tag in p dl ul h1 h2 h3 h4 title head footer form script; do
    sed -i "/<\\/${tag}>/s,$,\\n," "$file"
  done
  sed -i '/<\/dd>/ {N; s,</dd>\n<dt,</dd>\n\n<dt,}' "$file"
  sed -i '/<\/dt>/ {N; s,</dt>\n<dd,</dt>\n\n<dd,}' "$file"
  sed -i -e 's,</p></span>$,</p>\n</span>,' \
    -e 's, alt=,\nalt=,g' \
    -e 's, | , |\n,g' "$file"

  # Remove extra LFs, if any.
  sed -i ':a /^$/ {N; s,\n$,,; ba}' "$file"
  sed -i ':a /^\n*$/ {$d; N; ba}' "$file"
}

# Work on a scratch copy so that the input file is never modified.
tmp=$(mktemp -t ref.XXXXXX) || close_or_exit 1
# Single quotes: $tmp is expanded (and quoted) when the trap runs, so the
# cleanup also works if the temporary path contains spaces.
trap 'rm -f -- "$tmp"' EXIT

cp -- "$input" "$tmp"

reformat_html_file "$tmp"

# Wrap the text.
fmt -s -w 95 "$tmp" > "$output"

close_or_exit 0