Commit | Line | Data |
---|---|---|
5ec82b3f A |
1 | #!/bin/bash |
2 | ||
3 | # NAME | |
4 | # reformat-html - reformat HTML files from emailselfdefense.fsf.org | |
5 | ||
6 | # SYNOPSIS | |
7 | # reformat-html /PATH/TO/NAME.html | |
8 | ||
9 | # GRAPHIC INTERFACE HOWTO | |
10 | # * Launch the script by double-clicking on it; a terminal will open. | |
11 | # * At the prompt, drag and drop the input file into the terminal. | |
12 | # | |
13 | # Alternatively (in Gnome, KDE, XFCE, etc.) | |
14 | # * create a launcher for the application menu; | |
15 | # * launch the script from the contextual menu of the HTML file. | |
16 | # | |
17 | # The reformatted file is created in the directory where the input file | |
18 | # resides, and its name is NAME-r.html. | |
19 | ||
20 | #============================================================================== | |
21 | ||
set -e

# Test whether the script is called from color-wdiff.
# pidof prints the PIDs of all matching processes on one line, separated by
# spaces, so each PID is compared with the parent PID individually; comparing
# the whole string would fail as soon as two instances of color-wdiff run.
# "|| true" keeps "set -e" from aborting when pidof finds nothing.
p=$(pidof -x color-wdiff) || true
# $p is deliberately unquoted: word splitting yields one PID per iteration.
for pid in $p; do
  if test "$pid" == "$PPID"; then
    called_from_color_wdiff=1
    break
  fi
done
27 | ||
# sleep_or_exit [STATUS]
#
# Terminate the script with exit status STATUS (0 when omitted).
#
# Globals read:
#   called_from_color_wdiff - "1" turns off interactivity, so the terminal
#                             closes normally when color-wdiff is the caller.
#   input                   - file name shown in the error message.
#
# When run interactively with STATUS equal to 1, an error message is written
# to stderr and the script pauses for 3 seconds before exiting, leaving time
# to read the message.
function sleep_or_exit () {
  local status=${1:-0}

  if test "$called_from_color_wdiff" != "1" && test "$status" == "1"; then
    # printf with %s prints the file name verbatim; "echo -e" would interpret
    # backslash sequences (\n, \t, ...) contained in the name.
    printf '\n!!! %s doesn'\''t exist or is not HTML.\n' "$input" 1>&2
    sleep 3
  fi
  exit "$status"
}
42 | ||
# Get a valid HTML as input.
# If the first argument is not an existing, non-empty regular file, ask for
# the file name on the terminal instead.
input=$1
if [[ ! -f "$input" || ! -s "$input" ]]; then
  echo -e "\n*** reformat-html - Please enter the HTML file."
  # NOTE(review): read is used without -r, so backslashes in the typed or
  # dropped path are processed by the shell -- confirm this is intended.
  read input
  # Strip one surrounding pair of single quotes (presumably added by the
  # terminal when a file is dragged and dropped into it).
  input=${input%\'}; input=${input#\'}
fi

# The input must be a regular file whose name ends in ".html".
if [[ ! -f "$input" || "${input%.html}" == "$input" ]]; then
  sleep_or_exit 1
fi

# Define the output file.
# color-wdiff passes the output name as second argument; otherwise the output
# is NAME-r.html next to the input file.
if [[ "$called_from_color_wdiff" == "1" ]]; then
  output=$2
else
  output=${input%.html}-r.html
fi

# Install the cleanup trap before creating the temporary files, so that files
# already created are removed even if a later mktemp fails.
tmp='' tmp1='' tmp2=''
trap 'rm -f "$tmp" "$tmp1" "$tmp2"' EXIT
tmp=$(mktemp -t ref.XXXXXX) || exit 1
tmp1=$(mktemp -t ref.XXXXXX) || exit 1
tmp2=$(mktemp -t ref.XXXXXX) || exit 1
63 | ||
# Don't touch the scripts.
# Everything from the first line containing "<script" to the end of the file
# goes to $tmp1 unchanged; only the part before it ($tmp) is reformatted.
# The file names are quoted so that paths containing spaces work, and "--"
# keeps a name starting with "-" from being parsed as an option.
sed -n '/<script/,$p' -- "$input" > "$tmp1"
sed '/<script/,$d' -- "$input" > "$tmp"
67 | ||
# Clean up extra spaces and tabs; remove blank lines.
# Each run of whitespace becomes a single space, a space at the start or end
# of a line is dropped, the space between adjacent tags is removed, and empty
# lines are deleted.
# ("$tmp" is quoted throughout so a TMPDIR containing spaces cannot break the
# commands.)
sed -i -e 's,[[:space:]]\+, ,g' \
  -e 's,^ ,,' \
  -e 's, $,,' \
  -e 's,> <,><,g' \
  -e '/^$/d' "$tmp"

# For the language list: fix the commented-out items (broken by po4a).
# A line ending in </a> is joined with a following line that starts with "<";
# a line starting with <li is joined with a following "<a " line.
sed -i -e '/<\/a>$/ {N; s,<\/a>\n<,<\/a><,}' "$tmp"
sed -i -e '/^<li/ {N; s,>\n<a ,><a ,}' "$tmp"

# </p> at the end of the line (much better to do it by hand).
# A closing </strong> or </p> at the start of the next line is pulled up onto
# a line ending in ">", "." or (for </p>) a double quote.
sed -i -e '/[>.]$/ {N;s,\n</strong,</strong,}' "$tmp"
sed -i -e '/[>.]$/ {N;s,\n</p,</p,}' "$tmp"
sed -i -e '/"$/ {N;s,\n</p,</p,}' "$tmp"

# <p> and its attributes on the same line.
sed -i -e '/<p$/ {N;s,\n, ,}' "$tmp"

# Remove LF after opening tags.
# One sed pass per tag: a line ending with the opening tag is joined with the
# line that follows it.
for tag in li p strong a h3; do
  sed -i "/<$tag[^>]*>$/ {N; s,\\n,,}" "$tmp"
done
91 | ||
# Single out paragraphs (fused with <noscript> in one instance), main
# sections, and image links (e.g. infographic) when followed by text.
# Separate truncated "~~~" comment from fused tag.
# Fold img tags; Add a line after footer.
# Each expression inserts a line break at the first match on a line (no "g"
# flag is used).
# ("$tmp" is quoted so a TMPDIR containing spaces cannot break the commands.)
sed -i -e 's,\([^t]>\)<p,\1\n<p,' \
  -e 's,><noscript,>\n<noscript,' \
  -e 's,<!-- ~~,\n<!-- ~~,' \
  -e 's,/></a> \([[:alnum:]]\),/></a>\n\1,' \
  -e 's,~~~[ ]\?[-]\?[-]\?[ ]\?<,~~~\n<,' \
  -e 's, src=",\nsrc=",' \
  -e 's, alt=",\nalt=",' \
  -e 's,<!-- End #footer -->,&\n,' "$tmp"

# Fuse header, section and footer with the corresponding div.
# First pass: the opening tag is joined with a "<div>" on the next line.
# Second pass: a line consisting of "</div>" is joined with the closing tag
# on the next line.
for tag in header section footer; do
  sed -i "/^<$tag/ {N; s,\\(<$tag[^>]*>\\)\\n<div>,\\1<div>,}" "$tmp"
  sed -i "/^<\\/div>$/ {N; s,<\\/div>\\n\\(<\\/$tag>\\),</div>\\1,}" "$tmp"
done
110 | ||
# Make the text more readable.
# Put a line break after a <link ...> or <meta ...> tag that is directly
# followed by another tag.
# ("$tmp" is quoted so a TMPDIR containing spaces cannot break the commands.)
sed -i 's,\(<link[^>]*>\)<,\1\n<,' "$tmp"
sed -i 's,\(<meta[^>]*>\)<,\1\n<,' "$tmp"

# Add an empty line after lines ending with one of these closing tags...
for tag in p dd li h1 h2 h3 form; do
  sed -i "/<\\/$tag>$/s,$,\\n," "$tmp"
done
# ...and an empty line before lines starting with one of these opening tags.
for tag in p dl ul h1 h2 h3 h4 form body; do
  sed -i "/^<$tag/s,^,\\n," "$tmp"
done
# Move the empty line that precedes </dl> or </ul> to after the closing tag.
sed -i '/^$/ {N; s,^\n</dl>,</dl>\n,}' "$tmp"
sed -i '/^$/ {N; s,^\n</ul>,</ul>\n,}' "$tmp"

# Remove blank lines in menus and image blocks.
# Note that the second expression also rewrites "<li ATTRIBUTES><a " as
# "<li><a ", dropping the attributes of the <li> tag.
sed -i '/^$/ {N; s,^\n<li><img,<li><img,}' "$tmp"
sed -i '/^$/ {N; s,^\n<li[^>]*><a ,<li><a ,}' "$tmp"
sed -i '/JS enabled -->/ {N;N; s,\n\n,\n,}' "$tmp"

# Unwrap the last item of the language list for easier
# replacement.
sed -i '/<li><a/ {N; s,\n\(.*Translation_Guide\), \1,}' "$tmp"
132 | ||
# Wrap lines.
# fmt -s only splits long lines (it never joins short ones); the maximum
# width is 80 columns.
fmt -s -w 80 "$tmp" > "$tmp2"

# Suppress repeated empty lines.
# The reformatted part is followed by the script part saved in $tmp1.
# NOTE(review): cat -s also squeezes repeated empty lines inside the script
# part -- confirm that this is acceptable.
# "$output" is quoted because it is derived from the input path, which may
# contain spaces.
cat -s "$tmp2" "$tmp1" > "$output"

sleep_or_exit 0