copied the static data into this repo
[enc.git] / en / kitchen / reformat-html.1
1 #!/bin/bash
2
3 # NAME
4 # reformat-html - reformat HTML files from emailselfdefense.fsf.org
5
6 # SYNOPSIS
7 # reformat-html /PATH/TO/NAME.html
8
9 # GRAPHIC INTERFACE HOWTO
10 # * Launch the script by double-clicking on it; a terminal will open.
11 # * At the prompt, drag and drop the input file into the terminal.
12 #
13 # Alternatively (in Gnome, KDE, XFCE, etc.)
14 # * create a launcher for the application menu;
15 # * launch the script from the contextual menu of the HTML file.
16 #
17 # The reformatted file is created in the directory where the input file
18 # resides, and its name is NAME-r.html.
19
20 #==============================================================================
21
set -e

# pid_in_list PID LIST
# Succeed when PID is one of the whitespace-separated process ids in LIST.
# pidof prints every matching pid on a single line, so comparing its whole
# output with $PPID fails as soon as several color-wdiff instances run.
function pid_in_list () {
  local wanted=$1 candidate
  for candidate in $2; do   # unquoted on purpose: split LIST into pids
    if test "$candidate" == "$wanted"; then
      return 0
    fi
  done
  return 1
}

# Test whether the script is called from color-wdiff
# (pidof fails when no such process exists; that is not an error here).
p=$(pidof -x color-wdiff) || true
if pid_in_list "$PPID" "$p"; then
  called_from_color_wdiff=1
fi
27
function sleep_or_exit () {
  # Terminate the script with exit status $1 (0 when omitted).
  #
  # When the script is called from color-wdiff ($called_from_color_wdiff is
  # "1"), interactivity is turned off and the terminal closes normally.
  # Otherwise, status 1 first reports on stderr that $input doesn't exist or
  # is not HTML, then waits 3 seconds before exiting.
  local rc=${1:-0}

  if test "$called_from_color_wdiff" != "1" && test "$rc" == "1"; then
    # printf '%s' prints the file name verbatim; echo -e would interpret
    # backslash sequences contained in it.
    printf '\n!!! %s doesn'\''t exist or is not HTML.\n' "$input" 1>&2
    sleep 3
  fi
  exit "$rc"
}
42
# Get a valid HTML as input: the first argument, or, when that is not a
# non-empty regular file, a path entered at the prompt.
input=$1
if [[ ! -f "$input" || ! -s "$input" ]]; then
  echo -e "\n*** reformat-html - Please enter the HTML file."
  # "|| true": an end-of-file on stdin must reach the validation below
  # instead of silently aborting the script through "set -e".
  # NOTE(review): read is left without -r, so backslash-escaped characters in
  # a pasted path are unescaped -- presumably wanted; confirm before adding -r.
  read input || true
  # Strip one enclosing pair of single quotes from the entered path.
  input=${input%\'}; input=${input#\'}
fi
# The input must be an existing regular file whose name ends in ".html".
if [[ ! -f "$input" || "$input" != *.html ]]; then
  sleep_or_exit 1
fi
51
# Define the output file: the second argument when called from color-wdiff,
# NAME-r.html beside the input file otherwise.
case "$called_from_color_wdiff" in
  1) output=$2 ;;
  *) output=${input%.html}-r.html ;;
esac
58
# Scratch files, removed when the script exits.  The trap is installed before
# the files are created so that a failing mktemp does not leave the files
# created before it behind (rm -f ignores the still-empty names).
tmp= tmp1= tmp2=
trap 'rm -f -- "$tmp" "$tmp1" "$tmp2"' EXIT
tmp=$(mktemp -t ref.XXXXXX) || exit 1
tmp1=$(mktemp -t ref.XXXXXX) || exit 1
tmp2=$(mktemp -t ref.XXXXXX) || exit 1
63
# Don't touch the scripts.
#
# split_at_script SRC HEAD TAIL
# Write the lines of SRC that precede the first line containing "<script" to
# HEAD, and the lines from that one to the end of SRC to TAIL.  All three
# paths are quoted, so names containing spaces work.
function split_at_script () {
  sed -n '/<script/,$p' -- "$1" > "$3" || return
  sed '/<script/,$d' -- "$1" > "$2" || return
}

split_at_script "$input" "$tmp" "$tmp1"
67
# reformat_markup FILE
# Reformat in place the HTML markup stored in FILE.  Each sed pass works on
# the result of the previous ones, so the order below matters.  FILE is always
# quoted, so a temporary directory whose name contains spaces is harmless.
# Returns 1 without doing anything when FILE is not a regular file.
function reformat_markup () {
  local file=$1 tag

  if test ! -f "$file"; then
    return 1
  fi

  # Clean up extra spaces and tabs; remove blank lines.
  sed -i -e 's,[[:space:]]\+, ,g' \
         -e 's,^ ,,' \
         -e 's, $,,' \
         -e 's,> <,><,g' \
         -e '/^$/d' "$file"

  # For the language list: fix the commented-out items (broken by po4a).
  sed -i -e '/<\/a>$/ {N; s,<\/a>\n<,<\/a><,}' "$file"
  sed -i -e '/^<li/ {N; s,>\n<a ,><a ,}' "$file"

  # </p> at the end of the line (much better to do it by hand).
  sed -i -e '/[>.]$/ {N;s,\n</strong,</strong,}' "$file"
  sed -i -e '/[>.]$/ {N;s,\n</p,</p,}' "$file"
  sed -i -e '/"$/ {N;s,\n</p,</p,}' "$file"

  # <p> and its attributes on the same line.
  sed -i -e '/<p$/ {N;s,\n, ,}' "$file"

  # Remove LF after opening tags.
  for tag in li p strong a h3; do
    sed -i "/<${tag}[^>]*>$/ {N; s,\\n,,}" "$file"
  done

  # Single out paragraphs (fused with <noscript> in one instance), main
  # sections, and image links (e.g. infographic) when followed by text.
  # Separate truncated "~~~" comment from fused tag.
  # Fold img tags; Add a line after footer.
  sed -i -e 's,\([^t]>\)<p,\1\n<p,' \
         -e 's,><noscript,>\n<noscript,' \
         -e 's,<!-- ~~,\n<!-- ~~,' \
         -e 's,/></a> \([[:alnum:]]\),/></a>\n\1,' \
         -e 's,~~~[ ]\?[-]\?[-]\?[ ]\?<,~~~\n<,' \
         -e 's, src=",\nsrc=",' \
         -e 's, alt=",\nalt=",' \
         -e 's,<!-- End #footer -->,&\n,' "$file"

  # Fuse header, section and footer with the corresponding div.
  for tag in header section footer; do
    sed -i "/^<${tag}/ {N; s,\\(<${tag}[^>]*>\\)\\n<div>,\\1<div>,}" "$file"
    sed -i "/^<\\/div>$/ {N; s,<\\/div>\\n\\(<\\/${tag}>\\),</div>\\1,}" "$file"
  done

  # Make the text more readable: one <link>/<meta> per line, a blank line
  # after some closing tags and before some opening tags.
  sed -i 's,\(<link[^>]*>\)<,\1\n<,' "$file"
  sed -i 's,\(<meta[^>]*>\)<,\1\n<,' "$file"

  for tag in p dd li h1 h2 h3 form; do
    sed -i "/<\\/${tag}>$/s,$,\\n," "$file"
  done
  for tag in p dl ul h1 h2 h3 h4 form body; do
    sed -i "/^<${tag}/s,^,\\n," "$file"
  done
  # Move the blank line from before </dl> and </ul> to after them.
  sed -i '/^$/ {N; s,^\n</dl>,</dl>\n,}' "$file"
  sed -i '/^$/ {N; s,^\n</ul>,</ul>\n,}' "$file"

  # Remove blank lines in menus and image blocks.
  sed -i '/^$/ {N; s,^\n<li><img,<li><img,}' "$file"
  sed -i '/^$/ {N; s,^\n<li[^>]*><a ,<li><a ,}' "$file"
  sed -i '/JS enabled -->/ {N;N; s,\n\n,\n,}' "$file"

  # Unwrap the last item of the language list for easier
  # replacement.
  sed -i '/<li><a/ {N; s,\n\(.*Translation_Guide\), \1,}' "$file"
}

reformat_markup "$tmp"
132
# Wrap lines at 80 columns, splitting long lines only (no refilling).
fmt -s -w 80 "$tmp" > "$tmp2"

# Suppress repeated empty lines while joining the reformatted markup with the
# script part.  "$output" is quoted because it is derived from the input
# path, which may contain spaces.
# NOTE(review): cat -s also squeezes repeated empty lines inside the script
# part ($tmp1) -- confirm that this is acceptable.
cat -s "$tmp2" "$tmp1" > "$output"

sleep_or_exit 0