fr: finish updating to v2.1; reword some sentences; add Roboto in footer (thx Ilias...
[enc.git] / fr / kitchen / page-reformat
#!/bin/bash

# page-reformat: reformat the original pages of emailselfdefense.fsf.org
#
# Usage:
#   $ ./page-reformat [path to the HTML file]
# You can also drag-and-drop the file into the terminal.
# The result is written next to the original with an extra ".html" appended
# (page.html -> page.html.html) to avoid writing over the original page.
#
# The sed expressions rely on GNU sed (in-place -i, and \n, \+, \? inside
# expressions).
#
# If you want to wrap the text, see the commented-out fmt command in
# reformat_page.

# Scratch directory of the run in progress (empty when there is none).
scratch_dir=''

# Remove the scratch directory, if any. Also installed as the EXIT trap so
# that an interrupted run does not leave its working copy behind.
cleanup() {
  if [[ -n "$scratch_dir" ]]; then
    rm -rf -- "$scratch_dir"
    scratch_dir=''
  fi
}
trap cleanup EXIT

# Print a one-line usage message to stderr.
usage() {
  printf 'Usage: %s [path to the HTML file]\n' "${0##*/}" >&2
}

#######################################
# Apply sed expressions to a file, each one as a separate in-place pass.
# The passes must not be merged into one sed program: most of them use N to
# join lines, and each pass depends on the line breaks left by the previous
# one.
# Arguments: $1 - file to edit in place
#            $2... - sed expressions, applied in order
# Returns:   0 on success, the status of the first failing sed otherwise
#######################################
apply_passes() {
  local file=$1
  local expr
  shift
  for expr in "$@"; do
    sed -i -e "$expr" -- "$file" || return
  done
}

#######################################
# Reformat one HTML page and write the result to "<page>.html".
# The original page is only read; all editing happens on a private copy.
# Globals:   scratch_dir (written)
# Arguments: $1 - path to the HTML file
# Outputs:   error messages on stderr
# Returns:   0 on success, non-zero if the page cannot be read or a step fails
#######################################
reformat_page() {
  local src=$1
  local out="${src}.html"
  local work tag
  local status=0
  local -a passes=()
  local -a final_passes=()

  if [[ ! -f "$src" || ! -r "$src" ]]; then
    printf '%s: cannot read file: %s\n' "${0##*/}" "$src" >&2
    return 1
  fi

  # Work in a private directory instead of a fixed "tmp" in the current
  # directory. cp creates the working copy itself, so it gets the usual
  # permissions of a copied file rather than mktemp's private mode.
  scratch_dir=$(mktemp -d) || return 1
  work="${scratch_dir}/page"
  if ! cp -- "$src" "$work"; then
    cleanup
    return 1
  fi

  # Remove javascript, which shouldn't be reformatted.
  passes+=('/jquery-1.11.0.min.js/,$d')

  # Remove leading and trailing spaces/tabs.
  passes+=('s,\t, ,g')
  passes+=('s,^ *,,')
  passes+=('s, *$,,')

  # Remove LF after </a>.
  passes+=('/<\/a>$/ {N; s,<\/a>\n\([^<]\),<\/a>\1,}')

  # One string per paragraph, header or list item.
  for tag in li p strong a h3; do
    passes+=('/<'"$tag"'[^>]*>$/ {N; s,\n, ,}')
  done
  for tag in a strong; do
    passes+=('/<\/'"$tag"'>$/ {N; s,\n, ,}')
  done
  # This pass may need to be repeated; it is queued twice. Add more copies if
  # the pages need it.
  passes+=('/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}')
  passes+=('/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}')

  passes+=('/ \/>$/ {N; s,\( \/>\)\n,\1 ,}')
  passes+=('/ <a[^>]*>$/ {N; s,\(<a[^>]*>\)\n\([^<]\),\1 \2,}')

  # Make sure there is only one paragraph per string. This pass may need to
  # be repeated; it is queued twice.
  passes+=('s,</p>\(.\+\)$,</p>\n\1,')
  passes+=('s,</p>\(.\+\)$,</p>\n\1,')

  # Single out the tags which include p (will also work for pre).
  passes+=('s,\(.\)<p,\1\n<p,')

  # Single-out input, meta and link. Each pass only splits the first match on
  # a line; "link" is listed twice, which repeats that pass.
  for tag in input meta link link; do
    passes+=('s,> <'"$tag"',>\n<'"$tag"',')
  done

  # Remove leading and trailing spaces, double spaces and blank lines.
  passes+=('s,^ *,,')
  passes+=('s, *$,,')
  # Two consecutive spaces become one. Written with \{2\} so the pattern
  # cannot silently degrade to a single space (which would be a no-op).
  passes+=('s, \{2\}, ,g')
  passes+=('/^$/d')

  # Fuse comment with </p>.
  passes+=('/<\/p>$/ {N;s,\n\(<!-- [^~]\),\1,}')

  # Separate truncated "~~~" comment from fused tag.
  passes+=('s,~~~[ ]\?[-]\?[-]\?[ ]\?<,~~~\n<,')

  # Fuse header, section and footer with the corresponding div.
  for tag in header section footer; do
    passes+=('/^<'"$tag"'/ {N; s,\(<'"$tag"'[^>]*>\)\n<div>,\1<div>,}')
    passes+=('/^<\/div>$/ {N; s,<\/div>\n\(<\/'"$tag"'>\),</div>\1,}')
  done

  # Add LF before main sections and commented-out parts.
  passes+=('s,<!-- ~~,\n<!-- ~~,')
  passes+=('/COMMENTED OUT/ s,^,\n,')

  # Make the text more readable.
  for tag in p h1 h2 h3 h4 dl title form; do
    passes+=('s,<'"$tag"',\n&,')
  done
  for tag in p dl ul h1 h2 h3 h4 title head footer form script; do
    passes+=('/<\/'"$tag"'>/s,$,\n,')
  done
  passes+=('/<\/dd>/ {N; s,</dd>\n<dt,</dd>\n\n<dt,}')
  passes+=('/<\/dt>/ {N; s,</dt>\n<dd,</dt>\n\n<dd,}')
  passes+=('s,</p></span>$,</p>\n</span>,')

  passes+=('s, alt=,\nalt=,g')
  passes+=('s, | , |\n,g')

  # Remove extra LFs, if any (applied to the result file).
  final_passes+=(':a /^$/ {N; s,\n$,,; ba}')
  final_passes+=(':a /^\n*$/ {$d; N; ba}')

  if apply_passes "$work" "${passes[@]}"; then
    # To wrap the text instead, comment out the mv line and uncomment the
    # fmt line.
    mv -- "$work" "$out" || status=$?
    #fmt -s -w 95 "$work" > "$out" || status=$?
  else
    status=$?
  fi

  cleanup

  if (( status == 0 )); then
    apply_passes "$out" "${final_passes[@]}" || status=$?
  fi
  return "$status"
}

# Entry point: reformat the page named by the first argument.
# Returns 2 (after printing usage) when no argument is given.
main() {
  if (( $# < 1 )); then
    usage
    return 2
  fi
  reformat_page "$1"
}

main "$@"