Commit | Line | Data |
---|---|---|
f275dd4f TG |
1 | #!/bin/bash |
2 | ||
3 | # color-wdiff visualizes the differences between 2 versions of the same page. | |
4 | # This script was extracted from GNUN's GNUmakefile (function "mark-outdated") | |
5 | # and adapted. GNUN (http://www.gnu.org/software/gnun/) is under GPLv3. | |
6 | ||
7 | # Howto: | |
8 | # - The script is in [...]/enc/fr/kitchen. | |
9 | # - cd to that directory and run: | |
10 | # ./color-wdiff FILE1 FILE2 | |
11 | # (FILE1 and FILE2 are the paths to the files to be compared.) | |
12 | # - The diff file is created in the directory where FILE1 resides. | |
13 | # | |
14 | # For example, compare old and new versions of index.t.html: | |
15 | # ./color-wdiff index.t.html index-new.t.html | |
16 | ||
17 | # Note: the diff is much easier to use if the HTML is not indented. | |
18 | # Reformatting is done by the function "page_reformat", which leaves the | |
19 | # original page intact. | |
20 | ||
21 | ||
22 | file=$1 | |
23 | file1=$2 | |
24 | diff_file=${file%.html}-diff.html | |
25 | ||
26 | page_reformat () { | |
27 | # Reformats the original pages of emailselfdefense.fsf.org. | |
28 | ||
29 | cp $1 $1.tmp | |
30 | ||
31 | # Remove javascript, which shouldn't be reformatted. | |
32 | sed -i '/jquery-1.11.0.min.js/,$d' $1.tmp | |
33 | ||
34 | # Remove leading and trailing spaces/tabs. | |
35 | sed -i 's,\t, ,g' $1.tmp | |
36 | sed -i 's,^ *,,' $1.tmp | |
37 | sed -i 's, *$,,' $1.tmp | |
38 | ||
39 | # Remove LF after </a>. | |
40 | sed -i '/<\/a>$/ {N; s,<\/a>\n\([^<]\),<\/a>\1,}' $1.tmp | |
41 | ||
42 | # One string per paragraph, header or list item. | |
43 | for tag in li p strong a h3; do | |
44 | sed -i "/<$tag[^>]*>$/ {N; s,\\n, ,}" $1.tmp | |
45 | done | |
46 | for tag in a strong; do | |
47 | sed -i "/<\\/$tag>$/ {N; s,\\n, ,}" $1.tmp | |
48 | done | |
49 | # This command may need to be repeated. Adjust the number of repeats. This | |
50 | # could be done by looping back to a sed marker, but a while loop seems | |
51 | # quicker. | |
52 | i=0 | |
53 | while (( i < 2 )); do | |
54 | sed -i '/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}' $1.tmp | |
55 | let i=i+1 | |
56 | done | |
57 | ||
58 | sed -i '/ \/>$/ {N; s,\( \/>\)\n,\1 ,}' $1.tmp | |
59 | sed -i '/ <a[^>]*>$/ {N; s,\(<a[^>]*>\)\n\([^<]\),\1 \2,}' $1.tmp | |
60 | ||
61 | # Make sure there is only one paragraph per string. This command may need to | |
62 | # be repeated. Adjust the number of repeats. | |
63 | i=0 | |
64 | while (( i < 2 )); do | |
65 | sed -i 's,</p>\(.\+\)$,</p>\n\1,' $1.tmp | |
66 | let i=i+1 | |
67 | done | |
68 | ||
69 | # Single out the tags which include p (will also work for pre). | |
70 | sed -i 's,\(.\)<p,\1\n<p,' $1.tmp | |
71 | ||
72 | # Single-out input meta and link. | |
73 | for tag in input meta link link; do | |
74 | sed -i "s,> <$tag,>\n<$tag," $1.tmp | |
75 | done | |
76 | # Remove leading and trailing spaces, double spaces and blank lines. | |
77 | sed -i 's,^ *,,' $1.tmp | |
78 | sed -i 's, *$,,' $1.tmp | |
79 | sed -i 's,  , ,g' $1.tmp | |
80 | sed -i '/^$/d' $1.tmp | |
81 | ||
82 | # Fuse comment with </p>. | |
83 | sed -i '/<\/p>$/ {N;s,\n\(<!-- [^~]\),\1,}' $1.tmp | |
84 | ||
85 | # Separate truncated "~~~" comment from fused tag. | |
86 | sed -i 's,~~~[ ]\?[-]\?[-]\?[ ]\?<,~~~\n<,' $1.tmp | |
87 | ||
88 | # Fuse header, section and footer with the corresponding div. | |
89 | for tag in header section footer; do | |
90 | sed -i "/^<$tag/ {N; s,\\(<$tag[^>]*>\\)\\n<div>,\\1<div>,}" $1.tmp | |
91 | sed -i "/^<\\/div>$/ {N; s,<\\/div>\\n\\(<\\/$tag>\\),</div>\\1,}" $1.tmp | |
92 | done | |
93 | ||
94 | # Add LF before main sections and commented-out parts. | |
95 | sed -i 's,<!-- ~~,\n<!-- ~~,' $1.tmp | |
96 | sed -i '/COMMENTED OUT/ s,^,\n,' $1.tmp | |
97 | ||
98 | # Make the text more readable. | |
99 | for tag in p h1 h2 h3 h4 dl title form; do | |
100 | sed -i "s,<$tag,\\n&," $1.tmp | |
101 | done | |
102 | for tag in p dl ul h1 h2 h3 h4 title head footer form script; do | |
103 | sed -i "/<\\/$tag>/s,$,\\n," $1.tmp | |
104 | done | |
105 | sed -i '/<\/dd>/ {N; s,</dd>\n<dt,</dd>\n\n<dt,}' $1.tmp | |
106 | sed -i '/<\/dt>/ {N; s,</dt>\n<dd,</dt>\n\n<dd,}' $1.tmp | |
107 | sed -i 's,</p></span>$,</p>\n</span>,' $1.tmp | |
108 | ||
109 | sed -i 's, alt=,\nalt=,g' $1.tmp | |
110 | sed -i 's, | , |\n,g' $1.tmp | |
111 | ||
112 | # Remove extra LFs, if any. | |
113 | sed -i ':a /^$/ {N; s,\n$,,; ba}' $1.tmp | |
114 | sed -i ':a /^\n*$/ {$d; N; ba}' $1.tmp | |
115 | ||
116 | # Wrap the text. | |
117 | fmt -s -w 95 $1.tmp > $1.r | |
118 | } | |
119 | ||
120 | ||
121 | page_reformat ${file} | |
122 | # Replace chevrons with HTML entities; the files are treated as simple text. | |
123 | sed "s/</\&lt;/g;s/>/\&gt;/g" ${file}.r > ${file}.tmp | |
124 | page_reformat ${file1} | |
125 | sed "s/</\&lt;/g;s/>/\&gt;/g" ${file1}.r > ${file1}.tmp | |
126 | ||
127 | # Add an HTML header to the wdiff output, with style for visualizing the | |
128 | # insertions and deletions, and write the title of the page. | |
129 | cat > ${diff_file} << EOF | |
130 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
131 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
132 | <!-- Generated by GNUN --> | |
133 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
134 | <head> | |
135 | <meta http-equiv="content-type" content="text/html; charset=utf-8" /> | |
136 | <title>${diff_file##*\/}</title> | |
137 | <style type="text/css"> | |
138 | span.removed { background-color: #f22; color: #000; } | |
139 | span.inserted { background-color: #2f2; color: #000; } | |
140 | </style></head> | |
141 | <body><pre> | |
142 | EOF | |
143 | ||
144 | # Run wdiff with options to add the proper markup at the beginning and end of | |
145 | # deletions and insertions. | |
146 | wdiff --start-delete '<span class="removed"><del><strong>' \ | |
147 | --end-delete '</strong></del></span>' \ | |
148 | --start-insert '<span class="inserted"><ins><em>' \ | |
149 | --end-insert '</em></ins></span>' \ | |
150 | ${file}.tmp ${file1}.tmp >> $diff_file | |
151 | ||
152 | # Add the closing tags. | |
153 | echo '</pre></body></html>' >> $diff_file | |
154 | ||
155 | # Clean up. | |
156 | rm -f ${file}.tmp ${file1}.tmp ${file}.r ${file1}.r |