#!/bin/bash
-# color-wdiff visualizes the differences between 2 versions of the same page.
-# This script was extracted from GNUN's GNUmakefile (function "mark-outdated")
-# and adapted. GNUN (http://www.gnu.org/software/gnun/) is under GPLv3.
-
-# Howto:
-# - The script is in [...]/enc/fr/kitchen.
-# - cd to that directory and run:
-# ./color-wdiff FILE1 FILE2
-# (FILE1 and FILE2 are the paths to the files to be compared.)
-# - The diff file is created in the directory where FILE1 resides.
+# NAME
+# color-wdiff - visualize differences between text files
+
+# SYNOPSIS
+# color-wdiff FILE0 FILE1
+
+# DEPENDENCIES
+# wdiff (required); reformat-html in $HOME/bin (optional: when it is
+# missing, HTML input is compared without being reformatted).
+
+# DESCRIPTION
+# 1. If the input files are HTML, they are reformatted, to remove
+# indentation among other things.
+# 2. The markup is inactivated by replacing angle brackets with the
+# corresponding entities.
+# 3. The files are compared with wdiff, using options which label
+# insertions and deletions. The labels are HTML tags with specific
+# classes.
+# 4. The diff is turned into a valid HTML page by adding the required
+# markup, plus CSS style for the insertion and deletion classes.
#
-# For example, compare old and new versions of index.t.html:
-# ./color-diff index.t.html index-new.t.html
-
-# Note: the diff is much easier to use if the HTML is not indented.
-# Reformatting is done by the function "page_reformat", which leaves the
-# original page intact.
-
-
-file=$1
-file1=$2
-diff_file=${file%.html}-diff.html
-
-page_reformat () {
- #reformats the original pages of emailselfdefense.fsf.org
-
- cp $1 $1.tmp
-
- # Remove javascript, which shouldn't be reformatted.
- sed -i '/jquery-1.11.0.min.js/,$d' $1.tmp
-
- # Remove leading and trailing spaces/tabs.
- sed -i 's,\t, ,g' $1.tmp
- sed -i 's,^ *,,' $1.tmp
- sed -i 's, *$,,' $1.tmp
-
- # Remove LF after </a>.
- sed -i '/<\/a>$/ {N; s,<\/a>\n\([^<]\),<\/a>\1,}' $1.tmp
-
- # One string per paragraph, header or list item.
- for tag in li p strong a h3; do
- sed -i "/<$tag[^>]*>$/ {N; s,\\n, ,}" $1.tmp
- done
- for tag in a strong; do
- sed -i "/<\\/$tag>$/ {N; s,\\n, ,}" $1.tmp
- done
- # This command may need to be repeated. Adjust the number of repeats. This
- # could be done by looping back to a sed marker, but a while loop seems
- # quicker.
- i=0
- while (( i < 2 )); do
- sed -i '/[^<>]$/ {N; s,\([^<>]\)\n,\1 ,}' $1.tmp
- let i=i+1
- done
-
- sed -i '/ \/>$/ {N; s,\( \/>\)\n,\1 ,}' $1.tmp
- sed -i '/ <a[^>]*>$/ {N; s,\(<a[^>]*>\)\n\([^<]\),\1 \2,}' $1.tmp
-
- # Make sure there is only one paragraph per string. This command may need to
- # be repeated. Adjust the number of repeats.
- i=0
- while (( i < 2 )); do
- sed -i 's,</p>\(.\+\)$,</p>\n\1,' $1.tmp
- let i=i+1
- done
-
- # Single out the tags which include p (will also work for pre).
- sed -i 's,\(.\)<p,\1\n<p,' $1.tmp
-
- # Single-out input meta and link.
- for tag in input meta link link; do
- sed -i "s,> <$tag,>\n<$tag," $1.tmp
- done
- # Remove leading and trailing spaces, double spaces and blank lines.
- sed -i 's,^ *,,' $1.tmp
- sed -i 's, *$,,' $1.tmp
- sed -i 's, , ,g' $1.tmp
- sed -i '/^$/d' $1.tmp
-
- # Fuse comment with </p>.
- sed -i '/<\/p>$/ {N;s,\n\(<!-- [^~]\),\1,}' $1.tmp
-
- # Separate truncated "~~~" comment from fused tag.
- sed -i 's,~~~[ ]\?[-]\?[-]\?[ ]\?<,~~~\n<,' $1.tmp
-
- # Fuse header, section and footer with the corresponding div.
- for tag in header section footer; do
- sed -i "/^<$tag/ {N; s,\\(<$tag[^>]*>\\)\\n<div>,\\1<div>,}" $1.tmp
- sed -i "/^<\\/div>$/ {N; s,<\\/div>\\n\\(<\\/$tag>\\),</div>\\1,}" $1.tmp
- done
-
- # Add LF before main sections and commented-out parts.
- sed -i 's,<!-- ~~,\n<!-- ~~,' $1.tmp
- sed -i '/COMMENTED OUT/ s,^,\n,' $1.tmp
-
- # Make the text more readable.
- for tag in p h1 h2 h3 h4 dl title form; do
- sed -i "s,<$tag,\\n&," $1.tmp
- done
- for tag in p dl ul h1 h2 h3 h4 title head footer form script; do
- sed -i "/<\\/$tag>/s,$,\\n," $1.tmp
- done
- sed -i '/<\/dd>/ {N; s,</dd>\n<dt,</dd>\n\n<dt,}' $1.tmp
- sed -i '/<\/dt>/ {N; s,</dt>\n<dd,</dt>\n\n<dd,}' $1.tmp
- sed -i 's,</p></span>$,</p>\n</span>,' $1.tmp
-
- sed -i 's, alt=,\nalt=,g' $1.tmp
- sed -i 's, | , |\n,g' $1.tmp
-
- # Remove extra LFs, if any.
- sed -i ':a /^$/ {N; s,\n$,,; ba}' $1.tmp
- sed -i ':a /^\n*$/ {$d; N; ba}' $1.tmp
-
- # Wrap the text.
- fmt -s -w 95 $1.tmp > $1.r
-}
+# The diff file is created in the directory where FILE0 resides.
+
+# ORIGIN OF THE SCRIPT
+# This script was extracted from GNUN's GNUmakefile (function
+# "mark-outdated"), and adapted.
+# GNUN (http://www.gnu.org/software/gnun/) is under GPLv3.
+
+# =============================================================================
+# Command-line arguments
+# Keep each argument as exactly one array element. Unquoted, a path
+# containing spaces or glob characters would be split or expanded, and an
+# empty first argument would shift the second one into slot 0.
+arg=("$1" "$2")
+
+# Stop at the first command that fails outside of a condition.
+set -e
+
+# close_term STATUS
+# Print a prompt, wait for the user to press Return, then exit with STATUS.
+# The exit is unconditional, so whatever the user types (or a closed stdin)
+# cannot turn the requested status into a different one.
+# Note: when called inside a subshell, only that subshell exits.
+close_term () {
+  printf '\n%s' '*** Close the terminal window or press Return.'
+  # The reply is discarded; a failed read must not abort under "set -e".
+  read -r _ || true
+  exit "$1"
+}
-page_reformat ${file}
-# Replace chevrons with HTML entities; the files are treated as simple text.
-sed "s/</\</g;s/>/\>/g" ${file}.r > ${file}.tmp
-page_reformat ${file1}
-sed "s/</\</g;s/>/\>/g" ${file1}.r > ${file1}.tmp
+# Temporary working copies of the two inputs. On failure, leave through
+# close_term (the helper defined in this script).
+f[0]=$(mktemp -t cdif.XXXXXX) || close_term 1
+f[1]=$(mktemp -t cdif.XXXXXX) || close_term 1
+# Remove both temporary files whenever the script exits.
+trap 'rm -f "${f[0]}" "${f[1]}"' EXIT
+
+## Prepare the pages to be compared.
+
+for n in 0 1; do
+  # Input a valid file: take the command-line argument when it names a
+  # non-empty regular file, otherwise ask for a path once.
+  input=${arg[$n]}
+  if [[ ! -f "$input" || ! -s "$input" ]]; then
+    # Plain "read" (no -r) is kept on purpose so that backslash-escaped
+    # characters in the typed path are still unescaped.
+    echo "*** color-wdiff - Please enter file $n."; read input
+    # Strip one trailing and one leading single quote from the typed path.
+    input=${input%\'}; input=${input#\'}
+    if [[ ! -f "$input" || ! -s "$input" ]]; then
+      echo 1>&2 "!!! This file doesn't exist or is empty."
+      close_term 1
+    fi
+  fi
+
+  # Name the diff after file 0.
+  if [[ "$n" == 0 ]]; then
+    diff_file=${input%.html}-diff.html
+  fi
+
+  # If the file is an HTML file but not a diff, process it:
+  if [[ "$input" == *.html && "$input" != *-diff.html ]]; then
+    # - Standardize the format for easier reading of the diff.
+    if [[ -f "$HOME/bin/reformat-html" ]]; then
+      "$HOME/bin/reformat-html" "$input" "${f[$n]}"
+    else
+      cp -- "$input" "${f[$n]}"
+    fi
+    # - Replace chevrons with HTML entities. The page becomes simple text.
+    #   "&" is escaped because a bare "&" in a sed replacement stands for
+    #   the matched text.
+    sed -i 's/</\&lt;/g;s/>/\&gt;/g' "${f[$n]}"
+  else
+    # Any other file is compared as is. Without this copy the temporary
+    # file would stay empty and the diff would show nothing for it.
+    cp -- "$input" "${f[$n]}"
+  fi
+done
+
+## Build the diff page.
# Add an HTML header to the wdiff output, with style for visualizing the
# insertions and deletions, and write the title of the page.
-cat > ${diff_file} << EOF
+cat > $diff_file << EOF
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<!-- Generated by GNUN -->
--end-delete '</strong></del></span>' \
--start-insert '<span class="inserted"><ins><em>' \
--end-insert '</em></ins></span>' \
- ${file}.tmp ${file1}.tmp >> $diff_file
+ ${f[0]} ${f[1]} >> $diff_file || true
# Add the closing tags.
-echo '</pre></body></html>' >> $diff_file
+# Quote the target: an unquoted path containing spaces makes the redirect
+# ambiguous.
+echo '</pre></body></html>' >> "${diff_file}"
-# Clean up.
-rm -f ${file}.tmp ${file1}.tmp ${file}.r ${file1}.r
+# Report where the diff was written. printf with %s prints the path
+# verbatim, whereas "echo -e" would interpret backslash sequences in it.
+printf '\n The diff file is %s.\n' "$diff_file"
+close_term 0