#! /usr/bin/perl # $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.3 2006/02/01 11:01:01 ph10 Exp $ # Script to tidy up the filter HTML file that is generated by xmlto. The # following changes are made: # # 1. Split very long lines. # 2. Create reverse links from chapter and section titles back to the TOC. # 3. Turn

into

and # a matching

into
to get rid of unwanted vertical white # space. $tocref = 1; $thisdiv = 0; # Read in the filter.html file. open(IN, "filter.html") || die "Failed to open filter.html for reading: $!\n"; @text = ; close(IN); # Insert a newline after every > in the toc, because the whole toc is generated # as one humungous line that is hard to check. Indeed, the start of the first # chapter is also on the line, so we have to split if off first. Having # inserted newlines, we split the toc into separate items in the vector. for ($i = 0; $i < scalar(@text); $i++) { if ($text[$i] =~ ?Exim's interfaces to mail filtering?) { splice @text, $i, 1, (split /(?=
\s*/>\n/g; splice @text, $i, 1, (split /(?<=\n)/, $text[$i]); last; } } # We want to create reverse links from each chapter and section title back to # the relevant place in the TOC. Scan the TOC for the relevant entries. Add # an id to each entry, and create tables that remember the new link ids. We # detect the start of the TOC by
/) { my($ss) = $1; my($id) = sprintf "%04d", $tocref++; $text[$i] =~ s/<\/a>(.*?)<\/h(.*)/) { my($ref) = $backref{"#$2"}; $text[$i] = "$1$4

(?:
)?(.*)/) { my($j); $text[$i] = "$1

$2"; for ($j = $i + 1; $j < scalar(@text); $j++) { if ($text[$j] =~ /^<\/p><\/div>/) { $text[$j] =~ s/<\/p>//; last; } } } } # Write out the revised file open(OUT, ">filter.html") || die "Failed to open filter.html for writing: $!\n"; print OUT @text; close(OUT); # End