| 1 | #! /usr/bin/perl |
| 2 | |
| 3 | # $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.3 2006/02/01 11:01:01 ph10 Exp $ |
| 4 | |
| 5 | # Script to tidy up the filter HTML file that is generated by xmlto. The |
| 6 | # following changes are made: |
| 7 | # |
| 8 | # 1. Split very long lines. |
| 9 | # 2. Create reverse links from chapter and section titles back to the TOC. |
| 10 | # 3. Turn <div class="literallayout"><p> into <div class="literallayout"> and |
| 11 | # a matching </p></div> into </div> to get rid of unwanted vertical white |
| 12 | # space. |
| 13 | |
| 14 | |
# Counter used to number the ids that are added to TOC entries.
$tocref = 1;

# NOTE(review): $thisdiv is initialised here but is not referenced anywhere
# else in the visible script; it is kept in case something relies on it.
$thisdiv = 0;

# Read in the filter.html file, one element of @text per input line. The
# three-argument form of open() with a lexical handle is used so that the
# file name can never be taken for a mode, and so that the handle does not
# live in the package namespace.

open(my $in, '<', 'filter.html')
    or die "Failed to open filter.html for reading: $!\n";
@text = <$in>;
close($in);
| 23 | |
# Insert a newline after every > in the toc, because the whole toc is generated
# as one humungous line that is hard to check. Indeed, the start of the first
# chapter is also on the line, so we have to split it off first. Having
# inserted newlines, we split the toc into separate items in the vector.

# Break up the single line that holds the title, the toc and the start of the
# first chapter. Takes that line as its only argument and returns the list of
# lines that should replace it: the part before the first
# <div class="chapter" with a newline forced after every > (one list element
# per resulting line), followed by the chapter part(s), which are unaltered.

sub split_toc_line {
    my ($line) = @_;

    my ($toc, @chapters) = split /(?=<div class="chapter")/, $line;
    return () unless defined $toc;

    # Any white space that already follows a > is replaced by the newline.
    $toc =~ s/>\s*/>\n/g;

    return ((split /(?<=\n)/, $toc), @chapters);
}

for ($i = 0; $i < scalar(@text); $i++) {
    # m{...} is used rather than the bare ?...? match-once form, which is a
    # syntax error in newer versions of Perl; the loop stops at the first hit
    # in any case, so matching only once is not needed.
    if ($text[$i] =~ m{<title>Exim's interfaces to mail filtering</title>}) {
        splice @text, $i, 1, split_toc_line($text[$i]);
        last;
    }
}
| 39 | |
| 40 | # We want to create reverse links from each chapter and section title back to |
| 41 | # the relevant place in the TOC. Scan the TOC for the relevant entries. Add |
| 42 | # an id to each entry, and create tables that remember the new link ids. We |
| 43 | # detect the start of the TOC by <div class="toc" and the end of the TOC by |
| 44 | # <div class="chapter". |
| 45 | |
# Skip to start of TOC: leave $i at the first line that begins with the
# opening of the toc div, or one past the last line if there is none.

$i = 0;
$i++ while $i < scalar(@text) && $text[$i] !~ /^<div class="toc"/;
| 52 | |
# Scan the TOC. Every line that begins with a link to an in-document target
# is given an id of the form tocNNNN, and %backref records that id under the
# link's target. The scan stops, leaving $i where it is, at the first line
# that begins with the opening of a chapter div.

while ($i < scalar(@text) && $text[$i] !~ /^<div class="chapter"/) {
    if (my ($target) = $text[$i] =~ /^<a href="(#[^"]+)">/) {
        my $toc_id = sprintf 'toc%04d', $tocref++;
        $text[$i] =~ s/<a/<a id="$toc_id"/;
        $backref{$target} = $toc_id;
    }
    $i++;
}
| 66 | |
# Scan remainder of the document

# Turn every empty heading anchor on a line into a link back to its TOC entry.
# Arguments: the line, and a reference to a hash that maps "#id" to the id
# that was given to the matching TOC entry. Returns the rewritten line; a line
# with no such anchor is returned as it is.
#
# An anchor of the form <a id="X"></a>Title</h... (optionally with an xmlns
# attribute) becomes <a href="#tocNNNN" id="X">Title</a></h... The table is
# looked up by the anchor's id (the third capture; the second is the optional
# xmlns attribute). An anchor with no entry in the table gets href="#". Once
# rewritten, an anchor no longer matches the pattern, so the loop ends when
# every such anchor on the line has been handled. Because "." does not match
# a newline, a rewritten line loses its trailing newline.

sub link_headings_to_toc {
    my ($line, $toc_id_for) = @_;

    while ($line =~
        /^(.*)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.*?)<\/h(.*)/)
    {
        my ($before, $xmlns, $id, $title, $after) = ($1, $2, $3, $4, $5);
        $xmlns = '' unless defined $xmlns;

        my $ref = $toc_id_for->{"#$id"};
        $ref = '' unless defined $ref;

        $line = "$before<a$xmlns href=\"#$ref\" id=\"$id\">$title</a></h$after";
    }

    return $line;
}

for (; $i < scalar(@text); $i++) {
    $text[$i] = link_headings_to_toc($text[$i], \%backref);

    # Drop the <p> (and a <br /> directly after it) that follows the opening
    # of a literallayout div, then remove the </p> from the next line that
    # begins with </p></div>. As above, the rewritten opening line loses its
    # trailing newline.
    if ($text[$i] =~ /^(.*)<div class="literallayout"><p>(?:<br \/>)?(.*)/) {
        $text[$i] = "$1<div class=\"literallayout\">$2";

        for (my $j = $i + 1; $j < scalar(@text); $j++) {
            if ($text[$j] =~ /^<\/p><\/div>/) {
                $text[$j] =~ s/<\/p>//;
                last;
            }
        }
    }
}
| 93 | |
# Write out the revised file. The print and the close are checked as well as
# the open, because the file has already been truncated by then and a failed
# write may only be reported when the buffered output is flushed at close.

open(my $out, '>', 'filter.html')
    or die "Failed to open filter.html for writing: $!\n";
print {$out} @text
    or die "Failed to write filter.html: $!\n";
close($out)
    or die "Failed to close filter.html: $!\n";
| 99 | |
| 100 | # End |