#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.3 2006/02/01 11:01:01 ph10 Exp $

# Script to tidy up the filter HTML file that is generated by xmlto. The
# following changes are made:
#
# 1. Split very long lines.
# 2. Create reverse links from chapter and section titles back to the TOC.
# 3. Turn <div class="literallayout"><p> into <div class="literallayout"> and
#    a matching </p></div> into </div> to get rid of unwanted vertical white
#    space.
#
# The script takes no arguments. It reads filter.html from the current
# directory, edits the text in memory, and overwrites the same file.

use strict;
use warnings;

my $tocref = 1;    # Sequence number used to build the next "tocNNNN" id
my %backref;       # Maps a TOC link target ("#id") to the id given to its
                   # TOC entry ("tocNNNN")
my @text;          # The whole document, one element per line
my $i;             # Current index into @text; deliberately shared by the
                   # three scanning loops so each resumes where the
                   # previous one stopped

# Read in the filter.html file.

open(my $in, '<', 'filter.html')
  || die "Failed to open filter.html for reading: $!\n";
@text = <$in>;
close($in);

# Insert a newline after every > in the toc, because the whole toc is generated
# as one humungous line that is hard to check. Indeed, the start of the first
# chapter is also on the line, so we have to split it off first. Having
# inserted newlines, we split the toc into separate items in the vector.

for ($i = 0; $i < scalar(@text); $i++)
  {
  # An ordinary match is used here rather than the match-once operator; the
  # loop is left as soon as the line has been handled, so only the first
  # matching line is ever processed.
  if ($text[$i] =~ m{<title>Exim's interfaces to mail filtering</title>})
    {
    # Separate the head+toc part from each chapter that shares the line.
    splice @text, $i, 1, (split /(?=<div class="chapter")/, $text[$i]);

    # $text[$i] is now the head+toc part only: put one tag per line, then
    # turn those lines into individual elements of @text.
    $text[$i] =~ s/>\s*/>\n/g;
    splice @text, $i, 1, (split /(?<=\n)/, $text[$i]);
    last;
    }
  }

# We want to create reverse links from each chapter and section title back to
# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
# an id to each entry, and create tables that remember the new link ids. We
# detect the start of the TOC by <div class="toc" and the end of the TOC by
# <div class="chapter".

# Skip to start of TOC

for ($i = 0; $i < scalar(@text); $i++)
  {
  last if $text[$i] =~ /^<div class="toc"/;
  }

# Scan the TOC

for (; $i < scalar(@text); $i++)
  {
  last if $text[$i] =~ /^<div class="chapter"/;
  if ($text[$i] =~ /^<a href="(#[^"]+)">/)
    {
    my $target = $1;
    my $id = sprintf "%04d", $tocref++;
    $text[$i] =~ s/<a/<a id="toc$id"/;
    $backref{$target} = "toc$id";
    }
  }

# Scan remainder of the document

for (; $i < scalar(@text); $i++)
  {
  # Turn each empty anchor that precedes a heading's text into a link that
  # wraps the heading text and points back at the TOC entry. The leading
  # greedy (.*) makes each pass pick the last remaining empty anchor on the
  # line; once rewritten it no longer matches, so the loop terminates.
  while ($text[$i] =~
      /^(.*)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.*?)<\/h(.*)/)
    {
    my($before, $xmlns, $id, $title, $after) = ($1, $2, $3, $4, $5);

    # The xmlns attribute is optional, so its capture may be undefined.
    $xmlns = "" unless defined $xmlns;

    # The TOC table is keyed by "#" followed by the anchor id. An anchor
    # with no TOC entry gets an empty reference (href="#").
    my $ref = exists $backref{"#$id"} ? $backref{"#$id"} : "";

    # As the pattern's final (.*) stops before the newline, the rebuilt line
    # has no trailing newline.
    $text[$i] =
      "$before<a$xmlns href=\"#$ref\" id=\"$id\">$title</a></h$after";
    }

  # Remove the <p> (and an immediately following <br />) that opens a
  # literallayout div, then remove the </p> from the next line that starts
  # with </p></div>.
  if ($text[$i] =~ /^(.*)<div class="literallayout"><p>(?:<br \/>)?(.*)/)
    {
    $text[$i] = "$1<div class=\"literallayout\">$2";

    for (my $j = $i + 1; $j < scalar(@text); $j++)
      {
      if ($text[$j] =~ /^<\/p><\/div>/)
        {
        $text[$j] =~ s/<\/p>//;
        last;
        }
      }
    }
  }

# Write out the revised file

open(my $out, '>', 'filter.html')
  || die "Failed to open filter.html for writing: $!\n";
print {$out} @text;

# Buffered write errors only show up when the handle is closed.
close($out) || die "Failed to write filter.html: $!\n";

# End