Commit | Line | Data |
---|---|---|
168e428f PH |
#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.1 2005/06/16 10:32:31 ph10 Exp $

# Script to tidy up the filter HTML file that is generated by xmlto. The
# following changes are made:
#
#   1. Split very long lines.
#   2. Create reverse links from chapter and section titles back to the TOC.
#
# The file filter.html in the current directory is read in full, modified in
# memory, and then overwritten in place.

use strict;
use warnings;

my $file   = "filter.html";
my $tocref = 1;     # Sequence number used to build the next "tocNNNN" id
my %backref;        # Maps a TOC link target ("#id") to the id given to its TOC entry

# Read in the filter.html file.

open(my $in, '<', $file)
    or die "Failed to open filter.html for reading: $!\n";
my @raw = <$in>;
close($in);

# Insert a newline after every > because the whole toc is generated as one
# humungous line that is hard to check. Then split the lines so that each one
# is a separate element in the vector.

my @text;
foreach my $chunk (@raw) {
    $chunk =~ s/>\s*/>\n/g;

    # The lookbehind splits after each newline while keeping the newline
    # attached to the preceding piece.
    push @text, split /(?<=\n)/, $chunk;
}

# We want to create reverse links from each chapter and section title back to
# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
# an id to each entry, and create tables that remember the new link ids. We
# detect the start of the TOC by <div class="toc" and the end of the TOC by
# <div class="chapter".

# Skip to start of TOC. The index $i is deliberately shared by the three
# scanning loops below: each one carries on from where the previous stopped.

my $i = 0;
$i++ while $i < @text && $text[$i] !~ /^<div class="toc"/;

# Scan the TOC

for (; $i < @text; $i++) {
    last if $text[$i] =~ /^<div class="chapter"/;
    next unless $text[$i] =~ /^<a href="(#[^"]+)">/;

    my $target = $1;
    my $id     = sprintf "toc%04d", $tocref++;

    # Only the first "<a" on the line is tagged; the match above guarantees
    # that it is the one at the start of the line.
    $text[$i] =~ s/<a/<a id="$id"/;
    $backref{$target} = $id;
}

# Scan remainder of the document

for (; $i < @text; $i++) {
    next unless $text[$i] =~ /^<h[23] /;

    # The anchor is expected on the line immediately after the heading tag.
    # Stop if the heading tag was the very last line of the file.
    last if ++$i >= @text;
    next unless $text[$i] =~ /^<a( xmlns="[^"]+")? id="([^"]+)">$/;

    # Copy the captures straight away; the xmlns group is optional and is
    # undefined when it did not take part in the match.
    my $xmlns  = defined $1 ? $1 : "";
    my $anchor = $2;

    # Leave the heading alone when the TOC has no entry pointing at it,
    # rather than emitting a link with an empty target.
    my $ref = $backref{"#$anchor"};
    next unless defined $ref;

    $text[$i] = "<a$xmlns href=\"#$ref\" id=\"$anchor\">\n";

    # Exchange the two lines that follow the anchor (presumably the closing
    # </a> and the title text) so that the title comes before the closing
    # tag. Do nothing if the file ends before both lines exist.
    if ($i + 2 < @text) {
        @text[ $i + 1, $i + 2 ] = @text[ $i + 2, $i + 1 ];
    }

    # Step over the two lines just handled; the loop increment then moves on
    # to the line after them.
    $i += 2;
}

# Write out the revised file. Both the print and the close are checked,
# because buffered write errors may only show up when the handle is closed
# and the input file has already been truncated by this point.

open(my $out, '>', $file)
    or die "Failed to open filter.html for writing: $!\n";
print {$out} @text
    or die "Failed to write filter.html: $!\n";
close($out)
    or die "Failed to close filter.html after writing: $!\n";

# End