#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.2 2005/11/10 12:30:13 ph10 Exp $

# Script to tidy up the filter HTML file that is generated by xmlto. The
# following changes are made:
#
# 1. Split very long lines.
# 2. Create reverse links from chapter and section titles back to the TOC.
# 3. Turn <div class="literallayout"><p> into <div class="literallayout"> and
#    a matching </p></div> into </div> to get rid of unwanted vertical white
#    space.
#
# The file "filter.html" in the current directory is read completely into
# memory, transformed, and then overwritten in place.

use strict;
use warnings;

# Run the conversion only when this file is executed as a script. When it is
# pulled in with require/do (for example by a test), only the subroutines are
# defined and filter.html is not touched.

main() unless caller;


# Read filter.html, tidy it, and write it back. Dies with a message on any
# I/O failure, including a failed write or close (buffered write errors only
# surface at close time, and the input file has already been truncated by
# then, so they must not be ignored).

sub main {
    my $file = "filter.html";

    open(my $in, '<', $file)
        or die "Failed to open $file for reading: $!\n";
    my @text = <$in>;
    close($in);

    my @tidied = tidy_lines(@text);

    open(my $out, '>', $file)
        or die "Failed to open $file for writing: $!\n";
    print {$out} @tidied
        or die "Failed to write $file: $!\n";
    close($out)
        or die "Failed to write $file: $!\n";

    return 0;
}


# Insert a newline after every > because the whole toc is generated as one
# humungous line that is hard to check, swallowing any white space that
# followed the >. Then split the lines so that each one is a separate element
# in the returned list. The caller's lines are not modified.

sub split_lines {
    my @lines = @_;
    s/>\s*/>\n/g for @lines;

    # The lookbehind keeps each newline attached to the piece it terminates.
    return map { split /(?<=\n)/, $_ } @lines;
}


# Apply all of the tidying steps to the given list of lines and return the
# new list of lines.
#
# We want to create reverse links from each chapter and section title back to
# the relevant place in the TOC. The TOC is scanned for the relevant entries,
# an id is added to each entry, and a table remembers the new link ids. The
# start of the TOC is detected by <div class="toc" and the end of the TOC by
# <div class="chapter". If no TOC is found, only the line splitting is done.

sub tidy_lines {
    my @text = split_lines(@_);

    my %backref;            # "#target" from a TOC entry => id given to that entry
    my $tocref = 1;         # number used for the next generated TOC id
    my $thisdiv = 0;        # true while inside a literallayout <div> whose <p> was removed
    my $i = 0;

    # Skip to start of TOC

    $i++ while $i < @text && $text[$i] !~ /^<div class="toc"/;

    # Scan the TOC

    for (; $i < @text; $i++) {
        last if $text[$i] =~ /^<div class="chapter"/;
        if ($text[$i] =~ /^<a href="(#[^"]+)">/) {
            my $target = $1;
            my $id = sprintf "toc%04d", $tocref++;
            $text[$i] =~ s/<a/<a id="$id"/;
            $backref{$target} = $id;
        }
    }

    # Scan remainder of the document

    for (; $i < @text; $i++) {
        # Look ahead safely: on the last line there is no following line.
        my $next = ($i + 1 < @text) ? $text[$i + 1] : "";

        if ($text[$i] eq "<div class=\"literallayout\">\n" && $next eq "<p>\n") {
            $text[++$i] = "";
            $thisdiv = 1;
        }
        elsif ($thisdiv && $text[$i] eq "</p>\n" && $next eq "</div>\n") {
            $text[$i] = "";
            $thisdiv = 0;
        }
        elsif ($text[$i] =~ /^<h[23] /) {
            # The line after a heading start tag is always consumed here,
            # whether or not it turns out to be an anchor.
            $i++;
            next if $i >= @text;

            if ($text[$i] =~ /^<a( xmlns="[^"]+")? id="([^"]+)">$/) {
                my $xmlns = defined $1 ? $1 : "";
                my $id = $2;
                my $toc = $backref{"#$id"};

                # A heading that has no TOC entry is left exactly as it was;
                # there is nothing sensible to link it back to.
                next unless defined $toc;

                $text[$i] = "<a$xmlns href=\"#$toc\" id=\"$id\">\n";

                # Exchange the two lines that follow the anchor so that the
                # first of them ends up after the second.
                # NOTE(review): presumably these are the empty anchor's </a>
                # and the title text, so that the title lands inside the
                # link; if the title line also carries </h2>, the result is
                # <a>..</h2></a>, which is not well nested - confirm against
                # real xmlto output before changing.
                if ($i + 2 < @text) {
                    @text[$i + 1, $i + 2] = @text[$i + 2, $i + 1];
                }
                $i += 2;
            }
        }
    }

    return @text;
}

# End