Commit | Line | Data |
---|---|---|
168e428f PH |
1 | #! /usr/bin/perl |
2 | ||
068aaea8 | 3 | # $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.2 2005/11/10 12:30:13 ph10 Exp $ |
168e428f PH |
4 | |
5 | # Script to tidy up the spec HTML files that are generated by xmlto. The | |
6 | # following changes are made: | |
7 | # | |
8 | # 1. Tidy the index.html file by splitting the very long lines. | |
9 | # 2. Create reverse links from chapter and section titles back to the TOC. | |
10 | # 3. Tidy the ix01.html file - the actual index - by splitting long lines. | |
11 | # 4. Insert links from the letter divisions to the top of the Index. | |
068aaea8 PH |
12 | # 5. Turn <div class="literallayout"><p> into <div class="literallayout"> and |
13 | # a matching </p></div> into </div> to get rid of unwanted vertical white | |
14 | # space. | |
15 | # 6. Before each occurrence of </td> insert &nbsp; so that the table's cell | |
16 | # is a little bit wider than the text itself. | |
168e428f PH |
17 | |
18 | chdir "spec.html"; | |
19 | ||
20 | $tocref = 1; | |
21 | ||
22 | # Read in the index.html file. It's really the TOC. | |
23 | ||
24 | open(IN, "index.html") || die "Failed to open index.html for reading: $!\n"; | |
25 | @toc = <IN>; | |
26 | close(IN); | |
27 | ||
28 | # Insert a newline after every > because the whole toc is generated as one | |
29 | # humungous line that is hard to check. Then split the lines so that each one | |
30 | # is a separate element in the vector. | |
31 | ||
32 | foreach $line (@toc) { $line =~ s/>\s*/>\n/g; } | |
33 | for ($i = 0; $i < scalar(@toc); $i++) | |
34 | { splice @toc, $i, 1, (split /(?<=\n)/, $toc[$i]); } | |
35 | ||
36 | # We want to create reverse links from each chapter and section title back to | |
37 | # the relevant place in the TOC. Scan the TOC for the relevant entries. Add | |
38 | # an id to each entry, and create tables that remember the file names and the | |
39 | # new link ids. | |
40 | ||
41 | foreach $line (@toc) | |
42 | { | |
43 | if ($line =~ /^<a href="((?:ch|ix)\d+\.html)(#[^"]+)?">/) | |
44 | { | |
45 | my($chix) = $1; | |
46 | my($ss) = $2; | |
47 | my($id) = sprintf "%04d", $tocref++; | |
48 | $line =~ s/<a/<a id="toc$id"/; | |
49 | $backref{"$chix$ss"} = "toc$id"; | |
50 | push @chlist, $chix; | |
51 | } | |
52 | } | |
53 | ||
54 | # Write out the modified index.html file. | |
55 | ||
56 | open (OUT, ">index.html") || die "Failed to open index.html for writing: $!\n"; | |
57 | print OUT @toc; | |
58 | close(OUT); | |
59 | ||
068aaea8 PH |
60 | # Now scan each of the other page files and insert the reverse links. While |
61 | # we are at it, we tidy up <div class="literallayout"> by removing unwanted | |
62 | # paragraph marks, which generate unwanted vertical space. We also insert | |
63 | # &nbsp; before </td> to push table cells apart from each other. | |
168e428f PH |
64 | |
65 | foreach $file (@chlist) | |
66 | { | |
67 | open(IN, "$file") || die "Failed to open $file for reading: $!\n"; | |
68 | @text = <IN>; | |
69 | close(IN); | |
70 | ||
068aaea8 PH |
71 | # Insert a newline after certain elements, and split the lines so that each |
72 | # one is a separate element in the vector. This makes it easier to recognize | |
73 | # these elements. | |
74 | ||
168e428f PH |
75 | foreach $line (@text) |
76 | { | |
068aaea8 PH |
77 | $line =~ s/<p>\s*(?!\n)/<p>\n/g; |
78 | $line =~ s/<\/p>\s*(?!\n)/<\/p>\n/g; | |
79 | $line =~ s/<\/div>\s*(?!\n)/<\/div>\n/g; | |
80 | $line =~ s/<div([^>]*)>(?!\n)/<div$1>\n/g; | |
81 | } | |
82 | ||
83 | for ($i = 0; $i < scalar(@text); $i++) | |
84 | { splice @text, $i, 1, (split /(?<=\n)/, $text[$i]); } | |
85 | ||
86 | $thisdiv = 0; | |
87 | ||
88 | for ($i = 0; $i < scalar(@text); $i++) | |
89 | { | |
90 | if ($text[$i] =~ /^(.*?)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.+?)<\/h(.*)$/) | |
168e428f PH |
91 | { |
92 | my($pre, $opt, $id, $title, $post) = ($1, $2, $3, $4, $5); | |
93 | ||
94 | # Section reference | |
95 | my($ref) = $backref{"$file#$id"}; | |
96 | ||
97 | # If not found, try for a chapter reference | |
98 | $ref = $backref{"$file"} if !defined $ref; | |
99 | ||
100 | # Adjust the line | |
068aaea8 PH |
101 | $text[$i]= "$pre<a$opt href=\"index.html#$ref\" id=\"$id\">$title</a></h$post"; |
102 | } | |
103 | ||
104 | elsif ($text[$i] eq "<div class=\"literallayout\">\n" && $text[$i+1] eq "<p>\n") | |
105 | { | |
106 | $text[++$i] = ""; | |
107 | $thisdiv = 1; | |
108 | } | |
109 | elsif ($thisdiv && $text[$i] eq "</p>\n" && $text[$i+1] eq "</div>\n") | |
110 | { | |
111 | $text[$i] = ""; | |
112 | $thisdiv = 0; | |
113 | } | |
114 | elsif ($text[$i] =~ /^\s*<\/td>/) | |
115 | { | |
 | # Prepend a non-breaking space entity to a line that starts with </td>. An | |
 | # ordinary space here would be collapsed by HTML rendering and would not | |
 | # make the table cell any wider than its text. | |
116 | $text[$i] = "&nbsp;$text[$i]"; | |
168e428f PH |
117 | } |
118 | } | |
119 | ||
120 | open(OUT, ">$file") || die "Failed to open $file for writing: $!\n"; | |
121 | print OUT @text; | |
122 | close(OUT); | |
123 | } | |
124 | ||
125 | # Now process the ix01.html file | |
126 | ||
127 | open(IN, "ix01.html") || die "Failed to open ix01.html for reading: $!\n"; | |
128 | @index = <IN>; | |
129 | close(IN); | |
130 | ||
131 | # Insert a newline after every > because the whole index is generated as one | |
132 | # humungous line that is hard to check. Then split the lines so that each one | |
133 | # is a separate element in the vector. | |
134 | ||
135 | foreach $line (@index) { $line =~ s/>\s*/>\n/g; } | |
136 | for ($i = 0; $i < scalar(@index); $i++) | |
137 | { splice @index, $i, 1, (split /(?<=\n)/, $index[$i]); } | |
138 | ||
139 | # We want to add a list of letters at the top of the index, and link back | |
140 | # to them from each letter heading. First find the index title and remember | |
141 | # where to insert the list of letters. | |
142 | ||
143 | for ($i = 0; $i < scalar(@index); $i++) | |
144 | { | |
145 | if ($index[$i] =~ /^<\/h2>$/) | |
146 | { | |
147 | $listindex = $i; | |
148 | last; | |
149 | } | |
150 | } | |
151 | ||
152 | # Now scan through for the letter headings and build the cross references, | |
153 | # while also building up the list to insert. | |
154 | ||
155 | $list = "<h4>\n"; | |
156 | for (; $i < scalar(@index); $i++) | |
157 | { | |
158 | if ($index[$i] =~ /^(.)<\/h3>$/) | |
159 | { | |
160 | $letter = $1; | |
161 | $index[$i-1] =~ s/^/<a id="${letter}B" href="#${letter}T">/; | |
162 | $index[$i] =~ s/$/<\/a>/; | |
163 | $list .= "<a id=\"${letter}T\" href=\"#${letter}B\"> $letter</a>\n"; | |
164 | } | |
165 | } | |
166 | ||
167 | # Now we know which letters we have, we can insert the list. | |
168 | ||
169 | $list .= "</h4>\n"; | |
170 | splice @index, $listindex, 0, $list; | |
171 | ||
172 | # Write out the modified ix01.html file. | |
173 | ||
174 | open (OUT, ">ix01.html") || die "Failed to open ix01.html for writing: $!\n"; | |
175 | print OUT @index; | |
176 | close(OUT); | |
177 | ||
178 | ||
179 | # End |