Commit | Line | Data |
---|---|---|
168e428f PH |
1 | #! /usr/bin/perl |
2 | ||
3 | # $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.1 2005/06/16 10:32:31 ph10 Exp $ | |
4 | ||
5 | # Script to tidy up the spec HTML files that are generated by xmlto. The | |
6 | # following changes are made: | |
7 | # | |
8 | # 1. Tidy the index.html file by splitting the very long lines. | |
9 | # 2. Create reverse links from chapter and section titles back to the TOC. | |
10 | # 3. Tidy the ix01.html file - the actual index - by splitting long lines. | |
11 | # 4. Insert links from the letter divisions to the top of the Index. | |
12 | ||
# All the files handled below are opened by bare name, so they are looked up
# relative to the spec.html directory. Stop at once if we cannot get there:
# carrying on would read and overwrite same-named files in whatever directory
# the script was started from.

chdir("spec.html") || die "Failed to change directory to spec.html: $!\n";

# Counter used to build the ids that are added to the TOC entries; the first
# id generated is number 1.

$tocref = 1;
16 | ||
17 | # Read in the index.html file. It's really the TOC. | |
18 | ||
# Read the whole of index.html into @toc, one element per input line. A
# lexical handle and three-argument open are used so that the mode can never
# be taken from the file name.

open(my $toc_in, '<', 'index.html')
  || die "Failed to open index.html for reading: $!\n";
@toc = <$toc_in>;
close($toc_in);

# The TOC arrives as one enormous line that is hard to check. Put a newline
# after every ">" (swallowing any white space that followed it), then break
# each element up after every newline so that @toc again holds exactly one
# line per element. Doing the split with a single map visits each element
# once, instead of repeatedly splicing the array in place.

foreach my $line (@toc) { $line =~ s/>\s*/>\n/g; }
@toc = map { split /(?<=\n)/, $_ } @toc;
30 | ||
31 | # We want to create reverse links from each chapter and section title back to | |
32 | # the relevant place in the TOC. Scan the TOC for the relevant entries. Add | |
33 | # an id to each entry, and create tables that remember the file names and the | |
34 | # new link ids. | |
35 | ||
# Walk the TOC looking for lines that start with a link to a chapter (chNN) or
# index (ixNN) page, with or without a "#fragment". Each such line is given a
# sequentially numbered id, and %backref records that id under the link target
# (page name plus fragment, if any). The page name is also appended to @chlist
# once per matching entry, so a page that has several TOC entries appears in
# @chlist several times.

for my $entry (@toc)
  {
  next unless $entry =~ /^<a href="((?:ch|ix)\d+\.html)(#[^"]+)?">/;

  my $page = $1;
  my $fragment = $2;                      # not set for a whole-page link
  my $id = sprintf "%04d", $tocref++;

  $entry =~ s/<a/<a id="toc$id"/;
  $backref{"$page$fragment"} = "toc$id";
  push @chlist, $page;
  }
48 | ||
49 | # Write out the modified index.html file. | |
50 | ||
# Replace index.html with the contents of @toc. The print and the close are
# both checked: output is buffered, so a failed write (a full disk, for
# example) may only be reported when the handle is closed, and ignoring it
# would leave a silently truncated file behind.

open(my $toc_out, '>', 'index.html')
  || die "Failed to open index.html for writing: $!\n";
print {$toc_out} @toc
  or die "Failed to write index.html: $!\n";
close($toc_out)
  || die "Failed to close index.html after writing: $!\n";
54 | ||
55 | # Now scan each of the other page files and insert the reverse links. | |
56 | ||
# For every page name in @chlist, read the page, turn heading anchors into
# links back to the TOC, and write the page back in place.
#
# Only the first heading anchor on any one line is rewritten in a pass over a
# page. A page that appears more than once in @chlist is processed again each
# time, and a later pass can pick up an anchor that an earlier pass left
# untouched.

foreach my $file (@chlist)
  {
  open(my $in, '<', $file) || die "Failed to open $file for reading: $!\n";
  my @text = <$in>;
  close($in);

  foreach my $line (@text)
    {
    # Remember whether the line is newline-terminated. The pattern below
    # cannot capture the newline (neither "." nor "$" consumes it), so without
    # this the rewritten line would be glued onto the line that follows it.
    my $eol = ($line =~ /\n\z/)? "\n" : "";

    # An empty <a id="..."></a> anchor (optionally carrying an xmlns
    # attribute), followed by the title text and the start of a closing
    # </hN> tag.
    if ($line =~ /^(.*?)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.+?)<\/h(.*)$/)
      {
      my($pre, $opt, $id, $title, $post) = ($1, $2, $3, $4, $5);
      $opt = "" if !defined $opt;         # no xmlns attribute was present

      # Section reference
      my $ref = $backref{"$file#$id"};

      # If not found, try for a chapter reference
      $ref = $backref{$file} if !defined $ref;

      # If there is still nothing, the link is left pointing at index.html
      # with an empty fragment.
      $ref = "" if !defined $ref;

      # Wrap the title in the anchor and point it at the TOC entry
      $line = "$pre<a$opt href=\"index.html#$ref\" id=\"$id\">$title</a></h$post$eol";
      }
    }

  # Check the print and the close as well as the open: buffered output means
  # that a failed write may only be reported at close time.
  open(my $out, '>', $file) || die "Failed to open $file for writing: $!\n";
  print {$out} @text
    or die "Failed to write $file: $!\n";
  close($out) || die "Failed to close $file after writing: $!\n";
  }
84 | ||
85 | # Now process the ix01.html file | |
86 | ||
# Read the whole of ix01.html into @index, one element per input line. A
# lexical handle and three-argument open are used so that the mode can never
# be taken from the file name.

open(my $index_in, '<', 'ix01.html')
  || die "Failed to open ix01.html for reading: $!\n";
@index = <$index_in>;
close($index_in);

# The index arrives as one enormous line that is hard to check. Put a newline
# after every ">" (swallowing any white space that followed it), then break
# each element up after every newline so that @index again holds exactly one
# line per element. Doing the split with a single map visits each element
# once, instead of repeatedly splicing the array in place.

foreach my $line (@index) { $line =~ s/>\s*/>\n/g; }
@index = map { split /(?<=\n)/, $_ } @index;
98 | ||
99 | # We want to add a list of letters at the top of the index, and link back | |
100 | # to them from each letter heading. First find the index title and remember | |
101 | # where to insert the list of letters. | |
102 | ||
# Find the first line that consists solely of "</h2>". The list of letters is
# inserted immediately in front of that line.

my $listindex;
for (my $pos = 0; $pos < scalar(@index); $pos++)
  {
  if ($index[$pos] =~ /^<\/h2>$/)
    {
    $listindex = $pos;
    last;
    }
  }

if (!defined $listindex)
  {
  # Without the title line there is nowhere sensible to put the list.
  # Inserting it regardless would place an empty <h4> element at the very
  # start of the file, so leave the index alone and say so.
  warn "No </h2> line found in ix01.html: letter links not added\n";
  }
else
  {
  # From the title onwards, look for letter headings: lines made up of a
  # single character followed by "</h3>". The heading is wrapped in an anchor
  # with id "<letter>B" that links to "#<letter>T"; the opening tag goes at
  # the start of the preceding line and the closing tag at the end of the
  # heading line. A matching entry with id "<letter>T" that links to
  # "#<letter>B" is added to the list.

  my $list = "<h4>\n";
  for (my $pos = $listindex; $pos < scalar(@index); $pos++)
    {
    next unless $index[$pos] =~ /^(.)<\/h3>$/;
    my $letter = $1;
    $index[$pos-1] =~ s/^/<a id="${letter}B" href="#${letter}T">/;
    $index[$pos] =~ s/$/<\/a>/;
    $list .= "<a id=\"${letter}T\" href=\"#${letter}B\"> $letter</a>\n";
    }

  # Now we know which letters we have, we can insert the list. It goes in as
  # a single element of @index.

  $list .= "</h4>\n";
  splice @index, $listindex, 0, $list;
  }
131 | ||
132 | # Write out the modified ix01.html file. | |
133 | ||
# Replace ix01.html with the contents of @index. The print and the close are
# both checked: output is buffered, so a failed write (a full disk, for
# example) may only be reported when the handle is closed, and ignoring it
# would leave a silently truncated file behind.

open(my $index_out, '>', 'ix01.html')
  || die "Failed to open ix01.html for writing: $!\n";
print {$index_out} @index
  or die "Failed to write ix01.html: $!\n";
close($index_out)
  || die "Failed to close ix01.html after writing: $!\n";
137 | ||
138 | ||
139 | # End |