05a9d826b5b4e3d43a79b58b9e7005898e612360
[exim.git] / doc / doc-docbook / TidyHTML-spec
1 #! /usr/bin/perl
2
3 # $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.2 2005/11/10 12:30:13 ph10 Exp $
4
5 # Script to tidy up the spec HTML files that are generated by xmlto. The
6 # following changes are made:
7 #
8 # 1. Tidy the index.html file by splitting the very long lines.
9 # 2. Create reverse links from chapter and section titles back to the TOC.
10 # 3. Tidy the ix01.html file - the actual index - by splitting long lines.
11 # 4. Insert links from the letter divisions to the top of the Index.
12 # 5. Turn <div class="literallayout"><p> into <div class="literallayout"> and
13 # a matching </p></div> into </div> to get rid of unwanted vertical white
14 # space.
15 # 6. Before each occurrence of </td> insert &nbsp; so that the table's cell
16 # is a little bit wider than the text itself.
17
# All the generated HTML lives in the spec.html directory. Give up at once if
# we cannot get there, rather than going on to rewrite same-named files in
# whatever directory we happen to be in.

chdir("spec.html") or die "Failed to change directory to spec.html: $!\n";

# Counter from which the "tocNNNN" ids added to the TOC entries are generated.

$tocref = 1;

# Read in the index.html file. It's really the TOC.

open(my $toc_in, '<', "index.html")
    or die "Failed to open index.html for reading: $!\n";
@toc = <$toc_in>;
close($toc_in);

# Insert a newline after every > because the whole toc is generated as one
# humungous line that is hard to check. Then split the lines so that each one
# is a separate element in the vector.

foreach my $line (@toc) { $line =~ s/>\s*/>\n/g; }
@toc = map { split /(?<=\n)/, $_ } @toc;
35
# We want to create reverse links from each chapter and section title back to
# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
# an id to each entry, and create tables that remember the file names and the
# new link ids:
#
#   %backref  maps "file" or "file#fragment" (as written in the TOC link) to
#             the "tocNNNN" id that was added to that TOC entry;
#   @chlist   lists the chapter/index files that the TOC refers to.
#
# A file appears in the TOC once for the chapter itself and once more for
# every section in it. It is put on @chlist only the first time it is seen, so
# that each file is later read and rewritten once rather than once per entry.

my %listed;

foreach my $line (@toc) {
    next unless $line =~ /^<a href="((?:ch|ix)\d+\.html)(#[^"]+)?">/;

    my $chix = $1;
    my $ss   = defined $2 ? $2 : "";    # no fragment on chapter-level entries
    my $id   = sprintf "%04d", $tocref++;

    $line =~ s/<a/<a id="toc$id"/;
    $backref{"$chix$ss"} = "toc$id";
    push @chlist, $chix unless $listed{$chix}++;
}
53
# Write out the modified index.html file. Output is buffered, so a failed
# write may only be reported by print or close; check both so that a truncated
# file does not go unnoticed.

open(my $toc_out, '>', "index.html")
    or die "Failed to open index.html for writing: $!\n";
print {$toc_out} @toc
    or die "Failed to write index.html: $!\n";
close($toc_out)
    or die "Failed to close index.html after writing: $!\n";
59
# Now scan each of the other page files and insert the reverse links. While
# we are at it, we tidy up <div class="literallayout"> by removing unwanted
# paragraph marks, which generate unwanted vertical space. We also insert
# &nbsp; before a </td> that starts a line, to push table cells apart from
# each other.

foreach my $file (@chlist) {
    open(my $in, '<', $file)
        or die "Failed to open $file for reading: $!\n";
    my @text = <$in>;
    close($in);

    # Insert a newline after certain elements, and split the lines so that
    # each one is a separate element in the vector. This makes it easier to
    # recognize these elements.

    foreach my $line (@text) {
        $line =~ s/<p>\s*(?!\n)/<p>\n/g;
        $line =~ s/<\/p>\s*(?!\n)/<\/p>\n/g;
        $line =~ s/<\/div>\s*(?!\n)/<\/div>\n/g;
        $line =~ s/<div([^>]*)>(?!\n)/<div$1>\n/g;
    }

    @text = map { split /(?<=\n)/, $_ } @text;

    # Set while we are inside a literallayout div whose opening <p> has been
    # removed, so that only the matching closing </p> is removed.

    my $thisdiv = 0;

    for (my $i = 0; $i < scalar(@text); $i++) {
        my $has_next = $i < $#text;    # is there a line to look ahead to?

        # A title: an empty anchor carrying the id, followed by the title
        # text, followed by the closing heading tag.

        if ($text[$i] =~ /^(.*?)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.+?)<\/h(.*)$/) {
            my ($pre, $opt, $id, $title, $post) = ($1, $2, $3, $4, $5);
            $opt = "" unless defined $opt;

            # Section reference
            my $ref = $backref{"$file#$id"};

            # If not found, try for a chapter reference
            $ref = $backref{"$file"} if !defined $ref;

            # Adjust the line: wrap the title in the anchor and point it back
            # at the TOC entry. If neither lookup succeeded the link is just
            # "index.html#". The line's trailing newline is not captured by
            # the pattern, so the rewritten line runs on into the next one.
            $text[$i] = "$pre<a$opt href=\"index.html#$ref\" id=\"$id\">$title</a></h$post";
        }

        # Drop the <p> that immediately follows the start of a literallayout.

        elsif ($has_next
               && $text[$i] eq "<div class=\"literallayout\">\n"
               && $text[$i+1] eq "<p>\n") {
            $text[++$i] = "";
            $thisdiv = 1;
        }

        # Drop the matching </p> that immediately precedes the closing </div>.

        elsif ($thisdiv
               && $has_next
               && $text[$i] eq "</p>\n"
               && $text[$i+1] eq "</div>\n") {
            $text[$i] = "";
            $thisdiv = 0;
        }

        # Widen a table cell whose closing tag starts the line.

        elsif ($text[$i] =~ /^\s*<\/td>/) {
            $text[$i] = "&nbsp;$text[$i]";
        }
    }

    # Output is buffered, so check print and close as well as open; otherwise
    # a failed write would leave a truncated page without any diagnostic.

    open(my $out, '>', $file)
        or die "Failed to open $file for writing: $!\n";
    print {$out} @text
        or die "Failed to write $file: $!\n";
    close($out)
        or die "Failed to close $file after writing: $!\n";
}
124
# The remaining work is on ix01.html, the index proper. Pull the whole file
# into memory first.

open(my $index_in, '<', "ix01.html")
    || die "Failed to open ix01.html for reading: $!\n";
@index = <$index_in>;
close($index_in);

# As generated, the index is one enormous line that is hard to check. Break it
# after every > and then make each resulting line its own element of the
# vector.

s/>\s*/>\n/g for @index;
@index = map { split /(?<=\n)/ } @index;
138
# We want to add a list of letters at the top of the index, and link back
# to them from each letter heading. First find the index title - the first
# line consisting of just </h2> - and remember where to insert the list of
# letters.

$listindex = undef;
for (my $n = 0; $n < scalar(@index); $n++) {
    if ($index[$n] =~ /^<\/h2>$/) {
        $listindex = $n;
        last;
    }
}

if (!defined $listindex) {

    # Without a title line there is nowhere sensible to put the list, and
    # inserting it anyway would put it in front of the whole document. Say so
    # and leave the index as it is.

    warn "No index title (</h2>) found in ix01.html: letter list not inserted\n";
}
else {

    # Now scan through for the letter headings (a single character followed
    # by </h3>) and build the cross references, while also building up the
    # list to insert. For letter X, the heading gets id "XB" and links to
    # "#XT"; the entry in the list gets id "XT" and links to "#XB".

    $list = "<h4>\n";
    for (my $n = $listindex; $n < scalar(@index); $n++) {
        next unless $index[$n] =~ /^(.)<\/h3>$/;
        my $letter = $1;

        # The anchor is opened at the start of the previous line and closed
        # at the end of this one (before its newline).
        $index[$n-1] =~ s/^/<a id="${letter}B" href="#${letter}T">/;
        $index[$n]   =~ s/$/<\/a>/;

        $list .= "<a id=\"${letter}T\" href=\"#${letter}B\"> $letter</a>\n";
    }

    # Now we know which letters we have, we can insert the list.
    # NOTE(review): the list goes in immediately before the </h2> line, that
    # is, inside the heading element - confirm that this is intended.

    $list .= "</h4>\n";
    splice @index, $listindex, 0, $list;
}
171
# Write out the modified ix01.html file. Output is buffered, so a failed
# write may only be reported by print or close; check both so that a truncated
# file does not go unnoticed.

open(my $index_out, '>', "ix01.html")
    or die "Failed to open ix01.html for writing: $!\n";
print {$index_out} @index
    or die "Failed to write ix01.html: $!\n";
close($index_out)
    or die "Failed to close ix01.html after writing: $!\n";
177
178
179 # End