Updated embedded PCRE to version 7.4 to avoid 2 CVE issues:-
[exim.git] / doc / doc-docbook / TidyHTML-spec
CommitLineData
168e428f
PH
1#! /usr/bin/perl
2
4f578862 3# $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.4 2006/04/04 14:03:49 ph10 Exp $
168e428f
PH
4
5# Script to tidy up the spec HTML files that are generated by xmlto. The
6# following changes are made:
7#
8# 1. Tidy the index.html file by splitting the very long lines.
9# 2. Create reverse links from chapter and section titles back to the TOC.
10# 3. Tidy the ix01.html file - the actual index - by splitting long lines.
11# 4. Insert links from the letter divisions to the top of the Index.
068aaea8
PH
12# 5. Turn <div class="literallayout"><p> into <div class="literallayout"> and
13# a matching </p></div> into </div> to get rid of unwanted vertical white
14# space.
15# 6. Before each occurrence of </td> insert &nbsp; so that the table's cell
16# is a little bit wider than the text itself.
168e428f 17
# Work inside the directory that holds the generated HTML. Give up at once if
# it cannot be entered: carrying on would read, and later overwrite, whatever
# index.html happens to be in the current directory.

chdir("spec_html") || die "Failed to change directory to spec_html: $!\n";

# Number of the next TOC anchor to hand out. It is formatted with %04d and
# prefixed with "toc" when the TOC entries are given their ids.

$tocref = 1;

# Read in the index.html file. It's really the TOC.

open(IN, "index.html") || die "Failed to open index.html for reading: $!\n";
@toc = <IN>;
close(IN);
27
4f578862
PH
28# Insert a newline after every > except when it is preceded by 'class="quote"',
29# because the whole toc is generated as one humungous line that is hard to
30# check. We have to avoid it in the quote case because that puts a space into
31# the output, and similarly for the </span> that comes afterwards. Easy way out
32# is just not to do it for all </span> occurrences. Unfortunately, Perl does
33# not implement lookbehinds where the alternatives are of different lengths, so
34# we have to take two passes.
35
36
foreach $line (@toc)
  {
  # First pass: a newline after every '>' that does not close class="quote".
  $line =~ s/(?<!class="quote")>\s*/>\n/g;

  # Second pass: take out again the newline that was added after </span>.
  $line =~ s/<\/span>\n/<\/span>/g;
  }

# Split the lines so that each one is a separate element in the vector. A
# single map visits every element exactly once, so nothing is split twice and
# nothing is skipped.

@toc = map { split(/(?<=\n)/, $_) } @toc;
47
48# We want to create reverse links from each chapter and section title back to
49# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
50# an id to each entry, and create tables that remember the file names and the
51# new link ids.
52
# Files already placed in @chlist. A file is queued only once, however many
# TOC entries point into it, so that it is read and rewritten a single time.

my(%queued);

foreach $line (@toc)
  {
  # $1 is the page file; $2 is the "#fragment" part, present only when the
  # entry points inside the page.
  if ($line =~ /^<a href="((?:ch|ix)\d+\.html)(#[^"]+)?">/)
    {
    my($chix) = $1;
    my($ss) = defined $2 ? $2 : "";
    my($id) = sprintf "%04d", $tocref++;

    # Give the TOC entry its own id, and remember it under "file" or
    # "file#fragment" so that the page can link back to it.
    $line =~ s/<a/<a id="toc$id"/;
    $backref{"$chix$ss"} = "toc$id";

    push @chlist, $chix unless $queued{$chix}++;
    }
  }
65
66# Write out the modified index.html file.
67
open (OUT, ">index.html") || die "Failed to open index.html for writing: $!\n";
print OUT @toc;

# Check the close as well: an error in writing buffered output may not be
# reported until the handle is closed.
close(OUT) || die "Failed to write index.html: $!\n";
71
068aaea8
PH
72# Now scan each of the other page files and insert the reverse links. While
73# we are at it, we tidy up <div class="literallayout"> by removing unwanted
74# paragraph marks, which generate unwanted vertical space. We also insert
75# &nbsp; before </td> to push table cells apart from each other.
168e428f
PH
76
foreach $file (@chlist)
  {
  open(IN, "$file") || die "Failed to open $file for reading: $!\n";
  @text = <IN>;
  close(IN);

  # Insert a newline after certain elements, and split the lines so that each
  # one is a separate element in the vector. This makes it easier to recognize
  # these elements.

  foreach $line (@text)
    {
    $line =~ s/<p>\s*(?!\n)/<p>\n/g;
    $line =~ s/<\/p>\s*(?!\n)/<\/p>\n/g;
    $line =~ s/<\/div>\s*(?!\n)/<\/div>\n/g;
    $line =~ s/<div([^>]*)>(?!\n)/<div$1>\n/g;
    }

  # One pass over the original elements; each is replaced by its pieces.

  @text = map { split(/(?<=\n)/, $_) } @text;

  # Set while we are inside a literallayout <div> whose opening <p> has been
  # removed, so that only the matching </p> before </div> is removed too.

  $thisdiv = 0;

  for ($i = 0; $i < scalar(@text); $i++)
    {
    # A heading whose title follows an empty anchor: make the title itself a
    # link back to the TOC, keeping the anchor's id.
    if ($text[$i] =~ /^(.*?)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.+?)<\/h(.*)$/)
      {
      my($pre, $opt, $id, $title, $post) = ($1, $2, $3, $4, $5);

      # The xmlns attribute is optional; use an empty string when it is absent.
      $opt = "" if !defined $opt;

      # Section reference
      my($ref) = $backref{"$file#$id"};

      # If not found, try for a chapter reference
      $ref = $backref{"$file"} if !defined $ref;

      # Adjust the line. The rewritten line has no trailing newline, because
      # (.*)$ stops short of it.
      # NOTE(review): if neither lookup succeeds, $ref is undefined and the
      # link comes out as "index.html#" - confirm that this cannot happen.
      $text[$i]= "$pre<a$opt href=\"index.html#$ref\" id=\"$id\">$title</a></h$post";
      }

    # Opening of a literallayout block followed at once by <p>: blank out the
    # <p> line and step over it.
    elsif ($text[$i] =~ /^<div [^>]*?class="literallayout">$/ && $text[$i+1] eq "<p>\n")
      {
      $text[++$i] = "";
      $thisdiv = 1;
      }

    # The </p> that closes such a block, recognized by the </div> after it.
    elsif ($thisdiv && $text[$i] eq "</p>\n" && $text[$i+1] eq "</div>\n")
      {
      $text[$i] = "";
      $thisdiv = 0;
      }

    # A line that starts with </td>: put &nbsp; in front to widen the cell.
    elsif ($text[$i] =~ /^\s*<\/td>/)
      {
      $text[$i] = "&nbsp;$text[$i]";
      }
    }

  open(OUT, ">$file") || die "Failed to open $file for writing: $!\n";
  print OUT @text;

  # Check the close as well: an error in writing buffered output may not be
  # reported until the handle is closed.
  close(OUT) || die "Failed to write $file: $!\n";
  }
136
137# Now process the ix01.html file
138
open(IN, "ix01.html") || die "Failed to open ix01.html for reading: $!\n";
@index = <IN>;
close(IN);

# Insert a newline after every > because the whole index is generated as one
# humungous line that is hard to check. Then split the lines so that each one
# is a separate element in the vector. The split is done in a single pass, so
# every original element is visited exactly once.

foreach $line (@index) { $line =~ s/>\s*/>\n/g; }
@index = map { split(/(?<=\n)/, $_) } @index;
150
151# We want to add a list of letters at the top of the index, and link back
152# to them from each letter heading. First find the index title and remember
153# where to insert the list of letters.
154
# $listindex stays undefined if no line consisting of just </h2> is found.

$listindex = undef;
for ($i = 0; $i < scalar(@index); $i++)
  {
  if ($index[$i] =~ /^<\/h2>$/)
    {
    $listindex = $i;
    last;
    }
  }

# Now scan through for the letter headings and build the cross references,
# while also building up the list to insert. The scan carries on from where
# the search for the title stopped.

$list = "<h4>\n";
for (; $i < scalar(@index); $i++)
  {
  # A letter heading is a line holding one character followed by </h3>.
  if ($index[$i] =~ /^(.)<\/h3>$/)
    {
    $letter = $1;

    # Wrap the heading in a link: the opening tag goes at the start of the
    # previous line, the closing tag at the end of this one. The heading
    # (id "<letter>B") and its entry in the list (id "<letter>T") point at
    # each other.
    $index[$i-1] =~ s/^/<a id="${letter}B" href="#${letter}T">/;
    $index[$i] =~ s/$/<\/a>/;
    $list .= "<a id=\"${letter}T\" href=\"#${letter}B\"> $letter</a>\n";
    }
  }

# Now we know which letters we have, we can insert the list. It goes in
# immediately before the </h2> line that was found. If there was no such line
# there is nowhere sensible to put it, so leave the index alone instead of
# inserting an empty list at the very top of the file.

$list .= "</h4>\n";
if (defined $listindex)
  {
  splice @index, $listindex, 0, $list;
  }
else
  {
  warn "No index title found in ix01.html: list of letters not inserted\n";
  }
183
184# Write out the modified ix01.html file.
185
open (OUT, ">ix01.html") || die "Failed to open ix01.html for writing: $!\n";
print OUT @index;

# Check the close as well: an error in writing buffered output may not be
# reported until the handle is closed.
close(OUT) || die "Failed to write ix01.html: $!\n";
189
190
191# End