Defend against bad data from gethostbyaddr(). Fixes bug #619
[exim.git] / doc / doc-scripts / f2h
1 #!/usr/bin/perl
2 # $Cambridge: exim/doc/doc-scripts/f2h,v 1.1 2004/10/07 15:04:35 ph10 Exp $
3
4 # Script to turn the Exim FAQ into HTML.
5
6 use integer;
7
8 # Function to do text conversions that apply to both displays and non displays
9
# Replace every angle bracket in a piece of FAQ text by its numeric
# character reference (&#60; for "<", &#62; for ">") so that it cannot be
# taken as HTML markup.
#
# Argument:  the text to convert
# Returns:   the converted copy; the caller's string is left untouched

sub process_both
{
  my $text = shift;

  # The only characters matched are "<" (code 60) and ">" (code 62), so the
  # reference can be built directly from the character code.
  $text =~ s/([<>])/'&#' . ord($1) . ';'/ge;

  return $text;
}
16
17
18 # Function to do text conversions to display paragraphs
19
# Convert a display paragraph (one that starts with the "==>" flag) for
# output inside <pre>...</pre>.
#
# The three-character flag is replaced by three spaces, so that the first
# line keeps the same column as its continuation lines. The indentation of
# the first line is then measured, and anything beyond three columns is
# removed from the start of every line, so the display ends up indented by
# three spaces. Finally "<" and ">" are escaped via process_both().
#
# Argument:  the paragraph text
# Returns:   the converted text

sub process_display
{
  my $text = $_[0];

  # Must be exactly as wide as "==>": the indent calculation below subtracts
  # 3, and a narrower replacement would leave the first line out of step
  # with the lines that follow it.
  $text =~ s/^==>/   /;

  my ($indent) = $text =~ /^(\s+)/;
  my $excess = length($indent) - 3;

  if ($excess > 0)
  {
    my $remove = " " x $excess;
    $text =~ s/^$remove//mg;
  }

  return process_both($text);
}
29
30
31 # Function to do text conversions to paragraphs not in displays.
32
# Convert the text of an ordinary (non-display) paragraph to HTML.
#
# "<" and ">" are escaped first via process_both(); then the backslash-based
# font markup, the //...//, ::...:: and ``...'' forms and [[br]] are turned
# into HTML tags. References to questions (Qnnnn) are linked using the global
# %xref table, and references to configuration samples (a letter from CFLS
# followed by three digits) are linked to the corresponding .txt file.
#
# Argument:  the paragraph text
# Returns:   the converted text

sub process_non_display
{
  my $s = process_both($_[0]);

  # The marker is written with escaped @ signs: an unescaped "@backslash"
  # inside a pattern or replacement is subject to array interpolation.

  $s =~ s/\@\\/\@\@backslash\@\@/g;               # @\ temporarily hidden

  $s =~ s/\\#/&nbsp;/g;                           # \# is a hard space

  $s =~ s/\\\*\*([^*]*)\*\*\\/<b>$1<\/b>/g;       # \**...**\ => bold
  $s =~ s/\\\*([^*]*)\*\\/<i>$1<\/i>/g;           # \*.....*\ => italic
  $s =~ s/\\"([^"]*)"\\/<tt>$1<\/tt>/g;           # \"....."\ => fixed pitch
  $s =~ s/\\\$([^\$]*)\$\\/<i>\$$1<\/i>/g;        # \$.....$\ => $italic
  $s =~ s/\\\\([^\\]*)\\\\/<small>$1<\/small>/g;  # \\.....\\ => small
  $s =~ s/\\\(([^)]*)\)\\/<i>$1<\/i>/g;           # \(.....)\ => italic
  $s =~ s/\\-([^\\]*)-\\/<b>-$1<\/b>/g;           # \-.....-\ => -bold
  $s =~ s/\\\[([^]]*)\]\\/&\#60;<i>$1<\/i>&\#62;/gx; # \[.....]\ => <italic>
  $s =~ s/\\\?(.*?)\?\\/<a href="$1">$1<\/a>/g;   # \?.....?\ => URL
  $s =~ s/\\\^\^([^^]*)\^\^\\/<i>$1<\/i>/g;       # \^^...^^\ => italic
  $s =~ s/\\\^([^^]*)\^\\/<i>$1<\/i>/g;           # \^.....^\ => italic
  $s =~ s/\\%([^%]*)%\\/<b>$1<\/b>/g;             # \%.....%\ => bold
  $s =~ s/\\\/([^\/]*)\/\\/<i>$1<\/i>/g;          # \/...../\ => italic
  $s =~ s/\\([^\\]+)\\/<tt>$1<\/tt>/g;            # \.......\ => fixed pitch

  $s =~ s"//([^/\"]*)//"<i>$1</i>"g;              # //.....// => italic
  $s =~ s/::([^:]*)::/<i>$1:<\/i>/g;              # ::.....:: => italic:

  $s =~ s/``(.*?)''/&#147;$1&#148;/g;             # ``.....'' => quoted text

  $s =~ s/\s*\[\[br\]\]\s*/<br>/g;                # [[br]] => <br>

  $s =~ s/\@\@backslash\@\@/\\/g;                 # Put back single backslash

  $s =~ s/^(\s*\(\d\)\s)/$1&nbsp;/;               # Extra space after (1), etc.

  # Cross references within paragraphs. A question number that is followed
  # by a colon is the label of the question itself and is not linked. A
  # number with no entry in %xref is left as plain text rather than being
  # wrapped in a link with an empty href.

  $s =~ s{Q(\d{4})(?!:)}
         {exists $xref{$1} ? qq(<a href="$xref{$1}">Q$1</a>) : "Q$1"}ge;

  # References to configuration samples

  $s =~ s/\b([CFLS]\d\d\d)\b/<a href="$1.txt">$1<\/a>/g;

  # Remove white space at the start of every line of the paragraph, to keep
  # the file smaller (and for human reading when debugging).

  $s =~ s/^\s+//mg;

  return $s;
}
81
82
83 # Main program
84
85 # We want to read the file paragraph by paragraph; Perl only does this if the
86 # separating lines are truly blank. Having been caught by lines containing
87 # whitespace before, do a detrailing pass first.
88
# Detrailing pass: copy the FAQ source (first argument) to a scratch file
# named after it with the process id appended, removing trailing spaces and
# tabs from every line, and then rename the scratch file over the source.
#
# The three-argument form of open is used so that characters in the file
# name cannot be taken as an open mode, and every write is checked: the
# rename must not happen if the copy is incomplete, because it replaces the
# only copy of the source.

{
  my $source  = $ARGV[0];
  my $scratch = "$source-$$";

  open(my $in,  '<', $source)  || die "can't open $ARGV[0] (preliminary)\n";
  open(my $out, '>', $scratch) || die "can't open $ARGV[0]-$$\n";

  while (<$in>)
  {
    s/[ \t]+$//;
    print {$out} $_ or die "can't write to $scratch: $!\n";
  }

  close($in);
  close($out) || die "can't write to $scratch: $!\n";

  rename($scratch, $source) ||
    die "can't rename $ARGV[0]-$$ as $ARGV[0]\n";
}
100
101 # The second argument is the name of a directory into which to put multiple
102 # HTML files. We start off with FAQ.html.
103
# $hdir is the output directory (second argument). The top-level page is
# opened on the OUT handle, which the rest of the script goes on writing to.
# Three-argument open keeps characters in the directory name from being
# taken as part of the open mode.

$hdir = $ARGV[1];
open(OUT, '>', "$hdir/FAQ.html") || die "can't open $hdir/FAQ.html\n";

# Initial output: the HTML header and the main title of the top-level page.

print OUT <<End ;
<html>
<head>
<title>The Exim FAQ</title>
</head>
<body bgcolor="#F8F8F8" text="#00005A" link="#0066FF" alink="#0066FF" vlink="#000099">
<h1>The Exim FAQ</h1>
End
117
# From here on the input is read a paragraph at a time.

$/ = "";

# First pass to read the titles and questions and create the table of
# contents. It is saved up in @toc so that it can be written after the
# introductory paragraphs. The pass also builds:
#
#   @seclist   one link per section, for the list of sections
#   %xref      for each question number (without the "Q"), the file and
#              anchor of its table-of-contents entry
#
# and leaves the number of the last section in $sec.

open(IN, '<', $ARGV[0]) || die "can't open $ARGV[0] (first time)\n";

$toc = 0;       # Number of the next TOC anchor
$sec = -1;      # Number of the current section; -1 before the first heading
$inul = 0;      # True while a <ul> list of questions is open

while (<IN>)
{
  $count = (tr/\n//) - 1;            # Number of lines in paragraph

  # A one-line paragraph starting with a number and a dot is a section
  # heading.

  if ($count == 1 && /^\d+\./)
  {
    chomp;
    push @toc, "</ul>" if $inul;
    $inul = 0;
    push @toc, "<br>\n\n" if $sec++ >= 0;   # Separator before all but the first
    push @toc, "<a name=\"TOC$toc\" href=\"FAQ_$sec.html\">$_</a>\n";
    $toc++;

    # For the list of sections the title is reduced to lower case apart
    # from its first character, except for a few names that are left
    # alone; some acronyms are then put back into upper case.

    ($number,$title) = /^(\d+)\.\s+(.*)$/;
    if ($title ne "UUCP" && $title ne "IRIX" && $title ne "BSDI" &&
        $title ne "HP-UX")
    {
      ($initial,$rest) = $title =~ /^(.)(.*)$/;
      $title = "$initial\L$rest";
      $title =~ s/isdn/ISDN/;
      $title =~ s/\btls\b/TLS/;
      $title =~ s/\bssl\b/SSL/;
      $title =~ s/ os x/ OS X/;
    }
    push @seclist, "<a href=\"FAQ_$sec.html\">$number. $title</a>";

    next;
  }

  # The first paragraph of a question. The text after the question number
  # is captured explicitly instead of being taken from $'.

  if (/^(Q\d{4})(.*)\z/s)
  {
    $num = $1;
    $rest = $2;

    if (!$inul)
    {
      push @toc, "<ul>\n";
      $inul = 1;
    }

    $xref{substr($num,1)} = "FAQ_$sec.html#TOC$toc";
    $rest =~ s/^: /:&nbsp;&nbsp;/;
    $rest = process_non_display($rest);
    push @toc, "<li><a name=\"TOC$toc\" href=\"FAQ_$sec.html#TOC$toc\">$num</a>$rest<br><br></li>\n";
    $toc++;
    next;
  }
}

push @toc, "</ul>\n" if $inul;
close(IN);
179
180
181 # This is the main processing pass. We have to detect the different kinds of
182 # "paragraph" and do appropriate things.
183
# Reopen the FAQ source for the main pass. Three-argument open keeps
# characters in the file name from being taken as an open mode.

open(IN, '<', $ARGV[0]) || die "can't open $ARGV[0] (second time)\n";

# Skip the title, which is the first thing read from the file

$_ = <IN>;

# Handle the rest of the file. The counters restart from the values they
# had at the start of the first pass; the highest section number that pass
# reached is remembered in $maxsec.

$toc = 0;
$maxsec = $sec;
$sec = -1;
195
# Main conversion loop: one iteration per paragraph read from IN. Output
# goes to whatever file is currently open on OUT: the top-level page until
# the first section heading is met, then one file per section.

while (<IN>)
{
  $count = (tr/\n//) - 1;            # Number of lines in paragraph
  chomp;                             # Trailing newlines

  # The paragraph that introduces the list of sections is replaced by a
  # pointer to the keyword index and a two-column table of the sections
  # collected in @seclist.

  if (/^The FAQ is divided into/)
  {
    my $nsec = scalar(@seclist);
    my $rows = int(($nsec + 1) / 2);   # Left column gets the odd one out

    print OUT "<hr><a name=\"TOC\"><h1>Index</h1></a>\n";
    print OUT "<p>A <i>Keyword-in-context</i> <a href=\"FAQ-KWIC_A.html\">index</a> " .
      "to the questions is available. This is usually the " .
      "quickest way to find information in the FAQ.</p>\n";

    print OUT "<h1>Contents</h1>\n";
    print OUT "<p>The FAQ is divided into the following sections:<br><br></p>\n";

    print OUT "<table>\n";

    for my $row (0 .. $rows - 1)
    {
      print OUT "<tr>\n";
      print OUT " <td>", "&nbsp;" x 4, "</td>\n";
      print OUT " <td>&nbsp;$seclist[$row]</td>\n";
      print OUT " <td>", "&nbsp;" x 8, "$seclist[$rows+$row]</td>\n"
        if $rows + $row < $nsec;
      print OUT "</tr>\n";
    }
    print OUT "</table><br><p>\n<hr><br>\n";
    print OUT "<h1>List of questions</h1>\n";

    $_ = <IN>;                       # Skip section list
    next;
  }

  # A one-line paragraph starting with a number and a dot is a section
  # heading: finish the current page and start the page for the section.

  if ($count == 1 && /^\d+\./)
  {
    # The saved table of contents is written when the first heading is
    # met; after that @toc is empty and nothing is printed here.

    print OUT splice(@toc);

    # Output links at the bottom of this page

    print OUT "<hr><br>\n";
    print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
    if ($sec > 0)
    {
      printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>&nbsp;&nbsp;\n", $sec-1);
    }
    printf OUT ("<a href=\"FAQ_%d.html\">Next</a>\n", $sec+1);

    # New section goes in new file. The close is checked so that a failed
    # write to the page just finished does not go unnoticed.

    print OUT "</body>\n</html>\n";
    my $finished = $sec < 0 ? "FAQ.html" : "FAQ_$sec.html";
    close(OUT) || die "can't write $hdir/$finished: $!\n";

    $sec++;
    open(OUT, '>', "$hdir/FAQ_$sec.html") ||
      die "Can't open $hdir/FAQ_$sec.html\n";

    print OUT "<html>\n<head>\n" .
      "<title>The Exim FAQ Section $sec</title>\n" .
      "</head>\n" .
      "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
      "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

    print OUT "<h1>The Exim FAQ</h1>\n";

    # Links at the top of the new page

    print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
    if ($sec > 0)
    {
      printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>&nbsp;&nbsp;\n", $sec-1);
    }
    if ($sec < $maxsec)
    {
      printf OUT ("<a href=\"FAQ_%d.html\">Next</a>\n", $sec+1);
    }

    print OUT "<hr><br>\n";

    print OUT "<h2><a href=\"FAQ.html#TOC$toc\">$_</a></h2>\n";
    $toc++;
    next;
  }

  # Widen the gap after the label of a question, an answer, or a
  # configuration sample.

  s/^([QA]\d{4}|[CFLS]\d{3}): /$1:&nbsp;&nbsp;/;

  # Q initial paragraph: the label becomes an anchor that links back to the
  # table of contents. The text after the label is captured explicitly
  # instead of being taken from $'.

  if (/^(Q\d{4}:)(.*)\z/s)
  {
    my ($label, $body) = ($1, $2);
    print OUT "<p>\n<a name=\"TOC$toc\" href=\"FAQ.html#TOC$toc\">$label</a>";
    $_ = process_non_display($body);
    print OUT "$_\n</p>\n";
    $toc++;
    next;
  }

  # A initial paragraph: the label is coloured

  if (/^A\d{4}:/)
  {
    $_ = process_non_display($_);
    s/^(A\d{4}:)/<font color="#00BB00">$1<\/font>/;
    print OUT "<p>\n$_\n</p>\n";
    next;
  }

  # If a paragraph begins ==> it is a display which must remain verbatim
  # and not be reformatted. The flag gets turned into spaces.

  if ($_ =~ /^==>/)
  {
    $_ = process_display($_);
    chomp;
    print OUT "<pre>\n$_</pre>\n";
  }

  # The line that marks the end of the FAQ produces no output.

  elsif (/^\*\*\* End of Exim FAQ \*\*\*/)
  {
  }

  # Any other non-display paragraph. In the signature paragraph every line
  # break is kept as <br>, and a rule is added before the last one.

  else
  {
    $_ = process_non_display($_);
    if (/^Philip Hazel/)
    {
      s/\n/<br>\n/g;
      s/<br>$/<hr><br>/;
    }
    print OUT "<p>\n$_\n</p>\n";
  }
}
329
close(IN);

# Links at the bottom of the last page. There is a previous section page
# only when the page being finished is FAQ_1.html or later, so the link is
# given the same test as the other "Previous" links; without it the link
# would point at a page with a negative number.

print OUT "<hr><br>\n";
print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
if ($sec > 0)
{
  printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>\n", $sec-1);
}

# Finish the page. The close is checked so that a failed write does not go
# unnoticed.

print OUT "</body>\n</html>\n";
close(OUT) || die "can't write the last output file in $hdir: $!\n";
338 End