Commit | Line | Data |
---|---|---|
495ae4b0 | 1 | #! /usr/bin/perl -w |
a82639d2 | 2 | # $Cambridge: exim/doc/doc-scripts/g2h,v 1.3 2005/02/17 12:17:09 ph10 Exp $ |
495ae4b0 PH |
3 | |
4 | # This is a script that turns the SGCAL source of Exim's documentation into | |
5 | # HTML. It can be used for both the filter document and the main Exim | |
6 | # specification. The syntax is | |
7 | # | |
8 | # g2h [-split no|section|chapter] <source file> <title> | |
9 | # | |
10 | # Previously, -split section was used for the filter document, and -split | |
11 | # chapter for the main specification. However, the filter document has gained | |
12 | # some chapters, so they are both split by chapter now. Only one -split can be | |
13 | # specified. | |
14 | # | |
15 | # A number of assumptions about the style of the input markup are made. | |
16 | # | |
17 | # The HTML is written into the directory html/ using the source file base | |
18 | # name as its base. | |
19 | ||
20 | # Written by Philip Hazel | |
21 | # Starting 21-Dec-2001 | |
22 | # Last modified 26-Nov-2003 | |
23 | ||
24 | ############################################################################# | |
25 | ||
26 | ||
27 | ||
28 | ################################################## | |
29 | # Open an output file # | |
30 | ################################################## | |
31 | ||
# Open the output file whose path is the only argument on the global OUT
# handle and write the page preamble to it: the DOCTYPE, a <head> whose title
# is $doctitle followed by the chapter and/or section number when these are
# positive, the <body> tag, and - when splitting by chapter or by section - a
# navigation bar with Previous/Next/Contents links. The final component of the
# path is then saved in $current_file for TOC creation.
#
# Globals read: $doctitle, $thischapter, $thissection, $chapsplit, $sectsplit,
# $maxchapter, $maxsection, $file_base. Global written: $current_file.
# Dies if the file cannot be opened.

sub openout {
my($path) = $_[0];

# Use the three-argument form so that nothing in the path can be taken as
# part of the open mode.

open (OUT, ">", $path) || die "Can't open $path\n";

# Boilerplate

print OUT "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";

print OUT "<html>\n<head>\n<title>$doctitle" .
  (($thischapter > 0)? " chapter $thischapter" : "") .
  (($thissection > 0)? " section $thissection" : "") .
  "</title>\n</head>\n" .
  "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

# Forward/backward links when splitting. Chapter splitting takes precedence
# over section splitting; the two cases differ only in which counter and
# which maximum are used.

if ($chapsplit || $sectsplit)
  {
  my($this, $max) = $chapsplit?
    ($thischapter, $maxchapter) : ($thissection, $maxsection);

  # Plain print is used rather than printf so that a "%" in $file_base
  # cannot be misread as a format directive.

  print OUT "<font size=2>\n";
  print OUT "<a href=\"${file_base}_" . ($this - 1) .
    ".html\">Previous</a> \n" if $this > 1;
  print OUT "<a href=\"${file_base}_" . ($this + 1) .
    ".html\">Next</a> \n" if $this < $max;
  print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";

  # The padding must be non-breaking spaces; ordinary spaces would collapse
  # to a single space when the page is rendered.

  print OUT "&nbsp;" x 6, "($doctitle)\n</font><hr>\n";
  }

# Save the final component of the current file name (for TOC creation). If
# the path contains no "/" the whole of it is the final component; $1 must
# not be used after a failed match because it would hold a stale value.

$current_file = ($path =~ /([^\/]+)$/)? $1 : $path;
}
77 | ||
78 | ||
79 | ||
80 | ################################################## | |
81 | # Close an output file # | |
82 | ################################################## | |
83 | ||
84 | # The first argument is one of: | |
85 | # | |
86 | # "CHAP" a chapter is ending | |
87 | # "SECT" a section is ending | |
88 | # "" the whole thing is ending | |
89 | # | |
90 | # In the first two cases $thischapter and $thissection contain the new chapter | |
91 | # and section numbers, respectively. In the third case, we can deduce what is | |
92 | # ending from the flags. The variables contain the current values. | |
93 | ||
# Finish off the current output file: write a rule (unless one has just been
# written), end any open paragraph, write the trailing navigation links,
# close the <body> and <html> elements, and close OUT.
#
# The argument says what is ending:
#
#   "CHAP"   a chapter; $thischapter already holds the NEW chapter number, so
#            "Next" is $thischapter and "Previous" is $thischapter - 2
#   "SECT"   a section; likewise, using $thissection
#   ""       the whole document; the counters hold the CURRENT values, so
#            there is no "Next" and "Previous" is the counter minus one. The
#            counter used depends on $chapsplit/$sectsplit; if neither is
#            set, no navigation links are written.
#
# Globals read: $lastwasrule, $thischapter, $thissection, $chapsplit,
# $sectsplit, $file_base, $doctitle.

sub closeout {
my($ending) = $_[0];
my($shownav) = 1;
my($previous, $next);

print OUT "<hr>\n" if !$lastwasrule;
&setpar(0);

# Work out the numbers of the neighbouring files. $next is left undefined
# when there is no following file.

if ($ending eq "CHAP")
  {
  ($previous, $next) = ($thischapter - 2, $thischapter);
  }
elsif ($ending eq "SECT")
  {
  ($previous, $next) = ($thissection - 2, $thissection);
  }
elsif ($chapsplit)
  {
  $previous = $thischapter - 1;
  }
elsif ($sectsplit)
  {
  $previous = $thissection - 1;
  }
else
  {
  $shownav = 0;
  }

# Plain print is used rather than printf so that a "%" in $file_base cannot
# be misread as a format directive.

if ($shownav)
  {
  print OUT "<font size=2>\n";
  print OUT "<a href=\"${file_base}_$previous.html\">Previous</a> "
    if $previous > 0;
  print OUT "<a href=\"${file_base}_$next.html\">Next</a> "
    if defined $next;
  print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";

  # The padding must be non-breaking spaces; ordinary spaces would collapse
  # to a single space when the page is rendered.

  print OUT "&nbsp;" x 6, "($doctitle)\n</font>\n";
  }

print OUT "</body>\n</html>\n";
close(OUT);
}
143 | ||
144 | ||
145 | ||
146 | ################################################## | |
147 | # Handle an index line # | |
148 | ################################################## | |
149 | ||
150 | # This function returns an empty string so that it can be called as part | |
151 | # of an s operator when handling index items within paragraphs. The two | |
152 | # arguments are: | |
153 | # | |
154 | # the text to index, already converted to HTML | |
155 | # 1 for the concept index, 0 for the options index | |
156 | ||
# Record one index entry and write its anchor to the current output file.
#
# Arguments:
#   $_[0]   the text to index, already converted to HTML
#   $_[1]   true for the concept index (%cindex), false for the options
#           index (%oindex)
#
# Returns an empty string, so that the function can be called from the
# replacement part of an s///e operator. Increments $index_count, adds to the
# chosen index hash, and prints an <a name=...> anchor on OUT.

sub handle_index {
my($text) = $_[0];
my($hash) = $_[1]? \%cindex : \%oindex;
my ($key,$ref);

# Up the index count, and compute the reference to the file and the
# label within it.

$index_count++;
$ref = $chapsplit?
  "${file_base}_$thischapter.html#IX$index_count"
  : $sectsplit?
  "${file_base}_$thissection.html#IX$index_count"
  :
  "#IX$index_count";

# Create the index key, which consists of the text with all the HTML
# coding and any leading quotation marks removed. Turn the primary/secondary
# splitting string "||" into ":".

$text =~ s/\|\|/:/g;

$key = "$text";
$key =~ s/<[^>]+>//g;
$key =~ s/&#(\d+);/chr($1)/eg;
$key =~ s/^`+//;
$key =~ s/^"//;

# Turn all spaces in the text into &nbsp; so that they don't ever split.
# However, there may be spaces in the HTML that already exists in the
# text, so we have to avoid changing spaces inside <>. The lookahead accepts
# a space only if the next angle bracket after it (if any) is an opening one.

$text =~ s/ (?=[^<>]*(?:<|$))/&nbsp;/g;

# If this is the first encounter with this index key, we create a
# straightforward reference.

if (!defined $$hash{$key})
  {
  $$hash{$key} = "<a href=\"$ref\">$text</a>";
  }

# For the second and subsequent encounters, add "[2]" etc. to the
# index text. We find out the number by counting occurrences of "<a"
# in the existing string.

else
  {
  my($number) = 1;
  $number++ while $$hash{$key} =~ /<a/g;
  $$hash{$key} .= " <a href=\"$ref\">[$number]</a>";
  }

# Place the name in the current output

print OUT "<a name=\"IX$index_count\"></a>\n";
return "";
}
215 | ||
216 | ||
217 | ||
218 | ################################################## | |
219 | # Handle emphasis bars # | |
220 | ################################################## | |
221 | ||
222 | # Set colour green for text marked with "emphasis bars", keeping | |
223 | # track in case the matching isn't perfect. | |
224 | ||
# Switch "emphasis bar" colouring on (true argument) or off (false argument)
# and return the HTML needed to make the change. The global $inem records the
# current state, so a request that asks for the state we are already in
# returns an empty string; this copes with unmatched .em/.nem pairs.

sub setinem {
my($wanted) = $_[0]? 1 : 0;

# Nothing to do if we are already in the requested state

return "" if $wanted == ($inem? 1 : 0);

$inem = $wanted;
return $wanted? "<font color=green>\n" : "</font>\n";
}
239 | ||
240 | ||
241 | ||
242 | ################################################## | |
243 | # Convert marked-up text # | |
244 | ################################################## | |
245 | ||
246 | # This function converts text from SGCAL markup to HTML markup, with a couple | |
247 | # of exceptions: | |
248 | # | |
249 | # 1. We don't touch $t because that is handled by the .display code. | |
250 | # | |
251 | # 2. The text may contain embedded .index, .em, and .nem directives. We | |
252 | # handle .em and .nem, but leave .index because it must be done during | |
253 | # paragraph outputting. | |
254 | # | |
255 | # In a non-"rm" display, we turn $rm{ into cancelling of <tt>. Otherwise | |
256 | # it is ignored - in practice it is only used in that special case. | |
257 | # | |
258 | # The order in which things are done in this function is highly sensitive! | |
259 | ||
# Convert a piece of text from SGCAL markup to HTML markup.
#
# Arguments:
#   $_[0]   the text to convert
#   $_[1]   true if $rm{...} should be turned into a cancellation of <tt>
#           (used for non-"rm" displays); otherwise $rm{...} just yields its
#           contents
#
# Returns the converted text. The $t markup and embedded .index directives
# are left untouched for the caller. Embedded .em/.nem directives are handled
# via setinem(), and ~~name references are replaced from %var_value.
#
# The order in which things are done in this function is highly sensitive!

sub handle_text {
my($s) = $_[0];
my($rmspecial) = $_[1];

# Escape all & characters (they aren't involved in markup) but for the moment
# use &+ instead of &# so that we can handle # characters in the text.

$s =~ s/&/&+038;/g;

# Turn SGCAL literals into HTML literals that don't look like SGCAL
# markup, so won't be touched by what follows. Again, use + instead of #.

$s =~ s/@@/&+064;/g;
$s =~ s/@([^@])/"&+".sprintf("%0.3d",ord($1)).";"/eg;

# Now turn any #s that are markup into (non-breaking) spaces, and convert the
# previously created literals to the correct form.

$s =~ s/#/&nbsp;/g;
$s =~ s/&\+(\d+);/&#$1;/g;

# Some simple markup that doesn't involve argument text. Entities are used
# for the non-ASCII characters so that the output does not depend on the
# character encoding that a browser assumes for the page.

$s =~ s/\$~//g;                     # turn $~ into nothing
$s =~ s/__/_/g;                     # turn __ into _
$s =~ s/--(?=$|\s|\d)/&ndash;/mg;   # turn -- into endash in text or number range
$s =~ s/\(c\)/&copy;/g;             # turn (c) into copyright symbol

# Use double quotes

# $s =~ s/`([^']+)'/``$1''/g;

$s =~ s/`([^']+)'/&ldquo;$1&rdquo;/g;

# This is a fudge for some specific usages of $<; can't just do a global
# as it occurs in things like "$<variable name>" as well.

$s =~ s/(\d)\$<-/$1-/g;            # turn 0$<- into 0-
$s =~ s/\$<//g;                    # other $< is ignored

# Turn <<...>> into equivalent SGCAL markup that doesn't involve the use of
# < and >, and then escape the remaining < and > characters in the text.

$s =~ s/<<([^>]*?)>>/<\$it{$1}>/g;        # turn <<xxx>> into <$it{xxx}>
$s =~ s/</&lt;/g;
$s =~ s/>/&gt;/g;

# Other markup...

$s =~ s/\$sm\{//g;                        # turn $sm{     into nothing
$s =~ s/\$smc\{//g;                       # turn $smc{    into nothing
$s =~ s/\$smi\{//g;                       # turn $smi{    into nothing

$s =~ s/\$tt\{([^\}]*?)\}/<tt>$1<\/tt>/g; # turn $tt{xxx} into <tt>xxx</tt>
$s =~ s/\$it\{([^\}]*?)\}/<em>$1<\/em>/g; # turn $it{xxx} into <em>xxx</em>
$s =~ s/\$bf\{([^\}]*?)\}/<b>$1<\/b>/g;   # turn $bf{xxx} into <b>xxx</b>

$s =~ s/\$cb\{([^\}]*?)\}/<tt><b>$1<\/b><\/tt>/g; # turn $cb{xxx} into
                                                  # <tt><b>xxx</b></tt>

$s =~ s/\\\\([^\\]*?)\\\\/<font size=-1>$1<\/font>/g;  # turn \\xxx\\ into
                                                       # small font
$s =~ s/\\\?([^?]*?)\?\\/<a href="$1">$1<\/a>/g;  # turn \?URL?\ into URL

$s =~ s/\\\(([^)]*?)\)\\/<i>$1<\/i>/g;    # turn \(xxx)\  into <i>xxx</i>
$s =~ s/\\\"([^\"]*?)\"\\/<tt>$1<\/tt>/g; # turn \"xxx"\  into <tt>xxx</tt>


$s =~ s/\\\$([^\$]*?)\$\\/<tt>\$$1<\/tt>/g;  # turn \$xxx$\  into <tt>$xxx</tt>
$s =~ s/\\\-([^\\]*?)\-\\/<i>-$1<\/i>/g;     # turn \-xxx-\  into -italic
$s =~ s/\\\*\*([^*]*?)\*\*\\/<b>$1<\/b>/g;   # turn \**xxx**\ into <b>xxx</b>
$s =~ s/\\\*([^*]*?)\*\\/<i>$1<\/i>/g;       # turn \*xxx*\  into italic
$s =~ s/\\%([^*]*?)%\\/<b>$1<\/b>/g;         # turn \%xxx%\  into bold
$s =~ s/\\([^\\]*?)\\/<tt>$1<\/tt>/g;        # turn \xxx\    into <tt>xxx</tt>
$s =~ s/::([^\$]*?)::/<i>$1:<\/i>/g;         # turn ::xxx::  into italic:
$s =~ s/\$\*\$/\*/g;                         # turn $*$      into *

# Handle $rm{...}

if ($rmspecial)
  {
  $s =~ s/\$rm\{([^\}]*?)\}/<\/tt>$1<tt>/g;  # turn $rm{xxx} into </tt>xxx<tt>
  }
else
  {
  $s =~ s/\$rm\{([^\}]*?)\}/$1/g;            # turn $rm{xxx} into xxx
  }

# There is one case where the terminating } of an escape sequence is
# in another paragraph - this follows $sm{ - it can be fixed by
# removing any stray } in a paragraph that contains no { chars. The test
# must be made on the text being converted, not on $_, which holds whatever
# line the caller happens to be looking at.

$s =~ s/\}//g if $s !~ /\{/;

# Remove any null flags ($$)

$s =~ s/\$\$//g;

# If the paragraph starts with $c\b, remove it.

$s =~ s/^\$c\b//;

# If the paragraph starts with $e\b, indent it slightly.

$s =~ s/^\$e\b/&nbsp;/;

# Handle .em, and .nem directives that occur within the paragraph

$s =~ s/\.em\s*\n/&setinem(1)/eg;
$s =~ s/\.nem\s*\n/&setinem(0)/eg;

# Explicitly included HTML

$s =~ s/\[\(([^)]+)\)\]/<$1>/g;           # turn [(...)] into <...>

# Finally, do the substitutions and return the modified text.

$s =~ s/~~(\w+)/$var_value{$1}/eg;

return $s;
}
381 | ||
382 | ||
383 | ||
384 | ################################################## | |
385 | # Start/end a paragraph # | |
386 | ################################################## | |
387 | ||
388 | # We want to leave paragraphs unterminated until we know that a horizontal | |
389 | # rule does not follow, to avoid getting space inserted before the rule, | |
390 | # which doesn't look good. So we have this function to help control things. | |
391 | # If the argument is 1 we are starting a new paragraph; if it is 0 we want | |
392 | # to force the ending of any incomplete paragraph. | |
393 | ||
# Paragraph bracketing. Any paragraph that is currently open (as recorded in
# the global $inpar) is always ended first. Then, if the argument is true, a
# new paragraph is started and recorded as open. Calling with a false
# argument therefore just forces the end of an incomplete paragraph.

sub setpar {
my($start_new) = $_[0];

# End whatever is open

print OUT "</p>\n" and $inpar = 0 if $inpar;

# Start afresh if requested

return if !$start_new;
print OUT "<p>\n";
$inpar = 1;
}
406 | ||
407 | ||
408 | ||
409 | ################################################## | |
410 | # Handle a "paragraph" # | |
411 | ################################################## | |
412 | ||
413 | # Read a paragraph of text, which may contain many lines and may contain | |
414 | # .index, .em, and .nem directives within it. We may also encounter | |
415 | # ".if ~~html" within paragraphs. Process those directives, | |
416 | # convert the markup, and output the rest as an HTML paragraph. | |
417 | ||
418 | ||
# Collect a paragraph, starting with the line already in $_ and continuing
# with lines read from IN until a blank line or a directive other than
# .index, .em, or .nem is reached (that line is left in $_ for the caller;
# at end of file $_ is left undefined). Within the paragraph, an
# ".if ~~html" block contributes its first part only: the .if line itself is
# dropped (together with the white space before it), as are any .else part
# and the closing .fi. The collected text is converted by handle_text() and,
# unless it is entirely white space, a new paragraph is started, the embedded
# .index lines are processed, and the text is written to OUT.

sub handle_paragraph{
my($text) = $_;
my($in_html_cond) = 0;

LINE: while (<IN>)
  {
  # Start of an HTML-only insertion

  if (/^\.if\s+~~html\b/)
    {
    $in_html_cond = 1;
    $text =~ s/\s+$//;       # lose unwanted whitespace and newlines
    next LINE;
    }

  # Inside such an insertion, .else introduces text for other formats,
  # which is skipped as far as .fi; a plain .fi just ends the insertion.

  if ($in_html_cond)
    {
    if (/^\.else\b/)
      {
      while (<IN>) { last if /^\.fi\b/; }
      $in_html_cond = 0;
      next LINE;
      }
    if (/^\.fi\b/)
      {
      $in_html_cond = 0;
      next LINE;
      }
    }

  # A blank line, or any directive that cannot live inside a paragraph,
  # ends the paragraph.

  last LINE if /^\s*$/;
  last LINE if /^\./ && !/^\.(?:index|em|nem)\b/;

  $text .= $_;
  }

$text = &handle_text($text, 0);

# We can't handle .index until this point, when we do it just before
# outputting the paragraph.

if ($text !~ /^\s*$/)
  {
  &setpar(1);
  $text =~ s/\.index\s+([^\n]+)\n/&handle_index($1, 1)/eg;
  print OUT "$text";
  }
}
457 | ||
458 | ||
459 | ||
460 | ################################################## | |
461 | # Handle a non-paragraph directive # | |
462 | ################################################## | |
463 | ||
464 | # The directives .index, .em, and .nem can also appear within paragraphs, | |
465 | # and are then handled within the handle_paragraph() code. | |
466 | ||
# Process the SGCAL directive line that is currently in $_, writing any
# resulting HTML to OUT, and then read the next input line from IN into $_.
# Directives that bracket other material (.display, and the various .if
# forms) consume further lines from IN themselves.
#
# The directives .index, .em, and .nem can also appear within paragraphs,
# and are then handled within the handle_paragraph() code.
#
# Globals written: $lastwasrule (reset on entry, set by .rule), $lastwasitem
# (true only when this directive was .item), $inem, $confuse, @endlist,
# @listtype, $rest, $nptype.

sub handle_directive{
my($new_lastwasitem) = 0;

$lastwasrule = 0;

if (/^\.r?set\b/ || /^\.(?:\s|$)/) {}     # ignore .(r)set and comments

elsif (/^\.justify\b/) {}                 # and .justify

elsif (/^\.newline\b/) { print OUT "<br>\n"; }

elsif (/^\.blank\b/ || /^\.space\b/) { print OUT "<br>\n"; }

elsif (/^\.rule\b/) { &setpar(0); print OUT "<hr>\n"; $lastwasrule = 1; }

elsif (/^\.index\s+(.*)/) { &handle_index(&handle_text($1), 1); }

# Emphasis is handled by colour

elsif (/^\.em\b/)
  {
  &setpar(0);
  print OUT "<font color=green>" if ! $inem;
  $inem = 1;
  }

elsif (/^\.nem\b/)
  {
  &setpar(0);
  print OUT "</font>" if $inem;
  $inem = 0;
  }

# Ignore tab setting stuff - we use tables instead.

elsif (/^\.tabs(?:et)?\b/) {}

# .tempindent is used only to align some of the expansion stuff nicely;
# just ignore it. It is used in conjunction with .push/.pop.

elsif (/^\.(tempindent|push|pop)\b/) {}

# There are some instances of .if ~~sys.fancy in the source. Some of those
# that are not inside displays are two-part things, in which case we just keep
# the non-fancy part. For diagrams, however, they are in three parts:
#
#   .if ~~sys.fancy
#   <aspic drawing stuff for PostScript and PDF>
#   .elif !~~html
#   <ascii art for txt and Texinfo>
#   .else
#   <HTML instructions for including a gif>
#   .fi
#
# In this case, we skip to the third part.

elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/)
  {
  while (<IN>)
    { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }

  # $_ is undefined here if the input ran out before the conditional ended

  if (defined $_ && /^\.elif\b/)
    {
    while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
    }
  }

# Similarly, for .if !~~sys.fancy, take the non-fancy part.

elsif (/^\.if\s+!\s*~~sys\.fancy/) {}

# There are some explicit tests for ~~html for direct HTML inclusions

elsif (/^\.if\s+~~html\b/) {}

# There are occasional requirements to do things differently for Texinfo/HTML
# and PS/txt versions. The latter are produced by SGCAL, so that's what the
# flag is called. The patterns are anchored, like all the others, so that
# ".else" or ".fi" in the middle of a text line cannot end the skipping.

elsif (/^\.if\s+~~sgcal/)
  {
  while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
  }

# Also there is a texinfo flag

elsif (/^\.if\s+~~texinfo\b/)
  {
  while (<IN>)
    { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }
  }

# Ignore any other .if, .else, or .fi directives

elsif (/^\.if\b/ || /^\.fi\b/ || /^\.else\b/) {}

# Ignore .indent

elsif (/^\.indent\b/) {}

# Various flavours of numberpars map to corresponding list types. The stacks
# @endlist and @listtype remember the closing tag and the <li> attributes of
# each list that is currently open, innermost first.

elsif (/^\.numberpars\b/)
  {
  $rest = $';
  &setpar(0);

  if ($rest =~ /(?:\$\.|\" \")/)
    {
    unshift @endlist, "ul";
    unshift @listtype, "";
    print OUT "<ul>\n<li>";
    }
  else
    {
    $nptype = ($rest =~ /roman/)? "a" : "1";
    unshift @endlist, "ol";
    unshift @listtype, " TYPE=\"$nptype\"";
    print OUT "<ol>\n<li$listtype[0]>";
    }
  }

elsif (/^\.nextp\b/)
  {
  &setpar(0);
  print OUT "</li>\n<li$listtype[0]>";
  }

elsif (/^\.endp\b/)
  {
  &setpar(0);
  print OUT "</li>\n</$endlist[0]>\n";
  shift @listtype;
  shift @endlist;
  }

# .display asis can use <pre> which uses a typewriter font.
# Otherwise, we have to do our own line breaking. Turn tabbed lines
# into an HTML table. There will always be a .tabs line first.

elsif (/^\.display\b/)
  {
  my($intable) = 0;
  my($asis) = /asis/;
  my($rm) = /rm/;
  my($eol,$indent);

  # For non asis displays, start a paragraph, and set up to put an
  # explicit break after every line. The indent has to be a non-breaking
  # space, because an ordinary space at the start of a line is not rendered.

  if (!$asis)
    {
    &setpar(1);
    $eol = "<br>";
    $indent = "<tt>&nbsp;</tt>";
    }

  # For asis displays, use <pre> and no explicit breaks

  else
    {
    print OUT "<pre>\n";
    $eol = "";
    $indent = " ";
    }

  # Now read through until we hit .endd (or EOF, but that shouldn't happen)
  # and process the lines in the display.

  while (<IN>)
    {
    last if /^\.endd\b/;

    # The presence of .tabs[et] starts a table

    if (/^\.tabs/)
      {
      $intable = 1;
      print OUT "<table cellspacing=0 cellpadding=0>\n";
      }

    # Some displays have an indent setting - ignore

    elsif (/^\.indent\b/) {}

    # Some displays have .blank inside them

    elsif (/^\.blank\b/)
      {
      print OUT "<br>\n";
      }

    # Some displays have emphasis inside them

    elsif (/^\.em\b/)
      {
      print OUT "<font color=green>" if ! $inem;
      $inem = 1;
      }

    elsif (/^\.nem\b/)
      {
      print OUT "</font>" if $inem;
      $inem = 0;
      }

    # There are occasional instances of .if [!]~~sys.fancy inside displays.
    # In both cases we want the non-fancy alternative. (The only thing that
    # matters in practice is noticing .tabs[et] actually.) Assume the syntax
    # is valid.

    elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/)
      {
      while (<IN>)
        {
        last if /^\.fi\b/ || /^\.else/;
        }
      }

    elsif (/^\.if\s+!\s*~~sys\.fancy/) {}

    elsif (/^\.fi\b/) {}

    # Ignore .newline and .linelength

    elsif (/^\.newline\b/ || /^\.linelength\b/) {}

    # Ignore comments

    elsif (/^\.(\s|$)/) {}

    # There shouldn't be any other directives inside displays

    elsif (/^\./)
      {
      print "*** Ignored directive inside .display: $_";
      }

    # Handle a data line within a display. If it's an asis display, the only
    # conversion is to escape the HTML characters. Otherwise, process the
    # SGCAL markup.

    else
      {
      chomp;
      if ($asis)
        {
        s/&/&amp;/g;         # must come first, or it would re-escape the others
        s/</&lt;/g;
        s/>/&gt;/g;
        }
      else
        {
        $_ = &handle_text($_, !$rm);
        $_ = "<tt>$_</tt>" if !$rm && $_ ne "";
        }

      # In a table, break fields at $t. For non-rm we must break the
      # <tt> group as well. A non-breaking space pads each cell, as the
      # table has no cell spacing or padding of its own.

      if ($intable)
        {
        if ($rm)
          {
          s/\s*\$t\s*/&nbsp;<\/td><td>/g;
          }
        else
          {
          s/\s*\$t\s*/&nbsp;<\/tt><\/td><td><tt>/g;
          }
        s/<tt><\/tt>//g;
        print OUT "<tr><td>&nbsp;$_</td></tr>\n";
        }

      # Otherwise, output straight, with <br> for non asis displays

      else
        {
        s/<tt><\/tt>//g;
        print OUT "$indent$_$eol\n";
        }
      }
    }    # Loop for display contents

  # Finish off the table and the <pre> - leave a paragraph open

  print OUT "</table>\n" if $intable;
  print OUT "</pre>\n" if $asis;
  }

# Handle configuration option definitions

elsif (/^\.startconf\s+(.*)/)
  {
  $confuse = &handle_text($1);
  }

elsif (/^\.conf\b/)
  {
  my($option, $type, $default) =
    /^\.conf\s+(\S+)\s+("(?:[^"]|"")+"|\S+)\s+("(?:[^"]|"")+"|.*)/;

  $option =~ s/\@_/_/g;   # Underscore will be quoted in option name

  # If $type ends with $**$, add ",expanded" as there doesn't seem to be
  # a dagger character generally available.

  $type =~ s/^"([^"]+)"/$1/;
  $type =~ s/\$\*\*\$/, expanded/;

  # Default may be quoted, and it may also have quotes that are required,
  # if it is a string.

  $default =~ s/^"(.*)"$/$1/;
  $default =~ s/""/"/g;
  $default = &handle_text($default, 0);

  print OUT "<hr>";
  &setpar(0);
  &handle_index($option, 0);
  print OUT "<h3>$option</h3>\n" .
            "<i>Use:</i> $confuse<br>" .
            "<i>Type:</i> $type<br><i>Default:</i> $default<br>\n";
  }

elsif (/^\.endconf\b/)
  {
  print OUT "<hr><br>\n";
  }


# Handle "items" - used for expansion items and the like. We force the
# item text into bold, and put a rule between items.

elsif (/^\.startitems\b/) {}

elsif (/^\.item\s+(.*)/)
  {
  my($arg) = $1;
  chomp($arg);
  $arg =~ s/^"(.*)"$/$1/;
  $arg = &handle_text($arg, 0);

  # If there are two .items in a row, we don't want to put in the
  # separator line or start a new paragraph.

  if ($lastwasitem)
    {
    print OUT "<br>";
    }
  else
    {
    print OUT "<hr>";
    &setpar(1);
    }
  print OUT "<b>$arg</b>\n";
  $new_lastwasitem = 1;
  }

elsif (/^\.enditems\b/)
  {
  print OUT "<hr><br>\n";
  }


# Handle command line option items

elsif (/^\.startoptions\b/) {}

elsif (/^\.option\s+(.*)/)
  {
  my($arg) = $1;
  $arg =~ s/"([^"]*)"/$1/g;

  print OUT "<hr>";
  &setpar(0);

  # For indexing, we want to take up to the first # or < in the line,
  # before processing.

  my($name) = $arg =~ /^([^#<]+)/;
  $name = &handle_text($name, 0);
  &handle_index("-$name", 0);

  # Output as heading, after the index

  $arg = &handle_text($arg, 0);
  print OUT "<h3>-$arg</h3>\n";
  }

elsif (/^\.endoptions\b/)
  {
  print OUT "<hr><br>\n";
  }

# Found an SGCAL directive that isn't dealt with. Oh dear.

else
  {
  print "*** Unexpected SGCAL directive: line $. ignored:\n";
  print "$_\n";
  }

# Remember if last was a .item, and read the next line

$lastwasitem = $new_lastwasitem;
$_ = <IN>;
}
875 | ||
876 | ||
877 | ||
878 | ################################################## | |
879 | # First Pass - collect references # | |
880 | ################################################## | |
881 | ||
# First pass over the source file named by $source_file. It collects, in
# %var_value, the values set by .set and .rset directives:
#
#   - those at the very start of the file (after any leading comment lines)
#     are stored as they stand;
#   - those inside chapters have ~~chapter and ~~section replaced by the
#     current numbers, and if the name starts with CHAP or SECT the value is
#     wrapped in an HTML link to the chapter or section, the form of the link
#     depending on $chapsplit and $sectsplit. The name "runningfoot" is
#     ignored.
#
# On return $thischapter holds the number of the last chapter and
# $thissection the number of the last section in it. Dies if the source file
# cannot be opened. A file that contains no .chapter line at all yields only
# the leading settings.

sub pass_one{
$thischapter = 0;

open (IN, "<", $source_file) || die "Can't open $source_file (first pass)\n";
$_ = <IN>;

# At the start of the specification text, there are some textual replacement
# definitions. They set values, but not cross-references. They may be preceded
# by comments. Each loop checks for end of file, when $_ becomes undefined.

$_ = <IN> while (defined $_ && /^\.(\s|$)/);

while (defined $_ && /^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/)
  {
  $var_value{$1} = $2;
  $_ = <IN>;
  }

# Now skip on till we hit the start of the first chapter. It will be numbered
# 0 if we hit ".set chapter -1". There is only ever one unnumbered chapter.
# Without the end of file test this loop would never finish for a file that
# has no chapters.

while (defined $_ && !/^\.chapter/)
  {
  $thischapter = -1 if /^\.set\s+chapter\s+-1/;
  $_ = <IN>;
  }

# Loop for handling chapters. On entry to each pass $_ holds a .chapter line;
# it is undefined when the input is exhausted.

while (defined $_)
  {
  $thischapter++;
  $thissection = 0;

  # Scan through chapter, setting up cross-references to the chapter
  # and to the sections within it.

  while (<IN>)
    {
    last if /^\.chapter/;
    chomp;

    if (/^\.section/)
      {
      $thissection++;
      next;
      }

    # Handle .(r)set directives.

    if (/^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/ && $1 ne "runningfoot")
      {
      my($key,$value) = ($1,$2);
      $value =~ s/~~chapter/$thischapter/;
      $value =~ s/~~section/$thissection/;

      # Only one of $chapsplit or $sectsplit can be set.

      if ($key =~ /^CHAP/)
        {
        $value = $chapsplit?
          "<a href=\"${file_base}_$thischapter.html\">$value</a>"
          :
          "<a href=\"#CHAP$thischapter\">$value</a>";
        }

      elsif ($key =~ /^SECT/)
        {
        $value = $chapsplit?
          "<a href=\"${file_base}_$thischapter.html" .
            "#SECT$thischapter.$thissection\">$value</a>"
          :
          $sectsplit? "<a href=\"${file_base}_$thissection.html\">$value</a>"
          :
          "<a href=\"#SECT$thischapter.$thissection\">$value</a>";
        }

      $var_value{$key} = $value;
      }
    }
  }

close(IN);
}
966 | ||
967 | ||
968 | ||
969 | ||
970 | ||
971 | ################################################## | |
972 | # Second Pass - generate HTML # | |
973 | ################################################## | |
974 | ||
# Second pass over the source file: write the HTML body file(s) and the
# table-of-contents file. Takes no arguments.
#
# Reads the globals $source_file, $html, $file_base, $doctitle, $chapsplit,
# $sectsplit and $current_file. Resets $inem, $thischapter and $thissection,
# sets $firstchapter, $lastwasrule, $lastwasitem, $cindex_tocn and
# $oindex_tocn, and adds the ".index" entries found before the first chapter
# to %cindex. Body text goes to the OUT handle (opened by openout) and the
# contents list to the TOC handle. The IN handle is left global because the
# paragraph and directive handlers are called with the next line in $_ and
# "further lines may be read" by them.

sub pass_two{
my($tocn) = 0;
my($inmacro) = 0;
my($insection) = 0;

$inem = 0;
$thischapter = 0;
$thissection = 0;

# Open the source file and get the first line. The three-argument form of
# open prevents characters in the file name from being taken as a mode.

open(IN, "<", $source_file) || die "Can't open $source_file (2nd pass)\n";
$_ = <IN>;

# Skip on till we hit the start of the first chapter, but note if we pass
# ".set chapter -1", which is used to indicate no chapter numbering for the
# first chapter (we number it 0). Keep track of whether we are in macro
# definitions or not, and when not, notice occurrences of .index, because
# these are the "x see y" type entries. The test for a defined line stops the
# loop at end of file, so a source with no .chapter line cannot loop for ever.

while (defined $_ && !/^\.chapter/)
  {
  $thischapter = -1 if /^\.set\s+chapter\s+-1/;
  $inmacro = 1 if /^\.macro/;
  $inmacro = 0 if /^\.endm/;
  if (!$inmacro && /^\.index\s+(.*)/)
    {
    my($key);
    my($s) = $1;
    $s = &handle_text($s, 0);
    $s =~ s/ /&#160;/g;         # All spaces unsplittable

    # The hash key is the lower-cased text with HTML tags removed and
    # numeric character references turned back into characters.

    $key = "\L$s";
    $key =~ s/<[^>]+>//g;
    $key =~ s/&#(\d+);/chr($1)/eg;
    $cindex{$key} = $s;
    }
  $_ = <IN>;
  }

# Open the TOC file

open(TOC, ">", "$html/${file_base}_toc.html") ||
  die "Can't open $html/${file_base}_toc.html\n";

print TOC "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
print TOC "<html>\n<head>\n<title>$doctitle Contents</title>\n</head>\n" .
  "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";
print TOC "<h1>$doctitle</h1><hr>\n<ul>\n";

# Open the data file if we are not splitting at chapters

&openout("$html/${file_base}.html") if !$chapsplit;

# Loop for handling chapters. At the start of this loop, $_ is either EOF,
# or contains a .chapter line.

$firstchapter = $thischapter + 1;

while ($_)
  {
  # Close the nested section list of the previous chapter, if there was one.

  print TOC "</ul>\n" if $insection;
  $insection = 0;

  $thischapter++;
  $thissection = 0;
  $lastwasrule = 0;

  # Start a new file if required

  if ($chapsplit)
    {
    &closeout("CHAP") if $thischapter != $firstchapter;
    &openout("$html/${file_base}_$thischapter.html");
    }

  # Set up the chapter title. Save it for the TOC. Set up the anchor and
  # link back to the TOC and show the title.

  $_ =~ /^\.chapter\s+(.*)/;

  my($title) = (($thischapter > 0)? "$thischapter. " : "") . &handle_text($1, 0);

  $tocn++;
  print TOC "<li><a " .
    "name=\"TOC$tocn\" " .
    "href=\"$current_file#CHAP$thischapter\">$title</a></li>\n";

  print OUT "<h1>\n";
  print OUT "<a name=\"CHAP$thischapter\" href=\"${file_base}_toc.html#TOC$tocn\">\n";
  print OUT "$title\n</a></h1>\n";

  # Scan the contents of the chapter

  $_ = <IN>;
  while ($_)
    {
    last if /^\.chapter/;

    # Handle the start of a new section, starting a new file if required

    if (/^\.section\s+(.*)/)
      {
      $thissection++;

      print TOC "<ul>\n" if !$insection;
      $insection = 1;

      my($title) = (($thischapter > 0)? "$thischapter.$thissection " :
        "$thissection. ") . &handle_text($1, 0);

      if ($sectsplit)
        {
        &closeout("SECT");
        &openout("$html/${file_base}_$thissection.html");
        }

      $tocn++;
      printf TOC ("<li><a " .
        "name=\"TOC$tocn\" " .
        "href=\"$current_file#SECT%s$thissection\">%s</a></li>\n",
        ($thischapter > 0)? "$thischapter." : "", $title);

      &setpar(0);
      print OUT "<h2>\n";
      printf OUT ("<a name=\"SECT%s$thissection\" ",
        ($thischapter > 0)? "$thischapter." : "");
      print OUT "href=\"${file_base}_toc.html#TOC$tocn\">\n";
      print OUT "$title\n</a></h2>\n";
      $_ = <IN>;
      $lastwasrule = 0;
      }

    # Blank lines at this level are ignored

    elsif (/^\s*$/)
      {
      $_ = <IN>;
      }

    # Directive and non-directive lines are handled independently, though
    # in each case further lines may be read. Afterwards, the next line is
    # in $_. If .em is at the start of a paragraph, treat it with the
    # paragraph, because the matching .nem will be too. Messy!

    elsif (/^\./)
      {
      if (/^\.em\b/)
        {
        $_=<IN>;
        if (/^\./)
          {
          print OUT "<font color=green>" if ! $inem;
          $inem = 1;
          # Used to handle it here - but that fails if it is .section.
          # Just let the next iteration of the loop handle it.
          # &handle_directive();
          }

        else
          {
          # Put the .em line back in front of the text so that it is
          # processed as part of the paragraph.

          $_ = ".em\n" . $_;
          &handle_paragraph();
          $lastwasrule = 0;
          $lastwasitem = 0;
          }
        }

      # Not .em

      else
        {
        &handle_directive();
        }
      }

    # Not a directive

    else
      {
      &handle_paragraph();
      $lastwasrule = 0;
      $lastwasitem = 0;
      }

    }                           # Loop for each line in a chapter
  }                             # Loop for each chapter

# Close the last file, end off the TOC, and we are done.

&closeout("");

print TOC "</ul>\n" if $insection;

# Add TOC entries for the indexes. A hash is tested for being non-empty by
# using it directly in a boolean context; "defined %hash" is a fatal error in
# Perl 5.22 and later. This is the same condition that the main program uses
# when deciding whether to create each index file.

if (%cindex)
  {
  $cindex_tocn = ++$tocn;
  print TOC "<li><a name=\"TOC$tocn\" ".
    "href=\"${file_base}_cindex.html\">Concept Index</a></li>\n";
  }

if (%oindex)
  {
  $oindex_tocn = ++$tocn;
  print TOC "<li><a name=\"TOC$tocn\" ".
    "href=\"${file_base}_oindex.html\">Option Index</a></li>\n";
  }

print TOC "</ul>\n</body>\n</html>\n";
close(TOC);
close(IN);
}
1187 | ||
1188 | ||
1189 | ||
1190 | ||
1191 | ################################################## | |
1192 | # Adjust index points # | |
1193 | ################################################## | |
1194 | ||
1195 | # Because of the way the source is written, there are often index entries | |
1196 | # that immediately follow the start of chapters and sections and the definition | |
1197 | # of "items" like "helo = verify". This gets the correct page numbers for the | |
1198 | # PostScript and PDF formats. However, for HTML we want the index anchor to be | |
1199 | # before the section heading, because browsers tend to put the index point at | |
1200 | # the top of the screen. So we re-read all the files we've just created, and | |
1201 | # move some of the index points about. This is necessary only if indexes exist. | |
1202 | # The files are small enough to be handled entirely in memory. | |
1203 | ||
sub adjust_index_points {
print "Adjusting index points to precede headings\n";

# Collect the names of the numbered HTML files first and close the directory
# before any file is rewritten. The base name is quoted with \Q..\E so that
# any regex metacharacters in it are matched literally.

opendir(DIR, "$html") || die "Failed to opendir $html\n";
my(@files) = grep { /^\Q${file_base}\E_\d+\.html$/ } readdir(DIR);
closedir(DIR);

foreach my $file (@files)
  {
  my($i);

  open(IN, "<", "$html/$file") ||
    die "Failed to open $html/$file (read)\n";
  my(@lines) = <IN>;
  close(IN);

  for ($i = 0; $i < @lines; $i++)
    {
    next unless $lines[$i] =~ /^<a name="IX\d+"><\/a>$/;

    # An index line that is the very first line has nothing before it.
    # Without this test, $lines[$i-1] would refer to the last line of the
    # file, because negative subscripts count from the end.

    next if $i == 0;

    # Handle an index line that follows a heading definition. Move it back
    # to just before the <h1> or whatever. This preserves the order of
    # multiple index lines, not that that matters. The search looks at no
    # more than the eight lines before the end of the heading, and never at
    # the first line of the file.

    if ($lines[$i-1] =~ /^<\/a><\/h(\d)>/)
      {
      my($level) = $1;
      my($j);
      my($found) = 0;
      for ($j = $i-2; $j > 0 && $j > $i - 10; $j--)
        {
        if ($lines[$j] =~ /<h$level>/)
          {
          $found = 1;
          last;
          }
        }
      if ($found)
        {
        splice(@lines, $j, 0, splice(@lines, $i, 1));
        }
      }

    # Handle an index line that follows an "item". Move it back one line.

    elsif ($lines[$i-1] =~ /^<b>.*<\/b>\s*$/)
      {
      splice(@lines, $i-1, 0, splice(@lines, $i, 1));
      }

    # Handle an index line that follows a "conf" definition. This needs two
    # preceding lines to exist.

    elsif ($i > 1 && $lines[$i-1] =~ /^<i>Type:<\/i>/ && $lines[$i-2] =~ /^<h3>/)
      {
      splice(@lines, $i-2, 0, splice(@lines, $i, 1));
      }

    # Handle an index line that follows an "option" definition

    elsif ($lines[$i-1] =~ /^<h3>/)
      {
      splice(@lines, $i-1, 0, splice(@lines, $i, 1));
      }
    }

  open(OUT, ">", "$html/$file") ||
    die "Failed to open $html/$file (write)\n";

  # Each element still has its own newline, so the lines are joined with
  # nothing between them. Using join avoids changing the global $" variable.

  print OUT join("", @lines);
  close OUT;
  }
}
1277 | ||
1278 | ||
1279 | ||
1280 | ||
1281 | ################################################## | |
1282 | # Create Index # | |
1283 | ################################################## | |
1284 | ||
# Write one index file, $html/${file_base}_<name>.html.
#
# Arguments:
#   $_[0]  reference to a hash; the keys are sorted to give the order of the
#          entries and the values are the HTML that is printed for each entry
#   $_[1]  the name that forms the end of the file name ("cindex", "oindex")
#   $_[2]  the title of the index
#
# Reads the globals $html, $file_base and $doctitle. Dies if the file cannot
# be opened.

sub create_index{
my($hash) = $_[0];
my($ifname) = $_[1];
my($ititle) = $_[2];
my(%indexindex);

# Return the upper-cased first character under which a key is filed. A
# leading \x93 (opening quote) is ignored, exactly as it is when sorting
# below, so that a quoted entry appears under the heading for its letter
# instead of above it.

my($initial_of) = sub
  {
  my($text) = $_[0];
  $text =~ s/^\x93//;
  return uc(substr($text, 0, 1));
  };

open(INDEX, ">", "$html/${file_base}_$ifname.html") ||
  die "Failed to open $html/${file_base}_$ifname.html\n";

print INDEX "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
print INDEX "<html>\n<head>\n<title>$doctitle $ititle</title>\n";
print INDEX "<base target=\"body\">\n</head>\n";

print INDEX "<body bgcolor=\"#FFFFDF\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

print INDEX "<h3>$ititle</h3>\n";

# We have to scan the keys in the hash twice; first to build the list
# of initial letters, and then to do the business. The first time we
# do not need to sort them. Only the letters A-Z get an entry.

foreach my $key (keys %$hash)
  {
  my($initial) = &$initial_of($key);
  $indexindex{$initial} = 1 if $initial ge "A" && $initial le "Z";
  }

print INDEX "<p>\n";
foreach my $key (sort keys %indexindex)
  {
  print INDEX " <a href=\"#$key\" target=\"index\">$key</a>\n";
  }
print INDEX "<hr></p>\n";

my($letter) = "";
print INDEX "<p>\n";

# Sort without regard to case, using the case-sensitive comparison only to
# order keys that are otherwise equal.

foreach my $key (sort
  {
  my($aa) = $a;
  my($bb) = $b;

  $aa =~ s/^\x93//;             # Seems like the actual char values are
  $bb =~ s/^\x93//;             # set by this time, not "&#147;"

  ("\L$aa" eq "\L$bb")? ("$aa" cmp "$bb") : ("\L$aa" cmp "\L$bb");
  }
  keys %$hash)
  {
  # Start a new letter heading, with an anchor for the letter list at the
  # top, whenever the initial changes to a different letter.

  my($initial) = &$initial_of($key);
  if ($initial ne $letter && $initial ge "A" && $initial le "Z")
    {
    print INDEX "<br>\n";
    print INDEX "<a name=\"$initial\"></a>\n";
    print INDEX "<font size=\"+1\">\U$initial\E</font><br>\n";
    $letter = $initial;
    }
  print INDEX "$$hash{$key}<br>\n";
  }

print INDEX "</p>\n";

print INDEX "</body>\n</html>\n";
close(INDEX);
}
1353 | ||
1354 | ||
1355 | ||
1356 | ||
1357 | ################################################## | |
1358 | # Show usage and die # | |
1359 | ################################################## | |
1360 | ||
# Give up with a one-line summary of the command syntax. Takes no arguments
# and does not return; the message ends in a newline, so Perl does not add
# the script name and line number to it.

sub usage {
my($message) = "Usage: g2h [-split no|section|chapter] <source> <title>\n";
die $message;
}
1364 | ||
1365 | ||
1366 | ||
1367 | ################################################## | |
1368 | # Entry point and main program # | |
1369 | ################################################## | |
1370 | ||
1371 | ||
1372 | # Directory in which to put the new HTML files | |
1373 | ||
$html = "html";

# Global variables. These are shared by the subroutines above.

%cindex = ();
%oindex = ();

$chapsplit = 0;
$cindex_tocn = 0;
$confuse = "";
$file_base = "";
$index_count = 0;
$inem = 0;
$inpar = 0;
$lastwasitem = 0;
$lastwasrule = 0;
$oindex_tocn = 0;
$sectsplit = 0;
$source_file = "";
$thischapter = 0;
$thissection = 0;


# Handle options. Only -split is recognized, and only once; anything else
# that starts with a hyphen gives the usage message.

my($splitset) = 0;

while (scalar @ARGV > 0 && $ARGV[0] =~ /^-/)
  {
  if ($ARGV[0] eq "-split" && !$splitset)
    {
    $splitset = 1;
    shift @ARGV;
    my($type) = shift @ARGV;
    &usage() if !defined $type;         # -split was the last argument
    if    ($type eq "section") { $sectsplit = 1; }
    elsif ($type eq "chapter") { $chapsplit = 1; }
    elsif ($type eq "no"     ) { $sectsplit = $chapsplit = 0; }
    else                       { &usage(); }
    }
  else { &usage(); }
  }

# Get the source file and its base. The base is the file name without its
# ".src" ending, and it is used in the name of every output file, so a name
# with any other ending is rejected here instead of leaving the base undefined.

&usage() if scalar @ARGV <= 0;
$source_file = shift @ARGV;
($file_base) = $source_file =~ /^(.*)\.src$/;
die "g2h: source file name \"$source_file\" does not end in .src\n"
  if !defined $file_base;

&usage() if scalar @ARGV <= 0;
$doctitle = shift @ARGV;

print "\nCreate HTML for $doctitle from $source_file\n";

# Remove the old HTML files. This is done within Perl so that no text from
# the command line is passed to a shell. If nothing matches the pattern, the
# list is empty and nothing is removed.

print "Removing old HTML files\n";
unlink(glob("$html/${file_base}_*.html"));

# First pass identifies all the chapters and sections, and collects the
# values of the cross-referencing variables.

print "Scanning for cross-references\n";
&pass_one();

$maxchapter = $thischapter;     # Used if chapter splitting
$maxsection = $thissection;     # Used if section splitting

# Second pass actually creates the HTML files.

print "Creating the HTML files\n";
&pass_two();

# Reprocess for moving some of the index points, if indexes were created

&adjust_index_points() if scalar(keys %cindex) > 0 || scalar(keys %oindex) > 0;

# Finally, we must create the option and concept indexes if any data
# has been collected for them.

if (scalar(keys %cindex) > 0)
  {
  print "Creating concept index\n";
  &create_index(\%cindex, "cindex", "Concepts");
  }

if (scalar(keys %oindex) > 0)
  {
  print "Creating option index\n";
  &create_index(\%oindex, "oindex", "Options");
  }
1464 | ||
1465 | # End of g2h |