Expand documentation on use of dnslists in an IPv6 environment. Bug 1369
[exim.git] / doc / doc-scripts / g2h
CommitLineData
495ae4b0 1#! /usr/bin/perl -w
495ae4b0
PH
2
3# This is a script that turns the SGCAL source of Exim's documentation into
4# HTML. It can be used for both the filter document and the main Exim
5# specification. The syntax is
6#
7# g2h [-split no|section|chapter] <source file> <title>
8#
9# Previously, -split section was used for the filter document, and -split
10# chapter for the main specification. However, the filter document has gained
11# some chapters, so they are both split by chapter now. Only one -split can be
12# specified.
13#
14# A number of assumptions about the style of the input markup are made.
15#
16# The HTML is written into the directory html/ using the source file base
17# name as its base.
18
19# Written by Philip Hazel
20# Starting 21-Dec-2001
21# Last modified 26-Nov-2003
22
23#############################################################################
24
25
26
27##################################################
28# Open an output file #
29##################################################
30
sub openout {
# Open the output file named by the single argument on the global OUT handle,
# write the HTML boilerplate and, when splitting, the navigation links, and
# remember the final component of the file name in $current_file (it is used
# when creating the TOC).
#
# Reads the globals $doctitle, $thischapter, $thissection, $chapsplit,
# $sectsplit, $maxchapter, $maxsection and $file_base. Dies if the file
# cannot be opened.

my($filename) = $_[0];

# Use the three-argument form so that nothing in the file name can be taken
# as part of the open mode, and report the system error on failure.

open (OUT, ">", $filename) || die "Can't open $filename: $!\n";

# Boilerplate

print OUT "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";

print OUT "<html>\n<head>\n<title>$doctitle" .
  (($thischapter > 0)? " chapter $thischapter" : "") .
  (($thissection > 0)? " section $thissection" : "") .
  "</title>\n</head>\n" .
  "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

# Forward/backward links when chapter or section splitting. Only one kind of
# splitting is ever active; chapter splitting takes precedence. The links are
# built with plain interpolation so that a "%" in $file_base cannot be taken
# as a printf conversion.

if ($chapsplit || $sectsplit)
  {
  my($this, $max) = $chapsplit?
    ($thischapter, $maxchapter) : ($thissection, $maxsection);

  print OUT "<font size=2>\n";
  print OUT "<a href=\"${file_base}_" . ($this - 1) .
    ".html\">Previous</a>&nbsp;&nbsp;\n" if $this > 1;
  print OUT "<a href=\"${file_base}_" . ($this + 1) .
    ".html\">Next</a>&nbsp;&nbsp;\n" if $this < $max;
  print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
  print OUT "&nbsp;" x 6, "($doctitle)\n</font><hr>\n";
  }

# Save the final component of the current file name (for TOC creation). If
# the name has no directory part, the whole name is used, rather than
# whatever an earlier, unrelated pattern match happened to leave in $1.

$current_file = ($filename =~ /([^\/]+)$/)? $1 : $filename;
}
76
77
78
79##################################################
80# Close an output file #
81##################################################
82
83# The first argument is one of:
84#
85# "CHAP" a chapter is ending
86# "SECT" a section is ending
87# "" the whole thing is ending
88#
89# In the first two cases $thischapter and $thissection contain the new chapter
90# and section numbers, respectively. In the third case, we can deduce what is
91# ending from the flags. The variables contain the current values.
92
sub closeout {
# Finish off the current output file: write a rule (unless one was the last
# thing output), end any open paragraph, write the trailing navigation links,
# and close the OUT handle.
#
# The argument is "CHAP", "SECT" or "". For "CHAP" and "SECT" the globals
# $thischapter / $thissection already hold the number of the NEW chapter or
# section, so the file being closed is one lower, "Previous" is two lower,
# and "Next" is the new number itself. For "" the globals hold the current
# values and the splitting flags decide which links (if any) are written.
#
# Dies if the file cannot be closed, because buffered write errors (a full
# disk, for example) only show up at that point.

my($s) = $_[0];
my($links) = 0;
my($prev, $next);

print OUT "<hr>\n" if !$lastwasrule;
&setpar(0);

if ($s eq "CHAP")
  {
  ($links, $prev, $next) = (1, $thischapter - 2, $thischapter);
  }
elsif ($s eq "SECT")
  {
  ($links, $prev, $next) = (1, $thissection - 2, $thissection);
  }
elsif ($chapsplit)
  {
  ($links, $prev) = (1, $thischapter - 1);
  }
elsif ($sectsplit)
  {
  ($links, $prev) = (1, $thissection - 1);
  }

# The links are written with plain interpolation so that a "%" in $file_base
# cannot be taken as a printf conversion.

if ($links)
  {
  print OUT "<font size=2>\n";
  print OUT "<a href=\"${file_base}_$prev.html\">Previous</a>&nbsp;&nbsp;"
    if $prev > 0;
  print OUT "<a href=\"${file_base}_$next.html\">Next</a>&nbsp;&nbsp;"
    if defined $next;
  print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
  print OUT "&nbsp;" x 6, "($doctitle)\n</font>\n";
  }

print OUT "</body>\n</html>\n";
close(OUT) || die "Can't close output file: $!\n";
}
142
143
144
145##################################################
146# Handle an index line #
147##################################################
148
149# This function returns an empty string so that it can be called as part
150# of an s operator when handling index items within paragraphs. The two
151# arguments are:
152#
153# the text to index, already converted to HTML
154# 1 for the concept index, 0 for the options index
155
sub handle_index {
# Record one index entry and write its anchor to the current output file.
# Arguments: the text to index (already HTML), and a flag that is true for
# the concept index (%cindex) and false for the options index (%oindex).
# Always returns an empty string, so it can be used inside an s///e.

my($text, $concept) = ($_[0], $_[1]);
my($index) = $concept? \%cindex : \%oindex;

# Up the index count, and work out the reference to the anchor: a file name
# plus label when splitting, just the label otherwise.

$index_count++;
my($anchor) = "IX$index_count";
my($ref);

if ($chapsplit)
  {
  $ref = "${file_base}_$thischapter.html#$anchor";
  }
elsif ($sectsplit)
  {
  $ref = "${file_base}_$thissection.html#$anchor";
  }
else
  {
  $ref = "#$anchor";
  }

# Turn the primary/secondary splitting string "||" into ":". The key is the
# resulting text with all HTML coding and any leading quotation marks
# removed.

$text =~ s/\|\|/:/g;

my($key) = "$text";
$key =~ s/<[^>]+>//g;
$key =~ s/&#(\d+);/chr($1)/eg;
$key =~ s/^`+//;
$key =~ s/^"//;

# Make every space in the visible text unsplittable, but leave alone any
# spaces that are inside existing HTML tags.

$text =~ s/ (?=[^<>]*(?:<|$))/&nbsp;/g;

# A key that has been seen before gets an extra "[n]" link, where n is one
# more than the number of "<a" strings already stored for it. A new key gets
# a straightforward reference.

if (defined $index->{$key})
  {
  my $seen = () = $index->{$key} =~ /<a/g;
  $index->{$key} .= " &nbsp;<a href=\"$ref\">[" . ($seen + 1) . "]</a>";
  }
else
  {
  $index->{$key} = "<a href=\"$ref\">$text</a>";
  }

# Place the name in the current output

print OUT "<a name=\"$anchor\"></a>\n";
return "";
}
214
215
216
217##################################################
218# Handle emphasis bars #
219##################################################
220
221# Set colour green for text marked with "emphasis bars", keeping
222# track in case the matching isn't perfect.
223
sub setinem {
# Switch "emphasis bar" colouring on (true argument) or off (false argument).
# Returns the HTML needed to make the change, or an empty string when the
# global $inem flag shows that we are already in the requested state.

my($want) = $_[0]? 1 : 0;

return "" if $want == ($inem? 1 : 0);

$inem = $want;
return $want? "<font color=green>\n" : "</font>\n";
}
238
239
240
241##################################################
242# Convert marked-up text #
243##################################################
244
245# This function converts text from SGCAL markup to HTML markup, with a couple
246# of exceptions:
247#
248# 1. We don't touch $t because that is handled by the .display code.
249#
250# 2. The text may contain embedded .index, .em, and .nem directives. We
251# handle .em and .nem, but leave .index because it must be done during
252# paragraph outputting.
253#
254# In a non-"rm" display, we turn $rm{ into cancelling of <tt>. Otherwise
255# it is ignored - in practice it is only used in that special case.
256#
257# The order in which things are done in this function is highly sensitive!
258
sub handle_text {
# Convert SGCAL markup in the first argument to HTML and return the result.
# If the second argument is true, $rm{xxx} temporarily cancels <tt>;
# otherwise $rm{xxx} is reduced to plain xxx. $t and .index are not touched.
#
# The substitutions below must stay in this order.

my($text, $rmspecial) = ($_[0], $_[1]);

# Escape all & characters (they aren't involved in markup) but for the moment
# use &+ instead of &# so that we can handle # characters in the text.

$text =~ s/&/&+038;/g;

# Turn SGCAL literals into HTML literals that don't look like SGCAL
# markup, so won't be touched by what follows. Again, use + instead of #.

$text =~ s/@@/&+064;/g;
$text =~ s/@([^@])/"&+".sprintf("%0.3d",ord($1)).";"/eg;

# Now turn any #s that are markup into spaces, and convert the previously
# created literals to the correct form.

$text =~ s/#/&nbsp;/g;
$text =~ s/&\+(\d+);/&#$1;/g;

# Some simple markup that doesn't involve argument text.

$text =~ s/\$~//g;                       # turn $~ into nothing
$text =~ s/__/_/g;                       # turn __ into _
$text =~ s/--(?=$|\s|\d)/&#150;/mg;      # turn -- into endash in text or number range
$text =~ s/\(c\)/&copy;/g;               # turn (c) into copyright symbol

# Use double quotes

$text =~ s/`([^']+)'/&#147;$1&#148;/g;

# This is a fudge for some specific usages of $<; we can't just remove it
# globally as it also occurs in things like "$<variable name>".

$text =~ s/(\d)\$<-/$1-/g;               # turn 0$<- into 0-
$text =~ s/\$<//g;                       # other $< is ignored

# Turn <<...>> into equivalent SGCAL markup that doesn't involve the use of
# < and >, and then escape the remaining < and > characters in the text.

$text =~ s/<<([^>]*?)>>/<\$it{$1}>/g;    # turn <<xxx>> into <$it{xxx}>
$text =~ s/</&#060;/g;
$text =~ s/>/&#062;/g;

# Other markup...

$text =~ s/\$sm\{//g;                    # turn $sm{ into nothing
$text =~ s/\$smc\{//g;                   # turn $smc{ into nothing
$text =~ s/\$smi\{//g;                   # turn $smi{ into nothing

$text =~ s/\$tt\{([^\}]*?)\}/<tt>$1<\/tt>/g;          # $tt{xxx} => <tt>xxx</tt>
$text =~ s/\$it\{([^\}]*?)\}/<em>$1<\/em>/g;          # $it{xxx} => <em>xxx</em>
$text =~ s/\$bf\{([^\}]*?)\}/<b>$1<\/b>/g;            # $bf{xxx} => <b>xxx</b>

$text =~ s/\$cb\{([^\}]*?)\}/<tt><b>$1<\/b><\/tt>/g;  # $cb{xxx} => <tt><b>xxx</b></tt>

$text =~ s/\\\\([^\\]*?)\\\\/<font size=-1>$1<\/font>/g;  # \\xxx\\ => small font
$text =~ s/\\\?([^?]*?)\?\\/<a href="$1">$1<\/a>/g;       # \?URL?\ => link

$text =~ s/\\\(([^)]*?)\)\\/<i>$1<\/i>/g;             # \(xxx)\ => <i>xxx</i>
$text =~ s/\\\"([^\"]*?)\"\\/<tt>$1<\/tt>/g;          # \"xxx"\ => <tt>xxx</tt>

$text =~ s/\\\$([^\$]*?)\$\\/<tt>\$$1<\/tt>/g;        # \$xxx$\ => <tt>$xxx</tt>
$text =~ s/\\\-([^\\]*?)\-\\/<i>-$1<\/i>/g;           # \-xxx-\ => -italic
$text =~ s/\\\*\*([^*]*?)\*\*\\/<b>$1<\/b>/g;         # \**xxx**\ => <b>xxx</b>
$text =~ s/\\\*([^*]*?)\*\\/<i>$1<\/i>/g;             # \*xxx*\ => italic
$text =~ s/\\%([^*]*?)%\\/<b>$1<\/b>/g;               # \%xxx%\ => bold
$text =~ s/\\([^\\]*?)\\/<tt>$1<\/tt>/g;              # \xxx\ => <tt>xxx</tt>
$text =~ s/::([^\$]*?)::/<i>$1:<\/i>/g;               # ::xxx:: => italic:
$text =~ s/\$\*\$/\*/g;                               # $*$ => *

# Handle $rm{...}: in the special case the content is wrapped in </tt>...<tt>
# to cancel the typewriter font; otherwise only the content is kept.

my($before, $after) = $rmspecial? ("</tt>", "<tt>") : ("", "");
$text =~ s/\$rm\{([^\}]*?)\}/$before$1$after/g;

# There is one case where the terminating } of an escape sequence is
# in another paragraph - this follows $sm{ - it can be fixed by
# removing any stray } when there are no { chars.
#
# NOTE(review): the test below is applied to $_ (whatever line the caller
# currently has there), not to the text being converted. That looks
# accidental, but callers may depend on it, so it is kept as is - confirm
# before changing.

$text =~ s/\}//g unless /\{/;

# Remove any null flags ($$)

$text =~ s/\$\$//g;

# If the paragraph starts with $c\b, remove it.

$text =~ s/^\$c\b//;

# If the paragraph starts with $e\b, indent it slightly.

$text =~ s/^\$e\b/&nbsp;&nbsp;/;

# Handle .em, and .nem directives that occur within the paragraph

$text =~ s/\.em\s*\n/&setinem(1)/eg;
$text =~ s/\.nem\s*\n/&setinem(0)/eg;

# Explicitly included HTML

$text =~ s/\[\(([^)]+)\)\]/<$1>/g;       # turn [(...)] into <...>

# Finally, do the variable substitutions and return the modified text.

$text =~ s/~~(\w+)/$var_value{$1}/eg;

return $text;
}
380
381
382
383##################################################
384# Start/end a paragraph #
385##################################################
386
387# We want to leave paragraphs unterminated until we know that a horizontal
388# rule does not follow, to avoid getting space inserted before the rule,
389# which doesn't look good. So we have this function to help control things.
390# If the argument is 1 we are starting a new paragraph; if it is 0 we want
391# to force the ending of any incomplete paragraph.
392
sub setpar {
# End any paragraph that is currently open and, if the argument is true,
# start a new one. The global $inpar records whether a paragraph is open.

my($tags) = "";

if ($inpar)
  {
  $tags .= "</p>\n";
  $inpar = 0;
  }

if ($_[0])
  {
  $tags .= "<p>\n";
  $inpar = 1;
  }

print OUT $tags if $tags ne "";
}
405
406
407
408##################################################
409# Handle a "paragraph" #
410##################################################
411
412# Read a paragraph of text, which may contain many lines and may contain
413# .index, .em, and .nem directives within it. We may also encounter
414# ".if ~~html" within paragraphs. Process those directives,
415# convert the markup, and output the rest as an HTML paragraph.
416
417
sub handle_paragraph{
# Collect a paragraph, starting with the line already in $_, convert its
# markup and write it out. On return $_ holds the line that ended the
# paragraph (or is undefined at end of file).

my($text) = $_;
my($in_html_if) = 0;

while (defined($_ = <IN>))
  {
  # ".if ~~html" inside a paragraph: keep the HTML part, joined directly on
  # to what precedes it, and throw away any ".else" part.

  if (/^\.if\s+~~html\b/)
    {
    $in_html_if = 1;
    $text =~ s/\s+$//;      # lose unwanted whitespace and newlines
    next;
    }

  if ($in_html_if)
    {
    if (/^\.else\b/)
      {
      while (<IN>) { last if /^\.fi\b/; }
      $in_html_if = 0;
      next;
      }
    if (/^\.fi\b/)
      {
      $in_html_if = 0;
      next;
      }
    }

  # A blank line ends the paragraph, and so does any directive other than
  # .index, .em and .nem, which are allowed inside paragraphs.

  last if /^\s*$/ || /^\.(?!(?:index|em|nem)\b)/;

  $text .= $_;
  }

$text = &handle_text($text, 0);

# Nothing is output for a paragraph that is only white space. We can't
# handle .index until this point, when we do it just before outputting the
# paragraph, so that the index anchors come after the <p>.

if ($text =~ /\S/)
  {
  &setpar(1);
  $text =~ s/\.index\s+([^\n]+)\n/&handle_index($1, 1)/eg;
  print OUT "$text";
  }
}
456
457
458
459##################################################
460# Handle a non-paragraph directive #
461##################################################
462
463# The directives .index, .em, and .nem can also appear within paragraphs,
464# and are then handled within the handle_paragraph() code.
465
sub handle_directive{
# Process the SGCAL directive line that is in $_, writing any resulting HTML
# to OUT. Some directives (.display, the various .if forms) read further
# lines from IN. On return the next unprocessed input line is in $_, and
# $lastwasitem records whether the directive was a .item.

my($new_lastwasitem) = 0;

$lastwasrule = 0;

if (/^\.r?set\b/ || /^\.(?:\s|$)/) {}    # ignore .(r)set and comments

elsif (/^\.justify\b/) {}                # and .justify

elsif (/^\.newline\b/) { print OUT "<br>\n"; }

elsif (/^\.blank\b/ || /^\.space\b/) { print OUT "<br>\n"; }

elsif (/^\.rule\b/) { &setpar(0); print OUT "<hr>\n"; $lastwasrule = 1; }

elsif (/^\.index\s+(.*)/) { &handle_index(&handle_text($1, 0), 1); }

# Emphasis is handled by colour

elsif (/^\.em\b/)
  {
  &setpar(0);
  print OUT "<font color=green>" if ! $inem;
  $inem = 1;
  }

elsif (/^\.nem\b/)
  {
  &setpar(0);
  print OUT "</font>" if $inem;
  $inem = 0;
  }

# Ignore tab setting stuff - we use tables instead.

elsif (/^\.tabs(?:et)?\b/) {}

# .tempindent is used only to align some of the expansion stuff nicely;
# just ignore it. It is used in conjunction with .push/.pop.

elsif (/^\.(tempindent|push|pop)\b/) {}

# There are some instances of .if ~~sys.fancy in the source. Some of those
# that are not inside displays are two-part things, in which case we just keep
# the non-fancy part. For diagrams, however, they are in three parts:
#
#   .if ~~sys.fancy
#   <aspic drawing stuff for PostScript and PDF>
#   .elif !~~html
#   <ascii art for txt and Texinfo>
#   .else
#   <HTML instructions for including a gif>
#   .fi
#
# In this case, we skip to the third part.

elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/)
  {
  while (<IN>)
    { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }

  # $_ is undefined here if the input ran out while skipping

  if (defined $_ && /^\.elif\b/)
    {
    while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
    }
  }

# Similarly, for .if !~~sys.fancy, take the non-fancy part.

elsif (/^\.if\s+!\s*~~sys\.fancy/) {}

# There are some explicit tests for ~~html for direct HTML inclusions

elsif (/^\.if\s+~~html\b/) {}

# There are occasional requirements to do things differently for Texinfo/HTML
# and PS/txt versions. The latter are produced by SGCAL, so that's what the
# flag is called. The patterns are anchored like all the others, so that a
# ".fi" or ".else" in the middle of a text line (the end of a domain name,
# for instance) cannot end the skipping early.

elsif (/^\.if\s+~~sgcal/)
  {
  while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
  }

# Also there is a texinfo flag

elsif (/^\.if\s+~~texinfo\b/)
  {
  while (<IN>)
    { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }
  }

# Ignore any other .if, .else, or .fi directives

elsif (/^\.if\b/ || /^\.fi\b/ || /^\.else\b/) {}

# Ignore .indent

elsif (/^\.indent\b/) {}

# Various flavours of numberpars map to corresponding list types.

elsif (/^\.numberpars\b/)
  {
  $rest = $';
  &setpar(0);

  if ($rest =~ /(?:\$\.|\" \")/)
    {
    unshift @endlist, "ul";
    unshift @listtype, "";
    print OUT "<ul>\n<li>";
    }
  else
    {
    $nptype = ($rest =~ /roman/)? "a" : "1";
    unshift @endlist, "ol";
    unshift @listtype, " TYPE=\"$nptype\"";
    print OUT "<ol>\n<li$listtype[0]>";
    }
  }

elsif (/^\.nextp\b/)
  {
  &setpar(0);
  print OUT "</li>\n<li$listtype[0]>";
  }

elsif (/^\.endp\b/)
  {
  &setpar(0);
  print OUT "</li>\n</$endlist[0]>\n";
  shift @listtype;
  shift @endlist;
  }

# .display asis can use <pre> which uses a typewriter font.
# Otherwise, we have to do our own line breaking. Turn tabbed lines
# into an HTML table. There will always be a .tabs line first.

elsif (/^\.display\b/)
  {
  my($intable) = 0;
  my($asis) = /asis/;
  my($rm) = /rm/;
  my($eol,$indent);

  # For non asis displays, start a paragraph, and set up to put an
  # explicit break after every line.

  if (!$asis)
    {
    &setpar(1);
    $eol = "<br>";
    $indent = "<tt>&nbsp;&nbsp;</tt>";
    }

  # For asis displays, use <pre> and no explicit breaks

  else
    {
    print OUT "<pre>\n";
    $eol = "";
    $indent = "&nbsp;&nbsp;";
    }

  # Now read through until we hit .endd (or EOF, but that shouldn't happen)
  # and process the lines in the display.

  while (<IN>)
    {
    last if /^\.endd\b/;

    # The presence of .tabs[et] starts a table

    if (/^\.tabs/)
      {
      $intable = 1;
      print OUT "<table cellspacing=0 cellpadding=0>\n";
      }

    # Some displays have an indent setting - ignore

    elsif (/^\.indent\b/) {}

    # Some displays have .blank inside them

    elsif (/^\.blank\b/)
      {
      print OUT "<br>\n";
      }

    # Some displays have emphasis inside them

    elsif (/^\.em\b/)
      {
      print OUT "<font color=green>" if ! $inem;
      $inem = 1;
      }

    elsif (/^\.nem\b/)
      {
      print OUT "</font>" if $inem;
      $inem = 0;
      }

    # There are occasional instances of .if [!]~~sys.fancy inside displays.
    # In both cases we want the non-fancy alternative. (The only thing that
    # matters in practice is noticing .tabs[et] actually.) Assume the syntax
    # is valid.

    elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/)
      {
      while (<IN>)
        {
        last if /^\.fi\b/ || /^\.else/;
        }
      }

    elsif (/^\.if\s+!\s*~~sys\.fancy/) {}

    elsif (/^\.fi\b/) {}

    # Ignore .newline and .linelength

    elsif (/^\.newline\b/ || /^\.linelength\b/) {}

    # Ignore comments

    elsif (/^\.(\s|$)/) {}

    # There shouldn't be any other directives inside displays

    elsif (/^\./)
      {
      print "*** Ignored directive inside .display: $_";
      }

    # Handle a data line within a display. If it's an asis display, the only
    # conversion is to escape the HTML characters. Otherwise, process the
    # SGCAL markup.

    else
      {
      chomp;
      if ($asis)
        {
        s/&/&#038;/g;
        s/</&#060;/g;
        s/>/&#062;/g;
        }
      else
        {
        $_ = &handle_text($_, !$rm);
        $_ = "<tt>$_</tt>" if !$rm && $_ ne "";
        }

      # In a table, break fields at $t. For non-rm we must break the
      # <tt> group as well.

      if ($intable)
        {
        if ($rm)
          {
          s/\s*\$t\s*/&nbsp;&nbsp;<\/td><td>/g;
          }
        else
          {
          s/\s*\$t\s*/&nbsp;&nbsp;<\/tt><\/td><td><tt>/g;
          }
        s/<tt><\/tt>//g;
        print OUT "<tr><td>&nbsp;&nbsp;$_</td></tr>\n";
        }

      # Otherwise, output straight, with <br> for non asis displays

      else
        {
        s/<tt><\/tt>//g;
        print OUT "$indent$_$eol\n";
        }
      }
    }   # Loop for display contents

  # Finish off the table and the <pre> - leave a paragraph open

  print OUT "</table>\n" if $intable;
  print OUT "</pre>\n" if $asis;
  }

# Handle configuration option definitions

elsif (/^\.startconf\s+(.*)/)
  {
  $confuse = &handle_text($1, 0);
  }

elsif (/^\.conf\b/)
  {
  my($option, $type, $default) =
    /^\.conf\s+(\S+)\s+("(?:[^"]|"")+"|\S+)\s+("(?:[^"]|"")+"|.*)/;

  # A .conf line without a name, type and default cannot be turned into
  # anything sensible, so report it instead of outputting an empty entry.

  if (!defined $option)
    {
    print "*** Malformed .conf directive: line $. ignored:\n";
    print "$_\n";
    }
  else
    {
    $option =~ s/\@_/_/g;   # Underscore will be quoted in option name

    # If $type ends with $**$, add ",expanded" as there doesn't seem to be
    # a dagger character generally available.

    $type =~ s/^"([^"]+)"/$1/;
    $type =~ s/\$\*\*\$/, expanded/;

    # Default may be quoted, and it may also have quotes that are required,
    # if it is a string.

    $default =~ s/^"(.*)"$/$1/;
    $default =~ s/""/"/g;
    $default = &handle_text($default, 0);

    print OUT "<hr>";
    &setpar(0);
    &handle_index($option, 0);
    print OUT "<h3>$option</h3>\n" .
      "<i>Use:</i>&nbsp; $confuse<br>" .
      "<i>Type:</i>&nbsp; $type<br><i>Default:</i>&nbsp; $default<br>\n";
    }
  }

elsif (/^\.endconf\b/)
  {
  print OUT "<hr><br>\n";
  }


# Handle "items" - used for expansion items and the like. We force the
# item text into bold, and put a rule between items.

elsif (/^\.startitems\b/) {}

elsif (/^\.item\s+(.*)/)
  {
  my($arg) = $1;
  chomp($arg);
  $arg =~ s/^"(.*)"$/$1/;
  $arg = &handle_text($arg, 0);

  # If there are two .items in a row, we don't want to put in the
  # separator line or start a new paragraph.

  if ($lastwasitem)
    {
    print OUT "<br>";
    }
  else
    {
    print OUT "<hr>";
    &setpar(1);
    }
  print OUT "<b>$arg</b>\n";
  $new_lastwasitem = 1;
  }

elsif (/^\.enditems\b/)
  {
  print OUT "<hr><br>\n";
  }


# Handle command line option items

elsif (/^\.startoptions\b/) {}

elsif (/^\.option\s+(.*)/)
  {
  my($arg) = $1;
  $arg =~ s/"([^"]*)"/$1/g;

  print OUT "<hr>";
  &setpar(0);

  # For indexing, we want to take up to the first # or < in the line,
  # before processing.

  my($name) = $arg =~ /^([^#<]+)/;
  $name = &handle_text($name, 0);
  &handle_index("-$name", 0);

  # Output as heading, after the index

  $arg = &handle_text($arg, 0);
  print OUT "<h3>-$arg</h3>\n";
  }

elsif (/^\.endoptions\b/)
  {
  print OUT "<hr><br>\n";
  }

# Found an SGCAL directive that isn't dealt with. Oh dear.

else
  {
  print "*** Unexpected SGCAL directive: line $. ignored:\n";
  print "$_\n";
  }

# Remember if last was a .item, and read the next line

$lastwasitem = $new_lastwasitem;
$_ = <IN>;
}
874
875
876
877##################################################
878# First Pass - collect references #
879##################################################
880
sub pass_one{
# First pass over $source_file: collect the values set by .set/.rset into
# %var_value. Values whose names start with CHAP or SECT are cross-references
# and are stored as complete HTML links, whose form depends on $chapsplit,
# $sectsplit and $file_base. Dies if the source file cannot be opened.

$thischapter = 0;

open (IN, "<", $source_file) ||
  die "Can't open $source_file (first pass): $!\n";
$_ = <IN>;

# At the start of the specification text, there are some textual replacement
# definitions. They set values, but not cross-references. They may be preceded
# by comments.

$_ = <IN> while (defined $_ && /^\.(\s|$)/);

while (defined $_ && /^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/)
  {
  $var_value{$1} = $2;
  $_ = <IN>;
  }

# Now skip on till we hit the start of the first chapter. It will be numbered
# 0 if we hit ".set chapter -1". There is only ever one unnumbered chapter.
# Stop at end of file as well; otherwise a source with no .chapter line
# would make this loop run for ever.

while (defined $_ && !/^\.chapter/)
  {
  $thischapter = -1 if /^\.set\s+chapter\s+-1/;
  $_ = <IN>;
  }

# Loop for handling chapters. $_ is undefined here once the input has run out.

while ($_)
  {
  $thischapter++;
  $thissection = 0;

  # Scan through chapter, setting up cross-references to the chapter
  # and to the sections within it.

  while (<IN>)
    {
    last if /^\.chapter/;
    chomp;

    if (/^\.section/)
      {
      $thissection++;
      next;
      }

    # Handle .(r)set directives.

    if (/^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/ && $1 ne "runningfoot")
      {
      my($key,$value) = ($1,$2);
      $value =~ s/~~chapter/$thischapter/;
      $value =~ s/~~section/$thissection/;

      # Only one of $chapsplit or $sectsplit can be set.

      if ($key =~ /^CHAP/)
        {
        $value = $chapsplit?
          "<a href=\"${file_base}_$thischapter.html\">$value</a>"
          :
          "<a href=\"#CHAP$thischapter\">$value</a>";
        }

      elsif ($key =~ /^SECT/)
        {
        $value = $chapsplit?
          "<a href=\"${file_base}_$thischapter.html" .
            "#SECT$thischapter.$thissection\">$value</a>"
          :
          $sectsplit? "<a href=\"${file_base}_$thissection.html\">$value</a>"
          :
          "<a href=\"#SECT$thischapter.$thissection\">$value</a>";
        }

      $var_value{$key} = $value;
      }
    }
  }

close(IN);
}
965
966
967
968
969
970##################################################
971# Second Pass - generate HTML #
972##################################################
973
sub pass_two{
# Second pass over $source_file: write the HTML output file(s) into the
# directory $html, together with the table of contents file
# ${file_base}_toc.html. Index entries are accumulated in %cindex and
# %oindex as a side effect of the functions called. Dies if the source or
# the TOC file cannot be opened, or if the TOC cannot be completely written.

my($tocn) = 0;
my($inmacro) = 0;
my($insection) = 0;

$inem = 0;
$thischapter = 0;
$thissection = 0;

# Open the source file and get the first line

open (IN, "<", $source_file) ||
  die "Can't open $source_file (2nd pass): $!\n";
$_ = <IN>;

# Skip on till we hit the start of the first chapter, but note if we
# pass ".set chapter -1", which is used to indicate no chapter numbering for
# the first chapter (we number it 0). Keep track of whether we are in macro
# definitions or not, and when not, notice occurrences of .index, because
# these are the "x see y" type entries. Stop at end of file as well;
# otherwise a source with no .chapter line would make this loop run for ever.

while (defined $_ && !/^\.chapter/)
  {
  $thischapter = -1 if /^\.set\s+chapter\s+-1/;
  $inmacro = 1 if /^\.macro/;
  $inmacro = 0 if /^\.endm/;
  if (!$inmacro && /^\.index\s+(.*)/)
    {
    my($key);
    my($s) = $1;
    $s = &handle_text($s, 0);
    $s =~ s/ /&nbsp;/g;             # All spaces unsplittable
    $key = "\L$s";
    $key =~ s/<[^>]+>//g;
    $key =~ s/&#(\d+);/chr($1)/eg;
    $cindex{$key} = $s;
    }
  $_ = <IN>;
  }

# Open the TOC file

open (TOC, ">", "$html/${file_base}_toc.html") ||
  die "Can't open $html/${file_base}_toc.html: $!\n";

print TOC "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
print TOC "<html>\n<head>\n<title>$doctitle Contents</title>\n</head>\n" .
  "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";
print TOC "<h1>$doctitle</h1><hr>\n<ul>\n";

# Open the data file if we are not splitting at chapters

&openout("$html/${file_base}.html") if !$chapsplit;

# Loop for handling chapters. At the start of this loop, $_ is either EOF,
# or contains a .chapter line.

$firstchapter = $thischapter + 1;

while ($_)
  {
  print TOC "</ul>\n" if $insection;
  $insection = 0;

  $thischapter++;
  $thissection = 0;
  $lastwasrule = 0;

  # Start a new file if required

  if ($chapsplit)
    {
    &closeout("CHAP") if $thischapter != $firstchapter;
    &openout("$html/${file_base}_$thischapter.html");
    }

  # Set up the chapter title. Save it for the TOC. Set up the anchor and
  # link back to the TOC and show the title. A .chapter line with no text
  # gives an empty title rather than whatever the previous match left in $1.

  my($chaptext) = /^\.chapter\s+(.*)/? $1 : "";

  my($title) = (($thischapter > 0)? "$thischapter. " : "") .
    &handle_text($chaptext, 0);

  $tocn++;
  print TOC "<li><a " .
    "name=\"TOC$tocn\" " .
    "href=\"$current_file#CHAP$thischapter\">$title</a></li>\n";

  print OUT "<h1>\n";
  print OUT "<a name=\"CHAP$thischapter\" href=\"${file_base}_toc.html#TOC$tocn\">\n";
  print OUT "$title\n</a></h1>\n";

  # Scan the contents of the chapter

  $_ = <IN>;
  while ($_)
    {
    last if /^\.chapter/;

    # Handle the start of a new section, starting a new file if required

    if (/^\.section\s+(.*)/)
      {
      my($secttext) = $1;

      $thissection++;

      print TOC "<ul>\n" if !$insection;
      $insection = 1;

      my($title) = (($thischapter > 0)? "$thischapter.$thissection " :
        "$thissection. ") . &handle_text($secttext, 0);

      if ($sectsplit)
        {
        &closeout("SECT");
        &openout("$html/${file_base}_$thissection.html");
        }

      $tocn++;
      printf TOC ("<li><a " .
        "name=\"TOC$tocn\" " .
        "href=\"$current_file#SECT%s$thissection\">%s</a></li>\n",
        ($thischapter > 0)? "$thischapter." : "", $title);

      &setpar(0);
      print OUT "<h2>\n";
      printf OUT ("<a name=\"SECT%s$thissection\" ",
        ($thischapter > 0)? "$thischapter." : "");
      print OUT "href=\"${file_base}_toc.html#TOC$tocn\">\n";
      print OUT "$title\n</a></h2>\n";
      $_ = <IN>;
      $lastwasrule = 0;
      }

    # Blank lines at this level are ignored

    elsif (/^\s*$/)
      {
      $_ = <IN>;
      }

    # Directive and non-directive lines are handled independently, though
    # in each case further lines may be read. Afterwards, the next line is
    # in $_. If .em is at the start of a paragraph, treat it with the
    # paragraph, because the matching .nem will be too. Messy!

    elsif (/^\./)
      {
      if (/^\.em\b/)
        {
        $_ = <IN>;
        if (defined $_ && /^\./)
          {
          print OUT "<font color=green>" if ! $inem;
          $inem = 1;
          # Used to handle it here - but that fails if it is .section.
          # Just let the next iteration of the loop handle it.
          # &handle_directive();
          }

        else
          {
          $_ = ".em\n" . (defined $_? $_ : "");
          &handle_paragraph();
          $lastwasrule = 0;
          $lastwasitem = 0;
          }
        }

      # Not .em

      else
        {
        &handle_directive();
        }
      }

    # Not a directive

    else
      {
      &handle_paragraph();
      $lastwasrule = 0;
      $lastwasitem = 0;
      }

    }   # Loop for each line in a chapter
  }     # Loop for each chapter

# Close the last file, end off the TOC, and we are done.

&closeout("");

print TOC "</ul>\n" if $insection;

# Add TOC entries for those indexes that have something in them. (A hash is
# tested directly; "defined %hash" is a compile-time error in current Perls.)

if (%cindex)
  {
  $cindex_tocn = ++$tocn;
  print TOC "<li><a name=\"TOC$tocn\" ".
    "href=\"${file_base}_cindex.html\">Concept Index</a></li>\n";
  }

if (%oindex)
  {
  $oindex_tocn = ++$tocn;
  print TOC "<li><a name=\"TOC$tocn\" ".
    "href=\"${file_base}_oindex.html\">Option Index</a></li>\n";
  }

print TOC "</ul>\n</body>\n</html>\n";
close(TOC) || die "Can't close $html/${file_base}_toc.html: $!\n";
close(IN);
}
1186
1187
1188
1189
1190##################################################
1191# Adjust index points #
1192##################################################
1193
# Because of the way the source is written, there are often index entries
# that immediately follow the start of chapters and sections and the definition
# of "items" like "helo = verify". This gets the correct page numbers for the
# PostScript and PDF formats. However, for HTML we want the index anchor to be
# before the section heading, because browsers tend to put the index point at
# the top of the screen. So we re-read all the files we've just created, and
# move some of the index points about. This is necessary only if indexes exist.
# The files are small enough to be handled entirely in memory.
#
# Arguments: none
# Globals:   reads $html (the output directory) and $file_base (the stem of
#            the generated file names); every file in $html whose name is
#            <file_base>_<digits>.html is rewritten in place.
# Dies if the directory or any matching file cannot be opened or written.

sub adjust_index_points {
print "Adjusting index points to precede headings\n";

# Collect the names first and close the directory straight away. $file_base
# is quoted so that characters such as "." in it are matched literally.

opendir(my $dir, $html) || die "Failed to opendir $html: $!\n";
my(@files) = grep { /^\Q${file_base}\E_\d+\.html$/ } readdir($dir);
closedir($dir);

foreach my $file (@files)
  {
  open(my $in, '<', "$html/$file") ||
    die "Failed to open $html/$file (read): $!\n";
  my(@lines) = <$in>;
  close($in);

  # Start at 1: an index line that is the very first line has nothing before
  # it to be moved in front of, and $lines[$i-1] would otherwise wrap round
  # to the last line of the file.

  for (my $i = 1; $i < @lines; $i++)
    {
    next unless $lines[$i] =~ /^<a name="IX\d+"><\/a>$/;
    my($prev) = $lines[$i-1];

    # Handle an index line that follows a heading definition. Move it back
    # to just before the <h1> or whatever. This preserves the order of
    # multiple index lines, not that that matters. Only the previous few
    # lines are searched for the opening tag.

    if ($prev =~ /^<\/a><\/h(\d)>/)
      {
      my($level) = $1;      # Save it; later matches must not disturb it
      my($j);
      my($found) = 0;
      for ($j = $i-2; $j > 0 && $j > $i - 10; $j--)
        {
        if ($lines[$j] =~ /<h$level>/)
          {
          $found = 1;
          last;
          }
        }
      splice(@lines, $j, 0, splice(@lines, $i, 1)) if $found;
      }

    # Handle an index line that follows an "item". Move it back one line.

    elsif ($prev =~ /^<b>.*<\/b>\s*$/)
      {
      splice(@lines, $i-1, 0, splice(@lines, $i, 1));
      }

    # Handle an index line that follows a "conf" definition. The $i >= 2
    # test stops $lines[$i-2] wrapping round to the end of the array.

    elsif ($i >= 2 && $prev =~ /^<i>Type:<\/i>/ && $lines[$i-2] =~ /^<h3>/)
      {
      splice(@lines, $i-2, 0, splice(@lines, $i, 1));
      }

    # Handle an index line that follows an "option" definition

    elsif ($prev =~ /^<h3>/)
      {
      splice(@lines, $i-1, 0, splice(@lines, $i, 1));
      }
    }

  # Write the lines back with nothing between them. An explicit join is used
  # so that the global list separator $" does not have to be changed.

  open(my $out, '>', "$html/$file") ||
    die "Failed to open $html/$file (write): $!\n";
  print $out join("", @lines);
  close($out) || die "Failed to write $html/$file: $!\n";
  }
}
1276
1277
1278
1279
1280##################################################
1281# Create Index #
1282##################################################
1283
# Write one index page as $html/${file_base}_<ifname>.html.
#
# Arguments: $_[0]  reference to a hash; the keys are the index terms used
#                   for sorting and grouping, the values are the HTML text
#                   that is written out for each entry
#            $_[1]  the file name suffix, e.g. "cindex"
#            $_[2]  the title for the page, e.g. "Concepts"
# Globals:   reads $html, $file_base and $doctitle
# Dies if the index file cannot be opened or written.
#
# The page starts with a row of links to the initial letters that occur,
# followed by the entries sorted case-independently, with a heading in
# front of the first entry for each letter. Entries whose initial is not
# a letter get no heading.

sub create_index{
my($hash, $ifname, $ititle) = @_;
my(%indexindex);
my($path) = "$html/${file_base}_$ifname.html";

# A key may start with an opening double quote (\x93). It is ignored both
# for sorting and for choosing the letter an entry is filed under; the two
# must agree, or a quoted entry ends up in front of its letter's heading.

my($sortkey) = sub { my($k) = $_[0]; $k =~ s/^\x93//; return $k; };

open(my $index, '>', $path) || die "Failed to open $path: $!\n";

print $index "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
print $index "<html>\n<head>\n<title>$doctitle $ititle</title>\n";
print $index "<base target=\"body\">\n</head>\n";

print $index "<body bgcolor=\"#FFFFDF\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

print $index "<h3>$ititle</h3>\n";

# We have to scan the keys in the hash twice; first to build the list
# of initial letters, and then to do the business. The first time we
# do not need to sort them.

foreach my $key (keys %$hash)
  {
  my($initial) = uc(substr($sortkey->($key), 0, 1));
  $indexindex{$initial} = 1 if $initial ge "A" && $initial le "Z";
  }

print $index "<p>\n";
foreach my $initial (sort keys %indexindex)
  {
  print $index "&nbsp;<a href=\"#$initial\" target=\"index\">$initial</a>\n";
  }
print $index "<hr></p>\n";

# Sort case-independently; keys that differ only in case are put into a
# fixed order by a case-dependent comparison.

my(@sorted) = sort
  {
  my($aa) = $sortkey->($a);
  my($bb) = $sortkey->($b);
  (lc($aa) cmp lc($bb)) || ($aa cmp $bb);
  }
  keys %$hash;

my($letter) = "";
print $index "<p>\n";

foreach my $key (@sorted)
  {
  my($initial) = uc(substr($sortkey->($key), 0, 1));
  if ($initial ne $letter && $initial ge "A" && $initial le "Z")
    {
    print $index "<br>\n";
    print $index "<a name=\"$initial\"></a>\n";
    print $index "<font size=\"+1\">$initial</font><br>\n";
    $letter = $initial;
    }
  print $index "$hash->{$key}<br>\n";
  }

print $index "</p>\n";

print $index "</body>\n</html>\n";
close($index) || die "Failed to write $path: $!\n";
}
1352
1353
1354
1355
1356##################################################
1357# Show usage and die #
1358##################################################
1359
sub usage {
# Give up with a one-line reminder of the command syntax. The trailing
# newline stops Perl adding the script name and line number.
my($syntax) = "g2h [-split no|section|chapter] <source> <title>";
die "Usage: $syntax\n";
}
1363
1364
1365
1366##################################################
1367# Entry point and main program #
1368##################################################
1369
1370
# Directory in which to put the new HTML files

$html = "html";

# Global variables. These are package globals rather than lexicals because
# the subroutines earlier in the file refer to them by name.

%cindex = ();
%oindex = ();

$chapsplit = 0;
$cindex_tocn = 0;
$confuse = "";
$file_base = "";
$index_count = 0;
$inem = 0;
$inpar = 0;
$lastwasitem = 0;
$lastwasrule = 0;
$oindex_tocn = 0;
$sectsplit = 0;
$source_file = "";
$thischapter = 0;
$thissection = 0;


# Handle options. The only option is -split, which must be followed by a
# value and may be given only once; anything else is a usage error.

my($splitset) = 0;

while (scalar @ARGV > 0 && $ARGV[0] =~ /^-/)
  {
  &usage() unless $ARGV[0] eq "-split" && !$splitset;
  $splitset = 1;
  shift @ARGV;
  my($type) = shift @ARGV;
  &usage() unless defined $type;        # "-split" was the last argument
  if    ($type eq "section") { $sectsplit = 1; }
  elsif ($type eq "chapter") { $chapsplit = 1; }
  elsif ($type eq "no"     ) { $sectsplit = $chapsplit = 0; }
  else                       { &usage(); }
  }

# Get the source file and its base. The base is the name without its .src
# suffix; without that suffix there is no base to name the output files
# after, so give up rather than carry on with an undefined value.

&usage() if scalar @ARGV <= 0;
$source_file = shift @ARGV;
($file_base) = $source_file =~ /^(.*)\.src$/;
die "g2h: source file name \"$source_file\" does not end in .src\n"
  unless defined $file_base;

&usage() if scalar @ARGV <= 0;
$doctitle = shift @ARGV;

print "\nCreate HTML for $doctitle from $source_file\n";

# Remove the old HTML files. This is done from Perl and not by handing a
# string to the shell, because $file_base comes from the command line. Any
# wildcard characters in the fixed part of the name are escaped so that only
# the trailing "_*.html" acts as a pattern. Plain files only are removed; a
# failure is reported but is not fatal.

print "Removing old HTML files\n";
require File::Glob;
  {
  my($stem) = "$html/${file_base}";
  $stem =~ s/([\\*?\[\]{}])/\\$1/g;
  foreach my $old (File::Glob::bsd_glob("${stem}_*.html"))
    {
    unlink($old) || warn "Failed to remove $old: $!\n";
    }
  }

# First pass identifies all the chapters and sections, and collects the
# values of the cross-referencing variables.

print "Scanning for cross-references\n";
&pass_one();

$maxchapter = $thischapter;     # Used if chapter splitting
$maxsection = $thissection;     # Used if section splitting

# Second pass actually creates the HTML files.

print "Creating the HTML files\n";
&pass_two();

# Reprocess for moving some of the index points, if indexes were created

&adjust_index_points() if scalar(keys %cindex) > 0 || scalar(keys %oindex) > 0;

# Finally, we must create the option and concept indexes if any data
# has been collected for them.

if (scalar(keys %cindex) > 0)
  {
  print "Creating concept index\n";
  &create_index(\%cindex, "cindex", "Concepts");
  }

if (scalar(keys %oindex) > 0)
  {
  print "Creating option index\n";
  &create_index(\%oindex, "oindex", "Options");
  }
1463
1464# End of g2h