TK/09
[exim.git] / doc / doc-scripts / g2h
CommitLineData
#! /usr/bin/perl -w
# $Cambridge: exim/doc/doc-scripts/g2h,v 1.3 2005/02/17 12:17:09 ph10 Exp $

# This is a script that turns the SGCAL source of Exim's documentation into
# HTML. It can be used for both the filter document and the main Exim
# specification. The syntax is
#
#   g2h [-split no|section|chapter] <source file> <title>
#
# Previously, -split section was used for the filter document, and -split
# chapter for the main specification. However, the filter document has gained
# some chapters, so they are both split by chapter now. Only one -split can be
# specified.
#
# A number of assumptions about the style of the input markup are made.
#
# The HTML is written into the directory html/ using the source file base
# name as its base.

# Written by Philip Hazel
# Starting 21-Dec-2001
# Last modified 26-Nov-2003

#############################################################################
25
26
27
##################################################
#              Open an output file               #
##################################################
31
# openout($path): start a new HTML output file on the global handle OUT.
#
# Writes the DOCTYPE, the <head> (the title is $doctitle plus the current
# chapter and/or section number when that number is greater than zero) and the
# opening <body> tag. When the output is being split by chapter or by section,
# a navigation bar with Previous/Next/Contents links is written as well.
# Finally, the last component of $path is saved in $current_file for use when
# TOC entries are created. Dies if the file cannot be opened.

sub openout {
my($path) = $_[0];

# Use the three-argument form so that nothing in the file name can be taken
# as part of the open mode.

open (OUT, ">", $path) || die "Can't open $path: $!\n";

# Boilerplate

print OUT "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";

print OUT "<html>\n<head>\n<title>$doctitle" .
  (($thischapter > 0)? " chapter $thischapter" : "") .
  (($thissection > 0)? " section $thissection" : "") .
  "</title>\n</head>\n" .
  "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

# Forward/backward links when splitting. Chapter splitting takes precedence
# over section splitting. The links are built by concatenation rather than by
# using the file base name inside a printf format, so that a "%" in the base
# name cannot be misread as a conversion.

if ($chapsplit || $sectsplit)
  {
  my($this, $max) = $chapsplit? ($thischapter, $maxchapter) :
                                ($thissection, $maxsection);

  print OUT "<font size=2>\n";
  print OUT "<a href=\"${file_base}_" . ($this - 1) .
    ".html\">Previous</a>&nbsp;&nbsp;\n" if $this > 1;
  print OUT "<a href=\"${file_base}_" . ($this + 1) .
    ".html\">Next</a>&nbsp;&nbsp;\n" if $this < $max;
  print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
  print OUT "&nbsp;" x 6, "($doctitle)\n</font><hr>\n";
  }

# Save the final component of the current file name (for TOC creation). If
# the name contains no directory part, the whole name is used; previously a
# failed match left a stale capture in $current_file.

$current_file = ($path =~ /([^\/]+)$/)? $1 : $path;
}
77
78
79
##################################################
#              Close an output file              #
##################################################

# The first argument is one of:
#
#   "CHAP"     a chapter is ending
#   "SECT"     a section is ending
#   ""         the whole thing is ending
#
# In the first two cases $thischapter and $thissection contain the new chapter
# and section numbers, respectively. In the third case, we can deduce what is
# ending from the flags. The variables contain the current values.
93
# closeout($what): finish the HTML file currently open on OUT and close it.
#
# $what is "CHAP" when a chapter is ending, "SECT" when a section is ending,
# and "" when the whole document is ending. For "CHAP" and "SECT" the counters
# $thischapter/$thissection already hold the number of the NEW chapter or
# section, so the file being closed is number N-1: its "Previous" link points
# to N-2 and its "Next" link to N. For "" the counters hold the number of the
# file being closed, there is no "Next" link, and a navigation bar is written
# only if the output is being split.

sub closeout {
my($s) = $_[0];
my($shownav, $prev, $next) = (0);

# The rule is written before any open paragraph is terminated, unless a rule
# has just been output.

print OUT "<hr>\n" if !$lastwasrule;
&setpar(0);

# Work out which links are wanted; an undefined value means "no link".

if ($s eq "CHAP")
  {
  $shownav = 1;
  $prev = $thischapter - 2 if $thischapter > 2;
  $next = $thischapter;
  }

elsif ($s eq "SECT")
  {
  $shownav = 1;
  $prev = $thissection - 2 if $thissection > 2;
  $next = $thissection;
  }

elsif ($chapsplit)
  {
  $shownav = 1;
  $prev = $thischapter - 1 if $thischapter > 1;
  }

elsif ($sectsplit)
  {
  $shownav = 1;
  $prev = $thissection - 1 if $thissection > 1;
  }

# Output the navigation bar. Plain print is used (not printf with the file
# base name inside the format) so that a "%" in the name is harmless.

if ($shownav)
  {
  print OUT "<font size=2>\n";
  print OUT "<a href=\"${file_base}_$prev.html\">Previous</a>&nbsp;&nbsp;"
    if defined $prev;
  print OUT "<a href=\"${file_base}_$next.html\">Next</a>&nbsp;&nbsp;"
    if defined $next;
  print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";
  print OUT "&nbsp;" x 6, "($doctitle)\n</font>\n";
  }

print OUT "</body>\n</html>\n";

# Buffered write errors only show up when the file is closed, so report them.

close(OUT) || warn "Error closing output file: $!\n";
}
143
144
145
##################################################
#              Handle an index line              #
##################################################

# This function returns an empty string so that it can be called as part
# of an s operator when handling index items within paragraphs. The two
# arguments are:
#
#   the text to index, already converted to HTML
#   1 for the concept index, 0 for the options index
156
# handle_index($text, $is_concept): record one index entry.
#
# $text is the entry, already converted to HTML. A true $is_concept selects
# the concept index (%cindex), otherwise the options index (%oindex) is used.
# An anchor named IX<n> is written to OUT and the index hash gains a link to
# it. Always returns "" so that the call can be the replacement part of an
# s///e operation.

sub handle_index {
my($text, $is_concept) = @_;
my($index) = $is_concept? \%cindex : \%oindex;
my($target);

# Each entry gets a fresh anchor number. The link target includes a file name
# only when the output is being split.

$index_count++;

if ($chapsplit)
  {
  $target = "${file_base}_$thischapter.html#IX$index_count";
  }
elsif ($sectsplit)
  {
  $target = "${file_base}_$thissection.html#IX$index_count";
  }
else
  {
  $target = "#IX$index_count";
  }

# The primary/secondary separator "||" is shown as ":".

$text =~ s/\|\|/:/g;

# The hash key is the text without HTML tags, with numeric character
# references decoded, and without leading quotation marks.

my($sortkey) = "$text";
$sortkey =~ s/<[^>]+>//g;
$sortkey =~ s/&#(\d+);/chr($1)/eg;
$sortkey =~ s/^`+//;
$sortkey =~ s/^"//;

# Make the spaces in the displayed text non-breaking, leaving alone any
# spaces that are inside <...> tags.

$text =~ s/ (?=[^<>]*(?:<|$))/&nbsp;/g;

# A repeated key gets an extra "[n]" link, where n is one more than the
# number of links already stored; a new key gets a plain link.

if (defined $index->{$sortkey})
  {
  my $links_so_far = () = $index->{$sortkey} =~ /<a/g;
  $index->{$sortkey} .=
    " &nbsp;<a href=\"$target\">[" . ($links_so_far + 1) . "]</a>";
  }
else
  {
  $index->{$sortkey} = "<a href=\"$target\">$text</a>";
  }

# Place the name in the current output

print OUT "<a name=\"IX$index_count\"></a>\n";
return "";
}
215
216
217
##################################################
#              Handle emphasis bars              #
##################################################

# Set colour green for text marked with "emphasis bars", keeping
# track in case the matching isn't perfect.
224
# setinem($on): switch emphasis colouring on (true) or off (false).
#
# Returns the HTML needed to make the change, or "" when the global $inem
# flag shows that the requested state is already in force.

sub setinem {
my($want) = $_[0]? 1 : 0;

# Nothing to output if no change of state is needed.

return "" if $want == ($inem? 1 : 0);

$inem = $want;
return $want? "<font color=green>\n" : "</font>\n";
}
239
240
241
##################################################
#             Convert marked-up text             #
##################################################

# This function converts text from SGCAL markup to HTML markup, with a couple
# of exceptions:
#
# 1. We don't touch $t because that is handled by the .display code.
#
# 2. The text may contain embedded .index, .em, and .nem directives. We
#    handle .em and .nem, but leave .index because it must be done during
#    paragraph outputting.
#
# In a non-"rm" display, we turn $rm{ into cancelling of <tt>. Otherwise
# it is ignored - in practice it is only used in that special case.
#
# The order in which things are done in this function is highly sensitive!
259
# handle_text($text, $rmspecial): convert SGCAL markup in $text to HTML and
# return the converted string.
#
# If $rmspecial is true, $rm{xxx} becomes </tt>xxx<tt> (for use inside a
# typewriter-font display line); otherwise $rm{xxx} becomes just xxx. The $t
# tab markup and embedded .index lines are deliberately left alone; .em and
# .nem lines are converted by calling setinem(). Substitutions of the form
# ~~name are replaced from %var_value.
#
# The order of the substitutions matters: do not rearrange them.

sub handle_text {
my($s) = $_[0];
my($rmspecial) = $_[1];

# Escape all & characters (they aren't involved in markup) but for the moment
# use &+ instead of &# so that we can handle # characters in the text.

$s =~ s/&/&+038;/g;

# Turn SGCAL literals into HTML literals that don't look like SGCAL
# markup, so won't be touched by what follows. Again, use + instead of #.

$s =~ s/@@/&+064;/g;
$s =~ s/@([^@])/"&+".sprintf("%0.3d",ord($1)).";"/eg;

# Now turn any #s that are markup into spaces, and convert the previously
# created literals to the correct form.

$s =~ s/#/&nbsp;/g;
$s =~ s/&\+(\d+);/&#$1;/g;

# Some simple markup that doesn't involve argument text.

$s =~ s/\$~//g;                        # turn $~ into nothing
$s =~ s/__/_/g;                        # turn __ into _
$s =~ s/--(?=$|\s|\d)/&#150;/mg;       # turn -- into endash in text or number range
$s =~ s/\(c\)/&copy;/g;                # turn (c) into copyright symbol

# Use double quotes

# $s =~ s/`([^']+)'/``$1''/g;

$s =~ s/`([^']+)'/&#147;$1&#148;/g;

# This is a fudge for some specific usages of $<; can't just do a global
# as it occurs in things like "$<variable name>" as well.

$s =~ s/(\d)\$<-/$1-/g;                # turn 0$<- into 0-
$s =~ s/\$<//g;                        # other $< is ignored

# Turn <<...>> into equivalent SGCAL markup that doesn't involve the use of
# < and >, and then escape the remaining < and > characters in the text.

$s =~ s/<<([^>]*?)>>/<\$it{$1}>/g;     # turn <<xxx>> into <$it{xxx}>
$s =~ s/</&#060;/g;
$s =~ s/>/&#062;/g;

# Other markup...

$s =~ s/\$sm\{//g;                     # turn $sm{ into nothing
$s =~ s/\$smc\{//g;                    # turn $smc{ into nothing
$s =~ s/\$smi\{//g;                    # turn $smi{ into nothing

$s =~ s/\$tt\{([^\}]*?)\}/<tt>$1<\/tt>/g;  # turn $tt{xxx} into <tt>xxx</tt>
$s =~ s/\$it\{([^\}]*?)\}/<em>$1<\/em>/g;  # turn $it{xxx} into <em>xxx</em>
$s =~ s/\$bf\{([^\}]*?)\}/<b>$1<\/b>/g;    # turn $bf{xxx} into <b>xxx</b>

$s =~ s/\$cb\{([^\}]*?)\}/<tt><b>$1<\/b><\/tt>/g;  # turn $cb{xxx} into
                                                   # <tt><b>xxx</b></tt>

$s =~ s/\\\\([^\\]*?)\\\\/<font size=-1>$1<\/font>/g;  # turn \\xxx\\ into
                                                       # small font
$s =~ s/\\\?([^?]*?)\?\\/<a href="$1">$1<\/a>/g;       # turn \?URL?\ into URL

$s =~ s/\\\(([^)]*?)\)\\/<i>$1<\/i>/g;     # turn \(xxx)\ into <i>xxx</i>
$s =~ s/\\\"([^\"]*?)\"\\/<tt>$1<\/tt>/g;  # turn \"xxx"\ into <tt>xxx</tt>


$s =~ s/\\\$([^\$]*?)\$\\/<tt>\$$1<\/tt>/g;  # turn \$xxx$\ into <tt>$xxx</tt>
$s =~ s/\\\-([^\\]*?)\-\\/<i>-$1<\/i>/g;     # turn \-xxx-\ into -italic
$s =~ s/\\\*\*([^*]*?)\*\*\\/<b>$1<\/b>/g;   # turn \**xxx**\ into <b>xxx</b>
$s =~ s/\\\*([^*]*?)\*\\/<i>$1<\/i>/g;       # turn \*xxx*\ into italic

# NOTE(review): the class below excludes "*" rather than "%"; this looks as
# if it was copied from the rule above, but it is left exactly as it was.

$s =~ s/\\%([^*]*?)%\\/<b>$1<\/b>/g;         # turn \%xxx%\ into bold
$s =~ s/\\([^\\]*?)\\/<tt>$1<\/tt>/g;        # turn \xxx\ into <tt>xxx</tt>
$s =~ s/::([^\$]*?)::/<i>$1:<\/i>/g;         # turn ::xxx:: into italic:
$s =~ s/\$\*\$/\*/g;                         # turn $*$ into *

# Handle $rm{...}

if ($rmspecial)
  {
  $s =~ s/\$rm\{([^\}]*?)\}/<\/tt>$1<tt>/g;  # turn $rm{xxx} into </tt>xxx<tt>
  }
else
  {
  $s =~ s/\$rm\{([^\}]*?)\}/$1/g;            # turn $rm{xxx} into xxx
  }

# There is one case where the terminating } of an escape sequence is
# in another paragraph - this follows $sm{ - it can be fixed by
# removing any stray } in a paragraph that contains no { chars. The test
# must be made on the text being converted; it used to be made on $_, which
# is whatever input line the caller happened to have read last.

$s =~ s/\}//g if $s !~ /\{/;

# Remove any null flags ($$)

$s =~ s/\$\$//g;

# If the paragraph starts with $c\b, remove it.

$s =~ s/^\$c\b//;

# If the paragraph starts with $e\b, indent it slightly.

$s =~ s/^\$e\b/&nbsp;&nbsp;/;

# Handle .em, and .nem directives that occur within the paragraph

$s =~ s/\.em\s*\n/&setinem(1)/eg;
$s =~ s/\.nem\s*\n/&setinem(0)/eg;

# Explicitly included HTML

$s =~ s/\[\(([^)]+)\)\]/<$1>/g;        # turn [(...)] into <...>

# Finally, do the substitutions and return the modified text.

$s =~ s/~~(\w+)/$var_value{$1}/eg;

return $s;
}
381
382
383
##################################################
#             Start/end a paragraph              #
##################################################

# We want to leave paragraphs unterminated until we know that a horizontal
# rule does not follow, to avoid getting space inserted before the rule,
# which doesn't look good. So we have this function to help control things.
# If the argument is 1 we are starting a new paragraph; if it is 0 we want
# to force the ending of any incomplete paragraph.
393
# setpar($start): close any paragraph that is open on OUT and, if $start is
# true, open a new one. The global $inpar records whether a <p> is open.

sub setpar {
my($start) = $_[0];
my(@tags);

# Terminate an unfinished paragraph first ...

if ($inpar)
  {
  push @tags, "</p>\n";
  $inpar = 0;
  }

# ... then begin a new one if that was asked for.

if ($start)
  {
  push @tags, "<p>\n";
  $inpar = 1;
  }

print OUT @tags if @tags;
}
406
407
408
##################################################
#              Handle a "paragraph"              #
##################################################

# Read a paragraph of text, which may contain many lines and may contain
# .index, .em, and .nem directives within it. We may also encounter
# ".if ~~html" within paragraphs. Process those directives,
# convert the markup, and output the rest as an HTML paragraph.
417
418
# handle_paragraph(): output one paragraph.
#
# On entry $_ holds the first line of the paragraph. Further lines are read
# from IN until a blank line or a directive other than .index, .em or .nem is
# found; that terminating line (or undef at end of file) is left in $_ for the
# caller. The collected text is converted by handle_text() and, unless it is
# entirely white space, written to OUT as a new paragraph with any embedded
# .index lines turned into concept index entries.

sub handle_paragraph{
my($text) = $_;
my($in_html_branch) = 0;

LINE: while (<IN>)
  {
  # ".if ~~html" starts text that is wanted for HTML; it is joined directly
  # on to what has been collected so far.

  if (/^\.if\s+~~html\b/)
    {
    $in_html_branch = 1;
    $text =~ s/\s+$//;        # lose unwanted whitespace and newlines
    next LINE;
    }

  # While inside ".if ~~html", an .else introduces the non-HTML alternative,
  # which is skipped up to the closing .fi; a plain .fi just ends the
  # conditional.

  if ($in_html_branch)
    {
    if (/^\.else\b/)
      {
      while (<IN>) { last if /^\.fi\b/; }
      $in_html_branch = 0;
      next LINE;
      }
    if (/^\.fi\b/)
      {
      $in_html_branch = 0;
      next LINE;
      }
    }

  # A blank line, or any directive other than the three that may appear
  # inside a paragraph, ends the paragraph.

  last LINE if /^\s*$/;
  last LINE if /^\./ && !/^\.(?:index|em|nem)\b/;

  $text .= $_;
  }

$text = &handle_text($text, 0);

# Nothing is output for a paragraph that has turned out to be empty.

return if $text =~ /^\s*$/;

# We can't handle .index until this point, when we do it just before
# outputting the paragraph.

&setpar(1);
$text =~ s/\.index\s+([^\n]+)\n/&handle_index($1, 1)/eg;
print OUT "$text";
}
457
458
459
##################################################
#        Handle a non-paragraph directive        #
##################################################

# The directives .index, .em, and .nem can also appear within paragraphs,
# and are then handled within the handle_paragraph() code.
466
# handle_directive(): process the SGCAL directive line that is in $_.
#
# The directive is identified by a chain of pattern tests; depending on what
# it is, it is ignored, turned into HTML on OUT, or used to skip or select
# conditional text (in which case more lines are read from IN). A directive
# that is not recognized is reported on the standard output. On return,
# $lastwasrule and $lastwasitem describe the directive just handled, and $_
# contains the next input line.

sub handle_directive{
my($new_lastwasitem) = 0;

$lastwasrule = 0;

if (/^\.r?set\b/ || /^\.(?:\s|$)/) {}          # ignore .(r)set and comments

elsif (/^\.justify\b/) {}                      # and .justify

elsif (/^\.newline\b/) { print OUT "<br>\n"; }

elsif (/^\.blank\b/ || /^\.space\b/) { print OUT "<br>\n"; }

elsif (/^\.rule\b/) { &setpar(0); print OUT "<hr>\n"; $lastwasrule = 1; }

elsif (/^\.index\s+(.*)/) { &handle_index(&handle_text($1), 1); }

# Emphasis is handled by colour

elsif (/^\.em\b/)
  {
  &setpar(0);
  print OUT "<font color=green>" if ! $inem;
  $inem = 1;
  }

elsif (/^\.nem\b/)
  {
  &setpar(0);
  print OUT "</font>" if $inem;
  $inem = 0;
  }

# Ignore tab setting stuff - we use tables instead.

elsif (/^\.tabs(?:et)?\b/) {}

# .tempindent is used only to align some of the expansion stuff nicely;
# just ignore it. It is used in conjunction with .push/.pop.

elsif (/^\.(tempindent|push|pop)\b/) {}

# There are some instances of .if ~~sys.fancy in the source. Some of those
# that are not inside displays are two-part things, in which case we just keep
# the non-fancy part. For diagrams, however, they are in three parts:
#
#   .if ~~sys.fancy
#   <aspic drawing stuff for PostScript and PDF>
#   .elif !~~html
#   <ascii art for txt and Texinfo>
#   .else
#   <HTML instructions for including a gif>
#   .fi
#
# In this case, we skip to the third part.

elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/)
  {
  while (<IN>)
    { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }

  if (/^\.elif\b/)
    {
    while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
    }
  }

# Similarly, for .if !~~sys.fancy, take the non-fancy part.

elsif (/^\.if\s+!\s*~~sys\.fancy/) {}

# There are some explicit tests for ~~html for direct HTML inclusions

elsif (/^\.if\s+~~html\b/) {}

# There are occasional requirements to do things differently for Texinfo/HTML
# and PS/txt versions. The latter are produced by SGCAL, so that's what the
# flag is called. The patterns are anchored, as they are everywhere else, so
# that ".else" or ".fi" in the middle of a skipped text line cannot end the
# skipping early.

elsif (/^\.if\s+~~sgcal/)
  {
  while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
  }

# Also there is a texinfo flag

elsif (/^\.if\s+~~texinfo\b/)
  {
  while (<IN>)
    { last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/; }
  }

# Ignore any other .if, .else, or .fi directives

elsif (/^\.if\b/ || /^\.fi\b/ || /^\.else\b/) {}

# Ignore .indent

elsif (/^\.indent\b/) {}

# Various flavours of numberpars map to corresponding list types. The
# arguments are picked up with a capture rather than with $', whose use slows
# down every pattern match in the program on older versions of Perl.

elsif (/^\.numberpars\b(.*)/)
  {
  $rest = $1;
  &setpar(0);

  if ($rest =~ /(?:\$\.|\" \")/)
    {
    unshift @endlist, "ul";
    unshift @listtype, "";
    print OUT "<ul>\n<li>";
    }
  else
    {
    $nptype = ($rest =~ /roman/)? "a" : "1";
    unshift @endlist, "ol";
    unshift @listtype, " TYPE=\"$nptype\"";
    print OUT "<ol>\n<li$listtype[0]>";
    }
  }

elsif (/^\.nextp\b/)
  {
  &setpar(0);
  print OUT "</li>\n<li$listtype[0]>";
  }

elsif (/^\.endp\b/)
  {
  &setpar(0);
  print OUT "</li>\n</$endlist[0]>\n";
  shift @listtype;
  shift @endlist;
  }

# .display asis can use <pre> which uses a typewriter font.
# Otherwise, we have to do our own line breaking. Turn tabbed lines
# into an HTML table. There will always be a .tabs line first.

elsif (/^\.display\b/)
  {
  my($intable) = 0;
  my($asis) = /asis/;
  my($rm) = /rm/;
  my($eol,$indent);

  # For non asis displays, start a paragraph, and set up to put an
  # explicit break after every line.

  if (!$asis)
    {
    &setpar(1);
    $eol = "<br>";
    $indent = "<tt>&nbsp;&nbsp;</tt>";
    }

  # For asis displays, use <pre> and no explicit breaks

  else
    {
    print OUT "<pre>\n";
    $eol = "";
    $indent = "&nbsp;&nbsp;";
    }

  # Now read through until we hit .endd (or EOF, but that shouldn't happen)
  # and process the lines in the display.

  while (<IN>)
    {
    last if /^\.endd\b/;

    # The presence of .tabs[et] starts a table

    if (/^\.tabs/)
      {
      $intable = 1;
      print OUT "<table cellspacing=0 cellpadding=0>\n";
      }

    # Some displays have an indent setting - ignore

    elsif (/^\.indent\b/) {}

    # Some displays have .blank inside them

    elsif (/^\.blank\b/)
      {
      print OUT "<br>\n";
      }

    # Some displays have emphasis inside them

    elsif (/^\.em\b/)
      {
      print OUT "<font color=green>" if ! $inem;
      $inem = 1;
      }

    elsif (/^\.nem\b/)
      {
      print OUT "</font>" if $inem;
      $inem = 0;
      }

    # There are occasional instances of .if [!]~~sys.fancy inside displays.
    # In both cases we want the non-fancy alternative. (The only thing that
    # matters in practice is noticing .tabs[et] actually.) Assume the syntax
    # is valid.

    elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/)
      {
      while (<IN>)
        {
        last if /^\.fi\b/ || /^\.else/;
        }
      }

    elsif (/^\.if\s+!\s*~~sys\.fancy/) {}

    elsif (/^\.fi\b/) {}

    # Ignore .newline and .linelength

    elsif (/^\.newline\b/ || /^\.linelength\b/) {}

    # Ignore comments

    elsif (/^\.(\s|$)/) {}

    # There shouldn't be any other directives inside displays

    elsif (/^\./)
      {
      print "*** Ignored directive inside .display: $_";
      }

    # Handle a data line within a display. If it's an asis display, the only
    # conversion is to escape the HTML characters. Otherwise, process the
    # SGCAL markup.

    else
      {
      chomp;
      if ($asis)
        {
        s/&/&#038;/g;
        s/</&#060;/g;
        s/>/&#062;/g;
        }
      else
        {
        $_ = &handle_text($_, !$rm);
        $_ = "<tt>$_</tt>" if !$rm && $_ ne "";
        }

      # In a table, break fields at $t. For non-rm we must break the
      # <tt> group as well.

      if ($intable)
        {
        if ($rm)
          {
          s/\s*\$t\s*/&nbsp;&nbsp;<\/td><td>/g;
          }
        else
          {
          s/\s*\$t\s*/&nbsp;&nbsp;<\/tt><\/td><td><tt>/g;
          }
        s/<tt><\/tt>//g;
        print OUT "<tr><td>&nbsp;&nbsp;$_</td></tr>\n";
        }

      # Otherwise, output straight, with <br> for non asis displays

      else
        {
        s/<tt><\/tt>//g;
        print OUT "$indent$_$eol\n";
        }
      }
    }              # Loop for display contents

  # Finish off the table and the <pre> - leave a paragraph open

  print OUT "</table>\n" if $intable;
  print OUT "</pre>\n" if $asis;
  }

# Handle configuration option definitions

elsif (/^\.startconf\s+(.*)/)
  {
  $confuse = &handle_text($1);
  }

elsif (/^\.conf\b/)
  {
  my($option, $type, $default) =
    /^\.conf\s+(\S+)\s+("(?:[^"]|"")+"|\S+)\s+("(?:[^"]|"")+"|.*)/;

  # If the line does not have the expected fields, say so, instead of
  # outputting an empty heading and creating an index entry with no text.

  if (!defined $option)
    {
    print "*** Malformed .conf directive: line $. ignored:\n";
    print "$_\n";
    }

  else
    {
    $option =~ s/\@_/_/g;   # Underscore will be quoted in option name

    # If $type ends with $**$, add ",expanded" as there doesn't seem to be
    # a dagger character generally available.

    $type =~ s/^"([^"]+)"/$1/;
    $type =~ s/\$\*\*\$/, expanded/;

    # Default may be quoted, and it may also have quotes that are required,
    # if it is a string.

    $default =~ s/^"(.*)"$/$1/;
    $default =~ s/""/"/g;
    $default = &handle_text($default, 0);

    print OUT "<hr>";
    &setpar(0);
    &handle_index($option, 0);
    print OUT "<h3>$option</h3>\n" .
      "<i>Use:</i>&nbsp; $confuse<br>" .
      "<i>Type:</i>&nbsp; $type<br><i>Default:</i>&nbsp; $default<br>\n";
    }
  }

elsif (/^\.endconf\b/)
  {
  print OUT "<hr><br>\n";
  }


# Handle "items" - used for expansion items and the like. We force the
# item text into bold, and put a rule between items.

elsif (/^\.startitems\b/) {}

elsif (/^\.item\s+(.*)/)
  {
  my($arg) = $1;
  chomp($arg);
  $arg =~ s/^"(.*)"$/$1/;
  $arg = &handle_text($arg, 0);

  # If there are two .items in a row, we don't want to put in the
  # separator line or start a new paragraph.

  if ($lastwasitem)
    {
    print OUT "<br>";
    }
  else
    {
    print OUT "<hr>";
    &setpar(1);
    }
  print OUT "<b>$arg</b>\n";
  $new_lastwasitem = 1;
  }

elsif (/^\.enditems\b/)
  {
  print OUT "<hr><br>\n";
  }


# Handle command line option items

elsif (/^\.startoptions\b/) {}

elsif (/^\.option\s+(.*)/)
  {
  my($arg) = $1;
  $arg =~ s/"([^"]*)"/$1/g;

  print OUT "<hr>";
  &setpar(0);

  # For indexing, we want to take up to the first # or < in the line,
  # before processing.

  my($name) = $arg =~ /^([^#<]+)/;
  $name = &handle_text($name, 0);
  &handle_index("-$name", 0);

  # Output as heading, after the index

  $arg = &handle_text($arg, 0);
  print OUT "<h3>-$arg</h3>\n";
  }

elsif (/^\.endoptions\b/)
  {
  print OUT "<hr><br>\n";
  }

# Found an SGCAL directive that isn't dealt with. Oh dear.

else
  {
  print "*** Unexpected SGCAL directive: line $. ignored:\n";
  print "$_\n";
  }

# Remember if last was a .item, and read the next line

$lastwasitem = $new_lastwasitem;
$_ = <IN>;
}
875
876
877
##################################################
#        First Pass - collect references         #
##################################################
881
# pass_one(): read the whole of $source_file once in order to collect the
# values of .set/.rset variables in %var_value.
#
# Definitions that precede the first chapter are stored as plain text. Inside
# chapters, ~~chapter and ~~section in a value are replaced by the current
# chapter and section numbers, and values whose names start with CHAP or SECT
# are turned into HTML links to the chapter or section in which they are set
# (the form of the link depends on $chapsplit and $sectsplit). The variable
# "runningfoot" is ignored. Dies if the source file cannot be opened.

sub pass_one{
$thischapter = 0;

# Three-argument open: the file name comes from the command line and must not
# be interpreted as a mode or a pipe.

open (IN, "<", $source_file) ||
  die "Can't open $source_file (first pass): $!\n";
$_ = <IN>;

# At the start of the specification text, there are some textual replacement
# definitions. They set values, but not cross-references. They may be preceded
# by comments.

$_ = <IN> while (defined $_ && /^\.(\s|$)/);

while (defined $_ && /^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/)
  {
  $var_value{$1} = $2;
  $_ = <IN>;
  }

# Now skip on till we hit the start of the first chapter. It will be numbered
# 0 if we hit ".set chapter -1". There is only ever one unnumbered chapter.
# The test for end of file stops this from looping for ever when the source
# contains no .chapter line at all.

while (defined $_ && !/^\.chapter/)
  {
  $thischapter = -1 if /^\.set\s+chapter\s+-1/;
  $_ = <IN>;
  }

# Loop for handling chapters

while ($_)
  {
  $thischapter++;
  $thissection = 0;

  # Scan through chapter, setting up cross-references to the chapter
  # and to the sections within it.

  while (<IN>)
    {
    last if /^\.chapter/;
    chomp;

    if (/^\.section/)
      {
      $thissection++;
      next;
      }

    # Handle .(r)set directives.

    if (/^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/ && $1 ne "runningfoot")
      {
      my($key,$value) = ($1,$2);
      $value =~ s/~~chapter/$thischapter/e;
      $value =~ s/~~section/$thissection/e;

      # Only one of $chapsplit or $sectsplit can be set.

      if ($key =~ /^CHAP/)
        {
        $value = $chapsplit?
          "<a href=\"${file_base}_$thischapter.html\">$value</a>"
          :
          "<a href=\"#CHAP$thischapter\">$value</a>";
        }

      elsif ($key =~ /^SECT/)
        {
        $value = $chapsplit?
          "<a href=\"${file_base}_$thischapter.html" .
          "#SECT$thischapter.$thissection\">$value</a>"
          :
          $sectsplit? "<a href=\"${file_base}_$thissection.html\">$value</a>"
          :
          "<a href=\"#SECT$thischapter.$thissection\">$value</a>";
        }

      $var_value{$key} = $value;
      }
    }
  }

close(IN);
}
966
967
968
969
970
##################################################
#          Second Pass - generate HTML           #
##################################################
974
# pass_two(): read $source_file again and generate the HTML.
#
# The table of contents is written to $html/${file_base}_toc.html on the
# handle TOC. The text goes to OUT, which is opened by openout(): either one
# file for the whole document or, when splitting, one file per chapter or per
# section. ".index" lines found before the first chapter (outside macro
# definitions) are stored directly in %cindex as cross-reference entries.
# After the last chapter, TOC entries are added for whichever of the concept
# and option indexes have any entries. Dies if the source file or the TOC
# file cannot be opened.

sub pass_two{
my($tocn) = 0;
my($inmacro) = 0;
my($insection) = 0;

$inem = 0;
$thischapter = 0;
$thissection = 0;

# Open the source file and get the first line

open (IN, "<", $source_file) ||
  die "Can't open $source_file (2nd pass): $!\n";
$_ = <IN>;

# Skip on till we hit the start of the first chapter, but note if we
# pass ".set chapter -1", which is used to indicate no chapter numbering for
# the first chapter (we number it 0). Keep track of whether we are in macro
# definitions or not, and when not, notice occurrences of .index, because
# these are the "x see y" type entries. The test for end of file stops this
# from looping for ever when there is no .chapter line.

while (defined $_ && !/^\.chapter/)
  {
  $thischapter = -1 if /^\.set\s+chapter\s+-1/;
  $inmacro = 1 if /^\.macro/;
  $inmacro = 0 if /^\.endm/;
  if (!$inmacro && /^\.index\s+(.*)/)
    {
    my($key);
    my($s) = $1;
    $s = &handle_text($s, 0);
    $s =~ s/ /&nbsp;/g;           # All spaces unsplittable
    $key = "\L$s";
    $key =~ s/<[^>]+>//g;
    $key =~ s/&#(\d+);/chr($1)/eg;
    $cindex{$key} = $s;
    }
  $_ = <IN>;
  }

# Open the TOC file

open (TOC, ">", "$html/${file_base}_toc.html") ||
  die "Can't open $html/${file_base}_toc.html: $!\n";

print TOC "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
print TOC "<html>\n<head>\n<title>$doctitle Contents</title>\n</head>\n" .
  "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";
print TOC "<h1>$doctitle</h1><hr>\n<ul>\n";

# Open the data file if we are not splitting at chapters

&openout("$html/${file_base}.html") if !$chapsplit;

# Loop for handling chapters. At the start of this loop, $_ is either EOF,
# or contains a .chapter line.

$firstchapter = $thischapter + 1;

while ($_)
  {
  print TOC "</ul>\n" if $insection;
  $insection = 0;

  $thischapter++;
  $thissection = 0;
  $lastwasrule = 0;

  # Start a new file if required

  if ($chapsplit)
    {
    &closeout("CHAP") if $thischapter != $firstchapter;
    &openout("$html/${file_base}_$thischapter.html");
    }

  # Set up the chapter title. Save it for the TOC. Set up the anchor and
  # link back to the TOC and show the title. If the line has no title text,
  # an empty title is used rather than whatever an earlier match captured.

  my($chaptext) = /^\.chapter\s+(.*)/? $1 : "";

  my($title) = (($thischapter > 0)? "$thischapter. " : "") .
    &handle_text($chaptext, 0);

  $tocn++;
  print TOC "<li><a " .
    "name=\"TOC$tocn\" " .
    "href=\"$current_file#CHAP$thischapter\">$title</a></li>\n";

  print OUT "<h1>\n";
  print OUT "<a name=\"CHAP$thischapter\" href=\"${file_base}_toc.html#TOC$tocn\">\n";
  print OUT "$title\n</a></h1>\n";

  # Scan the contents of the chapter

  $_ = <IN>;
  while ($_)
    {
    last if /^\.chapter/;

    # Handle the start of a new section, starting a new file if required

    if (/^\.section\s+(.*)/)
      {
      my($secttext) = $1;

      $thissection++;

      print TOC "<ul>\n" if !$insection;
      $insection = 1;

      my($title) = (($thischapter > 0)? "$thischapter.$thissection " :
        "$thissection. ") . &handle_text($secttext, 0);

      # The anchor name includes the chapter number only for numbered
      # chapters.

      my($anchor) = "SECT" . (($thischapter > 0)? "$thischapter." : "") .
        $thissection;

      if ($sectsplit)
        {
        &closeout("SECT");
        &openout("$html/${file_base}_$thissection.html");
        }

      # Plain print is used so that a "%" in the file name or title cannot
      # be taken as a printf conversion.

      $tocn++;
      print TOC "<li><a " .
        "name=\"TOC$tocn\" " .
        "href=\"$current_file#$anchor\">$title</a></li>\n";

      &setpar(0);
      print OUT "<h2>\n";
      print OUT "<a name=\"$anchor\" ";
      print OUT "href=\"${file_base}_toc.html#TOC$tocn\">\n";
      print OUT "$title\n</a></h2>\n";
      $_ = <IN>;
      $lastwasrule = 0;
      }

    # Blank lines at this level are ignored

    elsif (/^\s*$/)
      {
      $_ = <IN>;
      }

    # Directive and non-directive lines are handled independently, though
    # in each case further lines may be read. Afterwards, the next line is
    # in $_. If .em is at the start of a paragraph, treat it with the
    # paragraph, because the matching .nem will be too. Messy!

    elsif (/^\./)
      {
      if (/^\.em\b/)
        {
        $_=<IN>;
        if (/^\./)
          {
          print OUT "<font color=green>" if ! $inem;
          $inem = 1;
          # Used to handle it here - but that fails if it is .section.
          # Just let the next iteration of the loop handle it.
          # &handle_directive();
          }

        else
          {
          $_ = ".em\n" . $_;
          &handle_paragraph();
          $lastwasrule = 0;
          $lastwasitem = 0;
          }
        }

      # Not .em

      else
        {
        &handle_directive();
        }
      }

    # Not a directive

    else
      {
      &handle_paragraph();
      $lastwasrule = 0;
      $lastwasitem = 0;
      }

    }              # Loop for each line in a chapter
  }                # Loop for each chapter

# Close the last file, end off the TOC, and we are done.

&closeout("");

print TOC "</ul>\n" if $insection;

# A hash is tested for having entries by using it in a boolean context;
# "defined %hash" is a compile-time error in current versions of Perl.

if (%cindex)
  {
  $cindex_tocn = ++$tocn;
  print TOC "<li><a name=\"TOC$tocn\" ".
    "href=\"${file_base}_cindex.html\">Concept Index</a></li>\n";
  }

if (%oindex)
  {
  $oindex_tocn = ++$tocn;
  print TOC "<li><a name=\"TOC$tocn\" ".
    "href=\"${file_base}_oindex.html\">Option Index</a></li>\n";
  }

print TOC "</ul>\n</body>\n</html>\n";

# Buffered write errors only show up when the file is closed, so report them.

close(TOC) || warn "Error closing $html/${file_base}_toc.html: $!\n";
close(IN);
}
1187
1188
1189
1190
##################################################
#              Adjust index points               #
##################################################

# Because of the way the source is written, there are often index entries
# that immediately follow the start of chapters and sections and the definition
# of "items" like "helo = verify". This gets the correct page numbers for the
# PostScript and PDF formats. However, for HTML we want the index anchor to be
# before the section heading, because browsers tend to put the index point at
# the top of the screen. So we re-read all the files we've just created, and
# move some of the index points about. This is necessary only if indexes exist.
# The files are small enough to be handled entirely in memory.
1203
sub adjust_index_points {
# Re-read every "<file_base>_<number>.html" file in the $html directory, move
# the index anchor lines that follow a heading/item/option definition so that
# they precede it, and write the file back in place. Takes no arguments; reads
# the globals $html and $file_base. Dies if the directory or a file cannot be
# opened or written.

print "Adjusting index points to precede headings\n";

# Collect the names first and close the directory before any file is
# rewritten. The base name is quoted so that regex metacharacters in it are
# matched literally.

opendir(my $dir, $html) || die "Failed to opendir $html: $!\n";
my(@names) = grep { /^\Q$file_base\E_\d+\.html$/ } readdir($dir);
closedir($dir);

foreach my $name (@names)
  {
  my($path) = "$html/$name";

  open(my $in, "<", $path) || die "Failed to open $path (read): $!\n";
  my(@lines) = <$in>;
  close($in);

  _move_index_anchors(\@lines);

  # The lines still carry their own newlines, so they are joined with an
  # empty separator; this avoids changing the global $" for the rest of
  # the program.

  open(my $out, ">", $path) || die "Failed to open $path (write): $!\n";
  print $out join("", @lines);
  close($out) || die "Failed to write $path: $!\n";
  }
}

# Private helper for adjust_index_points. The argument is a reference to the
# list of lines of one HTML file; the list is modified in place. Each line of
# the form <a name="IXnnn"></a> that directly follows a heading, an "item", a
# "conf" definition or an "option" definition is moved back to just before it.

sub _move_index_anchors {
my($lines) = @_;

# Start at 1: an anchor on the very first line has no predecessor, and index
# -1 would wrongly inspect the last line of the file.

for (my $i = 1; $i < @$lines; $i++)
  {
  next unless $$lines[$i] =~ /^<a name="IX\d+"><\/a>$/;

  my($previous) = $$lines[$i-1];
  my($target);              # Where the anchor should go; undef = leave it

  # Anchor follows the end of a heading. Look back (at most 8 lines, never
  # as far as line 0) for the matching <hN> and put the anchor before it.
  # This preserves the order of multiple index lines.

  if ($previous =~ /^<\/a><\/h(\d)>/)
    {
    my($level) = $1;        # Saved at once; later matches may reset $1
    for (my $j = $i - 2; $j > 0 && $j > $i - 10; $j--)
      {
      if ($$lines[$j] =~ /<h$level>/)
        {
        $target = $j;
        last;
        }
      }
    }

  # Anchor follows an "item": move it back one line.

  elsif ($previous =~ /^<b>.*<\/b>\s*$/)
    {
    $target = $i - 1;
    }

  # Anchor follows a "conf" definition: move it before the <h3> line. The
  # $i >= 2 test stops $i-2 from wrapping round to the end of the file.

  elsif ($previous =~ /^<i>Type:<\/i>/ && $i >= 2 &&
         $$lines[$i-2] =~ /^<h3>/)
    {
    $target = $i - 2;
    }

  # Anchor follows an "option" definition: move it back one line.

  elsif ($previous =~ /^<h3>/)
    {
    $target = $i - 1;
    }

  # The target is always below $i, so removing line $i first does not
  # disturb the insertion position.

  splice(@$lines, $target, 0, splice(@$lines, $i, 1)) if defined $target;
  }
}
1277
1278
1279
1280
1281##################################################
1282# Create Index #
1283##################################################
1284
sub create_index{
# Write one index page.
#
# Arguments: a reference to a hash whose keys are the index sort texts and
#              whose values are the HTML lines to output
#            the file name suffix ("cindex" or "oindex")
#            the title of the index
#
# The page is written to $html/${file_base}_<suffix>.html; the globals $html,
# $file_base and $doctitle are read. Dies if the file cannot be opened or
# written.

my($hash, $ifname, $ititle) = @_;
my($path) = "$html/${file_base}_$ifname.html";

# A key may start with an opening double quote (\x93); it is ignored both
# for sorting and for choosing the letter under which the entry is filed.
# Seems like the actual char values are set by this time, not "&#147;".

my($sortform) = sub
  {
  my($text) = $_[0];
  $text =~ s/^\x93//;
  return $text;
  };

# Yields the upper case letter an entry is filed under, or "" when its first
# significant character is not a letter (such entries get no heading).

my($heading_for) = sub
  {
  my($initial) = uc(substr(&$sortform($_[0]), 0, 1));
  return ($initial =~ /\A[A-Z]\z/)? $initial : "";
  };

open(my $index, ">", $path) || die "Failed to open $path: $!\n";

print $index "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
print $index "<html>\n<head>\n<title>$doctitle $ititle</title>\n";
print $index "<base target=\"body\">\n</head>\n";

print $index "<body bgcolor=\"#FFFFDF\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

print $index "<h3>$ititle</h3>\n";

# We have to scan the keys in the hash twice; first to build the list
# of initial letters, and then to do the business. The first time we
# do not need to sort them.

my(%indexindex);
foreach my $key (keys %$hash)
  {
  my($initial) = &$heading_for($key);
  $indexindex{$initial} = 1 if $initial ne "";
  }

print $index "<p>\n";
foreach my $initial (sort keys %indexindex)
  {
  print $index "&nbsp;<a href=\"#$initial\" target=\"index\">$initial</a>\n";
  }
print $index "<hr></p>\n";

# Order the entries caselessly; entries that differ only in case are put in
# case-sensitive order so that the result is stable.

my(@ordered) = sort
  {
  my($aa) = &$sortform($a);
  my($bb) = &$sortform($b);
  (lc($aa) cmp lc($bb)) || ($aa cmp $bb);
  }
  keys %$hash;

my($letter) = "";
print $index "<p>\n";

foreach my $key (@ordered)
  {
  my($initial) = &$heading_for($key);

  # Start a new letter section, with an anchor for the links at the top.

  if ($initial ne "" && $initial ne $letter)
    {
    print $index "<br>\n";
    print $index "<a name=\"$initial\"></a>\n";
    print $index "<font size=\"+1\">$initial</font><br>\n";
    $letter = $initial;
    }
  print $index "$$hash{$key}<br>\n";
  }

print $index "</p>\n";

print $index "</body>\n</html>\n";
close($index) || die "Failed to write $path: $!\n";
}
1353
1354
1355
1356
1357##################################################
1358# Show usage and die #
1359##################################################
1360
sub usage {
# Terminate the script with a one-line reminder of the command syntax. The
# trailing newline stops Perl appending the script name and line number.
my($synopsis) = "g2h [-split no|section|chapter] <source> <title>";
die "Usage: $synopsis\n";
}
1364
1365
1366
1367##################################################
1368# Entry point and main program #
1369##################################################
1370
1371
1372# Directory in which to put the new HTML files
1373
$html = "html";

# Global variables. These are package variables because the subroutines
# above read and update them.

%cindex = ();
%oindex = ();

$chapsplit = 0;
$cindex_tocn = 0;
$confuse = "";
$file_base = "";
$index_count = 0;
$inem = 0;
$inpar = 0;
$lastwasitem = 0;
$lastwasrule = 0;
$oindex_tocn = 0;
$sectsplit = 0;
$source_file = "";
$thischapter = 0;
$thissection = 0;


# Handle options. Only -split is recognized, and it may be given only once;
# anything else, or a missing or unknown split type, shows the usage message.

my($splitset) = 0;

while (@ARGV && $ARGV[0] =~ /^-/)
  {
  &usage() unless $ARGV[0] eq "-split" && !$splitset;

  $splitset = 1;
  shift @ARGV;
  my($type) = shift @ARGV;
  &usage() unless defined $type;      # -split was the last argument

  if    ($type eq "section") { $sectsplit = 1; }
  elsif ($type eq "chapter") { $chapsplit = 1; }
  elsif ($type eq "no"     ) { $sectsplit = $chapsplit = 0; }
  else                       { &usage(); }
  }

# Get the source file and its base. If the name does not end in ".src" the
# base is left empty rather than undefined, so that the file names built
# from it are well defined.

&usage() unless @ARGV;
$source_file = shift @ARGV;
($file_base) = $source_file =~ /^(.*)\.src$/;
$file_base = "" unless defined $file_base;

&usage() unless @ARGV;
$doctitle = shift @ARGV;

print "\nCreate HTML for $doctitle from $source_file\n";

# Remove the old HTML files, that is, every plain file whose name is
# $html/${file_base}_<anything>.html. This is done without running a shell
# command, so that unusual characters in the source file name cannot be
# misinterpreted. A missing directory just means there is nothing to remove.

print "Removing old HTML files\n";
{
my($prefix) = "$html/${file_base}_";
my($olddir, $leaf) = $prefix =~ /\A(.*)\/([^\/]*)\z/s;
if (opendir(my $dh, $olddir))
  {
  my(@stale) = grep { /\A\Q$leaf\E.*\.html\z/s } readdir($dh);
  closedir($dh);
  foreach my $old (@stale)
    {
    next unless -f "$olddir/$old";
    unlink("$olddir/$old") || warn "Failed to remove $olddir/$old: $!\n";
    }
  }
}

# First pass identifies all the chapters and sections, and collects the
# values of the cross-referencing variables.

print "Scanning for cross-references\n";
&pass_one();

$maxchapter = $thischapter;     # Used if chapter splitting
$maxsection = $thissection;     # Used if section splitting

# Second pass actually creates the HTML files.

print "Creating the HTML files\n";
&pass_two();

# Reprocess for moving some of the index points, if indexes were created

&adjust_index_points() if %cindex || %oindex;

# Finally, we must create the option and concept indexes if any data
# has been collected for them.

if (%cindex)
  {
  print "Creating concept index\n";
  &create_index(\%cindex, "cindex", "Concepts");
  }

if (%oindex)
  {
  print "Creating option index\n";
  &create_index(\%oindex, "oindex", "Options");
  }
1464
1465# End of g2h