| 1 | #! /usr/bin/perl -w |
| 2 | # $Cambridge: exim/doc/doc-scripts/g2h,v 1.3 2005/02/17 12:17:09 ph10 Exp $ |
| 3 | |
| 4 | # This is a script that turns the SGCAL source of Exim's documentation into |
| 5 | # HTML. It can be used for both the filter document and the main Exim |
| 6 | # specification. The syntax is |
| 7 | # |
| 8 | # g2h [-split no|section|chapter] <source file> <title> |
| 9 | # |
| 10 | # Previously, -split section was used for the filter document, and -split |
| 11 | # chapter for the main specification. However, the filter document has gained |
| 12 | # some chapters, so they are both split by chapter now. Only one -split can be |
| 13 | # specified. |
| 14 | # |
| 15 | # A number of assumptions about the style of the input markup are made. |
| 16 | # |
| 17 | # The HTML is written into the directory html/ using the source file base |
| 18 | # name as its base. |
| 19 | |
| 20 | # Written by Philip Hazel |
| 21 | # Starting 21-Dec-2001 |
| 22 | # Last modified 26-Nov-2003 |
| 23 | |
| 24 | ############################################################################# |
| 25 | |
| 26 | |
| 27 | |
| 28 | ################################################## |
| 29 | # Open an output file # |
| 30 | ################################################## |
| 31 | |
# Open an HTML output file on the global handle OUT and write the page
# preamble: DOCTYPE, <title>, <body> and, when splitting, a navigation bar.
#
# Argument:
#   $_[0]  path of the file to create
#
# Globals read:    $doctitle, $thischapter, $thissection, $chapsplit,
#                  $sectsplit, $file_base, $maxchapter, $maxsection
# Globals written: OUT (left open), $current_file
#
# Dies if the file cannot be created.

sub openout {
    my ($path) = @_;

    open(OUT, '>', $path) || die "Can't open $path: $!\n";

    # Boilerplate

    print OUT "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";

    print OUT "<html>\n<head>\n<title>$doctitle" .
        (($thischapter > 0)? " chapter $thischapter" : "") .
        (($thissection > 0)? " section $thissection" : "") .
        "</title>\n</head>\n" .
        "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
        "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";

    # Forward/backward links when splitting. Chapter splitting takes
    # precedence over section splitting; the two bars differ only in which
    # counter and which maximum they use.

    if ($chapsplit || $sectsplit) {
        my ($this, $max) = $chapsplit
            ? ($thischapter, $maxchapter)
            : ($thissection, $maxsection);

        print OUT "<font size=2>\n";
        printf OUT ("<a href=\"%s_%s.html\">Previous</a> \n",
            $file_base, $this - 1) if $this > 1;
        printf OUT ("<a href=\"%s_%s.html\">Next</a> \n",
            $file_base, $this + 1) if $this < $max;
        print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";

        # Ordinary spaces collapse to one in HTML, so the gap before the
        # title has to be built from non-breaking spaces.
        print OUT "&nbsp;" x 6, "($doctitle)\n</font><hr>\n";
    }

    # Save the final component of the current file name (for TOC creation).
    # Stripping the directory part (rather than capturing after a slash)
    # also gives the right answer for a path that has no directory part.

    ($current_file = $path) =~ s{^.*/}{}s;
}
| 77 | |
| 78 | |
| 79 | |
| 80 | ################################################## |
| 81 | # Close an output file # |
| 82 | ################################################## |
| 83 | |
| 84 | # The first argument is one of: |
| 85 | # |
| 86 | # "CHAP" a chapter is ending |
| 87 | # "SECT" a section is ending |
| 88 | # "" the whole thing is ending |
| 89 | # |
| 90 | # In the first two cases $thischapter and $thissection contain the new chapter |
| 91 | # and section numbers, respectively. In the third case, we can deduce what is |
| 92 | # ending from the flags. The variables contain the current values. |
| 93 | |
# Finish the current HTML output file: emit a closing rule (unless the last
# thing output was already a rule), end any open paragraph, write the
# trailing navigation bar, close <body>/<html> and close OUT.
#
# Argument:
#   $_[0]  "CHAP" or "SECT" when a chapter/section is ending because a new
#          one has started ($thischapter/$thissection already hold the NEW
#          number), or "" when the whole document is ending (the counters
#          hold the CURRENT number and the split flags decide what ends).

sub closeout {
    my ($ending) = @_;

    print OUT "<hr>\n" if !$lastwasrule;
    setpar(0);

    # Work out which counter drives the links, and whether that counter has
    # already been advanced to the following file.

    my ($number, $advanced);
    if    ($ending eq "CHAP") { ($number, $advanced) = ($thischapter, 1); }
    elsif ($ending eq "SECT") { ($number, $advanced) = ($thissection, 1); }
    elsif ($chapsplit)        { ($number, $advanced) = ($thischapter, 0); }
    elsif ($sectsplit)        { ($number, $advanced) = ($thissection, 0); }

    if (defined $advanced) {
        # The file being closed is $number - 1 when the counter has been
        # advanced, so its predecessor is $number - 2; otherwise it is
        # $number - 1. A link is only written when that file exists (> 0).
        my $previous = $number - ($advanced ? 2 : 1);

        print OUT "<font size=2>\n";
        printf OUT ("<a href=\"%s_%s.html\">Previous</a> ",
            $file_base, $previous) if $previous > 0;
        print OUT "<a href=\"${file_base}_$number.html\">Next</a> "
            if $advanced;
        print OUT "<a href=\"${file_base}_toc.html\">Contents</a>\n";

        # Non-breaking spaces: ordinary ones would collapse to one in HTML.
        print OUT "&nbsp;" x 6, "($doctitle)\n</font>\n";
    }

    print OUT "</body>\n</html>\n";

    # Buffered write errors only surface at close time, so report them.
    # This is a warning rather than a fatal error so that closing a handle
    # that was never opened keeps its previous non-fatal behaviour.
    close(OUT) || warn "Error closing HTML output file: $!\n";
}
| 143 | |
| 144 | |
| 145 | |
| 146 | ################################################## |
| 147 | # Handle an index line # |
| 148 | ################################################## |
| 149 | |
| 150 | # This function returns an empty string so that it can be called as part |
| 151 | # of an s operator when handling index items within paragraphs. The two |
| 152 | # arguments are: |
| 153 | # |
| 154 | # the text to index, already converted to HTML |
| 155 | # 1 for the concept index, 0 for the options index |
| 156 | |
# Record one index entry and drop a named anchor into the current output.
#
# Arguments:
#   $_[0]  the text to index, already converted to HTML
#   $_[1]  true for the concept index (%cindex), false for the options
#          index (%oindex)
#
# Always returns an empty string so that the call can be used as the
# replacement expression of an s///e operator.
#
# Globals: increments $index_count; reads $chapsplit, $sectsplit,
# $file_base, $thischapter, $thissection; prints the anchor to OUT.

sub handle_index {
    my ($text, $is_concept) = @_;
    my $hash = $is_concept ? \%cindex : \%oindex;

    # Up the index count, and compute the reference to the file and the
    # label within it.

    $index_count++;
    my $anchor = "IX$index_count";
    my $ref = $chapsplit ? "${file_base}_$thischapter.html#$anchor"
            : $sectsplit ? "${file_base}_$thissection.html#$anchor"
            :              "#$anchor";

    # Turn the primary/secondary splitting string "||" into ":".

    $text =~ s/\|\|/:/g;

    # Create the index key, which consists of the text with all the HTML
    # coding and any leading quotation marks removed.

    my $key = $text;
    $key =~ s/<[^>]+>//g;
    $key =~ s/&#(\d+);/chr($1)/eg;
    $key =~ s/^`+//;
    $key =~ s/^"//;

    # Turn all spaces in the text into &nbsp; so that they don't ever split.
    # However, there may be spaces in the HTML that already exists in the
    # text, so we have to avoid changing spaces inside <>. The lookahead
    # only accepts a space whose following run of non-bracket characters
    # ends at "<" or at end of string, i.e. a space outside any tag.

    $text =~ s/ (?=[^<>]*(?:<|$))/&nbsp;/g;

    # If this is the first encounter with this index key, we create a
    # straightforward reference.

    if (!defined $$hash{$key}) {
        $$hash{$key} = "<a href=\"$ref\">$text</a>";
    }

    # For the second and subsequent encounters, add "[2]" etc. to the
    # index text. The number is one more than the count of "<a" already
    # present in the stored string.

    else {
        my $number = 1 + (() = $$hash{$key} =~ /<a/g);
        $$hash{$key} .= " <a href=\"$ref\">[$number]</a>";
    }

    # Place the name in the current output

    print OUT "<a name=\"$anchor\"></a>\n";
    return "";
}
| 215 | |
| 216 | |
| 217 | |
| 218 | ################################################## |
| 219 | # Handle emphasis bars # |
| 220 | ################################################## |
| 221 | |
| 222 | # Set colour green for text marked with "emphasis bars", keeping |
| 223 | # track in case the matching isn't perfect. |
| 224 | |
# Switch "emphasis bar" colouring on (true argument) or off (false
# argument). Returns the HTML needed to make the change, or an empty string
# when the requested state is already in force, so that unbalanced
# .em/.nem pairs never produce unbalanced <font> tags. The current state is
# kept in the global $inem.

sub setinem {
    my ($want) = @_;
    my $target  = $want ? 1 : 0;
    my $current = $inem ? 1 : 0;

    return "" if $target == $current;

    $inem = $target;
    return $target ? "<font color=green>\n" : "</font>\n";
}
| 239 | |
| 240 | |
| 241 | |
| 242 | ################################################## |
| 243 | # Convert marked-up text # |
| 244 | ################################################## |
| 245 | |
| 246 | # This function converts text from SGCAL markup to HTML markup, with a couple |
| 247 | # of exceptions: |
| 248 | # |
| 249 | # 1. We don't touch $t because that is handled by the .display code. |
| 250 | # |
| 251 | # 2. The text may contain embedded .index, .em, and .nem directives. We |
| 252 | # handle .em and .nem, but leave .index because it must be done during |
| 253 | # paragraph outputting. |
| 254 | # |
| 255 | # In a non-"rm" display, we turn $rm{ into cancelling of <tt>. Otherwise |
| 256 | # it is ignored - in practice it is only used in that special case. |
| 257 | # |
| 258 | # The order in which things are done in this function is highly sensitive! |
| 259 | |
# Convert a piece of text from SGCAL markup to HTML markup.
#
# Arguments:
#   $_[0]  the text to convert
#   $_[1]  true when the text is a line of a non-"rm" display, in which
#          case $rm{xxx} becomes </tt>xxx<tt>; otherwise $rm{xxx} is
#          simply unwrapped
#
# Returns the converted text. $t and embedded .index lines are left
# untouched for the callers to deal with; embedded .em/.nem lines are
# replaced by whatever setinem() returns; ~~name is replaced from
# %var_value.
#
# The order in which things are done in this function is highly sensitive!

sub handle_text {
    my ($s, $rmspecial) = @_;

    # Escape all & characters (they aren't involved in markup) but for the
    # moment use &+ instead of &# so that we can handle # characters in the
    # text.

    $s =~ s/&/&+038;/g;

    # Turn SGCAL literals into HTML literals that don't look like SGCAL
    # markup, so won't be touched by what follows. Again, use + instead of #.

    $s =~ s/\@\@/&+064;/g;
    $s =~ s/\@([^\@])/"&+".sprintf("%0.3d",ord($1)).";"/eg;

    # Now turn any #s that are markup into spaces, and convert the
    # previously created literals to the correct form.
    # NOTE(review): if SGCAL's # denotes a non-breaking space, this should
    # probably emit &nbsp; rather than a plain space - confirm.

    $s =~ s/#/ /g;
    $s =~ s/&\+(\d+);/&#$1;/g;

    # Some simple markup that doesn't involve argument text. Typographic
    # characters are written as entities so that the output does not depend
    # on the encoding of this script or of the generated page.

    $s =~ s/\$~//g;                        # turn $~ into nothing
    $s =~ s/__/_/g;                        # turn __ into _
    $s =~ s/--(?=$|\s|\d)/&ndash;/mg;      # turn -- into endash in text or
                                           #   number range
    $s =~ s/\(c\)/&copy;/g;                # turn (c) into copyright symbol

    # Use double quotes

    $s =~ s/`([^']+)'/&ldquo;$1&rdquo;/g;

    # This is a fudge for some specific usages of $<; can't just do a global
    # as it occurs in things like "$<variable name>" as well.

    $s =~ s/(\d)\$<-/$1-/g;                # turn 0$<- into 0-
    $s =~ s/\$<//g;                        # other $< is ignored

    # Turn <<...>> into equivalent SGCAL markup that doesn't involve the use
    # of < and >, and then escape the remaining < and > characters in the
    # text. This must happen before any HTML tags are generated below.

    $s =~ s/<<([^>]*?)>>/<\$it{$1}>/g;     # turn <<xxx>> into <$it{xxx}>
    $s =~ s/</&lt;/g;
    $s =~ s/>/&gt;/g;

    # Other markup...

    $s =~ s/\$sm\{//g;                     # turn $sm{  into nothing
    $s =~ s/\$smc\{//g;                    # turn $smc{ into nothing
    $s =~ s/\$smi\{//g;                    # turn $smi{ into nothing

    $s =~ s/\$tt\{([^\}]*?)\}/<tt>$1<\/tt>/g;   # $tt{xxx} -> <tt>xxx</tt>
    $s =~ s/\$it\{([^\}]*?)\}/<em>$1<\/em>/g;   # $it{xxx} -> <em>xxx</em>
    $s =~ s/\$bf\{([^\}]*?)\}/<b>$1<\/b>/g;     # $bf{xxx} -> <b>xxx</b>

    $s =~ s/\$cb\{([^\}]*?)\}/<tt><b>$1<\/b><\/tt>/g;  # $cb{xxx} ->
                                                       #   <tt><b>xxx</b></tt>

    $s =~ s/\\\\([^\\]*?)\\\\/<font size=-1>$1<\/font>/g;  # \\xxx\\ -> small
    $s =~ s/\\\?([^?]*?)\?\\/<a href="$1">$1<\/a>/g;       # \?URL?\ -> link

    $s =~ s/\\\(([^)]*?)\)\\/<i>$1<\/i>/g;      # \(xxx)\ -> <i>xxx</i>
    $s =~ s/\\\"([^\"]*?)\"\\/<tt>$1<\/tt>/g;   # \"xxx"\ -> <tt>xxx</tt>

    $s =~ s/\\\$([^\$]*?)\$\\/<tt>\$$1<\/tt>/g; # \$xxx$\ -> <tt>$xxx</tt>
    $s =~ s/\\\-([^\\]*?)\-\\/<i>-$1<\/i>/g;    # \-xxx-\ -> -italic
    $s =~ s/\\\*\*([^*]*?)\*\*\\/<b>$1<\/b>/g;  # \**xxx**\ -> <b>xxx</b>
    $s =~ s/\\\*([^*]*?)\*\\/<i>$1<\/i>/g;      # \*xxx*\ -> italic
    $s =~ s/\\%([^*]*?)%\\/<b>$1<\/b>/g;        # \%xxx%\ -> bold
    $s =~ s/\\([^\\]*?)\\/<tt>$1<\/tt>/g;       # \xxx\ -> <tt>xxx</tt>
    $s =~ s/::([^\$]*?)::/<i>$1:<\/i>/g;        # ::xxx:: -> italic:
    $s =~ s/\$\*\$/\*/g;                        # $*$ -> *

    # Handle $rm{...}

    if ($rmspecial) {
        $s =~ s/\$rm\{([^\}]*?)\}/<\/tt>$1<tt>/g;   # $rm{xxx} -> </tt>xxx<tt>
    }
    else {
        $s =~ s/\$rm\{([^\}]*?)\}/$1/g;             # $rm{xxx} -> xxx
    }

    # There is one case where the terminating } of an escape sequence is
    # in another paragraph - this follows $sm{ - it can be fixed by
    # removing any stray } in a paragraph that contains no { chars. The
    # test has to look at the text being converted, not at $_, which holds
    # whatever input line the caller happened to read last.

    $s =~ s/\}//g if $s !~ /\{/;

    # Remove any null flags ($$)

    $s =~ s/\$\$//g;

    # If the paragraph starts with $c\b, remove it.

    $s =~ s/^\$c\b//;

    # If the paragraph starts with $e\b, indent it slightly. Non-breaking
    # spaces are needed because leading ordinary spaces are not rendered.

    $s =~ s/^\$e\b/&nbsp;&nbsp;/;

    # Handle .em, and .nem directives that occur within the paragraph

    $s =~ s/\.em\s*\n/setinem(1)/eg;
    $s =~ s/\.nem\s*\n/setinem(0)/eg;

    # Explicitly included HTML

    $s =~ s/\[\(([^)]+)\)\]/<$1>/g;        # turn [(...)] into <...>

    # Finally, do the substitutions and return the modified text.

    $s =~ s/~~(\w+)/$var_value{$1}/eg;

    return $s;
}
| 381 | |
| 382 | |
| 383 | |
| 384 | ################################################## |
| 385 | # Start/end a paragraph # |
| 386 | ################################################## |
| 387 | |
| 388 | # We want to leave paragraphs unterminated until we know that a horizontal |
| 389 | # rule does not follow, to avoid getting space inserted before the rule, |
| 390 | # which doesn't look good. So we have this function to help control things. |
| 391 | # If the argument is 1 we are starting a new paragraph; if it is 0 we want |
| 392 | # to force the ending of any incomplete paragraph. |
| 393 | |
# Close the paragraph that is currently open (if any) and, when the
# argument is true, open a new one. Called with 0 to force the end of an
# incomplete paragraph and with 1 to start a fresh paragraph. The "inside
# a paragraph" state lives in the global $inpar; tags are written to OUT.

sub setpar {
    my ($start_new) = @_;
    my $tags = "";

    if ($inpar) {
        $tags .= "</p>\n";
        $inpar = 0;
    }

    if ($start_new) {
        $tags .= "<p>\n";
        $inpar = 1;
    }

    print OUT $tags if length $tags;
}
| 406 | |
| 407 | |
| 408 | |
| 409 | ################################################## |
| 410 | # Handle a "paragraph" # |
| 411 | ################################################## |
| 412 | |
| 413 | # Read a paragraph of text, which may contain many lines and may contain |
| 414 | # .index, .em, and .nem directives within it. We may also encounter |
| 415 | # ".if ~~html" within paragraphs. Process those directives, |
| 416 | # convert the markup, and output the rest as an HTML paragraph. |
| 417 | |
| 418 | |
# Collect and output one paragraph. On entry $_ holds the paragraph's first
# line; further lines are read from IN until a blank line or a directive
# other than .index, .em or .nem is met. That terminating line (or undef at
# end of file) is left in $_ for the caller.
#
# Inside the paragraph, ".if ~~html" keeps the lines up to .else/.fi and
# discards an .else part. The collected text is converted with
# handle_text(); embedded .index lines are processed just before output.

sub handle_paragraph {
    my $text = $_;
    my $in_html_cond = 0;

    LINE: while (<IN>) {

        # Start of an HTML-only conditional: keep what follows, and trim the
        # text so far so that no unwanted whitespace or newline remains.
        if (/^\.if\s+~~html\b/) {
            $in_html_cond = 1;
            $text =~ s/\s+$//;
            next LINE;
        }

        if ($in_html_cond) {
            # The .else part is for the other formats: skip to .fi.
            if (/^\.else\b/) {
                while (<IN>) { last if /^\.fi\b/; }
                $in_html_cond = 0;
                next LINE;
            }
            if (/^\.fi\b/) {
                $in_html_cond = 0;
                next LINE;
            }
        }

        # A blank line, or any directive that cannot live inside a
        # paragraph, ends the paragraph.
        last LINE if /^\s*$/;
        last LINE if /^\./ && !/^\.(?:index|em|nem)\b/;

        $text .= $_;
    }

    $text = handle_text($text, 0);

    # We can't handle .index until this point, when we do it just before
    # outputting the paragraph. Nothing is output for an empty paragraph.

    if ($text =~ /\S/) {
        setpar(1);
        $text =~ s/\.index\s+([^\n]+)\n/handle_index($1, 1)/eg;
        print OUT $text;
    }
}
| 457 | |
| 458 | |
| 459 | |
| 460 | ################################################## |
| 461 | # Handle a non-paragraph directive # |
| 462 | ################################################## |
| 463 | |
| 464 | # The directives .index, .em, and .nem can also appear within paragraphs, |
| 465 | # and are then handled within the handle_paragraph() code. |
| 466 | |
# Process the non-paragraph SGCAL directive currently held in $_, writing
# any resulting HTML to OUT, and then read the next input line into $_.
#
# Some directives consume further lines from IN themselves (conditional
# blocks that are skipped, and everything up to .endd for a .display).
#
# Globals written: $lastwasrule (set only by .rule), $lastwasitem (set only
# by .item), $inem, $confuse, @endlist, @listtype, $rest, $nptype.
#
# The directives .index, .em, and .nem can also appear within paragraphs,
# and are then handled within the handle_paragraph() code.

sub handle_directive {
    my $new_lastwasitem = 0;

    $lastwasrule = 0;

    if (/^\.r?set\b/ || /^\.(?:\s|$)/) {}    # ignore .(r)set and comments

    elsif (/^\.justify\b/) {}                # and .justify

    elsif (/^\.newline\b/) { print OUT "<br>\n"; }

    elsif (/^\.blank\b/ || /^\.space\b/) { print OUT "<br>\n"; }

    elsif (/^\.rule\b/) { setpar(0); print OUT "<hr>\n"; $lastwasrule = 1; }

    elsif (/^\.index\s+(.*)/) { handle_index(handle_text($1), 1); }

    # Emphasis is handled by colour

    elsif (/^\.em\b/) {
        setpar(0);
        print OUT "<font color=green>" if ! $inem;
        $inem = 1;
    }

    elsif (/^\.nem\b/) {
        setpar(0);
        print OUT "</font>" if $inem;
        $inem = 0;
    }

    # Ignore tab setting stuff - we use tables instead.

    elsif (/^\.tabs(?:et)?\b/) {}

    # .tempindent is used only to align some of the expansion stuff nicely;
    # just ignore it. It is used in conjunction with .push/.pop.

    elsif (/^\.(tempindent|push|pop)\b/) {}

    # There are some instances of .if ~~sys.fancy in the source. Some of
    # those that are not inside displays are two-part things, in which case
    # we just keep the non-fancy part. For diagrams, however, they are in
    # three parts:
    #
    #   .if ~~sys.fancy
    #   <aspic drawing stuff for PostScript and PDF>
    #   .elif !~~html
    #   <ascii art for txt and Texinfo>
    #   .else
    #   <HTML instructions for including a gif>
    #   .fi
    #
    # In this case, we skip to the third part.

    elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/) {
        while (<IN>) {
            last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/;
        }

        # $_ is undefined here if the input ran out while skipping.
        if (defined $_ && /^\.elif\b/) {
            while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
        }
    }

    # Similarly, for .if !~~sys.fancy, take the non-fancy part.

    elsif (/^\.if\s+!\s*~~sys\.fancy/) {}

    # There are some explicit tests for ~~html for direct HTML inclusions

    elsif (/^\.if\s+~~html\b/) {}

    # There are occasional requirements to do things differently for
    # Texinfo/HTML and PS/txt versions. The latter are produced by SGCAL, so
    # that's what the flag is called. The patterns are anchored like every
    # other directive test so that ordinary text containing ".fi" or ".else"
    # inside the skipped part cannot end the skip early.

    elsif (/^\.if\s+~~sgcal/) {
        while (<IN>) { last if /^\.else\b/ || /^\.fi\b/; }
    }

    # Also there is a texinfo flag

    elsif (/^\.if\s+~~texinfo\b/) {
        while (<IN>) {
            last if /^\.else\b/ || /^\.elif\s+!\s*~~html/ || /^\.fi\b/;
        }
    }

    # Ignore any other .if, .else, or .fi directives

    elsif (/^\.if\b/ || /^\.fi\b/ || /^\.else\b/) {}

    # Ignore .indent

    elsif (/^\.indent\b/) {}

    # Various flavours of numberpars map to corresponding list types. The
    # list stacks @endlist and @listtype have the innermost list first.

    elsif (/^\.numberpars\b/) {
        $rest = $';
        setpar(0);

        if ($rest =~ /(?:\$\.|\" \")/) {
            unshift @endlist, "ul";
            unshift @listtype, "";
            print OUT "<ul>\n<li>";
        }
        else {
            $nptype = ($rest =~ /roman/)? "a" : "1";
            unshift @endlist, "ol";
            unshift @listtype, " TYPE=\"$nptype\"";
            print OUT "<ol>\n<li$listtype[0]>";
        }
    }

    elsif (/^\.nextp\b/) {
        setpar(0);
        print OUT "</li>\n<li$listtype[0]>";
    }

    elsif (/^\.endp\b/) {
        setpar(0);
        print OUT "</li>\n</$endlist[0]>\n";
        shift @listtype;
        shift @endlist;
    }

    # .display asis can use <pre> which uses a typewriter font.
    # Otherwise, we have to do our own line breaking. Turn tabbed lines
    # into an HTML table. There will always be a .tabs line first.

    elsif (/^\.display\b/) {
        my($intable) = 0;
        my($asis) = /asis/;
        my($rm) = /rm/;
        my($eol,$indent);

        # For non asis displays, start a paragraph, and set up to put an
        # explicit break after every line.

        if (!$asis) {
            setpar(1);
            $eol = "<br>";
            $indent = "<tt> </tt>";
        }

        # For asis displays, use <pre> and no explicit breaks

        else {
            print OUT "<pre>\n";
            $eol = "";
            $indent = " ";
        }

        # Now read through until we hit .endd (or EOF, but that shouldn't
        # happen) and process the lines in the display.

        while (<IN>) {
            last if /^\.endd\b/;

            # The presence of .tabs[et] starts a table

            if (/^\.tabs/) {
                $intable = 1;
                print OUT "<table cellspacing=0 cellpadding=0>\n";
            }

            # Some displays have an indent setting - ignore

            elsif (/^\.indent\b/) {}

            # Some displays have .blank inside them

            elsif (/^\.blank\b/) {
                print OUT "<br>\n";
            }

            # Some displays have emphasis inside them

            elsif (/^\.em\b/) {
                print OUT "<font color=green>" if ! $inem;
                $inem = 1;
            }

            elsif (/^\.nem\b/) {
                print OUT "</font>" if $inem;
                $inem = 0;
            }

            # There are occasional instances of .if [!]~~sys.fancy inside
            # displays. In both cases we want the non-fancy alternative.
            # (The only thing that matters in practice is noticing .tabs[et]
            # actually.) Assume the syntax is valid.

            elsif (/^\.if\s+~~sys\.fancy/ || /^\.else\b/) {
                while (<IN>) {
                    last if /^\.fi\b/ || /^\.else/;
                }
            }

            elsif (/^\.if\s+!\s*~~sys\.fancy/) {}

            elsif (/^\.fi\b/) {}

            # Ignore .newline and .linelength

            elsif (/^\.newline\b/ || /^\.linelength\b/) {}

            # Ignore comments

            elsif (/^\.(\s|$)/) {}

            # There shouldn't be any other directives inside displays

            elsif (/^\./) {
                print "*** Ignored directive inside .display: $_";
            }

            # Handle a data line within a display. If it's an asis display,
            # the only conversion is to escape the HTML characters (the
            # ampersand first, so that the entities produced for < and > are
            # not themselves escaped). Otherwise, process the SGCAL markup.

            else {
                chomp;
                if ($asis) {
                    s/&/&amp;/g;
                    s/</&lt;/g;
                    s/>/&gt;/g;
                }
                else {
                    $_ = handle_text($_, !$rm);
                    $_ = "<tt>$_</tt>" if !$rm && $_ ne "";
                }

                # In a table, break fields at $t. For non-rm we must break
                # the <tt> group as well.

                if ($intable) {
                    if ($rm) {
                        s/\s*\$t\s*/ <\/td><td>/g;
                    }
                    else {
                        s/\s*\$t\s*/ <\/tt><\/td><td><tt>/g;
                    }
                    s/<tt><\/tt>//g;
                    print OUT "<tr><td> $_</td></tr>\n";
                }

                # Otherwise, output straight, with <br> for non asis displays

                else {
                    s/<tt><\/tt>//g;
                    print OUT "$indent$_$eol\n";
                }
            }
        }       # Loop for display contents

        # Finish off the table and the <pre> - leave a paragraph open

        print OUT "</table>\n" if $intable;
        print OUT "</pre>\n" if $asis;
    }

    # Handle configuration option definitions

    elsif (/^\.startconf\s+(.*)/) {
        $confuse = handle_text($1);
    }

    elsif (/^\.conf\b/) {
        my($option, $type, $default) =
            /^\.conf\s+(\S+)\s+("(?:[^"]|"")+"|\S+)\s+("(?:[^"]|"")+"|.*)/;

        $option =~ s/\@_/_/g;   # Underscore will be quoted in option name

        # If $type ends with $**$, add ",expanded" as there doesn't seem to
        # be a dagger character generally available.

        $type =~ s/^"([^"]+)"/$1/;
        $type =~ s/\$\*\*\$/, expanded/;

        # Default may be quoted, and it may also have quotes that are
        # required, if it is a string.

        $default =~ s/^"(.*)"$/$1/;
        $default =~ s/""/"/g;
        $default = handle_text($default, 0);

        print OUT "<hr>";
        setpar(0);
        handle_index($option, 0);
        print OUT "<h3>$option</h3>\n" .
            "<i>Use:</i> $confuse<br>" .
            "<i>Type:</i> $type<br><i>Default:</i> $default<br>\n";
    }

    elsif (/^\.endconf\b/) {
        print OUT "<hr><br>\n";
    }

    # Handle "items" - used for expansion items and the like. We force the
    # item text into bold, and put a rule between items.

    elsif (/^\.startitems\b/) {}

    elsif (/^\.item\s+(.*)/) {
        my($arg) = $1;
        chomp($arg);
        $arg =~ s/^"(.*)"$/$1/;
        $arg = handle_text($arg, 0);

        # If there are two .items in a row, we don't want to put in the
        # separator line or start a new paragraph.

        if ($lastwasitem) {
            print OUT "<br>";
        }
        else {
            print OUT "<hr>";
            setpar(1);
        }
        print OUT "<b>$arg</b>\n";
        $new_lastwasitem = 1;
    }

    elsif (/^\.enditems\b/) {
        print OUT "<hr><br>\n";
    }

    # Handle command line option items

    elsif (/^\.startoptions\b/) {}

    elsif (/^\.option\s+(.*)/) {
        my($arg) = $1;
        $arg =~ s/"([^"]*)"/$1/g;

        print OUT "<hr>";
        setpar(0);

        # For indexing, we want to take up to the first # or < in the line,
        # before processing.

        my($name) = $arg =~ /^([^#<]+)/;
        $name = handle_text($name, 0);
        handle_index("-$name", 0);

        # Output as heading, after the index

        $arg = handle_text($arg, 0);
        print OUT "<h3>-$arg</h3>\n";
    }

    elsif (/^\.endoptions\b/) {
        print OUT "<hr><br>\n";
    }

    # Found an SGCAL directive that isn't dealt with. Oh dear.

    else {
        print "*** Unexpected SGCAL directive: line $. ignored:\n";
        print "$_\n";
    }

    # Remember if last was a .item, and read the next line

    $lastwasitem = $new_lastwasitem;
    $_ = <IN>;
}
| 875 | |
| 876 | |
| 877 | |
| 878 | ################################################## |
| 879 | # First Pass - collect references # |
| 880 | ################################################## |
| 881 | |
# First pass over the source file: collect the values of the textual
# replacement variables into %var_value so that the second pass can
# substitute ~~name references, including forward references.
#
# Variables whose names start with CHAP or SECT are cross-references; their
# values are wrapped in an <a href=...> pointing at the chapter or section
# in which the .set appears, in a form that depends on $chapsplit and
# $sectsplit. "runningfoot" is never recorded once inside a chapter.
#
# Globals read:    $source_file, $chapsplit, $sectsplit, $file_base
# Globals written: %var_value, $thischapter, $thissection, $_ and the IN
#                  handle (closed again on return)
#
# Dies if the source file cannot be opened.

sub pass_one {
    $thischapter = 0;

    open(IN, '<', $source_file) ||
        die "Can't open $source_file (first pass): $!\n";
    $_ = <IN>;

    # At the start of the specification text, there are some textual
    # replacement definitions. They set values, but not cross-references.
    # They may be preceded by comments.

    $_ = <IN> while defined $_ && /^\.(\s|$)/;

    while (defined $_ && /^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/) {
        $var_value{$1} = $2;
        $_ = <IN>;
    }

    # Now skip on till we hit the start of the first chapter. It will be
    # numbered 0 if we hit ".set chapter -1". There is only ever one
    # unnumbered chapter. The defined() test stops the scan at end of file
    # when the source contains no .chapter at all; without it the loop
    # would never terminate.

    while (defined $_ && !/^\.chapter/) {
        $thischapter = -1 if /^\.set\s+chapter\s+-1/;
        $_ = <IN>;
    }

    # Loop for handling chapters. Each time round, $_ holds a .chapter line;
    # it becomes undefined when the inner loop runs off the end of the file.

    while (defined $_) {
        $thischapter++;
        $thissection = 0;

        # Scan through chapter, setting up cross-references to the chapter
        # and to the sections within it.

        while (<IN>) {
            last if /^\.chapter/;
            chomp;

            if (/^\.section/) {
                $thissection++;
                next;
            }

            # Handle .(r)set directives.

            next unless /^\.r?set\s+(\S+)\s+"?([^"]+)\"?\s*$/
                && $1 ne "runningfoot";

            my($key,$value) = ($1,$2);
            $value =~ s/~~chapter/$thischapter/;
            $value =~ s/~~section/$thissection/;

            # Only one of $chapsplit or $sectsplit can be set.

            if ($key =~ /^CHAP/) {
                $value = $chapsplit
                    ? "<a href=\"${file_base}_$thischapter.html\">$value</a>"
                    : "<a href=\"#CHAP$thischapter\">$value</a>";
            }
            elsif ($key =~ /^SECT/) {
                $value =
                    $chapsplit ?
                        "<a href=\"${file_base}_$thischapter.html" .
                        "#SECT$thischapter.$thissection\">$value</a>"
                  : $sectsplit ?
                        "<a href=\"${file_base}_$thissection.html\">$value</a>"
                  :
                        "<a href=\"#SECT$thischapter.$thissection\">$value</a>";
            }

            $var_value{$key} = $value;
        }
    }

    close(IN);
}
| 966 | |
| 967 | |
| 968 | |
| 969 | |
| 970 | |
| 971 | ################################################## |
| 972 | # Second Pass - generate HTML # |
| 973 | ################################################## |
| 974 | |
# Second pass over the source file: writes the HTML body file(s) through the
# OUT handle and the table of contents to $html/${file_base}_toc.html.
#
# Takes no arguments and returns nothing useful. It reads the globals
# $source_file, $html, $file_base, $doctitle, $chapsplit, $sectsplit and
# $current_file (not set in this sub; presumably maintained by openout --
# confirm), and it resets or updates $inem, $thischapter, $thissection,
# $firstchapter, $lastwasrule, $lastwasitem, %cindex, $cindex_tocn and
# $oindex_tocn. It dies if the source file or the TOC file cannot be opened.

sub pass_two{
my($tocn) = 0;        # Number of the most recent TOC anchor (TOC1, TOC2, ...)
my($inmacro) = 0;     # True while between .macro and .endm in the preamble
my($insection) = 0;   # True while a nested section <ul> is open in the TOC

$inem = 0;
$thischapter = 0;
$thissection = 0;

# Open the source file and get the first line

open (IN, $source_file) || die "Can't open $source_file (2nd pass)\n";
$_ = <IN>;

# Skip on till we hit the start of the first chapter, but note if we
# pass ".set chapter -1", which is used to indicate no chapter numbering for
# the first chapter (which we number as 0). Keep track of whether we are in
# macro definitions or not, and when not, notice occurrences of .index,
# because these are the "x see y" type entries. The definedness test stops
# the loop at end of file when the source contains no .chapter line at all;
# without it the loop would spin for ever on an undefined $_.

while (defined $_ && !/^\.chapter/)
  {
  $thischapter = -1 if /^\.set\s+chapter\s+-1/;
  $inmacro = 1 if /^\.macro/;
  $inmacro = 0 if /^\.endm/;
  if (!$inmacro && /^\.index\s+(.*)/)
    {
    my($key);
    my($s) = $1;
    $s = &handle_text($s, 0);
    $s =~ s/ / /g;                    # All spaces unsplittable
    $key = "\L$s";                    # Sort key: lower-cased text ...
    $key =~ s/<[^>]+>//g;             # ... with markup tags removed ...
    $key =~ s/&#(\d+);/chr($1)/eg;    # ... and numeric entities decoded
    $cindex{$key} = $s;
    }
  $_ = <IN>;
  }

# Open the TOC file

open (TOC, ">$html/${file_base}_toc.html") ||
  die "Can't open $html/${file_base}_toc.html\n";

print TOC "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n";
print TOC "<html>\n<head>\n<title>$doctitle Contents</title>\n</head>\n" .
  "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";
print TOC "<h1>$doctitle</h1><hr>\n<ul>\n";

# Open the data file if we are not splitting at chapters

&openout("$html/${file_base}.html") if !$chapsplit;

# Loop for handling chapters. At the start of this loop, $_ is either
# undefined (end of file) or contains a .chapter line. Definedness rather
# than truth is tested so that a final unterminated line consisting of "0"
# is not mistaken for end of file.

$firstchapter = $thischapter + 1;

while (defined $_)
  {
  print TOC "</ul>\n" if $insection;
  $insection = 0;

  $thischapter++;
  $thissection = 0;
  $lastwasrule = 0;

  # Start a new file if required

  if ($chapsplit)
    {
    &closeout("CHAP") if $thischapter != $firstchapter;
    &openout("$html/${file_base}_$thischapter.html");
    }

  # Set up the chapter title. Save it for the TOC. Set up the anchor and
  # link back to the TOC and show the title.

  $_ =~ /^\.chapter\s+(.*)/;

  my($title) = (($thischapter > 0)? "$thischapter. " : "") . &handle_text($1, 0);

  $tocn++;
  print TOC "<li><a " .
    "name=\"TOC$tocn\" " .
    "href=\"$current_file#CHAP$thischapter\">$title</a></li>\n";

  print OUT "<h1>\n";
  print OUT "<a name=\"CHAP$thischapter\" href=\"${file_base}_toc.html#TOC$tocn\">\n";
  print OUT "$title\n</a></h1>\n";

  # Scan the contents of the chapter

  $_ = <IN>;
  while (defined $_)
    {
    last if /^\.chapter/;

    # Handle the start of a new section, starting a new file if required

    if (/^\.section\s+(.*)/)
      {
      $thissection++;

      print TOC "<ul>\n" if !$insection;
      $insection = 1;

      my($title) = (($thischapter > 0)? "$thischapter.$thissection " :
        "$thissection. ") . &handle_text($1, 0);

      if ($sectsplit)
        {
        &closeout("SECT");
        &openout("$html/${file_base}_$thissection.html");
        }

      $tocn++;
      printf TOC ("<li><a " .
        "name=\"TOC$tocn\" " .
        "href=\"$current_file#SECT%s$thissection\">%s</a></li>\n",
        ($thischapter > 0)? "$thischapter." : "", $title);

      &setpar(0);
      print OUT "<h2>\n";
      printf OUT ("<a name=\"SECT%s$thissection\" ",
        ($thischapter > 0)? "$thischapter." : "");
      print OUT "href=\"${file_base}_toc.html#TOC$tocn\">\n";
      print OUT "$title\n</a></h2>\n";
      $_ = <IN>;
      $lastwasrule = 0;
      }

    # Blank lines at this level are ignored

    elsif (/^\s*$/)
      {
      $_ = <IN>;
      }

    # Directive and non-directive lines are handled independently, though
    # in each case further lines may be read. Afterwards, the next line is
    # in $_. If .em is at the start of a paragraph, treat it with the
    # paragraph, because the matching .nem will be too. Messy!

    elsif (/^\./)
      {
      if (/^\.em\b/)
        {
        $_=<IN>;
        if (/^\./)
          {
          print OUT "<font color=green>" if ! $inem;
          $inem = 1;
          # Used to handle it here - but that fails if it is .section.
          # Just let the next iteration of the loop handle it.
          # &handle_directive();
          }

        else
          {
          # Put the .em back in front of the paragraph text so that the
          # paragraph handler sees both together.
          $_ = ".em\n" . $_;
          &handle_paragraph();
          $lastwasrule = 0;
          $lastwasitem = 0;
          }
        }

      # Not .em

      else
        {
        &handle_directive();
        }
      }

    # Not a directive

    else
      {
      &handle_paragraph();
      $lastwasrule = 0;
      $lastwasitem = 0;
      }

    }                          # Loop for each line in a chapter
  }                            # Loop for each chapter

# Close the last file, end off the TOC, and we are done.

&closeout("");

print TOC "</ul>\n" if $insection;

# Add TOC entries for the indexes only when they have content. This is the
# same test that the main program uses when deciding whether to create the
# index files. (The former "defined %hash" test is a fatal error in Perl
# 5.22 and later.)

if (scalar(keys %cindex) > 0)
  {
  $cindex_tocn = ++$tocn;
  print TOC "<li><a name=\"TOC$tocn\" ".
    "href=\"${file_base}_cindex.html\">Concept Index</a></li>\n";
  }

if (scalar(keys %oindex) > 0)
  {
  $oindex_tocn = ++$tocn;
  print TOC "<li><a name=\"TOC$tocn\" ".
    "href=\"${file_base}_oindex.html\">Option Index</a></li>\n";
  }

print TOC "</ul>\n</body>\n</html>\n";
close(TOC);
close(IN);
}
| 1187 | |
| 1188 | |
| 1189 | |
| 1190 | |
| 1191 | ################################################## |
| 1192 | # Adjust index points # |
| 1193 | ################################################## |
| 1194 | |
| 1195 | # Because of the way the source is written, there are often index entries |
| 1196 | # that immediately follow the start of chapters and sections and the definition |
| 1197 | # of "items" like "helo = verify". This gets the correct page numbers for the |
| 1198 | # PostScript and PDF formats. However, for HTML we want the index anchor to be |
| 1199 | # before the section heading, because browsers tend to put the index point at |
| 1200 | # the top of the screen. So we re-read all the files we've just created, and |
| 1201 | # move some of the index points about. This is necessary only if indexes exist. |
| 1202 | # The files are small enough to be handled entirely in memory. |
| 1203 | |
# Re-read every generated file named ${file_base}_<digits>.html in the $html
# directory and move index anchor lines (<a name="IXnnn"></a>) so that they
# precede the heading, item, or option definition they were written after.
# Each file is rewritten in place.
#
# Takes no arguments; reads the globals $html and $file_base. It uses the
# global handles DIR, IN and OUT. It dies if the directory or any matching
# file cannot be opened, or if a rewritten file cannot be closed cleanly.

sub adjust_index_points {
print "Adjusting index points to precede headings\n";

# Collect the names first so that the directory handle can be closed before
# any file is rewritten. $file_base is quoted so that characters such as "+"
# or "." in it are matched literally.

opendir(DIR, "$html") || die "Failed to opendir $html\n";
my(@files) = grep { /^\Q${file_base}\E_\d+\.html$/ } readdir(DIR);
closedir(DIR);

foreach my $file (@files)
  {
  open(IN, "<", "$html/$file") ||
    die "Failed to open $html/$file (read)\n";
  my(@lines) = <IN>;
  close(IN);

  # Start at 1: every test below looks at the preceding line, and an index
  # of -1 would silently refer to the last line of the file.

  for (my $i = 1; $i < @lines; $i++)
    {
    next unless $lines[$i] =~ /^<a name="IX\d+"><\/a>$/;

    # Handle an index line that follows a heading definition. Move it back
    # to just before the <h1> or whatever. This preserves the order of
    # multiple index lines, not that that matters.

    if ($lines[$i-1] =~ /^<\/a><\/h(\d)>/)
      {
      my($level) = $1;         # Save now; later matches must not disturb it
      my($j);
      my($found) = 0;

      # Search back at most a few lines for the matching opening tag;
      # line 0 is never considered.

      for ($j = $i-2; $j > 0 && $j > $i - 10; $j--)
        {
        if ($lines[$j] =~ /<h$level>/)
          {
          $found = 1;
          last;
          }
        }
      splice(@lines, $j, 0, splice(@lines, $i, 1)) if $found;
      }

    # Handle an index line that follows an "item". Move it back one line.

    elsif ($lines[$i-1] =~ /^<b>.*<\/b>\s*$/)
      {
      splice(@lines, $i-1, 0, splice(@lines, $i, 1));
      }

    # Handle an index line that follows a "conf" definition

    elsif ($i >= 2 && $lines[$i-1] =~ /^<i>Type:<\/i>/ &&
           $lines[$i-2] =~ /^<h3>/)
      {
      splice(@lines, $i-2, 0, splice(@lines, $i, 1));
      }

    # Handle an index line that follows an "option" definition

    elsif ($lines[$i-1] =~ /^<h3>/)
      {
      splice(@lines, $i-1, 0, splice(@lines, $i, 1));
      }
    }

  open(OUT, ">", "$html/$file") ||
    die "Failed to open $html/$file (write)\n";

  # Join explicitly instead of altering the global list separator $".

  print OUT join("", @lines);
  close(OUT) || die "Failed to write $html/$file\n";
  }
}
| 1277 | |
| 1278 | |
| 1279 | |
| 1280 | |
| 1281 | ################################################## |
| 1282 | # Create Index # |
| 1283 | ################################################## |
| 1284 | |
# Write one index page to $html/${file_base}_<name>.html.
#
# Arguments:
#   $_[0]  reference to a hash mapping sort key => HTML text of the entry
#   $_[1]  file name component (the part between "${file_base}_" and ".html")
#   $_[2]  title, used in <title> (after $doctitle) and as the page heading
#
# The page starts with a bar of links to the initial letters A-Z that occur,
# followed by the entries in case-insensitive order with a heading before
# the first entry of each letter. Dies if the file cannot be opened.

sub create_index{
my($hash, $ifname, $ititle) = @_;

open(INDEX, ">$html/${file_base}_$ifname.html") ||
  die "Failed to open $html/${file_base}_$ifname\n";

print INDEX "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n" .
  "<html>\n<head>\n<title>$doctitle $ititle</title>\n" .
  "<base target=\"body\">\n</head>\n";

print INDEX "<body bgcolor=\"#FFFFDF\" text=\"#00005A\" " .
  "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n" .
  "<h3>$ititle</h3>\n";

# The set of upper-cased first characters that are letters; order is
# irrelevant while collecting them.

my(%indexindex) =
  map  { $_ => 1 }
  grep { $_ ge "A" && $_ le "Z" }
  map  { uc(substr($_, 0, 1)) }
  keys %$hash;

# The bar of letter links at the top of the page

print INDEX "<p>\n" .
  join("", map { " &nbsp; <a href=\"#$_\" target=\"index\">$_</a>\n" }
           sort keys %indexindex) .
  "<hr></p>\n";

# Order the keys ignoring case, and ignoring a leading \x93 character (by
# this time the key holds the actual character value, not "&#147;"). Keys
# that differ only in case are ordered by a plain string comparison.

my(@ordered) = sort
  {
  my($left, $right) = ($a, $b);
  $left  =~ s/^\x93//;
  $right =~ s/^\x93//;
  (lc($left) cmp lc($right)) || ($left cmp $right);
  }
  keys %$hash;

# Output the entries, with a heading whenever a new letter is reached

my($letter) = "";
print INDEX "<p>\n";

foreach my $entry (@ordered)
  {
  my($initial) = uc(substr($entry, 0, 1));
  unless ($initial eq $letter || $initial lt "A" || $initial gt "Z")
    {
    print INDEX "<br>\n" .
      "<a name=\"$initial\"></a>\n" .
      "<font size=\"+1\">\U$initial\E</font><br>\n";
    $letter = $initial;
    }
  print INDEX "$$hash{$entry}<br>\n";
  }

print INDEX "</p>\n" . "</body>\n</html>\n";
close(INDEX);
}
| 1353 | |
| 1354 | |
| 1355 | |
| 1356 | |
| 1357 | ################################################## |
| 1358 | # Show usage and die # |
| 1359 | ################################################## |
| 1360 | |
# Report the command line syntax and terminate the script. The message ends
# with a newline, so die adds no file name or line number to it.

sub usage {
my($syntax) = "Usage: g2h [-split no|section|chapter] <source> <title>\n";
die $syntax;
}
| 1364 | |
| 1365 | |
| 1366 | |
| 1367 | ################################################## |
| 1368 | # Entry point and main program # |
| 1369 | ################################################## |
| 1370 | |
| 1371 | |
# Directory in which to put the new HTML files

$html = "html";

# Global variables. The indexes map a sort key to the HTML text of an entry;
# the remaining variables are state shared between the subroutines above.

%cindex = ();
%oindex = ();

$chapsplit = 0;
$cindex_tocn = 0;
$confuse = "";
$file_base = "";
$index_count = 0;
$inem = 0;
$inpar = 0;
$lastwasitem = 0;
$lastwasrule = 0;
$oindex_tocn = 0;
$sectsplit = 0;
$source_file = "";
$thischapter = 0;
$thissection = 0;


# Handle options. Only -split is recognized, and it may be given only once;
# anything else, or a -split without a value, produces the usage message.

my($splitset) = 0;

while (scalar @ARGV > 0 && $ARGV[0] =~ /^-/)
  {
  if ($ARGV[0] eq "-split" && !$splitset)
    {
    $splitset = 1;
    shift @ARGV;
    my($type) = shift @ARGV;
    &usage() if !defined $type;
    if    ($type eq "section") { $sectsplit = 1; }
    elsif ($type eq "chapter") { $chapsplit = 1; }
    elsif ($type eq "no"     ) { $sectsplit = $chapsplit = 0; }
    else                       { &usage(); }
    }
  else { &usage(); }
  }

# Get the source file and its base. The base is the name with the ".src"
# suffix removed; every output file name is built from it, so a name without
# that suffix is rejected rather than silently leaving the base undefined.

&usage() if scalar @ARGV <= 0;
$source_file = shift @ARGV;
($file_base) = $source_file =~ /^(.*)\.src$/;
die "Source file name \"$source_file\" must end in .src\n"
  if !defined $file_base;

&usage() if scalar @ARGV <= 0;
$doctitle = shift @ARGV;

print "\nCreate HTML for $doctitle from $source_file\n";

# Remove the old HTML files, that is, everything matching
# $html/${file_base}_*.html. This is done directly rather than by running
# "rm" through the shell, so that unusual characters in the source file name
# cannot be interpreted as shell syntax. A missing directory is not an error
# here; the failure is reported when the first output file is opened.

print "Removing old HTML files\n";

my($old_dir, $old_stem) = "$html/${file_base}_" =~ m{^(.*)/([^/]*)$};
if (opendir(OLDDIR, $old_dir))
  {
  foreach my $name (readdir(OLDDIR))
    {
    next unless $name =~ /^\Q$old_stem\E.*\.html$/;
    unlink("$old_dir/$name") ||
      warn "Failed to remove $old_dir/$name: $!\n";
    }
  closedir(OLDDIR);
  }

# First pass identifies all the chapters and sections, and collects the
# values of the cross-referencing variables.

print "Scanning for cross-references\n";
&pass_one();

$maxchapter = $thischapter;     # Used if chapter splitting
$maxsection = $thissection;     # Used if section splitting

# Second pass actually creates the HTML files.

print "Creating the HTML files\n";
&pass_two();

# Reprocess for moving some of the index points, if indexes were created

&adjust_index_points() if scalar(keys %cindex) > 0 || scalar(keys %oindex) > 0;

# Finally, we must create the option and concept indexes if any data
# has been collected for them.

if (scalar(keys %cindex) > 0)
  {
  print "Creating concept index\n";
  &create_index(\%cindex, "cindex", "Concepts");
  }

if (scalar(keys %oindex) > 0)
  {
  print "Creating option index\n";
  &create_index(\%oindex, "oindex", "Options");
  }

# End of g2h