doc/doc-scripts/f2h

   1 #!/usr/bin/perl
   2 # $Cambridge: exim/doc/doc-scripts/f2h,v 1.1 2004/10/07 15:04:35 ph10 Exp $
   3
   4 # Script to turn the Exim FAQ into HTML.
   5
   6 use integer;
   7
   8 # Function to do text conversions that apply to both displays and non displays
   9
  10 sub process_both {
  11 my($s) = $_[0];
  12 $s =~ s/</&#60;/g;                                 # Deal with < and >
  13 $s =~ s/>/&#62;/g;
  14 return $s;
  15 }
  16
  17
  18 # Function to do text conversions to display paragraphs
  19
  20 sub process_display {
  21 my($s) = $_[0];
  22 $s =~ s/^==>/   /;
  23 my($indent) = $s =~ /^(\s+)/;
  24 my($remove) = " " x (length($indent) - 3);
  25 $s =~ s/^$remove//mg;
  26 $s = &process_both($s);
  27 return $s;
  28 }
  29
  30
  31 # Function to do text conversions to paragraphs not in displays.
  32
  33 sub process_non_display {
  34 my($s) = &process_both($_[0]);
  35
  36 $s =~ s/@\\/@@backslash@@/g;                       # @\ temporarily hidden
  37
  38 $s =~ s/\\#/&nbsp;/g;                              # \# is a hard space
  39
  40 $s =~ s/\\\*\*([^*]*)\*\*\\/<b>$1<\/b>/g;          # \**...**\   => bold
  41 $s =~ s/\\\*([^*]*)\*\\/<i>$1<\/i>/g;              # \*.....*\   => italic
  42 $s =~ s/\\"([^"]*)"\\/<tt>$1<\/tt>/g;              # \"....."\   => fixed pitch
  43 $s =~ s/\\\$([^\$]*)\$\\/<i>\$$1<\/i>/g;           # \$.....$\   => $italic
  44 $s =~ s/\\\\([^\\]*)\\\\/<small>$1<\/small>/g;     # \\.....\\   => small
  45 $s =~ s/\\\(([^)]*)\)\\/<i>$1<\/i>/g;              # \(.....)\   => italic
  46 $s =~ s/\\-([^\\]*)-\\/<b>-$1<\/b>/g;              # \-.....-\   => -bold
  47 $s =~ s/\\\[([^]]*)\]\\/&\#60;<i>$1<\/i>&\#62;/gx; # \[.....]\   => <italic>
  48 $s =~ s/\\\?(.*?)\?\\/<a href="$1">$1<\/a>/g;      # \?.....?\   => URL
  49 $s =~ s/\\\^\^([^^]*)\^\^\\/<i>$1<\/i>/g;          # \^^...^^\   => italic
  50 $s =~ s/\\\^([^^]*)\^\\/<i>$1<\/i>/g;              # \^.....^\   => italic
  51 $s =~ s/\\%([^%]*)%\\/<b>$1<\/b>/g;                # \%.....%\   => bold
  52 $s =~ s/\\\/([^\/]*)\/\\/<i>$1<\/i>/g;             # \/...../\   => italic
  53 $s =~ s/\\([^\\]+)\\/<tt>$1<\/tt>/g;               # \.......\   => fixed pitch
  54
  55 $s =~ s"//([^/\"]*)//"<i>$1</i>"g;                 # //.....//   => italic
  56 $s =~ s/::([^:]*)::/<i>$1:<\/i>/g;                 # ::.....::   => italic:
  57
  58 $s =~ s/``(.*?)''/&#147;$1&#148;/g;                # ``.....''   => quoted text
  59
  60 $s =~ s/\s*\[\[br\]\]\s*/<br>/g;                   # [[br]]      => <br>
  61
  62 $s =~ s/@@backslash@@/\\/g;                        # Put back single backslash
  63
  64 $s =~ s/^(\s*\(\d\)\s)/$1&nbsp;/;                  # Extra space after (1), etc.
  65
  66 # Cross references within paragraphs
  67
  68 $s =~ s/Q(\d{4})(?!:)/<a href="$xref{$1}">$&<\/a>/xg;
  69
  70 # References to configuration samples
  71
  72 $s =~ s/\b([CFLS]\d\d\d)\b/<a href="$1.txt">$1<\/a>/g;
  73
  74 # Remove white space preceding a newline in the middle of paragraphs,
  75 # to keep the file smaller (and for human reading when debugging).
  76
  77 $s =~ s/^\s+//mg;
  78
  79 return $s;
  80 }
  81
  82
  83 # Main program
  84
  85 # We want to read the file paragraph by paragraph; Perl only does this if the
  86 # separating lines are truly blank. Having been caught by lines containing
  87 # whitespace before, do a detrailing pass first.
  88
  89 open(IN, "$ARGV[0]") || die "can't open $ARGV[0] (preliminary)\n";
  90 open(OUT, ">$ARGV[0]-$$") || die "can't open $ARGV[0]-$$\n";
  91 while (<IN>)
  92   {
  93   s/[ \t]+$//;
  94   print OUT;
  95   }
  96 close(IN);
  97 close(OUT);
  98 rename("$ARGV[0]-$$", "$ARGV[0]") ||
  99   die "can't rename $ARGV[0]-$$ as $ARGV[0]\n";
 100
 101 # The second argument is the name of a directory into which to put multiple
 102 # HTML files. We start off with FAQ.html.
 103
 104 $hdir = $ARGV[1];
 105 open(OUT, ">$hdir/FAQ.html") || die "can't open $hdir/FAQ.html\n";
 106
 107 # Initial output
 108
 109 print OUT <<End ;
 110 <html>
 111 <head>
 112 <title>The Exim FAQ</title>
 113 </head>
 114 <body bgcolor="#F8F8F8" text="#00005A" link="#0066FF" alink="#0066FF" vlink="#000099">
 115 <h1>The Exim FAQ</h1>
 116 End
 117
 118 $/ = "";
 119
 120 # First pass to read the titles and questions and create the table of
 121 # contents. We save it up in a vector so that it can be written after the
 122 # introductory paragraphs.
 123
 124 open(IN, "$ARGV[0]") || die "can't open $ARGV[0] (first time)\n";
 125
 126 $toc = 0;
 127 $sec = -1;
 128 $inul = 0;
 129
 130 while ($_ = <IN>)
 131   {
 132   $count = s/\n/\n/g - 1;          # Number of lines in paragraph
 133
 134   if ($count == 1 && /^\d+\./)     # Look for headings
 135     {
 136     chomp;
 137     push @toc, "</ul>" if $inul;
 138     $inul = 0;
 139     push @toc, "<br>\n\n" if $sec++ >= 0;
 140     push @toc, "<a name=\"TOC$toc\" href=\"FAQ_$sec.html\">$_</a>\n";
 141     $toc++;
 142
 143     ($number,$title) = /^(\d+)\.\s+(.*)$/;
 144     if ($title ne "UUCP" && $title ne "IRIX" && $title ne "BSDI" &&
 145         $title ne "HP-UX")
 146       {
 147       ($initial,$rest) = $title =~ /^(.)(.*)$/;
 148       $title = "$initial\L$rest";
 149       $title =~ s/isdn/ISDN/;
 150       $title =~ s/\btls\b/TLS/;
 151       $title =~ s/\bssl\b/SSL/;
 152       $title =~ s/ os x/ OS X/;
 153       }
 154     push @seclist, "<a href=\"FAQ_$sec.html\">$number. $title</a>";
 155
 156     next;
 157     }
 158
 159   if (/^(Q\d{4})/)                 # Q initial paragraph
 160     {
 161     if (!$inul)
 162       {
 163       push @toc, "<ul>\n";
 164       $inul = 1;
 165       }
 166     $num = $1;
 167     $rest = $';
 168     $xref{substr($num,1)} = "FAQ_$sec.html#TOC$toc";
 169     $rest =~ s/^: /:&nbsp;&nbsp;/;
 170     $rest = &process_non_display($rest);
 171     push @toc, "<li><a name=\"TOC$toc\" href=\"FAQ_$sec.html#TOC$toc\">$num</a>$rest<br><br></li>\n";
 172     $toc++;
 173     next;
 174     }
 175   }
 176
 177 push @toc, "</ul>\n" if $inul;
 178 close(IN);
 179
 180
 181 # This is the main processing pass. We have to detect the different kinds of
 182 # "paragraph" and do appropriate things.
 183
 184 open(IN, "$ARGV[0]") || die "can't open $ARGV[0] (second time)\n";
 185
 186 # Skip the title line
 187
 188 $_ = <IN>;
 189
 190 # Handle the rest of the file
 191
 192 $toc = 0;
 193 $maxsec = $sec;
 194 $sec = -1;
 195
 196 while ($_ = <IN>)
 197   {
 198   $count = s/\n/\n/g - 1;          # Number of lines in paragraph
 199   chomp;                           # Trailing newlines
 200
 201   if (/^The FAQ is divided into/)
 202     {
 203     my($count) = scalar(@seclist);
 204     my($cols) = ($count + 1)/2;
 205
 206     print OUT "<hr><a name=\"TOC\"><h1>Index</h1></a>\n";
 207     print OUT "<p>A <i>Keyword-in-context</i> <a href=\"FAQ-KWIC_A.html\">index</a> " .
 208               "to the questions is available. This is usually the " .
 209               "quickest way to find information in the FAQ.</p>\n";
 210
 211     print OUT "<h1>Contents</h1>\n";
 212     print OUT "<p>The FAQ is divided into the following sections:<br><br></p>\n";
 213
 214     print OUT "<table>\n";
 215
 216     for ($i = 0; $i < $cols; $i++)
 217       {
 218       print OUT "<tr>\n";
 219       print OUT "  <td>", "&nbsp;" x 4, "</td>\n";
 220       print OUT "  <td>&nbsp;$seclist[$i]</td>\n";
 221       print OUT "  <td>", "&nbsp;" x8, "$seclist[$cols+$i]</td>\n"
 222         if $cols+$i < $count;
 223       print OUT "</tr>\n";
 224       }
 225     print OUT "</table><br><p>\n<hr><br>\n";
 226     print OUT "<h1>List of questions</h1>\n";
 227
 228     $_ = <IN>;                     # Skip section list
 229     next;
 230     }
 231
 232   if ($count == 1 && /^\d+\./)     # Look for headings
 233     {
 234     if (@toc != 0)                 # TOC when hit first heading
 235       {
 236       while (@toc != 0) { print OUT shift @toc; }
 237       }
 238
 239     # Output links at the bottom of this page
 240
 241     print OUT "<hr><br>\n";
 242     print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
 243     if ($sec > 0)
 244       {
 245       printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>&nbsp;&nbsp;\n", $sec-1);
 246       }
 247     printf OUT ("<a href=\"FAQ_%d.html\">Next</a>\n", $sec+1);
 248
 249     # New section goes in new file
 250
 251     print OUT "</body>\n</html>\n";
 252     close OUT;
 253
 254     $sec++;
 255     open(OUT, ">$hdir/FAQ_$sec.html") ||
 256       die "Can't open $hdir/FAQ_$sec.html\n";
 257
 258     print OUT "<html>\n<head>\n" .
 259       "<title>The Exim FAQ Section $sec</title>\n" .
 260       "</head>\n" .
 261       "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" " .
 262       "link=\"#FF6600\" alink=\"#FF9933\" vlink=\"#990000\">\n";
 263
 264     printf OUT "<h1>The Exim FAQ</h1>\n";
 265
 266     print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
 267     if ($sec > 0)
 268       {
 269       printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>&nbsp;&nbsp;\n", $sec-1);
 270       }
 271     if ($sec < $maxsec)
 272       {
 273       printf OUT ("<a href=\"FAQ_%d.html\">Next</a>\n", $sec+1);
 274       }
 275
 276     print OUT "<hr><br>\n";
 277
 278     print OUT "<h2><a href=\"FAQ.html#TOC$toc\">$_</a></h2>\n";
 279     $toc++;
 280     next;
 281     }
 282
 283   s/^([QA]\d{4}|[CFLS]\d{3}): /$1:&nbsp;&nbsp;/;
 284
 285   if (/^(Q\d{4}:)/)               # Q initial paragraph
 286     {
 287     print OUT "<p>\n<a name=\"TOC$toc\" href=\"FAQ.html#TOC$toc\">$1</a>";
 288     $_ = &process_non_display($');
 289     print OUT "$_\n</p>\n";
 290     $toc++;
 291     next;
 292     }
 293
 294   if (/^A\d{4}:/)                 # A initial paragraph
 295     {
 296     $_ = &process_non_display($_);
 297     s/^(A\d{4}:)/<font color="#00BB00">$1<\/font>/;
 298     print OUT "<p>\n$_\n</p>\n";
 299     next;
 300     }
 301
 302   # If a paragraph begins ==> it is a display which must remain verbatin
 303   # and not be reformatted. The flag gets turned into spaces.
 304
 305   if ($_ =~ /^==>/)
 306     {
 307     $_ = &process_display($_);
 308     chomp;
 309     print OUT "<pre>\n$_</pre>\n";
 310     }
 311
 312   # Non-display paragraph; massage the final line & my sig.
 313
 314   elsif (/^\*\*\* End of Exim FAQ \*\*\*/)
 315     {
 316     }
 317
 318   else
 319     {
 320     $_ = &process_non_display($_);
 321     if (/^Philip Hazel/)
 322       {
 323       s/\n/<br>\n/g;
 324       s/<br>$/<hr><br>/;
 325       }
 326     print OUT "<p>\n$_\n</p>\n";
 327     }
 328   }
 329
 330 close(IN);
 331
 332 print OUT "<hr><br>\n";
 333 print OUT "<a href=\"FAQ.html#TOC\">Contents</a>&nbsp;&nbsp;\n";
 334 printf OUT ("<a href=\"FAQ_%d.html\">Previous</a>\n", $sec-1);
 335
 336 print OUT "</body>\n</html>\n";
 337 close(OUT);
 338 End