The Exim FAQ

#!/usr/bin/perl

# Script to turn the Exim FAQ into HTML.
#
# Usage:  <this script> <faq-text-file> <html-output-directory>
#
# The FAQ text file is first rewritten in place with trailing spaces and tabs
# removed from every line. It is then read twice in paragraph mode: the first
# pass collects the table of contents and the cross-reference targets, and the
# second pass writes FAQ.html plus one FAQ_<n>.html file per section into the
# output directory.
#
# NOTE(review): this copy of the script was recovered from a rendering in
# which the HTML tags and entities inside the string literals had been
# removed. The markup below has been rebuilt from what survived (closing tags
# such as </b>, </i>, </tt>, </small>, </a> and </font>, the inline comments,
# the $inul flag, the TOC<n> anchor scheme and the printf arguments). Wherever
# a value could not be recovered it is marked NOTE(review); confirm those
# against a pristine copy before relying on the exact output.

use strict;
use warnings;
use integer;    # ($nsec + 1)/2 below relies on integer division

# Cross-reference table: four-digit question number => "file#anchor". It is
# filled in by the first pass and read by process_non_display().
my %xref;

# NOTE(review): neither of these two values is recoverable from the damaged
# text; they are assumptions that must be confirmed.
my $KWIC_INDEX_HREF    = "FAQ-KWIC_A.html";    # target of the KWIC index link
my $ANSWER_LABEL_COLOR = "#FF6600";            # colour of the "Annnn:" label


# Function to do text conversions that apply to both displays and non
# displays: angle brackets are replaced by numeric character references so
# that FAQ text can never be taken for markup. Returns the converted copy.

sub process_both {
    my ($s) = @_;
    $s =~ s/</&#60;/g;
    $s =~ s/>/&#62;/g;
    return $s;
}


# Function to do text conversions to display paragraphs. The leading "==>"
# flag is turned into three spaces, the common indentation beyond those
# three columns is removed from every line, and angle brackets are escaped.
# Returns the converted copy.

sub process_display {
    my ($s) = @_;
    $s =~ s/^==>/   /;
    my ($indent) = $s =~ /^(\s+)/;
    my $remove = " " x (length($indent) - 3);
    $s =~ s/^$remove//mg;
    return process_both($s);
}


# Function to do text conversions to paragraphs not in displays. Angle
# brackets are escaped first, then the FAQ's backslash-delimited font
# markup is turned into HTML, and finally question and configuration-sample
# references are turned into links. Returns the converted copy.

sub process_non_display {
    my ($s) = process_both($_[0]);

    # The @ signs in the marker must be escaped: written bare, Perl
    # interpolates an array called @backslash and the marker silently
    # degenerates to "@@@".
    $s =~ s/@\\/\@\@backslash\@\@/g;              # @\ temporarily hidden

    $s =~ s/\\#/&nbsp;/g;                         # \# is a hard space

    $s =~ s/\\\*\*([^*]*)\*\*\\/<b>$1<\/b>/g;     # \**...**\ => bold
    $s =~ s/\\\*([^*]*)\*\\/<i>$1<\/i>/g;         # \*.....*\ => italic
    $s =~ s/\\"([^"]*)"\\/<tt>$1<\/tt>/g;         # \"....."\ => fixed pitch
    $s =~ s/\\\$([^\$]*)\$\\/<i>\$$1<\/i>/g;      # \$.....$\ => $italic
    $s =~ s/\\\\([^\\]*)\\\\/<small>$1<\/small>/g; # \\.....\\ => small
    $s =~ s/\\\(([^)]*)\)\\/<i>$1<\/i>/g;         # \(.....)\ => italic
    $s =~ s/\\-([^\\]*)-\\/<b>-$1<\/b>/g;         # \-.....-\ => -bold
    $s =~ s/\\\[([^]]*)\]\\/&\#60;<i>$1<\/i>&\#62;/gx; # \[.....]\ => <italic>
    $s =~ s/\\\?(.*?)\?\\/<a href="$1">$1<\/a>/g; # \?.....?\ => URL
    $s =~ s/\\\^\^([^^]*)\^\^\\/<i>$1<\/i>/g;     # \^^...^^\ => italic
    $s =~ s/\\\^([^^]*)\^\\/<i>$1<\/i>/g;         # \^.....^\ => italic
    $s =~ s/\\%([^%]*)%\\/<b>$1<\/b>/g;           # \%.....%\ => bold
    $s =~ s/\\\/([^\/]*)\/\\/<i>$1<\/i>/g;        # \/...../\ => italic
    $s =~ s/\\([^\\]+)\\/<tt>$1<\/tt>/g;          # \.......\ => fixed pitch

    $s =~ s"//([^/\"]*)//"<i>$1</i>"g;            # //.....// => italic
    $s =~ s/::([^:]*)::/<i>$1:<\/i>/g;            # ::.....:: => italic:

    # NOTE(review): the recovered text shows curly double quotes here; the
    # exact character references originally used are not recoverable.
    $s =~ s/``(.*?)''/&#8220;$1&#8221;/g;         # ``.....'' => quoted text

    $s =~ s/\s*\[\[br\]\]\s*/<br>/g;              # [[br]] => <br>

    $s =~ s/\@\@backslash\@\@/\\/g;               # Put back single backslash

    $s =~ s/^(\s*\(\d\)\s)/$1&nbsp;/;             # Extra space after (1), etc.

    # Cross references within paragraphs. "Qnnnn" followed by a colon is a
    # question's own label and is left alone. A reference is only turned into
    # a link when its target is already known, so that a forward or mistyped
    # reference does not produce a link with an empty target.

    $s =~ s{(Q(\d{4}))(?!:)}
           { exists $xref{$2} ? qq{<a href="$xref{$2}">$1</a>} : $1 }ge;

    # References to configuration samples.
    # NOTE(review): the link target is an assumption (sample name + ".txt").

    $s =~ s{\b([CFLS]\d\d\d)\b}{<a href="$1.txt">$1</a>}g;

    # Remove white space at the start of lines in the middle of paragraphs,
    # to keep the file smaller (and for human reading when debugging).

    $s =~ s/^\s+//mg;

    return $s;
}


# Main program

die "usage: $0 faq-text-file html-directory\n" unless @ARGV >= 2;
my ($faq_file, $hdir) = @ARGV;

# We want to read the file paragraph by paragraph; Perl only does this if the
# separating lines are truly blank. Having been caught by lines containing
# whitespace before, do a detrailing pass first. The cleaned copy is written
# to a temporary file which then replaces the input file.

my $tmp_file = "$faq_file-$$";

open(my $in, '<', $faq_file)
    or die "can't open $faq_file (preliminary)\n";
open(my $out, '>', $tmp_file)
    or die "can't open $tmp_file\n";

while (my $line = <$in>) {
    $line =~ s/[ \t]+$//;
    print {$out} $line;
}

close($in);

# Buffered write errors only surface at close; check before the rename so
# that a failed write cannot replace the FAQ with a truncated copy.
close($out) or die "can't write $tmp_file\n";
rename($tmp_file, $faq_file)
    or die "can't rename $tmp_file as $faq_file\n";

# The second argument is the name of a directory into which to put multiple
# HTML files. We start off with FAQ.html.

open($out, '>', "$hdir/FAQ.html") or die "can't open $hdir/FAQ.html\n";

# Initial output.
# NOTE(review): any attributes the original <body> tag carried are lost.

print {$out} <<"End";
<html>
<head>
<title>The Exim FAQ</title>
</head>
<body>
<h1>The Exim FAQ</h1>
End

$/ = "";    # paragraph mode from here on

# First pass to read the titles and questions and create the table of
# contents. We save it up in a vector so that it can be written after the
# introductory paragraphs.

my @toc_lines;       # HTML of the list of questions, in output order
my @seclist;         # one entry per section, for the two-column contents table
my $anchor = 0;      # running TOC<n> anchor number (headings and questions)
my $sec    = -1;     # current section number; -1 before the first heading
my $inul   = 0;      # true while a <ul> is open in @toc_lines

open($in, '<', $faq_file) or die "can't open $faq_file (first time)\n";

while (my $para = <$in>) {
    my $count = ($para =~ tr/\n//) - 1;    # Number of lines in paragraph

    if ($count == 1 && $para =~ /^\d+\./) {    # Look for headings
        chomp $para;
        push @toc_lines, "</ul>" if $inul;
        $inul = 0;
        push @toc_lines, "<br>\n\n" if $sec++ >= 0;
        push @toc_lines,
            "<h3><a href=\"FAQ_$sec.html#TOC$anchor\">$para</a></h3>\n";
        $anchor++;

        # Section titles are upper case in the text; all but the initial
        # letter are lowercased for the contents table, except for a few
        # names and acronyms.
        my ($number, $title) = $para =~ /^(\d+)\.\s+(.*)$/;
        if ($title ne "UUCP" && $title ne "IRIX" &&
            $title ne "BSDI" && $title ne "HP-UX") {
            my ($initial, $rest) = $title =~ /^(.)(.*)$/;
            $title = "$initial\L$rest";
            $title =~ s/isdn/ISDN/;
            $title =~ s/\btls\b/TLS/;
            $title =~ s/\bssl\b/SSL/;
            $title =~ s/ os x/ OS X/;
        }

        # NOTE(review): the markup between number and title is not
        # recoverable; a link to the section file is assumed.
        push @seclist, "<a href=\"FAQ_$sec.html\">$number.&nbsp;$title</a>";
        next;
    }

    if ($para =~ /^(Q\d{4})(.*)\z/s) {    # Q initial paragraph
        my ($num, $rest) = ($1, $2);
        if (!$inul) {
            push @toc_lines, "<ul>\n";
            $inul = 1;
        }
        my $target = "FAQ_$sec.html#TOC$anchor";
        $xref{substr($num, 1)} = $target;
        $rest =~ s/^: /:&nbsp;&nbsp;/;
        $rest = process_non_display($rest);
        push @toc_lines, "<li><a href=\"$target\">$num</a>$rest<br>\n";
        $anchor++;
        next;
    }
}

push @toc_lines, "</ul>\n" if $inul;
close($in);

# This is the main processing pass. We have to detect the different kinds of
# "paragraph" and do appropriate things.

open($in, '<', $faq_file) or die "can't open $faq_file (second time)\n";

# Skip the title line

my $skipped_title = <$in>;

# Handle the rest of the file. The anchor counter is restarted and is
# advanced for the same paragraphs as in the first pass, so the TOC<n>
# anchors written here match the targets recorded there.

$anchor = 0;
my $maxsec = $sec;
$sec = -1;

while (my $para = <$in>) {
    my $count = ($para =~ tr/\n//) - 1;    # Number of lines in paragraph
    chomp $para;                           # Trailing newlines

    if ($para =~ /^The FAQ is divided into/) {
        my $nsec = scalar(@seclist);
        my $cols = ($nsec + 1)/2;          # rows in the two-column table

        print {$out} "<hr><h2>Index</h2>\n";
        print {$out} "<p>A <i>Keyword-in-context</i> " .
            "<a href=\"$KWIC_INDEX_HREF\">index</a> " .
            "to the questions is available. This is usually the " .
            "quickest way to find information in the FAQ.</p>\n";

        print {$out} "<hr><a name=\"contents\"></a><h2>Contents</h2>\n";
        print {$out} "<p>The FAQ is divided into the following sections:</p>\n";
        print {$out} "<table>\n";

        # Row $i holds section $i on the left and section $cols+$i, if
        # there is one, on the right.
        for (my $i = 0; $i < $cols; $i++) {
            print {$out} "<tr>\n";
            print {$out} "  <td>", "&nbsp;" x 4, "</td>\n";
            print {$out} "  <td>$seclist[$i]</td>\n";
            print {$out} "  <td>", "&nbsp;" x 8, "$seclist[$cols+$i]</td>\n"
                if $cols+$i < $nsec;
            print {$out} "</tr>\n";
        }

        print {$out} "</table>\n<hr>\n";
        print {$out} "<h2>List of questions</h2>\n";
        my $skipped_sections = <$in>;      # Skip section list
        next;
    }

    if ($count == 1 && $para =~ /^\d+\./) {    # Look for headings
        # TOC when hit first heading
        print {$out} shift @toc_lines while @toc_lines;

        # Output links at the bottom of this page

        print {$out} "<hr><br>\n";
        print {$out} "<a href=\"FAQ.html#contents\">Contents</a>&nbsp;&nbsp;\n";
        if ($sec > 0) {
            printf {$out} ("<a href=\"FAQ_%d.html\">Previous</a>&nbsp;&nbsp;\n",
                $sec-1);
        }
        printf {$out} ("<a href=\"FAQ_%d.html\">Next</a>\n", $sec+1);

        # New section goes in new file

        print {$out} "</body>\n</html>\n";
        close($out) or die "can't write HTML output\n";
        $sec++;
        open($out, '>', "$hdir/FAQ_$sec.html")
            or die "Can't open $hdir/FAQ_$sec.html\n";

        print {$out} "<html>\n<head>\n" .
            "<title>The Exim FAQ Section $sec</title>\n" .
            "</head>\n" .
            "<body>\n";
        print {$out} "<h1>The Exim FAQ</h1>\n";

        print {$out} "<a href=\"FAQ.html#contents\">Contents</a>&nbsp;&nbsp;\n";
        if ($sec > 0) {
            printf {$out} ("<a href=\"FAQ_%d.html\">Previous</a>&nbsp;&nbsp;\n",
                $sec-1);
        }
        if ($sec < $maxsec) {
            printf {$out} ("<a href=\"FAQ_%d.html\">Next</a>\n", $sec+1);
        }
        print {$out} "<hr><br>\n";

        print {$out} "<h3><a name=\"TOC$anchor\">$para</a></h3>\n";
        $anchor++;
        next;
    }

    # Hard spaces after a question, answer or configuration-sample label.
    $para =~ s/^([QA]\d{4}|[CFLS]\d{3}): /$1:&nbsp;&nbsp;/;

    if ($para =~ /^(Q\d{4}:)(.*)\z/s) {    # Q initial paragraph
        my ($label, $text) = ($1, $2);
        print {$out} "<p>\n<a name=\"TOC$anchor\">$label</a>";
        $text = process_non_display($text);
        print {$out} "$text\n</p>\n";
        $anchor++;
        next;
    }

    if ($para =~ /^A\d{4}:/) {             # A initial paragraph
        $para = process_non_display($para);
        $para =~ s/^(A\d{4}:)/<font color="$ANSWER_LABEL_COLOR">$1<\/font>/;
        print {$out} "<p>\n$para\n</p>\n";
        next;
    }

    # If a paragraph begins ==> it is a display which must remain verbatim
    # and not be reformatted. The flag gets turned into spaces.

    if ($para =~ /^==>/) {
        $para = process_display($para);
        chomp $para;
        print {$out} "<pre>\n$para</pre>\n";
    }

    # The end-of-FAQ marker produces no output.

    elsif ($para =~ /^\*\*\* End of Exim FAQ \*\*\*/) {
    }

    # Non-display paragraph; massage the final line & my sig.

    else {
        $para = process_non_display($para);
        if ($para =~ /^Philip Hazel/) {
            $para =~ s/\n/<br>\n/g;
            # NOTE(review): kept exactly as recovered, where it replaces a
            # trailing space with a space; the original presumably contained
            # markup that has been lost.
            $para =~ s/ $/ /;
        }
        print {$out} "<p>\n$para\n</p>\n";
    }
}

close($in);

# Links at the bottom of the last section's page.

print {$out} "<hr><br>\n";
print {$out} "<a href=\"FAQ.html#contents\">Contents</a>&nbsp;&nbsp;\n";
printf {$out} ("<a href=\"FAQ_%d.html\">Previous</a>\n", $sec-1);
print {$out} "</body>\n</html>\n";
close($out) or die "can't write HTML output\n";

# End