#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.1 2005/06/16 10:32:31 ph10 Exp $

# Script to pre-process XML input before processing it for various purposes.
# The XML is read from standard input and the result is written to standard
# output. Options specify which transformations are to be done. Monospaced
# literal layout blocks are never touched.

# Changes:

# -abstract: Remove the <abstract> element (it must sit on a single line)

# -ascii:    Replace U+2026 (horizontal ellipsis)      by ...
#            Replace U+2019 (right single quote)       by '
#            Replace U+201C (left double quote)        by "
#            Replace U+201D (right double quote)       by "
#            Replace U+2013 (en dash)                  by -
#            Replace U+2020 (dagger)                   by *
#            Replace U+2021 (double dagger)            by **
#            Replace U+00A0 (no-break space)           by a space
#            Replace U+00A9 (copyright sign)           by (c)
#            Each character is recognised as a numeric character reference
#            (decimal such as &#8230; or hexadecimal such as &#x2019;) and
#            also as a raw UTF-8 byte sequence.
#            Put quotes round <literal> text
#            Put quotes round <quote> text

# -bookinfo: Remove the <bookinfo> element from the file

# -fi:       Replace "fi" by &#xFB01; except when it is in an XML element, or
#            inside a <literal>.

# -noindex   Remove the XML to generate a Concept and an Options index.
# -oneindex  Ditto, but add XML to generate a single index.

# NOTE(review): the copy of this script under review had its XML tags and
# character references stripped or decoded. The markup below was restored
# from the surviving regex fragments and the option descriptions above;
# confirm the exact spellings against a pristine copy.

use strict;
use warnings;

# Option flags and parser state. They are package variables because
# process() reads $ascii and $ligatures directly.
our $abstract  = 0;    # -abstract given
our $ascii     = 0;    # -ascii given
our $bookinfo  = 0;    # -bookinfo given
our $inliteral = 0;    # currently inside <literal>...</literal>
our $ligatures = 0;    # -fi given
our $madeindex = 0;    # the single replacement index has been written
our $noindex   = 0;    # -noindex given
our $oneindex  = 0;    # -oneindex given

# ASCII replacement for each code point handled by -ascii.
my %ASCII_FOR = (
    0x2026 => '...',
    0x2019 => q{'},
    0x201C => q{"},
    0x201D => q{"},
    0x2013 => '-',
    0x2020 => '*',
    0x2021 => '**',
    0x00A0 => ' ',
    0x00A9 => '(c)',
);

# The same replacements keyed by the UTF-8 byte sequence of the character.
# STDIN is read as bytes, so raw characters have to be matched as bytes.
my %ASCII_FOR_RAW;
for my $code_point (keys %ASCII_FOR) {
    my $raw = chr $code_point;
    utf8::encode($raw);
    $ASCII_FOR_RAW{$raw} = $ASCII_FOR{$code_point};
}
my $RAW_CHAR_RE = do {
    my $alternatives = join '|', map { quotemeta } sort keys %ASCII_FOR_RAW;
    qr/$alternatives/;
};


# Map one numeric character reference to its ASCII replacement. Arguments
# are the complete reference text, its hex digits (undef for a decimal
# reference) and its decimal digits (undef for a hex reference). A reference
# that is not in the table is returned unchanged.

sub _ascii_for_reference {
    my ($reference, $hex_digits, $dec_digits) = @_;

    my $code_point = defined($hex_digits) ? hex($hex_digits) : $dec_digits + 0;

    return exists $ASCII_FOR{$code_point} ? $ASCII_FOR{$code_point} : $reference;
}


# The function that processes non-literal monospaced text. It takes one
# string and returns the transformed copy; which transformations are applied
# depends on $ligatures and $ascii.

sub process {
    my ($s) = @_;

    # The lookahead rejects an "fi" that is followed by ">" with no "<" or
    # ">" in between, i.e. one that lies inside an XML tag.
    $s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;

    if ($ascii) {
        # Digit counts are bounded so that hex() never sees an oversized value.
        $s =~ s{(&#(?:x([0-9A-Fa-f]{1,6})|([0-9]{1,7}));)}
               {_ascii_for_reference($1, $2, $3)}ge;
        $s =~ s/($RAW_CHAR_RE)/$ASCII_FOR_RAW{$1}/g;
        $s =~ s/<quote>/"/g;
        $s =~ s/<\/quote>/"/g;
    }

    return $s;
}


# Print one input line that is not part of a monospaced literal block,
# keeping track of which parts are in <literal> and which are not. Text
# outside <literal> goes through process(); text inside is copied verbatim.
# $inliteral persists between calls, so a <literal> may span several lines.

sub _print_line {
    my ($rest) = @_;

    while (1) {
        if ($inliteral) {
            if ($rest =~ m{^(.*?)</literal>(.*)$}) {
                my ($inside, $after) = ($1, $2);
                print $inside;
                print q{"} if $ascii;
                print '</literal>';
                $inliteral = 0;
                $rest = "$after\n";    # (.*)$ stops before the newline
            }
            else {
                print $rest;
                last;
            }
        }

        # Not in literal state

        else {
            if ($rest =~ m{^(.*?)<literal>(.*)$}) {
                my ($before, $after) = ($1, $2);
                print process($before);
                print '<literal>';
                print q{"} if $ascii;
                $inliteral = 1;
                $rest = "$after\n";
            }
            else {
                print process($rest);
                last;
            }
        }
    }    # Loop for different parts of one line

    return;
}


# The main program: decode the options in @ARGV, then filter STDIN to STDOUT.
# Dies on an unrecognised option.

sub main {
    for my $arg (@ARGV) {
        if    ($arg eq '-fi')       { $ligatures = 1; }
        elsif ($arg eq '-abstract') { $abstract  = 1; }
        elsif ($arg eq '-ascii')    { $ascii     = 1; }
        elsif ($arg eq '-bookinfo') { $bookinfo  = 1; }
        elsif ($arg eq '-noindex')  { $noindex   = 1; }
        elsif ($arg eq '-oneindex') { $oneindex  = 1; }
        else  { die "** Pre-xml: Unknown option \"$arg\"\n"; }
    }

    # The input is read explicitly from STDIN: @ARGV holds option flags, so
    # the <> operator would try to open them as files.

    LINE:
    while (defined(my $line = <STDIN>)) {

        # Remove <abstract> if required

        next LINE if $abstract && $line =~ m{^\s*<abstract>};

        # Remove <bookinfo> if required

        if ($bookinfo && $line =~ m{^<bookinfo}) {
            while (defined($line = <STDIN>)) {
                last if $line =~ m{^</bookinfo};
            }
            next LINE;
        }

        # Copy monospaced literallayout blocks

        if ($line =~ m{^<literallayout class="monospaced">}) {
            print $line;
            while (defined($line = <STDIN>)) {
                print $line;
                last if $line =~ m{^</literallayout>};
            }
            next LINE;
        }

        # Adjust index-generation code if required

        if (($noindex || $oneindex) && $line =~ m{^<index[\s>]}) {
            while (defined($line = <STDIN>)) {
                last if $line =~ m{^</index>};
            }

            # Only the first index element removed is replaced.
            if ($oneindex && !$madeindex) {
                $madeindex = 1;
                print "<index><title>Index</title></index>\n";
            }

            next LINE;
        }

        _print_line($line);
    }    # Loop for multiple lines

    return;
}

# Run the filter only when executed as a script, so that the file can also
# be loaded by a test harness without consuming STDIN.
main() unless caller;

1;

# End