First pass update for 4.60 documentation.
[exim.git] / doc / doc-docbook / Pre-xml
#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.2 2005/11/10 12:30:13 ph10 Exp $

# Script to pre-process XML input before processing it for various purposes.
# Options specify which transformations are to be done. Monospaced literal
# layout blocks are never touched.

# Changes:

# -abstract:   Remove the <abstract> element

# -ascii:      Replace &#8230; (sic, no x) with ...
#              Replace &#x2019; by '
#              Replace &#x201C; by "
#              Replace &#x201D; by "
#              Replace &#x2013; by -
#              Replace &#x2020; by *
#              Replace &#x2021; by **
#              Replace &#x00a0; by a space
#              Replace &#169; by (c)
#              Put quotes round <literal> text
#              Put quotes round <quote> text
#
# -bookinfo:   Remove the <bookinfo> element from the file
#
# -fi:         Replace "fi" by &#xFB01; except when it is in an XML element, or
#              inside a <literal>.
#
# -html:       Certain things are done only for HTML output:
#
#              If <literallayout> is followed by optional space and then a
#              newline, the space and newline are removed, because otherwise you
#              get a blank line in the HTML output.
#
# -noindex:    Remove the XML to generate a Concept and an Options index.
# -oneindex:   Ditto, but add XML to generate a single index.



# The function that processes ordinary text, that is, the parts of a line
# that the main loop has found to lie outside <literal> elements. Lines
# inside monospaced literallayout blocks are never passed to it.
#
# Argument:  the text to be transformed
# Returns:   the transformed copy; the caller's string is not modified
#
# The transformations are selected by two globals set from the command line:
#   $ligatures (-fi)     turn "fi" into the &#xFB01; ligature
#   $ascii     (-ascii)  turn typographic entities and <quote> tags into
#                        plain ASCII
#
# The sub has no prototype. It takes one argument, and an empty "()"
# prototype would make a plain process($text) call a compile-time error.

sub process {
    my ($s) = @_;

    # The lookahead rejects any "fi" that is followed by ">" with no "<" or
    # ">" in between, i.e. one that sits inside an XML tag.
    $s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;

    if ($ascii) {
        $s =~ s/&#8230;/.../g;       # ellipsis (decimal entity, no "x")
        $s =~ s/&#x2019;/'/g;        # right single quote
        $s =~ s/&#x201C;/"/g;        # left double quote
        $s =~ s/&#x201D;/"/g;        # right double quote
        $s =~ s/&#x2013;/-/g;        # en dash
        $s =~ s/&#x2020;/*/g;        # dagger
        $s =~ s/&#x2021;/**/g;       # double dagger
        $s =~ s/&#x00a0;/ /g;        # non-breaking space
        $s =~ s/&#169;/(c)/g;        # copyright sign
        $s =~ s{</?quote>}{"}g;      # opening and closing <quote> tags
    }

    return $s;
}


# The main program

# Everything starts out cleared. These are package globals, not lexicals:
# process() reads $ascii and $ligatures directly.

# Switches that the command-line options turn on.
$abstract = $ascii = $bookinfo = $html = 0;
$ligatures = $noindex = $oneindex = 0;

# State carried from one input line to the next by the main loop.
$inliteral = $inliterallayout = $madeindex = 0;

# Decode the command line. Each recognised option switches on the global
# flag it is paired with below; anything else is a fatal error.

my %flag_for = (
    "-fi"       => \$ligatures,
    "-abstract" => \$abstract,
    "-ascii"    => \$ascii,
    "-bookinfo" => \$bookinfo,
    "-html"     => \$html,
    "-noindex"  => \$noindex,
    "-oneindex" => \$oneindex,
);

foreach $arg (@ARGV) {
    die "** Pre-xml: Unknown option \"$arg\"\n" unless exists $flag_for{$arg};
    ${ $flag_for{$arg} } = 1;
}

# Main loop: read the XML from STDIN one line at a time and write the
# transformed text to STDOUT. The behaviour is controlled by the option
# flags ($abstract, $bookinfo, $html, $noindex, $oneindex, $ascii); the
# variables $inliteral, $inliterallayout and $madeindex carry state from one
# line to the next.

while (<STDIN>) {

    # Remove <abstract> if required.
    # NOTE(review): only the line on which <abstract> opens is dropped, so an
    # element spread over several lines would leave its remaining lines in
    # the output - confirm that the input always keeps it on one line.

    next if $abstract && /^\s*<abstract>/;

    # Remove <bookinfo> if required. When the element is not closed on the
    # line that opens it, discard lines up to and including the one that
    # starts with the closing tag.

    if ($bookinfo && /^<bookinfo/) {
        unless (m{</bookinfo}) {
            while (<STDIN>) { last if /^<\/bookinfo/; }
        }
        next;
    }

    # Copy monospaced literallayout blocks without any processing.

    if (/^<literallayout class="monospaced">/) {

        # For HTML, remove the white space and newline that follow the
        # opening tag, but only when nothing else follows it on the line;
        # otherwise the first two lines of the block would be run together.
        s/^(<literallayout class="monospaced">)\s*\z/$1/ if $html;
        print;

        # A block that is closed on its opening line has no further lines
        # to copy.
        unless (m{</literallayout>}) {
            while (<STDIN>) {
                print;
                last if /^<\/literallayout>/;
            }
        }
        next;
    }

    # Adjust index-generation code if required. The whole <index> element is
    # discarded; it may be self-closing, closed on its opening line, or
    # closed by a later line that starts with </index>.

    if (($noindex || $oneindex) && /^<index[\s>]/) {
        unless (m{</index>} || m{^<index[^>]*/>}) {
            while (<STDIN>) { last if /^<\/index>/; }
        }

        # With -oneindex, the first index removed is replaced by a single
        # combined index; later ones are just dropped.
        if ($oneindex && !$madeindex) {
            $madeindex = 1;
            print "<index><title>Index</title></index>\n";
        }

        next;
    }

    # A line that is not in a monospaced literal block; keep track of which
    # parts are in <literal> and which not. The latter get processed by
    # process(). Items in <literal> get quoted (with -ascii) unless they are
    # also in a <literallayout> block, or are already being quoted.
    #
    # Each pass of the loop deals with the text up to the next <literal> or
    # </literal> tag and then continues with the remainder of the line, so
    # the tests at the top are applied to each remainder as well.

    for (;;) {

        # For HTML, remove white space and the newline after an opening
        # <literallayout> tag that is alone on its line.
        s/^(<literallayout[^>]*>)\s*\z/$1/ if $html;

        $inliterallayout = 1 if /^<literallayout/;
        $inliterallayout = 0 if /^<\/literallayout/;

        if ($inliteral) {

            # Look for the end of the literal. A </literal> that is directly
            # followed by </quote> is not accepted as the end.
            if (/^(.*?)<\/literal>(?!<\/quote>)(.*)$/) {
                print $1;                     # literal text is copied as is
                print "\"" if $ascii && !$inliterallayout;
                print "</literal>";
                $inliteral = 0;
                $_ = "$2\n";                  # carry on with the remainder
            }
            else {
                print;                        # literal continues on next line
                last;
            }
        }

        # Not in literal state

        else {

            # Look for the start of a literal. A <literal> directly preceded
            # by <quote> is not accepted as the start, so it is passed to
            # process() along with the ordinary text around it.
            #
            # The & call form ignores any prototype on process, so the call
            # is valid however the sub is declared.
            if (/^(.*?)(?<!<quote>)<literal>(.*)$/) {
                print &process($1);
                print "<literal>";
                print "\"" if $ascii && !$inliterallayout;
                $inliteral = 1;
                $_ = "$2\n";                  # carry on with the remainder
            }
            else {
                print &process($_);
                last;
            }
        }
    }    # Loop for different parts of one line
}    # Loop for multiple lines

# End