Releases signed by Phil's key, not Nigel's.
[exim.git] / doc / doc-docbook / Pre-xml
CommitLineData
168e428f
PH
1#! /usr/bin/perl
2
168e428f
PH
3# Script to pre-process XML input before processing it for various purposes.
4# Options specify which transformations are to be done. Monospaced literal
5# layout blocks are never touched.
6
7# Changes:
8
9b371988
PH
9# -ascii: Replace ’ by '
10# Replace © by (c)
11# Replace † by *
12# Replace ‡ by **
13# Replace   by a space
14# Replace – by -
168e428f 15# Put quotes round <quote> text
068aaea8 16#
9b371988
PH
17# -quoteliteral:
18# Put quotes round <literal> text
19#
168e428f 20# -bookinfo: Remove the <bookinfo> element from the file
068aaea8 21#
168e428f
PH
22# -fi: Replace "fi" by &#xFB01; except when it is in an XML element, or
23# inside a <literal>.
068aaea8
PH
24#
25# -html: Certain things are done only for HTML output:
26#
9b371988 27# If <literallayout> is followed by optional space and then a
068aaea8
PH
28# newline, the space and newline are removed, because otherwise you
29# get a blank line in the HTML output.
30#
f89d2485 31# -noindex Remove the XML that generates indexes.
168e428f 32# -oneindex Ditto, but add XML to generate a single index.
9b371988
PH
33#
34# -optbreak Insert an optional line break (zero-width space, &#x200B;) after
35# every underscore in text within <option> and <variable> elements,
36# except when preceded by <entry> (i.e. not in tables). The same is
37# also done within a word of four or more upper-case letters (for
38# compile-time options).
168e428f
PH
39
40
41
9b371988 42# The function that processes non-literal, non-monospaced text
168e428f
PH
43
44sub process()
45{
46my($s) = $_[0];
47
48$s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;
49
9b371988
PH
50if ($optbreak)
51 {
52 $s =~ s%(?<!<entry>)(<option>|<varname>)([^<]+)%
53 my($x,$y) = ($1,$2); $y =~ s/_/_&#x200B;/g; "$x"."$y"%gex;
54
55 $s =~ s?\b([A-Z_]{4,})\b?
56 my($x) = $1; $x =~ s/_/_&#x200B;/g; "$x"?gex;
57 }
58
168e428f
PH
59if ($ascii)
60 {
168e428f 61 $s =~ s/&#x2019;/'/g;
9b371988
PH
62 $s =~ s/&copy;/(c)/g;
63 $s =~ s/&dagger;/*/g;
64 $s =~ s/&Dagger;/**/g;
65 $s =~ s/&nsbp;/ /g;
66 $s =~ s/&ndash;/-/g;
168e428f
PH
67 $s =~ s/<quote>/"/g;
68 $s =~ s/<\/quote>/"/g;
69 }
70
71$s;
72}
73
74
75# The main program
76
168e428f
PH
77$ascii = 0;
78$bookinfo = 0;
068aaea8 79$html = 0;
168e428f 80$inliteral = 0;
068aaea8 81$inliterallayout = 0;
168e428f
PH
82$ligatures = 0;
83$madeindex = 0;
84$noindex = 0;
85$oneindex = 0;
9b371988
PH
86$optbreak = 0;
87$quoteliteral = 0;
168e428f
PH
88
89foreach $arg (@ARGV)
90 {
91 if ($arg eq "-fi") { $ligatures = 1; }
168e428f
PH
92 elsif ($arg eq "-ascii") { $ascii = 1; }
93 elsif ($arg eq "-bookinfo") { $bookinfo = 1; }
068aaea8 94 elsif ($arg eq "-html") { $html = 1; }
168e428f
PH
95 elsif ($arg eq "-noindex") { $noindex = 1; }
96 elsif ($arg eq "-oneindex") { $oneindex = 1; }
9b371988
PH
97 elsif ($arg eq "-optbreak") { $optbreak = 1; }
98 elsif ($arg eq "-quoteliteral") { $quoteliteral = 1; }
168e428f
PH
99 else { die "** Pre-xml: Unknown option \"$arg\"\n"; }
100 }
101
102while (<STDIN>)
103 {
168e428f
PH
104 # Remove <bookinfo> if required
105
106 if ($bookinfo && /^<bookinfo/)
107 {
108 while (<STDIN>) { last if /^<\/bookinfo/; }
109 next;
110 }
111
112 # Copy monospaced literallayout blocks
113
114 if (/^<literallayout class="monospaced">/)
115 {
068aaea8 116 $_ = substr($_, 0, -1) if $html;
168e428f
PH
117 print;
118 while (<STDIN>)
119 {
120 print;
121 last if /^<\/literallayout>/;
122 }
123 next;
124 }
125
126 # Adjust index-generation code if required
127
128 if (($noindex || $oneindex) && /^<index[\s>]/)
129 {
130 while (<STDIN>)
131 {
132 last if /^<\/index>/;
133 }
134
135 if ($oneindex && !$madeindex)
136 {
137 $madeindex = 1;
138 print "<index><title>Index</title></index>\n";
139 }
140
141 next;
142 }
143
144 # A line that is not in a monospaced literal block; keep track of which
145 # parts are in <literal> and which not. The latter get processed by the
068aaea8
PH
146 # function above. Items in <literal> get quoted unless they are also in
147 # a <literallayout> block, or are already being quoted.
168e428f
PH
148
149 for (;;)
150 {
068aaea8
PH
151 $_ = substr($_, 0, -1) if $html && /^<literallayout[^>]*>\s*\n$/;
152 $inliterallayout = 1 if /^<literallayout/;
153 $inliterallayout = 0 if /^<\/literallayout/;
154
168e428f
PH
155 if ($inliteral)
156 {
068aaea8 157 if (/^(.*?)<\/literal>(?!<\/quote>)(.*)$/)
168e428f
PH
158 {
159 print $1;
9b371988 160 print "\"" if $quoteliteral && !$inliterallayout;
168e428f
PH
161 print "</literal>";
162 $inliteral = 0;
163 $_ = "$2\n";
164 }
165 else
166 {
167 print;
168 last;
169 }
170 }
171
172 # Not in literal state
173
174 else
175 {
068aaea8 176 if (/^(.*?)(?<!<quote>)<literal>(.*)$/)
168e428f
PH
177 {
178 print &process($1);
179 print "<literal>";
9b371988 180 print "\"" if $quoteliteral && !$inliterallayout;
168e428f
PH
181 $inliteral = 1;
182 $_ = "$2\n";
183 }
184 else
185 {
186 print &process($_);
187 last;
188 }
189 }
190 } # Loop for different parts of one line
191 } # Loop for multiple lines
192
193# End