Moved a debug statement to avoid a testing race.
[exim.git] / doc / doc-docbook / Pre-xml
CommitLineData
168e428f
PH
1#! /usr/bin/perl
2
068aaea8 3# $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.2 2005/11/10 12:30:13 ph10 Exp $
168e428f
PH
4
5# Script to pre-process XML input before processing it for various purposes.
6# Options specify which transformations are to be done. Monospaced literal
7# layout blocks are never touched.
8
9# Changes:
10
11# -abstract: Remove the <abstract> element
12
13# -ascii: Replace &#8230; (sic, no x) with ...
14# Replace &#x2019; by '
15# Replace &#x201C; by "
16# Replace &#x201D; by "
17# Replace &#x2013; by -
18# Replace &#x2020; by *
19# Replace &#x2021; by **
20# Replace &#x00a0; by a space
21# Replace &#169; by (c)
22# Put quotes round <literal> text
23# Put quotes round <quote> text
068aaea8 24#
168e428f 25# -bookinfo: Remove the <bookinfo> element from the file
068aaea8 26#
168e428f
PH
27# -fi: Replace "fi" by &#xFB01; except when it is in an XML element, or
28# inside a <literal>.
068aaea8
PH
29#
30# -html: Certain things are done only for HTML output:
31#
32# If <literallayout> is followed by optional space and then a
33# newline, the space and newline are removed, because otherwise you
34# get a blank line in the HTML output.
35#
168e428f
PH
36# -noindex: Remove the XML that generates a Concept and an Options index.
37# -oneindex: Ditto, but add XML to generate a single index.
38
39
40
# The function that processes text lying outside <literal> elements and
# outside monospaced literallayout blocks.
#
# Argument:  the text to be processed
# Returns:   a processed copy of the text; the argument is not modified
#
# The transformations are selected by two global flags that the main program
# sets from the command line: $ligatures (-fi) and $ascii (-ascii).
#
# No prototype is given. The function takes one argument, so an empty
# prototype would be wrong: it is ignored by calls of the form &process(...),
# but it makes a plain process(...) call fail to compile.

sub process
{
my($s) = $_[0];

# Turn "fi" into the ligature character. The lookahead rejects any "fi" for
# which the next angle bracket is a closing one, that is, any "fi" that lies
# inside an XML tag.

$s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;

# Replace typographic character references by plain ASCII equivalents, and
# the <quote> tags by double quote characters.

if ($ascii)
  {
  $s =~ s/&#8230;/.../g;        # Ellipsis (a decimal reference, not hex)
  $s =~ s/&#x2019;/'/g;         # Right single quote
  $s =~ s/&#x201C;/"/g;         # Left double quote
  $s =~ s/&#x201D;/"/g;         # Right double quote
  $s =~ s/&#x2013;/-/g;         # En dash
  $s =~ s/&#x2020;/*/g;         # Dagger
  $s =~ s/&#x2021;/**/g;        # Double dagger
  $s =~ s/&#x00a0;/ /g;         # Non-breaking space
  $s =~ s/&#169;/(c)/g;         # Copyright sign
  $s =~ s/<quote>/"/g;
  $s =~ s/<\/quote>/"/g;
  }

return $s;
}
66
67
# The main program. All the option flags, and the variables that record the
# state of the scan through the input, start out as zero.

$abstract = $ascii = $bookinfo = $html = 0;
$ligatures = $noindex = $oneindex = 0;
$inliteral = $inliterallayout = $madeindex = 0;

# Each recognized command line option switches on exactly one flag. Anything
# that is not in this table is a fatal error.

my %flag_for =
  (
  "-fi"       => \$ligatures,
  "-abstract" => \$abstract,
  "-ascii"    => \$ascii,
  "-bookinfo" => \$bookinfo,
  "-html"     => \$html,
  "-noindex"  => \$noindex,
  "-oneindex" => \$oneindex,
  );

foreach $arg (@ARGV)
  {
  die "** Pre-xml: Unknown option \"$arg\"\n" if !exists $flag_for{$arg};
  ${ $flag_for{$arg} } = 1;
  }
92
# Read the input a line at a time from the standard input and write the
# processed text to the standard output. A line is either dropped, copied
# verbatim, or split into the parts that are inside <literal> and the parts
# that are not.

while (<STDIN>)
  {
  # Remove <abstract> if required. Only the line that holds the opening tag
  # is dropped.

  next if ($abstract && /^\s*<abstract>/);

  # Remove <bookinfo> if required, by skipping up to and including the line
  # that starts with the closing tag.

  if ($bookinfo && /^<bookinfo/)
    {
    while (<STDIN>) { last if /^<\/bookinfo/; }
    next;
    }

  # Copy monospaced literallayout blocks. For HTML output, optional white
  # space and a newline that directly follow the opening tag are removed,
  # because otherwise there is a blank line in the output. A line that has
  # text after the tag is left alone: removing its newline would join it to
  # the next line of the block.

  if (/^<literallayout class="monospaced">/)
    {
    s/^(<literallayout class="monospaced">)\s*\n$/$1/ if $html;
    print;
    while (<STDIN>)
      {
      print;
      last if /^<\/literallayout>/;
      }
    next;
    }

  # Adjust index-generation code if required. Every <index> element is
  # discarded; for -oneindex, the first one is replaced by the XML for a
  # single index.

  if (($noindex || $oneindex) && /^<index[\s>]/)
    {
    while (<STDIN>)
      {
      last if /^<\/index>/;
      }

    if ($oneindex && !$madeindex)
      {
      $madeindex = 1;
      print "<index><title>Index</title></index>\n";
      }

    next;
    }

  # A line that is not in a monospaced literal block; keep track of which
  # parts are in <literal> and which not. The latter get processed by the
  # process() function. When -ascii is set, items in <literal> get quoted
  # unless they are also in a <literallayout> block, or are already being
  # quoted, that is, wrapped directly in <quote>.

  for (;;)
    {
    # For HTML output, remove optional white space and a newline that
    # directly follow an opening <literallayout> tag.

    s/\s*\n$// if $html && /^<literallayout[^>]*>\s*\n$/;
    $inliterallayout = 1 if /^<literallayout/;
    $inliterallayout = 0 if /^<\/literallayout/;

    # In literal state: copy text unprocessed up to the closing tag, then go
    # round again with whatever follows it.

    if ($inliteral)
      {
      if (/^(.*?)<\/literal>(?!<\/quote>)(.*)$/)
        {
        my($before, $after) = ($1, $2);
        print $before;
        print "\"" if $ascii && !$inliterallayout;
        print "</literal>";
        $inliteral = 0;
        $_ = "$after\n";
        }
      else
        {
        print;
        last;
        }
      }

    # Not in literal state: process the text up to the next opening tag, or
    # the whole of the line if there is none. The captures are copied before
    # process() is called, and the & form of the call is used so that any
    # prototype on process() is bypassed.

    else
      {
      if (/^(.*?)(?<!<quote>)<literal>(.*)$/)
        {
        my($before, $after) = ($1, $2);
        print &process($before);
        print "<literal>";
        print "\"" if $ascii && !$inliterallayout;
        $inliteral = 1;
        $_ = "$after\n";
        }
      else
        {
        print &process($_);
        last;
        }
      }
    }     # Loop for different parts of one line
  }       # Loop for multiple lines
187
188# End