Remove Asciidoc versions of the documentation and building apparatus;
[exim.git] / doc / doc-docbook / Pre-xml
CommitLineData
168e428f
PH
1#! /usr/bin/perl
2
9b371988 3# $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.3 2006/02/01 11:01:01 ph10 Exp $
168e428f
PH
4
5# Script to pre-process XML input before processing it for various purposes.
6# Options specify which transformations are to be done. Monospaced literal
7# layout blocks are never touched.
8
9# Changes:
10
9b371988
PH
11# -ascii: Replace ’ by '
12# Replace © by (c)
13# Replace † by *
14# Replace ‡ by **
15# Replace   by a space
16# Replace – by -
168e428f 17# Put quotes round <quote> text
068aaea8 18#
9b371988
PH
19# -quoteliteral:
20# Put quotes round <literal> text
21#
168e428f 22# -bookinfo: Remove the <bookinfo> element from the file
068aaea8 23#
168e428f
PH
24# -fi: Replace "fi" by &#xFB01; except when it is in an XML element, or
25# inside a <literal>.
068aaea8
PH
26#
27# -html: Certain things are done only for HTML output:
28#
9b371988 29# If <literallayout> is followed by optional space and then a
068aaea8
PH
30# newline, the space and newline are removed, because otherwise you
31# get a blank line in the HTML output.
32#
168e428f
PH
33# -noindex Remove the XML to generate a Concept and an Options index.
34# -oneindex Ditto, but add XML to generate a single index.
9b371988
PH
35#
36# -optbreak Insert an optional line break (zero-width space, &#x200B;) after
37# every underscore in text within <option> and <variable> elements,
38# except when preceded by <entry> (i.e. not in tables). The same is
39# also done within a word of four or more upper-case letters (for
40# compile-time options).
168e428f
PH
41
42
43
9b371988 44# The function that processes non-literal, non-monospaced text
168e428f
PH
45
46sub process()
47{
48my($s) = $_[0];
49
50$s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;
51
9b371988
PH
52if ($optbreak)
53 {
54 $s =~ s%(?<!<entry>)(<option>|<varname>)([^<]+)%
55 my($x,$y) = ($1,$2); $y =~ s/_/_&#x200B;/g; "$x"."$y"%gex;
56
57 $s =~ s?\b([A-Z_]{4,})\b?
58 my($x) = $1; $x =~ s/_/_&#x200B;/g; "$x"?gex;
59 }
60
168e428f
PH
61if ($ascii)
62 {
168e428f 63 $s =~ s/&#x2019;/'/g;
9b371988
PH
64 $s =~ s/&copy;/(c)/g;
65 $s =~ s/&dagger;/*/g;
66 $s =~ s/&Dagger;/**/g;
67 $s =~ s/&nsbp;/ /g;
68 $s =~ s/&ndash;/-/g;
168e428f
PH
69 $s =~ s/<quote>/"/g;
70 $s =~ s/<\/quote>/"/g;
71 }
72
73$s;
74}
75
76
77# The main program
78
168e428f
PH
79$ascii = 0;
80$bookinfo = 0;
068aaea8 81$html = 0;
168e428f 82$inliteral = 0;
068aaea8 83$inliterallayout = 0;
168e428f
PH
84$ligatures = 0;
85$madeindex = 0;
86$noindex = 0;
87$oneindex = 0;
9b371988
PH
88$optbreak = 0;
89$quoteliteral = 0;
168e428f
PH
90
91foreach $arg (@ARGV)
92 {
93 if ($arg eq "-fi") { $ligatures = 1; }
168e428f
PH
94 elsif ($arg eq "-ascii") { $ascii = 1; }
95 elsif ($arg eq "-bookinfo") { $bookinfo = 1; }
068aaea8 96 elsif ($arg eq "-html") { $html = 1; }
168e428f
PH
97 elsif ($arg eq "-noindex") { $noindex = 1; }
98 elsif ($arg eq "-oneindex") { $oneindex = 1; }
9b371988
PH
99 elsif ($arg eq "-optbreak") { $optbreak = 1; }
100 elsif ($arg eq "-quoteliteral") { $quoteliteral = 1; }
168e428f
PH
101 else { die "** Pre-xml: Unknown option \"$arg\"\n"; }
102 }
103
104while (<STDIN>)
105 {
168e428f
PH
106 # Remove <bookinfo> if required
107
108 if ($bookinfo && /^<bookinfo/)
109 {
110 while (<STDIN>) { last if /^<\/bookinfo/; }
111 next;
112 }
113
114 # Copy monospaced literallayout blocks
115
116 if (/^<literallayout class="monospaced">/)
117 {
068aaea8 118 $_ = substr($_, 0, -1) if $html;
168e428f
PH
119 print;
120 while (<STDIN>)
121 {
122 print;
123 last if /^<\/literallayout>/;
124 }
125 next;
126 }
127
128 # Adjust index-generation code if required
129
130 if (($noindex || $oneindex) && /^<index[\s>]/)
131 {
132 while (<STDIN>)
133 {
134 last if /^<\/index>/;
135 }
136
137 if ($oneindex && !$madeindex)
138 {
139 $madeindex = 1;
140 print "<index><title>Index</title></index>\n";
141 }
142
143 next;
144 }
145
146 # A line that is not in a monospaced literal block; keep track of which
147 # parts are in <literal> and which not. The latter get processed by the
068aaea8
PH
148 # function above. Items in <literal> get quoted unless they are also in
149 # a <literallayout> block, or are already being quoted.
168e428f
PH
150
151 for (;;)
152 {
068aaea8
PH
153 $_ = substr($_, 0, -1) if $html && /^<literallayout[^>]*>\s*\n$/;
154 $inliterallayout = 1 if /^<literallayout/;
155 $inliterallayout = 0 if /^<\/literallayout/;
156
168e428f
PH
157 if ($inliteral)
158 {
068aaea8 159 if (/^(.*?)<\/literal>(?!<\/quote>)(.*)$/)
168e428f
PH
160 {
161 print $1;
9b371988 162 print "\"" if $quoteliteral && !$inliterallayout;
168e428f
PH
163 print "</literal>";
164 $inliteral = 0;
165 $_ = "$2\n";
166 }
167 else
168 {
169 print;
170 last;
171 }
172 }
173
174 # Not in literal state
175
176 else
177 {
068aaea8 178 if (/^(.*?)(?<!<quote>)<literal>(.*)$/)
168e428f
PH
179 {
180 print &process($1);
181 print "<literal>";
9b371988 182 print "\"" if $quoteliteral && !$inliterallayout;
168e428f
PH
183 $inliteral = 1;
184 $_ = "$2\n";
185 }
186 else
187 {
188 print &process($_);
189 last;
190 }
191 }
192 } # Loop for different parts of one line
193 } # Loop for multiple lines
194
195# End