Install all the files that comprise the new DocBook way of making the
[exim.git] / doc / doc-docbook / Pre-xml
CommitLineData
168e428f
PH
1#! /usr/bin/perl
2
3# $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.1 2005/06/16 10:32:31 ph10 Exp $
4
5# Script to pre-process XML input before processing it for various purposes.
6# Options specify which transformations are to be done. Monospaced literal
7# layout blocks are never touched.
8
9# Changes:
10
11# -abstract: Remove the <abstract> element
12
# -ascii:    Replace &#8230; (sic, no x) with ...
#            Replace &#x2019; by '
#            Replace &#x201C; by "
#            Replace &#x201D; by "
#            Replace &#x2013; by -
#            Replace &#x2020; by *
#            Replace &#x2021; by **
#            Replace &#x00a0; by a space
#            Replace &#x00a9; by (c)
#            Put quotes round <literal> text
#            Put quotes round <quote> text
24
25# -bookinfo: Remove the <bookinfo> element from the file
26
27# -fi: Replace "fi" by &#xFB01; except when it is in an XML element, or
28# inside a <literal>.
29
30# -noindex Remove the XML to generate a Concept and an Options index.
31# -oneindex Ditto, but add XML to generate a single index.
32
33
34
35# The function that processes non-literal monospaced text
36
# Apply the selected text transformations to one piece of a line. The main
# program only passes text that is outside monospaced literal layout blocks
# and outside <literal> elements.
#
# Argument:  the text to be transformed
# Returns:   the transformed text
#
# Which transformations run is decided by two global flags that the main
# program sets from the command line:
#
#   $ligatures  (-fi)     replace "fi" by the ligature &#xFB01;
#   $ascii      (-ascii)  replace certain character references by ASCII
#                         approximations, and <quote> tags by double quotes
#
# The sub is deliberately declared without a prototype. It takes one
# argument, and an empty "()" prototype would turn a plain process($text)
# call into a compile-time error.

sub process
{
my ($s) = @_;

# The negative lookahead rejects any "fi" that is followed by a ">" with no
# "<" or ">" in between, that is, any "fi" inside an XML tag such as
# <firstname>.

$s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;

if ($ascii)
  {
  $s =~ s/&#8230;/.../g;             # ellipsis (decimal reference, no x)
  $s =~ s/&#x2019;/'/g;              # right single quote
  $s =~ s/&#x201C;/"/g;              # left double quote
  $s =~ s/&#x201D;/"/g;              # right double quote
  $s =~ s/&#x2013;/-/g;              # en dash
  $s =~ s/&#x2020;/*/g;              # dagger
  $s =~ s/&#x2021;/**/g;             # double dagger
  $s =~ s/&#x00a0;/ /g;              # non-breaking space
  $s =~ s/&#(?:x00a9|169);/(c)/g;    # copyright sign, hex or decimal form
  $s =~ s/<quote>/"/g;
  $s =~ s/<\/quote>/"/g;
  }

return $s;
}
60
61
62# The main program
63
# All option flags and state variables start out cleared. They are package
# globals; process() reads $ascii and $ligatures directly.

$abstract = $ascii = $bookinfo = $inliteral = $ligatures = $madeindex =
  $noindex = $oneindex = 0;

# Each recognised command line option simply switches on one flag.

my %flag_for_option = (
  "-fi"       => \$ligatures,
  "-abstract" => \$abstract,
  "-ascii"    => \$ascii,
  "-bookinfo" => \$bookinfo,
  "-noindex"  => \$noindex,
  "-oneindex" => \$oneindex,
);

foreach my $arg (@ARGV)
  {
  my $flag = $flag_for_option{$arg};
  die "** Pre-xml: Unknown option \"$arg\"\n" unless defined $flag;
  $$flag = 1;
  }

# Filter standard input to standard output, one line at a time.

while (<STDIN>)
  {
  # With -abstract, drop a line that starts with <abstract>

  next if $abstract && /^\s*<abstract>/;

  # With -bookinfo, drop everything from <bookinfo through </bookinfo

  if ($bookinfo && /^<bookinfo/)
    {
    skip_input_through(qr/^<\/bookinfo/);
    next;
    }

  # A monospaced literallayout block is copied verbatim, up to and including
  # the line that starts with its closing tag.

  if (/^<literallayout class="monospaced">/)
    {
    print;
    while (defined($_ = <STDIN>))
      {
      print;
      /^<\/literallayout>/ and last;
      }
    next;
    }

  # With -noindex or -oneindex, every <index> element is dropped. With
  # -oneindex, the first one dropped is replaced by a single combined index.

  if (($noindex || $oneindex) && /^<index[\s>]/)
    {
    skip_input_through(qr/^<\/index>/);

    if ($oneindex and not $madeindex)
      {
      print "<index><title>Index</title></index>\n";
      $madeindex = 1;
      }

    next;
    }

  # An ordinary line. Work through it piece by piece, switching state at each
  # <literal> and </literal> tag. $inliteral persists from one input line to
  # the next. Text inside <literal> is copied as it stands; text outside goes
  # through process(). With -ascii, a double quote is added just inside each
  # tag.

  while (1)
    {
    if ($inliteral)
      {
      my ($inside, $remainder) = /^(.*?)<\/literal>(.*)$/;

      # No closing tag on what is left of this line: copy it and move on

      unless (defined $remainder)
        {
        print;
        last;
        }

      print $inside, ($ascii ? "\"" : ()), "</literal>";
      $inliteral = 0;
      $_ = "$remainder\n";
      next;
      }

    # Not in literal state

    my ($outside, $remainder) = /^(.*?)<literal>(.*)$/;

    # No opening tag on what is left of this line: process it and move on.
    # process() is called with & so that the call does not depend on how the
    # sub is prototyped.

    unless (defined $remainder)
      {
      print &process($_);
      last;
      }

    print &process($outside), "<literal>", ($ascii ? "\"" : ());
    $inliteral = 1;
    $_ = "$remainder\n";
    } # Loop for different parts of one line
  } # Loop for multiple lines


# Read and discard input lines up to and including the first one that matches
# the given pattern, or up to the end of the input. On return, $_ holds the
# matching line, or is undefined if the input ran out.

sub skip_input_through
{
my ($end_pattern) = @_;

while (<STDIN>)
  {
  last if $_ =~ $end_pattern;
  }
}
172
173# End