Commit | Line | Data |
---|---|---|
168e428f PH |
1 | #! /usr/bin/perl |
2 | ||
3 | # $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.1 2005/06/16 10:32:31 ph10 Exp $ | |
4 | ||
5 | # Script to pre-process XML input before processing it for various purposes. | |
6 | # Options specify which transformations are to be done. Monospaced literal | |
7 | # layout blocks are never touched. | |
8 | ||
9 | # Changes: | |
10 | ||
11 | # -abstract: Remove the <abstract> element | |
12 | ||
13 | # -ascii: Replace &8230; (sic, no x) with ... | |
14 | # Replace ’ by ' | |
15 | # Replace “ by " | |
16 | # Replace ” by " | |
17 | # Replace – by - | |
18 | # Replace † by * | |
19 | # Replace ‡ by ** | |
20 | # Replace a no-break space (U+00A0) by a space | |
21 | # Replace © by (c) | |
22 | # Put quotes round <literal> text | |
23 | # Put quotes round <quote> text | |
24 | ||
25 | # -bookinfo: Remove the <bookinfo> element from the file | |
26 | ||
27 | # -fi: Replace "fi" by the fi ligature (U+FB01), except when it is inside | |
28 | # an XML tag (between < and >) or inside a <literal>. | |
29 | ||
30 | # -noindex Remove the XML to generate a Concept and an Options index. | |
31 | # -oneindex Ditto, but add XML to generate a single index. | |
32 | ||
33 | ||
34 | ||
# Rules for the -ascii option, applied in this order. Each rule is a compiled
# pattern plus its plain-ASCII replacement. A character is recognised in any
# of these spellings:
#
#   hexadecimal character reference   &#x2019;  (leading zeros, either case)
#   decimal character reference       &#8217;
#   DocBook/HTML entity name          &rsquo;
#   the raw UTF-8 byte sequence       \xE2\x80\x99
#
# Everything is written with escapes, so this source stays pure ASCII and the
# rules cannot be damaged when the file is re-encoded or copied through a tool
# that decodes character references. The table is built in a BEGIN block so
# that it is ready no matter where in the file process() is first called.

my @ascii_rules;

BEGIN
{
  @ascii_rules = map
  {
    my($cp, $name, $bytes, $text) = @$_;
    my $hex = sprintf("%X", $cp);

    # Only the hex digits are matched case-insensitively; applying /i to the
    # raw bytes could make one high byte match another.
    [ qr/&(?:#x0*(?i:$hex)|#0*$cp|$name);|\Q$bytes\E/, $text ]
  }
  (
    [ 0x2026, 'hellip', "\xE2\x80\xA6", '...' ],   # horizontal ellipsis
    [ 0x2019, 'rsquo',  "\xE2\x80\x99", "'"   ],   # right single quote
    [ 0x201C, 'ldquo',  "\xE2\x80\x9C", '"'   ],   # left double quote
    [ 0x201D, 'rdquo',  "\xE2\x80\x9D", '"'   ],   # right double quote
    [ 0x2013, 'ndash',  "\xE2\x80\x93", '-'   ],   # en dash
    [ 0x2020, 'dagger', "\xE2\x80\xA0", '*'   ],   # dagger
    [ 0x2021, 'Dagger', "\xE2\x80\xA1", '**'  ],   # double dagger
    [ 0x00A0, 'nbsp',   "\xC2\xA0",     ' '   ],   # no-break space
    [ 0x00A9, 'copy',   "\xC2\xA9",     '(c)' ],   # copyright sign
  );
}

# Transform one run of ordinary text, that is, text which the caller has
# established to be outside <literal> elements.
#
# Argument:  the text to transform
# Returns:   the transformed copy; the argument itself is left untouched
#
# Reads the package variables $ligatures and $ascii:
#
#   $ligatures  every "fi" that is not inside an XML tag becomes the
#               character reference &#xFB01; (the fi ligature)
#   $ascii      the characters in @ascii_rules become their ASCII stand-ins,
#               and <quote> and </quote> tags become double-quote characters
#
# The text is handled as bytes. NOTE(review): this assumes no decoding layer
# has been put on the input handle from outside the script.
#
# There is deliberately no prototype: the sub takes one argument, and the
# former "()" prototype only worked because callers used the &process(...)
# form, which ignores prototypes. That call form still works.

sub process
{
  our($ascii, $ligatures);
  my($s) = $_[0];

  # The negative lookahead rejects an "fi" that is followed by ">" with no
  # "<" or ">" in between, which is how text inside a tag is recognised.

  $s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;

  if ($ascii)
  {
    foreach my $rule (@ascii_rules)
    {
      my($pattern, $text) = @$rule;
      $s =~ s/$pattern/$text/g;
    }

    $s =~ s/<quote>/"/g;
    $s =~ s/<\/quote>/"/g;
  }

  return $s;
}
60 | ||
61 | ||
62 | # The main program | |
63 | ||
# Option flags, all off until the command line says otherwise. They are
# package variables because process() and the main loop read them by name.

($abstract, $ascii, $bookinfo, $ligatures, $noindex, $oneindex) = (0) x 6;

# Parser state used by the main loop: whether we are currently inside a
# <literal> element, and whether the single replacement index has already
# been written.

$inliteral = 0;
$madeindex = 0;

# Map each recognised command-line option to the flag it switches on.

my %flag_for =
  (
  "-fi"       => \$ligatures,
  "-abstract" => \$abstract,
  "-ascii"    => \$ascii,
  "-bookinfo" => \$bookinfo,
  "-noindex"  => \$noindex,
  "-oneindex" => \$oneindex,
  );

# Every argument must be a known option; anything else is fatal.

foreach my $opt (@ARGV)
  {
  my $flag = $flag_for{$opt}
    or die "** Pre-xml: Unknown option \"$opt\"\n";
  $$flag = 1;
  }
83 | ||
# Main loop: filter STDIN to STDOUT one line at a time, applying whichever
# transformations the option flags select.
#
# Three constructs are handled as multi-line units that begin on a line
# starting with the opening tag: <bookinfo>, monospaced <literallayout>, and
# <index>. For each of them the closing tag is first looked for on the opening
# line itself. Without that check a one-line element would make the loop go on
# consuming input until some later line happened to begin with the closing
# tag, or until end of file.

LINE: while (<STDIN>)
{
  # -abstract: drop the line that opens the <abstract> element.
  # NOTE(review): only this one line is dropped, so this presumably relies on
  # the whole element sitting on a single line -- confirm against the input.

  next LINE if $abstract && /^\s*<abstract>/;

  # -bookinfo: drop everything from the opening line up to and including the
  # line that begins with the closing tag.

  if ($bookinfo && /^<bookinfo/)
  {
    unless (/<\/bookinfo/)
    {
      while (<STDIN>) { last if /^<\/bookinfo/; }
    }
    next LINE;
  }

  # Monospaced literallayout blocks are copied through completely untouched.

  if (/^<literallayout class="monospaced">/)
  {
    print;
    unless (/<\/literallayout>/)
    {
      while (<STDIN>)
      {
        print;
        last if /^<\/literallayout>/;
      }
    }
    next LINE;
  }

  # -noindex / -oneindex: drop each <index> element. With -oneindex, the
  # first one dropped is replaced by a single combined index.

  if (($noindex || $oneindex) && /^<index[\s>]/)
  {
    unless (/<\/index>/)
    {
      while (<STDIN>) { last if /^<\/index>/; }
    }

    if ($oneindex && !$madeindex)
    {
      $madeindex = 1;
      print "<index><title>Index</title></index>\n";
    }

    next LINE;
  }

  # An ordinary line. Work through it piece by piece, alternating between
  # text outside <literal> (which goes through process()) and text inside
  # <literal> (which is copied as it is). $inliteral is not reset between
  # lines, so a <literal> element may span several lines.

  PART: for (;;)
  {
    if ($inliteral)
    {
      if (/^(.*?)<\/literal>(.*)$/)
      {
        my($inside, $rest) = ($1, $2);
        print $inside;
        print "\"" if $ascii;          # closing quote for -ascii
        print "</literal>";
        $inliteral = 0;
        $_ = "$rest\n";                # carry on with the rest of the line
      }
      else
      {
        print;                         # whole remainder is literal text
        last PART;
      }
    }

    # Not in literal state

    else
    {
      if (/^(.*?)<literal>(.*)$/)
      {
        my($outside, $rest) = ($1, $2);

        # The & call form ignores any prototype declared on process().
        print &process($outside);
        print "<literal>";
        print "\"" if $ascii;          # opening quote for -ascii
        $inliteral = 1;
        $_ = "$rest\n";
      }
      else
      {
        print &process($_);
        last PART;
      }
    }
  } # Loop for different parts of one line
} # Loop for multiple lines
172 | ||
173 | # End |