Commit | Line | Data |
---|---|---|
495ae4b0 PH |
1 | #! /usr/bin/perl -w |
2 | # $Cambridge: exim/doc/doc-scripts/DoIndex,v 1.1 2004/10/07 15:04:35 ph10 Exp $ | |
3 | ||
4 | # Script for producing the Index for the Exim manual from the output of the | |
5 | # SGCAL run. This is copied from the script for the Exim book. | |
6 | ||
7 | ||
8 | ############################################################################## | |
9 | # Patterns for matching things to be removed from the sort keys | |
10 | ||
11 | # This was copied from the Exim book processor, but we have now found a | |
12 | # better way of doing this. Leave the code until I am quite sure... | |
13 | ||
14 | # $pat[0] = qr/ \(\\\*see also\*\\[^)]+\)/; | |
15 | # $pat[1] = qr/(?<!@)\/\//; # // | |
16 | # $pat[2] = qr/(?<!@)\/\\/; # /\ | |
17 | # $pat[3] = qr/(?<!@)\\\//; # \/ | |
18 | # $pat[4] = qr/(?<!@) \\ # non-@ \, followed by one of | |
19 | # (?: | |
20 | # [\.\/] | # dot or slash | |
21 | # !- | # !- | |
22 | # !\+ | # !+ | |
23 | # !\. | # !. | |
24 | # "\+ | # "+ | |
25 | # \([.\/]? | # ( and optional . or slash | |
26 | # [[\$\\%?!-"] | # [ $ \ % ! " or - | |
27 | # \*{1,2} | # * or ** | |
28 | # \^{1,2}\/? # ^ or ^^ and optional slash | |
29 | # )/x; | |
30 | # $pat[5] = qr/(?: []\$\\%)?!"] | # ] $ \ % ) ? " or ! ) | |
31 | # \*{1,2} | # * or ** ) optional | |
32 | # \^{1,2})? # ^ or ^^ ) | |
33 | # \\/x; # then \ | |
34 | # $pat[6] = qr/(?<!@)::/; | |
35 | # $pat[7] = qr/\sR[FS]\b/; | |
36 | # $pat[8] = qr/``/; | |
37 | # $pat[9] = qr/''/; | |
38 | # $pat[10] = qr/`/; | |
39 | # $pat[11] = qr/'/; | |
40 | # $pat[12] = qr/,/; | |
41 | # $pat[13] = qr/\(e?s\)/; | |
42 | ||
43 | ||
44 | # Other patterns | |
45 | ||
46 | # $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/; | |
47 | ||
48 | $keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/; | |
49 | ||
50 | ||
51 | # The sort function | |
52 | ||
53 | sub cf { | |
54 | my($x,$y) = ($a,$b); | |
55 | ||
56 | ############old############# | |
57 | #foreach $pattern (@pat) # Remove strings by pattern | |
58 | # { | |
59 | # $x =~ s/$pattern//g; | |
60 | # $y =~ s/$pattern//g; | |
61 | # } | |
62 | ########################## | |
63 | ||
64 | ||
65 | # Turn || into @|@| | |
66 | ||
67 | $x =~ s/\|\|/@|@|/g; | |
68 | $y =~ s/\|\|/@|@|/g; | |
69 | ||
70 | # Remove all special characters, except those preceded by @ | |
71 | ||
72 | $x =~ s/(?<!\@)[^\w\@\s]//g; | |
73 | $y =~ s/(?<!\@)[^\w\@\s]//g; | |
74 | ||
75 | # Remove the escaping @s | |
76 | ||
77 | #$x =~ s/\@(.)/$1/g; | |
78 | #$y =~ s/\@(.)/$1/g; | |
79 | ||
80 | ||
81 | ||
82 | ################old ######################## | |
83 | #$x =~ s/:(\w+):/$1/g; # :fail: etc => fail | |
84 | #$y =~ s/:(\w+):/$1/g; | |
85 | ||
86 | #$x =~ s/^\@[^a-z]+/\@/i; # Make keys starting with @ | |
87 | #$y =~ s/^\@[^a-z]+/\@/i; # sort on @ followed by the first letter | |
88 | ##############################################3 | |
89 | ||
90 | ||
91 | $x =~ s/\@_/\x7f/g; # Make underscore sort late (option names) | |
92 | $y =~ s/\@_/\x7f/g; | |
93 | ||
94 | # Split up to sort on individual parts | |
95 | ||
96 | my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/; | |
97 | my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/; | |
98 | ||
99 | $xr = "" if !defined $xr; | |
100 | $yr = "" if !defined $yr; | |
101 | ||
102 | $xs = "" if !defined $xs; | |
103 | $ys = "" if !defined $ys; | |
104 | ||
105 | if ($show_keys) | |
106 | { | |
107 | print "a=$a\n x=$x\n xp=$xp\n xs=$xs\n xr=$xr\n xn=$xn\n"; | |
108 | print "b=$b\n y=$y\n yp=$yp\n ys=$ys\n yr=$yr\n yn=$yn\n"; | |
109 | } | |
110 | ||
111 | my ($c) = "\L$xp" cmp "\L$yp"; # Caseless, primary text only | |
112 | $c = $xp cmp $yp if $c == 0; # Caseful, primary text only | |
113 | $c = "\L$xs" cmp "\L$ys" if $c == 0; # Caseless, secondary text only | |
114 | $c = $xs cmp $ys if $c == 0; # Caseful, secondary text only | |
115 | $c = $xn <=> $yn if $c == 0; # Compare the numbers | |
116 | $c = $xr cmp $yr if $c == 0; # Sort RA before RZ | |
117 | return $c; | |
118 | } | |
119 | ||
120 | ||
121 | ||
122 | ############################################################################## | |
123 | # Function for getting the next line from the @lines vector, using the global | |
124 | # index $1. If the next pair of lines specifies a range of pages, combine them. | |
125 | # That's why $linenumber has to be global - so we can increment it. If there's | |
126 | # a range error, return "". | |
127 | ||
128 | sub getnextentry { | |
129 | my($line) = $lines[$linenumber]; | |
130 | my($aa,$zz,$tline,$nextline,$tnextline); | |
131 | ||
132 | if ($line =~ / RA (\d+)/) | |
133 | { | |
134 | $aa = $1; | |
135 | $nextline = $lines[++$linenumber]; | |
136 | if ($nextline =~ / RZ (\d+)/) | |
137 | { | |
138 | $zz = $1; | |
139 | } | |
140 | else | |
141 | { | |
142 | print STDERR "** Bad range data (1)\n"; | |
143 | print STDERR " $line\n"; | |
144 | print STDERR " $nextline\n"; | |
145 | return ""; | |
146 | } | |
147 | ||
148 | $tline = $line; | |
149 | $tnextline = $nextline; | |
150 | ||
151 | $tline =~ s/ RA \d+//; | |
152 | $tnextline =~ s/ RZ \d+//; | |
153 | ||
154 | if ($tline ne $tnextline) | |
155 | { | |
156 | print STDERR "** Bad range data (2)\n"; | |
157 | print STDERR " $line\n"; | |
158 | print STDERR " $nextline\n"; | |
159 | return ""; | |
160 | } | |
161 | ||
162 | $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz"; | |
163 | } | |
164 | ||
165 | elsif ($line =~ / RZ (\d+)/) | |
166 | { | |
167 | print STDERR "** Bad range data (RZ without RA)\n"; | |
168 | print STDERR " $line\n"; | |
169 | return ""; | |
170 | } | |
171 | ||
172 | return $line | |
173 | } | |
174 | ||
175 | ||
176 | ||
177 | ||
178 | ############################################################################## | |
179 | # Function for outputting a line, checking for the current primary | |
180 | # and indenting a bit for secondaries. We also need a newpar | |
181 | # before each item, because the main indent is set to a largish indent | |
182 | # for long reference lists, but the parindent is set to counter this. | |
183 | # This is where we handle the break between letters. We know that any non- | |
184 | # alphamerics at the start of lines are markup, except for @. A reference | |
185 | # value of 99999 is for the "see also" lines. Suppress it. | |
186 | ||
187 | sub outline { | |
188 | my($text,$ref) = ($_[0],$_[1]); | |
189 | my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/; | |
190 | ||
191 | return if $text =~ /^\s*$/; | |
192 | ||
193 | if ($ref eq "99999") # dummy for see also | |
194 | { | |
195 | $ref = "" | |
196 | } | |
197 | else | |
198 | { | |
199 | $ref = "#$ref"; # prepend space | |
200 | } | |
201 | ||
202 | if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; } | |
203 | ||
204 | print OUT ".newpar\n"; | |
205 | ||
206 | if ($letter ne $currentletter && $letter ge "A") | |
207 | { | |
208 | print OUT ".newletter\n"; | |
209 | $currentletter = $letter; | |
210 | } | |
211 | ||
212 | $text =~ s/\@'/\$'/g; # Turns @' into $' so that it prints a non-curly quote | |
213 | ||
214 | if ($text =~ /^(.+)\|\|(.*)$/) | |
215 | { | |
216 | my($primary,$secondary) = ($1,$2); | |
217 | ||
218 | if ($primary ne $lastprimary) | |
219 | { | |
220 | print OUT ".primary $primary\n"; | |
221 | $lastprimary = $primary; | |
222 | } | |
223 | ||
224 | $primary =~ s/"/""/g; | |
225 | $secondary =~ s/"/""/g; | |
226 | ||
227 | my($contprim) = $primary; | |
228 | $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//; | |
229 | ||
230 | print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n"; | |
231 | } | |
232 | ||
233 | # Not a two-part item; insert @ if the first char is a dot | |
234 | ||
235 | else | |
236 | { | |
237 | print OUT "@" if $text =~ /^\./; | |
238 | print OUT "$text$ref\n"; | |
239 | $lastprimary = $text; | |
240 | } | |
241 | } | |
242 | ||
243 | ||
244 | ||
245 | ||
246 | ||
247 | ############################################################################## | |
248 | # The main script | |
249 | ||
250 | $save_sorted = 0; | |
251 | $test_index = 0; | |
252 | $show_keys = 0; | |
253 | ||
254 | while (@ARGV > 0) | |
255 | { | |
256 | my($arg) = shift @ARGV; | |
257 | if ($arg eq "-k") { $show_keys = 1; } | |
258 | elsif ($arg eq "-s") { $save_sorted = 1; } | |
259 | elsif ($arg eq "-t") { $test_index = $save_sorted = 1; } | |
260 | else { die "Unknown option $arg\n"; } | |
261 | } | |
262 | ||
263 | if ($test_index) | |
264 | { | |
265 | open(IN, "z-testindex") || die "Can't open z-testindex\n"; | |
266 | } | |
267 | else | |
268 | { | |
269 | open(IN, "z-rawindex") || die "Can't open z-rawindex\n"; | |
270 | } | |
271 | ||
272 | open(OUT, ">z-index") || die "Can't open z-index\n"; | |
273 | ||
274 | # Extract index lines ($e lines are contents). Until we hit the first | |
275 | # $e line, we are dealing with "see also" index lines, for which we want | |
276 | # to turn the line number into 99999. | |
277 | ||
278 | $#lines = -1; | |
279 | $prestuff = 1; | |
280 | ||
281 | while (<IN>) | |
282 | { | |
283 | s/\n$//; | |
284 | if (/\$e/) | |
285 | { | |
286 | $prestuff = 0; | |
287 | } | |
288 | else | |
289 | { | |
290 | s/(\D)$/$1 99999/ if $prestuff; # No number in "see also" | |
291 | push(@lines, $_); | |
292 | } | |
293 | $index_pagenumber = $1 if /^Index\$e(\d+)/; | |
294 | } | |
295 | close(IN); | |
296 | ||
297 | # Sort, ignoring markup | |
298 | ||
299 | print STDERR "Sorting ...\n"; | |
300 | @lines = sort cf @lines; | |
301 | ||
302 | # Keep a copy of the sorted data, for reference | |
303 | ||
304 | if ($save_sorted) | |
305 | { | |
306 | open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n"; | |
307 | foreach $line (@lines) | |
308 | { | |
309 | print X "$line\n"; | |
310 | } | |
311 | close(X); | |
312 | } | |
313 | ||
314 | # Heading for the index file | |
315 | ||
316 | print OUT <<"EOF"; | |
317 | .library "a4ps" | |
318 | .linelength ~~sys.linelength + 16.0 | |
319 | ||
320 | .include "markup.sg" | |
321 | ||
322 | .indent 3em | |
323 | .parspace 0 | |
324 | .parindent -3em | |
325 | .justify left | |
326 | . | |
327 | .foot | |
328 | \$c [~~sys.pagenumber] | |
329 | .endfoot | |
330 | . | |
331 | .cancelflag # | |
332 | .flag # "\$S*1" | |
333 | .set INDEX true | |
334 | . | |
335 | .macro primary "text" | |
336 | .if ~~sys.leftonpage < 2ld | |
337 | .newcolumn | |
338 | .fi | |
339 | ~~1 | |
340 | .newpar | |
341 | .endm | |
342 | . | |
343 | .macro secondary "prim" "sec" "contprim" | |
344 | .if ~~sys.leftonpage < 1ld | |
345 | .newcolumn | |
346 | .newpar | |
347 | ~~3 \$it\{(continued)\} | |
348 | .newpar | |
349 | .fi | |
350 | ##~~2 | |
351 | .endm | |
352 | . | |
353 | .macro newletter | |
354 | .if ~~sys.leftonpage < 4ld | |
355 | .newcolumn | |
356 | .else | |
357 | .space 1ld | |
358 | .fi | |
359 | .newpar | |
360 | .endm | |
361 | . | |
362 | .set chapter -1 | |
363 | .page $index_pagenumber | |
364 | .chapter Index | |
365 | .columns 2 | |
366 | .newpar | |
367 | . | |
368 | EOF | |
369 | ||
370 | # Process the lines and output the result. | |
371 | # Note that $linenumber is global, and is changed by getnextentry() for | |
372 | # pairs of lines that represent ranges. | |
373 | ||
374 | $lastprimary = ""; | |
375 | $lastref = ""; | |
376 | $currenttext = $currentref = ""; | |
377 | $currentletter = ""; | |
378 | $badrange = 0; | |
379 | ||
380 | print STDERR "Processing ...\n"; | |
381 | ||
382 | for ($linenumber = 0; $linenumber < @lines; $linenumber++) | |
383 | { | |
384 | $line = &getnextentry(); | |
385 | ||
386 | if ($line eq "") # Bad range data - but carry on to get all of it | |
387 | { | |
388 | $badrange = 1; | |
389 | next; | |
390 | } | |
391 | ||
392 | # Split off the text and reference | |
393 | ||
394 | ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/; | |
395 | ||
396 | # If same as current text, just add the new reference, unless its a duplicate | |
397 | ||
398 | if ($text eq $currenttext) | |
399 | { | |
400 | if ($ref ne $lastref) | |
401 | { | |
402 | $currentref .= ", $ref"; | |
403 | $lastref = $ref; | |
404 | } | |
405 | next; | |
406 | } | |
407 | ||
408 | # Not the same as the current text. Output the current text, then | |
409 | # set up a new current. | |
410 | ||
411 | &outline($currenttext, $currentref); | |
412 | ||
413 | $currenttext = $text; | |
414 | $currentref = $lastref = $ref; | |
415 | } | |
416 | ||
417 | # Output the final line and close the file | |
418 | ||
419 | &outline($currenttext, $currentref); | |
420 | close(OUT); | |
421 | ||
422 | die "** Aborted\n" if $badrange; | |
423 | ||
424 | # Format the index | |
425 | ||
426 | system("sgcal z-index -to zi-gcode -index /dev/null"); | |
427 | system("sgtops zi-gcode -to zi-ps"); | |
428 | print "PostScript in zi-ps\n"; | |
429 | ||
430 | # End |