Commit | Line | Data |
---|---|---|
495ae4b0 | 1 | #! /usr/bin/perl -w |
495ae4b0 PH |
2 | |
3 | # Script for producing the Index for the Exim manual from the output of the | |
4 | # SGCAL run. This is copied from the script for the Exim book. | |
5 | ||
6 | ||
7 | ############################################################################## | |
8 | # Patterns for matching things to be removed from the sort keys | |
9 | ||
10 | # This was copied from the Exim book processor, but we have now found a | |
11 | # better way of doing this. Leave the code until I am quite sure... | |
12 | ||
13 | # $pat[0] = qr/ \(\\\*see also\*\\[^)]+\)/; | |
14 | # $pat[1] = qr/(?<!@)\/\//; # // | |
15 | # $pat[2] = qr/(?<!@)\/\\/; # /\ | |
16 | # $pat[3] = qr/(?<!@)\\\//; # \/ | |
17 | # $pat[4] = qr/(?<!@) \\ # non-@ \, followed by one of | |
18 | # (?: | |
19 | # [\.\/] | # dot or slash | |
20 | # !- | # !- | |
21 | # !\+ | # !+ | |
22 | # !\. | # !. | |
23 | # "\+ | # "+ | |
24 | # \([.\/]? | # ( and optional . or slash | |
25 | # [[\$\\%?!-"] | # [ $ \ % ! " or - | |
26 | # \*{1,2} | # * or ** | |
27 | # \^{1,2}\/? # ^ or ^^ and optional slash | |
28 | # )/x; | |
29 | # $pat[5] = qr/(?: []\$\\%)?!"] | # ] $ \ % ) ? " or ! ) | |
30 | # \*{1,2} | # * or ** ) optional | |
31 | # \^{1,2})? # ^ or ^^ ) | |
32 | # \\/x; # then \ | |
33 | # $pat[6] = qr/(?<!@)::/; | |
34 | # $pat[7] = qr/\sR[FS]\b/; | |
35 | # $pat[8] = qr/``/; | |
36 | # $pat[9] = qr/''/; | |
37 | # $pat[10] = qr/`/; | |
38 | # $pat[11] = qr/'/; | |
39 | # $pat[12] = qr/,/; | |
40 | # $pat[13] = qr/\(e?s\)/; | |
41 | ||
42 | ||
43 | # Other patterns | |
44 | ||
45 | # $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/; | |
46 | ||
47 | $keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/; | |
48 | ||
49 | ||
50 | # The sort function | |
51 | ||
52 | sub cf { | |
53 | my($x,$y) = ($a,$b); | |
54 | ||
55 | ############old############# | |
56 | #foreach $pattern (@pat) # Remove strings by pattern | |
57 | # { | |
58 | # $x =~ s/$pattern//g; | |
59 | # $y =~ s/$pattern//g; | |
60 | # } | |
61 | ########################## | |
62 | ||
63 | ||
64 | # Turn || into @|@| | |
65 | ||
66 | $x =~ s/\|\|/@|@|/g; | |
67 | $y =~ s/\|\|/@|@|/g; | |
68 | ||
69 | # Remove all special characters, except those preceded by @ | |
70 | ||
71 | $x =~ s/(?<!\@)[^\w\@\s]//g; | |
72 | $y =~ s/(?<!\@)[^\w\@\s]//g; | |
73 | ||
74 | # Remove the escaping @s | |
75 | ||
76 | #$x =~ s/\@(.)/$1/g; | |
77 | #$y =~ s/\@(.)/$1/g; | |
78 | ||
79 | ||
80 | ||
81 | ################old ######################## | |
82 | #$x =~ s/:(\w+):/$1/g; # :fail: etc => fail | |
83 | #$y =~ s/:(\w+):/$1/g; | |
84 | ||
85 | #$x =~ s/^\@[^a-z]+/\@/i; # Make keys starting with @ | |
86 | #$y =~ s/^\@[^a-z]+/\@/i; # sort on @ followed by the first letter | |
87 | ##############################################3 | |
88 | ||
89 | ||
90 | $x =~ s/\@_/\x7f/g; # Make underscore sort late (option names) | |
91 | $y =~ s/\@_/\x7f/g; | |
92 | ||
93 | # Split up to sort on individual parts | |
94 | ||
95 | my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/; | |
96 | my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/; | |
97 | ||
98 | $xr = "" if !defined $xr; | |
99 | $yr = "" if !defined $yr; | |
100 | ||
101 | $xs = "" if !defined $xs; | |
102 | $ys = "" if !defined $ys; | |
103 | ||
104 | if ($show_keys) | |
105 | { | |
106 | print "a=$a\n x=$x\n xp=$xp\n xs=$xs\n xr=$xr\n xn=$xn\n"; | |
107 | print "b=$b\n y=$y\n yp=$yp\n ys=$ys\n yr=$yr\n yn=$yn\n"; | |
108 | } | |
109 | ||
110 | my ($c) = "\L$xp" cmp "\L$yp"; # Caseless, primary text only | |
111 | $c = $xp cmp $yp if $c == 0; # Caseful, primary text only | |
112 | $c = "\L$xs" cmp "\L$ys" if $c == 0; # Caseless, secondary text only | |
113 | $c = $xs cmp $ys if $c == 0; # Caseful, secondary text only | |
114 | $c = $xn <=> $yn if $c == 0; # Compare the numbers | |
115 | $c = $xr cmp $yr if $c == 0; # Sort RA before RZ | |
116 | return $c; | |
117 | } | |
118 | ||
119 | ||
120 | ||
121 | ############################################################################## | |
122 | # Function for getting the next line from the @lines vector, using the global | |
123 | # index $1. If the next pair of lines specifies a range of pages, combine them. | |
124 | # That's why $linenumber has to be global - so we can increment it. If there's | |
125 | # a range error, return "". | |
126 | ||
127 | sub getnextentry { | |
128 | my($line) = $lines[$linenumber]; | |
129 | my($aa,$zz,$tline,$nextline,$tnextline); | |
130 | ||
131 | if ($line =~ / RA (\d+)/) | |
132 | { | |
133 | $aa = $1; | |
134 | $nextline = $lines[++$linenumber]; | |
135 | if ($nextline =~ / RZ (\d+)/) | |
136 | { | |
137 | $zz = $1; | |
138 | } | |
139 | else | |
140 | { | |
141 | print STDERR "** Bad range data (1)\n"; | |
142 | print STDERR " $line\n"; | |
143 | print STDERR " $nextline\n"; | |
144 | return ""; | |
145 | } | |
146 | ||
147 | $tline = $line; | |
148 | $tnextline = $nextline; | |
149 | ||
150 | $tline =~ s/ RA \d+//; | |
151 | $tnextline =~ s/ RZ \d+//; | |
152 | ||
153 | if ($tline ne $tnextline) | |
154 | { | |
155 | print STDERR "** Bad range data (2)\n"; | |
156 | print STDERR " $line\n"; | |
157 | print STDERR " $nextline\n"; | |
158 | return ""; | |
159 | } | |
160 | ||
161 | $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz"; | |
162 | } | |
163 | ||
164 | elsif ($line =~ / RZ (\d+)/) | |
165 | { | |
166 | print STDERR "** Bad range data (RZ without RA)\n"; | |
167 | print STDERR " $line\n"; | |
168 | return ""; | |
169 | } | |
170 | ||
171 | return $line | |
172 | } | |
173 | ||
174 | ||
175 | ||
176 | ||
177 | ############################################################################## | |
178 | # Function for outputting a line, checking for the current primary | |
179 | # and indenting a bit for secondaries. We also need a newpar | |
180 | # before each item, because the main indent is set to a largish indent | |
181 | # for long reference lists, but the parindent is set to counter this. | |
182 | # This is where we handle the break between letters. We know that any non- | |
183 | # alphamerics at the start of lines are markup, except for @. A reference | |
184 | # value of 99999 is for the "see also" lines. Suppress it. | |
185 | ||
186 | sub outline { | |
187 | my($text,$ref) = ($_[0],$_[1]); | |
188 | my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/; | |
189 | ||
190 | return if $text =~ /^\s*$/; | |
191 | ||
192 | if ($ref eq "99999") # dummy for see also | |
193 | { | |
194 | $ref = "" | |
195 | } | |
196 | else | |
197 | { | |
198 | $ref = "#$ref"; # prepend space | |
199 | } | |
200 | ||
201 | if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; } | |
202 | ||
203 | print OUT ".newpar\n"; | |
204 | ||
205 | if ($letter ne $currentletter && $letter ge "A") | |
206 | { | |
207 | print OUT ".newletter\n"; | |
208 | $currentletter = $letter; | |
209 | } | |
210 | ||
211 | $text =~ s/\@'/\$'/g; # Turns @' into $' so that it prints a non-curly quote | |
212 | ||
213 | if ($text =~ /^(.+)\|\|(.*)$/) | |
214 | { | |
215 | my($primary,$secondary) = ($1,$2); | |
216 | ||
217 | if ($primary ne $lastprimary) | |
218 | { | |
219 | print OUT ".primary $primary\n"; | |
220 | $lastprimary = $primary; | |
221 | } | |
222 | ||
223 | $primary =~ s/"/""/g; | |
224 | $secondary =~ s/"/""/g; | |
225 | ||
226 | my($contprim) = $primary; | |
227 | $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//; | |
228 | ||
229 | print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n"; | |
230 | } | |
231 | ||
232 | # Not a two-part item; insert @ if the first char is a dot | |
233 | ||
234 | else | |
235 | { | |
236 | print OUT "@" if $text =~ /^\./; | |
237 | print OUT "$text$ref\n"; | |
238 | $lastprimary = $text; | |
239 | } | |
240 | } | |
241 | ||
242 | ||
243 | ||
244 | ||
245 | ||
246 | ############################################################################## | |
247 | # The main script | |
248 | ||
249 | $save_sorted = 0; | |
250 | $test_index = 0; | |
251 | $show_keys = 0; | |
252 | ||
253 | while (@ARGV > 0) | |
254 | { | |
255 | my($arg) = shift @ARGV; | |
256 | if ($arg eq "-k") { $show_keys = 1; } | |
257 | elsif ($arg eq "-s") { $save_sorted = 1; } | |
258 | elsif ($arg eq "-t") { $test_index = $save_sorted = 1; } | |
259 | else { die "Unknown option $arg\n"; } | |
260 | } | |
261 | ||
262 | if ($test_index) | |
263 | { | |
264 | open(IN, "z-testindex") || die "Can't open z-testindex\n"; | |
265 | } | |
266 | else | |
267 | { | |
268 | open(IN, "z-rawindex") || die "Can't open z-rawindex\n"; | |
269 | } | |
270 | ||
271 | open(OUT, ">z-index") || die "Can't open z-index\n"; | |
272 | ||
273 | # Extract index lines ($e lines are contents). Until we hit the first | |
274 | # $e line, we are dealing with "see also" index lines, for which we want | |
275 | # to turn the line number into 99999. | |
276 | ||
277 | $#lines = -1; | |
278 | $prestuff = 1; | |
279 | ||
280 | while (<IN>) | |
281 | { | |
282 | s/\n$//; | |
283 | if (/\$e/) | |
284 | { | |
285 | $prestuff = 0; | |
286 | } | |
287 | else | |
288 | { | |
289 | s/(\D)$/$1 99999/ if $prestuff; # No number in "see also" | |
290 | push(@lines, $_); | |
291 | } | |
292 | $index_pagenumber = $1 if /^Index\$e(\d+)/; | |
293 | } | |
294 | close(IN); | |
295 | ||
296 | # Sort, ignoring markup | |
297 | ||
298 | print STDERR "Sorting ...\n"; | |
299 | @lines = sort cf @lines; | |
300 | ||
301 | # Keep a copy of the sorted data, for reference | |
302 | ||
303 | if ($save_sorted) | |
304 | { | |
305 | open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n"; | |
306 | foreach $line (@lines) | |
307 | { | |
308 | print X "$line\n"; | |
309 | } | |
310 | close(X); | |
311 | } | |
312 | ||
313 | # Heading for the index file | |
314 | ||
315 | print OUT <<"EOF"; | |
316 | .library "a4ps" | |
317 | .linelength ~~sys.linelength + 16.0 | |
318 | ||
319 | .include "markup.sg" | |
320 | ||
321 | .indent 3em | |
322 | .parspace 0 | |
323 | .parindent -3em | |
324 | .justify left | |
325 | . | |
326 | .foot | |
327 | \$c [~~sys.pagenumber] | |
328 | .endfoot | |
329 | . | |
330 | .cancelflag # | |
331 | .flag # "\$S*1" | |
332 | .set INDEX true | |
333 | . | |
334 | .macro primary "text" | |
335 | .if ~~sys.leftonpage < 2ld | |
336 | .newcolumn | |
337 | .fi | |
338 | ~~1 | |
339 | .newpar | |
340 | .endm | |
341 | . | |
342 | .macro secondary "prim" "sec" "contprim" | |
343 | .if ~~sys.leftonpage < 1ld | |
344 | .newcolumn | |
345 | .newpar | |
346 | ~~3 \$it\{(continued)\} | |
347 | .newpar | |
348 | .fi | |
349 | ##~~2 | |
350 | .endm | |
351 | . | |
352 | .macro newletter | |
353 | .if ~~sys.leftonpage < 4ld | |
354 | .newcolumn | |
355 | .else | |
356 | .space 1ld | |
357 | .fi | |
358 | .newpar | |
359 | .endm | |
360 | . | |
361 | .set chapter -1 | |
362 | .page $index_pagenumber | |
363 | .chapter Index | |
364 | .columns 2 | |
365 | .newpar | |
366 | . | |
367 | EOF | |
368 | ||
369 | # Process the lines and output the result. | |
370 | # Note that $linenumber is global, and is changed by getnextentry() for | |
371 | # pairs of lines that represent ranges. | |
372 | ||
373 | $lastprimary = ""; | |
374 | $lastref = ""; | |
375 | $currenttext = $currentref = ""; | |
376 | $currentletter = ""; | |
377 | $badrange = 0; | |
378 | ||
379 | print STDERR "Processing ...\n"; | |
380 | ||
381 | for ($linenumber = 0; $linenumber < @lines; $linenumber++) | |
382 | { | |
383 | $line = &getnextentry(); | |
384 | ||
385 | if ($line eq "") # Bad range data - but carry on to get all of it | |
386 | { | |
387 | $badrange = 1; | |
388 | next; | |
389 | } | |
390 | ||
391 | # Split off the text and reference | |
392 | ||
393 | ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/; | |
394 | ||
395 | # If same as current text, just add the new reference, unless its a duplicate | |
396 | ||
397 | if ($text eq $currenttext) | |
398 | { | |
399 | if ($ref ne $lastref) | |
400 | { | |
401 | $currentref .= ", $ref"; | |
402 | $lastref = $ref; | |
403 | } | |
404 | next; | |
405 | } | |
406 | ||
407 | # Not the same as the current text. Output the current text, then | |
408 | # set up a new current. | |
409 | ||
410 | &outline($currenttext, $currentref); | |
411 | ||
412 | $currenttext = $text; | |
413 | $currentref = $lastref = $ref; | |
414 | } | |
415 | ||
416 | # Output the final line and close the file | |
417 | ||
418 | &outline($currenttext, $currentref); | |
419 | close(OUT); | |
420 | ||
421 | die "** Aborted\n" if $badrange; | |
422 | ||
423 | # Format the index | |
424 | ||
425 | system("sgcal z-index -to zi-gcode -index /dev/null"); | |
426 | system("sgtops zi-gcode -to zi-ps"); | |
427 | print "PostScript in zi-ps\n"; | |
428 | ||
429 | # End |