Two minor corrections in the BMI section
[exim.git] / doc / doc-scripts / DoIndex
CommitLineData
495ae4b0
PH
1#! /usr/bin/perl -w
2# $Cambridge: exim/doc/doc-scripts/DoIndex,v 1.1 2004/10/07 15:04:35 ph10 Exp $
3
4# Script for producing the Index for the Exim manual from the output of the
5# SGCAL run. This is copied from the script for the Exim book.
6
7
8##############################################################################
9# Patterns for matching things to be removed from the sort keys
10
11# This was copied from the Exim book processor, but we have now found a
12# better way of doing this. Leave the code until I am quite sure...
13
14# $pat[0] = qr/ \(\\\*see also\*\\[^)]+\)/;
15# $pat[1] = qr/(?<!@)\/\//; # //
16# $pat[2] = qr/(?<!@)\/\\/; # /\
17# $pat[3] = qr/(?<!@)\\\//; # \/
18# $pat[4] = qr/(?<!@) \\ # non-@ \, followed by one of
19# (?:
20# [\.\/] | # dot or slash
21# !- | # !-
22# !\+ | # !+
23# !\. | # !.
24# "\+ | # "+
25# \([.\/]? | # ( and optional . or slash
26# [[\$\\%?!-"] | # [ $ \ % ! " or -
27# \*{1,2} | # * or **
28# \^{1,2}\/? # ^ or ^^ and optional slash
29# )/x;
30# $pat[5] = qr/(?: []\$\\%)?!"] | # ] $ \ % ) ? " or ! )
31# \*{1,2} | # * or ** ) optional
32# \^{1,2})? # ^ or ^^ )
33# \\/x; # then \
34# $pat[6] = qr/(?<!@)::/;
35# $pat[7] = qr/\sR[FS]\b/;
36# $pat[8] = qr/``/;
37# $pat[9] = qr/''/;
38# $pat[10] = qr/`/;
39# $pat[11] = qr/'/;
40# $pat[12] = qr/,/;
41# $pat[13] = qr/\(e?s\)/;
42
43
44# Other patterns
45
46# $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/;
47
48$keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/;
49
50
51# The sort function
52
53sub cf {
54my($x,$y) = ($a,$b);
55
56############old#############
57#foreach $pattern (@pat) # Remove strings by pattern
58# {
59# $x =~ s/$pattern//g;
60# $y =~ s/$pattern//g;
61# }
62##########################
63
64
65# Turn || into @|@|
66
67$x =~ s/\|\|/@|@|/g;
68$y =~ s/\|\|/@|@|/g;
69
70# Remove all special characters, except those preceded by @
71
72$x =~ s/(?<!\@)[^\w\@\s]//g;
73$y =~ s/(?<!\@)[^\w\@\s]//g;
74
75# Remove the escaping @s
76
77#$x =~ s/\@(.)/$1/g;
78#$y =~ s/\@(.)/$1/g;
79
80
81
82################old ########################
83#$x =~ s/:(\w+):/$1/g; # :fail: etc => fail
84#$y =~ s/:(\w+):/$1/g;
85
86#$x =~ s/^\@[^a-z]+/\@/i; # Make keys starting with @
87#$y =~ s/^\@[^a-z]+/\@/i; # sort on @ followed by the first letter
88##############################################3
89
90
91$x =~ s/\@_/\x7f/g; # Make underscore sort late (option names)
92$y =~ s/\@_/\x7f/g;
93
94# Split up to sort on individual parts
95
96my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/;
97my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/;
98
99$xr = "" if !defined $xr;
100$yr = "" if !defined $yr;
101
102$xs = "" if !defined $xs;
103$ys = "" if !defined $ys;
104
105if ($show_keys)
106 {
107 print "a=$a\n x=$x\n xp=$xp\n xs=$xs\n xr=$xr\n xn=$xn\n";
108 print "b=$b\n y=$y\n yp=$yp\n ys=$ys\n yr=$yr\n yn=$yn\n";
109 }
110
111my ($c) = "\L$xp" cmp "\L$yp"; # Caseless, primary text only
112$c = $xp cmp $yp if $c == 0; # Caseful, primary text only
113$c = "\L$xs" cmp "\L$ys" if $c == 0; # Caseless, secondary text only
114$c = $xs cmp $ys if $c == 0; # Caseful, secondary text only
115$c = $xn <=> $yn if $c == 0; # Compare the numbers
116$c = $xr cmp $yr if $c == 0; # Sort RA before RZ
117return $c;
118}
119
120
121
122##############################################################################
123# Function for getting the next line from the @lines vector, using the global
124# index $1. If the next pair of lines specifies a range of pages, combine them.
125# That's why $linenumber has to be global - so we can increment it. If there's
126# a range error, return "".
127
128sub getnextentry {
129my($line) = $lines[$linenumber];
130my($aa,$zz,$tline,$nextline,$tnextline);
131
132if ($line =~ / RA (\d+)/)
133 {
134 $aa = $1;
135 $nextline = $lines[++$linenumber];
136 if ($nextline =~ / RZ (\d+)/)
137 {
138 $zz = $1;
139 }
140 else
141 {
142 print STDERR "** Bad range data (1)\n";
143 print STDERR " $line\n";
144 print STDERR " $nextline\n";
145 return "";
146 }
147
148 $tline = $line;
149 $tnextline = $nextline;
150
151 $tline =~ s/ RA \d+//;
152 $tnextline =~ s/ RZ \d+//;
153
154 if ($tline ne $tnextline)
155 {
156 print STDERR "** Bad range data (2)\n";
157 print STDERR " $line\n";
158 print STDERR " $nextline\n";
159 return "";
160 }
161
162 $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz";
163 }
164
165elsif ($line =~ / RZ (\d+)/)
166 {
167 print STDERR "** Bad range data (RZ without RA)\n";
168 print STDERR " $line\n";
169 return "";
170 }
171
172return $line
173}
174
175
176
177
178##############################################################################
179# Function for outputting a line, checking for the current primary
180# and indenting a bit for secondaries. We also need a newpar
181# before each item, because the main indent is set to a largish indent
182# for long reference lists, but the parindent is set to counter this.
183# This is where we handle the break between letters. We know that any non-
184# alphamerics at the start of lines are markup, except for @. A reference
185# value of 99999 is for the "see also" lines. Suppress it.
186
187sub outline {
188my($text,$ref) = ($_[0],$_[1]);
189my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/;
190
191return if $text =~ /^\s*$/;
192
193if ($ref eq "99999") # dummy for see also
194 {
195 $ref = ""
196 }
197else
198 {
199 $ref = "#$ref"; # prepend space
200 }
201
202if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; }
203
204print OUT ".newpar\n";
205
206if ($letter ne $currentletter && $letter ge "A")
207 {
208 print OUT ".newletter\n";
209 $currentletter = $letter;
210 }
211
212$text =~ s/\@'/\$'/g; # Turns @' into $' so that it prints a non-curly quote
213
214if ($text =~ /^(.+)\|\|(.*)$/)
215 {
216 my($primary,$secondary) = ($1,$2);
217
218 if ($primary ne $lastprimary)
219 {
220 print OUT ".primary $primary\n";
221 $lastprimary = $primary;
222 }
223
224 $primary =~ s/"/""/g;
225 $secondary =~ s/"/""/g;
226
227 my($contprim) = $primary;
228 $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//;
229
230 print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n";
231 }
232
233# Not a two-part item; insert @ if the first char is a dot
234
235else
236 {
237 print OUT "@" if $text =~ /^\./;
238 print OUT "$text$ref\n";
239 $lastprimary = $text;
240 }
241}
242
243
244
245
246
247##############################################################################
248# The main script
249
250$save_sorted = 0;
251$test_index = 0;
252$show_keys = 0;
253
254while (@ARGV > 0)
255 {
256 my($arg) = shift @ARGV;
257 if ($arg eq "-k") { $show_keys = 1; }
258 elsif ($arg eq "-s") { $save_sorted = 1; }
259 elsif ($arg eq "-t") { $test_index = $save_sorted = 1; }
260 else { die "Unknown option $arg\n"; }
261 }
262
263if ($test_index)
264 {
265 open(IN, "z-testindex") || die "Can't open z-testindex\n";
266 }
267else
268 {
269 open(IN, "z-rawindex") || die "Can't open z-rawindex\n";
270 }
271
272open(OUT, ">z-index") || die "Can't open z-index\n";
273
274# Extract index lines ($e lines are contents). Until we hit the first
275# $e line, we are dealing with "see also" index lines, for which we want
276# to turn the line number into 99999.
277
278$#lines = -1;
279$prestuff = 1;
280
281while (<IN>)
282 {
283 s/\n$//;
284 if (/\$e/)
285 {
286 $prestuff = 0;
287 }
288 else
289 {
290 s/(\D)$/$1 99999/ if $prestuff; # No number in "see also"
291 push(@lines, $_);
292 }
293 $index_pagenumber = $1 if /^Index\$e(\d+)/;
294 }
295close(IN);
296
297# Sort, ignoring markup
298
299print STDERR "Sorting ...\n";
300@lines = sort cf @lines;
301
302# Keep a copy of the sorted data, for reference
303
304if ($save_sorted)
305 {
306 open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n";
307 foreach $line (@lines)
308 {
309 print X "$line\n";
310 }
311 close(X);
312 }
313
314# Heading for the index file
315
316print OUT <<"EOF";
317.library "a4ps"
318.linelength ~~sys.linelength + 16.0
319
320.include "markup.sg"
321
322.indent 3em
323.parspace 0
324.parindent -3em
325.justify left
326.
327.foot
328\$c [~~sys.pagenumber]
329.endfoot
330.
331.cancelflag #
332.flag # "\$S*1"
333.set INDEX true
334.
335.macro primary "text"
336.if ~~sys.leftonpage < 2ld
337.newcolumn
338.fi
339~~1
340.newpar
341.endm
342.
343.macro secondary "prim" "sec" "contprim"
344.if ~~sys.leftonpage < 1ld
345.newcolumn
346.newpar
347~~3 \$it\{(continued)\}
348.newpar
349.fi
350##~~2
351.endm
352.
353.macro newletter
354.if ~~sys.leftonpage < 4ld
355.newcolumn
356.else
357.space 1ld
358.fi
359.newpar
360.endm
361.
362.set chapter -1
363.page $index_pagenumber
364.chapter Index
365.columns 2
366.newpar
367.
368EOF
369
370# Process the lines and output the result.
371# Note that $linenumber is global, and is changed by getnextentry() for
372# pairs of lines that represent ranges.
373
374$lastprimary = "";
375$lastref = "";
376$currenttext = $currentref = "";
377$currentletter = "";
378$badrange = 0;
379
380print STDERR "Processing ...\n";
381
382for ($linenumber = 0; $linenumber < @lines; $linenumber++)
383 {
384 $line = &getnextentry();
385
386 if ($line eq "") # Bad range data - but carry on to get all of it
387 {
388 $badrange = 1;
389 next;
390 }
391
392 # Split off the text and reference
393
394 ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/;
395
396 # If same as current text, just add the new reference, unless its a duplicate
397
398 if ($text eq $currenttext)
399 {
400 if ($ref ne $lastref)
401 {
402 $currentref .= ", $ref";
403 $lastref = $ref;
404 }
405 next;
406 }
407
408 # Not the same as the current text. Output the current text, then
409 # set up a new current.
410
411 &outline($currenttext, $currentref);
412
413 $currenttext = $text;
414 $currentref = $lastref = $ref;
415 }
416
417# Output the final line and close the file
418
419&outline($currenttext, $currentref);
420close(OUT);
421
422die "** Aborted\n" if $badrange;
423
424# Format the index
425
426system("sgcal z-index -to zi-gcode -index /dev/null");
427system("sgtops zi-gcode -to zi-ps");
428print "PostScript in zi-ps\n";
429
430# End