Bail out if a configuration file starts with a byte order mark
[exim.git] / doc / doc-scripts / DoIndex
CommitLineData
495ae4b0 1#! /usr/bin/perl -w
495ae4b0
PH
2
3# Script for producing the Index for the Exim manual from the output of the
4# SGCAL run. This is copied from the script for the Exim book.
5
6
7##############################################################################
8# Patterns for matching things to be removed from the sort keys
9
10# This was copied from the Exim book processor, but we have now found a
11# better way of doing this. Leave the code until I am quite sure...
12
13# $pat[0] = qr/ \(\\\*see also\*\\[^)]+\)/;
14# $pat[1] = qr/(?<!@)\/\//; # //
15# $pat[2] = qr/(?<!@)\/\\/; # /\
16# $pat[3] = qr/(?<!@)\\\//; # \/
17# $pat[4] = qr/(?<!@) \\ # non-@ \, followed by one of
18# (?:
19# [\.\/] | # dot or slash
20# !- | # !-
21# !\+ | # !+
22# !\. | # !.
23# "\+ | # "+
24# \([.\/]? | # ( and optional . or slash
25# [[\$\\%?!-"] | # [ $ \ % ! " or -
26# \*{1,2} | # * or **
27# \^{1,2}\/? # ^ or ^^ and optional slash
28# )/x;
29# $pat[5] = qr/(?: []\$\\%)?!"] | # ] $ \ % ) ? " or ! )
30# \*{1,2} | # * or ** ) optional
31# \^{1,2})? # ^ or ^^ )
32# \\/x; # then \
33# $pat[6] = qr/(?<!@)::/;
34# $pat[7] = qr/\sR[FS]\b/;
35# $pat[8] = qr/``/;
36# $pat[9] = qr/''/;
37# $pat[10] = qr/`/;
38# $pat[11] = qr/'/;
39# $pat[12] = qr/,/;
40# $pat[13] = qr/\(e?s\)/;
41
42
43# Other patterns
44
45# $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/;
46
47$keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/;
48
49
50# The sort function
51
52sub cf {
53my($x,$y) = ($a,$b);
54
55############old#############
56#foreach $pattern (@pat) # Remove strings by pattern
57# {
58# $x =~ s/$pattern//g;
59# $y =~ s/$pattern//g;
60# }
61##########################
62
63
64# Turn || into @|@|
65
66$x =~ s/\|\|/@|@|/g;
67$y =~ s/\|\|/@|@|/g;
68
69# Remove all special characters, except those preceded by @
70
71$x =~ s/(?<!\@)[^\w\@\s]//g;
72$y =~ s/(?<!\@)[^\w\@\s]//g;
73
74# Remove the escaping @s
75
76#$x =~ s/\@(.)/$1/g;
77#$y =~ s/\@(.)/$1/g;
78
79
80
81################old ########################
82#$x =~ s/:(\w+):/$1/g; # :fail: etc => fail
83#$y =~ s/:(\w+):/$1/g;
84
85#$x =~ s/^\@[^a-z]+/\@/i; # Make keys starting with @
86#$y =~ s/^\@[^a-z]+/\@/i; # sort on @ followed by the first letter
87##############################################3
88
89
90$x =~ s/\@_/\x7f/g; # Make underscore sort late (option names)
91$y =~ s/\@_/\x7f/g;
92
93# Split up to sort on individual parts
94
95my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/;
96my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/;
97
98$xr = "" if !defined $xr;
99$yr = "" if !defined $yr;
100
101$xs = "" if !defined $xs;
102$ys = "" if !defined $ys;
103
104if ($show_keys)
105 {
106 print "a=$a\n x=$x\n xp=$xp\n xs=$xs\n xr=$xr\n xn=$xn\n";
107 print "b=$b\n y=$y\n yp=$yp\n ys=$ys\n yr=$yr\n yn=$yn\n";
108 }
109
110my ($c) = "\L$xp" cmp "\L$yp"; # Caseless, primary text only
111$c = $xp cmp $yp if $c == 0; # Caseful, primary text only
112$c = "\L$xs" cmp "\L$ys" if $c == 0; # Caseless, secondary text only
113$c = $xs cmp $ys if $c == 0; # Caseful, secondary text only
114$c = $xn <=> $yn if $c == 0; # Compare the numbers
115$c = $xr cmp $yr if $c == 0; # Sort RA before RZ
116return $c;
117}
118
119
120
121##############################################################################
122# Function for getting the next line from the @lines vector, using the global
123# index $1. If the next pair of lines specifies a range of pages, combine them.
124# That's why $linenumber has to be global - so we can increment it. If there's
125# a range error, return "".
126
127sub getnextentry {
128my($line) = $lines[$linenumber];
129my($aa,$zz,$tline,$nextline,$tnextline);
130
131if ($line =~ / RA (\d+)/)
132 {
133 $aa = $1;
134 $nextline = $lines[++$linenumber];
135 if ($nextline =~ / RZ (\d+)/)
136 {
137 $zz = $1;
138 }
139 else
140 {
141 print STDERR "** Bad range data (1)\n";
142 print STDERR " $line\n";
143 print STDERR " $nextline\n";
144 return "";
145 }
146
147 $tline = $line;
148 $tnextline = $nextline;
149
150 $tline =~ s/ RA \d+//;
151 $tnextline =~ s/ RZ \d+//;
152
153 if ($tline ne $tnextline)
154 {
155 print STDERR "** Bad range data (2)\n";
156 print STDERR " $line\n";
157 print STDERR " $nextline\n";
158 return "";
159 }
160
161 $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz";
162 }
163
164elsif ($line =~ / RZ (\d+)/)
165 {
166 print STDERR "** Bad range data (RZ without RA)\n";
167 print STDERR " $line\n";
168 return "";
169 }
170
171return $line
172}
173
174
175
176
177##############################################################################
178# Function for outputting a line, checking for the current primary
179# and indenting a bit for secondaries. We also need a newpar
180# before each item, because the main indent is set to a largish indent
181# for long reference lists, but the parindent is set to counter this.
182# This is where we handle the break between letters. We know that any non-
183# alphamerics at the start of lines are markup, except for @. A reference
184# value of 99999 is for the "see also" lines. Suppress it.
185
186sub outline {
187my($text,$ref) = ($_[0],$_[1]);
188my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/;
189
190return if $text =~ /^\s*$/;
191
192if ($ref eq "99999") # dummy for see also
193 {
194 $ref = ""
195 }
196else
197 {
198 $ref = "#$ref"; # prepend space
199 }
200
201if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; }
202
203print OUT ".newpar\n";
204
205if ($letter ne $currentletter && $letter ge "A")
206 {
207 print OUT ".newletter\n";
208 $currentletter = $letter;
209 }
210
211$text =~ s/\@'/\$'/g; # Turns @' into $' so that it prints a non-curly quote
212
213if ($text =~ /^(.+)\|\|(.*)$/)
214 {
215 my($primary,$secondary) = ($1,$2);
216
217 if ($primary ne $lastprimary)
218 {
219 print OUT ".primary $primary\n";
220 $lastprimary = $primary;
221 }
222
223 $primary =~ s/"/""/g;
224 $secondary =~ s/"/""/g;
225
226 my($contprim) = $primary;
227 $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//;
228
229 print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n";
230 }
231
232# Not a two-part item; insert @ if the first char is a dot
233
234else
235 {
236 print OUT "@" if $text =~ /^\./;
237 print OUT "$text$ref\n";
238 $lastprimary = $text;
239 }
240}
241
242
243
244
245
246##############################################################################
247# The main script
248
249$save_sorted = 0;
250$test_index = 0;
251$show_keys = 0;
252
253while (@ARGV > 0)
254 {
255 my($arg) = shift @ARGV;
256 if ($arg eq "-k") { $show_keys = 1; }
257 elsif ($arg eq "-s") { $save_sorted = 1; }
258 elsif ($arg eq "-t") { $test_index = $save_sorted = 1; }
259 else { die "Unknown option $arg\n"; }
260 }
261
262if ($test_index)
263 {
264 open(IN, "z-testindex") || die "Can't open z-testindex\n";
265 }
266else
267 {
268 open(IN, "z-rawindex") || die "Can't open z-rawindex\n";
269 }
270
271open(OUT, ">z-index") || die "Can't open z-index\n";
272
273# Extract index lines ($e lines are contents). Until we hit the first
274# $e line, we are dealing with "see also" index lines, for which we want
275# to turn the line number into 99999.
276
277$#lines = -1;
278$prestuff = 1;
279
280while (<IN>)
281 {
282 s/\n$//;
283 if (/\$e/)
284 {
285 $prestuff = 0;
286 }
287 else
288 {
289 s/(\D)$/$1 99999/ if $prestuff; # No number in "see also"
290 push(@lines, $_);
291 }
292 $index_pagenumber = $1 if /^Index\$e(\d+)/;
293 }
294close(IN);
295
296# Sort, ignoring markup
297
298print STDERR "Sorting ...\n";
299@lines = sort cf @lines;
300
301# Keep a copy of the sorted data, for reference
302
303if ($save_sorted)
304 {
305 open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n";
306 foreach $line (@lines)
307 {
308 print X "$line\n";
309 }
310 close(X);
311 }
312
313# Heading for the index file
314
315print OUT <<"EOF";
316.library "a4ps"
317.linelength ~~sys.linelength + 16.0
318
319.include "markup.sg"
320
321.indent 3em
322.parspace 0
323.parindent -3em
324.justify left
325.
326.foot
327\$c [~~sys.pagenumber]
328.endfoot
329.
330.cancelflag #
331.flag # "\$S*1"
332.set INDEX true
333.
334.macro primary "text"
335.if ~~sys.leftonpage < 2ld
336.newcolumn
337.fi
338~~1
339.newpar
340.endm
341.
342.macro secondary "prim" "sec" "contprim"
343.if ~~sys.leftonpage < 1ld
344.newcolumn
345.newpar
346~~3 \$it\{(continued)\}
347.newpar
348.fi
349##~~2
350.endm
351.
352.macro newletter
353.if ~~sys.leftonpage < 4ld
354.newcolumn
355.else
356.space 1ld
357.fi
358.newpar
359.endm
360.
361.set chapter -1
362.page $index_pagenumber
363.chapter Index
364.columns 2
365.newpar
366.
367EOF
368
369# Process the lines and output the result.
370# Note that $linenumber is global, and is changed by getnextentry() for
371# pairs of lines that represent ranges.
372
373$lastprimary = "";
374$lastref = "";
375$currenttext = $currentref = "";
376$currentletter = "";
377$badrange = 0;
378
379print STDERR "Processing ...\n";
380
381for ($linenumber = 0; $linenumber < @lines; $linenumber++)
382 {
383 $line = &getnextentry();
384
385 if ($line eq "") # Bad range data - but carry on to get all of it
386 {
387 $badrange = 1;
388 next;
389 }
390
391 # Split off the text and reference
392
393 ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/;
394
395 # If same as current text, just add the new reference, unless its a duplicate
396
397 if ($text eq $currenttext)
398 {
399 if ($ref ne $lastref)
400 {
401 $currentref .= ", $ref";
402 $lastref = $ref;
403 }
404 next;
405 }
406
407 # Not the same as the current text. Output the current text, then
408 # set up a new current.
409
410 &outline($currenttext, $currentref);
411
412 $currenttext = $text;
413 $currentref = $lastref = $ref;
414 }
415
416# Output the final line and close the file
417
418&outline($currenttext, $currentref);
419close(OUT);
420
421die "** Aborted\n" if $badrange;
422
423# Format the index
424
425system("sgcal z-index -to zi-gcode -index /dev/null");
426system("sgtops zi-gcode -to zi-ps");
427print "PostScript in zi-ps\n";
428
429# End