| 1 | #! /usr/bin/perl -w |
| 2 | |
| 3 | # Script for producing the Index for the Exim manual from the output of the |
| 4 | # SGCAL run. This is copied from the script for the Exim book. |
| 5 | |
| 6 | |
| 7 | ############################################################################## |
| 8 | # Patterns for matching things to be removed from the sort keys |
| 9 | |
| 10 | # This was copied from the Exim book processor, but we have now found a |
| 11 | # better way of doing this. Leave the code until I am quite sure... |
| 12 | |
| 13 | # $pat[0] = qr/ \(\\\*see also\*\\[^)]+\)/; |
| 14 | # $pat[1] = qr/(?<!@)\/\//; # // |
| 15 | # $pat[2] = qr/(?<!@)\/\\/; # /\ |
| 16 | # $pat[3] = qr/(?<!@)\\\//; # \/ |
| 17 | # $pat[4] = qr/(?<!@) \\ # non-@ \, followed by one of |
| 18 | # (?: |
| 19 | # [\.\/] | # dot or slash |
| 20 | # !- | # !- |
| 21 | # !\+ | # !+ |
| 22 | # !\. | # !. |
| 23 | # "\+ | # "+ |
| 24 | # \([.\/]? | # ( and optional . or slash |
| 25 | # [[\$\\%?!-"] | # [ $ \ % ! " or - |
| 26 | # \*{1,2} | # * or ** |
| 27 | # \^{1,2}\/? # ^ or ^^ and optional slash |
| 28 | # )/x; |
| 29 | # $pat[5] = qr/(?: []\$\\%)?!"] | # ] $ \ % ) ? " or ! ) |
| 30 | # \*{1,2} | # * or ** ) optional |
| 31 | # \^{1,2})? # ^ or ^^ ) |
| 32 | # \\/x; # then \ |
| 33 | # $pat[6] = qr/(?<!@)::/; |
| 34 | # $pat[7] = qr/\sR[FS]\b/; |
| 35 | # $pat[8] = qr/``/; |
| 36 | # $pat[9] = qr/''/; |
| 37 | # $pat[10] = qr/`/; |
| 38 | # $pat[11] = qr/'/; |
| 39 | # $pat[12] = qr/,/; |
| 40 | # $pat[13] = qr/\(e?s\)/; |
| 41 | |
| 42 | |
| 43 | # Other patterns |
| 44 | |
| 45 | # $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/; |
| 46 | |
| 47 | $keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/; |
| 48 | |
| 49 | |
| 50 | # The sort function |
| 51 | |
| 52 | sub cf { |
| 53 | my($x,$y) = ($a,$b); |
| 54 | |
| 55 | ############old############# |
| 56 | #foreach $pattern (@pat) # Remove strings by pattern |
| 57 | # { |
| 58 | # $x =~ s/$pattern//g; |
| 59 | # $y =~ s/$pattern//g; |
| 60 | # } |
| 61 | ########################## |
| 62 | |
| 63 | |
| 64 | # Turn || into @|@| |
| 65 | |
| 66 | $x =~ s/\|\|/@|@|/g; |
| 67 | $y =~ s/\|\|/@|@|/g; |
| 68 | |
| 69 | # Remove all special characters, except those preceded by @ |
| 70 | |
| 71 | $x =~ s/(?<!\@)[^\w\@\s]//g; |
| 72 | $y =~ s/(?<!\@)[^\w\@\s]//g; |
| 73 | |
| 74 | # Remove the escaping @s |
| 75 | |
| 76 | #$x =~ s/\@(.)/$1/g; |
| 77 | #$y =~ s/\@(.)/$1/g; |
| 78 | |
| 79 | |
| 80 | |
| 81 | ################old ######################## |
| 82 | #$x =~ s/:(\w+):/$1/g; # :fail: etc => fail |
| 83 | #$y =~ s/:(\w+):/$1/g; |
| 84 | |
| 85 | #$x =~ s/^\@[^a-z]+/\@/i; # Make keys starting with @ |
| 86 | #$y =~ s/^\@[^a-z]+/\@/i; # sort on @ followed by the first letter |
| 87 | ##############################################3 |
| 88 | |
| 89 | |
| 90 | $x =~ s/\@_/\x7f/g; # Make underscore sort late (option names) |
| 91 | $y =~ s/\@_/\x7f/g; |
| 92 | |
| 93 | # Split up to sort on individual parts |
| 94 | |
| 95 | my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/; |
| 96 | my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/; |
| 97 | |
| 98 | $xr = "" if !defined $xr; |
| 99 | $yr = "" if !defined $yr; |
| 100 | |
| 101 | $xs = "" if !defined $xs; |
| 102 | $ys = "" if !defined $ys; |
| 103 | |
| 104 | if ($show_keys) |
| 105 | { |
| 106 | print "a=$a\n x=$x\n xp=$xp\n xs=$xs\n xr=$xr\n xn=$xn\n"; |
| 107 | print "b=$b\n y=$y\n yp=$yp\n ys=$ys\n yr=$yr\n yn=$yn\n"; |
| 108 | } |
| 109 | |
| 110 | my ($c) = "\L$xp" cmp "\L$yp"; # Caseless, primary text only |
| 111 | $c = $xp cmp $yp if $c == 0; # Caseful, primary text only |
| 112 | $c = "\L$xs" cmp "\L$ys" if $c == 0; # Caseless, secondary text only |
| 113 | $c = $xs cmp $ys if $c == 0; # Caseful, secondary text only |
| 114 | $c = $xn <=> $yn if $c == 0; # Compare the numbers |
| 115 | $c = $xr cmp $yr if $c == 0; # Sort RA before RZ |
| 116 | return $c; |
| 117 | } |
| 118 | |
| 119 | |
| 120 | |
| 121 | ############################################################################## |
| 122 | # Function for getting the next line from the @lines vector, using the global |
| 123 | # index $1. If the next pair of lines specifies a range of pages, combine them. |
| 124 | # That's why $linenumber has to be global - so we can increment it. If there's |
| 125 | # a range error, return "". |
| 126 | |
| 127 | sub getnextentry { |
| 128 | my($line) = $lines[$linenumber]; |
| 129 | my($aa,$zz,$tline,$nextline,$tnextline); |
| 130 | |
| 131 | if ($line =~ / RA (\d+)/) |
| 132 | { |
| 133 | $aa = $1; |
| 134 | $nextline = $lines[++$linenumber]; |
| 135 | if ($nextline =~ / RZ (\d+)/) |
| 136 | { |
| 137 | $zz = $1; |
| 138 | } |
| 139 | else |
| 140 | { |
| 141 | print STDERR "** Bad range data (1)\n"; |
| 142 | print STDERR " $line\n"; |
| 143 | print STDERR " $nextline\n"; |
| 144 | return ""; |
| 145 | } |
| 146 | |
| 147 | $tline = $line; |
| 148 | $tnextline = $nextline; |
| 149 | |
| 150 | $tline =~ s/ RA \d+//; |
| 151 | $tnextline =~ s/ RZ \d+//; |
| 152 | |
| 153 | if ($tline ne $tnextline) |
| 154 | { |
| 155 | print STDERR "** Bad range data (2)\n"; |
| 156 | print STDERR " $line\n"; |
| 157 | print STDERR " $nextline\n"; |
| 158 | return ""; |
| 159 | } |
| 160 | |
| 161 | $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz"; |
| 162 | } |
| 163 | |
| 164 | elsif ($line =~ / RZ (\d+)/) |
| 165 | { |
| 166 | print STDERR "** Bad range data (RZ without RA)\n"; |
| 167 | print STDERR " $line\n"; |
| 168 | return ""; |
| 169 | } |
| 170 | |
| 171 | return $line |
| 172 | } |
| 173 | |
| 174 | |
| 175 | |
| 176 | |
| 177 | ############################################################################## |
| 178 | # Function for outputting a line, checking for the current primary |
| 179 | # and indenting a bit for secondaries. We also need a newpar |
| 180 | # before each item, because the main indent is set to a largish indent |
| 181 | # for long reference lists, but the parindent is set to counter this. |
| 182 | # This is where we handle the break between letters. We know that any non- |
| 183 | # alphamerics at the start of lines are markup, except for @. A reference |
| 184 | # value of 99999 is for the "see also" lines. Suppress it. |
| 185 | |
| 186 | sub outline { |
| 187 | my($text,$ref) = ($_[0],$_[1]); |
| 188 | my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/; |
| 189 | |
| 190 | return if $text =~ /^\s*$/; |
| 191 | |
| 192 | if ($ref eq "99999") # dummy for see also |
| 193 | { |
| 194 | $ref = "" |
| 195 | } |
| 196 | else |
| 197 | { |
| 198 | $ref = "#$ref"; # prepend space |
| 199 | } |
| 200 | |
| 201 | if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; } |
| 202 | |
| 203 | print OUT ".newpar\n"; |
| 204 | |
| 205 | if ($letter ne $currentletter && $letter ge "A") |
| 206 | { |
| 207 | print OUT ".newletter\n"; |
| 208 | $currentletter = $letter; |
| 209 | } |
| 210 | |
| 211 | $text =~ s/\@'/\$'/g; # Turns @' into $' so that it prints a non-curly quote |
| 212 | |
| 213 | if ($text =~ /^(.+)\|\|(.*)$/) |
| 214 | { |
| 215 | my($primary,$secondary) = ($1,$2); |
| 216 | |
| 217 | if ($primary ne $lastprimary) |
| 218 | { |
| 219 | print OUT ".primary $primary\n"; |
| 220 | $lastprimary = $primary; |
| 221 | } |
| 222 | |
| 223 | $primary =~ s/"/""/g; |
| 224 | $secondary =~ s/"/""/g; |
| 225 | |
| 226 | my($contprim) = $primary; |
| 227 | $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//; |
| 228 | |
| 229 | print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n"; |
| 230 | } |
| 231 | |
| 232 | # Not a two-part item; insert @ if the first char is a dot |
| 233 | |
| 234 | else |
| 235 | { |
| 236 | print OUT "@" if $text =~ /^\./; |
| 237 | print OUT "$text$ref\n"; |
| 238 | $lastprimary = $text; |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | |
| 243 | |
| 244 | |
| 245 | |
| 246 | ############################################################################## |
| 247 | # The main script |
| 248 | |
| 249 | $save_sorted = 0; |
| 250 | $test_index = 0; |
| 251 | $show_keys = 0; |
| 252 | |
| 253 | while (@ARGV > 0) |
| 254 | { |
| 255 | my($arg) = shift @ARGV; |
| 256 | if ($arg eq "-k") { $show_keys = 1; } |
| 257 | elsif ($arg eq "-s") { $save_sorted = 1; } |
| 258 | elsif ($arg eq "-t") { $test_index = $save_sorted = 1; } |
| 259 | else { die "Unknown option $arg\n"; } |
| 260 | } |
| 261 | |
| 262 | if ($test_index) |
| 263 | { |
| 264 | open(IN, "z-testindex") || die "Can't open z-testindex\n"; |
| 265 | } |
| 266 | else |
| 267 | { |
| 268 | open(IN, "z-rawindex") || die "Can't open z-rawindex\n"; |
| 269 | } |
| 270 | |
| 271 | open(OUT, ">z-index") || die "Can't open z-index\n"; |
| 272 | |
| 273 | # Extract index lines ($e lines are contents). Until we hit the first |
| 274 | # $e line, we are dealing with "see also" index lines, for which we want |
| 275 | # to turn the line number into 99999. |
| 276 | |
| 277 | $#lines = -1; |
| 278 | $prestuff = 1; |
| 279 | |
| 280 | while (<IN>) |
| 281 | { |
| 282 | s/\n$//; |
| 283 | if (/\$e/) |
| 284 | { |
| 285 | $prestuff = 0; |
| 286 | } |
| 287 | else |
| 288 | { |
| 289 | s/(\D)$/$1 99999/ if $prestuff; # No number in "see also" |
| 290 | push(@lines, $_); |
| 291 | } |
| 292 | $index_pagenumber = $1 if /^Index\$e(\d+)/; |
| 293 | } |
| 294 | close(IN); |
| 295 | |
| 296 | # Sort, ignoring markup |
| 297 | |
| 298 | print STDERR "Sorting ...\n"; |
| 299 | @lines = sort cf @lines; |
| 300 | |
| 301 | # Keep a copy of the sorted data, for reference |
| 302 | |
| 303 | if ($save_sorted) |
| 304 | { |
| 305 | open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n"; |
| 306 | foreach $line (@lines) |
| 307 | { |
| 308 | print X "$line\n"; |
| 309 | } |
| 310 | close(X); |
| 311 | } |
| 312 | |
| 313 | # Heading for the index file |
| 314 | |
| 315 | print OUT <<"EOF"; |
| 316 | .library "a4ps" |
| 317 | .linelength ~~sys.linelength + 16.0 |
| 318 | |
| 319 | .include "markup.sg" |
| 320 | |
| 321 | .indent 3em |
| 322 | .parspace 0 |
| 323 | .parindent -3em |
| 324 | .justify left |
| 325 | . |
| 326 | .foot |
| 327 | \$c [~~sys.pagenumber] |
| 328 | .endfoot |
| 329 | . |
| 330 | .cancelflag # |
| 331 | .flag # "\$S*1" |
| 332 | .set INDEX true |
| 333 | . |
| 334 | .macro primary "text" |
| 335 | .if ~~sys.leftonpage < 2ld |
| 336 | .newcolumn |
| 337 | .fi |
| 338 | ~~1 |
| 339 | .newpar |
| 340 | .endm |
| 341 | . |
| 342 | .macro secondary "prim" "sec" "contprim" |
| 343 | .if ~~sys.leftonpage < 1ld |
| 344 | .newcolumn |
| 345 | .newpar |
| 346 | ~~3 \$it\{(continued)\} |
| 347 | .newpar |
| 348 | .fi |
| 349 | ##~~2 |
| 350 | .endm |
| 351 | . |
| 352 | .macro newletter |
| 353 | .if ~~sys.leftonpage < 4ld |
| 354 | .newcolumn |
| 355 | .else |
| 356 | .space 1ld |
| 357 | .fi |
| 358 | .newpar |
| 359 | .endm |
| 360 | . |
| 361 | .set chapter -1 |
| 362 | .page $index_pagenumber |
| 363 | .chapter Index |
| 364 | .columns 2 |
| 365 | .newpar |
| 366 | . |
| 367 | EOF |
| 368 | |
| 369 | # Process the lines and output the result. |
| 370 | # Note that $linenumber is global, and is changed by getnextentry() for |
| 371 | # pairs of lines that represent ranges. |
| 372 | |
| 373 | $lastprimary = ""; |
| 374 | $lastref = ""; |
| 375 | $currenttext = $currentref = ""; |
| 376 | $currentletter = ""; |
| 377 | $badrange = 0; |
| 378 | |
| 379 | print STDERR "Processing ...\n"; |
| 380 | |
| 381 | for ($linenumber = 0; $linenumber < @lines; $linenumber++) |
| 382 | { |
| 383 | $line = &getnextentry(); |
| 384 | |
| 385 | if ($line eq "") # Bad range data - but carry on to get all of it |
| 386 | { |
| 387 | $badrange = 1; |
| 388 | next; |
| 389 | } |
| 390 | |
| 391 | # Split off the text and reference |
| 392 | |
| 393 | ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/; |
| 394 | |
| 395 | # If same as current text, just add the new reference, unless its a duplicate |
| 396 | |
| 397 | if ($text eq $currenttext) |
| 398 | { |
| 399 | if ($ref ne $lastref) |
| 400 | { |
| 401 | $currentref .= ", $ref"; |
| 402 | $lastref = $ref; |
| 403 | } |
| 404 | next; |
| 405 | } |
| 406 | |
| 407 | # Not the same as the current text. Output the current text, then |
| 408 | # set up a new current. |
| 409 | |
| 410 | &outline($currenttext, $currentref); |
| 411 | |
| 412 | $currenttext = $text; |
| 413 | $currentref = $lastref = $ref; |
| 414 | } |
| 415 | |
| 416 | # Output the final line and close the file |
| 417 | |
| 418 | &outline($currenttext, $currentref); |
| 419 | close(OUT); |
| 420 | |
| 421 | die "** Aborted\n" if $badrange; |
| 422 | |
| 423 | # Format the index |
| 424 | |
| 425 | system("sgcal z-index -to zi-gcode -index /dev/null"); |
| 426 | system("sgtops zi-gcode -to zi-ps"); |
| 427 | print "PostScript in zi-ps\n"; |
| 428 | |
| 429 | # End |