doc/doc-scripts/DoIndex

   1 #! /usr/bin/perl -w
   2 # $Cambridge: exim/doc/doc-scripts/DoIndex,v 1.1 2004/10/07 15:04:35 ph10 Exp $
   3
   4 # Script for producing the Index for the Exim manual from the output of the
   5 # SGCAL run. This is copied from the script for the Exim book.
   6
   7
   8 ##############################################################################
   9 # Patterns for matching things to be removed from the sort keys
  10
  11 # This was copied from the Exim book processor, but we have now found a
  12 # better way of doing this. Leave the code until I am quite sure...
  13
  14 # $pat[0]  = qr/ \(\\\*see also\*\\[^)]+\)/;
  15 # $pat[1]  = qr/(?<!@)\/\//;                     # //
  16 # $pat[2]  = qr/(?<!@)\/\\/;                     # /\
  17 # $pat[3]  = qr/(?<!@)\\\//;                     # \/
  18 # $pat[4]  = qr/(?<!@) \\                        # non-@ \, followed by one of
  19 #                             (?:
  20 #                             [\.\/] |           # dot or slash
  21 #                             !- |               # !-
  22 #                             !\+ |              # !+
  23 #                             !\. |              # !.
  24 #                             "\+ |              # "+
  25 #                             \([.\/]? |         # ( and optional . or slash
  26 #                             [[\$\\%?!-"] |     # [ $ \ % ! " or -
  27 #                             \*{1,2} |          # * or **
  28 #                             \^{1,2}\/?         # ^ or ^^ and optional slash
  29 #                             )/x;
  30 # $pat[5]  = qr/(?: []\$\\%)?!"] |               # ] $ \ % ) ? " or ! )
  31 #                   \*{1,2}  |                   # * or **            ) optional
  32 #                   \^{1,2})?                    # ^ or ^^            )
  33 #                   \\/x;                        # then \
  34 # $pat[6]  = qr/(?<!@)::/;
  35 # $pat[7]  = qr/\sR[FS]\b/;
  36 # $pat[8]  = qr/``/;
  37 # $pat[9]  = qr/''/;
  38 # $pat[10] = qr/`/;
  39 # $pat[11] = qr/'/;
  40 # $pat[12] = qr/,/;
  41 # $pat[13] = qr/\(e?s\)/;
  42
  43
  44 # Other patterns
  45
  46 # $keysplit = qr/^(.*?)(\|\|.*?)?\s(R[AZ])?\s?(\d+)$/;
  47
  48 $keysplit = qr/^(.*?)(\@\|\@\|.*?)?\s(R[AZ])?\s?(\d+)$/;
  49
  50
  51 # The sort function
  52
  53 sub cf {
  54 my($x,$y) = ($a,$b);
  55
  56 ############old#############
  57 #foreach $pattern (@pat)    # Remove strings by pattern
  58 #  {
  59 #  $x =~ s/$pattern//g;
  60 #  $y =~ s/$pattern//g;
  61 #  }
  62 ##########################
  63
  64
  65 # Turn || into @|@|
  66
  67 $x =~ s/\|\|/@|@|/g;
  68 $y =~ s/\|\|/@|@|/g;
  69
  70 # Remove all special characters, except those preceded by @
  71
  72 $x =~ s/(?<!\@)[^\w\@\s]//g;
  73 $y =~ s/(?<!\@)[^\w\@\s]//g;
  74
  75 # Remove the escaping @s
  76
  77 #$x =~ s/\@(.)/$1/g;
  78 #$y =~ s/\@(.)/$1/g;
  79
  80
  81
  82 ################old ########################
  83 #$x =~ s/:(\w+):/$1/g;      # :fail: etc => fail
  84 #$y =~ s/:(\w+):/$1/g;
  85
  86 #$x =~ s/^\@[^a-z]+/\@/i;   # Make keys starting with @
  87 #$y =~ s/^\@[^a-z]+/\@/i;   # sort on @ followed by the first letter
  88 ##############################################3
  89
  90
  91 $x =~ s/\@_/\x7f/g;        # Make underscore sort late (option names)
  92 $y =~ s/\@_/\x7f/g;
  93
  94 # Split up to sort on individual parts
  95
  96 my($xp,$xs,$xr,$xn) = $x =~ /$keysplit/;
  97 my($yp,$ys,$yr,$yn) = $y =~ /$keysplit/;
  98
  99 $xr = "" if !defined $xr;
 100 $yr = "" if !defined $yr;
 101
 102 $xs = "" if !defined $xs;
 103 $ys = "" if !defined $ys;
 104
 105 if ($show_keys)
 106   {
 107   print "a=$a\n  x=$x\n  xp=$xp\n  xs=$xs\n  xr=$xr\n  xn=$xn\n";
 108   print "b=$b\n  y=$y\n  yp=$yp\n  ys=$ys\n  yr=$yr\n  yn=$yn\n";
 109   }
 110
 111 my ($c) = "\L$xp" cmp "\L$yp";        # Caseless, primary text only
 112 $c = $xp cmp $yp if $c == 0;          # Caseful, primary text only
 113 $c = "\L$xs" cmp "\L$ys" if $c == 0;  # Caseless, secondary text only
 114 $c = $xs cmp $ys if $c == 0;          # Caseful, secondary text only
 115 $c = $xn <=> $yn if $c == 0;          # Compare the numbers
 116 $c = $xr cmp $yr if $c == 0;          # Sort RA before RZ
 117 return $c;
 118 }
 119
 120
 121
 122 ##############################################################################
 123 # Function for getting the next line from the @lines vector, using the global
 124 # index $1. If the next pair of lines specifies a range of pages, combine them.
 125 # That's why $linenumber has to be global - so we can increment it. If there's
 126 # a range error, return "".
 127
 128 sub getnextentry {
 129 my($line) = $lines[$linenumber];
 130 my($aa,$zz,$tline,$nextline,$tnextline);
 131
 132 if ($line =~ / RA (\d+)/)
 133   {
 134   $aa = $1;
 135   $nextline = $lines[++$linenumber];
 136   if ($nextline =~ / RZ (\d+)/)
 137     {
 138     $zz = $1;
 139     }
 140   else
 141     {
 142     print STDERR "** Bad range data (1)\n";
 143     print STDERR "   $line\n";
 144     print STDERR "   $nextline\n";
 145     return "";
 146     }
 147
 148   $tline = $line;
 149   $tnextline = $nextline;
 150
 151   $tline =~ s/ RA \d+//;
 152   $tnextline =~ s/ RZ \d+//;
 153
 154   if ($tline ne $tnextline)
 155     {
 156     print STDERR "** Bad range data (2)\n";
 157     print STDERR "   $line\n";
 158     print STDERR "   $nextline\n";
 159     return "";
 160     }
 161
 162   $line = ($aa eq $zz)? "$tline $aa" : "$tline $aa--$zz";
 163   }
 164
 165 elsif ($line =~ / RZ (\d+)/)
 166   {
 167   print STDERR "** Bad range data (RZ without RA)\n";
 168   print STDERR "   $line\n";
 169   return "";
 170   }
 171
 172 return $line
 173 }
 174
 175
 176
 177
 178 ##############################################################################
 179 # Function for outputting a line, checking for the current primary
 180 # and indenting a bit for secondaries. We also need a newpar
 181 # before each item, because the main indent is set to a largish indent
 182 # for long reference lists, but the parindent is set to counter this.
 183 # This is where we handle the break between letters. We know that any non-
 184 # alphamerics at the start of lines are markup, except for @. A reference
 185 # value of 99999 is for the "see also" lines. Suppress it.
 186
 187 sub outline {
 188 my($text,$ref) = ($_[0],$_[1]);
 189 my ($letter) = $text =~ /^[^A-Za-z0-9\@]*(.)/;
 190
 191 return if $text =~ /^\s*$/;
 192
 193 if ($ref eq "99999")    # dummy for see also
 194   {
 195   $ref = ""
 196   }
 197 else
 198   {
 199   $ref = "#$ref";       # prepend space
 200   }
 201
 202 if ($letter =~ /\d/) { $letter = "0"; } else { $letter = "\U$letter"; }
 203
 204 print OUT ".newpar\n";
 205
 206 if ($letter ne $currentletter && $letter ge "A")
 207   {
 208   print OUT ".newletter\n";
 209   $currentletter = $letter;
 210   }
 211
 212 $text =~ s/\@'/\$'/g;   # Turns @' into $' so that it prints a non-curly quote
 213
 214 if ($text =~ /^(.+)\|\|(.*)$/)
 215   {
 216   my($primary,$secondary) = ($1,$2);
 217
 218   if ($primary ne $lastprimary)
 219     {
 220     print OUT ".primary $primary\n";
 221     $lastprimary = $primary;
 222     }
 223
 224   $primary =~ s/"/""/g;
 225   $secondary =~ s/"/""/g;
 226
 227   my($contprim) = $primary;
 228   $contprim =~ s/ \(\\\*see also\*\\[^)]+\)//;
 229
 230   print OUT ".secondary \"$primary\" \"$secondary$ref\" \"$contprim\"\n";
 231   }
 232
 233 # Not a two-part item; insert @ if the first char is a dot
 234
 235 else
 236   {
 237   print OUT "@" if $text =~ /^\./;
 238   print OUT "$text$ref\n";
 239   $lastprimary = $text;
 240   }
 241 }
 242
 243
 244
 245
 246
 247 ##############################################################################
 248 # The main script
 249
 250 $save_sorted = 0;
 251 $test_index = 0;
 252 $show_keys = 0;
 253
 254 while (@ARGV > 0)
 255   {
 256   my($arg) = shift @ARGV;
 257   if    ($arg eq "-k") { $show_keys = 1; }
 258   elsif ($arg eq "-s") { $save_sorted = 1; }
 259   elsif ($arg eq "-t") { $test_index = $save_sorted = 1; }
 260   else  { die "Unknown option $arg\n"; }
 261   }
 262
 263 if ($test_index)
 264   {
 265   open(IN, "z-testindex") || die "Can't open z-testindex\n";
 266   }
 267 else
 268   {
 269   open(IN, "z-rawindex") || die "Can't open z-rawindex\n";
 270   }
 271
 272 open(OUT, ">z-index")  || die "Can't open z-index\n";
 273
 274 # Extract index lines ($e lines are contents). Until we hit the first
 275 # $e line, we are dealing with "see also" index lines, for which we want
 276 # to turn the line number into 99999.
 277
 278 $#lines = -1;
 279 $prestuff = 1;
 280
 281 while (<IN>)
 282   {
 283   s/\n$//;
 284   if (/\$e/)
 285     {
 286     $prestuff = 0;
 287     }
 288   else
 289     {
 290     s/(\D)$/$1 99999/ if $prestuff;          # No number in "see also"
 291     push(@lines, $_);
 292     }
 293   $index_pagenumber = $1 if /^Index\$e(\d+)/;
 294   }
 295 close(IN);
 296
 297 # Sort, ignoring markup
 298
 299 print STDERR "Sorting ...\n";
 300 @lines = sort cf @lines;
 301
 302 # Keep a copy of the sorted data, for reference
 303
 304 if ($save_sorted)
 305   {
 306   open(X, ">z-indexsorted") || die "Can't open z-indexsorted\n";
 307   foreach $line (@lines)
 308    {
 309    print X "$line\n";
 310    }
 311   close(X);
 312   }
 313
 314 # Heading for the index file
 315
 316 print OUT <<"EOF";
 317 .library "a4ps"
 318 .linelength ~~sys.linelength + 16.0
 319
 320 .include "markup.sg"
 321
 322 .indent 3em
 323 .parspace 0
 324 .parindent -3em
 325 .justify left
 326 .
 327 .foot
 328 \$c [~~sys.pagenumber]
 329 .endfoot
 330 .
 331 .cancelflag #
 332 .flag # "\$S*1"
 333 .set INDEX true
 334 .
 335 .macro primary "text"
 336 .if ~~sys.leftonpage < 2ld
 337 .newcolumn
 338 .fi
 339 ~~1
 340 .newpar
 341 .endm
 342 .
 343 .macro secondary "prim" "sec" "contprim"
 344 .if ~~sys.leftonpage < 1ld
 345 .newcolumn
 346 .newpar
 347 ~~3 \$it\{(continued)\}
 348 .newpar
 349 .fi
 350 ##~~2
 351 .endm
 352 .
 353 .macro newletter
 354 .if ~~sys.leftonpage < 4ld
 355 .newcolumn
 356 .else
 357 .space 1ld
 358 .fi
 359 .newpar
 360 .endm
 361 .
 362 .set chapter -1
 363 .page $index_pagenumber
 364 .chapter Index
 365 .columns 2
 366 .newpar
 367 .
 368 EOF
 369
 370 # Process the lines and output the result.
 371 # Note that $linenumber is global, and is changed by getnextentry() for
 372 # pairs of lines that represent ranges.
 373
 374 $lastprimary = "";
 375 $lastref = "";
 376 $currenttext = $currentref = "";
 377 $currentletter = "";
 378 $badrange = 0;
 379
 380 print STDERR "Processing ...\n";
 381
 382 for ($linenumber = 0; $linenumber < @lines; $linenumber++)
 383   {
 384   $line = &getnextentry();
 385
 386   if ($line eq "")   # Bad range data - but carry on to get all of it
 387     {
 388     $badrange = 1;
 389     next;
 390     }
 391
 392   # Split off the text and reference
 393
 394   ($text,$ref) = $line =~ /^(.*)\s+([\d-]+)$/;
 395
 396   # If same as current text, just add the new reference, unless its a duplicate
 397
 398   if ($text eq $currenttext)
 399     {
 400     if ($ref ne $lastref)
 401       {
 402       $currentref .= ", $ref";
 403       $lastref = $ref;
 404       }
 405     next;
 406     }
 407
 408   # Not the same as the current text. Output the current text, then
 409   # set up a new current.
 410
 411   &outline($currenttext, $currentref);
 412
 413   $currenttext = $text;
 414   $currentref = $lastref = $ref;
 415   }
 416
 417 # Output the final line and close the file
 418
 419 &outline($currenttext, $currentref);
 420 close(OUT);
 421
 422 die "** Aborted\n" if $badrange;
 423
 424 # Format the index
 425
 426 system("sgcal z-index -to zi-gcode -index /dev/null");
 427 system("sgtops zi-gcode -to zi-ps");
 428 print "PostScript in zi-ps\n";
 429
 430 # End