#! /usr/bin/perl # $Cambridge: exim/doc/doc-docbook/TidyHTML-spec,v 1.4 2006/04/04 14:03:49 ph10 Exp $ # Script to tidy up the spec HTML files that are generated by xmlto. The # following changes are made: # # 1. Tidy the index.html file by splitting the very long lines. # 2. Create reverse links from chapter and section titles back to the TOC. # 3. Tidy the ix01.html file - the actual index - by splitting long lines. # 4. Insert links from the letter divisions to the top of the Index. # 5. Turn <div class="literallayout"><p> into <div class="literallayout"> and # a matching </p></div> into </div>
to get rid of unwanted vertical white # space. # 6. Before each occurrence of </td> insert padding so that the table's cell # is a little bit wider than the text itself. # NOTE(review): this copy appears to have lost its line breaks and most text that was written in angle brackets (for example, the three file-read assignments have no right-hand side), so it cannot run as-is -- TODO restore from a pristine copy. chdir "spec_html"; $tocref = 1; # Read in the index.html file. It's really the TOC. open(IN, "index.html") || die "Failed to open index.html for reading: $!\n"; @toc = ; close(IN); # Insert a newline after every > except when it is preceded by 'class="quote"', # because the whole toc is generated as one humungous line that is hard to # check. We have to avoid it in the quote case because that puts a space into # the output, and similarly for the </span> that comes afterwards. Easy way out # is just not to do it for all </span> occurrences. Unfortunately, Perl does # not implement lookbehinds where the alternatives are of different lengths, so # we have to take two passes. foreach $line (@toc) { $line =~ s/(?\s*/>\n/g; $line =~ s/<\/span>\n/<\/span>/g; } # Split the lines so that each one is a separate element in the vector. for ($i = 0; $i < scalar(@toc); $i++) { splice @toc, $i, 1, (split /(?<=\n)/, $toc[$i]); } # We want to create reverse links from each chapter and section title back to # the relevant place in the TOC. Scan the TOC for the relevant entries. Add # an id to each entry, and create tables that remember the file names and the # new link ids. NOTE(review): the pattern below no longer captures anything even though $1 and $2 are read, and nothing visible here fills %backref or @chlist, which later code reads -- TODO restore. foreach $line (@toc) { if ($line =~ /^/) { my($chix) = $1; my($ss) = $2; my($id) = sprintf "%04d", $tocref++; $line =~ s/index.html") || die "Failed to open index.html for writing: $!\n"; print OUT @toc; close(OUT); # Now scan each of the other page files and insert the reverse links. While # we are at it, we tidy up literallayout elements

by removing unwanted # paragraph marks, which generate unwanted vertical space. We also insert # padding before </td> to push table cells apart from each other. foreach $file (@chlist) { open(IN, "$file") || die "Failed to open $file for reading: $!\n"; @text = ; close(IN); # Insert a newline after certain elements, and split the lines so that each # one is a separate element in the vector. This makes it easier to recognize # these elements. foreach $line (@text) { $line =~ s/

\s*(?!\n)/

\n/g; $line =~ s/<\/p>\s*(?!\n)/<\/p>\n/g; $line =~ s/<\/div>\s*(?!\n)/<\/div>\n/g; $line =~ s/]*)>(?!\n)/\n/g; } for ($i = 0; $i < scalar(@text); $i++) { splice @text, $i, 1, (split /(?<=\n)/, $text[$i]); } $thisdiv = 0; for ($i = 0; $i < scalar(@text); $i++) { if ($text[$i] =~ /^(.*?)<\/a>(.+?)<\/h(.*)$/) { my($pre, $opt, $id, $title, $post) = ($1, $2, $3, $4, $5); # Section reference my($ref) = $backref{"$file#$id"}; # If not found, try for a chapter reference $ref = $backref{"$file"} if !defined $ref; # Adjust the line $text[$i]= "$pre$title]*?class="literallayout">$/ && $text[$i+1] eq "

\n") { $text[++$i] = ""; $thisdiv = 1; } elsif ($thisdiv && $text[$i] eq "

\n" && $text[$i+1] eq "

\n") { $text[$i] = ""; $thisdiv = 0; } elsif ($text[$i] =~ /^\s*<\/td>/) { $text[$i] = " $text[$i]"; } } open(OUT, ">$file") || die "Failed to open $file for writing: $!\n"; print OUT @text; close(OUT); } # Now process the ix01.html file open(IN, "ix01.html") || die "Failed to open ix01.html for reading: $!\n"; @index = ; close(IN); # Insert a newline after every > because the whole index is generated as one # humungous line that is hard to check. Then split the lines so that each one # is a separate element in the vector. foreach $line (@index) { $line =~ s/>\s*/>\n/g; } for ($i = 0; $i < scalar(@index); $i++) { splice @index, $i, 1, (split /(?<=\n)/, $index[$i]); } # We want to add a list of letters at the top of the index, and link back # to them from each letter heading. First find the index title and remember # where to insert the list of letters. for ($i = 0; $i < scalar(@index); $i++) { if ($index[$i] =~ /^<\/h2>$/) { $listindex = $i; last; } } # Now scan through for the letter headings and build the cross references, # while also building up the list to insert. $list = "

\n"; for (; $i < scalar(@index); $i++) { if ($index[$i] =~ /^(.)<\/h3>$/) { $letter = $1; $index[$i-1] =~ s/^//; $index[$i] =~ s/$/<\/a>/; $list .= " $letter\n"; } } # Now we know which letters we have, we can insert the list. $list .= "

\n"; splice @index, $listindex, 0, $list; # Write out the modified ix01.html file. open (OUT, ">ix01.html") || die "Failed to open ix01.html for writing: $!\n"; print OUT @index; close(OUT); # End