#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/Tidytxt,v 1.2 2006/02/01 11:01:01 ph10 Exp $

# Script to tidy up the output of w3m when it makes a text file. First we
# convert sequences of blank lines into a single blank line, to get everything
# uniform. Then we go through and insert blank lines before chapter and
# sections, also converting chapter titles to uppercase.
#
# Usage: input is read from the files named on the command line, or from
# standard input when there are none; the tidied text goes to standard output.

use strict;
use warnings;

# Collapse every run of blank lines into exactly one empty line.
#
# A line counts as blank when it holds nothing but whitespace. The surviving
# line of each run is rewritten as a bare "\n" so that the heading detection
# in tidy_lines(), which looks for truly empty neighbouring lines, treats
# whitespace-only lines the same way as empty ones.
#
# Takes the input lines as a list and returns the collapsed list.

sub _collapse_blank_runs {
    my @input = @_;
    my @collapsed;
    my $last_was_blank = 0;

    for my $line (@input) {
        if ($line =~ /^\s*$/) {
            push @collapsed, "\n" unless $last_was_blank;
            $last_was_blank = 1;
        }
        else {
            push @collapsed, $line;
            $last_was_blank = 0;
        }
    }

    return @collapsed;
}

# Tidy a list of text lines and return the resulting list of lines.
#
# The steps are:
#   1. Collapse runs of blank lines.
#   2. Uppercase the "Table of Contents" title and locate the first contents
#      entry (a line starting with "1. ").
#   3. Locate the next line starting with "1. ", which is taken to be the
#      start of the first chapter in the body.
#   4. From there on, decorate chapter and section headings. A heading must
#      have a blank line before and after it, and its number must be the next
#      one expected; this keeps numbered list items from being mistaken for
#      headings.
#
# Returned elements may contain several newline-terminated lines, because
# heading decoration is added to the heading's own element.

sub tidy_lines {
    my @lines = _collapse_blank_runs(@_);
    my $count = scalar @lines;
    my $i;

    # Find start of TOC, uppercasing its title. The dot is escaped so that
    # only a literal "1. " is accepted, not "10 " or "1x ".

    for ($i = 0; $i < $count; $i++) {
        $lines[$i] = "TABLE OF CONTENTS\n"
            if $lines[$i] =~ /^Table of Contents/;
        last if $lines[$i] =~ /^1\. /;
    }

    # Find start of first chapter. Stepping past the TOC entry first also
    # guarantees that $i is at least 1, so $lines[$i - 1] below is safe.

    for ($i++; $i < $count; $i++) {
        last if $lines[$i] =~ /^1\. /;
    }

    # Process the body. We can detect the starts of chapters and sections by
    # looking for preceding and following blank lines, and then matching
    # against the numbers.

    my $chapter = 0;
    my $section = 0;

    for (; $i < $count; $i++) {
        my $before = $lines[$i - 1];

        # A heading on the very last line counts as being followed by a blank.
        my $after = $i + 1 < $count ? $lines[$i + 1] : "";

        next if $before !~ /^$/ || $after !~ /^$/;

        # Start of chapter: rule off with "=" and uppercase the title.

        if ($lines[$i] =~ /^(\d+)\. / && $1 == $chapter + 1) {
            $chapter++;
            $section = 0;
            $lines[$i] = "\n\n" . ("=" x 79) . "\n" . uc($lines[$i]);
        }

        # Start of next section: underline the title with "-". The length is
        # taken without the newline, so a final line that lacks one is still
        # underlined correctly.

        elsif ($lines[$i] =~ /^(\d+)\.(\d+) /
            && $1 == $chapter
            && $2 == $section + 1)
        {
            $section++;
            my $title = $lines[$i];
            $title =~ s/\n\z//;
            $lines[$i] = "\n$title\n" . ("-" x length($title)) . "\n";
        }
    }

    return @lines;
}

# Read all the input, tidy it, and write the result to standard output.

sub main {
    print tidy_lines(<>);
    return;
}

# Run only when executed as a script, so that the file can also be loaded by
# a test harness without consuming standard input.

main() unless caller;

1;

# End