[exim.git] / doc / doc-docbook / TidyHTML-filter

#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.2 2005/11/10 12:30:13 ph10 Exp $

# Script to tidy up the filter HTML file that is generated by xmlto. The
# following changes are made:
#
# 1. Split very long lines.
# 2. Create reverse links from chapter and section titles back to the TOC.
# 3. Turn <div class="literallayout"><p> into <div class="literallayout"> and
#    a matching </p></div> into </div> to get rid of unwanted vertical white
#    space.


# $tocref numbers the ids that are added to TOC entries (toc0001, toc0002,
# ...); $thisdiv is a flag that is set while we are inside a literallayout
# div whose opening <p> has been removed.

($tocref, $thisdiv) = (1, 0);

# Read in the filter.html file.

# Read the whole of filter.html into @text, one input line per element. The
# three-argument form of open with a lexical handle is used so that the mode
# is explicit and no global bareword handle is left lying around.

open(my $in, "<", "filter.html") ||
  die "Failed to open filter.html for reading: $!\n";
@text = <$in>;
close($in);

# Insert a newline after every > because the whole toc is generated as one
# humungous line that is hard to check. Then split the lines so that each one
# is a separate element in the vector.

# Return a copy of the given lines in which a newline follows every ">" (any
# white space that originally followed the ">" is dropped), broken up so that
# each resulting line is a separate list element. The arguments themselves
# are not modified.

sub split_after_tags
  {
  my(@pieces);
  foreach my $line (@_)
    {
    (my $copy = $line) =~ s/>\s*/>\n/g;
    push @pieces, split(/(?<=\n)/, $copy);
    }
  return @pieces;
  }

# Build a fresh list rather than splicing into @text while indexing over it:
# each splice has to shift the tail of the array, and the index would get out
# of step if a split ever produced no elements.

@text = split_after_tags(@text);

# We want to create reverse links from each chapter and section title back to
# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
# an id to each entry, and create tables that remember the new link ids. We
# detect the start of the TOC by <div class="toc" and the end of the TOC by
# <div class="chapter".

# Skip to start of TOC

# Leave $i at the index of the line that opens the TOC div. If there is no
# such line, $i ends up one past the last element of @text.

$i = 0;
$i++ until $i >= scalar(@text) || $text[$i] =~ /^<div class="toc"/;

# Scan the TOC

# Walk through the TOC, stopping at the line that opens the first chapter div
# (or at the end of the text). Each TOC link is given an id of the form
# tocNNNN, and %backref remembers that id under the link's "#target".

while ($i < scalar(@text) && $text[$i] !~ /^<div class="chapter"/)
  {
  if ($text[$i] =~ /^<a href="(#[^"]+)">/)
    {
    my($target) = $1;
    my($num) = sprintf "%04d", $tocref++;
    $text[$i] =~ s/<a/<a id="toc$num"/;
    $backref{$target} = "toc$num";
    }
  $i++;
  }

# Scan remainder of the document

# Build the replacement for a heading's anchor line. $line is the line that
# follows an <h2 or <h3 tag, and $toc_ids is a reference to the table that
# maps "#id" to the id given to the matching TOC entry. The result is the
# anchor with an href back to the TOC added. Nothing (undef in scalar
# context) is returned if $line is not a bare id anchor, or if there is no TOC
# entry for its id, so that an empty href="#" is never generated.

sub toc_backlink
  {
  my($line, $toc_ids) = @_;
  return unless defined($line) &&
    $line =~ /^<a( xmlns="[^"]+")? id="([^"]+)">$/;
  my($xmlns, $id) = (defined $1 ? $1 : "", $2);
  my($target) = $toc_ids->{"#$id"};
  return unless defined $target;
  return "<a$xmlns href=\"#$target\" id=\"$id\">\n";
  }

# Process everything from the current position to the end of the text:
#
# . The <p> line that directly follows <div class="literallayout"> is
#   emptied, and $thisdiv is set to remember that.
# . While $thisdiv is set, a </p> line that is directly followed by </div> is
#   emptied, and $thisdiv is cleared.
# . When the line after an <h2 or <h3 tag is an id anchor with a TOC entry,
#   the anchor is given an href to that entry, and the two lines after the
#   anchor are exchanged so that what followed the </a> line now precedes it.
#   The line after the heading is only looked at, not consumed, so when it is
#   not such an anchor it is still examined on the next iteration.
#
# NOTE(review): if the title text and the closing </hN> tag share a line, the
# exchange leaves </a> after </hN>. Check against real xmlto output.

for (; $i < scalar(@text); $i++)
  {
  if ($text[$i] eq "<div class=\"literallayout\">\n" && $text[$i+1] eq "<p>\n")
    {
    $text[++$i] = "";
    $thisdiv = 1;
    }
  elsif ($thisdiv && $text[$i] eq "</p>\n" && $text[$i+1] eq "</div>\n")
    {
    $text[$i] = "";
    $thisdiv = 0;
    }
  elsif ($text[$i] =~ /^<h[23] /)
    {
    my($linked) = toc_backlink($text[$i+1], \%backref);
    if (defined $linked)
      {
      $text[$i+1] = $linked;

      # Exchange the two lines after the anchor only if both exist, so that a
      # truncated document does not grow undefined elements.

      if ($i + 3 < scalar(@text))
        { @text[$i+2, $i+3] = @text[$i+3, $i+2]; }
      $i += 3;
      }
    }
  }

# Write out the revised file

# Overwrite filter.html with the tidied text. Both the print and the close
# are checked, because an error on buffered output (a full disk, for example)
# may not be reported until the handle is flushed when it is closed.

open(my $out, ">", "filter.html") ||
  die "Failed to open filter.html for writing: $!\n";
print $out @text or die "Failed to write filter.html: $!\n";
close($out) || die "Failed to close filter.html: $!\n";

# End
Commit	Line	Data
168e428f PH	1	#! /usr/bin/perl
168e428f PH	2
068aaea8	3	# $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.2 2005/11/10 12:30:13 ph10 Exp $
168e428f PH	4
	5	# Script to tidy up the filter HTML file that is generated by xmlto. The
	6	# following changes are made:
	7	#
	8	# 1. Split very long lines.
	9	# 2. Create reverse links from chapter and section titles back to the TOC.
068aaea8 PH	10	# 3. Turn <div class="literallayout"><p> into <div class="literallayout"> and
	11	# a matching </p></div> into </div> to get rid of unwanted vertical white
	12	# space.
168e428f PH	13
	14
	15	$tocref = 1;
068aaea8	16	$thisdiv = 0;
168e428f PH	17
	18	# Read in the filter.html file.
	19
	20	open(IN, "filter.html") \|\| die "Failed to open filter.html for reading: $!\n";
	21	@text = <IN>;
	22	close(IN);
	23
	24	# Insert a newline after every > because the whole toc is generated as one
	25	# humungous line that is hard to check. Then split the lines so that each one
	26	# is a separate element in the vector.
	27
	28	foreach $line (@text) { $line =~ s/>\s*/>\n/g; }
	29	for ($i = 0; $i < scalar(@text); $i++)
	30	{ splice @text, $i, 1, (split /(?<=\n)/, $text[$i]); }
	31
	32	# We want to create reverse links from each chapter and section title back to
	33	# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
	34	# an id to each entry, and create tables that remember the new link ids. We
	35	# detect the start of the TOC by <div class="toc" and the end of the TOC by
	36	# <div class="chapter".
	37
	38	# Skip to start of TOC
	39
	40	for ($i = 0; $i < scalar(@text); $i++)
	41	{
	42	last if $text[$i] =~ /^<div class="toc"/;
	43	}
	44
	45	# Scan the TOC
	46
	47	for (; $i < scalar(@text); $i++)
	48	{
	49	last if $text[$i] =~ /^<div class="chapter"/;
	50	if ($text[$i] =~ /^<a href="(#[^"]+)">/)
	51	{
	52	my($ss) = $1;
	53	my($id) = sprintf "%04d", $tocref++;
	54	$text[$i] =~ s/<a/<a id="toc$id"/;
	55	$backref{"$ss"} = "toc$id";
	56	}
	57	}
	58
	59	# Scan remainder of the document
	60
	61	for (; $i < scalar(@text); $i++)
	62	{
068aaea8 PH	63	if ($text[$i] eq "<div class=\"literallayout\">\n" && $text[$i+1] eq "<p>\n")
	64	{
	65	$text[++$i] = "";
	66	$thisdiv = 1;
	67	}
	68	elsif ($thisdiv && $text[$i] eq "</p>\n" && $text[$i+1] eq "</div>\n")
	69	{
	70	$text[$i] = "";
	71	$thisdiv = 0;
	72	}
	73	elsif ($text[$i] =~ /^<h[23] /)
168e428f PH	74	{
	75	$i++;
	76	if ($text[$i] =~ /^<a( xmlns="[^"]+")? id="([^"]+)">$/)
	77	{
	78	my($ref) = $backref{"#$2"};
	79	$text[$i++] = "<a$1 href=\"#$ref\" id=\"$2\">\n";
	80	my($temp) = $text[$i];
	81	$text[$i] = $text[$i+1];
	82	$text[++$i] = $temp;
	83	}
	84	}
	85	}
	86
	87	# Write out the revised file
	88
	89	open(OUT, ">filter.html") \|\| die "Failed to open filter.html for writing: $!\n";
	90	print OUT @text;
	91	close(OUT);
	92
	93	# End