[exim.git] / doc / doc-docbook / TidyHTML-filter

#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.3 2006/02/01 11:01:01 ph10 Exp $

# Script to tidy up the filter HTML file that is generated by xmlto. The
# following changes are made:
#
# 1. Split very long lines.
# 2. Create reverse links from chapter and section titles back to the TOC.
# 3. Turn <div class="literallayout"><p> into <div class="literallayout"> and
#    a matching </p></div> into </div> to get rid of unwanted vertical white
#    space.


use strict;
use warnings;

# Counter used to generate the ids ("toc0001", "toc0002", ...) that are added
# to the entries in the TOC.

my $tocref = 1;

# Maps the link target of a TOC entry (for example "#id1234") to the id that
# was added to that TOC entry, so that the heading it points at can link back.

my %backref;

# Index into @text. It is deliberately shared by the loops below: the TOC scan
# starts where the "skip to TOC" loop stops, and the scan of the remainder of
# the document starts where the TOC scan stops.

my $i;

# Read in the filter.html file.

open(my $in, "<", "filter.html")
  or die "Failed to open filter.html for reading: $!\n";
my @text = <$in>;
close($in);

# Insert a newline after every > in the toc, because the whole toc is generated
# as one humungous line that is hard to check. Indeed, the start of the first
# chapter is also on the line, so we have to split it off first. Having
# inserted newlines, we split the toc into separate items in the vector.
#
# The line is recognized by the document title. Only the first such line is
# processed (hence the "last"), so an ordinary match is used here; the bare
# ?PATTERN? match-once form is no longer accepted by current versions of Perl.

for ($i = 0; $i < scalar(@text); $i++)
  {
  if ($text[$i] =~ m{<title>Exim's interfaces to mail filtering</title>})
    {
    # Split off everything from the first chapter onwards; after this,
    # $text[$i] holds only the part of the line that precedes it.
    splice @text, $i, 1, (split /(?=<div class="chapter")/, $text[$i]);
    $text[$i] =~ s/>\s*/>\n/g;
    splice @text, $i, 1, (split /(?<=\n)/, $text[$i]);
    last;
    }
  }

# We want to create reverse links from each chapter and section title back to
# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
# an id to each entry, and create tables that remember the new link ids. We
# detect the start of the TOC by <div class="toc" and the end of the TOC by
# <div class="chapter".

# Skip to start of TOC

for ($i = 0; $i < scalar(@text); $i++)
  {
  last if $text[$i] =~ /^<div class="toc"/;
  }

# Scan the TOC

for (; $i < scalar(@text); $i++)
  {
  last if $text[$i] =~ /^<div class="chapter"/;
  if ($text[$i] =~ /^<a href="(#[^"]+)">/)
    {
    my($target) = $1;
    my($id) = sprintf "%04d", $tocref++;
    $text[$i] =~ s/<a/<a id="toc$id"/;
    $backref{$target} = "toc$id";
    }
  }

# Scan remainder of the document

for (; $i < scalar(@text); $i++)
  {
  # Turn each empty anchor that precedes a heading's text into a link that
  # wraps the text and points back at the TOC entry for that heading. The
  # rewritten anchor has href= before id=, so it no longer matches the pattern
  # and the loop terminates once every anchor on the line has been handled.
  # Note that "." does not match the newline at the end of the line, so a
  # rewritten line loses its line terminator and is joined to the following
  # line on output.

  while ($text[$i] =~
      /^(.*)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.*?)<\/h(.*)/)
    {
    my($before, $xmlns, $id, $title, $after) = ($1, $2, $3, $4, $5);

    # The xmlns attribute is optional.
    $xmlns = "" unless defined $xmlns;

    # The TOC table is keyed by link target, that is, "#" followed by the
    # heading's id. A heading with no TOC entry gets an empty fragment.
    my($ref) = $backref{"#$id"};
    $ref = "" unless defined $ref;

    $text[$i] = "$before<a$xmlns href=\"#$ref\" id=\"$id\">" .
                "$title</a></h$after";
    }

  # Remove the <p> (and any <br /> that immediately follows it) at the start of
  # a literallayout block, then remove the </p> from the next line that begins
  # with </p></div>. As above, the rewritten opening line loses its newline.

  if ($text[$i] =~ /^(.*)<div class="literallayout"><p>(?:<br \/>)?(.*)/)
    {
    $text[$i] = "$1<div class=\"literallayout\">$2";

    for (my $j = $i + 1; $j < scalar(@text); $j++)
      {
      if ($text[$j] =~ /^<\/p><\/div>/)
        {
        $text[$j] =~ s/<\/p>//;
        last;
        }
      }
    }
  }

# Write out the revised file. The file is rewritten in place, so failures to
# write or to flush the data at close are reported rather than ignored.

open(my $out, ">", "filter.html")
  or die "Failed to open filter.html for writing: $!\n";
print {$out} @text
  or die "Failed to write filter.html: $!\n";
close($out)
  or die "Failed to close filter.html after writing: $!\n";

# End
Commit	Line	Data
	1	#! /usr/bin/perl
	2
	3	# $Cambridge: exim/doc/doc-docbook/TidyHTML-filter,v 1.3 2006/02/01 11:01:01 ph10 Exp $
	4
	5	# Script to tidy up the filter HTML file that is generated by xmlto. The
	6	# following changes are made:
	7	#
	8	# 1. Split very long lines.
	9	# 2. Create reverse links from chapter and section titles back to the TOC.
	10	# 3. Turn <div class="literallayout"><p> into <div class="literallayout"> and
	11	# a matching </p></div> into </div> to get rid of unwanted vertical white
	12	# space.
	13
	14
	15	$tocref = 1;
	16	$thisdiv = 0;
	17
	18	# Read in the filter.html file.
	19
	20	open(IN, "filter.html") || die "Failed to open filter.html for reading: $!\n";
	21	@text = <IN>;
	22	close(IN);
	23
	24	# Insert a newline after every > in the toc, because the whole toc is generated
	25	# as one humungous line that is hard to check. Indeed, the start of the first
	26	# chapter is also on the line, so we have to split if off first. Having
	27	# inserted newlines, we split the toc into separate items in the vector.
	28
	29	for ($i = 0; $i < scalar(@text); $i++)
	30	{
	31	if ($text[$i] =~ ?<title>Exim's interfaces to mail filtering</title>?)
	32	{
	33	splice @text, $i, 1, (split /(?=<div class="chapter")/, $text[$i]);
	34	$text[$i] =~ s/>\s*/>\n/g;
	35	splice @text, $i, 1, (split /(?<=\n)/, $text[$i]);
	36	last;
	37	}
	38	}
	39
	40	# We want to create reverse links from each chapter and section title back to
	41	# the relevant place in the TOC. Scan the TOC for the relevant entries. Add
	42	# an id to each entry, and create tables that remember the new link ids. We
	43	# detect the start of the TOC by <div class="toc" and the end of the TOC by
	44	# <div class="chapter".
	45
	46	# Skip to start of TOC
	47
	48	for ($i = 0; $i < scalar(@text); $i++)
	49	{
	50	last if $text[$i] =~ /^<div class="toc"/;
	51	}
	52
	53	# Scan the TOC
	54
	55	for (; $i < scalar(@text); $i++)
	56	{
	57	last if $text[$i] =~ /^<div class="chapter"/;
	58	if ($text[$i] =~ /^<a href="(#[^"]+)">/)
	59	{
	60	my($ss) = $1;
	61	my($id) = sprintf "%04d", $tocref++;
	62	$text[$i] =~ s/<a/<a id="toc$id"/;
	63	$backref{"$ss"} = "toc$id";
	64	}
	65	}
	66
	67	# Scan remainder of the document
	68
	69	for (; $i < scalar(@text); $i++)
	70	{
	71	while ($text[$i] =~
	72	/^(.*)<a( xmlns="[^"]+")? id="([^"]+)"><\/a>(.*?)<\/h(.*)/)
	73	{
	74	my($ref) = $backref{"#$2"};
	75	$text[$i] = "$1<a$2 href=\"#$ref\" id=\"$3\">$4</a></h$5";
	76	}
	77
	78	if ($text[$i] =~ /^(.*)<div class="literallayout"><p>(?:<br \/>)?(.*)/)
	79	{
	80	my($j);
	81	$text[$i] = "$1<div class=\"literallayout\">$2";
	82
	83	for ($j = $i + 1; $j < scalar(@text); $j++)
	84	{
	85	if ($text[$j] =~ /^<\/p><\/div>/)
	86	{
	87	$text[$j] =~ s/<\/p>//;
	88	last;
	89	}
	90	}
	91	}
	92	}
	93
	94	# Write out the revised file
	95
	96	open(OUT, ">filter.html") || die "Failed to open filter.html for writing: $!\n";
	97	print OUT @text;
	98	close(OUT);
	99
	100	# End