[exim.git] / doc / doc-docbook / Pre-xml

#! /usr/bin/perl

# $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.1 2005/06/16 10:32:31 ph10 Exp $

# Script to pre-process XML input before processing it for various purposes.
# Options specify which transformations are to be done. Monospaced literal
# layout blocks are never touched.

# Changes:

# -abstract: Remove the <abstract> element

# -ascii:    Replace &#8230;  (sic, no x) with ...
#            Replace &#x2019; by '
#            Replace &#x201C; by "
#            Replace &#x201D; by "
#            Replace &#x2013; by -
#            Replace &#x2020; by *
#            Replace &#x2021; by **
#            Replace &#x00a0; by a space
#            Replace &#169;   by (c)
#            Put quotes round <literal> text
#            Put quotes round <quote> text

# -bookinfo: Remove the <bookinfo> element from the file

# -fi:       Replace "fi" by &#xFB01; except when it is in an XML element, or
#            inside a <literal>.

# -noindex   Remove the XML to generate a Concept and an Options index.
# -oneindex  Ditto, but add XML to generate a single index.


# The function that processes text which lies outside monospaced literal
# layout blocks and outside <literal> elements.
#
# Argument:  the text to process (a whole line, or one part of a line)
# Returns:   a copy of the text with the selected transformations applied
#
# Which transformations are applied is controlled by the global option flags
# $ligatures (set by -fi) and $ascii (set by -ascii). The sub is declared
# without a prototype because it takes one argument; both process(...) and
# &process(...) call forms work.

sub process
{
my($s) = $_[0];

# Turn "fi" into the ligature character reference. The negative lookahead
# rejects an "fi" that is followed by ">" with no "<" or ">" in between, that
# is, one that sits inside an XML tag.

$s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;

if ($ascii)
  {
  # ASCII replacements, keyed by character code point.

  my %ascii_for =
    (
    0x00A0 => ' ',      # no-break space
    0x00A9 => '(c)',    # copyright sign
    0x2013 => '-',      # en dash
    0x2019 => "'",      # right single quotation mark
    0x201C => '"',      # left double quotation mark
    0x201D => '"',      # right double quotation mark
    0x2020 => '*',      # dagger
    0x2021 => '**',     # double dagger
    0x2026 => '...',    # horizontal ellipsis
    );

  # Recognize a numeric character reference in either decimal (&#169;) or
  # hexadecimal (&#x00a9;, &#xA9;) form, so that the spelling chosen by
  # whatever generated the XML does not matter. A reference whose code point
  # is not in the table is left exactly as it was.

  $s =~ s{(&#(?:x([0-9A-Fa-f]{1,6})|([0-9]{1,7}));)}
         {
         my $code = defined($2) ? hex($2) : $3 + 0;
         exists $ascii_for{$code} ? $ascii_for{$code} : $1;
         }ge;

  # Opening and closing <quote> tags both become a plain double quote.

  $s =~ s/<\/?quote>/"/g;
  }

return $s;
}


# The main program

# Every option starts switched off. $inliteral and $madeindex are not options;
# they are state variables used while the input is being read.

($abstract, $ascii, $bookinfo, $ligatures, $noindex, $oneindex) = (0) x 6;
($inliteral, $madeindex) = (0, 0);

# Each recognized command line option, and the flag that it switches on.

my %flag_for =
  (
  "-fi"       => \$ligatures,
  "-abstract" => \$abstract,
  "-ascii"    => \$ascii,
  "-bookinfo" => \$bookinfo,
  "-noindex"  => \$noindex,
  "-oneindex" => \$oneindex,
  );

# Anything on the command line that is not in the table is a fatal error.

foreach my $arg (@ARGV)
  {
  die "** Pre-xml: Unknown option \"$arg\"\n" if !exists $flag_for{$arg};
  ${$flag_for{$arg}} = 1;
  }

# Read the standard input one line at a time. Lines belonging to elements
# that are being removed are dropped, monospaced literallayout blocks are
# copied unchanged, and every other line is written out after being split
# into the parts that are inside <literal> and the parts that are not.

LINE: while (defined(my $line = <STDIN>))
  {
  # With -abstract, drop a line that starts with <abstract>, optionally
  # preceded by white space.

  next LINE if $abstract and $line =~ /^\s*<abstract>/;

  # With -bookinfo, discard everything from a line starting with <bookinfo
  # up to and including the next line that starts with </bookinfo.

  if ($bookinfo and $line =~ /^<bookinfo/)
    {
    while (defined(my $dropped = <STDIN>))
      {
      last if $dropped =~ /^<\/bookinfo/;
      }
    next LINE;
    }

  # A monospaced literallayout block is copied as it stands, up to and
  # including the line that starts with </literallayout>.

  if ($line =~ /^<literallayout class="monospaced">/)
    {
    print $line;
    while (defined(my $verbatim = <STDIN>))
      {
      print $verbatim;
      last if $verbatim =~ /^<\/literallayout>/;
      }
    next LINE;
    }

  # With -noindex or -oneindex, discard an <index> element, reading on until
  # a line that starts with </index>. With -oneindex, the first element that
  # is discarded is replaced by the XML for a single index.

  if (($noindex or $oneindex) and $line =~ /^<index[\s>]/)
    {
    while (defined(my $dropped = <STDIN>))
      {
      last if $dropped =~ /^<\/index>/;
      }

    if ($oneindex and not $madeindex)
      {
      print "<index><title>Index</title></index>\n";
      $madeindex = 1;
      }

    next LINE;
    }

  # An ordinary line. Work along it, switching state at each <literal> and
  # </literal>. $inliteral is kept between lines, so a <literal> may be
  # closed on a later line than the one on which it was opened. Text inside
  # <literal> is written as it is; text outside goes through process(),
  # which is called in the & form, without prototype checking.

  my $rest = $line;

  PART: while (1)
    {
    if ($inliteral)
      {
      # No closing tag in what is left: it is all literal text.

      if ($rest !~ /^(.*?)<\/literal>(.*)$/)
        {
        print $rest;
        last PART;
        }

      my ($inside, $after) = ($1, $2);
      print $inside;
      print "\"" if $ascii;
      print "</literal>";
      $inliteral = 0;
      $rest = "$after\n";
      }

    else
      {
      # No opening tag in what is left: it is all ordinary text.

      if ($rest !~ /^(.*?)<literal>(.*)$/)
        {
        print &process($rest);
        last PART;
        }

      my ($before, $after) = ($1, $2);
      print &process($before);
      print "<literal>";
      print "\"" if $ascii;
      $inliteral = 1;
      $rest = "$after\n";
      }
    }    # Loop for the different parts of one line
  }      # Loop for the input lines

# End
Commit	Line	Data
168e428f PH	1	#! /usr/bin/perl
	2
	3	# $Cambridge: exim/doc/doc-docbook/Pre-xml,v 1.1 2005/06/16 10:32:31 ph10 Exp $
	4
	5	# Script to pre-process XML input before processing it for various purposes.
	6	# Options specify which transformations are to be done. Monospaced literal
	7	# layout blocks are never touched.
	8
	9	# Changes:
	10
	11	# -abstract: Remove the <abstract> element
	12
	13	# -ascii: Replace &#8230; (sic, no x) with ...
	14	# Replace &#x2019; by '
	15	# Replace &#x201C; by "
	16	# Replace &#x201D; by "
	17	# Replace &#x2013; by -
	18	# Replace &#x2020; by *
	19	# Replace &#x2021; by **
	20	# Replace &#x00a0; by a space
	21	# Replace &#169; by (c)
	22	# Put quotes round <literal> text
	23	# Put quotes round <quote> text
	24
	25	# -bookinfo: Remove the <bookinfo> element from the file
	26
	27	# -fi: Replace "fi" by &#xFB01; except when it is in an XML element, or
	28	# inside a <literal>.
	29
	30	# -noindex Remove the XML to generate a Concept and an Options index.
	31	# -oneindex Ditto, but add XML to generate a single index.
	32
	33
	34
	35	# The function that processes non-literal monospaced text
	36
	37	sub process()
	38	{
	39	my($s) = $_[0];
	40
	41	$s =~ s/fi(?![^<>]*>)/&#xFB01;/g if $ligatures;
	42
	43	if ($ascii)
	44	{
	45	$s =~ s/&#8230;/.../g;
	46	$s =~ s/&#x2019;/'/g;
	47	$s =~ s/&#x201C;/"/g;
	48	$s =~ s/&#x201D;/"/g;
	49	$s =~ s/&#x2013;/-/g;
	50	$s =~ s/&#x2020;/*/g;
	51	$s =~ s/&#x2021;/**/g;
	52	$s =~ s/&#x00a0;/ /g;
	53	$s =~ s/&#x00a9;/(c)/g;
	54	$s =~ s/<quote>/"/g;
	55	$s =~ s/<\/quote>/"/g;
	56	}
	57
	58	$s;
	59	}
	60
	61
	62	# The main program
	63
	64	$abstract = 0;
65	$ascii = 0;
66	$bookinfo = 0;
67	$inliteral = 0;
68	$ligatures = 0;
69	$madeindex = 0;
70	$noindex = 0;
71	$oneindex = 0;
72
73	foreach $arg (@ARGV)
74	{
75	if ($arg eq "-fi") { $ligatures = 1; }
76	elsif ($arg eq "-abstract") { $abstract = 1; }
77	elsif ($arg eq "-ascii") { $ascii = 1; }
78	elsif ($arg eq "-bookinfo") { $bookinfo = 1; }
79	elsif ($arg eq "-noindex") { $noindex = 1; }
80	elsif ($arg eq "-oneindex") { $oneindex = 1; }
81	else { die "** Pre-xml: Unknown option \"$arg\"\n"; }
82	}
83
84	while (<STDIN>)
85	{
86	# Remove <abstract> if required
87
88	next if ($abstract && /^\s*<abstract>/);
89
90	# Remove <bookinfo> if required
91
92	if ($bookinfo && /^<bookinfo/)
93	{
94	while (<STDIN>) { last if /^<\/bookinfo/; }
95	next;
96	}
97
98	# Copy monospaced literallayout blocks
99
100	if (/^<literallayout class="monospaced">/)
101	{
102	print;
103	while (<STDIN>)
104	{
105	print;
106	last if /^<\/literallayout>/;
107	}
108	next;
109	}
110
111	# Adjust index-generation code if required
112
	113	if (($noindex || $oneindex) && /^<index[\s>]/)
114	{
115	while (<STDIN>)
116	{
117	last if /^<\/index>/;
118	}
119
120	if ($oneindex && !$madeindex)
121	{
122	$madeindex = 1;
123	print "<index><title>Index</title></index>\n";
124	}
125
126	next;
127	}
128
129	# A line that is not in a monospaced literal block; keep track of which
130	# parts are in <literal> and which not. The latter get processed by the
131	# function above.
132
133	for (;;)
134	{
135	if ($inliteral)
136	{
	137	if (/^(.*?)<\/literal>(.*)$/)
138	{
139	print $1;
140	print "\"" if $ascii;
141	print "</literal>";
142	$inliteral = 0;
143	$_ = "$2\n";
144	}
145	else
146	{
147	print;
148	last;
149	}
150	}
151
152	# Not in literal state
153
154	else
155	{
	156	if (/^(.*?)<literal>(.*)$/)
157	{
158	print &process($1);
159	print "<literal>";
160	print "\"" if $ascii;
161	$inliteral = 1;
162	$_ = "$2\n";
163	}
164	else
165	{
166	print &process($_);
167	last;
168	}
169	}
170	} # Loop for different parts of one line
171	} # Loop for multiple lines
172
173	# End