Docs: fix typoes for dnssec_require_domains. Bug 1842
[exim.git] / doc / doc-scripts / fc2k
CommitLineData
495ae4b0 1#! /usr/bin/perl -w
495ae4b0
PH
2
3# Script to read the HTML table of contents for the Exim FAQ and create an
4# HTML KWIC index out of it.
5
6
7########################################################################
8# List of words to ignore - kept alphabetically for reference, but they
9# don't have to be in order.
10
11$ignore_list = "
12
8e26e4bf
PH
13a ability able about absence access according actual address addresses addressed
14affect affected after against aka all allow allowed allows along already also
15although always am amount an ancient and and/or annoying another any anybody
16anyone anything anywhere apparent apparently are aren't around arrange arrive
17arrives as at
495ae4b0
PH
18
19back bad based basically be because been behave behaviour being best between
8e26e4bf 20bob both box bug build builds built busy but by
495ae4b0
PH
21
22call called calls can can't cannot causes causing central certain code comes
23coming command commands complain complaining complains configure configured
24conjunction contact contain contains contained correct correctly could
25currently customer
26
27day days defined deliver delivers delivered delivery deliveries did do does
28doesn't doing don't down during
29
8e26e4bf
PH
30e-mail e-mails each easy either else email emails entirely entries entry
31especially etc even ever every example exim exim's experiencing
495ae4b0 32
8e26e4bf
PH
33far few file files find finds fine fix fixed fly following for form found from
34fully
495ae4b0 35
8e26e4bf 36generate generated get gets getting given gives giving go goes going got
495ae4b0
PH
37
38handle handles handled handling happen happens has have haven't having helpful
39him host hosts how however
40
41i i'd i'm i've if in indeed instead into is issue issues isn't it it's its
42
43jim just
44
45keep keeps know knows
46
47like line lines look looked looking lot
48
8e26e4bf
PH
49m machine machines machine's mail mails main make me mean means message messages
50might more much must my myself
495ae4b0
PH
51
52near need neither no nor not now
53
54occur of off often ok on one only or other our out over own
55
56part parts particular per place possibility possible present problem problems
57put puts
58
59quite
60
61raised rather really reason rid right round run runs
62
8e26e4bf
PH
63same say saying see seeing seem seems seen sees set setting she should simply
64sit so some somehow something sometimes stand state statement still strange such
65supposed system systems
495ae4b0 66
8e26e4bf
PH
67take takes tell than that the their them then there these they things think this
68those thought to try though to/for told too tried tries trying
495ae4b0
PH
69
70under until up use uses used using usually
71
72valid value values via
73
74want wanted wanting was way we we've well what what's when where whereabouts
75whenever whether which while who whose why will with within without wish won't
76wondered work worked working works would wrong
77
78xxx
79
80yet yyy
81
82";
83########################################################################
84
85
86# The regular expression fragment that defines the separator between words
87
88$wordgap = "(?:[]().?,;:\"']|(?><[^>]*>))*(?:\\s+|\$)(?:[[(\"'`]|(?><[^>]*>))*";
89
90
91########################################################################
92# Function to add to a length to accommodate HTML stuff
93
94sub setlen{
95my($len, $s) = @_;
96
97$len += length($1) while ($s =~ /(<\/?[a-z]+>)/ig);
98$len += 1 while ($s =~ /&#\d+;/g);
99
100return $len;
101}
102
103
104########################################################################
105# Function to write out the list of initials with references
106
107sub write_initials {
108my($this_initial) = "$_[0]";
109
110print OUT "<p>\n&nbsp;&nbsp;";
111
112foreach $initial (sort keys %initials)
113 {
114 if ($initial eq $this_initial)
115 {
116 print OUT "&nbsp;&nbsp;&nbsp;<font size=7 color=\"#FF0A0A\"><b>$initial</b></font>&nbsp;";
117 }
118 else
119 {
120 print OUT "<a href=\"FAQ-KWIC_$initial.html\">&nbsp;&nbsp;$initial</a>";
121 }
122 }
123
124print OUT "&nbsp;"x4 . "<a href=\"FAQ.html#TOC\">FAQ Contents</a>\n</p>\n";
125}
126
127
128
129########################################################################
130# The main program. We can pick out the contents lines because they lie
131# between <li> and </li> in the file, sometimes on more than one physical
132# line.
133
134# Turn the list of ignorable words into a hash for quick lookup. Add the
135# empty word to the list.
136
137@words = split /\s+/, $ignore_list;
138foreach $word (@words) { $ignore{$word} = 1; }
139$ignore{""} = 1;
140
141
142# Open the file and do the job
143
144open(IN, "html/FAQ.html") || die "Can't open html/FAQ.html\n";
145
146while (<IN>)
147 {
148 next unless /^<li>/;
149 $_ .= <IN> while !/<\/li>$/;
150 chomp;
151 s/\n\s*/ /g;
152
153 # Extract the operative text into $text, with the beginning in $pre.
154
155 my($pre,$text,$post) = /^<li>(.*<\/a>:(?:&nbsp;)*)(.*)<br><br><\/li>$/;
156
157 # Now split into words. As well as punctuation, there may be HTML thingies
158 # between words. Absorb them into the separators.
159
160 my(@words) = split /$wordgap/, $text;
161
162 # Lower case all the words, and remove those that we don't want.
163 # Then keep a list of all the used initials.
164
165 REMOVE_IGNORE:
166 for ($i = 0; $i < scalar @words; $i++)
167 {
168 my($word) = $words[$i] = "\L$words[$i]\E";
169
170 # Remove certain forms of word and those on the ignore list
171
172 if (defined $ignore{$word} || # word on ignore list
173 $word =~ /^-+$/ || # word consists entirely of hyphens
174 $word =~ /^-[^a-z]/ || # follows leading hyphen with non-letter
175 $word =~ /^[^a-z-]/ || # starts with a non-letter or hyphen
176 $word =~ /[@^.]/ # contains @ or ^ or .
177 )
178 {
179 splice(@words, $i, 1);
180 redo REMOVE_IGNORE if $i < scalar @words;
181 }
182
183 # Otherwise, build up a list of initials
184
185 else
186 {
187 my($inword) = $word;
188 $inword =~ s/^-//;
189 $initial = substr($inword, 0, 1);
190 $initials{"\U$initial\E"} = 1;
191 }
192 }
193
194 # Create the lines for the KWIC index, and store them in associative
195 # arrays, with the keyword as the key. That will get them sorted
196 # automatically.
197
198 while (scalar @words > 0)
199 {
200 my($word) = shift @words;
201 my($pretext, $casedword, $posttext) =
202 $text =~ /(.*?)(?<![a-z])(\Q$word\E)(?![a-z])(.*)/i;
203
204 # Remove a leading hyphen from $word so that it sorts according to
205 # the leading letter. What is actually output is $casedword, which
206 # retains the hyphen.
207
208 $word =~ s/^-//;
209
210 my($prelen) = length $pretext;
211 my($postlen) = length $posttext;
212
213 # We want to chop excessively long entries on either side. We can't set
214 # a fixed length because of the HTML control data. Call a function to
215 # add the given length to allow for HTML stuff. This is crude, but it
216 # does roughtly the right thing.
217
218 my($leftlen) = &setlen(70, $pretext);
219 my($rightlen) = &setlen(70, $posttext);
220
221 if ($prelen > $leftlen)
222 {
223 my($cutoff) = $leftlen;
224 $cutoff++
225 while ($cutoff < $prelen && substr($pretext, -$cutoff, 1) ne " ");
226 $pretext = "... " . substr($pretext, -$cutoff);
227 }
228
229 if ($postlen > $rightlen)
230 {
231 my($cutoff) = $rightlen;
232 $cutoff++
233 while ($cutoff < $postlen && substr($posttext, $cutoff, 1) ne " ");
234 $posttext = substr($posttext, 0, $cutoff) . "...";
235 }
236
237 # If the pre text has a font-ending not preceded by a font beginning
238 # (i.e. we've chopped the beginning off), we must insert a beginning.
239
240 while ($pretext =~ /^(.*?)<\/(small|tt|b|i)>/ && $1 !~ /<$2>/)
241 {
242 $pretext = "<$2>" . $pretext;
243 }
244
245 # If the pre text ends in a special font, we have to terminate that,
246 # and reset it at the start of the post text.
247
248 my($poststart) = "";
249
250 while ($pretext =~ /<(small|tt|b|i)>(?!.*?<\/\1>)/)
251 {
252 $pretext .= "</$1>";
253 $poststart .= "<$1>";
254 }
255
256 # If the post text changes font but doesn't close it, we must add
257 # the closure.
258
259 while ($posttext =~ /<(small|tt|b|i)>(?!.*?<\/\1>)/)
260 {
261 $posttext .= "</$1>";
262 }
263
264 # Remove any unnecessary changes in either of them
265
266 $pretext =~ s/<(small|tt|b|i)>\s*<\/\1>//g;
267 $posttext =~ s/<(small|tt|b|i)>\s*<\/\1>//g;
268
269 # Save the texts in associative arrays. Add the question number to
270 # the end of the word to make the key.
271
272 $pre =~ /(Q\d\d\d\d)/;
273 my($key) = "$word-$1";
274
275 $tableft{$key} = $pre . $pretext;
276 $tabright{$key} = $poststart .
277 "<font color=\"#FF0A0A\">$casedword</font>" . $posttext;
278 }
279 }
280
281close(IN);
282
283# Now write out the files. Each letter in the index goes in a different file
284
285$current_initial = "";
286
287foreach $key (sort keys %tableft)
288 {
289 my($initial) = $key =~ /^(.)/;
290 $initial = "\U$initial\E";
291
292 if ($initial ne $current_initial)
293 {
294 if ($current_initial ne "")
295 {
296 print OUT "</table>\n";
297 &write_initials($current_initial);
298 print OUT "</body>\n</html>\n";
299 close OUT;
300 }
301
302 open (OUT, ">html/FAQ-KWIC_$initial.html") ||
303 die "Can't open html/FAQ-KWIC_$initial.html\n";
304 print OUT
305 "<html>\n" .
306 "<head>\n" .
307 "<title>Exim FAQ: KWIC index section $initial</title>\n" .
308 "</head>\n" .
309 "<body bgcolor=\"#F8F8F8\" text=\"#00005A\" link=\"#0066FF\" alink=\"#0066FF\" vlink=\"#000099\">\n" .
310 "<h1>Exim FAQ: Keyword-in-context index</h1>\n";
311
312 write_initials($initial);
313
314 if ($initial eq "A")
315 {
316 print OUT <<End ;
317<p>
318This <i>Keyword-in-context</i> index for the Exim FAQ is generated
319automatically from the FAQ source. Browsers may not display the data very
320prettily, but it is hoped that it may provide a useful aid for finding things
321in the FAQ.
322</p>
323End
324 }
325
326 print OUT "<table border>\n";
327 $current_initial = $initial;
328 }
329
330 print OUT "<tr>\n";
331 print OUT "<td align=\"right\">$tableft{$key}</td>\n";
332 print OUT "<td align=\"left\">$tabright{$key}</td>\n";
333 print OUT "</tr>\n";
334 }
335
336# Close the final file
337
338if ($current_initial ne "")
339 {
340 print OUT "</table>\n";
341 &write_initials($current_initial);
342 print OUT "</body>\n</html>\n";
343 close OUT;
344 }
345
346# End