fix mbox archive link
[mharc.git] / bin / web-archive
CommitLineData
2ea8f66b
IK
1#!/usr/bin/perl
2##---------------------------------------------------------------------------##
3## File:
4## $Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
5## Description:
6## Updates/creates web archives from mailbox archives.
7## Run script with '-man' option to view manpage for this program.
8##---------------------------------------------------------------------------##
9## Copyright (C) 2001-2002 Earl Hood <earl@earlhood.com>
10##
11## This program is free software; you can redistribute it and/or modify
12## it under the terms of the GNU General Public License as published by
13## the Free Software Foundation; either version 2 of the License, or
14## (at your option) any later version.
15##
16## This program is distributed in the hope that it will be useful,
17## but WITHOUT ANY WARRANTY; without even the implied warranty of
18## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19## GNU General Public License for more details.
20##
21## You should have received a copy of the GNU General Public License
22## along with this program; if not, write to the Free Software
23## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24## 02111-1307, USA
25##---------------------------------------------------------------------------##
26
27package MHArc::web_archive;
28
29##--------------------------------------------------------------------------##
30# <x-boot-strap>
31BEGIN { die qq/CGI use FORBIDDEN!\n/ if (defined($ENV{'GATEWAY_INTERFACE'})); }
32my $Dir; BEGIN { $Dir = `dirname $0`; chomp $Dir; }
33use lib "$Dir/../lib"; # Add relative lib to search path
34# </x-boot-strap>
35##--------------------------------------------------------------------------##
36# <x-config>
37use MHArc::Config;
38my $config = MHArc::Config->load("$Dir/../lib/config.sh");
39# </x-config>
40##--------------------------------------------------------------------------##
41
42
43use Getopt::Long;
44use POSIX;
45use MHArc::ListDef;
46use MHArc::Util qw( usage );
47
48# Load MHonArc library
49require 'mhamain.pl';
50
51my $debug = 0;
52
53# Regular expression to match mail folder/mboxes
54my $folder_regex = '\d+(?:-\d+)?';
55
56MAIN: {
57 my %opt = ( );
58 my $clstatus = GetOptions(\%opt,
59 'alllistsurl=s', # Root to all lists URL.
60 'alllistsfile=s', # Pathname to all lists index page.
61 'debug|verbose', # Show what is going on in detail.
62 'editidx', # Edit archive pages; useful to apply MHonArc resource
63 # changes.
64 'editallidx', # Regen all lists index.
65 'editidxonly', # Edit archive index pages only.
66 'editrootidx', # Regen top index.
67 'home=s', # Pathname of home directory of archive account.
68 'htmldir=s', # Root directory for html archives.
69 'htmlurl=s', # Root URL for html archives.
70 'keepsearch!', # Keep search index on a rebuild.
71 'listsdef=s', # Pathname to list definition file.
72 'mboxdir=s', # Root directory for mbox archives.
73 'mboxurl=s', # Root URL for mbox archives.
74 'mharc=s', # MHonArc resource file for archives.
75 'mhamaxsize=i', # Maximum MHonArc archive size.
76 'mhapagesize=i', # Maximum MHonArc index page size.
77 'mknmz=s', # Pathname to Namazu make search index program.
78 'mknmzrc=s', # Pathname to Namazu configuration file.
79 'mknmztmpldir=s', # Pathname to Namazu template directory.
80 'mesgcgi=s', # Message CGI URL.
81 'mnavcgi=s', # Month navigation CGI URL.
82 'mtimeage=i', # Modify time age of a mailbox file to be considered
83 # for processing.
84 'nosearch', # Do not update search indexes.
85 'rebuild', # Rebuild archives from scratch.
86 'rooturl=s', # Root URL to archives.
87 'searchcgi=s', # Search CGI URL.
88
89 'man',
90 'help'
91 );
92 usage(0) unless $clstatus;
93 usage(1) if $opt{'help'};
94 usage(2) if $opt{'man'};
95
96 my $HOME = $opt{'home'} ||
97 $config->{'SW_ROOT'} ||
98 "$Dir/..";
99 my $ROOT_URL = $opt{'rooturl'} ||
100 $config->{'ROOT_URL'} ||
101 "/~mhonarc/archives";
102 my $LISTS_DEF_FILE = $opt{'listsdef'} ||
103 $config->{'LISTS_DEF_FILE'} ||
104 "$HOME/lib/lists.def";
105 my $HTML_DIR = $opt{'htmldir'} ||
106 $config->{'HTML_DIR'} ||
107 "$HOME/html";
108 my $HTML_URL = $opt{'htmlurl'} ||
109 $config->{'HTML_URL'} ||
110 "$ROOT_URL/html";
111 my $MBOX_DIR = $opt{'mboxdir'} ||
112 $config->{'MBOX_DIR'} ||
113 "$HOME/mbox";
114 my $MBOX_URL = $opt{'mboxurl'} ||
115 $config->{'MBOX_URL'} ||
116 "$ROOT_URL/mbox";
117 my $INFO_DIR = $opt{'infodir'} ||
118 $config->{'INFO_DIR'} ||
119 "$HOME/info";
120 my $INFO_URL = $opt{'infourl'} ||
121 $config->{'INFO_URL'} ||
122 "$ROOT_URL/info";
123 my $MHA_RC = $opt{'mharc'} ||
124 $config->{'MHA_RC'} ||
125 "$HOME/lib/common.mrc";
126 my $MHA_RC_DIR = $opt{'mharcdir'} ||
127 $config->{'MHA_RC_DIR'} ||
128 "$HOME/lib/mrc";
129 my $MHA_MAXSIZE = $opt{'mhamaxsize'} ||
130 $ENV{'WA_MAXSIZE'} ||
131 2000;
132 my $MHA_PAGESIZE = $opt{'mhapagesize'} ||
133 $ENV{'WA_PAGESIZE'} ||
134 200;
135 my $MTIME_AGE = $opt{'mtimeage'} ||
136 $ENV{'WA_MTIME_AGE'} ||
137 $config->{'MTIME_AGE'} ||
138 86400;
139 my $MKNMZ = $opt{'mknmz'} ||
140 $config->{'MKNMZ'} ||
141 '/usr/local/bin/mknmz';
142 my $MKNMZRC = $opt{'mknmzrc'} ||
143 $config->{'MKNMZ_RC'} ||
144 "$HOME/cgi-bin/mknmzrc";
145 my $MKNMZTMPLDIR = $opt{'mknmztmpldir'} ||
146 $config->{'MKNMZ_TMPL_DIR'} ||
147 "$HOME/cgi-bin/template",
148 my $ALL_LISTS_URL = $opt{'alllistsurl'} ||
149 $config->{'ALL_LISTS_URL'} ||
150 $HTML_URL;
151 my $MESG_CGI = $opt{'mesgcgi'} ||
152 $config->{'MESG_CGI'} ||
153 join('/', $ROOT_URL,'cgi-bin/mesg.cgi');
154 my $MNAV_CGI = $opt{'mnavcgi'} ||
155 $config->{'MNAV_CGI'} ||
156 join('/', $ROOT_URL,'cgi-bin/mnav.cgi');
157 my $SEARCH_CGI = $opt{'searchcgi'} ||
158 $config->{'SEARCH_CGI'} ||
159 join('/', $ROOT_URL,'cgi-bin/namazu.cgi');
160 my $EXTRACT_CGI = $opt{'extractchcgi'} ||
161 $config->{'EXTRACT_CGI'} ||
162 join('/', $ROOT_URL,'cgi-bin/extract-mesg.cgi');
163
164 my $rebuild = $opt{'rebuild'} ||
165 $ENV{'WA_REBUILD'} || 0;
166 my $keepsearch = $opt{'keepsearch'};
167 my $editidx = $opt{'editidx'} ||
168 $ENV{'WA_EDIT'} || 0;
169 my $editidxonly = $opt{'editidxonly'} || 0;
170 my $editrootidx = $opt{'editrootidx'};
171 my $editallidx = $opt{'editallidx'};
172 my $nosearch = $opt{'nosearch'} ||
173 $ENV{'WA_NOSEARCH'} || 0;
174 $debug = $opt{'debug'} ||
175 $ENV{'WA_DEBUG'};
176
177 my $all_index = $opt{'alllistsfile'} ||
178 $config->{'ALL_LISTS_FILE'} ||
179 join('/', $HTML_DIR, 'lists.html');
180 my $main_header = $config->{'MAIN_HEADER'} ||
181 join('/', $HTML_DIR, '.PNM.head');
182 my $main_footer = $config->{'MAIN_FOOTER'} ||
183 join('/', $HTML_DIR, '.PNM.foot');
184
185 my $time = time;
186 if ($rebuild) {
187 $editidx = 0;
188 $editrootidx = 0;
189 $editallidx = 0;
190 } else {
191 $keepsearch = 0;
192 }
193 $editidx = 1 if $editidxonly;
194 if ($editidx) {
195 $editrootidx = 0;
196 $editallidx = 0;
197 }
198
199 my $listdef = MHArc::ListDef->new($LISTS_DEF_FILE);
200 print "Loaded lists definitions.\n" if $debug;
201
202 if ($editallidx) {
203 # Just updating all-lists index
204 update_archive_index(
205 '-config' => $config,
206 '-listdef' => $listdef,
207 '-htmldir' => $HTML_DIR,
208 '-htmlurl' => $HTML_URL,
209 '-infodir' => $INFO_DIR,
210 '-infourl' => $INFO_URL,
211 '-allindex' => $all_index
212 );
213 last MAIN;
214 }
215
216 mhonarc::initialize();
217 print "MHonArc initialized.\n" if $debug;
218
219 local(*DIR, *INDEX, *FILE);
220
221 print "Reading $MBOX_DIR.\n" if $debug;
222 opendir(DIR, $MBOX_DIR) || die qq/Unable to open "$MBOX_DIR": $!/;
223 my @dirs = ();
224
225 # Get list of archives to process
226 if (@ARGV) {
227 # list of archives specified on the command-line
228 @dirs = @ARGV;
229 } else {
230 # read mbox dir to get list
231 @dirs = grep { (-d "$MBOX_DIR/$_") &&
232 ($_ ne '.') &&
233 ($_ ne '..')
234 } readdir(DIR);
235 closedir(DIR);
236 }
237
238 my(@months, @folders);
239 my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
240 $folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
241 $disable_search, $listname, $short_title);
242
243 print "Lists: ", join(', ', @dirs), "\n" if $debug;
244 foreach $list (@dirs) {
245 print "Processing $list ...\n" if $debug;
246
247 @folders = ();
248 $listname = $list;
249 $cvs = ($listname =~ s/\.CVS$//);
250
251 if (!$editidx && !$editrootidx) {
252 # Get list of input mailboxes to process
253
254 $dir = join('/', $MBOX_DIR, $list);
255 if (!opendir(DIR, $dir)) {
256 warn qq/Unable to open "$dir": $!/;
257 next;
258 }
259
260 # create .noraw file indicator if no-raw-link specified
261 my $no_raw_file = join('/', $dir, '.noraw');
262 my $no_raw_htaccess = join('/', $dir, '.htaccess');
263 if ($listdef->{$listname}{'no-raw-link'}[0]) {
264 if (! -e $no_raw_file) {
265 local(*NORAW);
266 if (!open(NORAW, ">$no_raw_file")) {
267 warn qq/Warning: Unable to create "$no_raw_file": $!\n/;
268 } else {
269 close(NORAW);
270 }
271 }
272 if (! -e $no_raw_htaccess) {
273 local(*HTACCESS);
274 if (!open(HTACCESS, ">$no_raw_htaccess")) {
275 warn qq/Warning: Unable to create "$no_raw_htaccess": $!\n/;
276 } else {
277 print HTACCESS 'Order allow,deny', "\n",
278 'Deny from all', "\n";
279 close(HTACCESS);
280 }
281 }
282 } elsif (-e $no_raw_file) {
283 if (!unlink($no_raw_file)) {
284 warn qq/Warning: Unable to remove "$no_raw_file": $!\n/;
285 }
286 }
287
288 @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
289 closedir(DIR);
290 print "Mboxes: ", join(', ', @months), "\n" if $debug;
291
292 foreach $mon (@months) {
293 $mondir = join('/', $dir, $mon);
294 if ($rebuild) {
295 push(@folders, $mondir);
296 next;
297 }
298 $mtime = (stat($mondir))[9];
299 print "$mondir mtime: $mtime\n" if $debug;
300 if (($time - $mtime) < $MTIME_AGE) {
301 push(@folders, $mondir);
302 }
303 }
304
305 next if (!@folders);
306 print "Folders: ", join(', ', @folders), "\n" if $debug;
307
308 } elsif ($editidx) {
309 # Just editing pages so we get folder list from html directory
310 $dir = join('/', $HTML_DIR, $list);
311 if (!opendir(DIR, $dir)) {
312 warn qq/Unable to open "$dir": $!/;
313 next;
314 }
315 @months = grep { /^$folder_regex$/o } readdir(DIR);
316 closedir(DIR);
317
318 foreach $mon (@months) {
319 $mondir = join('/', $dir, $mon);
320 push(@folders, $mondir);
321 }
322 next if (!@folders);
323 print "Editidx Folders: ", join(', ', @folders), "\n" if $debug;
324 }
325 @folders = reverse sort @folders;
326
327 $htmldir = join('/', $HTML_DIR, $list);
328 if ($rebuild) {
329 clean_html_archive($htmldir, $keepsearch);
330 }
331 mkdir($htmldir, 0777);
332
333 $disable_search = ($list =~ /^\./) ||
334 ((defined($listdef->{$listname}{'no-search'}) &&
335 $listdef->{$listname}{'no-search'}[0]));
336
337 if (defined($listdef->{$listname}{'description'})) {
338 $title = join(' ', @{$listdef->{$listname}{'description'}});
339 } else {
340 $title = $listname;
341 }
342 $short_title = $listname;
343 if ($cvs) {
344 $title = '[CVS] '.$title;
345 $short_title = '[CVS] '.$short_title;
346 }
347
348 if (!$editrootidx) {
349 # define arguments to mhonarc
350 my @mhaargs = (
351 '-modtime',
352 '-lockmethod', 'flock',
353 #'-maxsize', $MHA_MAXSIZE,
354 #'-idxsize', $MHA_PAGESIZE,
355 '-rcfile', $MHA_RC,
356 #'-outdir' , $htmldir,
357 '-title', "$title (date)",
358 '-ttitle', "$title (thread)",
359 '-definevar', "LIST-TITLE='$short_title'",
360 '-definevar', "LIST-NAME='$list'",
361 '-definevar', "SEARCH-CGI=$SEARCH_CGI",
362 '-definevar', "PNAV-CGI=$MNAV_CGI",
363 '-definevar', "EXTRACT-CGI=$EXTRACT_CGI",
364 '-definevar', "MESG-CGI=$MESG_CGI",
365 '-definevar', "ALL-LISTS-URL=$ALL_LISTS_URL",
366
367 '-definevar', "MNAV-CGI=$MNAV_CGI", # backwards compatibility
368 );
369
370 if (defined($listdef->{$listname}{'lang'})) {
371 push(@mhaargs, '-lang', $listdef->{$listname}{'lang'}[0]);
372 }
373
374 if (-e "$MHA_RC_DIR/$list.mrc") {
375 push(@mhaargs, '-rcfile', "$MHA_RC_DIR/$list.mrc");
376 }
377 if ($cvs) {
378 push(@mhaargs, '-nothread');
379 push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
380 } else {
381 push(@mhaargs, '-thread');
382 }
383 if ($list =~ /^\./) {
384 push(@mhaargs,
385 '-nothread',
386 '-definevar', "SEARCH-FORM=''");
387 push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
388 }
389 if ($editidx) {
390 push(@mhaargs, '-editidx');
391 push(@mhaargs, '-nomsgpgs') if $editidxonly;
392 }
393 if (defined($config->{'MSG_DATE_FIELDS'})) {
394 push(@mhaargs, '-datefields', $config->{'DATE_FIELDS'});
395 }
396 if (!$debug && !$rebuild) {
397 push(@mhaargs, '-quiet');
398 }
399 if (!$rebuild && !$editidx) {
400 push(@mhaargs, '-add');
401 }
402 if ($listdef->{$listname}{'check-no-archive'}) {
403 push(@mhaargs, '-checknoarchive');
404 }
405
406 # add any custom options specified in definition file
407 if (defined($listdef->{$listname}{'mhonarc-options'})) {
408 require 'shellwords.pl';
409 push(@mhaargs,
410 shellwords(join(' ', @{$listdef->{$listname}{'mhonarc-options'}})));
411 }
412
413 # if searching is disabled, zero-out $SEARCH-FORM$
414 if ($disable_search) {
415 push(@mhaargs, '-definevar', "SEARCH-FORM=''");
416 }
417
418 my(@fmhaargs);
419 @months = ( );
420 foreach $folder (@folders) {
421 ($mon = $folder) =~ s/\.gz$//;
422 $mon =~ s/^.*\///;
423 push(@months, $mon);
424 }
425
426 my $cur_msg_cnt;
427 for ($i=0; $i < @folders; ++$i) {
428 $folder = $folders[$i];
429 $mon = $months[$i];
430 $mondir = join('/', $htmldir, $mon);
431
432 # make sure directory exists
433 mkdir($mondir, 0777);
434
435 # set final arguments to mhonarc
436 @fmhaargs = (
437 @mhaargs,
438 '-outdir', $mondir,
439 '-definevar', "CUR-PERIOD='$mon'",
440
441 '-definevar', "CUR-MONTH='$mon'", # backwards compatibility
442 );
443 push(@fmhaargs, $folder) unless $editidx;
444
445 # call mhonarc
446 print "Processing archive $mondir...\n" if $debug;
447 print "\tmhonarc options: ", join(' ', @fmhaargs), "\n" if $debug;
448 if (!mhonarc::open_archive(@fmhaargs)) {
449 warn qq/Warning: Unable to open "$mondir" archive: /,
450 qq/($mhonarc::CODE) $mhonarc::ERROR\n/;
451 next;
452 }
453 $mhonarc::CBRcVarExpand = \&mha_rcvar_expand;
454 $cur_msg_cnt = $mhonarc::NumOfMsgs || 0;
455 mhonarc::process_input();
456 if ($mhonarc::CODE != 0) {
457 warn qq/Warning: Problem processing "$mondir": /,
458 qq/($mhonarc::CODE) $mhonarc::ERROR\n/;
459 next;
460 }
461 if ($cur_msg_cnt == $mhonarc::NumOfMsgs) {
462 print "Skipping search index, no new messages in archive\n"
463 if $debug;
464 next;
465 }
466
467 # update search index
468 # The -Y option is used so we do not have to process all months
469 # to update index.
470 if (!$keepsearch && !$nosearch && !$disable_search) {
471 my @nmzargs = (
472 $MKNMZ,
473 '--mhonarc', # only do mhonarc pages
474 '-f', $MKNMZRC, # specify resource file
475 '-T', $MKNMZTMPLDIR, # specify template directory
476 '-O', $htmldir, # specify location to place index
477 '-Y' # do not delete existing files
478 );
479 if (!$debug && !$rebuild) {
480 push(@nmzargs, '--quiet');
481 }
482 push(@nmzargs, $mondir);
483 print "Search Index Command: ", join(" ", @nmzargs), "\n" if $debug;
484
485 if (system(@nmzargs)) {
486 warn qq/Warning: Non-zero exit status returned from /,
487 qq/"@nmzargs": $?\n/;
488 }
489 namazu_cleanup($htmldir);
490 }
491 }
492 }
493
494 ## Update monthly index
495 if (!opendir(DIR, $htmldir)) {
496 warn qq/Warning: Unable to open $htmldir for reading: $!\n/;
497 next;
498 }
499 @months = reverse sort grep { /^$folder_regex/o } readdir(DIR);
500 print "Month listing for main index: @months\n" if $debug;
501 closedir(DIR);
502 my $indexhtml = join('/', $htmldir, 'index.html');
503 if (!open(INDEX, ">$indexhtml.tmp")) {
504 warn qq/Warning: Unable to open $htmldir for reading: $!\n/;
505 next;
506 }
507
508 my @vars = (
509 '-nosearch' => $disable_search,
510 'SEARCH-CGI' => $SEARCH_CGI,
511 'LIST-TITLE' => $short_title,
512 'LIST-NAME' => $list,
513 'LIST-DESC' => $title,
514 );
515 print_template(\*INDEX, $main_header, @vars);
516 print INDEX "<ul>\n";
517 foreach $mon (@months) {
518 print INDEX qq|<li><b>$mon</b>:|;
519 print INDEX qq|&nbsp;&nbsp;<a href="$mon/index.html">[Date]</a>|
520 if (-e join('/', $htmldir, $mon, 'index.html'));
521 print INDEX qq|&nbsp;&nbsp;<a href="$mon/threads.html">[Thread]</a>|
522 if (-e join('/', $htmldir, $mon, 'threads.html'));
523
524 if (!$listdef->{$listname}{'no-raw-link'}[0]) {
525 my $raw_label = '[Raw: ]';
526 my $compressed = 0;
527 my $mbox_file = join('/', $MBOX_DIR, $list, $mon);
528 my $mbox_url = join('/', $MBOX_URL, $list, $mon);
529 if (! -e $mbox_file) {
530 $mbox_file .= ".gz";
531 $mbox_url .= ".gz";
532 $compressed = 1;
533 }
534# if (-e $mbox_file) {
535# print INDEX qq|&nbsp;&nbsp;<a href="$mbox_url">[mbox: |,
536# (-s _), qq| bytes|;
537# print INDEX qq|, gzipped| if $compressed;
538# print INDEX qq|]</a>|;
539# }
540 }
541 print INDEX qq|</li>\n|;
542 }
543 print INDEX "</ul>\n";
544 print_template(\*INDEX, $main_footer, @vars);
545 close(INDEX);
546 if (!rename("$indexhtml.tmp", $indexhtml)) {
547 warn qq|Warning: Unable to rename "$indexhtml.tmp" to |,
548 qq|"$indexhtml": $!\n|;
549 }
550 }
551
552 update_archive_index(
553 '-config' => $config,
554 '-listdef' => $listdef,
555 '-htmldir' => $HTML_DIR,
556 '-htmlurl' => $HTML_URL,
557 '-infodir' => $INFO_DIR,
558 '-infourl' => $INFO_URL,
559 '-allindex' => $all_index
560 );
561
562} # End: MAIN
563
564############################################################################
565
566sub entify {
567 my $str = shift;
568 $str =~ s/\&/\&amp;/;
569 $str =~ s/</\&lt;/;
570 $str =~ s/>/\&gt;/;
571 $str;
572}
573
574sub get_periods {
575 my $dir = shift;
576
577 local(*DIR);
578 if (!opendir(DIR, $dir)) {
579 warn qq/Warning: Unable to open "$dir": $!/;
580 return ( );
581 }
582 my @months = reverse sort grep { /^$folder_regex$/o } readdir(DIR);
583 closedir(DIR);
584 @months;
585}
586
587sub read_template {
588 my $fh = shift;
589 my %varhash = @_;
590 my $data = "";
591
592 if ($varhash{'-nosearch'}) {
593 local $_;
594 my $ignore = 0;
595 while (<$fh>) {
596 if ($ignore) {
597 $ignore = 0 if /<!--\/x-search-form-->/;
598 next;
599 }
600 if (/<!--x-search-form-->/) {
601 $ignore = 1;
602 next;
603 }
604 s/\$([^\$]+)\$/$varhash{$1}/ge;
605 $data .= $_;
606 }
607 } else {
608 local $/;
609 $data = <$fh>;
610 $data =~ s/\$([^\$]+)\$/$varhash{$1}/ge;
611 }
612 $data;
613}
614
615sub print_template {
616 my $fhout = shift;
617 my $file = shift;
618 if (-e $file) {
619 print "Reading template file $file\n" if $debug;
620 local(*FILE);
621 if (open(FILE, $file)) {
622 print $fhout read_template(\*FILE, @_);
623 close(FILE);
624 } else {
625 warn qq/Warning: Unable to open "$file": $!\n/;
626 }
627 }
628}
629
630sub namazu_cleanup {
631 my $dir = shift;
632 my $lock = join('/', $dir, 'NMZ.lock2');
633 local(*LOCK);
634 if (!open(LOCK, $lock)) {
635 # no lock file left around, so everything should be okay
636 return;
637 }
638 my $pid = <LOCK>;
639 close(LOCK);
640 if (!kill(0, $pid)) {
641 warn qq/Warning: Stale "$lock", removing it\n/;
642 if (!unlink($lock)) {
643 warn qq/Warning: Unable to remove "$lock": $!\n/;
644 }
645 }
646}
647
648sub format_date {
649 my $time = shift;
650 my $fmt = shift || '%Y-%m-%d %H:%M:%S';
651 #my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time);
652 #$year += 1900; ++$mon;
653 #sprintf("%d-%02d-%02d %02d:%02d:%02d", $year,$mon,$mday,$hour,$min,$sec);
654 POSIX::strftime($fmt, localtime($time));
655}
656
657## Retrieve the last time an archive was updated.
658# We scan the message pages since their mtime should be set to the
659# date of the message.
660#
661sub retrieve_last_update {
662 my $archive = shift;
663
664 local(*DIR);
665 if (!opendir(DIR, $archive)) {
666 warn qq/Warning: Unable to open "$archive" for reading: $!\n/;
667 return undef;
668 }
669
670 my $latest = 0;
671 my $mtime = 0;
672 my $file;
673 foreach $file (readdir(DIR)) {
674 next unless $file =~ /^msg\d+\.html/;
675 $mtime = (stat(join('/', $archive, $file)))[9];
676 $latest = $mtime if ($mtime > $latest);
677 }
678 close(DIR);
679
680 if ($latest == 0) {
681 # No luck with message pages, so try database file
682 warn qq/Warning: Unable to determine last update from message pages /,
683 qq/for "$archive"\n/;
684 if (-e join('/', $archive, '.mhonarc.db')) {
685 $latest = ((stat(_))[9]);
686 } elsif (-e join('/', $archive, 'mhonarc.db')) {
687 $latest = ((stat(_))[9]);
688 }
689 }
690 if ($latest == 0) {
691 # No luck with data, so use directory mtime
692 $latest = ((stat($archive))[9]);
693 }
694
695 $latest;
696}
697
698## Remove HTML archive
699#
700sub clean_html_archive {
701 my $dir = shift; # Directory of archive
702 my $ks = shift; # Flag is search index files should be preserved
703 if (!$ks) {
704 # delete everything
705 print "Removing $htmldir\n" if $debug;
706 system('/bin/rm', '-rf', $dir);
707 return;
708 }
709
710 # keep search index, so must delete each period sub-directory
711 local(*DIR);
712 opendir(DIR, $dir) ||
713 die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
714 my @subdirs = map { join('/',$dir,$_) }
715 grep { /^$folder_regex$/o } readdir(DIR);
716 closedir(DIR);
717 my $subdir;
718 foreach $subdir (@subdirs) {
719 print "Removing $subdir\n" if $debug;
720 system('/bin/rm', '-rf', $subdir);
721 }
722}
723
724## Retrieve the list info URL.
725#
726sub get_info_url {
727 my %opts = @_;
728 my $pathname = join('/', $opts{'-dir'}, $opts{'-name'}) . '.html';
729 if (! -e $pathname) {
730 return undef;
731 }
732 join('/', $opts{'-baseurl'}, $opts{'-name'}) . '.html';
733}
734
735## Generats the all-lists index.
736#
737sub update_archive_index {
738 print "Generating root archive index...\n" if $debug;
739
740 my %opts = @_;
741 my $config = $opts{'-config'};
742 my $listdef = $opts{'-listdef'};
743 my $html_dir = $opts{'-htmldir'};
744 my $html_url = $opts{'-htmlurl'};
745 my $info_dir = $opts{'-infodir'};
746 my $info_url = $opts{'-infourl'};
747 my $index_html = $opts{'-allindex'};
748
749 my $header = $opts{'-header'} ||
750 $config->{'ALL_LISTS_HEADER'} ||
751 join('/', $html_dir, '.PNM.all-head');
752 my $footer = $opts{'-footer'} ||
753 $config->{'ALL_LISTS_FOOTER'} ||
754 join('/', $html_dir, '.PNM.all-foot');
755
756 my $label_name = $opts{'-label-name'} ||
757 $config->{'ALL_LISTS_LABEL_NAME'} ||
758 'Name';
759 my $label_indexes
760 = $opts{'-label-indexes'} ||
761 $config->{'ALL_LISTS_LABEL_INDEXES'} ||
762 'Current&nbsp;Index';
763 my $label_last = $opts{'-label-last-updated'} ||
764 $config->{'ALL_LISTS_LABEL_LAST_UPDATED'} ||
765 'Last&nbsp;Updated';
766 my $label_info = $opts{'-label-info'} ||
767 $config->{'ALL_LISTS_LABEL_INFO'} ||
768 '[info]';
769 my $label_date = $opts{'-label-date'} ||
770 $config->{'ALL_LISTS_LABEL_DATE'} ||
771 '[Date]';
772 my $label_threads
773 = $opts{'-label-threads'} ||
774 $config->{'ALL_LISTS_LABEL_THREADS'} ||
775 '[Threads]';
776 my $time_fmt = $opts{'-time-fmt'} ||
777 $config->{'ALL_LISTS_DATE_FORMAT'} ||
778 '%Y-%m-%d %H:%M:%S';
779
780 local(*IDX);
781 my $tmp_index = $index_html . ".tmp";
782 if (!open(IDX, ">$tmp_index")) {
783 warn qq/Warning: Unable to create "$tmp_index": $!\n/;
784 return;
785 }
786
787 my %updated = ( );
788 my($list, $listname, $last_updated, $dir, $latest, $info);
789
790 foreach $listname (keys %$listdef) {
791 next if $listname =~ /^\./; # skip hidden archives
792 next if $listdef->{'hide-from-all-lists'}[0];
793
794 foreach $list ($listname, "$listname.CVS") {
795 $dir = join('/', $html_dir, $list);
796 next unless -e $dir;
797
798 print "Computing last update for $list...\n" if $debug;
799 my @months = get_periods($dir);
800 next unless @months;
801 $latest = $months[0];
802 $last_updated = retrieve_last_update(join('/', $dir, $latest));
803 if (!defined($last_updated)) {
804 print "Unable to compute last update for $list.\n" if $debug;
805 next;
806 }
807 $updated{$list} = [ $last_updated, $list, $listname, $dir, $latest ];
808 }
809 }
810
811 print_template(\*IDX, $header);
812 print IDX qq|<table class="archiveLists" cellpadding="3" cellspacing="1">\n|,
813 qq|<tr class="listsHeaderRow" valign="baseline" align="left">\n|,
814 qq|<th>$label_name</th>|,
815 qq|<th>$label_indexes</th>|,
816 qq|<th>$label_last</th>|,
817 qq|\n</tr>\n|;
818
819 my($time);
820 foreach $list (sort { $updated{$b}->[0] <=> $updated{$a}->[0] }
821 keys(%updated)) {
822 ($time, $list, $listname, $dir, $latest) = @{$updated{$list}};
823
824 print "Printing listing for $list\n" if $debug;
825 $last_updated = format_date($time, $time_fmt);
826 $last_updated =~ s/ /\&nbsp;/g;
827 my $short_title = entify($listdef->{$listname}{'all-lists-name'}[0] ||
828 $listname);
829 my $description = entify($listdef->{$listname}{'description'}[0] ||
830 $listname);
831 if ($list =~ /\.CVS$/) {
832 $short_title .= "&nbsp;(CVS)";
833 $description .= " (CVS commits)";
834 }
835 $info = get_info_url(
836 '-name' => $listname,
837 '-dir' => $info_dir,
838 '-baseurl' => $info_url
839 );
840
841 print IDX qq|<tr valign="baseline">\n|;
842
843 print IDX qq|<td>&nbsp;<span class="listName">|,
844 qq|<a href="$html_url/$list/">$short_title</a></span>&nbsp;|;
845 print IDX qq|<a class="infoLink" href="$info">$label_info</a>&nbsp;|
846 if defined($info);
847 print IDX qq|</td>\n|;
848
849
850 print IDX qq|<td>|;
851 print IDX qq|&nbsp;<a href="$html_url/$list/$latest/index.html">$label_date</a>&nbsp;|
852 if (-e join('/', $dir, $latest, 'index.html'));
853 print IDX qq|&nbsp;<a href="$html_url/$list/$latest/threads.html">$label_threads</a>&nbsp;|
854 if (-e join('/', $dir, $latest, 'threads.html'));
855 print IDX qq|</td>\n|;
856
857 print IDX qq|<td>&nbsp;<tt>|, $last_updated, qq|</tt>&nbsp;</td>\n|;
858 }
859
860 print IDX qq|</table>\n|;
861 print_template(\*IDX, $footer);
862 close(IDX);
863
864 if (!rename($tmp_index, $index_html)) {
865 warn qq/Warning: Unable to rename "$tmp_index" to "$index_html": $!\n/;
866 }
867}
868
869sub mha_rcvar_expand {
870 my $mha_index = shift;
871 my $var_name = shift;
872 my $arg = shift;
873
874 my $val = undef;
875 if ($var_name eq 'NMZ-SUBJECT-QUERY') {
876 my($lref, $key, $pos) =
877 mhonarc::compute_msg_pos($mha_index, $var_name, $arg);
878 return undef unless defined($key);
879
880 my $clipped = 0;
881 $val = mhonarc::get_base_subject($key);
882 if (length($val) > 128) {
883 $val = substr($val, 0, 128);
884 $clipped = 1;
885 }
886 $val = "\Q$val\E";
887 $val =~ s/(?:\\\s)+/\\s+/g; # \Q will escape whitespace
888 my $repl_re = $mhonarc::SubReplyRxp;
889 my $query = "+subject:/^(?:$repl_re)*$val";
890 $query .= '\s*$' unless $clipped;
891 $query .= '/';
892 return ($query, 0, 0);
893 }
894
895 return undef;
896}
897
898
899############################################################################
900__END__
901
902=head1 NAME
903
904web-archive - Update/create MHonArc archives from mailbox archives.
905
906=head1 SYNOPSIS
907
908 web-archive
909 web-archive [options]
910 web-archive [options] [list-name ...]
911
912=head1 DESCRIPTION
913
914This program is part of mharc and has the responsibility of processing
915the mailbox archives created by the L<filter-spool|filter-spool> script to
916update and/or create MHonArc archives.
917
918This program is automatically called by the L<read-mail|read-mail> script for
919processing incoming mail within the mail spool if L<filter-spool|filter-spool>
920returns with an okay status. However, this program can be manually
921invoked to rebuild archives, edit existing archives, or other
922administrative tasks. Since there may be a need to do selective archive
923processing, any non-option related argument is treated as mailing
924list archive name to process.
925
926=head1 OPTIONS
927
928=over
929
930=item C<-alllistidx> I<pathname>
931
932Pathname of file to generate the all lists index.
933If not specified, the value of the C<ALL_LISTS_FILE> variable in
934C<config.sh> is used, else it defaults to "C<I<-htmldir>/lists.html>".
935
936=item C<-alllistsurl> I<url>
937
938URL to page containing list of all mailing lists archived.
939If not specified, the value of the C<ALL_LISTS_URL> variable in
940C<config.sh> is used, else it defaults to C<-htmlurl>.
941
942=item C<-editidx>
943
944Edit archive pages, useful to apply MHonArc resource
945changes.
946
947=item C<-editrootidx>
948
949Only regenerate root index pages for archives. This is useful if
950you make changes to the C<.PNM.head> or C<.PNM.foot> files that you
951want immediately applied.
952
953=item C<-help>
954
955Print out usage information.
956
957=item C<-home> I<pathname>
958
959Root pathname of archiving software and data.
960If not specified, the parent directory that contains this program
961is used.
962
963=item C<-htmldir> I<pathname>
964
965Root directory for html archives.
966If not specified, "C<I<-home>/html>" is used.
967
968=item C<-htmlurl> I<url>
969
970URL root to HTML archives.
971If not specified, defaults to C<I<rooturl>/html>.
972
973=item C<-infodir> I<pathname>
974
975Pathname of directory containing informational pages for each list
976archive. Information for a list archive can be provided by creating a
977file called "C<I<list-name>.html>". Once created, a link to the file
978(based on the value of the C<-infourl> option) will be generated in
979the all-lists index to it.
980
981If this option is not specified, the value of the C<INFO_DIR> variable
982in C<config.sh> is used, else it defaults to "C<I<-home>/info>".
983
984=item C<-infourl> I<url>
985
986Base URL containing informational pages for each list archive.
987If not specified, the value of the C<INFO_URL> variable in
988C<config.sh> is used, else it defaults to "C<I<-rooturl>/info>".
989
990=item C<-keepsearch>
991
992Preserve search index if C<-rebuild> is specified. This option
993is handy if all that is desired is to rebuild the HTML archives
994from the raw data since the overhead of rebuilding the search indexes
995will be avoided.
996
997B<CAUTION:> Do not use C<-keepsearch> if you have removed messages
998from the raw mail archives since resulting HTML message pages may
999have different URIs than what is stored within the search index.
1000
1001=item C<-listsdef> I<pathname>
1002
1003Pathname to mailing lists definition file.
1004If not specified, "C<I<-home>/lib/lists.def>" is used.
1005
1006=item C<-man>
1007
1008Print out entire manpage.
1009
1010=item C<-mboxdir> I<pathname>
1011
1012Root directory for mbox archives.
1013If not specified, "C<I<-home>/mbox>" is used.
1014
1015=item C<-mharc> I<pathname>
1016
1017MHonArc resource file for archives.
1018If not specified, "C<I<-home>/lib/common.mrc>" is used.
1019
1020=item C<-mharcdir> I<pathname>
1021
1022Directory containing list-specifc MHonArc resource files. A given
1023list archive can have additional resource settings by creating a
1024file called C<I<list-name>.mrc> within the directory specified by
1025C<-mharcdir>.
1026
1027If C<-mharcdir> is not specified, "C<I<-home>/lib/mrc>" is used.
1028
1029=item C<-mhamaxsize> I<number>
1030
1031Maximum MHonArc archive size.
1032If not specified the value of the C<WA_MAXSIZE> environment variable is used.
1033
1034=item C<-mhapagesize> I<number>
1035
1036Maximum MHonArc index page size.
1037If not specified the value of the C<WA_PAGESIZE> environment variable is used.
1038
1039=item C<-mknmz> I<pathname>
1040
1041Pathname to Namazu make search index program.
1042If not specified, "C</usr/local/bin/mknmz>" is used.
1043
1044=item C<-mknmzrc> I<pathname>
1045
1046Pathname to Namazu configuration file.
1047If not specified, "C<I<-home>/cgi-bin/mknmzrc>" is used.
1048
1049=item C<-mknmztmpldir> I<pathname>
1050
1051Pathname to Namazu template directory.
1052If not specified, "C<I<-home>/cgi-bin/template>" is used.
1053
1054=item C<-mnavcgi> I<url>
1055
1056URL to monthly navigation cgi program.
1057If not specified, C<I<rooturl>/cgi-bin/mnav.cgi> is used.
1058
1059=item C<-mtimeage> I<seconds>
1060
1061Modify time age of a mailbox file to be considered for processing. If
1062not specified the value of the C<WA_MTIME_AGE> environment variable is
1063used.
1064
1065=item C<-nosearch>
1066
1067Do not update search indexes.
1068
1069=item C<-rebuild>
1070
1071Rebuild archives from scratch.
1072
1073=item C<-rooturl> I<url>
1074
1075URL root of archives.
1076If not specified, C</~mhonarc/archives> is used.
1077
1078=item C<-searchcgi> I<url>
1079
1080URL to search cgi program.
1081If not specified, C<I<rooturl>/cgi-bin/namazu.cgi> is used.
1082
1083=item C<-verbose>
1084
1085Show what is going on in detail.
1086
1087=back
1088
1089=head1 ENVIRONMENT
1090
1091Environment variable usage is deprecated.
1092
1093The following environment variables are recognized:
1094
1095=over
1096
1097=item C<WA_DEBUG>
1098
1099If set to a true value, detailed information of progress will be
1100printed to stdout. Debugging can also be enabled by the
1101C<-debug> command-line option.
1102
1103=item C<WA_EDIT>
1104
1105If set to a true value, archives will be editted. It is probably
1106better to use the C<-editidx> command-line option instead if archives
1107editing is desired.
1108
1109=item C<WA_MAXSIZE>
1110
1111Maximum MHonArc archive size. The default value is 2000. This setting
1112can be overridden by the C<-mhamaxsize> command-line option.
1113
1114=item C<WA_MTIME_AGE>
1115
1116The modification age, in seconds, for a mailbox to be considered
1117for processing. The default value is C<86400> (one day).
1118This setting can be overridden by the C<-mtimeage> command-line option.
1119
1120=item C<WA_NOSEARCH>
1121
1122If set to a true value, the Namazu search indexes will not be updated
1123for archives processed. Disabling of search index updates can also be
1124disabled by the C<-nosearch> command-line option.
1125
1126=item C<WA_PAGESIZE>
1127
1128MHonArc index page size. The default value is 200. This setting
1129can be overridden by the C<-mhapagesize> command-line option.
1130
1131=item C<WA_REBUILD>
1132
1133If set to a true value, archives will be rebuilt. It is probably
1134better to use the C<-rebuild> command-line option instead if rebuilding
1135is desired.
1136
1137=back
1138
1139=head1 VERSION
1140
1141$Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
1142
1143=head1 AUTHOR
1144
1145Earl Hood, earl@earlhood.com
1146
1147This program is part of the mharc archiving system and comes with
1148ABSOLUTELY NO WARRANTY and may be copied only under the terms of
1149the GNU General Public License, which may be found in the mharc
1150distribution.
1151
1152=cut
1153