2 ##---------------------------------------------------------------------------##
4 ## $Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
6 ## Updates/creates web archives from mailbox archives.
7 ## Run script with '-man' option to view manpage for this program.
8 ##---------------------------------------------------------------------------##
9 ## Copyright (C) 2001-2002 Earl Hood <earl@earlhood.com>
11 ## This program is free software; you can redistribute it and/or modify
12 ## it under the terms of the GNU General Public License as published by
13 ## the Free Software Foundation; either version 2 of the License, or
14 ## (at your option) any later version.
16 ## This program is distributed in the hope that it will be useful,
17 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ## GNU General Public License for more details.
21 ## You should have received a copy of the GNU General Public License
22 ## along with this program; if not, write to the Free Software
23 ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 ##---------------------------------------------------------------------------##
27 package MHArc
::web_archive
;
29 ##--------------------------------------------------------------------------##
31 BEGIN { die qq/CGI use FORBIDDEN!\n/ if (defined($ENV{'GATEWAY_INTERFACE'})); }
32 my $Dir; BEGIN { $Dir = `dirname $0`; chomp $Dir; }
33 use lib
"$Dir/../lib"; # Add relative lib to search path
35 ##--------------------------------------------------------------------------##
38 my $config = MHArc
::Config
->load("$Dir/../lib/config.sh");
40 ##--------------------------------------------------------------------------##
46 use MHArc
::Util
qw( usage );
48 # Load MHonArc library
53 # Regular expression to match mail folder/mboxes
54 my $folder_regex = '\d+(?:-\d+)?';
58 my $clstatus = GetOptions
(\
%opt,
59 'alllistsurl=s', # Root to all lists URL.
60 'alllistsfile=s', # Pathname to all lists index page.
61 'debug|verbose', # Show what is going on in detail.
62 'editidx', # Edit archive pages; useful to apply MHonArc resource
64 'editallidx', # Regen all lists index.
65 'editidxonly', # Edit archive index pages only.
66 'editrootidx', # Regen top index.
67 'home=s', # Pathname of home directory of archive account.
68 'htmldir=s', # Root directory for html archives.
69 'htmlurl=s', # Root URL for html archives.
70 'keepsearch!', # Keep search index on a rebuild.
71 'listsdef=s', # Pathname to list definition file.
72 'mboxdir=s', # Root directory for mbox archives.
73 'mboxurl=s', # Root URL for mbox archives.
74 'mharc=s', # MHonArc resource file for archives.
75 'mhamaxsize=i', # Maximum MHonArc archive size.
76 'mhapagesize=i', # Maximum MHonArc index page size.
77 'mknmz=s', # Pathname to Namazu make search index program.
78 'mknmzrc=s', # Pathname to Namazu configuration file.
79 'mknmztmpldir=s', # Pathname to Namazu template directory.
80 'mesgcgi=s', # Message CGI URL.
81 'mnavcgi=s', # Month navigation CGI URL.
82 'mtimeage=i', # Modify time age of a mailbox file to be considered
84 'nosearch', # Do not update search indexes.
85 'rebuild', # Rebuild archives from scratch.
86 'rooturl=s', # Root URL to archives.
87 'searchcgi=s', # Search CGI URL.
92 usage
(0) unless $clstatus;
93 usage
(1) if $opt{'help'};
94 usage
(2) if $opt{'man'};
96 my $HOME = $opt{'home'} ||
97 $config->{'SW_ROOT'} ||
99 my $ROOT_URL = $opt{'rooturl'} ||
100 $config->{'ROOT_URL'} ||
101 "/~mhonarc/archives";
102 my $LISTS_DEF_FILE = $opt{'listsdef'} ||
103 $config->{'LISTS_DEF_FILE'} ||
104 "$HOME/lib/lists.def";
105 my $HTML_DIR = $opt{'htmldir'} ||
106 $config->{'HTML_DIR'} ||
108 my $HTML_URL = $opt{'htmlurl'} ||
109 $config->{'HTML_URL'} ||
111 my $MBOX_DIR = $opt{'mboxdir'} ||
112 $config->{'MBOX_DIR'} ||
114 my $MBOX_URL = $opt{'mboxurl'} ||
115 $config->{'MBOX_URL'} ||
117 my $INFO_DIR = $opt{'infodir'} ||
118 $config->{'INFO_DIR'} ||
120 my $INFO_URL = $opt{'infourl'} ||
121 $config->{'INFO_URL'} ||
123 my $MHA_RC = $opt{'mharc'} ||
124 $config->{'MHA_RC'} ||
125 "$HOME/lib/common.mrc";
126 my $MHA_RC_DIR = $opt{'mharcdir'} ||
127 $config->{'MHA_RC_DIR'} ||
129 my $MHA_MAXSIZE = $opt{'mhamaxsize'} ||
130 $ENV{'WA_MAXSIZE'} ||
132 my $MHA_PAGESIZE = $opt{'mhapagesize'} ||
133 $ENV{'WA_PAGESIZE'} ||
135 my $MTIME_AGE = $opt{'mtimeage'} ||
136 $ENV{'WA_MTIME_AGE'} ||
137 $config->{'MTIME_AGE'} ||
139 my $MKNMZ = $opt{'mknmz'} ||
140 $config->{'MKNMZ'} ||
141 '/usr/local/bin/mknmz';
142 my $MKNMZRC = $opt{'mknmzrc'} ||
143 $config->{'MKNMZ_RC'} ||
144 "$HOME/cgi-bin/mknmzrc";
# Namazu template directory: command-line option first, then the
# MKNMZ_TMPL_DIR setting from config.sh, then the default under $HOME.
# The statement is terminated with ';' (it previously ended with ',',
# which silently chained the following declaration via the comma operator).
145 my $MKNMZTMPLDIR = $opt{'mknmztmpldir'} ||
146 $config->{'MKNMZ_TMPL_DIR'} ||
147 "$HOME/cgi-bin/template";
148 my $ALL_LISTS_URL = $opt{'alllistsurl'} ||
149 $config->{'ALL_LISTS_URL'} ||
151 my $MESG_CGI = $opt{'mesgcgi'} ||
152 $config->{'MESG_CGI'} ||
153 join('/', $ROOT_URL,'cgi-bin/mesg.cgi');
154 my $MNAV_CGI = $opt{'mnavcgi'} ||
155 $config->{'MNAV_CGI'} ||
156 join('/', $ROOT_URL,'cgi-bin/mnav.cgi');
157 my $SEARCH_CGI = $opt{'searchcgi'} ||
158 $config->{'SEARCH_CGI'} ||
159 join('/', $ROOT_URL,'cgi-bin/namazu.cgi');
160 my $EXTRACT_CGI = $opt{'extractchcgi'} ||
161 $config->{'EXTRACT_CGI'} ||
162 join('/', $ROOT_URL,'cgi-bin/extract-mesg.cgi');
164 my $rebuild = $opt{'rebuild'} ||
165 $ENV{'WA_REBUILD'} || 0;
166 my $keepsearch = $opt{'keepsearch'};
167 my $editidx = $opt{'editidx'} ||
168 $ENV{'WA_EDIT'} || 0;
169 my $editidxonly = $opt{'editidxonly'} || 0;
170 my $editrootidx = $opt{'editrootidx'};
171 my $editallidx = $opt{'editallidx'};
172 my $nosearch = $opt{'nosearch'} ||
173 $ENV{'WA_NOSEARCH'} || 0;
174 $debug = $opt{'debug'} ||
177 my $all_index = $opt{'alllistsfile'} ||
178 $config->{'ALL_LISTS_FILE'} ||
179 join('/', $HTML_DIR, 'lists.html');
180 my $main_header = $config->{'MAIN_HEADER'} ||
181 join('/', $HTML_DIR, '.PNM.head');
182 my $main_footer = $config->{'MAIN_FOOTER'} ||
183 join('/', $HTML_DIR, '.PNM.foot');
193 $editidx = 1 if $editidxonly;
199 my $listdef = MHArc
::ListDef
->new($LISTS_DEF_FILE);
200 print "Loaded lists definitions.\n" if $debug;
203 # Just updating all-lists index
204 update_archive_index
(
205 '-config' => $config,
206 '-listdef' => $listdef,
207 '-htmldir' => $HTML_DIR,
208 '-htmlurl' => $HTML_URL,
209 '-infodir' => $INFO_DIR,
210 '-infourl' => $INFO_URL,
211 '-allindex' => $all_index
216 mhonarc
::initialize
();
217 print "MHonArc initialized.\n" if $debug;
219 local(*DIR
, *INDEX
, *FILE
);
221 print "Reading $MBOX_DIR.\n" if $debug;
222 opendir(DIR
, $MBOX_DIR) || die qq/Unable to open "$MBOX_DIR": $!/;
225 # Get list of archives to process
227 # list of archives specified on the command-line
230 # read mbox dir to get list
231 @dirs = grep { (-d
"$MBOX_DIR/$_") &&
238 my(@months, @folders);
239 my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
240 $folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
241 $disable_search, $listname, $short_title);
243 print "Lists: ", join(', ', @dirs), "\n" if $debug;
244 foreach $list (@dirs) {
245 print "Processing $list ...\n" if $debug;
249 $cvs = ($listname =~ s/\.CVS$//);
251 if (!$editidx && !$editrootidx) {
252 # Get list of input mailboxes to process
254 $dir = join('/', $MBOX_DIR, $list);
255 if (!opendir(DIR
, $dir)) {
256 warn qq/Unable to open "$dir": $!/;
260 # create .noraw file indicator if no-raw-link specified
261 my $no_raw_file = join('/', $dir, '.noraw');
262 my $no_raw_htaccess = join('/', $dir, '.htaccess');
263 if ($listdef->{$listname}{'no-raw-link'}[0]) {
264 if (! -e
$no_raw_file) {
266 if (!open(NORAW
, ">$no_raw_file")) {
267 warn qq/Warning: Unable to create "$no_raw_file": $!\n/;
272 if (! -e
$no_raw_htaccess) {
274 if (!open(HTACCESS
, ">$no_raw_htaccess")) {
275 warn qq/Warning: Unable to create "$no_raw_htaccess": $!\n/;
277 print HTACCESS
'Order allow,deny', "\n",
278 'Deny from all', "\n";
282 } elsif (-e
$no_raw_file) {
283 if (!unlink($no_raw_file)) {
284 warn qq/Warning: Unable to remove "$no_raw_file": $!\n/;
288 @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR
);
290 print "Mboxes: ", join(', ', @months), "\n" if $debug;
292 foreach $mon (@months) {
293 $mondir = join('/', $dir, $mon);
295 push(@folders, $mondir);
298 $mtime = (stat($mondir))[9];
299 print "$mondir mtime: $mtime\n" if $debug;
300 if (($time - $mtime) < $MTIME_AGE) {
301 push(@folders, $mondir);
306 print "Folders: ", join(', ', @folders), "\n" if $debug;
309 # Just editing pages so we get folder list from html directory
310 $dir = join('/', $HTML_DIR, $list);
311 if (!opendir(DIR
, $dir)) {
312 warn qq/Unable to open "$dir": $!/;
315 @months = grep { /^$folder_regex$/o } readdir(DIR
);
318 foreach $mon (@months) {
319 $mondir = join('/', $dir, $mon);
320 push(@folders, $mondir);
323 print "Editidx Folders: ", join(', ', @folders), "\n" if $debug;
325 @folders = reverse sort @folders;
327 $htmldir = join('/', $HTML_DIR, $list);
329 clean_html_archive
($htmldir, $keepsearch);
331 mkdir($htmldir, 0777);
333 $disable_search = ($list =~ /^\./) ||
334 ((defined($listdef->{$listname}{'no-search'}) &&
335 $listdef->{$listname}{'no-search'}[0]));
337 if (defined($listdef->{$listname}{'description'})) {
338 $title = join(' ', @
{$listdef->{$listname}{'description'}});
342 $short_title = $listname;
344 $title = '[CVS] '.$title;
345 $short_title = '[CVS] '.$short_title;
349 # define arguments to mhonarc
352 '-lockmethod', 'flock',
353 #'-maxsize', $MHA_MAXSIZE,
354 #'-idxsize', $MHA_PAGESIZE,
356 #'-outdir' , $htmldir,
357 '-title', "$title (date)",
358 '-ttitle', "$title (thread)",
359 '-definevar', "LIST-TITLE='$short_title'",
360 '-definevar', "LIST-NAME='$list'",
361 '-definevar', "SEARCH-CGI=$SEARCH_CGI",
362 '-definevar', "PNAV-CGI=$MNAV_CGI",
363 '-definevar', "EXTRACT-CGI=$EXTRACT_CGI",
364 '-definevar', "MESG-CGI=$MESG_CGI",
365 '-definevar', "ALL-LISTS-URL=$ALL_LISTS_URL",
367 '-definevar', "MNAV-CGI=$MNAV_CGI", # backwards compatibility
370 if (defined($listdef->{$listname}{'lang'})) {
371 push(@mhaargs, '-lang', $listdef->{$listname}{'lang'}[0]);
374 if (-e
"$MHA_RC_DIR/$list.mrc") {
375 push(@mhaargs, '-rcfile', "$MHA_RC_DIR/$list.mrc");
378 push(@mhaargs, '-nothread');
379 push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
381 push(@mhaargs, '-thread');
383 if ($list =~ /^\./) {
386 '-definevar', "SEARCH-FORM=''");
387 push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
390 push(@mhaargs, '-editidx');
391 push(@mhaargs, '-nomsgpgs') if $editidxonly;
# Forward the configured message date fields to MHonArc.  The value
# pushed must come from the same key that is tested for definedness;
# reading a different key ('DATE_FIELDS') could hand undef to -datefields.
393 if (defined($config->{'MSG_DATE_FIELDS'})) {
394 push(@mhaargs, '-datefields', $config->{'MSG_DATE_FIELDS'});
396 if (!$debug && !$rebuild) {
397 push(@mhaargs, '-quiet');
399 if (!$rebuild && !$editidx) {
400 push(@mhaargs, '-add');
402 if ($listdef->{$listname}{'check-no-archive'}) {
403 push(@mhaargs, '-checknoarchive');
406 # add any custom options specified in definition file
407 if (defined($listdef->{$listname}{'mhonarc-options'})) {
408 require 'shellwords.pl';
410 shellwords
(join(' ', @
{$listdef->{$listname}{'mhonarc-options'}})));
413 # if searching is disabled, zero-out $SEARCH-FORM$
414 if ($disable_search) {
415 push(@mhaargs, '-definevar', "SEARCH-FORM=''");
420 foreach $folder (@folders) {
421 ($mon = $folder) =~ s/\.gz$//;
427 for ($i=0; $i < @folders; ++$i) {
428 $folder = $folders[$i];
430 $mondir = join('/', $htmldir, $mon);
432 # make sure directory exists
433 mkdir($mondir, 0777);
435 # set final arguments to mhonarc
439 '-definevar', "CUR-PERIOD='$mon'",
441 '-definevar', "CUR-MONTH='$mon'", # backwards compatibility
443 push(@fmhaargs, $folder) unless $editidx;
446 print "Processing archive $mondir...\n" if $debug;
447 print "\tmhonarc options: ", join(' ', @fmhaargs), "\n" if $debug;
448 if (!mhonarc
::open_archive
(@fmhaargs)) {
449 warn qq/Warning: Unable to open "$mondir" archive: /,
450 qq/($mhonarc::CODE) $mhonarc::ERROR\n/;
453 $mhonarc::CBRcVarExpand
= \
&mha_rcvar_expand
;
454 $cur_msg_cnt = $mhonarc::NumOfMsgs
|| 0;
455 mhonarc
::process_input
();
456 if ($mhonarc::CODE
!= 0) {
457 warn qq/Warning: Problem processing "$mondir": /,
458 qq/($mhonarc::CODE) $mhonarc::ERROR\n/;
461 if ($cur_msg_cnt == $mhonarc::NumOfMsgs
) {
462 print "Skipping search index, no new messages in archive\n"
467 # update search index
468 # The -Y option is used so we do not have to process all months
470 if (!$keepsearch && !$nosearch && !$disable_search) {
473 '--mhonarc', # only do mhonarc pages
474 '-f', $MKNMZRC, # specify resource file
475 '-T', $MKNMZTMPLDIR, # specify template directory
476 '-O', $htmldir, # specify location to place index
477 '-Y' # do not delete existing files
479 if (!$debug && !$rebuild) {
480 push(@nmzargs, '--quiet');
482 push(@nmzargs, $mondir);
483 print "Search Index Command: ", join(" ", @nmzargs), "\n" if $debug;
485 if (system(@nmzargs)) {
486 warn qq/Warning: Non-zero exit status returned from /,
487 qq/"@nmzargs": $?\n/;
489 namazu_cleanup
($htmldir);
494 ## Update monthly index
495 if (!opendir(DIR
, $htmldir)) {
496 warn qq/Warning: Unable to open $htmldir for reading: $!\n/;
499 @months = reverse sort grep { /^$folder_regex/o } readdir(DIR
);
500 print "Month listing for main index: @months\n" if $debug;
502 my $indexhtml = join('/', $htmldir, 'index.html');
# The list index is written to a temporary file first and renamed over
# index.html afterwards.  On failure, report the file that could not be
# created (the message used to name $htmldir and claim a read failure).
503 if (!open(INDEX, ">$indexhtml.tmp")) {
504 warn qq/Warning: Unable to create "$indexhtml.tmp": $!\n/;
509 '-nosearch' => $disable_search,
510 'SEARCH-CGI' => $SEARCH_CGI,
511 'LIST-TITLE' => $short_title,
512 'LIST-NAME' => $list,
513 'LIST-DESC' => $title,
515 print_template
(\
*INDEX
, $main_header, @vars);
516 print INDEX
"<ul>\n";
517 foreach $mon (@months) {
518 print INDEX
qq|<li
><b
>$mon</b
>:|;
519 print INDEX
qq| 
; 
;<a href
="$mon/index.html">[Date
]</a
>|
520 if (-e
join('/', $htmldir, $mon, 'index.html'));
521 print INDEX
qq| 
; 
;<a href
="$mon/threads.html">[Thread
]</a
>|
522 if (-e
join('/', $htmldir, $mon, 'threads.html'));
524 if (!$listdef->{$listname}{'no-raw-link'}[0]) {
525 my $raw_label = '[Raw: ]';
527 my $mbox_file = join('/', $MBOX_DIR, $list, $mon);
528 my $mbox_url = join('/', $MBOX_URL, $list, $mon);
529 if (! -e
$mbox_file) {
534 # if (-e $mbox_file) {
535 # print INDEX qq| <a href="$mbox_url">[mbox: |,
536 # (-s _), qq| bytes|;
537 # print INDEX qq|, gzipped| if $compressed;
538 # print INDEX qq|]</a>|;
541 print INDEX
qq|</li
>\n|;
543 print INDEX
"</ul>\n";
544 print_template
(\
*INDEX
, $main_footer, @vars);
546 if (!rename("$indexhtml.tmp", $indexhtml)) {
547 warn qq|Warning
: Unable to
rename "$indexhtml.tmp" to
|,
548 qq|"$indexhtml": $!\n|;
552 update_archive_index
(
553 '-config' => $config,
554 '-listdef' => $listdef,
555 '-htmldir' => $HTML_DIR,
556 '-htmlurl' => $HTML_URL,
557 '-infodir' => $INFO_DIR,
558 '-infourl' => $INFO_URL,
559 '-allindex' => $all_index
564 ############################################################################
# Escape every ampersand as an HTML entity.  Replacing '&' with '&' was a
# no-op, and without /g only the first occurrence would be handled.
# NOTE(review): the rest of this sub is not visible here; confirm that no
# later substitution depends on ampersands being left untouched.
568 $str =~ s/\&/\&amp;/g;
578 if (!opendir(DIR
, $dir)) {
579 warn qq/Warning: Unable to open "$dir": $!/;
582 my @months = reverse sort grep { /^$folder_regex$/o } readdir(DIR
);
592 if ($varhash{'-nosearch'}) {
597 $ignore = 0 if /<!--\/x-search
-form
-->/;
600 if (/<!--x-search-form-->/) {
604 s/\$([^\$]+)\$/$varhash{$1}/ge;
610 $data =~ s/\$([^\$]+)\$/$varhash{$1}/ge;
619 print "Reading template file $file\n" if $debug;
621 if (open(FILE
, $file)) {
622 print $fhout read_template
(\
*FILE
, @_);
625 warn qq/Warning: Unable to open "$file": $!\n/;
632 my $lock = join('/', $dir, 'NMZ.lock2');
634 if (!open(LOCK
, $lock)) {
635 # no lock file left around, so everything should be okay
640 if (!kill(0, $pid)) {
641 warn qq/Warning: Stale "$lock", removing it\n/;
642 if (!unlink($lock)) {
643 warn qq/Warning: Unable to remove "$lock": $!\n/;
650 my $fmt = shift || '%Y-%m-%d %H:%M:%S';
651 #my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time);
652 #$year += 1900; ++$mon;
653 #sprintf("%d-%02d-%02d %02d:%02d:%02d", $year,$mon,$mday,$hour,$min,$sec);
654 POSIX
::strftime
($fmt, localtime($time));
657 ## Retrieve the last time an archive was updated.
658 # We scan the message pages since their mtime should be set to the
659 # date of the message.
# The visible code falls back, in order, to the mtime of the MHonArc
# database file (.mhonarc.db, then mhonarc.db) and finally to the mtime
# of the archive directory itself.
# NOTE(review): the argument unpacking, the conditions guarding each
# fallback, and the return statement are not visible in this excerpt.
661 sub retrieve_last_update
{
665 if (!opendir(DIR
, $archive)) {
666 warn qq/Warning: Unable to open "$archive" for reading: $!\n/;
# Track the newest mtime among directory entries whose names start with
# "msg<digits>.html".
673 foreach $file (readdir(DIR
)) {
674 next unless $file =~ /^msg\d+\.html/;
675 $mtime = (stat(join('/', $archive, $file)))[9];
676 $latest = $mtime if ($mtime > $latest);
681 # No luck with message pages, so try database file
682 warn qq/Warning: Unable to determine last update from message pages /,
683 qq/for "$archive"\n/;
# stat(_) reuses the stat buffer filled by the preceding -e test, so each
# database file is looked up on disk only once.
684 if (-e
join('/', $archive, '.mhonarc.db')) {
685 $latest = ((stat(_
))[9]);
686 } elsif (-e
join('/', $archive, 'mhonarc.db')) {
687 $latest = ((stat(_
))[9]);
691 # No luck with data, so use directory mtime
692 $latest = ((stat($archive))[9]);
698 ## Remove HTML archive
# Remove the generated HTML for one list archive.
#   $dir - directory of the archive
#   $ks  - true if the search index files should be preserved
# The visible code either removes $dir outright with "rm -rf", or, when
# the search index is kept, removes only the period sub-directories
# (names matching $folder_regex) and leaves everything else in place.
# NOTE(review): the conditional that selects between the two paths is not
# visible in this excerpt; presumably it tests $ks.
700 sub clean_html_archive
{
701 my $dir = shift; # Directory of archive
702 my $ks = shift; # Flag if search index files should be preserved
# Report the directory actually being removed ($dir) rather than relying
# on the caller's file-scoped $htmldir happening to hold the same value.
705 print "Removing $dir\n" if $debug;
706 system('/bin/rm', '-rf', $dir);
710 # keep search index, so must delete each period sub-directory
712 opendir(DIR
, $dir) ||
713 die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
714 my @subdirs = map { join('/',$dir,$_) }
715 grep { /^$folder_regex$/o } readdir(DIR
);
718 foreach $subdir (@subdirs) {
719 print "Removing $subdir\n" if $debug;
720 system('/bin/rm', '-rf', $subdir);
724 ## Retrieve the list info URL.
728 my $pathname = join('/', $opts{'-dir'}, $opts{'-name'}) . '.html';
729 if (! -e
$pathname) {
732 join('/', $opts{'-baseurl'}, $opts{'-name'}) . '.html';
735 ## Generates the all-lists index.
737 sub update_archive_index
{
738 print "Generating root archive index...\n" if $debug;
741 my $config = $opts{'-config'};
742 my $listdef = $opts{'-listdef'};
743 my $html_dir = $opts{'-htmldir'};
744 my $html_url = $opts{'-htmlurl'};
745 my $info_dir = $opts{'-infodir'};
746 my $info_url = $opts{'-infourl'};
747 my $index_html = $opts{'-allindex'};
749 my $header = $opts{'-header'} ||
750 $config->{'ALL_LISTS_HEADER'} ||
751 join('/', $html_dir, '.PNM.all-head');
752 my $footer = $opts{'-footer'} ||
753 $config->{'ALL_LISTS_FOOTER'} ||
754 join('/', $html_dir, '.PNM.all-foot');
756 my $label_name = $opts{'-label-name'} ||
757 $config->{'ALL_LISTS_LABEL_NAME'} ||
760 = $opts{'-label-indexes'} ||
761 $config->{'ALL_LISTS_LABEL_INDEXES'} ||
762 'Current Index';
763 my $label_last = $opts{'-label-last-updated'} ||
764 $config->{'ALL_LISTS_LABEL_LAST_UPDATED'} ||
766 my $label_info = $opts{'-label-info'} ||
767 $config->{'ALL_LISTS_LABEL_INFO'} ||
769 my $label_date = $opts{'-label-date'} ||
770 $config->{'ALL_LISTS_LABEL_DATE'} ||
773 = $opts{'-label-threads'} ||
774 $config->{'ALL_LISTS_LABEL_THREADS'} ||
776 my $time_fmt = $opts{'-time-fmt'} ||
777 $config->{'ALL_LISTS_DATE_FORMAT'} ||
781 my $tmp_index = $index_html . ".tmp";
782 if (!open(IDX
, ">$tmp_index")) {
783 warn qq/Warning: Unable to create "$tmp_index": $!\n/;
788 my($list, $listname, $last_updated, $dir, $latest, $info);
790 foreach $listname (keys %$listdef) {
791 next if $listname =~ /^\./; # skip hidden archives
# The hide flag is a per-list setting, so it has to be looked up under
# the current list name.  Indexing $listdef directly by the option name
# never matched a real list and also autovivified a bogus entry.  The
# defined() guard avoids autovivifying the option for lists that lack it.
792 next if (defined($listdef->{$listname}{'hide-from-all-lists'}) &&
$listdef->{$listname}{'hide-from-all-lists'}[0]);
794 foreach $list ($listname, "$listname.CVS") {
795 $dir = join('/', $html_dir, $list);
798 print "Computing last update for $list...\n" if $debug;
799 my @months = get_periods
($dir);
801 $latest = $months[0];
802 $last_updated = retrieve_last_update
(join('/', $dir, $latest));
803 if (!defined($last_updated)) {
804 print "Unable to compute last update for $list.\n" if $debug;
807 $updated{$list} = [ $last_updated, $list, $listname, $dir, $latest ];
811 print_template
(\
*IDX
, $header);
812 print IDX
qq|<table
class="archiveLists" cellpadding
="3" cellspacing
="1">\n|,
813 qq|<tr
class="listsHeaderRow" valign
="baseline" align
="left">\n|,
814 qq|<th
>$label_name</th
>|,
815 qq|<th
>$label_indexes</th
>|,
816 qq|<th
>$label_last</th
>|,
820 foreach $list (sort { $updated{$b}->[0] <=> $updated{$a}->[0] }
822 ($time, $list, $listname, $dir, $latest) = @
{$updated{$list}};
824 print "Printing listing for $list\n" if $debug;
825 $last_updated = format_date
($time, $time_fmt);
826 $last_updated =~ s/ /\ /g;
827 my $short_title = entify
($listdef->{$listname}{'all-lists-name'}[0] ||
829 my $description = entify
($listdef->{$listname}{'description'}[0] ||
831 if ($list =~ /\.CVS$/) {
832 $short_title .= " (CVS)";
833 $description .= " (CVS commits)";
835 $info = get_info_url
(
836 '-name' => $listname,
838 '-baseurl' => $info_url
841 print IDX
qq|<tr valign
="baseline">\n|;
843 print IDX
qq|<td
> 
;<span
class="listName">|,
844 qq|<a href
="$html_url/$list/">$short_title</a></span
> 
;|;
845 print IDX
qq|<a
class="infoLink" href
="$info">$label_info</a
> 
;|
847 print IDX
qq|</td
>\n|;
851 print IDX
qq| 
;<a href
="$html_url/$list/$latest/index.html">$label_date</a
> 
;|
852 if (-e
join('/', $dir, $latest, 'index.html'));
853 print IDX
qq| 
;<a href
="$html_url/$list/$latest/threads.html">$label_threads</a
> 
;|
854 if (-e
join('/', $dir, $latest, 'threads.html'));
855 print IDX
qq|</td
>\n|;
857 print IDX
qq|<td
> 
;<tt
>|, $last_updated, qq|</tt> </td
>\n|;
860 print IDX
qq|</table
>\n|;
861 print_template
(\
*IDX
, $footer);
864 if (!rename($tmp_index, $index_html)) {
865 warn qq/Warning: Unable to rename "$tmp_index" to "$index_html": $!\n/;
# Resource-variable expansion callback: the main loop assigns a reference
# to this sub to $mhonarc::CBRcVarExpand before processing each archive.
# Only the NMZ-SUBJECT-QUERY variable is handled in the visible code; it
# expands to a "+subject:/.../" query string built from the message's
# base subject (presumably a Namazu field query -- confirm).
# NOTE(review): $arg, $val, $clipped and the fall-through return for other
# variable names are set on lines not visible in this excerpt.
869 sub mha_rcvar_expand
{
870 my $mha_index = shift;
871 my $var_name = shift;
875 if ($var_name eq 'NMZ-SUBJECT-QUERY') {
# Resolve which message the variable refers to; give up if none is found.
876 my($lref, $key, $pos) =
877 mhonarc
::compute_msg_pos
($mha_index, $var_name, $arg);
878 return undef unless defined($key);
# Use the base subject and cap it at 128 characters.
881 $val = mhonarc
::get_base_subject
($key);
882 if (length($val) > 128) {
883 $val = substr($val, 0, 128);
# Collapse each run of backslash-escaped whitespace into a single \s+ so
# the query tolerates differences in spacing.
887 $val =~ s/(?:\\\s)+/\\s+/g; # \Q will escape whitespace
# Allow any number of leading reply prefixes (MHonArc's SubReplyRxp)
# before the subject text.
888 my $repl_re = $mhonarc::SubReplyRxp
;
889 my $query = "+subject:/^(?:$repl_re)*$val";
# Anchor the end of the pattern unless $clipped is set.
890 $query .= '\s*$' unless $clipped;
# NOTE(review): the meaning of the two trailing zeros is defined by the
# MHonArc callback contract, which is not shown here.
892 return ($query, 0, 0);
899 ############################################################################
904 web-archive - Update/create MHonArc archives from mailbox archives.
909 web-archive [options]
910 web-archive [options] [list-name ...]
914 This program is part of mharc and has the responsibility of processing
915 the mailbox archives created by the L<filter-spool|filter-spool> script to
916 update and/or create MHonArc archives.
918 This program is automatically called by the L<read-mail|read-mail> script for
919 processing incoming mail within the mail spool if L<filter-spool|filter-spool>
920 returns with an okay status. However, this program can be manually
921 invoked to rebuild archives, edit existing archives, or other
922 administrative tasks. Since there may be a need to do selective archive
923 processing, any non-option argument is treated as the name of a mailing
924 list archive to process.
930 =item C<-alllistsfile> I<pathname>
932 Pathname of file to generate the all lists index.
933 If not specified, the value of the C<ALL_LISTS_FILE> variable in
934 C<config.sh> is used, else it defaults to "C<I<-htmldir>/lists.html>".
936 =item C<-alllistsurl> I<url>
938 URL to page containing list of all mailing lists archived.
939 If not specified, the value of the C<ALL_LISTS_URL> variable in
940 C<config.sh> is used, else it defaults to C<-htmlurl>.
944 Edit archive pages, useful to apply MHonArc resource
947 =item C<-editrootidx>
949 Only regenerate root index pages for archives. This is useful if
950 you make changes to the C<.PNM.head> or C<.PNM.foot> files that you
951 want immediately applied.
955 Print out usage information.
957 =item C<-home> I<pathname>
959 Root pathname of archiving software and data.
960 If not specified, the parent directory that contains this program
963 =item C<-htmldir> I<pathname>
965 Root directory for html archives.
966 If not specified, "C<I<-home>/html>" is used.
968 =item C<-htmlurl> I<url>
970 URL root to HTML archives.
971 If not specified, defaults to C<I<rooturl>/html>.
973 =item C<-infodir> I<pathname>
975 Pathname of directory containing informational pages for each list
976 archive. Information for a list archive can be provided by creating a
977 file called "C<I<list-name>.html>". Once created, a link to the file
978 (based on the value of the C<-infourl> option) will be generated in
979 the all-lists index to it.
981 If this option is not specified, the value of the C<INFO_DIR> variable
982 in C<config.sh> is used, else it defaults to "C<I<-home>/info>".
984 =item C<-infourl> I<url>
986 Base URL containing informational pages for each list archive.
987 If not specified, the value of the C<INFO_URL> variable in
988 C<config.sh> is used, else it defaults to "C<I<-rooturl>/info>".
992 Preserve search index if C<-rebuild> is specified. This option
993 is handy if all that is desired is to rebuild the HTML archives
994 from the raw data since the overhead of rebuilding the search indexes
997 B<CAUTION:> Do not use C<-keepsearch> if you have removed messages
998 from the raw mail archives since resulting HTML message pages may
999 have different URIs than what is stored within the search index.
1001 =item C<-listsdef> I<pathname>
1003 Pathname to mailing lists definition file.
1004 If not specified, "C<I<-home>/lib/lists.def>" is used.
1008 Print out entire manpage.
1010 =item C<-mboxdir> I<pathname>
1012 Root directory for mbox archives.
1013 If not specified, "C<I<-home>/mbox>" is used.
1015 =item C<-mharc> I<pathname>
1017 MHonArc resource file for archives.
1018 If not specified, "C<I<-home>/lib/common.mrc>" is used.
1020 =item C<-mharcdir> I<pathname>
1022 Directory containing list-specific MHonArc resource files. A given
1023 list archive can have additional resource settings by creating a
1024 file called C<I<list-name>.mrc> within the directory specified by
1027 If C<-mharcdir> is not specified, "C<I<-home>/lib/mrc>" is used.
1029 =item C<-mhamaxsize> I<number>
1031 Maximum MHonArc archive size.
1032 If not specified the value of the C<WA_MAXSIZE> environment variable is used.
1034 =item C<-mhapagesize> I<number>
1036 Maximum MHonArc index page size.
1037 If not specified the value of the C<WA_PAGESIZE> environment variable is used.
1039 =item C<-mknmz> I<pathname>
1041 Pathname to Namazu make search index program.
1042 If not specified, "C</usr/local/bin/mknmz>" is used.
1044 =item C<-mknmzrc> I<pathname>
1046 Pathname to Namazu configuration file.
1047 If not specified, "C<I<-home>/cgi-bin/mknmzrc>" is used.
1049 =item C<-mknmztmpldir> I<pathname>
1051 Pathname to Namazu template directory.
1052 If not specified, "C<I<-home>/cgi-bin/template>" is used.
1054 =item C<-mnavcgi> I<url>
1056 URL to monthly navigation cgi program.
1057 If not specified, C<I<rooturl>/cgi-bin/mnav.cgi> is used.
1059 =item C<-mtimeage> I<seconds>
1061 Modify time age of a mailbox file to be considered for processing. If
1062 not specified the value of the C<WA_MTIME_AGE> environment variable is
1067 Do not update search indexes.
1071 Rebuild archives from scratch.
1073 =item C<-rooturl> I<url>
1075 URL root of archives.
1076 If not specified, C</~mhonarc/archives> is used.
1078 =item C<-searchcgi> I<url>
1080 URL to search cgi program.
1081 If not specified, C<I<rooturl>/cgi-bin/namazu.cgi> is used.
1085 Show what is going on in detail.
1091 Environment variable usage is deprecated.
1093 The following environment variables are recognized:
1099 If set to a true value, detailed information of progress will be
1100 printed to stdout. Debugging can also be enabled by the
1101 C<-debug> command-line option.
1105 If set to a true value, archives will be edited. It is probably
1106 better to use the C<-editidx> command-line option instead if archives
1111 Maximum MHonArc archive size. The default value is 2000. This setting
1112 can be overridden by the C<-mhamaxsize> command-line option.
1114 =item C<WA_MTIME_AGE>
1116 The modification age, in seconds, for a mailbox to be considered
1117 for processing. The default value is C<86400> (one day).
1118 This setting can be overridden by the C<-mtimeage> command-line option.
1120 =item C<WA_NOSEARCH>
1122 If set to a true value, the Namazu search indexes will not be updated
1123 for archives processed. Search index updates can also be
1124 disabled by the C<-nosearch> command-line option.
1126 =item C<WA_PAGESIZE>
1128 MHonArc index page size. The default value is 200. This setting
1129 can be overridden by the C<-mhapagesize> command-line option.
1133 If set to a true value, archives will be rebuilt. It is probably
1134 better to use the C<-rebuild> command-line option instead if rebuilding
1141 $Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
1145 Earl Hood, earl@earlhood.com
1147 This program is part of the mharc archiving system and comes with
1148 ABSOLUTELY NO WARRANTY and may be copied only under the terms of
1149 the GNU General Public License, which may be found in the mharc