#!/usr/bin/perl ##---------------------------------------------------------------------------## ## File: ## $Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $ ## Description: ## Updates/creates web archives from mailbox archives. ## Run script with '-man' option to view manpage for this program. ##---------------------------------------------------------------------------## ## Copyright (C) 2001-2002 Earl Hood ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with this program; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ## 02111-1307, USA ##---------------------------------------------------------------------------## package MHArc::web_archive; ##--------------------------------------------------------------------------## # BEGIN { die qq/CGI use FORBIDDEN!\n/ if (defined($ENV{'GATEWAY_INTERFACE'})); } my $Dir; BEGIN { $Dir = `dirname $0`; chomp $Dir; } use lib "$Dir/../lib"; # Add relative lib to search path # ##--------------------------------------------------------------------------## # use MHArc::Config; my $config = MHArc::Config->load("$Dir/../lib/config.sh"); # ##--------------------------------------------------------------------------## use Getopt::Long; use POSIX; use MHArc::ListDef; use MHArc::Util qw( usage ); # Load MHonArc library require 'mhamain.pl'; my $debug = 0; # Regular expression to match mail folder/mboxes my $folder_regex = '\d+(?:-\d+)?'; MAIN: { my %opt = ( ); my $clstatus = GetOptions(\%opt, 'alllistsurl=s', # Root to all lists URL. 'alllistsfile=s', # Pathname to all lists index page. 'debug|verbose', # Show what is going on in detail. 'editidx', # Edit archive pages; useful to apply MHonArc resource # changes. 'editallidx', # Regen all lists index. 'editidxonly', # Edit archive index pages only. 'editrootidx', # Regen top index. 'home=s', # Pathname of home directory of archive account. 'htmldir=s', # Root directory for html archives. 'htmlurl=s', # Root URL for html archives. 'keepsearch!', # Keep search index on a rebuild. 'listsdef=s', # Pathname to list definition file. 'mboxdir=s', # Root directory for mbox archives. 'mboxurl=s', # Root URL for mbox archives. 'mharc=s', # MHonArc resource file for archives. 'mhamaxsize=i', # Maximum MHonArc archive size. 'mhapagesize=i', # Maximum MHonArc index page size. 'mknmz=s', # Pathname to Namazu make search index program. 'mknmzrc=s', # Pathname to Namazu configuration file. 'mknmztmpldir=s', # Pathname to Namazu template directory. 'mesgcgi=s', # Message CGI URL. 'mnavcgi=s', # Month navigation CGI URL. 'mtimeage=i', # Modify time age of a mailbox file to be considered # for processing. 'nosearch', # Do not update search indexes. 'rebuild', # Rebuild archives from scratch. 'rooturl=s', # Root URL to archives. 'searchcgi=s', # Search CGI URL. 'man', 'help' ); usage(0) unless $clstatus; usage(1) if $opt{'help'}; usage(2) if $opt{'man'}; my $HOME = $opt{'home'} || $config->{'SW_ROOT'} || "$Dir/.."; my $ROOT_URL = $opt{'rooturl'} || $config->{'ROOT_URL'} || "/~mhonarc/archives"; my $LISTS_DEF_FILE = $opt{'listsdef'} || $config->{'LISTS_DEF_FILE'} || "$HOME/lib/lists.def"; my $HTML_DIR = $opt{'htmldir'} || $config->{'HTML_DIR'} || "$HOME/html"; my $HTML_URL = $opt{'htmlurl'} || $config->{'HTML_URL'} || "$ROOT_URL/html"; my $MBOX_DIR = $opt{'mboxdir'} || $config->{'MBOX_DIR'} || "$HOME/mbox"; my $MBOX_URL = $opt{'mboxurl'} || $config->{'MBOX_URL'} || "$ROOT_URL/mbox"; my $INFO_DIR = $opt{'infodir'} || $config->{'INFO_DIR'} || "$HOME/info"; my $INFO_URL = $opt{'infourl'} || $config->{'INFO_URL'} || "$ROOT_URL/info"; my $MHA_RC = $opt{'mharc'} || $config->{'MHA_RC'} || "$HOME/lib/common.mrc"; my $MHA_RC_DIR = $opt{'mharcdir'} || $config->{'MHA_RC_DIR'} || "$HOME/lib/mrc"; my $MHA_MAXSIZE = $opt{'mhamaxsize'} || $ENV{'WA_MAXSIZE'} || 2000; my $MHA_PAGESIZE = $opt{'mhapagesize'} || $ENV{'WA_PAGESIZE'} || 200; my $MTIME_AGE = $opt{'mtimeage'} || $ENV{'WA_MTIME_AGE'} || $config->{'MTIME_AGE'} || 86400; my $MKNMZ = $opt{'mknmz'} || $config->{'MKNMZ'} || '/usr/local/bin/mknmz'; my $MKNMZRC = $opt{'mknmzrc'} || $config->{'MKNMZ_RC'} || "$HOME/cgi-bin/mknmzrc"; my $MKNMZTMPLDIR = $opt{'mknmztmpldir'} || $config->{'MKNMZ_TMPL_DIR'} || "$HOME/cgi-bin/template", my $ALL_LISTS_URL = $opt{'alllistsurl'} || $config->{'ALL_LISTS_URL'} || $HTML_URL; my $MESG_CGI = $opt{'mesgcgi'} || $config->{'MESG_CGI'} || join('/', $ROOT_URL,'cgi-bin/mesg.cgi'); my $MNAV_CGI = $opt{'mnavcgi'} || $config->{'MNAV_CGI'} || join('/', $ROOT_URL,'cgi-bin/mnav.cgi'); my $SEARCH_CGI = $opt{'searchcgi'} || $config->{'SEARCH_CGI'} || join('/', $ROOT_URL,'cgi-bin/namazu.cgi'); my $EXTRACT_CGI = $opt{'extractchcgi'} || $config->{'EXTRACT_CGI'} || join('/', $ROOT_URL,'cgi-bin/extract-mesg.cgi'); my $rebuild = $opt{'rebuild'} || $ENV{'WA_REBUILD'} || 0; my $keepsearch = $opt{'keepsearch'}; my $editidx = $opt{'editidx'} || $ENV{'WA_EDIT'} || 0; my $editidxonly = $opt{'editidxonly'} || 0; my $editrootidx = $opt{'editrootidx'}; my $editallidx = $opt{'editallidx'}; my $nosearch = $opt{'nosearch'} || $ENV{'WA_NOSEARCH'} || 0; $debug = $opt{'debug'} || $ENV{'WA_DEBUG'}; my $all_index = $opt{'alllistsfile'} || $config->{'ALL_LISTS_FILE'} || join('/', $HTML_DIR, 'lists.html'); my $main_header = $config->{'MAIN_HEADER'} || join('/', $HTML_DIR, '.PNM.head'); my $main_footer = $config->{'MAIN_FOOTER'} || join('/', $HTML_DIR, '.PNM.foot'); my $time = time; if ($rebuild) { $editidx = 0; $editrootidx = 0; $editallidx = 0; } else { $keepsearch = 0; } $editidx = 1 if $editidxonly; if ($editidx) { $editrootidx = 0; $editallidx = 0; } my $listdef = MHArc::ListDef->new($LISTS_DEF_FILE); print "Loaded lists definitions.\n" if $debug; if ($editallidx) { # Just updating all-lists index update_archive_index( '-config' => $config, '-listdef' => $listdef, '-htmldir' => $HTML_DIR, '-htmlurl' => $HTML_URL, '-infodir' => $INFO_DIR, '-infourl' => $INFO_URL, '-allindex' => $all_index ); last MAIN; } mhonarc::initialize(); print "MHonArc initialized.\n" if $debug; local(*DIR, *INDEX, *FILE); print "Reading $MBOX_DIR.\n" if $debug; opendir(DIR, $MBOX_DIR) || die qq/Unable to open "$MBOX_DIR": $!/; my @dirs = (); # Get list of archives to process if (@ARGV) { # list of archives specified on the command-line @dirs = @ARGV; } else { # read mbox dir to get list @dirs = grep { (-d "$MBOX_DIR/$_") && ($_ ne '.') && ($_ ne '..') } readdir(DIR); closedir(DIR); } my(@months, @folders); my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime, $folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon, $disable_search, $listname, $short_title); print "Lists: ", join(', ', @dirs), "\n" if $debug; foreach $list (@dirs) { print "Processing $list ...\n" if $debug; @folders = (); $listname = $list; $cvs = ($listname =~ s/\.CVS$//); if (!$editidx && !$editrootidx) { # Get list of input mailboxes to process $dir = join('/', $MBOX_DIR, $list); if (!opendir(DIR, $dir)) { warn qq/Unable to open "$dir": $!/; next; } # create .noraw file indicator if no-raw-link specified my $no_raw_file = join('/', $dir, '.noraw'); my $no_raw_htaccess = join('/', $dir, '.htaccess'); if ($listdef->{$listname}{'no-raw-link'}[0]) { if (! -e $no_raw_file) { local(*NORAW); if (!open(NORAW, ">$no_raw_file")) { warn qq/Warning: Unable to create "$no_raw_file": $!\n/; } else { close(NORAW); } } if (! -e $no_raw_htaccess) { local(*HTACCESS); if (!open(HTACCESS, ">$no_raw_htaccess")) { warn qq/Warning: Unable to create "$no_raw_htaccess": $!\n/; } else { print HTACCESS 'Order allow,deny', "\n", 'Deny from all', "\n"; close(HTACCESS); } } } elsif (-e $no_raw_file) { if (!unlink($no_raw_file)) { warn qq/Warning: Unable to remove "$no_raw_file": $!\n/; } } @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR); closedir(DIR); print "Mboxes: ", join(', ', @months), "\n" if $debug; foreach $mon (@months) { $mondir = join('/', $dir, $mon); if ($rebuild) { push(@folders, $mondir); next; } $mtime = (stat($mondir))[9]; print "$mondir mtime: $mtime\n" if $debug; if (($time - $mtime) < $MTIME_AGE) { push(@folders, $mondir); } } next if (!@folders); print "Folders: ", join(', ', @folders), "\n" if $debug; } elsif ($editidx) { # Just editing pages so we get folder list from html directory $dir = join('/', $HTML_DIR, $list); if (!opendir(DIR, $dir)) { warn qq/Unable to open "$dir": $!/; next; } @months = grep { /^$folder_regex$/o } readdir(DIR); closedir(DIR); foreach $mon (@months) { $mondir = join('/', $dir, $mon); push(@folders, $mondir); } next if (!@folders); print "Editidx Folders: ", join(', ', @folders), "\n" if $debug; } @folders = reverse sort @folders; $htmldir = join('/', $HTML_DIR, $list); if ($rebuild) { clean_html_archive($htmldir, $keepsearch); } mkdir($htmldir, 0777); $disable_search = ($list =~ /^\./) || ((defined($listdef->{$listname}{'no-search'}) && $listdef->{$listname}{'no-search'}[0])); if (defined($listdef->{$listname}{'description'})) { $title = join(' ', @{$listdef->{$listname}{'description'}}); } else { $title = $listname; } $short_title = $listname; if ($cvs) { $title = '[CVS] '.$title; $short_title = '[CVS] '.$short_title; } if (!$editrootidx) { # define arguments to mhonarc my @mhaargs = ( '-modtime', '-lockmethod', 'flock', #'-maxsize', $MHA_MAXSIZE, #'-idxsize', $MHA_PAGESIZE, '-rcfile', $MHA_RC, #'-outdir' , $htmldir, '-title', "$title (date)", '-ttitle', "$title (thread)", '-definevar', "LIST-TITLE='$short_title'", '-definevar', "LIST-NAME='$list'", '-definevar', "SEARCH-CGI=$SEARCH_CGI", '-definevar', "PNAV-CGI=$MNAV_CGI", '-definevar', "EXTRACT-CGI=$EXTRACT_CGI", '-definevar', "MESG-CGI=$MESG_CGI", '-definevar', "ALL-LISTS-URL=$ALL_LISTS_URL", '-definevar', "MNAV-CGI=$MNAV_CGI", # backwards compatibility ); if (defined($listdef->{$listname}{'lang'})) { push(@mhaargs, '-lang', $listdef->{$listname}{'lang'}[0]); } if (-e "$MHA_RC_DIR/$list.mrc") { push(@mhaargs, '-rcfile', "$MHA_RC_DIR/$list.mrc"); } if ($cvs) { push(@mhaargs, '-nothread'); push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''"); } else { push(@mhaargs, '-thread'); } if ($list =~ /^\./) { push(@mhaargs, '-nothread', '-definevar', "SEARCH-FORM=''"); push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''"); } if ($editidx) { push(@mhaargs, '-editidx'); push(@mhaargs, '-nomsgpgs') if $editidxonly; } if (defined($config->{'MSG_DATE_FIELDS'})) { push(@mhaargs, '-datefields', $config->{'DATE_FIELDS'}); } if (!$debug && !$rebuild) { push(@mhaargs, '-quiet'); } if (!$rebuild && !$editidx) { push(@mhaargs, '-add'); } if ($listdef->{$listname}{'check-no-archive'}) { push(@mhaargs, '-checknoarchive'); } # add any custom options specified in definition file if (defined($listdef->{$listname}{'mhonarc-options'})) { require 'shellwords.pl'; push(@mhaargs, shellwords(join(' ', @{$listdef->{$listname}{'mhonarc-options'}}))); } # if searching is disabled, zero-out $SEARCH-FORM$ if ($disable_search) { push(@mhaargs, '-definevar', "SEARCH-FORM=''"); } my(@fmhaargs); @months = ( ); foreach $folder (@folders) { ($mon = $folder) =~ s/\.gz$//; $mon =~ s/^.*\///; push(@months, $mon); } my $cur_msg_cnt; for ($i=0; $i < @folders; ++$i) { $folder = $folders[$i]; $mon = $months[$i]; $mondir = join('/', $htmldir, $mon); # make sure directory exists mkdir($mondir, 0777); # set final arguments to mhonarc @fmhaargs = ( @mhaargs, '-outdir', $mondir, '-definevar', "CUR-PERIOD='$mon'", '-definevar', "CUR-MONTH='$mon'", # backwards compatibility ); push(@fmhaargs, $folder) unless $editidx; # call mhonarc print "Processing archive $mondir...\n" if $debug; print "\tmhonarc options: ", join(' ', @fmhaargs), "\n" if $debug; if (!mhonarc::open_archive(@fmhaargs)) { warn qq/Warning: Unable to open "$mondir" archive: /, qq/($mhonarc::CODE) $mhonarc::ERROR\n/; next; } $mhonarc::CBRcVarExpand = \&mha_rcvar_expand; $cur_msg_cnt = $mhonarc::NumOfMsgs || 0; mhonarc::process_input(); if ($mhonarc::CODE != 0) { warn qq/Warning: Problem processing "$mondir": /, qq/($mhonarc::CODE) $mhonarc::ERROR\n/; next; } if ($cur_msg_cnt == $mhonarc::NumOfMsgs) { print "Skipping search index, no new messages in archive\n" if $debug; next; } # update search index # The -Y option is used so we do not have to process all months # to update index. if (!$keepsearch && !$nosearch && !$disable_search) { my @nmzargs = ( $MKNMZ, '--mhonarc', # only do mhonarc pages '-f', $MKNMZRC, # specify resource file '-T', $MKNMZTMPLDIR, # specify template directory '-O', $htmldir, # specify location to place index '-Y' # do not delete existing files ); if (!$debug && !$rebuild) { push(@nmzargs, '--quiet'); } push(@nmzargs, $mondir); print "Search Index Command: ", join(" ", @nmzargs), "\n" if $debug; if (system(@nmzargs)) { warn qq/Warning: Non-zero exit status returned from /, qq/"@nmzargs": $?\n/; } namazu_cleanup($htmldir); } } } ## Update monthly index if (!opendir(DIR, $htmldir)) { warn qq/Warning: Unable to open $htmldir for reading: $!\n/; next; } @months = reverse sort grep { /^$folder_regex/o } readdir(DIR); print "Month listing for main index: @months\n" if $debug; closedir(DIR); my $indexhtml = join('/', $htmldir, 'index.html'); if (!open(INDEX, ">$indexhtml.tmp")) { warn qq/Warning: Unable to open $htmldir for reading: $!\n/; next; } my @vars = ( '-nosearch' => $disable_search, 'SEARCH-CGI' => $SEARCH_CGI, 'LIST-TITLE' => $short_title, 'LIST-NAME' => $list, 'LIST-DESC' => $title, ); print_template(\*INDEX, $main_header, @vars); print INDEX "\n"; print_template(\*INDEX, $main_footer, @vars); close(INDEX); if (!rename("$indexhtml.tmp", $indexhtml)) { warn qq|Warning: Unable to rename "$indexhtml.tmp" to |, qq|"$indexhtml": $!\n|; } } update_archive_index( '-config' => $config, '-listdef' => $listdef, '-htmldir' => $HTML_DIR, '-htmlurl' => $HTML_URL, '-infodir' => $INFO_DIR, '-infourl' => $INFO_URL, '-allindex' => $all_index ); } # End: MAIN ############################################################################ sub entify { my $str = shift; $str =~ s/\&/\&/; $str =~ s//\>/; $str; } sub get_periods { my $dir = shift; local(*DIR); if (!opendir(DIR, $dir)) { warn qq/Warning: Unable to open "$dir": $!/; return ( ); } my @months = reverse sort grep { /^$folder_regex$/o } readdir(DIR); closedir(DIR); @months; } sub read_template { my $fh = shift; my %varhash = @_; my $data = ""; if ($varhash{'-nosearch'}) { local $_; my $ignore = 0; while (<$fh>) { if ($ignore) { $ignore = 0 if //; next; } if (//) { $ignore = 1; next; } s/\$([^\$]+)\$/$varhash{$1}/ge; $data .= $_; } } else { local $/; $data = <$fh>; $data =~ s/\$([^\$]+)\$/$varhash{$1}/ge; } $data; } sub print_template { my $fhout = shift; my $file = shift; if (-e $file) { print "Reading template file $file\n" if $debug; local(*FILE); if (open(FILE, $file)) { print $fhout read_template(\*FILE, @_); close(FILE); } else { warn qq/Warning: Unable to open "$file": $!\n/; } } } sub namazu_cleanup { my $dir = shift; my $lock = join('/', $dir, 'NMZ.lock2'); local(*LOCK); if (!open(LOCK, $lock)) { # no lock file left around, so everything should be okay return; } my $pid = ; close(LOCK); if (!kill(0, $pid)) { warn qq/Warning: Stale "$lock", removing it\n/; if (!unlink($lock)) { warn qq/Warning: Unable to remove "$lock": $!\n/; } } } sub format_date { my $time = shift; my $fmt = shift || '%Y-%m-%d %H:%M:%S'; #my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time); #$year += 1900; ++$mon; #sprintf("%d-%02d-%02d %02d:%02d:%02d", $year,$mon,$mday,$hour,$min,$sec); POSIX::strftime($fmt, localtime($time)); } ## Retrieve the last time an archive was updated. # We scan the message pages since their mtime should be set to the # date of the message. # sub retrieve_last_update { my $archive = shift; local(*DIR); if (!opendir(DIR, $archive)) { warn qq/Warning: Unable to open "$archive" for reading: $!\n/; return undef; } my $latest = 0; my $mtime = 0; my $file; foreach $file (readdir(DIR)) { next unless $file =~ /^msg\d+\.html/; $mtime = (stat(join('/', $archive, $file)))[9]; $latest = $mtime if ($mtime > $latest); } close(DIR); if ($latest == 0) { # No luck with message pages, so try database file warn qq/Warning: Unable to determine last update from message pages /, qq/for "$archive"\n/; if (-e join('/', $archive, '.mhonarc.db')) { $latest = ((stat(_))[9]); } elsif (-e join('/', $archive, 'mhonarc.db')) { $latest = ((stat(_))[9]); } } if ($latest == 0) { # No luck with data, so use directory mtime $latest = ((stat($archive))[9]); } $latest; } ## Remove HTML archive # sub clean_html_archive { my $dir = shift; # Directory of archive my $ks = shift; # Flag is search index files should be preserved if (!$ks) { # delete everything print "Removing $htmldir\n" if $debug; system('/bin/rm', '-rf', $dir); return; } # keep search index, so must delete each period sub-directory local(*DIR); opendir(DIR, $dir) || die qq/ERROR: Unable to open "$dir" for reading: $!\n/; my @subdirs = map { join('/',$dir,$_) } grep { /^$folder_regex$/o } readdir(DIR); closedir(DIR); my $subdir; foreach $subdir (@subdirs) { print "Removing $subdir\n" if $debug; system('/bin/rm', '-rf', $subdir); } } ## Retrieve the list info URL. # sub get_info_url { my %opts = @_; my $pathname = join('/', $opts{'-dir'}, $opts{'-name'}) . '.html'; if (! -e $pathname) { return undef; } join('/', $opts{'-baseurl'}, $opts{'-name'}) . '.html'; } ## Generats the all-lists index. # sub update_archive_index { print "Generating root archive index...\n" if $debug; my %opts = @_; my $config = $opts{'-config'}; my $listdef = $opts{'-listdef'}; my $html_dir = $opts{'-htmldir'}; my $html_url = $opts{'-htmlurl'}; my $info_dir = $opts{'-infodir'}; my $info_url = $opts{'-infourl'}; my $index_html = $opts{'-allindex'}; my $header = $opts{'-header'} || $config->{'ALL_LISTS_HEADER'} || join('/', $html_dir, '.PNM.all-head'); my $footer = $opts{'-footer'} || $config->{'ALL_LISTS_FOOTER'} || join('/', $html_dir, '.PNM.all-foot'); my $label_name = $opts{'-label-name'} || $config->{'ALL_LISTS_LABEL_NAME'} || 'Name'; my $label_indexes = $opts{'-label-indexes'} || $config->{'ALL_LISTS_LABEL_INDEXES'} || 'Current Index'; my $label_last = $opts{'-label-last-updated'} || $config->{'ALL_LISTS_LABEL_LAST_UPDATED'} || 'Last Updated'; my $label_info = $opts{'-label-info'} || $config->{'ALL_LISTS_LABEL_INFO'} || '[info]'; my $label_date = $opts{'-label-date'} || $config->{'ALL_LISTS_LABEL_DATE'} || '[Date]'; my $label_threads = $opts{'-label-threads'} || $config->{'ALL_LISTS_LABEL_THREADS'} || '[Threads]'; my $time_fmt = $opts{'-time-fmt'} || $config->{'ALL_LISTS_DATE_FORMAT'} || '%Y-%m-%d %H:%M:%S'; local(*IDX); my $tmp_index = $index_html . ".tmp"; if (!open(IDX, ">$tmp_index")) { warn qq/Warning: Unable to create "$tmp_index": $!\n/; return; } my %updated = ( ); my($list, $listname, $last_updated, $dir, $latest, $info); foreach $listname (keys %$listdef) { next if $listname =~ /^\./; # skip hidden archives next if $listdef->{'hide-from-all-lists'}[0]; foreach $list ($listname, "$listname.CVS") { $dir = join('/', $html_dir, $list); next unless -e $dir; print "Computing last update for $list...\n" if $debug; my @months = get_periods($dir); next unless @months; $latest = $months[0]; $last_updated = retrieve_last_update(join('/', $dir, $latest)); if (!defined($last_updated)) { print "Unable to compute last update for $list.\n" if $debug; next; } $updated{$list} = [ $last_updated, $list, $listname, $dir, $latest ]; } } print_template(\*IDX, $header); print IDX qq|\n|, qq|\n|, qq||, qq||, qq||, qq|\n\n|; my($time); foreach $list (sort { $updated{$b}->[0] <=> $updated{$a}->[0] } keys(%updated)) { ($time, $list, $listname, $dir, $latest) = @{$updated{$list}}; print "Printing listing for $list\n" if $debug; $last_updated = format_date($time, $time_fmt); $last_updated =~ s/ /\ /g; my $short_title = entify($listdef->{$listname}{'all-lists-name'}[0] || $listname); my $description = entify($listdef->{$listname}{'description'}[0] || $listname); if ($list =~ /\.CVS$/) { $short_title .= " (CVS)"; $description .= " (CVS commits)"; } $info = get_info_url( '-name' => $listname, '-dir' => $info_dir, '-baseurl' => $info_url ); print IDX qq|\n|; print IDX qq|\n|; print IDX qq|\n|; print IDX qq|\n|; } print IDX qq|
$label_name$label_indexes$label_last
 |, qq|$short_title |; print IDX qq|$label_info | if defined($info); print IDX qq||; print IDX qq| $label_date | if (-e join('/', $dir, $latest, 'index.html')); print IDX qq| $label_threads | if (-e join('/', $dir, $latest, 'threads.html')); print IDX qq| |, $last_updated, qq| 
\n|; print_template(\*IDX, $footer); close(IDX); if (!rename($tmp_index, $index_html)) { warn qq/Warning: Unable to rename "$tmp_index" to "$index_html": $!\n/; } } sub mha_rcvar_expand { my $mha_index = shift; my $var_name = shift; my $arg = shift; my $val = undef; if ($var_name eq 'NMZ-SUBJECT-QUERY') { my($lref, $key, $pos) = mhonarc::compute_msg_pos($mha_index, $var_name, $arg); return undef unless defined($key); my $clipped = 0; $val = mhonarc::get_base_subject($key); if (length($val) > 128) { $val = substr($val, 0, 128); $clipped = 1; } $val = "\Q$val\E"; $val =~ s/(?:\\\s)+/\\s+/g; # \Q will escape whitespace my $repl_re = $mhonarc::SubReplyRxp; my $query = "+subject:/^(?:$repl_re)*$val"; $query .= '\s*$' unless $clipped; $query .= '/'; return ($query, 0, 0); } return undef; } ############################################################################ __END__ =head1 NAME web-archive - Update/create MHonArc archives from mailbox archives. =head1 SYNOPSIS web-archive web-archive [options] web-archive [options] [list-name ...] =head1 DESCRIPTION This program is part of mharc and has the responsibility of processing the mailbox archives created by the L script to update and/or create MHonArc archives. This program is automatically called by the L script for processing incoming mail within the mail spool if L returns with an okay status. However, this program can be manually invoked to rebuild archives, edit existing archives, or other administrative tasks. Since there may be a need to do selective archive processing, any non-option related argument is treated as mailing list archive name to process. =head1 OPTIONS =over =item C<-alllistidx> I Pathname of file to generate the all lists index. If not specified, the value of the C variable in C is used, else it defaults to "C/lists.html>". =item C<-alllistsurl> I URL to page containing list of all mailing lists archived. If not specified, the value of the C variable in C is used, else it defaults to C<-htmlurl>. =item C<-editidx> Edit archive pages, useful to apply MHonArc resource changes. =item C<-editrootidx> Only regenerate root index pages for archives. This is useful if you make changes to the C<.PNM.head> or C<.PNM.foot> files that you want immediately applied. =item C<-help> Print out usage information. =item C<-home> I Root pathname of archiving software and data. If not specified, the parent directory that contains this program is used. =item C<-htmldir> I Root directory for html archives. If not specified, "C/html>" is used. =item C<-htmlurl> I URL root to HTML archives. If not specified, defaults to C/html>. =item C<-infodir> I Pathname of directory containing informational pages for each list archive. Information for a list archive can be provided by creating a file called "C.html>". Once created, a link to the file (based on the value of the C<-infourl> option) will be generated in the all-lists index to it. If this option is not specified, the value of the C variable in C is used, else it defaults to "C/info>". =item C<-infourl> I Base URL containing informational pages for each list archive. If not specified, the value of the C variable in C is used, else it defaults to "C/info>". =item C<-keepsearch> Preserve search index if C<-rebuild> is specified. This option is handy if all that is desired is to rebuild the HTML archives from the raw data since the overhead of rebuilding the search indexes will be avoided. B Do not use C<-keepsearch> if you have removed messages from the raw mail archives since resulting HTML message pages may have different URIs than what is stored within the search index. =item C<-listsdef> I Pathname to mailing lists definition file. If not specified, "C/lib/lists.def>" is used. =item C<-man> Print out entire manpage. =item C<-mboxdir> I Root directory for mbox archives. If not specified, "C/mbox>" is used. =item C<-mharc> I MHonArc resource file for archives. If not specified, "C/lib/common.mrc>" is used. =item C<-mharcdir> I Directory containing list-specifc MHonArc resource files. A given list archive can have additional resource settings by creating a file called C.mrc> within the directory specified by C<-mharcdir>. If C<-mharcdir> is not specified, "C/lib/mrc>" is used. =item C<-mhamaxsize> I Maximum MHonArc archive size. If not specified the value of the C environment variable is used. =item C<-mhapagesize> I Maximum MHonArc index page size. If not specified the value of the C environment variable is used. =item C<-mknmz> I Pathname to Namazu make search index program. If not specified, "C" is used. =item C<-mknmzrc> I Pathname to Namazu configuration file. If not specified, "C/cgi-bin/mknmzrc>" is used. =item C<-mknmztmpldir> I Pathname to Namazu template directory. If not specified, "C/cgi-bin/template>" is used. =item C<-mnavcgi> I URL to monthly navigation cgi program. If not specified, C/cgi-bin/mnav.cgi> is used. =item C<-mtimeage> I Modify time age of a mailbox file to be considered for processing. If not specified the value of the C environment variable is used. =item C<-nosearch> Do not update search indexes. =item C<-rebuild> Rebuild archives from scratch. =item C<-rooturl> I URL root of archives. If not specified, C is used. =item C<-searchcgi> I URL to search cgi program. If not specified, C/cgi-bin/namazu.cgi> is used. =item C<-verbose> Show what is going on in detail. =back =head1 ENVIRONMENT Environment variable usage is deprecated. The following environment variables are recognized: =over =item C If set to a true value, detailed information of progress will be printed to stdout. Debugging can also be enabled by the C<-debug> command-line option. =item C If set to a true value, archives will be editted. It is probably better to use the C<-editidx> command-line option instead if archives editing is desired. =item C Maximum MHonArc archive size. The default value is 2000. This setting can be overridden by the C<-mhamaxsize> command-line option. =item C The modification age, in seconds, for a mailbox to be considered for processing. The default value is C<86400> (one day). This setting can be overridden by the C<-mtimeage> command-line option. =item C If set to a true value, the Namazu search indexes will not be updated for archives processed. Disabling of search index updates can also be disabled by the C<-nosearch> command-line option. =item C MHonArc index page size. The default value is 200. This setting can be overridden by the C<-mhapagesize> command-line option. =item C If set to a true value, archives will be rebuilt. It is probably better to use the C<-rebuild> command-line option instead if rebuilding is desired. =back =head1 VERSION $Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $ =head1 AUTHOR Earl Hood, earl@earlhood.com This program is part of the mharc archiving system and comes with ABSOLUTELY NO WARRANTY and may be copied only under the terms of the GNU General Public License, which may be found in the mharc distribution. =cut