+#!/usr/bin/perl
+##---------------------------------------------------------------------------##
+## File:
+## $Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
+## Description:
+## Updates/creates web archives from mailbox archives.
+## Run script with '-man' option to view manpage for this program.
+##---------------------------------------------------------------------------##
+## Copyright (C) 2001-2002 Earl Hood <earl@earlhood.com>
+##
+## This program is free software; you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+## 02111-1307, USA
+##---------------------------------------------------------------------------##
+
+package MHArc::web_archive;
+
+##--------------------------------------------------------------------------##
+# <x-boot-strap>
+BEGIN { die qq/CGI use FORBIDDEN!\n/ if (defined($ENV{'GATEWAY_INTERFACE'})); }
+my $Dir; BEGIN { $Dir = `dirname $0`; chomp $Dir; }
+use lib "$Dir/../lib"; # Add relative lib to search path
+# </x-boot-strap>
+##--------------------------------------------------------------------------##
+# <x-config>
+use MHArc::Config;
+my $config = MHArc::Config->load("$Dir/../lib/config.sh");
+# </x-config>
+##--------------------------------------------------------------------------##
+
+
+use Getopt::Long;
+use POSIX;
+use MHArc::ListDef;
+use MHArc::Util qw( usage );
+
+# Load MHonArc library
+require 'mhamain.pl';
+
+my $debug = 0;
+
+# Regular expression to match mail folder/mboxes
+my $folder_regex = '\d+(?:-\d+)?';
+
+MAIN: {
+ my %opt = ( );
+ my $clstatus = GetOptions(\%opt,
+ 'alllistsurl=s', # Root to all lists URL.
+ 'alllistsfile=s', # Pathname to all lists index page.
+ 'debug|verbose', # Show what is going on in detail.
+ 'editidx', # Edit archive pages; useful to apply MHonArc resource
+ # changes.
+ 'editallidx', # Regen all lists index.
+ 'editidxonly', # Edit archive index pages only.
+ 'editrootidx', # Regen top index.
+ 'home=s', # Pathname of home directory of archive account.
+ 'htmldir=s', # Root directory for html archives.
+ 'htmlurl=s', # Root URL for html archives.
+ 'keepsearch!', # Keep search index on a rebuild.
+ 'listsdef=s', # Pathname to list definition file.
+ 'mboxdir=s', # Root directory for mbox archives.
+ 'mboxurl=s', # Root URL for mbox archives.
+ 'mharc=s', # MHonArc resource file for archives.
+ 'mhamaxsize=i', # Maximum MHonArc archive size.
+ 'mhapagesize=i', # Maximum MHonArc index page size.
+ 'mknmz=s', # Pathname to Namazu make search index program.
+ 'mknmzrc=s', # Pathname to Namazu configuration file.
+ 'mknmztmpldir=s', # Pathname to Namazu template directory.
+ 'mesgcgi=s', # Message CGI URL.
+ 'mnavcgi=s', # Month navigation CGI URL.
+ 'mtimeage=i', # Modify time age of a mailbox file to be considered
+ # for processing.
+ 'nosearch', # Do not update search indexes.
+ 'rebuild', # Rebuild archives from scratch.
+ 'rooturl=s', # Root URL to archives.
+ 'searchcgi=s', # Search CGI URL.
+
+ 'man',
+ 'help'
+ );
+ usage(0) unless $clstatus;
+ usage(1) if $opt{'help'};
+ usage(2) if $opt{'man'};
+
+ my $HOME = $opt{'home'} ||
+ $config->{'SW_ROOT'} ||
+ "$Dir/..";
+ my $ROOT_URL = $opt{'rooturl'} ||
+ $config->{'ROOT_URL'} ||
+ "/~mhonarc/archives";
+ my $LISTS_DEF_FILE = $opt{'listsdef'} ||
+ $config->{'LISTS_DEF_FILE'} ||
+ "$HOME/lib/lists.def";
+ my $HTML_DIR = $opt{'htmldir'} ||
+ $config->{'HTML_DIR'} ||
+ "$HOME/html";
+ my $HTML_URL = $opt{'htmlurl'} ||
+ $config->{'HTML_URL'} ||
+ "$ROOT_URL/html";
+ my $MBOX_DIR = $opt{'mboxdir'} ||
+ $config->{'MBOX_DIR'} ||
+ "$HOME/mbox";
+ my $MBOX_URL = $opt{'mboxurl'} ||
+ $config->{'MBOX_URL'} ||
+ "$ROOT_URL/mbox";
+ my $INFO_DIR = $opt{'infodir'} ||
+ $config->{'INFO_DIR'} ||
+ "$HOME/info";
+ my $INFO_URL = $opt{'infourl'} ||
+ $config->{'INFO_URL'} ||
+ "$ROOT_URL/info";
+ my $MHA_RC = $opt{'mharc'} ||
+ $config->{'MHA_RC'} ||
+ "$HOME/lib/common.mrc";
+ my $MHA_RC_DIR = $opt{'mharcdir'} ||
+ $config->{'MHA_RC_DIR'} ||
+ "$HOME/lib/mrc";
+ my $MHA_MAXSIZE = $opt{'mhamaxsize'} ||
+ $ENV{'WA_MAXSIZE'} ||
+ 2000;
+ my $MHA_PAGESIZE = $opt{'mhapagesize'} ||
+ $ENV{'WA_PAGESIZE'} ||
+ 200;
+ my $MTIME_AGE = $opt{'mtimeage'} ||
+ $ENV{'WA_MTIME_AGE'} ||
+ $config->{'MTIME_AGE'} ||
+ 86400;
+ my $MKNMZ = $opt{'mknmz'} ||
+ $config->{'MKNMZ'} ||
+ '/usr/local/bin/mknmz';
+ my $MKNMZRC = $opt{'mknmzrc'} ||
+ $config->{'MKNMZ_RC'} ||
+ "$HOME/cgi-bin/mknmzrc";
+ my $MKNMZTMPLDIR = $opt{'mknmztmpldir'} ||
+ $config->{'MKNMZ_TMPL_DIR'} ||
+ "$HOME/cgi-bin/template",
+ my $ALL_LISTS_URL = $opt{'alllistsurl'} ||
+ $config->{'ALL_LISTS_URL'} ||
+ $HTML_URL;
+ my $MESG_CGI = $opt{'mesgcgi'} ||
+ $config->{'MESG_CGI'} ||
+ join('/', $ROOT_URL,'cgi-bin/mesg.cgi');
+ my $MNAV_CGI = $opt{'mnavcgi'} ||
+ $config->{'MNAV_CGI'} ||
+ join('/', $ROOT_URL,'cgi-bin/mnav.cgi');
+ my $SEARCH_CGI = $opt{'searchcgi'} ||
+ $config->{'SEARCH_CGI'} ||
+ join('/', $ROOT_URL,'cgi-bin/namazu.cgi');
+ my $EXTRACT_CGI = $opt{'extractchcgi'} ||
+ $config->{'EXTRACT_CGI'} ||
+ join('/', $ROOT_URL,'cgi-bin/extract-mesg.cgi');
+
+ my $rebuild = $opt{'rebuild'} ||
+ $ENV{'WA_REBUILD'} || 0;
+ my $keepsearch = $opt{'keepsearch'};
+ my $editidx = $opt{'editidx'} ||
+ $ENV{'WA_EDIT'} || 0;
+ my $editidxonly = $opt{'editidxonly'} || 0;
+ my $editrootidx = $opt{'editrootidx'};
+ my $editallidx = $opt{'editallidx'};
+ my $nosearch = $opt{'nosearch'} ||
+ $ENV{'WA_NOSEARCH'} || 0;
+ $debug = $opt{'debug'} ||
+ $ENV{'WA_DEBUG'};
+
+ my $all_index = $opt{'alllistsfile'} ||
+ $config->{'ALL_LISTS_FILE'} ||
+ join('/', $HTML_DIR, 'lists.html');
+ my $main_header = $config->{'MAIN_HEADER'} ||
+ join('/', $HTML_DIR, '.PNM.head');
+ my $main_footer = $config->{'MAIN_FOOTER'} ||
+ join('/', $HTML_DIR, '.PNM.foot');
+
+ my $time = time;
+ if ($rebuild) {
+ $editidx = 0;
+ $editrootidx = 0;
+ $editallidx = 0;
+ } else {
+ $keepsearch = 0;
+ }
+ $editidx = 1 if $editidxonly;
+ if ($editidx) {
+ $editrootidx = 0;
+ $editallidx = 0;
+ }
+
+ my $listdef = MHArc::ListDef->new($LISTS_DEF_FILE);
+ print "Loaded lists definitions.\n" if $debug;
+
+ if ($editallidx) {
+ # Just updating all-lists index
+ update_archive_index(
+ '-config' => $config,
+ '-listdef' => $listdef,
+ '-htmldir' => $HTML_DIR,
+ '-htmlurl' => $HTML_URL,
+ '-infodir' => $INFO_DIR,
+ '-infourl' => $INFO_URL,
+ '-allindex' => $all_index
+ );
+ last MAIN;
+ }
+
+ mhonarc::initialize();
+ print "MHonArc initialized.\n" if $debug;
+
+ local(*DIR, *INDEX, *FILE);
+
+ print "Reading $MBOX_DIR.\n" if $debug;
+ opendir(DIR, $MBOX_DIR) || die qq/Unable to open "$MBOX_DIR": $!/;
+ my @dirs = ();
+
+ # Get list of archives to process
+ if (@ARGV) {
+ # list of archives specified on the command-line
+ @dirs = @ARGV;
+ } else {
+ # read mbox dir to get list
+ @dirs = grep { (-d "$MBOX_DIR/$_") &&
+ ($_ ne '.') &&
+ ($_ ne '..')
+ } readdir(DIR);
+ closedir(DIR);
+ }
+
+ my(@months, @folders);
+ my($dir, $list, $mon, $mondir, $htmldir, $cvs, $title, $mtime,
+ $folder, $i, $yr, $prevdir, $nextdir, $prevmon, $nextmon,
+ $disable_search, $listname, $short_title);
+
+ print "Lists: ", join(', ', @dirs), "\n" if $debug;
+ foreach $list (@dirs) {
+ print "Processing $list ...\n" if $debug;
+
+ @folders = ();
+ $listname = $list;
+ $cvs = ($listname =~ s/\.CVS$//);
+
+ if (!$editidx && !$editrootidx) {
+ # Get list of input mailboxes to process
+
+ $dir = join('/', $MBOX_DIR, $list);
+ if (!opendir(DIR, $dir)) {
+ warn qq/Unable to open "$dir": $!/;
+ next;
+ }
+
+ # create .noraw file indicator if no-raw-link specified
+ my $no_raw_file = join('/', $dir, '.noraw');
+ my $no_raw_htaccess = join('/', $dir, '.htaccess');
+ if ($listdef->{$listname}{'no-raw-link'}[0]) {
+ if (! -e $no_raw_file) {
+ local(*NORAW);
+ if (!open(NORAW, ">$no_raw_file")) {
+ warn qq/Warning: Unable to create "$no_raw_file": $!\n/;
+ } else {
+ close(NORAW);
+ }
+ }
+ if (! -e $no_raw_htaccess) {
+ local(*HTACCESS);
+ if (!open(HTACCESS, ">$no_raw_htaccess")) {
+ warn qq/Warning: Unable to create "$no_raw_htaccess": $!\n/;
+ } else {
+ print HTACCESS 'Order allow,deny', "\n",
+ 'Deny from all', "\n";
+ close(HTACCESS);
+ }
+ }
+ } elsif (-e $no_raw_file) {
+ if (!unlink($no_raw_file)) {
+ warn qq/Warning: Unable to remove "$no_raw_file": $!\n/;
+ }
+ }
+
+ @months = grep { /^$folder_regex(?:\.gz)?$/o } readdir(DIR);
+ closedir(DIR);
+ print "Mboxes: ", join(', ', @months), "\n" if $debug;
+
+ foreach $mon (@months) {
+ $mondir = join('/', $dir, $mon);
+ if ($rebuild) {
+ push(@folders, $mondir);
+ next;
+ }
+ $mtime = (stat($mondir))[9];
+ print "$mondir mtime: $mtime\n" if $debug;
+ if (($time - $mtime) < $MTIME_AGE) {
+ push(@folders, $mondir);
+ }
+ }
+
+ next if (!@folders);
+ print "Folders: ", join(', ', @folders), "\n" if $debug;
+
+ } elsif ($editidx) {
+ # Just editing pages so we get folder list from html directory
+ $dir = join('/', $HTML_DIR, $list);
+ if (!opendir(DIR, $dir)) {
+ warn qq/Unable to open "$dir": $!/;
+ next;
+ }
+ @months = grep { /^$folder_regex$/o } readdir(DIR);
+ closedir(DIR);
+
+ foreach $mon (@months) {
+ $mondir = join('/', $dir, $mon);
+ push(@folders, $mondir);
+ }
+ next if (!@folders);
+ print "Editidx Folders: ", join(', ', @folders), "\n" if $debug;
+ }
+ @folders = reverse sort @folders;
+
+ $htmldir = join('/', $HTML_DIR, $list);
+ if ($rebuild) {
+ clean_html_archive($htmldir, $keepsearch);
+ }
+ mkdir($htmldir, 0777);
+
+ $disable_search = ($list =~ /^\./) ||
+ ((defined($listdef->{$listname}{'no-search'}) &&
+ $listdef->{$listname}{'no-search'}[0]));
+
+ if (defined($listdef->{$listname}{'description'})) {
+ $title = join(' ', @{$listdef->{$listname}{'description'}});
+ } else {
+ $title = $listname;
+ }
+ $short_title = $listname;
+ if ($cvs) {
+ $title = '[CVS] '.$title;
+ $short_title = '[CVS] '.$short_title;
+ }
+
+ if (!$editrootidx) {
+ # define arguments to mhonarc
+ my @mhaargs = (
+ '-modtime',
+ '-lockmethod', 'flock',
+ #'-maxsize', $MHA_MAXSIZE,
+ #'-idxsize', $MHA_PAGESIZE,
+ '-rcfile', $MHA_RC,
+ #'-outdir' , $htmldir,
+ '-title', "$title (date)",
+ '-ttitle', "$title (thread)",
+ '-definevar', "LIST-TITLE='$short_title'",
+ '-definevar', "LIST-NAME='$list'",
+ '-definevar', "SEARCH-CGI=$SEARCH_CGI",
+ '-definevar', "PNAV-CGI=$MNAV_CGI",
+ '-definevar', "EXTRACT-CGI=$EXTRACT_CGI",
+ '-definevar', "MESG-CGI=$MESG_CGI",
+ '-definevar', "ALL-LISTS-URL=$ALL_LISTS_URL",
+
+ '-definevar', "MNAV-CGI=$MNAV_CGI", # backwards compatibility
+ );
+
+ if (defined($listdef->{$listname}{'lang'})) {
+ push(@mhaargs, '-lang', $listdef->{$listname}{'lang'}[0]);
+ }
+
+ if (-e "$MHA_RC_DIR/$list.mrc") {
+ push(@mhaargs, '-rcfile', "$MHA_RC_DIR/$list.mrc");
+ }
+ if ($cvs) {
+ push(@mhaargs, '-nothread');
+ push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
+ } else {
+ push(@mhaargs, '-thread');
+ }
+ if ($list =~ /^\./) {
+ push(@mhaargs,
+ '-nothread',
+ '-definevar', "SEARCH-FORM=''");
+ push(@mhaargs, '-definevar', "THREAD-IDX-LINK=''");
+ }
+ if ($editidx) {
+ push(@mhaargs, '-editidx');
+ push(@mhaargs, '-nomsgpgs') if $editidxonly;
+ }
+ if (defined($config->{'MSG_DATE_FIELDS'})) {
+ push(@mhaargs, '-datefields', $config->{'DATE_FIELDS'});
+ }
+ if (!$debug && !$rebuild) {
+ push(@mhaargs, '-quiet');
+ }
+ if (!$rebuild && !$editidx) {
+ push(@mhaargs, '-add');
+ }
+ if ($listdef->{$listname}{'check-no-archive'}) {
+ push(@mhaargs, '-checknoarchive');
+ }
+
+ # add any custom options specified in definition file
+ if (defined($listdef->{$listname}{'mhonarc-options'})) {
+ require 'shellwords.pl';
+ push(@mhaargs,
+ shellwords(join(' ', @{$listdef->{$listname}{'mhonarc-options'}})));
+ }
+
+ # if searching is disabled, zero-out $SEARCH-FORM$
+ if ($disable_search) {
+ push(@mhaargs, '-definevar', "SEARCH-FORM=''");
+ }
+
+ my(@fmhaargs);
+ @months = ( );
+ foreach $folder (@folders) {
+ ($mon = $folder) =~ s/\.gz$//;
+ $mon =~ s/^.*\///;
+ push(@months, $mon);
+ }
+
+ my $cur_msg_cnt;
+ for ($i=0; $i < @folders; ++$i) {
+ $folder = $folders[$i];
+ $mon = $months[$i];
+ $mondir = join('/', $htmldir, $mon);
+
+ # make sure directory exists
+ mkdir($mondir, 0777);
+
+ # set final arguments to mhonarc
+ @fmhaargs = (
+ @mhaargs,
+ '-outdir', $mondir,
+ '-definevar', "CUR-PERIOD='$mon'",
+
+ '-definevar', "CUR-MONTH='$mon'", # backwards compatibility
+ );
+ push(@fmhaargs, $folder) unless $editidx;
+
+ # call mhonarc
+ print "Processing archive $mondir...\n" if $debug;
+ print "\tmhonarc options: ", join(' ', @fmhaargs), "\n" if $debug;
+ if (!mhonarc::open_archive(@fmhaargs)) {
+ warn qq/Warning: Unable to open "$mondir" archive: /,
+ qq/($mhonarc::CODE) $mhonarc::ERROR\n/;
+ next;
+ }
+ $mhonarc::CBRcVarExpand = \&mha_rcvar_expand;
+ $cur_msg_cnt = $mhonarc::NumOfMsgs || 0;
+ mhonarc::process_input();
+ if ($mhonarc::CODE != 0) {
+ warn qq/Warning: Problem processing "$mondir": /,
+ qq/($mhonarc::CODE) $mhonarc::ERROR\n/;
+ next;
+ }
+ if ($cur_msg_cnt == $mhonarc::NumOfMsgs) {
+ print "Skipping search index, no new messages in archive\n"
+ if $debug;
+ next;
+ }
+
+ # update search index
+ # The -Y option is used so we do not have to process all months
+ # to update index.
+ if (!$keepsearch && !$nosearch && !$disable_search) {
+ my @nmzargs = (
+ $MKNMZ,
+ '--mhonarc', # only do mhonarc pages
+ '-f', $MKNMZRC, # specify resource file
+ '-T', $MKNMZTMPLDIR, # specify template directory
+ '-O', $htmldir, # specify location to place index
+ '-Y' # do not delete existing files
+ );
+ if (!$debug && !$rebuild) {
+ push(@nmzargs, '--quiet');
+ }
+ push(@nmzargs, $mondir);
+ print "Search Index Command: ", join(" ", @nmzargs), "\n" if $debug;
+
+ if (system(@nmzargs)) {
+ warn qq/Warning: Non-zero exit status returned from /,
+ qq/"@nmzargs": $?\n/;
+ }
+ namazu_cleanup($htmldir);
+ }
+ }
+ }
+
+ ## Update monthly index
+ if (!opendir(DIR, $htmldir)) {
+ warn qq/Warning: Unable to open $htmldir for reading: $!\n/;
+ next;
+ }
+ @months = reverse sort grep { /^$folder_regex/o } readdir(DIR);
+ print "Month listing for main index: @months\n" if $debug;
+ closedir(DIR);
+ my $indexhtml = join('/', $htmldir, 'index.html');
+ if (!open(INDEX, ">$indexhtml.tmp")) {
+ warn qq/Warning: Unable to open $htmldir for reading: $!\n/;
+ next;
+ }
+
+ my @vars = (
+ '-nosearch' => $disable_search,
+ 'SEARCH-CGI' => $SEARCH_CGI,
+ 'LIST-TITLE' => $short_title,
+ 'LIST-NAME' => $list,
+ 'LIST-DESC' => $title,
+ );
+ print_template(\*INDEX, $main_header, @vars);
+ print INDEX "<ul>\n";
+ foreach $mon (@months) {
+ print INDEX qq|<li><b>$mon</b>:|;
+ print INDEX qq| <a href="$mon/index.html">[Date]</a>|
+ if (-e join('/', $htmldir, $mon, 'index.html'));
+ print INDEX qq| <a href="$mon/threads.html">[Thread]</a>|
+ if (-e join('/', $htmldir, $mon, 'threads.html'));
+
+ if (!$listdef->{$listname}{'no-raw-link'}[0]) {
+ my $raw_label = '[Raw: ]';
+ my $compressed = 0;
+ my $mbox_file = join('/', $MBOX_DIR, $list, $mon);
+ my $mbox_url = join('/', $MBOX_URL, $list, $mon);
+ if (! -e $mbox_file) {
+ $mbox_file .= ".gz";
+ $mbox_url .= ".gz";
+ $compressed = 1;
+ }
+# if (-e $mbox_file) {
+# print INDEX qq| <a href="$mbox_url">[mbox: |,
+# (-s _), qq| bytes|;
+# print INDEX qq|, gzipped| if $compressed;
+# print INDEX qq|]</a>|;
+# }
+ }
+ print INDEX qq|</li>\n|;
+ }
+ print INDEX "</ul>\n";
+ print_template(\*INDEX, $main_footer, @vars);
+ close(INDEX);
+ if (!rename("$indexhtml.tmp", $indexhtml)) {
+ warn qq|Warning: Unable to rename "$indexhtml.tmp" to |,
+ qq|"$indexhtml": $!\n|;
+ }
+ }
+
+ update_archive_index(
+ '-config' => $config,
+ '-listdef' => $listdef,
+ '-htmldir' => $HTML_DIR,
+ '-htmlurl' => $HTML_URL,
+ '-infodir' => $INFO_DIR,
+ '-infourl' => $INFO_URL,
+ '-allindex' => $all_index
+ );
+
+} # End: MAIN
+
+############################################################################
+
+sub entify {
+ my $str = shift;
+ $str =~ s/\&/\&/;
+ $str =~ s/</\</;
+ $str =~ s/>/\>/;
+ $str;
+}
+
+sub get_periods {
+ my $dir = shift;
+
+ local(*DIR);
+ if (!opendir(DIR, $dir)) {
+ warn qq/Warning: Unable to open "$dir": $!/;
+ return ( );
+ }
+ my @months = reverse sort grep { /^$folder_regex$/o } readdir(DIR);
+ closedir(DIR);
+ @months;
+}
+
+sub read_template {
+ my $fh = shift;
+ my %varhash = @_;
+ my $data = "";
+
+ if ($varhash{'-nosearch'}) {
+ local $_;
+ my $ignore = 0;
+ while (<$fh>) {
+ if ($ignore) {
+ $ignore = 0 if /<!--\/x-search-form-->/;
+ next;
+ }
+ if (/<!--x-search-form-->/) {
+ $ignore = 1;
+ next;
+ }
+ s/\$([^\$]+)\$/$varhash{$1}/ge;
+ $data .= $_;
+ }
+ } else {
+ local $/;
+ $data = <$fh>;
+ $data =~ s/\$([^\$]+)\$/$varhash{$1}/ge;
+ }
+ $data;
+}
+
+sub print_template {
+ my $fhout = shift;
+ my $file = shift;
+ if (-e $file) {
+ print "Reading template file $file\n" if $debug;
+ local(*FILE);
+ if (open(FILE, $file)) {
+ print $fhout read_template(\*FILE, @_);
+ close(FILE);
+ } else {
+ warn qq/Warning: Unable to open "$file": $!\n/;
+ }
+ }
+}
+
+sub namazu_cleanup {
+ my $dir = shift;
+ my $lock = join('/', $dir, 'NMZ.lock2');
+ local(*LOCK);
+ if (!open(LOCK, $lock)) {
+ # no lock file left around, so everything should be okay
+ return;
+ }
+ my $pid = <LOCK>;
+ close(LOCK);
+ if (!kill(0, $pid)) {
+ warn qq/Warning: Stale "$lock", removing it\n/;
+ if (!unlink($lock)) {
+ warn qq/Warning: Unable to remove "$lock": $!\n/;
+ }
+ }
+}
+
+sub format_date {
+ my $time = shift;
+ my $fmt = shift || '%Y-%m-%d %H:%M:%S';
+ #my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time);
+ #$year += 1900; ++$mon;
+ #sprintf("%d-%02d-%02d %02d:%02d:%02d", $year,$mon,$mday,$hour,$min,$sec);
+ POSIX::strftime($fmt, localtime($time));
+}
+
+## Retrieve the last time an archive was updated.
+# We scan the message pages since their mtime should be set to the
+# date of the message.
+#
+sub retrieve_last_update {
+ my $archive = shift;
+
+ local(*DIR);
+ if (!opendir(DIR, $archive)) {
+ warn qq/Warning: Unable to open "$archive" for reading: $!\n/;
+ return undef;
+ }
+
+ my $latest = 0;
+ my $mtime = 0;
+ my $file;
+ foreach $file (readdir(DIR)) {
+ next unless $file =~ /^msg\d+\.html/;
+ $mtime = (stat(join('/', $archive, $file)))[9];
+ $latest = $mtime if ($mtime > $latest);
+ }
+ close(DIR);
+
+ if ($latest == 0) {
+ # No luck with message pages, so try database file
+ warn qq/Warning: Unable to determine last update from message pages /,
+ qq/for "$archive"\n/;
+ if (-e join('/', $archive, '.mhonarc.db')) {
+ $latest = ((stat(_))[9]);
+ } elsif (-e join('/', $archive, 'mhonarc.db')) {
+ $latest = ((stat(_))[9]);
+ }
+ }
+ if ($latest == 0) {
+ # No luck with data, so use directory mtime
+ $latest = ((stat($archive))[9]);
+ }
+
+ $latest;
+}
+
+## Remove HTML archive
+#
+sub clean_html_archive {
+ my $dir = shift; # Directory of archive
+ my $ks = shift; # Flag is search index files should be preserved
+ if (!$ks) {
+ # delete everything
+ print "Removing $htmldir\n" if $debug;
+ system('/bin/rm', '-rf', $dir);
+ return;
+ }
+
+ # keep search index, so must delete each period sub-directory
+ local(*DIR);
+ opendir(DIR, $dir) ||
+ die qq/ERROR: Unable to open "$dir" for reading: $!\n/;
+ my @subdirs = map { join('/',$dir,$_) }
+ grep { /^$folder_regex$/o } readdir(DIR);
+ closedir(DIR);
+ my $subdir;
+ foreach $subdir (@subdirs) {
+ print "Removing $subdir\n" if $debug;
+ system('/bin/rm', '-rf', $subdir);
+ }
+}
+
+## Retrieve the list info URL.
+#
+sub get_info_url {
+ my %opts = @_;
+ my $pathname = join('/', $opts{'-dir'}, $opts{'-name'}) . '.html';
+ if (! -e $pathname) {
+ return undef;
+ }
+ join('/', $opts{'-baseurl'}, $opts{'-name'}) . '.html';
+}
+
+## Generats the all-lists index.
+#
+sub update_archive_index {
+ print "Generating root archive index...\n" if $debug;
+
+ my %opts = @_;
+ my $config = $opts{'-config'};
+ my $listdef = $opts{'-listdef'};
+ my $html_dir = $opts{'-htmldir'};
+ my $html_url = $opts{'-htmlurl'};
+ my $info_dir = $opts{'-infodir'};
+ my $info_url = $opts{'-infourl'};
+ my $index_html = $opts{'-allindex'};
+
+ my $header = $opts{'-header'} ||
+ $config->{'ALL_LISTS_HEADER'} ||
+ join('/', $html_dir, '.PNM.all-head');
+ my $footer = $opts{'-footer'} ||
+ $config->{'ALL_LISTS_FOOTER'} ||
+ join('/', $html_dir, '.PNM.all-foot');
+
+ my $label_name = $opts{'-label-name'} ||
+ $config->{'ALL_LISTS_LABEL_NAME'} ||
+ 'Name';
+ my $label_indexes
+ = $opts{'-label-indexes'} ||
+ $config->{'ALL_LISTS_LABEL_INDEXES'} ||
+ 'Current Index';
+ my $label_last = $opts{'-label-last-updated'} ||
+ $config->{'ALL_LISTS_LABEL_LAST_UPDATED'} ||
+ 'Last Updated';
+ my $label_info = $opts{'-label-info'} ||
+ $config->{'ALL_LISTS_LABEL_INFO'} ||
+ '[info]';
+ my $label_date = $opts{'-label-date'} ||
+ $config->{'ALL_LISTS_LABEL_DATE'} ||
+ '[Date]';
+ my $label_threads
+ = $opts{'-label-threads'} ||
+ $config->{'ALL_LISTS_LABEL_THREADS'} ||
+ '[Threads]';
+ my $time_fmt = $opts{'-time-fmt'} ||
+ $config->{'ALL_LISTS_DATE_FORMAT'} ||
+ '%Y-%m-%d %H:%M:%S';
+
+ local(*IDX);
+ my $tmp_index = $index_html . ".tmp";
+ if (!open(IDX, ">$tmp_index")) {
+ warn qq/Warning: Unable to create "$tmp_index": $!\n/;
+ return;
+ }
+
+ my %updated = ( );
+ my($list, $listname, $last_updated, $dir, $latest, $info);
+
+ foreach $listname (keys %$listdef) {
+ next if $listname =~ /^\./; # skip hidden archives
+ next if $listdef->{'hide-from-all-lists'}[0];
+
+ foreach $list ($listname, "$listname.CVS") {
+ $dir = join('/', $html_dir, $list);
+ next unless -e $dir;
+
+ print "Computing last update for $list...\n" if $debug;
+ my @months = get_periods($dir);
+ next unless @months;
+ $latest = $months[0];
+ $last_updated = retrieve_last_update(join('/', $dir, $latest));
+ if (!defined($last_updated)) {
+ print "Unable to compute last update for $list.\n" if $debug;
+ next;
+ }
+ $updated{$list} = [ $last_updated, $list, $listname, $dir, $latest ];
+ }
+ }
+
+ print_template(\*IDX, $header);
+ print IDX qq|<table class="archiveLists" cellpadding="3" cellspacing="1">\n|,
+ qq|<tr class="listsHeaderRow" valign="baseline" align="left">\n|,
+ qq|<th>$label_name</th>|,
+ qq|<th>$label_indexes</th>|,
+ qq|<th>$label_last</th>|,
+ qq|\n</tr>\n|;
+
+ my($time);
+ foreach $list (sort { $updated{$b}->[0] <=> $updated{$a}->[0] }
+ keys(%updated)) {
+ ($time, $list, $listname, $dir, $latest) = @{$updated{$list}};
+
+ print "Printing listing for $list\n" if $debug;
+ $last_updated = format_date($time, $time_fmt);
+ $last_updated =~ s/ /\ /g;
+ my $short_title = entify($listdef->{$listname}{'all-lists-name'}[0] ||
+ $listname);
+ my $description = entify($listdef->{$listname}{'description'}[0] ||
+ $listname);
+ if ($list =~ /\.CVS$/) {
+ $short_title .= " (CVS)";
+ $description .= " (CVS commits)";
+ }
+ $info = get_info_url(
+ '-name' => $listname,
+ '-dir' => $info_dir,
+ '-baseurl' => $info_url
+ );
+
+ print IDX qq|<tr valign="baseline">\n|;
+
+ print IDX qq|<td> <span class="listName">|,
+ qq|<a href="$html_url/$list/">$short_title</a></span> |;
+ print IDX qq|<a class="infoLink" href="$info">$label_info</a> |
+ if defined($info);
+ print IDX qq|</td>\n|;
+
+
+ print IDX qq|<td>|;
+ print IDX qq| <a href="$html_url/$list/$latest/index.html">$label_date</a> |
+ if (-e join('/', $dir, $latest, 'index.html'));
+ print IDX qq| <a href="$html_url/$list/$latest/threads.html">$label_threads</a> |
+ if (-e join('/', $dir, $latest, 'threads.html'));
+ print IDX qq|</td>\n|;
+
+ print IDX qq|<td> <tt>|, $last_updated, qq|</tt> </td>\n|;
+ }
+
+ print IDX qq|</table>\n|;
+ print_template(\*IDX, $footer);
+ close(IDX);
+
+ if (!rename($tmp_index, $index_html)) {
+ warn qq/Warning: Unable to rename "$tmp_index" to "$index_html": $!\n/;
+ }
+}
+
+sub mha_rcvar_expand {
+ my $mha_index = shift;
+ my $var_name = shift;
+ my $arg = shift;
+
+ my $val = undef;
+ if ($var_name eq 'NMZ-SUBJECT-QUERY') {
+ my($lref, $key, $pos) =
+ mhonarc::compute_msg_pos($mha_index, $var_name, $arg);
+ return undef unless defined($key);
+
+ my $clipped = 0;
+ $val = mhonarc::get_base_subject($key);
+ if (length($val) > 128) {
+ $val = substr($val, 0, 128);
+ $clipped = 1;
+ }
+ $val = "\Q$val\E";
+ $val =~ s/(?:\\\s)+/\\s+/g; # \Q will escape whitespace
+ my $repl_re = $mhonarc::SubReplyRxp;
+ my $query = "+subject:/^(?:$repl_re)*$val";
+ $query .= '\s*$' unless $clipped;
+ $query .= '/';
+ return ($query, 0, 0);
+ }
+
+ return undef;
+}
+
+
+############################################################################
+__END__
+
+=head1 NAME
+
+web-archive - Update/create MHonArc archives from mailbox archives.
+
+=head1 SYNOPSIS
+
+ web-archive
+ web-archive [options]
+ web-archive [options] [list-name ...]
+
+=head1 DESCRIPTION
+
+This program is part of mharc and has the responsibility of processing
+the mailbox archives created by the L<filter-spool|filter-spool> script to
+update and/or create MHonArc archives.
+
+This program is automatically called by the L<read-mail|read-mail> script for
+processing incoming mail within the mail spool if L<filter-spool|filter-spool>
+returns with an okay status. However, this program can be manually
+invoked to rebuild archives, edit existing archives, or other
+administrative tasks. Since there may be a need to do selective archive
+processing, any non-option related argument is treated as mailing
+list archive name to process.
+
+=head1 OPTIONS
+
+=over
+
+=item C<-alllistidx> I<pathname>
+
+Pathname of file to generate the all lists index.
+If not specified, the value of the C<ALL_LISTS_FILE> variable in
+C<config.sh> is used, else it defaults to "C<I<-htmldir>/lists.html>".
+
+=item C<-alllistsurl> I<url>
+
+URL to page containing list of all mailing lists archived.
+If not specified, the value of the C<ALL_LISTS_URL> variable in
+C<config.sh> is used, else it defaults to C<-htmlurl>.
+
+=item C<-editidx>
+
+Edit archive pages, useful to apply MHonArc resource
+changes.
+
+=item C<-editrootidx>
+
+Only regenerate root index pages for archives. This is useful if
+you make changes to the C<.PNM.head> or C<.PNM.foot> files that you
+want immediately applied.
+
+=item C<-help>
+
+Print out usage information.
+
+=item C<-home> I<pathname>
+
+Root pathname of archiving software and data.
+If not specified, the parent directory that contains this program
+is used.
+
+=item C<-htmldir> I<pathname>
+
+Root directory for html archives.
+If not specified, "C<I<-home>/html>" is used.
+
+=item C<-htmlurl> I<url>
+
+URL root to HTML archives.
+If not specified, defaults to C<I<rooturl>/html>.
+
+=item C<-infodir> I<pathname>
+
+Pathname of directory containing informational pages for each list
+archive. Information for a list archive can be provided by creating a
+file called "C<I<list-name>.html>". Once created, a link to the file
+(based on the value of the C<-infourl> option) will be generated in
+the all-lists index to it.
+
+If this option is not specified, the value of the C<INFO_DIR> variable
+in C<config.sh> is used, else it defaults to "C<I<-home>/info>".
+
+=item C<-infourl> I<url>
+
+Base URL containing informational pages for each list archive.
+If not specified, the value of the C<INFO_URL> variable in
+C<config.sh> is used, else it defaults to "C<I<-rooturl>/info>".
+
+=item C<-keepsearch>
+
+Preserve search index if C<-rebuild> is specified. This option
+is handy if all that is desired is to rebuild the HTML archives
+from the raw data since the overhead of rebuilding the search indexes
+will be avoided.
+
+B<CAUTION:> Do not use C<-keepsearch> if you have removed messages
+from the raw mail archives since resulting HTML message pages may
+have different URIs than what is stored within the search index.
+
+=item C<-listsdef> I<pathname>
+
+Pathname to mailing lists definition file.
+If not specified, "C<I<-home>/lib/lists.def>" is used.
+
+=item C<-man>
+
+Print out entire manpage.
+
+=item C<-mboxdir> I<pathname>
+
+Root directory for mbox archives.
+If not specified, "C<I<-home>/mbox>" is used.
+
+=item C<-mharc> I<pathname>
+
+MHonArc resource file for archives.
+If not specified, "C<I<-home>/lib/common.mrc>" is used.
+
+=item C<-mharcdir> I<pathname>
+
+Directory containing list-specifc MHonArc resource files. A given
+list archive can have additional resource settings by creating a
+file called C<I<list-name>.mrc> within the directory specified by
+C<-mharcdir>.
+
+If C<-mharcdir> is not specified, "C<I<-home>/lib/mrc>" is used.
+
+=item C<-mhamaxsize> I<number>
+
+Maximum MHonArc archive size.
+If not specified the value of the C<WA_MAXSIZE> environment variable is used.
+
+=item C<-mhapagesize> I<number>
+
+Maximum MHonArc index page size.
+If not specified the value of the C<WA_PAGESIZE> environment variable is used.
+
+=item C<-mknmz> I<pathname>
+
+Pathname to Namazu make search index program.
+If not specified, "C</usr/local/bin/mknmz>" is used.
+
+=item C<-mknmzrc> I<pathname>
+
+Pathname to Namazu configuration file.
+If not specified, "C<I<-home>/cgi-bin/mknmzrc>" is used.
+
+=item C<-mknmztmpldir> I<pathname>
+
+Pathname to Namazu template directory.
+If not specified, "C<I<-home>/cgi-bin/template>" is used.
+
+=item C<-mnavcgi> I<url>
+
+URL to monthly navigation cgi program.
+If not specified, C<I<rooturl>/cgi-bin/mnav.cgi> is used.
+
+=item C<-mtimeage> I<seconds>
+
+Modify time age of a mailbox file to be considered for processing. If
+not specified the value of the C<WA_MTIME_AGE> environment variable is
+used.
+
+=item C<-nosearch>
+
+Do not update search indexes.
+
+=item C<-rebuild>
+
+Rebuild archives from scratch.
+
+=item C<-rooturl> I<url>
+
+URL root of archives.
+If not specified, C</~mhonarc/archives> is used.
+
+=item C<-searchcgi> I<url>
+
+URL to search cgi program.
+If not specified, C<I<rooturl>/cgi-bin/namazu.cgi> is used.
+
+=item C<-verbose>
+
+Show what is going on in detail.
+
+=back
+
+=head1 ENVIRONMENT
+
+Environment variable usage is deprecated.
+
+The following environment variables are recognized:
+
+=over
+
+=item C<WA_DEBUG>
+
+If set to a true value, detailed information of progress will be
+printed to stdout. Debugging can also be enabled by the
+C<-debug> command-line option.
+
+=item C<WA_EDIT>
+
+If set to a true value, archives will be editted. It is probably
+better to use the C<-editidx> command-line option instead if archives
+editing is desired.
+
+=item C<WA_MAXSIZE>
+
+Maximum MHonArc archive size. The default value is 2000. This setting
+can be overridden by the C<-mhamaxsize> command-line option.
+
+=item C<WA_MTIME_AGE>
+
+The modification age, in seconds, for a mailbox to be considered
+for processing. The default value is C<86400> (one day).
+This setting can be overridden by the C<-mtimeage> command-line option.
+
+=item C<WA_NOSEARCH>
+
+If set to a true value, the Namazu search indexes will not be updated
+for archives processed. Disabling of search index updates can also be
+disabled by the C<-nosearch> command-line option.
+
+=item C<WA_PAGESIZE>
+
+MHonArc index page size. The default value is 200. This setting
+can be overridden by the C<-mhapagesize> command-line option.
+
+=item C<WA_REBUILD>
+
+If set to a true value, archives will be rebuilt. It is probably
+better to use the C<-rebuild> command-line option instead if rebuilding
+is desired.
+
+=back
+
+=head1 VERSION
+
+$Id: web-archive,v 1.44 2003/08/09 17:56:05 ehood Exp $
+
+=head1 AUTHOR
+
+Earl Hood, earl@earlhood.com
+
+This program is part of the mharc archiving system and comes with
+ABSOLUTELY NO WARRANTY and may be copied only under the terms of
+the GNU General Public License, which may be found in the mharc
+distribution.
+
+=cut
+