From: Bastard Operator From GNU Date: Wed, 18 May 2011 17:25:54 +0000 (-0400) Subject: Initial checkin X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=01c223d0717b58ff45eab5b876597812a5280af4;p=mharc.git Initial checkin --- 01c223d0717b58ff45eab5b876597812a5280af4 diff --git a/cgi-bin/.namazurc b/cgi-bin/.namazurc new file mode 100644 index 0000000..611122c --- /dev/null +++ b/cgi-bin/.namazurc @@ -0,0 +1,10 @@ +# $Id: .namazurc.in.dist,v 1.2 2002/03/06 19:33:45 ehood Exp $ +# This is a Namazu configuration file for namazu or namazu.cgi. + +Index /home/mharc/html +Template /home/mharc/cgi-bin/template +Replace /home/mharc/html/ /archive/html/ +Logging off +#Lang en +#Scoring tfidf +#EmphasisTags "" "" diff --git a/cgi-bin/.namazurc.in b/cgi-bin/.namazurc.in new file mode 100644 index 0000000..9c82beb --- /dev/null +++ b/cgi-bin/.namazurc.in @@ -0,0 +1,10 @@ +# $Id: .namazurc.in.dist,v 1.2 2002/03/06 19:33:45 ehood Exp $ +# This is a Namazu configuration file for namazu or namazu.cgi. + +Index @@HTML_DIR@@ +Template @@MKNMZ_TMPL_DIR@@ +Replace @@HTML_DIR@@/ @@HTML_URL@@/ +Logging off +#Lang en +#Scoring tfidf +#EmphasisTags "" "" diff --git a/cgi-bin/.namazurc.in.dist b/cgi-bin/.namazurc.in.dist new file mode 100644 index 0000000..9c82beb --- /dev/null +++ b/cgi-bin/.namazurc.in.dist @@ -0,0 +1,10 @@ +# $Id: .namazurc.in.dist,v 1.2 2002/03/06 19:33:45 ehood Exp $ +# This is a Namazu configuration file for namazu or namazu.cgi. 
+ +Index @@HTML_DIR@@ +Template @@MKNMZ_TMPL_DIR@@ +Replace @@HTML_DIR@@/ @@HTML_URL@@/ +Logging off +#Lang en +#Scoring tfidf +#EmphasisTags "" "" diff --git a/cgi-bin/extract-mesg.cgi b/cgi-bin/extract-mesg.cgi new file mode 100755 index 0000000..5972fdb --- /dev/null +++ b/cgi-bin/extract-mesg.cgi @@ -0,0 +1,215 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: extract-mesg.cgi.in.dist,v 1.5 2002/09/20 03:29:28 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA +##--------------------------------------------------------------------------## + +package MHArc::extract_mesg_cgi; + +use lib '/home/mharc/lib'; + +use CGI::Carp; +use MHArc::CGI; + +############################################################################# +## BEGIN: Config Section +############################################################################# + +## Full pathname to where raw archives are located. 
+my $mbox_archive_root = '/home/mharc/mbox'; + +## Message media-type: This is the media-type this script will return +## to the client when serving up the raw mail message. Note, some +## browsers actually support message/rfc822, but this could potentially +## cause XSS HTML email attacks, so use with caution. +my $message_media_type = 'text/plain'; + +############################################################################# +## END: Config Section +############################################################################# + +$ENV{'PATH'} = '/usr/local/bin:/bin:/usr/bin'; + +## Query argument name to contain name of archive +my $argname_archive = 'a'; + +## Query argument name to contain month +my $argname_month = 'm'; + +## Query argument name to contain message-id +my $argname_id = 'i'; + +## Mbox message separator: Try to be more strict than '^From ', but +## not too strict to deal with possible variations. +my $msgsep = '^From \S+.*\d+:\d+:\d+'; + +MAIN: { + my $form = MHArc::CGI::parse_input(); + my $archive = $form->{$argname_archive} || ""; + my $month = $form->{$argname_month} || ""; + my $id = $form->{$argname_id} || ""; + + my $list_dir; + if (($month !~ /^\d{4}(?:-\d{2})?$/) || + ($id !~ /.\@./) || + ($archive !~ /\S/) || + ($archive =~ /\.\./) || + (! -d ($list_dir = join('/', $mbox_archive_root,$archive)))) { + warn qq/Invalid arguments: a=$archive, m=$month, i=$id\n/; + MHArc::CGI::print_input_error(); + last MAIN; + } + + # Check if list has raw archive access disabled. + if (-e join('/', $list_dir, '.noraw')) { + MHArc::CGI::print_forbidden(); + last MAIN; + } + + my $gzipped = 0; + my $mbox_file = join('/', $list_dir, $month); + if (! -e $mbox_file) { + $mbox_file .= '.gz'; + $gzipped = 1; + } + if (! 
-e $mbox_file) { + warn qq/"$mbox_file" does not exist\n/; + MHArc::CGI::print_input_error(); + last MAIN; + } + + local(*MBOX); + if ($gzipped) { + if (!open(MBOX, "gzip -dc '$mbox_file' |")) { + warn qq/Unable to exec "gzip -dc '$mbox_file'": $!\n/; + MHArc::CGI::print_error(); + last MAIN; + } + } else { + if (!open(MBOX, $mbox_file)) { + warn qq/Unable to open "$mbox_file": $!\n/; + MHArc::CGI::print_error(); + last MAIN; + } + } + + local $_; + my $cache = ''; + my $in_header = 1; + my $msg_id = ''; + my $found = 0; + + SCAN: while () { + if (/$msgsep/o) { + $cache = ''; + $in_header = 1; + next SCAN; + } + next SCAN unless $in_header; + + if (/^\r?$/) { + $cache = ''; + $in_header = 0; + next SCAN; + } + + $cache .= $_; + if (s/^message-id:\s*//i) { + s/\s+\Z//; + s/[<>]//g; + if ($_ eq $id) { + $found = 1; + last SCAN; + } + $cache = ''; + $in_header = 0; + } + } + + if (!$found) { + MHArc::CGI::print_not_found_error(); + close(MBOX); + last MAIN; + } + + MHArc::CGI::print_content_type($message_media_type); + print STDOUT $cache; + while () { + last if /$msgsep/o; + print STDOUT $_; + } + close(MBOX); +} + +######################################################################## +__END__ + +=head1 NAME + +extract-mesg.cgi - mharc CGI program to retrieve raw version of a message + +=head1 SYNOPSIS + + http://.../cgi-bin/extract-mesg.cgi?a=&m=&i= + +=head1 DESCRIPTION + +This CGI program retrieves the raw version of a message from an +archive archived at a specified period and with a specified message-id. + +The CGI program will output the retrieved message to the web client. + +=head1 CGI OPTIONS + +=over + +=item C + +The name of the archive. Archive names are defined by C. + +=item C + +The message-id. + +=item C + +The period in YYYY-MM or YYYY format. 
+ +=back + +=head1 VERSION + +C<$Id: extract-mesg.cgi.in.dist,v 1.5 2002/09/20 03:29:28 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/extract-mesg.cgi.in b/cgi-bin/extract-mesg.cgi.in new file mode 100755 index 0000000..c588509 --- /dev/null +++ b/cgi-bin/extract-mesg.cgi.in @@ -0,0 +1,215 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: extract-mesg.cgi.in.dist,v 1.5 2002/09/20 03:29:28 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA +##--------------------------------------------------------------------------## + +package MHArc::extract_mesg_cgi; + +use lib '@@SW_ROOT@@/lib'; + +use CGI::Carp; +use MHArc::CGI; + +############################################################################# +## BEGIN: Config Section +############################################################################# + +## Full pathname to where raw archives are located. +my $mbox_archive_root = '@@MBOX_DIR@@'; + +## Message media-type: This is the media-type this script will return +## to the client when serving up the raw mail message. Note, some +## browsers actually support message/rfc822, but this could potentially +## cause XSS HTML email attacks, so use with caution. +my $message_media_type = 'text/plain'; + +############################################################################# +## END: Config Section +############################################################################# + +$ENV{'PATH'} = '/usr/local/bin:/bin:/usr/bin'; + +## Query argument name to contain name of archive +my $argname_archive = 'a'; + +## Query argument name to contain month +my $argname_month = 'm'; + +## Query argument name to contain message-id +my $argname_id = 'i'; + +## Mbox message separator: Try to be more strict than '^From ', but +## not too strict to deal with possible variations. +my $msgsep = '^From \S+.*\d+:\d+:\d+'; + +MAIN: { + my $form = MHArc::CGI::parse_input(); + my $archive = $form->{$argname_archive} || ""; + my $month = $form->{$argname_month} || ""; + my $id = $form->{$argname_id} || ""; + + my $list_dir; + if (($month !~ /^\d{4}(?:-\d{2})?$/) || + ($id !~ /.\@./) || + ($archive !~ /\S/) || + ($archive =~ /\.\./) || + (! 
-d ($list_dir = join('/', $mbox_archive_root,$archive)))) { + warn qq/Invalid arguments: a=$archive, m=$month, i=$id\n/; + MHArc::CGI::print_input_error(); + last MAIN; + } + + # Check if list has raw archive access disabled. + if (-e join('/', $list_dir, '.noraw')) { + MHArc::CGI::print_forbidden(); + last MAIN; + } + + my $gzipped = 0; + my $mbox_file = join('/', $list_dir, $month); + if (! -e $mbox_file) { + $mbox_file .= '.gz'; + $gzipped = 1; + } + if (! -e $mbox_file) { + warn qq/"$mbox_file" does not exist\n/; + MHArc::CGI::print_input_error(); + last MAIN; + } + + local(*MBOX); + if ($gzipped) { + if (!open(MBOX, "gzip -dc '$mbox_file' |")) { + warn qq/Unable to exec "gzip -dc '$mbox_file'": $!\n/; + MHArc::CGI::print_error(); + last MAIN; + } + } else { + if (!open(MBOX, $mbox_file)) { + warn qq/Unable to open "$mbox_file": $!\n/; + MHArc::CGI::print_error(); + last MAIN; + } + } + + local $_; + my $cache = ''; + my $in_header = 1; + my $msg_id = ''; + my $found = 0; + + SCAN: while () { + if (/$msgsep/o) { + $cache = ''; + $in_header = 1; + next SCAN; + } + next SCAN unless $in_header; + + if (/^\r?$/) { + $cache = ''; + $in_header = 0; + next SCAN; + } + + $cache .= $_; + if (s/^message-id:\s*//i) { + s/\s+\Z//; + s/[<>]//g; + if ($_ eq $id) { + $found = 1; + last SCAN; + } + $cache = ''; + $in_header = 0; + } + } + + if (!$found) { + MHArc::CGI::print_not_found_error(); + close(MBOX); + last MAIN; + } + + MHArc::CGI::print_content_type($message_media_type); + print STDOUT $cache; + while () { + last if /$msgsep/o; + print STDOUT $_; + } + close(MBOX); +} + +######################################################################## +__END__ + +=head1 NAME + +extract-mesg.cgi - mharc CGI program to retrieve raw version of a message + +=head1 SYNOPSIS + + http://.../cgi-bin/extract-mesg.cgi?a=&m=&i= + +=head1 DESCRIPTION + +This CGI program retrieves the raw version of a message from an +archive archived at a specified period and with a specified message-id. 
+ +The CGI program will output the retrieved message to the web client. + +=head1 CGI OPTIONS + +=over + +=item C + +The name of the archive. Archive names are defined by C. + +=item C + +The message-id. + +=item C + +The period in YYYY-MM or YYYY format. + +=back + +=head1 VERSION + +C<$Id: extract-mesg.cgi.in.dist,v 1.5 2002/09/20 03:29:28 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/extract-mesg.cgi.in.dist b/cgi-bin/extract-mesg.cgi.in.dist new file mode 100755 index 0000000..c588509 --- /dev/null +++ b/cgi-bin/extract-mesg.cgi.in.dist @@ -0,0 +1,215 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: extract-mesg.cgi.in.dist,v 1.5 2002/09/20 03:29:28 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA +##--------------------------------------------------------------------------## + +package MHArc::extract_mesg_cgi; + +use lib '@@SW_ROOT@@/lib'; + +use CGI::Carp; +use MHArc::CGI; + +############################################################################# +## BEGIN: Config Section +############################################################################# + +## Full pathname to where raw archives are located. +my $mbox_archive_root = '@@MBOX_DIR@@'; + +## Message media-type: This is the media-type this script will return +## to the client when serving up the raw mail message. Note, some +## browsers actually support message/rfc822, but this could potentially +## cause XSS HTML email attacks, so use with caution. +my $message_media_type = 'text/plain'; + +############################################################################# +## END: Config Section +############################################################################# + +$ENV{'PATH'} = '/usr/local/bin:/bin:/usr/bin'; + +## Query argument name to contain name of archive +my $argname_archive = 'a'; + +## Query argument name to contain month +my $argname_month = 'm'; + +## Query argument name to contain message-id +my $argname_id = 'i'; + +## Mbox message separator: Try to be more strict than '^From ', but +## not too strict to deal with possible variations. +my $msgsep = '^From \S+.*\d+:\d+:\d+'; + +MAIN: { + my $form = MHArc::CGI::parse_input(); + my $archive = $form->{$argname_archive} || ""; + my $month = $form->{$argname_month} || ""; + my $id = $form->{$argname_id} || ""; + + my $list_dir; + if (($month !~ /^\d{4}(?:-\d{2})?$/) || + ($id !~ /.\@./) || + ($archive !~ /\S/) || + ($archive =~ /\.\./) || + (! 
-d ($list_dir = join('/', $mbox_archive_root,$archive)))) { + warn qq/Invalid arguments: a=$archive, m=$month, i=$id\n/; + MHArc::CGI::print_input_error(); + last MAIN; + } + + # Check if list has raw archive access disabled. + if (-e join('/', $list_dir, '.noraw')) { + MHArc::CGI::print_forbidden(); + last MAIN; + } + + my $gzipped = 0; + my $mbox_file = join('/', $list_dir, $month); + if (! -e $mbox_file) { + $mbox_file .= '.gz'; + $gzipped = 1; + } + if (! -e $mbox_file) { + warn qq/"$mbox_file" does not exist\n/; + MHArc::CGI::print_input_error(); + last MAIN; + } + + local(*MBOX); + if ($gzipped) { + if (!open(MBOX, "gzip -dc '$mbox_file' |")) { + warn qq/Unable to exec "gzip -dc '$mbox_file'": $!\n/; + MHArc::CGI::print_error(); + last MAIN; + } + } else { + if (!open(MBOX, $mbox_file)) { + warn qq/Unable to open "$mbox_file": $!\n/; + MHArc::CGI::print_error(); + last MAIN; + } + } + + local $_; + my $cache = ''; + my $in_header = 1; + my $msg_id = ''; + my $found = 0; + + SCAN: while () { + if (/$msgsep/o) { + $cache = ''; + $in_header = 1; + next SCAN; + } + next SCAN unless $in_header; + + if (/^\r?$/) { + $cache = ''; + $in_header = 0; + next SCAN; + } + + $cache .= $_; + if (s/^message-id:\s*//i) { + s/\s+\Z//; + s/[<>]//g; + if ($_ eq $id) { + $found = 1; + last SCAN; + } + $cache = ''; + $in_header = 0; + } + } + + if (!$found) { + MHArc::CGI::print_not_found_error(); + close(MBOX); + last MAIN; + } + + MHArc::CGI::print_content_type($message_media_type); + print STDOUT $cache; + while () { + last if /$msgsep/o; + print STDOUT $_; + } + close(MBOX); +} + +######################################################################## +__END__ + +=head1 NAME + +extract-mesg.cgi - mharc CGI program to retrieve raw version of a message + +=head1 SYNOPSIS + + http://.../cgi-bin/extract-mesg.cgi?a=&m=&i= + +=head1 DESCRIPTION + +This CGI program retrieves the raw version of a message from an +archive archived at a specified period and with a specified message-id. 
+ +The CGI program will output the retrieved message to the web client. + +=head1 CGI OPTIONS + +=over + +=item C + +The name of the archive. Archive names are defined by C. + +=item C + +The message-id. + +=item C + +The period in YYYY-MM or YYYY format. + +=back + +=head1 VERSION + +C<$Id: extract-mesg.cgi.in.dist,v 1.5 2002/09/20 03:29:28 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/mesg.cgi b/cgi-bin/mesg.cgi new file mode 100755 index 0000000..fb454c2 --- /dev/null +++ b/cgi-bin/mesg.cgi @@ -0,0 +1,201 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: mesg.cgi.in.dist,v 1.1 2002/09/03 16:30:47 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA +##--------------------------------------------------------------------------## + +package MHArc::mesg_cgi; + +use lib '/home/mharc/lib'; + +use Fcntl; +use CGI::Carp; +use MHArc::CGI; +use MHArc::Namazu qw( + nmz_load_rc + nmz_get_field + nmz_msg_id_search +); + +############################################################################# +## BEGIN: Config Section +############################################################################# + +## Full pathname to where html archives are located. +my $html_archive_root = '/home/mharc/html'; + +############################################################################# +## END: Config Section +############################################################################# + +$ENV{'PATH'} = '/usr/local/bin:/bin:/usr/bin'; + +## Query argument name to contain name of archive +my $argname_archive = 'a'; + +## Query argument name to contain message-id +my $argname_id = 'i'; + +## Namazu conf file (should be the same used by namazu.cgi) +my $namazurc = '.namazurc'; + +MAIN: { + my $form = MHArc::CGI::parse_input(); + my $archive = $form->{$argname_archive} || ""; + my $id = $form->{$argname_id} || ""; + my $host = $ENV{'HTTP_HOST'} || $ENV{'SERVER_NAME'} || + $ENV{'SERVER_ADDR'} || "localhost"; + my $port = $ENV{'SERVER_PORT'} || ""; + if ($port && $port ne '80') { + $port = ":$port"; + } else { + $port = ""; + } + my $server_url= "http://$host$port"; + + my $list_dir = undef; + if (($id !~ /.\@./) || + ($archive !~ /\S/) || + ($archive =~ /\.\./) || + (! 
-d ($list_dir = join('/', $html_archive_root,$archive)))) { + warn qq/Invalid arguments: a=$archive, i=$id\n/; + MHArc::CGI::print_input_error(); + last MAIN; + } + + my $nmzrc = nmz_load_rc($namazurc); + if (!defined($nmzrc)) { + MHArc::CGI::print_script_error(); + last MAIN; + } + + my $pathname = find_id($list_dir, $id); + if (!defined($pathname)) { + MHArc::CGI::print_not_found_error(); + last MAIN; + } + if (! -e $pathname) { + warn qq/"$pathname" does not exist\n/; + MHArc::CGI::print_not_found_error(); + last MAIN; + } + + # Apply replace string to pathname + my $url = $pathname; + foreach my $r (@{$nmzrc->{'replace'}}) { + my $pos = index($pathname, $r->[0]); + if ($pos == 0) { + $url = $r->[1] . substr($pathname, length($r->[0])); + last; + } + } + + # Print out message page + local(*MESG); + if (!open(MESG, $pathname)) { + warn qq/Unable top open "$pathname": $!\n/; + MHArc::CGI::print_script_error(); + last MAIN; + } + + MHArc::CGI::print_content_type('text/html'); + my $did_base = 0; + my $str; + foreach $str () { + print STDOUT $str; + next if $did_base; + if ($str =~ //i) { + print STDOUT '', "\n"; + $did_base = 1; + } + } + close(MESG); +} + +############################################################################# +## Generic subroutines for CGI use +############################################################################# + +sub find_id { + my $list_dir = shift; + my $id = shift; + + my $docid = nmz_msg_id_search($list_dir, $id); + if ($docid < 0) { + return undef; + } + return nmz_get_field($list_dir, $docid, 'uri'); +} + +######################################################################## +__END__ + +=head1 NAME + +mesg.cgi - mharc CGI program to retrieve a message by message-id + +=head1 SYNOPSIS + + http://.../cgi-bin/mesg.cgi?a=&i= + +=head1 DESCRIPTION + +This CGI program retrieves a message from a specified archive with +a give message-id. 
The CGI program's main purpose is to provide +a persistent URL to archived messages that is immune to archive +rebuilds. + +The CGI program will output the retrieved message to the web client. +The message will have a Cbase hrefE> tag added so relative +links in the message page will function properly. + +=head1 CGI OPTIONS + +=over + +=item C + +The name of the archive. Archive names are defined by C. + +=item C + +The message-id. + +=back + +=head1 VERSION + +C<$Id: mesg.cgi.in.dist,v 1.1 2002/09/03 16:30:47 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/mesg.cgi.in b/cgi-bin/mesg.cgi.in new file mode 100755 index 0000000..156ccb2 --- /dev/null +++ b/cgi-bin/mesg.cgi.in @@ -0,0 +1,201 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: mesg.cgi.in.dist,v 1.1 2002/09/03 16:30:47 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+## +## You should have received a copy of the GNU General Public License +## along with this program; if not, write to the Free Software +## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA +##--------------------------------------------------------------------------## + +package MHArc::mesg_cgi; + +use lib '@@SW_ROOT@@/lib'; + +use Fcntl; +use CGI::Carp; +use MHArc::CGI; +use MHArc::Namazu qw( + nmz_load_rc + nmz_get_field + nmz_msg_id_search +); + +############################################################################# +## BEGIN: Config Section +############################################################################# + +## Full pathname to where html archives are located. +my $html_archive_root = '@@HTML_DIR@@'; + +############################################################################# +## END: Config Section +############################################################################# + +$ENV{'PATH'} = '/usr/local/bin:/bin:/usr/bin'; + +## Query argument name to contain name of archive +my $argname_archive = 'a'; + +## Query argument name to contain message-id +my $argname_id = 'i'; + +## Namazu conf file (should be the same used by namazu.cgi) +my $namazurc = '.namazurc'; + +MAIN: { + my $form = MHArc::CGI::parse_input(); + my $archive = $form->{$argname_archive} || ""; + my $id = $form->{$argname_id} || ""; + my $host = $ENV{'HTTP_HOST'} || $ENV{'SERVER_NAME'} || + $ENV{'SERVER_ADDR'} || "localhost"; + my $port = $ENV{'SERVER_PORT'} || ""; + if ($port && $port ne '80') { + $port = ":$port"; + } else { + $port = ""; + } + my $server_url= "http://$host$port"; + + my $list_dir = undef; + if (($id !~ /.\@./) || + ($archive !~ /\S/) || + ($archive =~ /\.\./) || + (! 
-d ($list_dir = join('/', $html_archive_root,$archive)))) { + warn qq/Invalid arguments: a=$archive, i=$id\n/; + MHArc::CGI::print_input_error(); + last MAIN; + } + + my $nmzrc = nmz_load_rc($namazurc); + if (!defined($nmzrc)) { + MHArc::CGI::print_script_error(); + last MAIN; + } + + my $pathname = find_id($list_dir, $id); + if (!defined($pathname)) { + MHArc::CGI::print_not_found_error(); + last MAIN; + } + if (! -e $pathname) { + warn qq/"$pathname" does not exist\n/; + MHArc::CGI::print_not_found_error(); + last MAIN; + } + + # Apply replace string to pathname + my $url = $pathname; + foreach my $r (@{$nmzrc->{'replace'}}) { + my $pos = index($pathname, $r->[0]); + if ($pos == 0) { + $url = $r->[1] . substr($pathname, length($r->[0])); + last; + } + } + + # Print out message page + local(*MESG); + if (!open(MESG, $pathname)) { + warn qq/Unable top open "$pathname": $!\n/; + MHArc::CGI::print_script_error(); + last MAIN; + } + + MHArc::CGI::print_content_type('text/html'); + my $did_base = 0; + my $str; + foreach $str () { + print STDOUT $str; + next if $did_base; + if ($str =~ //i) { + print STDOUT '', "\n"; + $did_base = 1; + } + } + close(MESG); +} + +############################################################################# +## Generic subroutines for CGI use +############################################################################# + +sub find_id { + my $list_dir = shift; + my $id = shift; + + my $docid = nmz_msg_id_search($list_dir, $id); + if ($docid < 0) { + return undef; + } + return nmz_get_field($list_dir, $docid, 'uri'); +} + +######################################################################## +__END__ + +=head1 NAME + +mesg.cgi - mharc CGI program to retrieve a message by message-id + +=head1 SYNOPSIS + + http://.../cgi-bin/mesg.cgi?a=&i= + +=head1 DESCRIPTION + +This CGI program retrieves a message from a specified archive with +a give message-id. 
The CGI program's main purpose is to provide +a persistent URL to archived messages that is immune to archive +rebuilds. + +The CGI program will output the retrieved message to the web client. +The message will have a Cbase hrefE> tag added so relative +links in the message page will function properly. + +=head1 CGI OPTIONS + +=over + +=item C + +The name of the archive. Archive names are defined by C. + +=item C + +The message-id. + +=back + +=head1 VERSION + +C<$Id: mesg.cgi.in.dist,v 1.1 2002/09/03 16:30:47 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/mesg.cgi.in.dist b/cgi-bin/mesg.cgi.in.dist new file mode 100755 index 0000000..156ccb2 --- /dev/null +++ b/cgi-bin/mesg.cgi.in.dist @@ -0,0 +1,201 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: mesg.cgi.in.dist,v 1.1 2002/09/03 16:30:47 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+##
+##    You should have received a copy of the GNU General Public License
+##    along with this program; if not, write to the Free Software
+##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+##    02111-1307, USA
+##--------------------------------------------------------------------------##
+
+package MHArc::mesg_cgi;
+
+use strict;
+use warnings;
+
+use lib '@@SW_ROOT@@/lib';
+
+use Fcntl;
+use CGI::Carp;
+use MHArc::CGI;
+use MHArc::Namazu qw(
+    nmz_load_rc
+    nmz_get_field
+    nmz_msg_id_search
+);
+
+#############################################################################
+## BEGIN: Config Section
+#############################################################################
+
+## Full pathname to where html archives are located.
+my $html_archive_root = '@@HTML_DIR@@';
+
+#############################################################################
+## END: Config Section
+#############################################################################
+
+$ENV{'PATH'} = '/usr/local/bin:/bin:/usr/bin';
+
+## Query argument name to contain name of archive
+my $argname_archive = 'a';
+
+## Query argument name to contain message-id
+my $argname_id = 'i';
+
+## Namazu conf file (should be the same used by namazu.cgi)
+my $namazurc = '.namazurc';
+
+## Main flow: validate the query arguments, look the message-id up in
+## the archive's Namazu index, then stream the message page to the
+## client with a <base href> tag inserted after the opening head tag.
+MAIN: {
+    my $form       = MHArc::CGI::parse_input();
+    my $archive    = $form->{$argname_archive} || "";
+    my $id         = $form->{$argname_id} || "";
+    my $server_url = server_url();
+
+    ## The id must look like a message-id (something@something); the
+    ## archive name must be non-blank, must not contain "..", and must
+    ## name an existing directory under the archive root.
+    my $list_dir = undef;
+    if (($id !~ /.\@./) ||
+        ($archive !~ /\S/) ||
+        ($archive =~ /\.\./) ||
+        (! -d ($list_dir = join('/', $html_archive_root, $archive)))) {
+        warn qq/Invalid arguments: a=$archive, i=$id\n/;
+        MHArc::CGI::print_input_error();
+        last MAIN;
+    }
+
+    my $nmzrc = nmz_load_rc($namazurc);
+    if (!defined($nmzrc)) {
+        MHArc::CGI::print_script_error();
+        last MAIN;
+    }
+
+    my $pathname = find_id($list_dir, $id);
+    if (!defined($pathname)) {
+        MHArc::CGI::print_not_found_error();
+        last MAIN;
+    }
+    if (! -e $pathname) {
+        warn qq/"$pathname" does not exist\n/;
+        MHArc::CGI::print_not_found_error();
+        last MAIN;
+    }
+
+    # Apply replace string to pathname: the first 'replace' pair whose
+    # left side is a prefix of the pathname maps it to a URL path.
+    my $url = $pathname;
+    foreach my $r (@{$nmzrc->{'replace'}}) {
+        if (index($pathname, $r->[0]) == 0) {
+            $url = $r->[1] . substr($pathname, length($r->[0]));
+            last;
+        }
+    }
+
+    # Print out message page.  Three-argument open with a lexical
+    # handle: the pathname comes from index data and must never be
+    # interpreted as an open mode or a pipe.
+    my $mesg;
+    if (!open($mesg, '<', $pathname)) {
+        warn qq/Unable to open "$pathname": $!\n/;
+        MHArc::CGI::print_script_error();
+        last MAIN;
+    }
+
+    MHArc::CGI::print_content_type('text/html');
+
+    # NOTE(review): the filehandle read, the match pattern and the tag
+    # text were stripped from this copy of the file.  They are restored
+    # here from the POD ("base href" tag) and from the otherwise unused
+    # $server_url/$url values -- confirm against the pristine
+    # mesg.cgi.in.dist.
+    my $base_href = html_escape($server_url . $url);
+    my $did_base  = 0;
+    while (defined(my $str = <$mesg>)) {
+        print STDOUT $str;
+        next if $did_base;
+        if ($str =~ /<head\b[^>]*>/i) {
+            print STDOUT '<base href="', $base_href, '">', "\n";
+            $did_base = 1;
+        }
+    }
+    close($mesg);
+}
+
+#############################################################################
+## Generic subroutines for CGI use
+#############################################################################
+
+## Build "scheme://host[:port]" for the current request from the CGI
+## environment.  The scheme follows the HTTPS variable, as mnav.cgi
+## does.  SERVER_PORT is appended only when it is not the scheme's
+## default port and the host value does not already carry a port
+## (HTTP_HOST includes one whenever the client used a non-default port).
+sub server_url {
+    my $host   = $ENV{'HTTP_HOST'} || $ENV{'SERVER_NAME'} ||
+                 $ENV{'SERVER_ADDR'} || "localhost";
+    my $port   = $ENV{'SERVER_PORT'} || "";
+    my $scheme = (lc($ENV{'HTTPS'} || "") eq 'on') ? 'https' : 'http';
+    my $default_port = ($scheme eq 'https') ? '443' : '80';
+
+    if ($port eq "" || $port eq $default_port ||
+        $host =~ /^(?:\[.*\]|[^:]*):\d+\z/) {
+        $port = "";
+    } else {
+        $port = ":$port";
+    }
+    return "$scheme://$host$port";
+}
+
+## Escape a string for use inside a double-quoted HTML attribute; the
+## host part of the base URL is supplied by the client.
+sub html_escape {
+    my $str = shift;
+    $str =~ s/&/&amp;/g;
+    $str =~ s/</&lt;/g;
+    $str =~ s/>/&gt;/g;
+    $str =~ s/"/&quot;/g;
+    return $str;
+}
+
+## Look up message-id $id in the Namazu index under $list_dir.
+## Returns undef when nmz_msg_id_search() yields a negative document
+## id; otherwise returns the 'uri' field of the matching document,
+## which MAIN treats as the pathname of the message page.
+sub find_id {
+    my $list_dir = shift;
+    my $id = shift;
+
+    my $docid = nmz_msg_id_search($list_dir, $id);
+    if ($docid < 0) {
+        return undef;
+    }
+    return nmz_get_field($list_dir, $docid, 'uri');
+}
+
+########################################################################
+__END__
+
+=head1 NAME
+
+mesg.cgi - mharc CGI program to retrieve a message by message-id
+
+=head1 SYNOPSIS
+
+  http://.../cgi-bin/mesg.cgi?a=<archive>&i=<message-id>
+
+=head1 DESCRIPTION
+
+This CGI program retrieves a message from a specified archive with
+a given message-id.
The CGI program's main purpose is to provide +a persistent URL to archived messages that is immune to archive +rebuilds. + +The CGI program will output the retrieved message to the web client. +The message will have a C<E<lt>base hrefE<gt>> tag added so relative +links in the message page will function properly. + +=head1 CGI OPTIONS + +=over + +=item C<a> + +The name of the archive. Archive names are defined by C. + +=item C<i> + +The message-id. + +=back + +=head1 VERSION + +C<$Id: mesg.cgi.in.dist,v 1.1 2002/09/03 16:30:47 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/mknmzrc b/cgi-bin/mknmzrc new file mode 100644 index 0000000..cfe0038 --- /dev/null +++ b/cgi-bin/mknmzrc @@ -0,0 +1,203 @@ +# $Id: mknmzrc.in.dist,v 1.3 2002/03/06 22:47:29 ehood Exp $ +# This is a Namazu configuration file for mknmz. +# +package conf; # Don't remove this line! + +#=================================================================== +# +# Administrator's email address +# +$ADDRESS = 'sysadmin@gnu.org'; + + +#=================================================================== +# +# Regular Expression Patterns +# + +# +# This pattern specifies HTML suffixes. +# +$HTML_SUFFIX = "html|[ps]html"; + +# +# This pattern specifies file names which will be targeted. +# NOTE: It can be specified by --allow=regex option. +# Do NOT use `$' or `^' anchors. +# Case-insensitive. +# +$ALLOW_FILE = ".*\\.(?:$HTML_SUFFIX)|.*\\.txt" . # HTML, plain text + "|.*\\.gz|.*\\.Z|.*\\.bz2" . # Compressed files + "|\\d+|[-\\w]+\\.[1-9n]"; # Mail/News, man + +# +# This pattern specifies file names which will NOT be targeted. +# NOTE: It can be specified by --deny=regex option. +# Do NOT use `$' or `^' anchors. +# Case-insensitive.
+# +$DENY_FILE = "(index|threads)\\.html|.*\\.(gif|png|jpg|jpeg)|.*\\.tar\\.gz|core|.*\\.bak|.*~|\\..*|\x23.*"; + +# +# This pattern specifies PATHNAMEs which will NOT be targeted. +# NOTE: Usually specified by --exclude=regex option. +# +# $EXCLUDE_PATH = undef; + +# +# This pattern specifies file names which can be omitted +# in URI. e.g., 'index.html|index.htm|Default.html' +# +# NOTE: This is similar to Apache's "DirectoryIndex" directive. +# +# $DIRECTORY_INDEX = ""; + +# +# This pattern specifies Mail/News's fields in its header which +# should be searchable. NOTE: case-insensitive +# +# $REMAIN_HEADER = "From|Date|Message-ID"; + +# +# This pattern specifies fields which used for field-specified +# searching. NOTE: case-insensitive +# +# $SEARCH_FIELD = "message-id|subject|from|date|uri|newsgroups|to|summary|size"; + +# +# This pattern specifies meta tags which used for field-specified +# searching. NOTE: case-insensitive +# +# $META_TAGS = "keywords|description"; + +# +# This pattern specifies aliases for NMZ.field.* files. +# NOTE: Editing NOT recommended. +# +# %FIELD_ALIASES = ('title' => 'subject', 'author' => 'from'); + +# +# This pattern specifies HTML elements which should be replaced with +# null string when removing them. Normally, the elements are replaced +# with a single space character. +# +# $NON_SEPARATION_ELEMENTS = 'A|TT|CODE|SAMP|KBD|VAR|B|STRONG|I|EM|CITE|FONT|U|'. +# 'STRIKE|BIG|SMALL|DFN|ABBR|ACRONYM|Q|SUB|SUP|SPAN|BDO'; + +#=================================================================== +# +# Critical Numbers +# + +# +# The max size of files which can be loaded in memory at once. +# If you have much memory, you can increase the value. +# If you have less memory, you can decrease the value. +# +$ON_MEMORY_MAX = 5000000; + +# +# The max file size for indexing. Files larger than this +# will be ignored. +# NOTE: This value is usually larger than TEXT_SIZE_MAX because +# binary-formated files such as PDF, Word are larger. 
+# +$FILE_SIZE_MAX = 500000; + +# +# The max text size for indexing. Files larger than this +# will be ignored. +# +$TEXT_SIZE_MAX = 100000; + +# +# The max length of a word. the word longer than this will be ignored. +# +$WORD_LENG_MAX = 40; + + +# +# Weights for HTML elements which are used for term weightning. +# +# %Weight = +# ( +# 'html' => { +# 'title' => 16, +# 'h1' => 8, +# 'h2' => 7, +# 'h3' => 6, +# 'h4' => 5, +# 'h5' => 4, +# 'h6' => 3, +# 'a' => 4, +# 'strong' => 2, +# 'em' => 2, +# 'kbd' => 2, +# 'samp' => 2, +# 'var' => 2, +# 'code' => 2, +# 'cite' => 2, +# 'abbr' => 2, +# 'acronym'=> 2, +# 'dfn' => 2, +# }, +# 'metakey' => 32, # for +# 'headers' => 8, # for Mail/News' headers +# ); + +# +# The max length of a HTML-tagged string which can be processed for +# term weighting. +# NOTE: There are not a few people has a bad manner using +# for changing a font size. +# +# $INVALID_LENG = 128; + +# +# The max length of a field. +# This MUST be smaller than libnamazu.h's BUFSIZE (usually 1024). +# +# $MAX_FIELD_LENGTH = 200; + + +#=================================================================== +# +# Softwares for handling a Japanese text +# + +# +# Network Kanji Filter nkf v1.62 or later +# +# $NKF = "no"; + +# +# KAKASI +# +# $KAKASI = "no -ieuc -oeuc -w"; + +# +# ChaSen 1.51 or later (simple wakatigaki) +# +# $CHASEN = "no -j -F '\%m '"; + +# +# ChaSen 1.51 or later (with noun words extraction) +# +# $CHASEN_NOUN = "no -j -F '\%m %H\\n'"; + +# +# Default Japanese processer: KAKASI or ChaSen. 
+# +# $WAKATI = $none; + + +#=================================================================== +# +# Directories +# +# $LIBDIR = "@PERLLIBDIR@"; +# $FILTERDIR = "@FILTERDIR@"; +# $TEMPLATEDIR = "@TEMPLATEDIR@"; + +# 1; + diff --git a/cgi-bin/mknmzrc.in b/cgi-bin/mknmzrc.in new file mode 100644 index 0000000..d732a13 --- /dev/null +++ b/cgi-bin/mknmzrc.in @@ -0,0 +1,203 @@ +# $Id: mknmzrc.in.dist,v 1.3 2002/03/06 22:47:29 ehood Exp $ +# This is a Namazu configuration file for mknmz. +# +package conf; # Don't remove this line! + +#=================================================================== +# +# Administrator's email address +# +$ADDRESS = '@@ADMIN_ADDRESS@@'; + + +#=================================================================== +# +# Regular Expression Patterns +# + +# +# This pattern specifies HTML suffixes. +# +$HTML_SUFFIX = "html|[ps]html"; + +# +# This pattern specifies file names which will be targeted. +# NOTE: It can be specified by --allow=regex option. +# Do NOT use `$' or `^' anchors. +# Case-insensitive. +# +$ALLOW_FILE = ".*\\.(?:$HTML_SUFFIX)|.*\\.txt" . # HTML, plain text + "|.*\\.gz|.*\\.Z|.*\\.bz2" . # Compressed files + "|\\d+|[-\\w]+\\.[1-9n]"; # Mail/News, man + +# +# This pattern specifies file names which will NOT be targeted. +# NOTE: It can be specified by --deny=regex option. +# Do NOT use `$' or `^' anchors. +# Case-insensitive. +# +$DENY_FILE = "(index|threads)\\.html|.*\\.(gif|png|jpg|jpeg)|.*\\.tar\\.gz|core|.*\\.bak|.*~|\\..*|\x23.*"; + +# +# This pattern specifies PATHNAMEs which will NOT be targeted. +# NOTE: Usually specified by --exclude=regex option. +# +# $EXCLUDE_PATH = undef; + +# +# This pattern specifies file names which can be omitted +# in URI. e.g., 'index.html|index.htm|Default.html' +# +# NOTE: This is similar to Apache's "DirectoryIndex" directive. +# +# $DIRECTORY_INDEX = ""; + +# +# This pattern specifies Mail/News's fields in its header which +# should be searchable. 
NOTE: case-insensitive +# +# $REMAIN_HEADER = "From|Date|Message-ID"; + +# +# This pattern specifies fields which used for field-specified +# searching. NOTE: case-insensitive +# +# $SEARCH_FIELD = "message-id|subject|from|date|uri|newsgroups|to|summary|size"; + +# +# This pattern specifies meta tags which used for field-specified +# searching. NOTE: case-insensitive +# +# $META_TAGS = "keywords|description"; + +# +# This pattern specifies aliases for NMZ.field.* files. +# NOTE: Editing NOT recommended. +# +# %FIELD_ALIASES = ('title' => 'subject', 'author' => 'from'); + +# +# This pattern specifies HTML elements which should be replaced with +# null string when removing them. Normally, the elements are replaced +# with a single space character. +# +# $NON_SEPARATION_ELEMENTS = 'A|TT|CODE|SAMP|KBD|VAR|B|STRONG|I|EM|CITE|FONT|U|'. +# 'STRIKE|BIG|SMALL|DFN|ABBR|ACRONYM|Q|SUB|SUP|SPAN|BDO'; + +#=================================================================== +# +# Critical Numbers +# + +# +# The max size of files which can be loaded in memory at once. +# If you have much memory, you can increase the value. +# If you have less memory, you can decrease the value. +# +$ON_MEMORY_MAX = 5000000; + +# +# The max file size for indexing. Files larger than this +# will be ignored. +# NOTE: This value is usually larger than TEXT_SIZE_MAX because +# binary-formated files such as PDF, Word are larger. +# +$FILE_SIZE_MAX = 500000; + +# +# The max text size for indexing. Files larger than this +# will be ignored. +# +$TEXT_SIZE_MAX = 100000; + +# +# The max length of a word. the word longer than this will be ignored. +# +$WORD_LENG_MAX = 40; + + +# +# Weights for HTML elements which are used for term weightning. 
+# +# %Weight = +# ( +# 'html' => { +# 'title' => 16, +# 'h1' => 8, +# 'h2' => 7, +# 'h3' => 6, +# 'h4' => 5, +# 'h5' => 4, +# 'h6' => 3, +# 'a' => 4, +# 'strong' => 2, +# 'em' => 2, +# 'kbd' => 2, +# 'samp' => 2, +# 'var' => 2, +# 'code' => 2, +# 'cite' => 2, +# 'abbr' => 2, +# 'acronym'=> 2, +# 'dfn' => 2, +# }, +# 'metakey' => 32, # for +# 'headers' => 8, # for Mail/News' headers +# ); + +# +# The max length of a HTML-tagged string which can be processed for +# term weighting. +# NOTE: There are not a few people has a bad manner using +# for changing a font size. +# +# $INVALID_LENG = 128; + +# +# The max length of a field. +# This MUST be smaller than libnamazu.h's BUFSIZE (usually 1024). +# +# $MAX_FIELD_LENGTH = 200; + + +#=================================================================== +# +# Softwares for handling a Japanese text +# + +# +# Network Kanji Filter nkf v1.62 or later +# +# $NKF = "no"; + +# +# KAKASI +# +# $KAKASI = "no -ieuc -oeuc -w"; + +# +# ChaSen 1.51 or later (simple wakatigaki) +# +# $CHASEN = "no -j -F '\%m '"; + +# +# ChaSen 1.51 or later (with noun words extraction) +# +# $CHASEN_NOUN = "no -j -F '\%m %H\\n'"; + +# +# Default Japanese processer: KAKASI or ChaSen. +# +# $WAKATI = $none; + + +#=================================================================== +# +# Directories +# +# $LIBDIR = "@PERLLIBDIR@"; +# $FILTERDIR = "@FILTERDIR@"; +# $TEMPLATEDIR = "@TEMPLATEDIR@"; + +# 1; + diff --git a/cgi-bin/mknmzrc.in.dist b/cgi-bin/mknmzrc.in.dist new file mode 100644 index 0000000..d732a13 --- /dev/null +++ b/cgi-bin/mknmzrc.in.dist @@ -0,0 +1,203 @@ +# $Id: mknmzrc.in.dist,v 1.3 2002/03/06 22:47:29 ehood Exp $ +# This is a Namazu configuration file for mknmz. +# +package conf; # Don't remove this line! 
+ +#=================================================================== +# +# Administrator's email address +# +$ADDRESS = '@@ADMIN_ADDRESS@@'; + + +#=================================================================== +# +# Regular Expression Patterns +# + +# +# This pattern specifies HTML suffixes. +# +$HTML_SUFFIX = "html|[ps]html"; + +# +# This pattern specifies file names which will be targeted. +# NOTE: It can be specified by --allow=regex option. +# Do NOT use `$' or `^' anchors. +# Case-insensitive. +# +$ALLOW_FILE = ".*\\.(?:$HTML_SUFFIX)|.*\\.txt" . # HTML, plain text + "|.*\\.gz|.*\\.Z|.*\\.bz2" . # Compressed files + "|\\d+|[-\\w]+\\.[1-9n]"; # Mail/News, man + +# +# This pattern specifies file names which will NOT be targeted. +# NOTE: It can be specified by --deny=regex option. +# Do NOT use `$' or `^' anchors. +# Case-insensitive. +# +$DENY_FILE = "(index|threads)\\.html|.*\\.(gif|png|jpg|jpeg)|.*\\.tar\\.gz|core|.*\\.bak|.*~|\\..*|\x23.*"; + +# +# This pattern specifies PATHNAMEs which will NOT be targeted. +# NOTE: Usually specified by --exclude=regex option. +# +# $EXCLUDE_PATH = undef; + +# +# This pattern specifies file names which can be omitted +# in URI. e.g., 'index.html|index.htm|Default.html' +# +# NOTE: This is similar to Apache's "DirectoryIndex" directive. +# +# $DIRECTORY_INDEX = ""; + +# +# This pattern specifies Mail/News's fields in its header which +# should be searchable. NOTE: case-insensitive +# +# $REMAIN_HEADER = "From|Date|Message-ID"; + +# +# This pattern specifies fields which used for field-specified +# searching. NOTE: case-insensitive +# +# $SEARCH_FIELD = "message-id|subject|from|date|uri|newsgroups|to|summary|size"; + +# +# This pattern specifies meta tags which used for field-specified +# searching. NOTE: case-insensitive +# +# $META_TAGS = "keywords|description"; + +# +# This pattern specifies aliases for NMZ.field.* files. +# NOTE: Editing NOT recommended. 
+# +# %FIELD_ALIASES = ('title' => 'subject', 'author' => 'from'); + +# +# This pattern specifies HTML elements which should be replaced with +# null string when removing them. Normally, the elements are replaced +# with a single space character. +# +# $NON_SEPARATION_ELEMENTS = 'A|TT|CODE|SAMP|KBD|VAR|B|STRONG|I|EM|CITE|FONT|U|'. +# 'STRIKE|BIG|SMALL|DFN|ABBR|ACRONYM|Q|SUB|SUP|SPAN|BDO'; + +#=================================================================== +# +# Critical Numbers +# + +# +# The max size of files which can be loaded in memory at once. +# If you have much memory, you can increase the value. +# If you have less memory, you can decrease the value. +# +$ON_MEMORY_MAX = 5000000; + +# +# The max file size for indexing. Files larger than this +# will be ignored. +# NOTE: This value is usually larger than TEXT_SIZE_MAX because +# binary-formated files such as PDF, Word are larger. +# +$FILE_SIZE_MAX = 500000; + +# +# The max text size for indexing. Files larger than this +# will be ignored. +# +$TEXT_SIZE_MAX = 100000; + +# +# The max length of a word. the word longer than this will be ignored. +# +$WORD_LENG_MAX = 40; + + +# +# Weights for HTML elements which are used for term weightning. +# +# %Weight = +# ( +# 'html' => { +# 'title' => 16, +# 'h1' => 8, +# 'h2' => 7, +# 'h3' => 6, +# 'h4' => 5, +# 'h5' => 4, +# 'h6' => 3, +# 'a' => 4, +# 'strong' => 2, +# 'em' => 2, +# 'kbd' => 2, +# 'samp' => 2, +# 'var' => 2, +# 'code' => 2, +# 'cite' => 2, +# 'abbr' => 2, +# 'acronym'=> 2, +# 'dfn' => 2, +# }, +# 'metakey' => 32, # for +# 'headers' => 8, # for Mail/News' headers +# ); + +# +# The max length of a HTML-tagged string which can be processed for +# term weighting. +# NOTE: There are not a few people has a bad manner using +# for changing a font size. +# +# $INVALID_LENG = 128; + +# +# The max length of a field. +# This MUST be smaller than libnamazu.h's BUFSIZE (usually 1024). 
+# +# $MAX_FIELD_LENGTH = 200; + + +#=================================================================== +# +# Softwares for handling a Japanese text +# + +# +# Network Kanji Filter nkf v1.62 or later +# +# $NKF = "no"; + +# +# KAKASI +# +# $KAKASI = "no -ieuc -oeuc -w"; + +# +# ChaSen 1.51 or later (simple wakatigaki) +# +# $CHASEN = "no -j -F '\%m '"; + +# +# ChaSen 1.51 or later (with noun words extraction) +# +# $CHASEN_NOUN = "no -j -F '\%m %H\\n'"; + +# +# Default Japanese processer: KAKASI or ChaSen. +# +# $WAKATI = $none; + + +#=================================================================== +# +# Directories +# +# $LIBDIR = "@PERLLIBDIR@"; +# $FILTERDIR = "@FILTERDIR@"; +# $TEMPLATEDIR = "@TEMPLATEDIR@"; + +# 1; + diff --git a/cgi-bin/mnav.cgi b/cgi-bin/mnav.cgi new file mode 100755 index 0000000..f1bd726 --- /dev/null +++ b/cgi-bin/mnav.cgi @@ -0,0 +1,183 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: mnav.cgi.in.dist,v 1.5 2002/10/17 03:11:31 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2001-2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
+##
+##    You should have received a copy of the GNU General Public License
+##    along with this program; if not, write to the Free Software
+##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+##    02111-1307, USA
+##--------------------------------------------------------------------------##
+
+package mnav_cgi;
+
+use strict;
+use warnings;
+
+use lib '/home/mharc/lib';
+
+use CGI::Carp;
+use MHArc::CGI;
+
+#############################################################################
+## BEGIN: Config Section
+#############################################################################
+
+## Full pathname to where HTML archives are located.
+my $html_archive_root = '/home/mharc/html';
+
+## URL pathname to where HTML archives are located.
+my $url_archive_root = '/archive/html';
+
+#############################################################################
+## END: Config Section
+#############################################################################
+
+## Query argument name to contain name of archive
+my $argname_archive = 'a';
+
+## Query argument name to contain nav direction ('next' or 'prev')
+my $argname_direction = 'd';
+
+## Query argument name to contain month
+my $argname_month = 'm';
+
+## Query argument name to contain type of index
+my $argname_type = 't';
+
+## Main flow: validate the query arguments, list the period
+## directories of the archive, and redirect the client to the index
+## of the period before or after the requested one.  Whenever no
+## target period can be determined the client is redirected to the
+## archive's top index instead.
+MAIN: {
+    my $form      = MHArc::CGI::parse_input();
+    my $archive   = $form->{$argname_archive};
+    my $direction = $form->{$argname_direction};
+    my $month     = $form->{$argname_month};
+    my $type      = $form->{$argname_type};
+
+    ## Missing query arguments are treated as empty strings so the
+    ## checks below never operate on undef.
+    foreach my $arg ($archive, $direction, $month, $type) {
+        $arg = "" unless defined($arg);
+    }
+
+    if (($archive =~ /\.\./) || ($archive =~ /[\\\/]/)) {
+        warn qq/Fishy looking archive setting: $archive\n/;
+        MHArc::CGI::print_input_error();
+        last MAIN;
+    }
+    ## Periods are YYYY or YYYY-MM (see POD); anchor both ends so
+    ## trailing junk is rejected.
+    if ($month !~ /^\d+(?:-\d+)?\z/) {
+        warn qq/Invalid month: $month\n/;
+        MHArc::CGI::print_input_error();
+        last MAIN;
+    }
+
+    my $server_url = server_url();
+    my $dir = join('/', $html_archive_root, $archive);
+    my $url = $server_url . join('/', $url_archive_root, $archive);
+
+    my $dh;
+    if (!opendir($dh, $dir)) {
+        warn qq/Unable to open "$dir": $!\n/;
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    ## String sort orders periods chronologically as long as the
+    ## names are zero-padded (assumed -- TODO confirm).
+    my @months = sort grep { /^\d+(?:-\d+)?\z/ } readdir($dh);
+    closedir($dh);
+    if (scalar(@months) <= 0) {
+        # No month directories, so jump to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Search for current month in listing
+    my $i;
+    for ($i = 0; $i <= $#months; ++$i) {
+        last if $month eq $months[$i];
+    }
+    if ($i > $#months) {
+        # Requested period is not in the archive: there is no
+        # meaningful neighbour, so jump user to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Adjust offset according to direction
+    if ($direction =~ /prev/) { --$i; } else { ++$i; }
+    if (($i < 0) || ($i > $#months)) {
+        # Hit bounds, so jump user to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Redirect user to new month
+    $url .= '/' . $months[$i] . '/' .
+            ($type eq 't' ? 'threads.html' : 'index.html');
+    MHArc::CGI::print_location($url);
+}
+
+## Build "scheme://host[:port]" for the current request from the CGI
+## environment.  SERVER_PORT is appended only when it is not the
+## scheme's default port and the host value does not already carry a
+## port (HTTP_HOST includes one whenever the client used a
+## non-default port).
+sub server_url {
+    my $host   = $ENV{'HTTP_HOST'} || $ENV{'SERVER_NAME'} ||
+                 $ENV{'SERVER_ADDR'} || 'localhost';
+    my $port   = $ENV{'SERVER_PORT'} || "";
+    my $scheme = (lc($ENV{'HTTPS'} || "") eq 'on') ? 'https' : 'http';
+    my $default_port = ($scheme eq 'https') ? '443' : '80';
+
+    if ($port eq "" || $port eq $default_port ||
+        $host =~ /^(?:\[.*\]|[^:]*):\d+\z/) {
+        $port = "";
+    } else {
+        $port = ":$port";
+    }
+    return "$scheme://$host$port";
+}
+
+########################################################################
+__END__
+
+=head1 NAME
+
+mnav.cgi - mharc CGI program to navigate between period indexes
+
+=head1 SYNOPSIS
+
+  http://.../cgi-bin/mnav?a=<archive>&m=<month>&d=<direction>&t=<type>
+
+=head1 DESCRIPTION
+
+This CGI program is used for the next/prev period navigation for
+an archive.
+
+The CGI program will send a client redirect URL to the period index
+determined by specified input.
+
+=head1 CGI OPTIONS
+
+=over
+
+=item C<a>
+
+The name of the archive. Archive names are defined by C.
+
+=item C<d>
+
+The direction. Possible values are "C<next>" or "C<prev>".
+ +=item C<m> + +The period in YYYY-MM or YYYY format. + +=item C<t> + +The type of index to go to. For thread index, the value should be +set to "C<t>". If not set, or set to something else, date indexes +are used. + +=back + +=head1 VERSION + +C<$Id: mnav.cgi.in.dist,v 1.5 2002/10/17 03:11:31 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/mnav.cgi.in b/cgi-bin/mnav.cgi.in new file mode 100755 index 0000000..f415bf1 --- /dev/null +++ b/cgi-bin/mnav.cgi.in @@ -0,0 +1,183 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: mnav.cgi.in.dist,v 1.5 2002/10/17 03:11:31 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2001-2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License
+##    along with this program; if not, write to the Free Software
+##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+##    02111-1307, USA
+##--------------------------------------------------------------------------##
+
+package mnav_cgi;
+
+use strict;
+use warnings;
+
+use lib '@@SW_ROOT@@/lib';
+
+use CGI::Carp;
+use MHArc::CGI;
+
+#############################################################################
+## BEGIN: Config Section
+#############################################################################
+
+## Full pathname to where HTML archives are located.
+my $html_archive_root = '@@HTML_DIR@@';
+
+## URL pathname to where HTML archives are located.
+my $url_archive_root = '@@HTML_URL@@';
+
+#############################################################################
+## END: Config Section
+#############################################################################
+
+## Query argument name to contain name of archive
+my $argname_archive = 'a';
+
+## Query argument name to contain nav direction ('next' or 'prev')
+my $argname_direction = 'd';
+
+## Query argument name to contain month
+my $argname_month = 'm';
+
+## Query argument name to contain type of index
+my $argname_type = 't';
+
+## Main flow: validate the query arguments, list the period
+## directories of the archive, and redirect the client to the index
+## of the period before or after the requested one.  Whenever no
+## target period can be determined the client is redirected to the
+## archive's top index instead.
+MAIN: {
+    my $form      = MHArc::CGI::parse_input();
+    my $archive   = $form->{$argname_archive};
+    my $direction = $form->{$argname_direction};
+    my $month     = $form->{$argname_month};
+    my $type      = $form->{$argname_type};
+
+    ## Missing query arguments are treated as empty strings so the
+    ## checks below never operate on undef.
+    foreach my $arg ($archive, $direction, $month, $type) {
+        $arg = "" unless defined($arg);
+    }
+
+    if (($archive =~ /\.\./) || ($archive =~ /[\\\/]/)) {
+        warn qq/Fishy looking archive setting: $archive\n/;
+        MHArc::CGI::print_input_error();
+        last MAIN;
+    }
+    ## Periods are YYYY or YYYY-MM (see POD); anchor both ends so
+    ## trailing junk is rejected.
+    if ($month !~ /^\d+(?:-\d+)?\z/) {
+        warn qq/Invalid month: $month\n/;
+        MHArc::CGI::print_input_error();
+        last MAIN;
+    }
+
+    my $server_url = server_url();
+    my $dir = join('/', $html_archive_root, $archive);
+    my $url = $server_url . join('/', $url_archive_root, $archive);
+
+    my $dh;
+    if (!opendir($dh, $dir)) {
+        warn qq/Unable to open "$dir": $!\n/;
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    ## String sort orders periods chronologically as long as the
+    ## names are zero-padded (assumed -- TODO confirm).
+    my @months = sort grep { /^\d+(?:-\d+)?\z/ } readdir($dh);
+    closedir($dh);
+    if (scalar(@months) <= 0) {
+        # No month directories, so jump to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Search for current month in listing
+    my $i;
+    for ($i = 0; $i <= $#months; ++$i) {
+        last if $month eq $months[$i];
+    }
+    if ($i > $#months) {
+        # Requested period is not in the archive: there is no
+        # meaningful neighbour, so jump user to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Adjust offset according to direction
+    if ($direction =~ /prev/) { --$i; } else { ++$i; }
+    if (($i < 0) || ($i > $#months)) {
+        # Hit bounds, so jump user to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Redirect user to new month
+    $url .= '/' . $months[$i] . '/' .
+            ($type eq 't' ? 'threads.html' : 'index.html');
+    MHArc::CGI::print_location($url);
+}
+
+## Build "scheme://host[:port]" for the current request from the CGI
+## environment.  SERVER_PORT is appended only when it is not the
+## scheme's default port and the host value does not already carry a
+## port (HTTP_HOST includes one whenever the client used a
+## non-default port).
+sub server_url {
+    my $host   = $ENV{'HTTP_HOST'} || $ENV{'SERVER_NAME'} ||
+                 $ENV{'SERVER_ADDR'} || 'localhost';
+    my $port   = $ENV{'SERVER_PORT'} || "";
+    my $scheme = (lc($ENV{'HTTPS'} || "") eq 'on') ? 'https' : 'http';
+    my $default_port = ($scheme eq 'https') ? '443' : '80';
+
+    if ($port eq "" || $port eq $default_port ||
+        $host =~ /^(?:\[.*\]|[^:]*):\d+\z/) {
+        $port = "";
+    } else {
+        $port = ":$port";
+    }
+    return "$scheme://$host$port";
+}
+
+########################################################################
+__END__
+
+=head1 NAME
+
+mnav.cgi - mharc CGI program to navigate between period indexes
+
+=head1 SYNOPSIS
+
+  http://.../cgi-bin/mnav?a=<archive>&m=<month>&d=<direction>&t=<type>
+
+=head1 DESCRIPTION
+
+This CGI program is used for the next/prev period navigation for
+an archive.
+
+The CGI program will send a client redirect URL to the period index
+determined by specified input.
+
+=head1 CGI OPTIONS
+
+=over
+
+=item C<a>
+
+The name of the archive. Archive names are defined by C.
+
+=item C<d>
+
+The direction. Possible values are "C<next>" or "C<prev>".
+ +=item C<m> + +The period in YYYY-MM or YYYY format. + +=item C<t> + +The type of index to go to. For thread index, the value should be +set to "C<t>". If not set, or set to something else, date indexes +are used. + +=back + +=head1 VERSION + +C<$Id: mnav.cgi.in.dist,v 1.5 2002/10/17 03:11:31 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/mnav.cgi.in.dist b/cgi-bin/mnav.cgi.in.dist new file mode 100755 index 0000000..f415bf1 --- /dev/null +++ b/cgi-bin/mnav.cgi.in.dist @@ -0,0 +1,183 @@ +#!/usr/bin/perl +##--------------------------------------------------------------------------## +## File: +## $Id: mnav.cgi.in.dist,v 1.5 2002/10/17 03:11:31 ehood Exp $ +## Author: +## Earl Hood earl@earlhood.com +## Description: +## POD at end-of-file. +##--------------------------------------------------------------------------## +## Copyright (C) 2001-2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License
+##    along with this program; if not, write to the Free Software
+##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+##    02111-1307, USA
+##--------------------------------------------------------------------------##
+
+package mnav_cgi;
+
+use strict;
+use warnings;
+
+use lib '@@SW_ROOT@@/lib';
+
+use CGI::Carp;
+use MHArc::CGI;
+
+#############################################################################
+## BEGIN: Config Section
+#############################################################################
+
+## Full pathname to where HTML archives are located.
+my $html_archive_root = '@@HTML_DIR@@';
+
+## URL pathname to where HTML archives are located.
+my $url_archive_root = '@@HTML_URL@@';
+
+#############################################################################
+## END: Config Section
+#############################################################################
+
+## Query argument name to contain name of archive
+my $argname_archive = 'a';
+
+## Query argument name to contain nav direction ('next' or 'prev')
+my $argname_direction = 'd';
+
+## Query argument name to contain month
+my $argname_month = 'm';
+
+## Query argument name to contain type of index
+my $argname_type = 't';
+
+## Main flow: validate the query arguments, list the period
+## directories of the archive, and redirect the client to the index
+## of the period before or after the requested one.  Whenever no
+## target period can be determined the client is redirected to the
+## archive's top index instead.
+MAIN: {
+    my $form      = MHArc::CGI::parse_input();
+    my $archive   = $form->{$argname_archive};
+    my $direction = $form->{$argname_direction};
+    my $month     = $form->{$argname_month};
+    my $type      = $form->{$argname_type};
+
+    ## Missing query arguments are treated as empty strings so the
+    ## checks below never operate on undef.
+    foreach my $arg ($archive, $direction, $month, $type) {
+        $arg = "" unless defined($arg);
+    }
+
+    if (($archive =~ /\.\./) || ($archive =~ /[\\\/]/)) {
+        warn qq/Fishy looking archive setting: $archive\n/;
+        MHArc::CGI::print_input_error();
+        last MAIN;
+    }
+    ## Periods are YYYY or YYYY-MM (see POD); anchor both ends so
+    ## trailing junk is rejected.
+    if ($month !~ /^\d+(?:-\d+)?\z/) {
+        warn qq/Invalid month: $month\n/;
+        MHArc::CGI::print_input_error();
+        last MAIN;
+    }
+
+    my $server_url = server_url();
+    my $dir = join('/', $html_archive_root, $archive);
+    my $url = $server_url . join('/', $url_archive_root, $archive);
+
+    my $dh;
+    if (!opendir($dh, $dir)) {
+        warn qq/Unable to open "$dir": $!\n/;
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    ## String sort orders periods chronologically as long as the
+    ## names are zero-padded (assumed -- TODO confirm).
+    my @months = sort grep { /^\d+(?:-\d+)?\z/ } readdir($dh);
+    closedir($dh);
+    if (scalar(@months) <= 0) {
+        # No month directories, so jump to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Search for current month in listing
+    my $i;
+    for ($i = 0; $i <= $#months; ++$i) {
+        last if $month eq $months[$i];
+    }
+    if ($i > $#months) {
+        # Requested period is not in the archive: there is no
+        # meaningful neighbour, so jump user to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Adjust offset according to direction
+    if ($direction =~ /prev/) { --$i; } else { ++$i; }
+    if (($i < 0) || ($i > $#months)) {
+        # Hit bounds, so jump user to top index
+        MHArc::CGI::print_location($url);
+        last MAIN;
+    }
+
+    # Redirect user to new month
+    $url .= '/' . $months[$i] . '/' .
+            ($type eq 't' ? 'threads.html' : 'index.html');
+    MHArc::CGI::print_location($url);
+}
+
+## Build "scheme://host[:port]" for the current request from the CGI
+## environment.  SERVER_PORT is appended only when it is not the
+## scheme's default port and the host value does not already carry a
+## port (HTTP_HOST includes one whenever the client used a
+## non-default port).
+sub server_url {
+    my $host   = $ENV{'HTTP_HOST'} || $ENV{'SERVER_NAME'} ||
+                 $ENV{'SERVER_ADDR'} || 'localhost';
+    my $port   = $ENV{'SERVER_PORT'} || "";
+    my $scheme = (lc($ENV{'HTTPS'} || "") eq 'on') ? 'https' : 'http';
+    my $default_port = ($scheme eq 'https') ? '443' : '80';
+
+    if ($port eq "" || $port eq $default_port ||
+        $host =~ /^(?:\[.*\]|[^:]*):\d+\z/) {
+        $port = "";
+    } else {
+        $port = ":$port";
+    }
+    return "$scheme://$host$port";
+}
+
+########################################################################
+__END__
+
+=head1 NAME
+
+mnav.cgi - mharc CGI program to navigate between period indexes
+
+=head1 SYNOPSIS
+
+  http://.../cgi-bin/mnav?a=<archive>&m=<month>&d=<direction>&t=<type>
+
+=head1 DESCRIPTION
+
+This CGI program is used for the next/prev period navigation for
+an archive.
+
+The CGI program will send a client redirect URL to the period index
+determined by specified input.
+
+=head1 CGI OPTIONS
+
+=over
+
+=item C<a>
+
+The name of the archive. Archive names are defined by C.
+
+=item C<d>
+
+The direction. Possible values are "C<next>" or "C<prev>".
+ +=item C<m> + +The period in YYYY-MM or YYYY format. + +=item C<t> + +The type of index to go to. For thread index, the value should be +set to "C<t>". If not set, or set to something else, date indexes +are used. + +=back + +=head1 VERSION + +C<$Id: mnav.cgi.in.dist,v 1.5 2002/10/17 03:11:31 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/cgi-bin/namazu.cgi b/cgi-bin/namazu.cgi new file mode 120000 index 0000000..715722f --- /dev/null +++ b/cgi-bin/namazu.cgi @@ -0,0 +1 @@ +/usr/lib/cgi-bin/namazu.cgi \ No newline at end of file diff --git a/cgi-bin/template/NMZ.body b/cgi-bin/template/NMZ.body new file mode 100644 index 0000000..f7706f1 --- /dev/null +++ b/cgi-bin/template/NMZ.body @@ -0,0 +1,184 @@ +

Query

+ +

Single term query

+

+The query specifies only one term for retrieving all +documents which contain the term. e.g., +

+ +

+namazu +

+ +

AND query

+ +

+The query specifies two or more terms for retrieving all +documents which contain both terms. You can insert the +and operator between the terms. e.g., +

+ +

+Linux and Netscape +

+ +

+You can omit the and operator. Terms which are +separated by one or more spaces are assumed to be an AND query. +

+ +

OR query

+

+The query specifies two or more terms for retrieving all +documents which contain either term. You can insert the +or operator between the terms. +e.g., +

+ +

+Linux or FreeBSD +

+ +

NOT query

+

+The query specifies two or more terms for retrieving all +documents which contain the first term but don't contain the +following terms. You can insert the not +operator between the terms to do a NOT query. e.g., +

+ +

+Linux not UNIX +

+ + +

Grouping

+

+You can group queries by surrounding them by +parentheses. The parentheses should be separated by one or +more spaces. e.g., +

+ +

+( Linux or FreeBSD ) and Netscape not Windows +

+ +

Phrase searching

+

+You can search for a phrase which consists of two or more terms +by surrounding them with double quotes like +"..." or with braces like {...}. +In Namazu, precision of phrase searching is not 100 %, +so it causes wrong results occasionally. e.g., +

+ +

+{GNU Emacs} +

+ + + +

Substring matching

+

+There are three types of substring matching. +

+ +
+
Prefix matching +
inter* (terms which begin with inter) +
Inside matching +
*text* (terms which contain text) +
Suffix matching +
*net (terms which end +with net) +
+ + +

Regular expressions

+ +

+You can use regular expressions for pattern matching. The +regular expressions must be surrounded by slashes like /.../. Namazu uses Ruby's +regular expression engine, which offers a generally Perl-compatible flavor. +e.g., +

+ +

+/pro(gram|blem)s?/ +

+ + +

Field-specified searching

+

+You can limit your search to specific fields such as +Subject:, From:, +Message-Id:. It's especially convenient for +Mail/News documents. e.g., +

+ +
    +
  • +subject:Linux
    +(Retrieving all documents which contain Linux +in a Subject: field) + +
  • +subject:"GNU Emacs"
    +(Retrieving all documents which contain GNU Emacs +in a Subject: field) + +
  • +from:foo@bar.jp
    +(Retrieving all documents which contain foo@bar.jp +in a From: field) + + +
  • +message-id:<199801240555.OAA18737@foo.bar.jp>
    +(Retrieving a certain document which contains specified +Message-Id:) +
+ +

Notes

+ +
    +
  • In any queries, Namazu ignores case distinctions of +alphabet characters. In other words, Namazu does +case-insensitive pattern matching in any time. + + +
  • Japanese phrases are forced to be segmented into +morphemes automatically and are handled as phrase searches. This processing +occasionally causes invalid segmentation. + + +
  • Alphabet, numbers or a part of symbols (duplicated in +ASCII) characters which are defined in JIS X 0208 (Japanese +Industrial Standards) are handled as ASCII characters. + +
  • Namazu can handle a term which contains symbols like +TCP/IP. Since this handling isn't complete, +you can describe TCP and IP instead of +TCP/IP, but it may cause noisy results. + + +
  • Substring matching and field-specified searching takes +more time than other methods. + +
  • If you want to use and, +or or not simply as terms, you can +surround them respectively with double quotes like "..." or braces like {...}. + + + +
+ diff --git a/cgi-bin/template/NMZ.body.in b/cgi-bin/template/NMZ.body.in new file mode 100644 index 0000000..f7706f1 --- /dev/null +++ b/cgi-bin/template/NMZ.body.in @@ -0,0 +1,184 @@ +

Query

+ +

Single term query

+

+The query specifies only one term for retrieving all +documents which contain the term. e.g., +

+ +

+namazu +

+ +

AND query

+ +

+The query specifies two or more terms for retrieving all +documents which contain both terms. You can insert the +and operator between the terms. e.g., +

+ +

+Linux and Netscape +

+ +

+You can omit the and operator. Terms which are +separated by one or more spaces are assumed to be an AND query. +

+ +

OR query

+

+The query specifies two or more terms for retrieving all +documents which contain either term. You can insert the +or operator between the terms. +e.g., +

+ +

+Linux or FreeBSD +

+ +

NOT query

+

+The query specifies two or more terms for retrieving all +documents which contain the first term but don't contain the +following terms. You can insert the not +operator between the terms to do a NOT query. e.g., +

+ +

+Linux not UNIX +

+ + +

Grouping

+

+You can group queries by surrounding them by +parentheses. The parentheses should be separated by one or +more spaces. e.g., +

+ +

+( Linux or FreeBSD ) and Netscape not Windows +

+ +

Phrase searching

+

+You can search for a phrase which consists of two or more terms +by surrounding them with double quotes like +"..." or with braces like {...}. +In Namazu, precision of phrase searching is not 100 %, +so it causes wrong results occasionally. e.g., +

+ +

+{GNU Emacs} +

+ + + +

Substring matching

+

+There are three types of substring matching. +

+ +
+
Prefix matching +
inter* (terms which begin with inter) +
Inside matching +
*text* (terms which contain text) +
Suffix matching +
*net (terms which end +with net) +
+ + +

Regular expressions

+ +

+You can use regular expressions for pattern matching. The +regular expressions must be surrounded by slashes like /.../. Namazu uses Ruby's +regular expression engine, which offers a generally Perl-compatible flavor. +e.g., +

+ +

+/pro(gram|blem)s?/ +

+ + +

Field-specified searching

+

+You can limit your search to specific fields such as +Subject:, From:, +Message-Id:. It's especially convenient for +Mail/News documents. e.g., +

+ +
    +
  • +subject:Linux
    +(Retrieving all documents which contain Linux +in a Subject: field) + +
  • +subject:"GNU Emacs"
    +(Retrieving all documents which contain GNU Emacs +in a Subject: field) + +
  • +from:foo@bar.jp
    +(Retrieving all documents which contain foo@bar.jp +in a From: field) + + +
  • +message-id:<199801240555.OAA18737@foo.bar.jp>
    +(Retrieving a certain document which contains specified +Message-Id:) +
+ +

Notes

+ +
    +
  • In any queries, Namazu ignores case distinctions of +alphabet characters. In other words, Namazu does +case-insensitive pattern matching in any time. + + +
  • Japanese phrases are forced to be segmented into +morphemes automatically and are handled as phrase searches. This processing +occasionally causes invalid segmentation. + + +
  • Alphabet, numbers or a part of symbols (duplicated in +ASCII) characters which are defined in JIS X 0208 (Japanese +Industrial Standards) are handled as ASCII characters. + +
  • Namazu can handle a term which contains symbols like +TCP/IP. Since this handling isn't complete, +you can describe TCP and IP instead of +TCP/IP, but it may cause noisy results. + + +
  • Substring matching and field-specified searching takes +more time than other methods. + +
  • If you want to use and, +or or not simply as terms, you can +surround them respectively with double quotes like "..." or braces like {...}. + + + +
+ diff --git a/cgi-bin/template/NMZ.body.in.dist b/cgi-bin/template/NMZ.body.in.dist new file mode 100644 index 0000000..f7706f1 --- /dev/null +++ b/cgi-bin/template/NMZ.body.in.dist @@ -0,0 +1,184 @@ +

Query

+ +

Single term query

+

+The query specifies only one term for retrieving all +documents which contain the term. e.g., +

+ +

+namazu +

+ +

AND query

+ +

+The query specifies two or more terms for retrieving all +documents which contain both terms. You can insert the +and operator between the terms. e.g., +

+ +

+Linux and Netscape +

+ +

+You can omit the and operator. Terms which are +separated by one or more spaces are assumed to be an AND query. +

+ +

OR query

+

+The query specifies two or more terms for retrieving all +documents which contain either term. You can insert the +or operator between the terms. +e.g., +

+ +

+Linux or FreeBSD +

+ +

NOT query

+

+The query specifies two or more terms for retrieving all +documents which contain the first term but don't contain the +following terms. You can insert the not +operator between the terms to do a NOT query. e.g., +

+ +

+Linux not UNIX +

+ + +

Grouping

+

+You can group queries by surrounding them by +parentheses. The parentheses should be separated by one or +more spaces. e.g., +

+ +

+( Linux or FreeBSD ) and Netscape not Windows +

+ +

Phrase searching

+

+You can search for a phrase which consists of two or more terms +by surrounding them with double quotes like +"..." or with braces like {...}. +In Namazu, precision of phrase searching is not 100 %, +so it causes wrong results occasionally. e.g., +

+ +

+{GNU Emacs} +

+ + + +

Substring matching

+

+There are three types of substring matching. +

+ +
+
Prefix matching +
inter* (terms which begin with inter) +
Inside matching +
*text* (terms which contain text) +
Suffix matching +
*net (terms which end +with net) +
+ + +

Regular expressions

+ +

+You can use regular expressions for pattern matching. The +regular expressions must be surrounded by slashes like /.../. Namazu uses Ruby's +regular expression engine, which offers a generally Perl-compatible flavor. +e.g., +

+ +

+/pro(gram|blem)s?/ +

+ + +

Field-specified searching

+

+You can limit your search to specific fields such as +Subject:, From:, +Message-Id:. It's especially convenient for +Mail/News documents. e.g., +

+ +
    +
  • +subject:Linux
    +(Retrieving all documents which contain Linux +in a Subject: field) + +
  • +subject:"GNU Emacs"
    +(Retrieving all documents which contain GNU Emacs +in a Subject: field) + +
  • +from:foo@bar.jp
    +(Retrieving all documents which contain foo@bar.jp +in a From: field) + + +
  • +message-id:<199801240555.OAA18737@foo.bar.jp>
    +(Retrieving a certain document which contains specified +Message-Id:) +
+ +

Notes

+ +
    +
  • In any queries, Namazu ignores case distinctions of +alphabet characters. In other words, Namazu does +case-insensitive pattern matching in any time. + + +
  • Japanese phrases are forced to be segmented into +morphemes automatically and are handled as phrase searches. This processing +occasionally causes invalid segmentation. + + +
  • Alphabet, numbers or a part of symbols (duplicated in +ASCII) characters which are defined in JIS X 0208 (Japanese +Industrial Standards) are handled as ASCII characters. + +
  • Namazu can handle a term which contains symbols like +TCP/IP. Since this handling isn't complete, +you can describe TCP and IP instead of +TCP/IP, but it may cause noisy results. + + +
  • Substring matching and field-specified searching takes +more time than other methods. + +
  • If you want to use and, +or or not simply as terms, you can +surround them respectively with double quotes like "..." or braces like {...}. + + + +
+ diff --git a/cgi-bin/template/NMZ.foot b/cgi-bin/template/NMZ.foot new file mode 100644 index 0000000..6e5abdb --- /dev/null +++ b/cgi-bin/template/NMZ.foot @@ -0,0 +1,7 @@ +
+
+This search system is powered by +Namazu +
+ + diff --git a/cgi-bin/template/NMZ.foot.in b/cgi-bin/template/NMZ.foot.in new file mode 100644 index 0000000..6e5abdb --- /dev/null +++ b/cgi-bin/template/NMZ.foot.in @@ -0,0 +1,7 @@ +
+
+This search system is powered by +Namazu +
+ + diff --git a/cgi-bin/template/NMZ.foot.in.dist b/cgi-bin/template/NMZ.foot.in.dist new file mode 100644 index 0000000..6e5abdb --- /dev/null +++ b/cgi-bin/template/NMZ.foot.in.dist @@ -0,0 +1,7 @@ +
+
+This search system is powered by +Namazu +
+ + diff --git a/cgi-bin/template/NMZ.head b/cgi-bin/template/NMZ.head new file mode 100644 index 0000000..e9f2743 --- /dev/null +++ b/cgi-bin/template/NMZ.head @@ -0,0 +1,81 @@ + + +Mail Archive Search + + + + + +
+

Search String: + + + +Display: + +Description: + +Sort: + +

+
diff --git a/cgi-bin/template/NMZ.head.in b/cgi-bin/template/NMZ.head.in new file mode 100644 index 0000000..6d7f091 --- /dev/null +++ b/cgi-bin/template/NMZ.head.in @@ -0,0 +1,81 @@ + + +Mail Archive Search + + + + + +
+

Search String: + + + +Display: + +Description: + +Sort: + +

+
diff --git a/cgi-bin/template/NMZ.head.in.dist b/cgi-bin/template/NMZ.head.in.dist new file mode 100644 index 0000000..6d7f091 --- /dev/null +++ b/cgi-bin/template/NMZ.head.in.dist @@ -0,0 +1,81 @@ + + +Mail Archive Search + + + + + +
+

Search String: + + + +Display: + +Description: + +Sort: + +

+
diff --git a/cgi-bin/template/NMZ.result.normal b/cgi-bin/template/NMZ.result.normal new file mode 100644 index 0000000..cd8d609 --- /dev/null +++ b/cgi-bin/template/NMZ.result.normal @@ -0,0 +1,5 @@ +
${namazu::counter}. ${title} (score: ${namazu::score}) +
Author: ${author} +
Date: ${date} +
${summary} +
${uri} (${size} bytes)

diff --git a/cgi-bin/template/NMZ.result.normal.in b/cgi-bin/template/NMZ.result.normal.in new file mode 100644 index 0000000..cd8d609 --- /dev/null +++ b/cgi-bin/template/NMZ.result.normal.in @@ -0,0 +1,5 @@ +
${namazu::counter}. ${title} (score: ${namazu::score}) +
Author: ${author} +
Date: ${date} +
${summary} +
${uri} (${size} bytes)

diff --git a/cgi-bin/template/NMZ.result.normal.in.dist b/cgi-bin/template/NMZ.result.normal.in.dist new file mode 100644 index 0000000..cd8d609 --- /dev/null +++ b/cgi-bin/template/NMZ.result.normal.in.dist @@ -0,0 +1,5 @@ +
${namazu::counter}. ${title} (score: ${namazu::score}) +
Author: ${author} +
Date: ${date} +
${summary} +
${uri} (${size} bytes)

diff --git a/cgi-bin/template/NMZ.result.short b/cgi-bin/template/NMZ.result.short new file mode 100644 index 0000000..7f087a9 --- /dev/null +++ b/cgi-bin/template/NMZ.result.short @@ -0,0 +1,2 @@ +
${namazu::counter}. ${title} (score: ${namazu::score}) +
${uri} (${size} bytes)

diff --git a/cgi-bin/template/NMZ.result.short.in b/cgi-bin/template/NMZ.result.short.in new file mode 100644 index 0000000..7f087a9 --- /dev/null +++ b/cgi-bin/template/NMZ.result.short.in @@ -0,0 +1,2 @@ +
${namazu::counter}. ${title} (score: ${namazu::score}) +
${uri} (${size} bytes)

diff --git a/cgi-bin/template/NMZ.result.short.in.dist b/cgi-bin/template/NMZ.result.short.in.dist new file mode 100644 index 0000000..7f087a9 --- /dev/null +++ b/cgi-bin/template/NMZ.result.short.in.dist @@ -0,0 +1,2 @@ +
${namazu::counter}. ${title} (score: ${namazu::score}) +
${uri} (${size} bytes)

diff --git a/cgi-bin/template/NMZ.tips b/cgi-bin/template/NMZ.tips new file mode 100644 index 0000000..b19d4d6 --- /dev/null +++ b/cgi-bin/template/NMZ.tips @@ -0,0 +1,60 @@ +

Tips on searching

+ +

+If you have trouble with searching, you can check the following tips. +

+ +
    +
  • Check a spelling of your keyword
    +Namazu can't find anything with wrong spelling. + +
  • Add keywords
    + +If you gained no results or too few results, you can add one +or more related keywords with or operator. It makes your search +more hittable. e.g.,
    +tex or ptex or latex or latex2e
    + +If you gained too many results, you can add one or more +related keywords with and +operator. It makes your search more limited. e.g.,
    +latex and dvi2ps and eps + +
  • Try substring matching
    + +If you gained no results or too few results, you can try +substring matching. + +You can specify tex* to +search for terms which begin with +tex (e.g., tex, +texi2html, +texindex, text). +
    + +You can specify *tex to +search for terms which terminate with tex (e.g., +bibtex, +jlatex, latex, +platex, ptex, vertex). +
    + +You can specify *tex* to +search for terms which contain tex (many). +
    + +
  • You tried phrase searching but it hit documents which +didn't contain your phrase.
    + +It's a defect of Namazu. Precision of phrase searching is +not 100 %, so it causes wrong results occasionally. + +
  • You want to use and, +or or not as ordinary keywords
    +You can surround them respectively with double quotes like "..." or braces like {...}. + +
diff --git a/cgi-bin/template/NMZ.tips.in b/cgi-bin/template/NMZ.tips.in new file mode 100644 index 0000000..b19d4d6 --- /dev/null +++ b/cgi-bin/template/NMZ.tips.in @@ -0,0 +1,60 @@ +

Tips on searching

+ +

+If you have trouble with searching, you can check the following tips. +

+ +
    +
  • Check a spelling of your keyword
    +Namazu can't find anything with wrong spelling. + +
  • Add keywords
    + +If you gained no results or too few results, you can add one +or more related keywords with or operator. It makes your search +more hittable. e.g.,
    +tex or ptex or latex or latex2e
    + +If you gained too many results, you can add one or more +related keywords with and +operator. It makes your search more limited. e.g.,
    +latex and dvi2ps and eps + +
  • Try substring matching
    + +If you gained no results or too few results, you can try +substring matching. + +You can specify tex* to +search for terms which begin with +tex (e.g., tex, +texi2html, +texindex, text). +
    + +You can specify *tex to +search for terms which terminate with tex (e.g., +bibtex, +jlatex, latex, +platex, ptex, vertex). +
    + +You can specify *tex* to +search for terms which contain tex (many). +
    + +
  • You tried phrase searching but it hit documents which +didn't contain your phrase.
    + +It's a defect of Namazu. Precision of phrase searching is +not 100 %, so it causes wrong results occasionally. + +
  • You want to use and, +or or not as ordinary keywords
    +You can surround them respectively with double quotes like "..." or braces like {...}. + +
diff --git a/cgi-bin/template/NMZ.tips.in.dist b/cgi-bin/template/NMZ.tips.in.dist new file mode 100644 index 0000000..b19d4d6 --- /dev/null +++ b/cgi-bin/template/NMZ.tips.in.dist @@ -0,0 +1,60 @@ +

Tips on searching

+ +

+If you have trouble with searching, you can check the following tips. +

+ +
    +
  • Check a spelling of your keyword
    +Namazu can't find anything with wrong spelling. + +
  • Add keywords
    + +If you gained no results or too few results, you can add one +or more related keywords with or operator. It makes your search +more hittable. e.g.,
    +tex or ptex or latex or latex2e
    + +If you gained too many results, you can add one or more +related keywords with and +operator. It makes your search more limited. e.g.,
    +latex and dvi2ps and eps + +
  • Try substring matching
    + +If you gained no results or too few results, you can try +substring matching. + +You can specify tex* to +search for terms which begin with +tex (e.g., tex, +texi2html, +texindex, text). +
    + +You can specify *tex to +search for terms which terminate with tex (e.g., +bibtex, +jlatex, latex, +platex, ptex, vertex). +
    + +You can specify *tex* to +search for terms which contain tex (many). +
    + +
  • You tried phrase searching but it hit documents which +didn't contain your phrase.
    + +It's a defect of Namazu. Precision of phrase searching is +not 100 %, so it causes wrong results occasionally. + +
  • You want to use and, +or or not as ordinary keywords
    +You can surround them respectively with double quotes like "..." or braces like {...}. + +
diff --git a/etc/.htaccess b/etc/.htaccess new file mode 100644 index 0000000..2f1124b --- /dev/null +++ b/etc/.htaccess @@ -0,0 +1,38 @@ +############################################################################ +## $Id: .htaccess.in.dist,v 1.3 2002/09/15 03:43:29 ehood Exp $ +## Description: +## Sample Apache local configuration file to deny access to special +## files. If local configuration files are not enabled, then these +## settings would have to be present in httpd.conf. +## +## To use this file, copy the generated .htaccess file to the +## root of the installation when +## 'make configure' is done, or create a symlink to it by +## executing the following command from the installation root: +## +## ln -s ./etc/.htaccess +## +## This way, you do not have to re-copy each time you make +## changes to this file. +## +############################################################################ + +# Deny access to files that we probably do not want the public to +# see. The main one is .mhonarc.db files, especially if we have +# mail address obfuscation in the HTML archives. + + Order allow,deny + Deny from all + + +# Deny access to log files + + Order allow,deny + Deny from all + + +# Deny access to temporary incoming mail file used by filter-spool + + Order allow,deny + Deny from all + diff --git a/etc/.htaccess.in b/etc/.htaccess.in new file mode 100644 index 0000000..2f1124b --- /dev/null +++ b/etc/.htaccess.in @@ -0,0 +1,38 @@ +############################################################################ +## $Id: .htaccess.in.dist,v 1.3 2002/09/15 03:43:29 ehood Exp $ +## Description: +## Sample Apache local configuration file to deny access to special +## files. If local configuration files are not enabled, then these +## settings would have to be present in httpd.conf. 
+## +## To use this file, copy the generated .htaccess file to the +## root of the installation when +## 'make configure' is done, or create a symlink to it by +## executing the following command from the installation root: +## +## ln -s ./etc/.htaccess +## +## This way, you do not have to re-copy each time you make +## changes to this file. +## +############################################################################ + +# Deny access to files that we probably do not want the public to +# see. The main one is .mhonarc.db files, especially if we have +# mail address obfuscation in the HTML archives. + + Order allow,deny + Deny from all + + +# Deny access to log files + + Order allow,deny + Deny from all + + +# Deny access to temporary incoming mail file used by filter-spool + + Order allow,deny + Deny from all + diff --git a/etc/.htaccess.in.dist b/etc/.htaccess.in.dist new file mode 100644 index 0000000..2f1124b --- /dev/null +++ b/etc/.htaccess.in.dist @@ -0,0 +1,38 @@ +############################################################################ +## $Id: .htaccess.in.dist,v 1.3 2002/09/15 03:43:29 ehood Exp $ +## Description: +## Sample Apache local configuration file to deny access to special +## files. If local configuration files are not enabled, then these +## settings would have to be present in httpd.conf. +## +## To use this file, copy the generated .htaccess file to the +## root of the installation when +## 'make configure' is done, or create a symlink to it by +## executing the following command from the installation root: +## +## ln -s ./etc/.htaccess +## +## This way, you do not have to re-copy each time you make +## changes to this file. +## +############################################################################ + +# Deny access to files that we probably do not want the public to +# see. The main one is .mhonarc.db files, especially if we have +# mail address obfuscation in the HTML archives. 
+ + Order allow,deny + Deny from all + + +# Deny access to log files + + Order allow,deny + Deny from all + + +# Deny access to temporary incoming mail file used by filter-spool + + Order allow,deny + Deny from all + diff --git a/etc/apache.conf b/etc/apache.conf new file mode 100644 index 0000000..440d613 --- /dev/null +++ b/etc/apache.conf @@ -0,0 +1,77 @@ +############################################################################ +## $Id: apache.conf.in.dist,v 1.8 2003/08/09 18:01:39 ehood Exp $ +## Description: +## The following are template/example Apache configuration +## directives that apply to the mail archives. +## +## IMPORTANT: MAKE SURE TO REVIEW THE DIRECTIVES HERE IF +## USE THIS FILE AGAINST ANY SECURITY CONCERNS +## YOU MAY HAVE. +############################################################################ + + + # Uncomment and change the following if ROOT_URL in config.sh + # is not already directly accessible from a web client. + #Alias /archive "/home/mharc" + + # Enable mharc CGI programs. Alternate enabling method is + # defined below. + ScriptAlias /archive/cgi-bin/ "/home/mharc/cgi-bin/" + + + + AllowOverride All + + Options MultiViews Indexes FollowSymLinks + IndexOptions FancyIndexing NameWidth=* SuppressSize SuppressLastModified + + # Make sure namazu index files are not visible in directory listings + IndexIgnore NMZ.* + + # Deny access to files that we probably do not want the public to + # see. The main one is .mhonarc.db files, especially if we have + # mail address obfsucation in the HTML archives. + + Order allow,deny + Deny from all + + + # Deny access to log files + + Order allow,deny + Deny from all + + + # Deny access to temporary incoming mail file used by filter-spool + + Order allow,deny + Deny from all + + + # Limit the types of HTTP methods allowed + + Order allow,deny + Allow from all + + + Order deny,allow + Deny from all + + + +# Enable CGI execution for cgi-bin directory. 
This is probably not +# necessary if the above ScriptAlias directive is used. + + Options +ExecCGI + + AddHandler cgi-script .cgi + + + +# Browsing access to raw mail archives. + + AllowOverride All + Options Indexes FollowSymLinks + IndexOptions FancyIndexing SuppressDescription + + diff --git a/etc/apache.conf.in b/etc/apache.conf.in new file mode 100644 index 0000000..ecd40f7 --- /dev/null +++ b/etc/apache.conf.in @@ -0,0 +1,77 @@ +############################################################################ +## $Id: apache.conf.in.dist,v 1.8 2003/08/09 18:01:39 ehood Exp $ +## Description: +## The following are template/example Apache configuration +## directives that apply to the mail archives. +## +## IMPORTANT: MAKE SURE TO REVIEW THE DIRECTIVES HERE IF +## USE THIS FILE AGAINST ANY SECURITY CONCERNS +## YOU MAY HAVE. +############################################################################ + + + # Uncomment and change the following if ROOT_URL in config.sh + # is not already directly accessible from a web client. + #Alias @@ROOT_URL@@ "@@SW_ROOT@@" + + # Enable mharc CGI programs. Alternate enabling method is + # defined below. + ScriptAlias @@CGI_URL@@/ "@@CGI_DIR@@/" + + + + AllowOverride All + + Options MultiViews Indexes FollowSymLinks + IndexOptions FancyIndexing NameWidth=* SuppressSize SuppressLastModified + + # Make sure namazu index files are not visible in directory listings + IndexIgnore NMZ.* + + # Deny access to files that we probably do not want the public to + # see. The main one is .mhonarc.db files, especially if we have + # mail address obfsucation in the HTML archives. 
+ + Order allow,deny + Deny from all + + + # Deny access to log files + + Order allow,deny + Deny from all + + + # Deny access to temporary incoming mail file used by filter-spool + + Order allow,deny + Deny from all + + + # Limit the types of HTTP methods allowed + + Order allow,deny + Allow from all + + + Order deny,allow + Deny from all + + + +# Enable CGI execution for cgi-bin directory. This is probably not +# necessary if the above ScriptAlias directive is used. + + Options +ExecCGI + + AddHandler cgi-script .cgi + + + +# Browsing access to raw mail archives. + + AllowOverride All + Options Indexes FollowSymLinks + IndexOptions FancyIndexing SuppressDescription + + diff --git a/etc/apache.conf.in.dist b/etc/apache.conf.in.dist new file mode 100644 index 0000000..ecd40f7 --- /dev/null +++ b/etc/apache.conf.in.dist @@ -0,0 +1,77 @@ +############################################################################ +## $Id: apache.conf.in.dist,v 1.8 2003/08/09 18:01:39 ehood Exp $ +## Description: +## The following are template/example Apache configuration +## directives that apply to the mail archives. +## +## IMPORTANT: MAKE SURE TO REVIEW THE DIRECTIVES HERE IF +## USE THIS FILE AGAINST ANY SECURITY CONCERNS +## YOU MAY HAVE. +############################################################################ + + + # Uncomment and change the following if ROOT_URL in config.sh + # is not already directly accessible from a web client. + #Alias @@ROOT_URL@@ "@@SW_ROOT@@" + + # Enable mharc CGI programs. Alternate enabling method is + # defined below. + ScriptAlias @@CGI_URL@@/ "@@CGI_DIR@@/" + + + + AllowOverride All + + Options MultiViews Indexes FollowSymLinks + IndexOptions FancyIndexing NameWidth=* SuppressSize SuppressLastModified + + # Make sure namazu index files are not visible in directory listings + IndexIgnore NMZ.* + + # Deny access to files that we probably do not want the public to + # see. 
The main one is .mhonarc.db files, especially if we have + # mail address obfsucation in the HTML archives. + + Order allow,deny + Deny from all + + + # Deny access to log files + + Order allow,deny + Deny from all + + + # Deny access to temporary incoming mail file used by filter-spool + + Order allow,deny + Deny from all + + + # Limit the types of HTTP methods allowed + + Order allow,deny + Allow from all + + + Order deny,allow + Deny from all + + + +# Enable CGI execution for cgi-bin directory. This is probably not +# necessary if the above ScriptAlias directive is used. + + Options +ExecCGI + + AddHandler cgi-script .cgi + + + +# Browsing access to raw mail archives. + + AllowOverride All + Options Indexes FollowSymLinks + IndexOptions FancyIndexing SuppressDescription + + diff --git a/etc/crontab b/etc/crontab new file mode 100644 index 0000000..e4b5722 --- /dev/null +++ b/etc/crontab @@ -0,0 +1,17 @@ +# $Id: crontab.in.dist,v 1.4 2003/08/09 18:02:42 ehood Exp $ +# Template crontab for archive user. Messages are processed on +# an hourly basis. However, for one day of the week new message +# processing is not done to allow for cleanup. +# +57 * * * 1-6 /home/mharc/bin/logcmd -log /home/mharc/log/cron.log -- /home/mharc/bin/read-mail +57 0,4-23 * * 0 /home/mharc/bin/logcmd -log /home/mharc/log/cron.log -- /home/mharc/bin/read-mail +57 1 * * 0 /home/mharc/bin/logcmd -log /home/mharc/log/cron.log -- /home/mharc/bin/gc-search-indexes +57 3 * * 0 /home/mharc/bin/logcmd -log /home/mharc/log/cron.log -- /home/mharc/bin/compress-mboxes +# +# Uncomment the following and edit pathname to logrotate(8) program +# if you want log rotation of mharc log files. The default frequency +# is once a week to match frequence of mharc provided logrotate.conf. +# Logrotate is provided under RedHat Linux and and other linux and +# Unix distros. 
+# +#57 2 * * 0 /usr/sbin/logrotate -s /home/mharc/etc/logrotate.status /home/mharc/etc/logrotate.conf diff --git a/etc/crontab.in b/etc/crontab.in new file mode 100644 index 0000000..2a1aff0 --- /dev/null +++ b/etc/crontab.in @@ -0,0 +1,17 @@ +# $Id: crontab.in.dist,v 1.4 2003/08/09 18:02:42 ehood Exp $ +# Template crontab for archive user. Messages are processed on +# an hourly basis. However, for one day of the week new message +# processing is not done to allow for cleanup. +# +57 * * * 1-6 @@SW_ROOT@@/bin/logcmd -log @@SW_ROOT@@/log/cron.log -- @@SW_ROOT@@/bin/read-mail +57 0,4-23 * * 0 @@SW_ROOT@@/bin/logcmd -log @@SW_ROOT@@/log/cron.log -- @@SW_ROOT@@/bin/read-mail +57 1 * * 0 @@SW_ROOT@@/bin/logcmd -log @@SW_ROOT@@/log/cron.log -- @@SW_ROOT@@/bin/gc-search-indexes +57 3 * * 0 @@SW_ROOT@@/bin/logcmd -log @@SW_ROOT@@/log/cron.log -- @@SW_ROOT@@/bin/compress-mboxes +# +# Uncomment the following and edit pathname to logrotate(8) program +# if you want log rotation of mharc log files. The default frequency +# is once a week to match frequence of mharc provided logrotate.conf. +# Logrotate is provided under RedHat Linux and and other linux and +# Unix distros. +# +#57 2 * * 0 /usr/sbin/logrotate -s @@SW_ROOT@@/etc/logrotate.status @@SW_ROOT@@/etc/logrotate.conf diff --git a/etc/crontab.in.dist b/etc/crontab.in.dist new file mode 100644 index 0000000..2a1aff0 --- /dev/null +++ b/etc/crontab.in.dist @@ -0,0 +1,17 @@ +# $Id: crontab.in.dist,v 1.4 2003/08/09 18:02:42 ehood Exp $ +# Template crontab for archive user. Messages are processed on +# an hourly basis. However, for one day of the week new message +# processing is not done to allow for cleanup. 
+# +57 * * * 1-6 @@SW_ROOT@@/bin/logcmd -log @@SW_ROOT@@/log/cron.log -- @@SW_ROOT@@/bin/read-mail +57 0,4-23 * * 0 @@SW_ROOT@@/bin/logcmd -log @@SW_ROOT@@/log/cron.log -- @@SW_ROOT@@/bin/read-mail +57 1 * * 0 @@SW_ROOT@@/bin/logcmd -log @@SW_ROOT@@/log/cron.log -- @@SW_ROOT@@/bin/gc-search-indexes +57 3 * * 0 @@SW_ROOT@@/bin/logcmd -log @@SW_ROOT@@/log/cron.log -- @@SW_ROOT@@/bin/compress-mboxes +# +# Uncomment the following and edit pathname to logrotate(8) program +# if you want log rotation of mharc log files. The default frequency +# is once a week to match frequency of mharc provided logrotate.conf. +# Logrotate is provided under RedHat Linux and other linux and +# Unix distros. +# +#57 2 * * 0 /usr/sbin/logrotate -s @@SW_ROOT@@/etc/logrotate.status @@SW_ROOT@@/etc/logrotate.conf diff --git a/etc/logrotate.conf b/etc/logrotate.conf new file mode 100644 index 0000000..ff6e862 --- /dev/null +++ b/etc/logrotate.conf @@ -0,0 +1,13 @@ +# $Id: logrotate.conf.in.dist,v 1.1 2003/08/09 18:02:42 ehood Exp $ +# Sample logrotate(8) configuration file for rotating mharc +# log files. logrotate is provided under RedHat Linux and +# other linux and Unix distros. +# +compress + +/home/mharc/log/*.log { + rotate 5 + weekly + missingok +} + diff --git a/etc/logrotate.conf.in b/etc/logrotate.conf.in new file mode 100644 index 0000000..c595d88 --- /dev/null +++ b/etc/logrotate.conf.in @@ -0,0 +1,13 @@ +# $Id: logrotate.conf.in.dist,v 1.1 2003/08/09 18:02:42 ehood Exp $ +# Sample logrotate(8) configuration file for rotating mharc +# log files. logrotate is provided under RedHat Linux and +# other linux and Unix distros.
+# +compress + +@@LOG_DIR@@/*.log { + rotate 5 + weekly + missingok +} + diff --git a/etc/logrotate.conf.in.dist b/etc/logrotate.conf.in.dist new file mode 100644 index 0000000..c595d88 --- /dev/null +++ b/etc/logrotate.conf.in.dist @@ -0,0 +1,13 @@ +# $Id: logrotate.conf.in.dist,v 1.1 2003/08/09 18:02:42 ehood Exp $ +# Sample logrotate(8) configuration file for rotating mharc +# log files. logrotate is provided under RedHat Linux and +# other linux and Unix distros. +# +compress + +@@LOG_DIR@@/*.log { + rotate 5 + weekly + missingok +} + diff --git a/lib/MHArc/CGI.pm b/lib/MHArc/CGI.pm new file mode 100644 index 0000000..c165983 --- /dev/null +++ b/lib/MHArc/CGI.pm @@ -0,0 +1,177 @@ +##--------------------------------------------------------------------------## +## File: +## $Id: CGI.pm,v 1.2 2002/09/18 17:23:29 ehood Exp $ +## Description: +## POD at end of file. +##--------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA
##--------------------------------------------------------------------------##

package MHArc::CGI;

use strict;
use warnings;

use Exporter;
our @ISA = qw(Exporter);

our @EXPORT_OK = qw(
    &parse_input
    &print_content_type
    &print_error
    &print_forbidden
    &print_input_error
    &print_location
    &print_not_found_error
    &print_script_error
);

##--------------------------------------------------------------------------##

our $Debug;
BEGIN {
    $Debug = 0;
}

##--------------------------------------------------------------------------##

## Parse the request parameters of the current CGI request.
##
## The request method is taken from $ENV{REQUEST_METHOD} (default GET).
## GET data is read from $ENV{QUERY_STRING}; POST data is read from
## STDIN, $ENV{CONTENT_LENGTH} bytes long.
##
## Returns a hash reference mapping each URL-decoded name to its
## URL-decoded value.  If a name occurs more than once the last value
## wins.  A name given without "=" maps to the empty string.  Returns
## undef (after a warning) for any other request method.
##
sub parse_input {
    my $method = $ENV{"REQUEST_METHOD"} || 'GET';
    my $data;
    if ($method eq "GET") {
        $data = $ENV{"QUERY_STRING"};
    } elsif ($method eq "POST") {
        # CONTENT_LENGTH may be absent or empty: treat that as "no body"
        # instead of handing an undefined length to read().
        my $length = $ENV{"CONTENT_LENGTH"} || 0;
        if ($length =~ /^\d+$/ && $length > 0) {
            read(STDIN, $data, $length);
        }
    } else {
        warn qq/Unknown method: $method/;
        return undef;
    }
    $data = '' unless defined($data);

    my $form = { };
    foreach my $pair (split(/&/, $data)) {
        next if $pair eq '';            # "a=1&&b=2" or a trailing "&"
        # Limit of 2: only the first "=" separates name from value, so
        # a value that itself contains "=" is kept whole.
        my($name, $value) = split(/=/, $pair, 2);
        $value = '' unless defined($value);
        $form->{expandstr($name)} = expandstr($value);
    }
    $form;
}

## Print a complete "403 Forbidden" plain-text response to STDOUT.
sub print_forbidden {
    print STDOUT 'Status: 403 Forbidden', "\r\n";
    print_content_type('text/plain');
    print STDOUT "Access Denied\n";
}

## Print a complete "400 Bad Request" plain-text response to STDOUT.
sub print_input_error {
    print STDOUT 'Status: 400 Bad Request', "\r\n";
    print_content_type('text/plain');
    print STDOUT "Input Error\n";
}

## Print a plain-text "Script Error" response to STDOUT.
## No Status header is written, so the web server's default status
## applies.  Use print_script_error() to report the failure with an
## explicit 500 status.
sub print_error {
    print_content_type('text/plain');
    print STDOUT "Script Error\n";
}

## Print a complete "500 Internal Server Error" plain-text response to
## STDOUT.  This name is listed in @EXPORT_OK.
sub print_script_error {
    print STDOUT 'Status: 500 Internal Server Error', "\r\n";
    print_content_type('text/plain');
    print STDOUT "Script Error\n";
}

## Print a complete "404 Not Found" plain-text response to STDOUT.
sub print_not_found_error {
    print STDOUT 'Status: 404 Not Found', "\r\n";
    print_content_type('text/plain');
    print STDOUT "Not Found\n";
}

## Print a Location header for the URL given as first argument and
## terminate the header block.
sub print_location {
    print STDOUT 'Location: ', $_[0], "\r\n\r\n";
}

## Print a Content-type header for the given media type and terminate
## the header block.  Any Status header must be printed before this.
sub print_content_type {
    my($type) = shift;
    print STDOUT "Content-type: $type\r\n\r\n";
}

## URL-decode a string: "+" becomes a space and each %XX hex escape
## becomes the corresponding byte.  An undefined argument is treated as
## the empty string.
sub expandstr {
    my($str) = shift;
    $str = '' unless defined($str);
    $str =~ tr/+/ /;
    $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/ge;
    $str;
}

##--------------------------------------------------------------------------##
1;
__END__

=head1 NAME

MHArc::CGI - General CGI-related utilities for mail archiving system.

=head1 SYNOPSIS

  use MHArc::CGI;

=head1 DESCRIPTION

This module contains a collection of CGI-related utility routines used
by the various mharc CGI programs.

=head1 VARIABLES

The following module variables can be set to affect the behavior
of the utility routines:

=over

=item C<$Debug>

If set to a true value, routines will print out debugging information,
if appropriate.

=back

=head1 ROUTINES

By default, no routines are exported into the calling namespace.
Routines in this module can be imported by explicitly listing the
routines to import in the C<use> declaration:

  use MHArc::CGI qw( parse_input );

The following routines are available:

=over

=item C<parse_input()>

Parses the GET query string or POST body of the current request and
returns a hash reference of URL-decoded names and values.  Returns
C<undef> if the request method is neither GET nor POST.

=item C<print_content_type($type)>

Prints a C<Content-type> header and ends the header block.

=item C<print_location($url)>

Prints a C<Location> header and ends the header block.

=item C<print_forbidden()>, C<print_input_error()>, C<print_not_found_error()>, C<print_script_error()>

Print a complete plain-text response with status 403, 400, 404 and
500 respectively.

=item C<print_error()>

Prints a plain-text "Script Error" response without a C<Status> header.

=back

=head1 VERSION

C<$Id: CGI.pm,v 1.2 2002/09/18 17:23:29 ehood Exp $>

=head1 AUTHOR

Earl Hood, earl@earlhood.com

This module is part of the mharc archiving system and comes with
ABSOLUTELY NO WARRANTY and may be copied only under the terms of
the GNU General Public License, which may be found in the MHArc
distribution.

=cut

diff --git a/lib/MHArc/Config.pm b/lib/MHArc/Config.pm new file mode 100644 index 0000000..0c8f78b --- /dev/null +++ b/lib/MHArc/Config.pm @@ -0,0 +1,245 @@ +##---------------------------------------------------------------------------## +## File: +## $Id: Config.pm,v 1.8 2002/09/20 02:58:38 ehood Exp $ +## Description: +## POD at end of file.
##---------------------------------------------------------------------------##
## Copyright (C) 2002 Earl Hood
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA
##---------------------------------------------------------------------------##

package MHArc::Config;

use strict;
use warnings;

use File::Spec;

our($Debug, $Cache);
BEGIN {
    $Debug = 0;
    $Cache = 1;
}

## Constructor: MHArc::Config->load($filename)
## Creates a new instance and fills it from $filename ('-' reads
## STDIN).  Returns the object produced by read_filename().
sub load {
    my $mod   = shift;  # Name of module
    my $fname = shift;  # Filename to read configuration from
    my $class = ref($mod) || $mod;

    my $self = bless { }, $class;
    $self->read_filename($fname);
}

## Read configuration from $in_file into the object and return the
## resulting object.  NOTE: when a valid cache file is used, the object
## returned is the one stored in the cache, not the invocant, so
## callers must use the return value.
##
## Dies if neither $in_file nor "$in_file.dist" exists, or if the file
## cannot be opened.  Problems reading or writing the cache only warn.
sub read_filename {
    my $self    = shift;
    my $in_file = shift;

    READ_FILE: {
        if ($in_file eq '-') {
            $self->parse_config_sh(\*STDIN);
            last READ_FILE;
        }

        my $filename   = $in_file;
        my $cache_file = $in_file . '.cache.pl';

        my $cache_mtime;
        if ($Cache && -e $cache_file) {
            $cache_mtime = (stat($cache_file))[9];
            print 'Cache mtime: ', $cache_mtime, "\n"  if $Debug;
        }

        # Fall back to the ".dist" version of the file.  This is done
        # whether or not caching is enabled, so both modes resolve the
        # same file.
        if (!(-e $filename)) {
            $filename .= '.dist';
            if (-e $filename) {
                warn qq/Warning: Using "$filename"\n/;
            } else {
                die qq/ERROR: "$in_file" does not exist\n/;
            }
        }

        if ($Cache) {
            my $file_mtime = (stat($filename))[9];
            print 'Config mtime: ', $file_mtime, "\n"  if $Debug;
            if (defined($cache_mtime) && defined($file_mtime) &&
                    ($cache_mtime >= $file_mtime)) {
                print "Using cache\n"  if $Debug;
                # require() searches @INC for names that do not start
                # with "/", "./" or "../"; an absolute path makes it
                # load exactly the cache file next to the config file.
                my $cache_path = File::Spec->rel2abs($cache_file);
                delete $INC{$cache_path};
                my $vars;
                eval {
                    $vars = require $cache_path;
                };
                if ($@) {
                    warn qq/Warning: Problem requiring "$cache_file": $@\n/;
                } elsif (!UNIVERSAL::isa($vars, 'HASH')) {
                    # Damaged cache: ignore it and re-parse the file.
                    warn qq/Warning: Problem requiring "$cache_file": /,
                         qq/unexpected content\n/;
                } else {
                    $self = $vars;
                    last READ_FILE;
                }
            }
        }

        open(my $in, '<', $filename) ||
            die qq/ERROR: Unable to open "$filename": $!\n/;
        print "Using $filename\n"  if $Debug;
        $self->parse_config_sh($in);
        close($in);

        if ($Cache) {
            eval {
                require Data::Dumper;
                no warnings 'once';
                local $Data::Dumper::Terse = 1;
                local $Data::Dumper::Indent = 0;
                print 'Create cache ', $cache_file, "\n"  if $Debug;
                open(my $out, '>', $cache_file) ||
                    die qq/Unable to create "$cache_file": $!\n/;
                print $out '# DO NOT EDIT THIS FILE!', "\n",
                           Data::Dumper::Dumper($self), ';';
                # Buffered write errors only show up at close().
                close($out) ||
                    die qq/Unable to write "$cache_file": $!\n/;
            };
            if ($@) {
                warn qq/Warning: Problem writing "$cache_file": $@\n/;
                unlink($cache_file);
            }
        }
    }

    # Check for MHONARC_LIB, and if defined, add to perl's @INC
    if ($self->{'MHONARC_LIB'}) {
        print 'Adding ', $self->{'MHONARC_LIB'}, " to \@INC\n"  if $Debug;
        unshift(@INC, $self->{'MHONARC_LIB'});
    }
    $self;
}

## Parse KEY=VALUE lines (a small subset of Bourne shell syntax) from
## filehandle $fh into the object.
##   - blank lines and lines starting with "#" are skipped
##   - lines without "=" are skipped
##   - 'single quoted' values are taken literally
##   - "double quoted" and unquoted values get $VAR / ${VAR} expanded
##     from keys already read, then from %ENV
sub parse_config_sh {
    my $self = shift;
    my $fh   = shift;

    while (defined(my $line = <$fh>)) {
        next  unless $line =~ /\S/;
        next  if $line =~ /^\s*#/;
        # Drop surrounding whitespace (including the newline and any
        # CR) so the closing quote is really the last character.
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        my($key, $value) = split(/=/, $line, 2);
        next  unless defined($value);

        if ($value =~ s/^'//) {
            $value =~ s/'$//;
        } else {
            $value =~ s/^"//;  $value =~ s/"$//;
            $value = _expand_vars($self, $value);
        }
        $self->{$key} = $value;
    }
}

## Print all settings as KEY=VALUE lines, sorted by key, to $fh.
sub dump_config {
    my $self = shift;
    my $fh   = shift;

    foreach my $key (sort keys %$self) {
        print $fh $key, '=', $self->{$key}, "\n";
    }
}

## Expand $NAME and ${NAME} references in $str using $map (see
## _var_lookup).  A "$" that directly follows another "$" is left
## alone.  The look-behind consumes no character, so references that
## touch each other ("$A$B", "${A}${B}") are all expanded.
sub _expand_vars {
    my $map = shift;
    my $str = shift;

    $str =~ s/(?<!\$)\$(\{[^}]+\}|\w+)/_var_lookup($map, $1)/ge;
    $str;
}

## Return the value for $key (braces stripped) from $map, else from
## %ENV, else the empty string.
sub _var_lookup {
    my $map = shift;
    my $key = shift;
    $key =~ s/[{}]//g;
    defined($map->{$key}) ? $map->{$key} :
        defined($ENV{$key}) ? $ENV{$key} : "";
}

#my $config = MHArc::Config->load('-');
#foreach (sort keys %$config) {
#  print "$_=", $config->{$_}, "\n";
#}

#==========================================================================
1;
__END__

=head1 NAME

MHArc::Config - Load mail archive configuration file

=head1 SYNOPSIS

  $conf = MHArc::Config->load($filename);
  print $conf->{'HTML_DIR'};

=head1 DESCRIPTION

This module will load in the archive configuration file. The
archive configuration file defines variables in Bourne shell syntax format.

B<NOTE:> Only a subset of the Bourne shell syntax is supported, so do
not get fancy with the file.

The C<load> static method will create a new configuration instance
that is a blessed hash reference. The variables defined in the file
will be the hash keys.

B<NOTE:> If the filename specified does not exist, then the C<load>
method will check for the filename with a C<.dist> extension. It
is recommended to not rely on the C<.dist> version since it will
get overwritten on software updates.

=head1 CACHING

This module will create a cached version of the file loaded to
make subsequent loadings quicker. The cached file will be called
C<E<lt>filenameE<gt>.cache.pl> and will contain the configuration
data in a Perl format.

When loading the configuration of a file, the timestamps of the
cache file and the regular file are compared. If the cache is not
older than the regular file, it is used. Else, the regular file will
be loaded and a new cache
file created.
+ +=head1 VARIABLES + +=over + +=item C<$MHArc::Config::Cache> + +If set to C<0>, no cache processing will be done. Configuration will +be loaded directly from specified file. + +=item C<$MHArc::Config::Debug> + +If set to C<1>, diagnostic information will be printed. This variable +should only be used for debugging and not in production. + +=back + +=head1 VERSION + +C<$Id: Config.pm,v 1.8 2002/09/20 02:58:38 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/lib/MHArc/ListDef.pm b/lib/MHArc/ListDef.pm new file mode 100644 index 0000000..5713aaf --- /dev/null +++ b/lib/MHArc/ListDef.pm @@ -0,0 +1,139 @@ +##---------------------------------------------------------------------------## +## File: +## $Id: ListDef.pm,v 1.5 2003/07/16 00:49:58 ehood Exp $ +## Description: +##---------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. 
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA
##---------------------------------------------------------------------------##

package MHArc::ListDef;

use strict;
use warnings;

## NOTE(review): inside hash subscript braces a bare identifier is
## auto-quoted, so every $self->{_DEF_ORDER} below uses the literal key
## '_DEF_ORDER' and this sub is never actually called.  The key is kept
## as-is because it is part of the object layout callers may see; a
## list that is itself named "_DEF_ORDER" would collide with it.
sub _DEF_ORDER() { return '-X-DEF-ORDER-'; }

## Constructor: MHArc::ListDef->new($fh) or MHArc::ListDef->new($filename)
## Returns the object after loading the definitions (see load_file).
sub new {
    my $mod   = shift;  # Name of module
    my $fh    = shift;  # Filehandle to read definitions from
    my $class = ref($mod) || $mod;

    my $self = bless { }, $class;
    $self->{_DEF_ORDER} = [ ];
    $self->load_file($fh);
}

## Load list definitions from $file, which is either a filehandle
## reference or a filename string.  Dies if a filename cannot be opened.
##
## Each definition line has the form "key: value".  Keys are
## lowercased.  A "name" line starts a new list (whitespace is removed
## from the name); every following key is stored for that list as an
## array of values in file order:
##     $self->{$name}{$key} = [ $value, ... ]
## Blank lines and "#" comment lines are skipped.  Lines that are not
## of the "key: value" form, or that appear before the first "name",
## are skipped with a warning.
##
## Returns $self.
sub load_file {
    my $self = shift;
    my $file = shift;
    my $fh;

    if (!ref($file)) {
        open($fh, '<', $file) ||
            die qq/ERROR: Unable to open "$file": $!\n/;
    } else {
        $fh = $file;
    }

    my $name = undef;

    while (defined(my $line = <$fh>)) {
        next  unless $line =~ /\S/;
        next  if $line =~ /^\s*#/;
        chomp $line;
        my($key, $value) = $line =~ /^([^:]+):\s*(.*)$/;
        if (!defined($key)) {
            # No "key:" part at all; do not store it under an empty key.
            warn qq/Warning: Unrecognized line '$line'\n/;
            next;
        }
        $key = lc $key;
        if ($key eq 'name') {
            $value =~ s/\s//g;
            $name = $value;
            push(@{$self->{_DEF_ORDER}}, $name);
            $self->{$name}{'name'} = [ $name ];
            next;
        }
        if (!defined($name)) {
            warn qq/Warning: No name defined for '$line'\n/;
            next;
        }

        push(@{$self->{$name}{$key}}, $value);
    }

    # Only close what was opened here; a handle passed in by the caller
    # stays open.
    if (!ref($file)) {
        close($fh);
    }

    $self;
}

## Return the list names in the order they were defined.
sub get_names {
    my $self = shift;
    @{$self->{_DEF_ORDER}};
}

1;
__END__

=head1 NAME

MHArc::ListDef - Load mailing lists definition file

=head1 SYNOPSIS

  $def = MHArc::ListDef->new($fh);
  $def = MHArc::ListDef->new($filename);

  # Return list of names in order defined
  @names = $def->get_names;

  # Access an option
  $description = $def->{$name}{'description'}[0];

=head1 DESCRIPTION

This module parses a mailing list definition file for use in the
+auto-mail archiving system. + +The C<new> method either takes a filehandle reference or a filename +string. + +=head1 SEE ALSO + +L + +=head1 VERSION + +C<$Id: ListDef.pm,v 1.5 2003/07/16 00:49:58 ehood Exp $> + +=head1 AUTHOR + +Earl Hood, earl@earlhood.com + +This module is part of the mharc archiving system and comes with +ABSOLUTELY NO WARRANTY and may be copied only under the terms of +the GNU General Public License, which may be found in the MHArc +distribution. + +=cut + diff --git a/lib/MHArc/MailUtil.pm b/lib/MHArc/MailUtil.pm new file mode 100644 index 0000000..4902bc8 --- /dev/null +++ b/lib/MHArc/MailUtil.pm @@ -0,0 +1,160 @@ +##--------------------------------------------------------------------------## +## File: +## $Id: MailUtil.pm,v 1.3 2002/09/13 07:24:18 ehood Exp $ +## Description: +## POD at end of file. +##--------------------------------------------------------------------------## +## Copyright (C) 2002 Earl Hood +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
## 02111-1307, USA
##--------------------------------------------------------------------------##

package MHArc::MailUtil;

use strict;
use warnings;

use Exporter;
our @ISA = qw(Exporter);

our @EXPORT_OK = qw(
    &extract_date
);

##--------------------------------------------------------------------------##

our $Debug;
BEGIN {
    $Debug = 0;
}

##--------------------------------------------------------------------------##

## Determine the date of a message as a Unix time value.
##
##   $fields  Hash reference: lowercase header field name => array
##            reference of that field's values.  It is only read; no
##            entries are created for fields that are absent.
##   @dfs     Optional field selectors to try, in order.  Whitespace is
##            removed and A-Z is lowercased; a "[N]" suffix selects the
##            N-th value of the field (default 0).  Without selectors,
##            received, delivery-date and date are tried (first value
##            of each).
##
## For "received", only the text after the last ";" is parsed.  Parsing
## is delegated to mhonarc::parse_date() and
## mhonarc::get_time_from_date(), which the caller must have loaded.
##
## Returns the time of the first selector whose value parses, or the
## current time if none does.
sub extract_date {
    my $fields = shift;
    my @dfs    = @_;

    my @date_fields = ( );
    if (!@dfs) {
        @date_fields = ( ['received',0],['delivery-date',0],['date',0] );
    } else {
        foreach my $selector (@dfs) {
            my $name = $selector;
            $name =~ s/\s//g;  $name =~ tr/A-Z/a-z/;
            my $index = 0;
            if ($name =~ s/\[(\d+)\]//) {
                $index = $1;
            }
            push(@date_fields, [ $name, $index ]);
        }
    }

    my $unix_time;
    foreach my $date_field (@date_fields) {
        my($df, $i) = @$date_field;

        # Look at the value list without indexing through a missing
        # key, which would autovivify an empty array in the caller's
        # hash.
        my $values = $fields->{$df};
        next  unless UNIVERSAL::isa($values, 'ARRAY');
        my $field_str = $values->[$i];
        next  unless defined($field_str);

        print qq/Debug: $df: $field_str\n/  if $Debug;
        if ($df eq 'received') {
            # The date is the part after the last ';'.
            my @ra = split(/;/, $field_str);
            $field_str = pop(@ra);
        }
        my @date = mhonarc::parse_date($field_str);
        if (scalar(@date)) {
            print qq/Debug: \@date=/, join('|',@date), qq/\n/  if $Debug;
            $unix_time = mhonarc::get_time_from_date(@date[1..$#date]);
            last;
        }
    }
    if (!defined($unix_time)) {
        print qq/Debug: Unable to parse date, using current time\n/  if $Debug;
        $unix_time = time;
    }
    $unix_time;
}

##--------------------------------------------------------------------------##
1;
__END__

=head1 NAME

MHArc::MailUtil - General mail-related utilities for mail archiving system.

=head1 SYNOPSIS

  use MHArc::MailUtil;

=head1 DESCRIPTION

This module contains a collection of mail-related utility routines.
+ +=head1 VARIABLES + +The following module variables can be set to affect the behavior +of the utility routines: + +=over + +=item C<$Debug> + +If set to a true value, routines will print out debugging information. + +=back + +=head1 ROUTINES + +By default, no routines are exported into the calling namespace. +Routines in this module can be imported by explicitly listing the +routines to import in the C<use> declaration: + + use MHArc::MailUtil qw( extract_date ); + +The following routines are available: + +=over + +=item C<extract_date($fields, I<@date_fields>)> + +Extract the date from message header fields represented by C<$fields>. +Any additional arguments are treated as message field names (which +should be lowercase names) to examine to find the date. If no field +names are specified, then the following fields are checked in order: +C<received>, C<delivery-date>, and C<date>. + +The return value of this function is the date of the message in +Unix time format: the same as what is returned by Perl's +builtin C