| 1 | #!/usr/bin/perl |
| 2 | ##--------------------------------------------------------------------------## |
| 3 | ## File: |
| 4 | ## $Id: extract-mesg.cgi.in.dist,v 1.5 2002/09/20 03:29:28 ehood Exp $ |
| 5 | ## Author: |
| 6 | ## Earl Hood earl@earlhood.com |
| 7 | ## Description: |
| 8 | ## POD at end-of-file. |
| 9 | ##--------------------------------------------------------------------------## |
| 10 | ## Copyright (C) 2002 Earl Hood <earl@earlhood.com> |
| 11 | ## |
| 12 | ## This program is free software; you can redistribute it and/or modify |
| 13 | ## it under the terms of the GNU General Public License as published by |
| 14 | ## the Free Software Foundation; either version 2 of the License, or |
| 15 | ## (at your option) any later version. |
| 16 | ## |
| 17 | ## This program is distributed in the hope that it will be useful, |
| 18 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ## GNU General Public License for more details. |
| 21 | ## |
| 22 | ## You should have received a copy of the GNU General Public License |
| 23 | ## along with this program; if not, write to the Free Software |
| 24 | ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA |
| 25 | ## 02111-1307, USA |
| 26 | ##--------------------------------------------------------------------------## |
| 27 | |
| 28 | package MHArc::extract_mesg_cgi; |
| 29 | |
| 30 | use lib '/home/mharc/lib'; |
| 31 | |
| 32 | use CGI::Carp; |
| 33 | use MHArc::CGI; |
| 34 | |
| 35 | ############################################################################# |
| 36 | ## BEGIN: Config Section |
| 37 | ############################################################################# |
| 38 | |
| 39 | ## Full pathname to where raw archives are located. |
| 40 | my $mbox_archive_root = '/home/mharc/mbox'; |
| 41 | |
| 42 | ## Message media-type: This is the media-type this script will return |
| 43 | ## to the client when serving up the raw mail message. Note, some |
| 44 | ## browsers actually support message/rfc822, but this could potentially |
| 45 | ## cause XSS HTML email attacks, so use with caution. |
| 46 | my $message_media_type = 'text/plain'; |
| 47 | |
| 48 | ############################################################################# |
| 49 | ## END: Config Section |
| 50 | ############################################################################# |
| 51 | |
| 52 | $ENV{'PATH'} = '/usr/local/bin:/bin:/usr/bin'; |
| 53 | |
| 54 | ## Query argument name to contain name of archive |
| 55 | my $argname_archive = 'a'; |
| 56 | |
| 57 | ## Query argument name to contain month |
| 58 | my $argname_month = 'm'; |
| 59 | |
| 60 | ## Query argument name to contain message-id |
| 61 | my $argname_id = 'i'; |
| 62 | |
| 63 | ## Mbox message separator: Try to be more strict than '^From ', but |
| 64 | ## not too strict to deal with possible variations. |
| 65 | my $msgsep = '^From \S+.*\d+:\d+:\d+'; |
| 66 | |
| 67 | MAIN: { |
| 68 | my $form = MHArc::CGI::parse_input(); |
| 69 | my $archive = $form->{$argname_archive} || ""; |
| 70 | my $month = $form->{$argname_month} || ""; |
| 71 | my $id = $form->{$argname_id} || ""; |
| 72 | |
| 73 | my $list_dir; |
| 74 | if (($month !~ /^\d{4}(?:-\d{2})?$/) || |
| 75 | ($id !~ /.\@./) || |
| 76 | ($archive !~ /\S/) || |
| 77 | ($archive =~ /\.\./) || |
| 78 | (! -d ($list_dir = join('/', $mbox_archive_root,$archive)))) { |
| 79 | warn qq/Invalid arguments: a=$archive, m=$month, i=$id\n/; |
| 80 | MHArc::CGI::print_input_error(); |
| 81 | last MAIN; |
| 82 | } |
| 83 | |
| 84 | # Check if list has raw archive access disabled. |
| 85 | if (-e join('/', $list_dir, '.noraw')) { |
| 86 | MHArc::CGI::print_forbidden(); |
| 87 | last MAIN; |
| 88 | } |
| 89 | |
| 90 | my $gzipped = 0; |
| 91 | my $mbox_file = join('/', $list_dir, $month); |
| 92 | if (! -e $mbox_file) { |
| 93 | $mbox_file .= '.gz'; |
| 94 | $gzipped = 1; |
| 95 | } |
| 96 | if (! -e $mbox_file) { |
| 97 | warn qq/"$mbox_file" does not exist\n/; |
| 98 | MHArc::CGI::print_input_error(); |
| 99 | last MAIN; |
| 100 | } |
| 101 | |
| 102 | local(*MBOX); |
| 103 | if ($gzipped) { |
| 104 | if (!open(MBOX, "gzip -dc '$mbox_file' |")) { |
| 105 | warn qq/Unable to exec "gzip -dc '$mbox_file'": $!\n/; |
| 106 | MHArc::CGI::print_error(); |
| 107 | last MAIN; |
| 108 | } |
| 109 | } else { |
| 110 | if (!open(MBOX, $mbox_file)) { |
| 111 | warn qq/Unable to open "$mbox_file": $!\n/; |
| 112 | MHArc::CGI::print_error(); |
| 113 | last MAIN; |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | local $_; |
| 118 | my $cache = ''; |
| 119 | my $in_header = 1; |
| 120 | my $msg_id = ''; |
| 121 | my $found = 0; |
| 122 | |
| 123 | SCAN: while (<MBOX>) { |
| 124 | if (/$msgsep/o) { |
| 125 | $cache = ''; |
| 126 | $in_header = 1; |
| 127 | next SCAN; |
| 128 | } |
| 129 | next SCAN unless $in_header; |
| 130 | |
| 131 | if (/^\r?$/) { |
| 132 | $cache = ''; |
| 133 | $in_header = 0; |
| 134 | next SCAN; |
| 135 | } |
| 136 | |
| 137 | $cache .= $_; |
| 138 | if (s/^message-id:\s*//i) { |
| 139 | s/\s+\Z//; |
| 140 | s/[<>]//g; |
| 141 | if ($_ eq $id) { |
| 142 | $found = 1; |
| 143 | last SCAN; |
| 144 | } |
| 145 | $cache = ''; |
| 146 | $in_header = 0; |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | if (!$found) { |
| 151 | MHArc::CGI::print_not_found_error(); |
| 152 | close(MBOX); |
| 153 | last MAIN; |
| 154 | } |
| 155 | |
| 156 | MHArc::CGI::print_content_type($message_media_type); |
| 157 | print STDOUT $cache; |
| 158 | while (<MBOX>) { |
| 159 | last if /$msgsep/o; |
| 160 | print STDOUT $_; |
| 161 | } |
| 162 | close(MBOX); |
| 163 | } |
| 164 | |
| 165 | ######################################################################## |
| 166 | __END__ |
| 167 | |
| 168 | =head1 NAME |
| 169 | |
| 170 | extract-mesg.cgi - mharc CGI program to retrieve raw version of a message |
| 171 | |
| 172 | =head1 SYNOPSIS |
| 173 | |
| 174 | http://.../cgi-bin/extract-mesg.cgi?a=<archive-name>&m=<period>&i=<message-id> |
| 175 | |
| 176 | =head1 DESCRIPTION |
| 177 | |
| 178 | This CGI program retrieves the raw version of a message from an |
| 179 | archive archived at a specified period and with a specified message-id. |
| 180 | |
| 181 | The CGI program will output the retrieved message to the web client. |
| 182 | |
| 183 | =head1 CGI OPTIONS |
| 184 | |
| 185 | =over |
| 186 | |
| 187 | =item C<a> |
| 188 | |
| 189 | The name of the archive. Archive names are defined by C<lists.def>. |
| 190 | |
| 191 | =item C<i> |
| 192 | |
| 193 | The message-id. |
| 194 | |
| 195 | =item C<m> |
| 196 | |
| 197 | The period in YYYY-MM or YYYY format. |
| 198 | |
| 199 | =back |
| 200 | |
| 201 | =head1 VERSION |
| 202 | |
| 203 | C<$Id: extract-mesg.cgi.in.dist,v 1.5 2002/09/20 03:29:28 ehood Exp $> |
| 204 | |
| 205 | =head1 AUTHOR |
| 206 | |
| 207 | Earl Hood, earl@earlhood.com |
| 208 | |
| 209 | This module is part of the mharc archiving system and comes with |
| 210 | ABSOLUTELY NO WARRANTY and may be copied only under the terms of |
| 211 | the GNU General Public License, which may be found in the MHArc |
| 212 | distribution. |
| 213 | |
| 214 | =cut |
| 215 | |