2 ##--------------------------------------------------------------------------##
4 ## $Id: mk-procmailrc,v 1.25 2003/08/09 17:51:04 ehood Exp $
6 ## Program to create a procmail recipe file from lists.def.
7 ## Run script with '-man' option to view manpage for this program.
8 ##--------------------------------------------------------------------------##
9 ## Copyright (C) 2001-2002 Earl Hood <earl@earlhood.com>
11 ## This program is free software; you can redistribute it and/or modify
12 ## it under the terms of the GNU General Public License as published by
13 ## the Free Software Foundation; either version 2 of the License, or
14 ## (at your option) any later version.
16 ## This program is distributed in the hope that it will be useful,
17 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ## GNU General Public License for more details.
21 ## You should have received a copy of the GNU General Public License
22 ## along with this program; if not, write to the Free Software
23 ## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
25 ##--------------------------------------------------------------------------##
27 package MHArc
::mk_procmailrc
;
29 ##--------------------------------------------------------------------------##
# Refuse to run as a CGI program: die at compile time when the
# GATEWAY_INTERFACE environment variable is defined.
31 BEGIN { die qq/CGI use FORBIDDEN!\n/ if (defined($ENV{'GATEWAY_INTERFACE'})); }
# Capture the directory of this script (output of `dirname $0`) at compile
# time so that it is available to the 'use lib' statement below.
32 my $Dir; BEGIN { $Dir = `dirname $0`; chomp $Dir; }
33 use lib
"$Dir/../lib"; # Add relative lib to search path
35 ##--------------------------------------------------------------------------##
# Load configuration values from lib/config.sh, located relative to $Dir.
38 my $config = MHArc
::Config
->load("$Dir/../lib/config.sh");
40 ##--------------------------------------------------------------------------##
45 use MHArc
::Util
qw( usage );
# Parse command-line options into %opt, then call usage() with 0 when
# parsing failed, with 1 for -help, and with 2 for -man.
# NOTE(review): only part of the option specification is visible here.
53 my $clstatus = GetOptions
(\
%opt,
56 'disable-catch-archive!',
68 usage
(0) unless $clstatus;
69 usage
(1) if $opt{'help'};
70 usage
(2) if $opt{'man'};
72 $Verbose = $opt{'verbose'};
# Resolve each setting in priority order: command-line option first, then
# the matching config.sh variable, then a built-in default.
# NOTE(review): the final default of several chains is not visible in this
# excerpt -- confirm against the OPTIONS section of the manpage.
74 my $basedir = $opt{'home'} ||
75 $config->{'SW_ROOT'} ||
77 my $mbox_dir = $opt{'mbox-dir'} ||
78 $config->{'MBOX_DIR'} ||
79 join('/',$basedir,'mbox');
80 my $out_file = $opt{'out'} ||
81 $config->{'PROCMAILRC'} ||
82 join('/', $basedir, 'procmailrc.mharc');
83 my $procmail_path = $opt{'procmail-path'} ||
84 $config->{'PROCMAIL_PATH'};
85 my $cache_size = $opt{'msgid-cache-size'} ||
86 $config->{'MSGID_CACHE_SIZE'} ||
88 my $catch_addr = $opt{'catch-address'} ||
89 $config->{'CATCH_ADDRESS'} ||
91 my $catch_arc = $opt{'catch-archive'} ||
92 $config->{'CATCH_ARCHIVE'} ||
# 'disable-catch-archive' is a negatable option, so an explicit value given
# on the command line wins even when it is false; otherwise the config.sh
# value is used, defaulting to 0.
94 my $nocatch = defined($opt{'disable-catch-archive'}) ?
95 $opt{'disable-catch-archive'} :
96 ($config->{'DISABLE_CATCH_ARCHIVE'} || 0);
97 my $final_dest = $opt{'final-dest'} ||
98 $config->{'FINAL_MSG_DESTINATION'} ||
101 ## Read lists definition file
# The definition file is the first remaining command-line argument, else
# LISTS_DEF_FILE from config.sh, else <basedir>/lib/lists.def.
102 my $file = shift(@ARGV) ||
103 $config->{'LISTS_DEF_FILE'} ||
104 "$basedir/lib/lists.def";
105 my $listdef = MHArc
::ListDef
->new($file);
# Full path of the helper that the generated recipes run with -fmt to
# set MESGDATE.
107 my $extract_date_prg = join('/', $basedir, 'bin', 'extract-mesg-date');
# An undefined, empty, or '-' output name takes the first branch (its body
# is not visible in this excerpt -- presumably standard output, TODO
# confirm); otherwise the named file is created for writing.
111 if (!defined($out_file) || ($out_file eq "") || ($out_file eq '-')) {
# NOTE(review): two-argument open on a bareword handle; the mode is taken
# from the interpolated string, so an unusual $out_file could change it.
115 open(OUTFILE
, ">$out_file") ||
116 die qq/ERROR: Unable to create "$out_file": $!\n/;
121 ## Print procmailrc header
123 ##======================================================================
124 ##!!!!!!!!!!!!!!!!!!!!!! AUTO-CREATED, DO NOT EDIT !!!!!!!!!!!!!!!!!!!!!
125 ##======================================================================
126 ## Procmail resource file for mharc archives generated from
129 ##======================================================================
137 ## Logging disabled as it grows fast and isn't very useful
139 #LOGFILE=\$BASEDIR/log/procmail.log
141 ## Do alot of logging?
144 ## Should deliveries be logged?
147 ## Root path to mail folders
150 ## Flag if a list was matched
156 ##======================================================================
158 ##======================================================================
160 ## Avoid duplicate messages
161 :0 Wh: \$BASEDIR/msgid.lock
162 | formail -D $cache_size \$BASEDIR/msgid.cache
# Generate the procmail recipes for each archive named in the list
# definition, visiting names in the order returned by get_names.
179 foreach $name ($listdef->get_names) {
180 print $Vfh "Generating rule for $name...\n" if $Verbose;
# Copy the 'address' and 'from-address' entries of this definition into
# @addr and @from_addr when they are defined.
184 if (defined($listdef->{$name}{'address'})) {
185 @addr = @
{$listdef->{$name}{'address'}};
187 if (defined($listdef->{$name}{'from-address'})) {
188 @from_addr = @
{$listdef->{$name}{'from-address'}};
# Warn when the definition supplies neither kind of address nor a
# 'procmail-condition' entry.
191 if (!scalar(@addr) && !scalar(@from_addr) &&
192 !defined($listdef->{$name}{'procmail-condition'})) {
193 # no addresses defined
194 warn qq/Warning: No addresses or conditions defined for '$name'\n/;
200 # create procmail regex for list
# The condition line starts with '* (' and alternates up to three patterns:
#   ^TO_<addr>
#   (^(Delivered-To:|List-Post:).*<addr>)
#   ^From:(.*[^-a-zA-Z0-9_.])?<from_addr>
# When there is more than one address, the addresses are joined with '|'
# and wrapped in parentheses.  Address strings are inserted unescaped, so
# they act as regular-expression fragments.
201 if (scalar(@addr) || scalar(@from_addr)) {
202 $pm_conditions .= '* (';
204 $pm_conditions .= '^TO_';
205 $pm_conditions .= '(' if (scalar(@addr) > 1);
206 $pm_conditions .= join('|', @addr);
207 $pm_conditions .= ')' if (scalar(@addr) > 1);
208 $pm_conditions .= '|(^(Delivered-To:|List-Post:).*';
209 $pm_conditions .= '(' if (scalar(@addr) > 1);
210 $pm_conditions .= join('|', @addr);
211 $pm_conditions .= ')' if (scalar(@addr) > 1);
212 $pm_conditions .= ')';
214 if (scalar(@from_addr)) {
215 $pm_conditions .= '|(' if (scalar(@addr));
216 $pm_conditions .= '^From:(.*[^-a-zA-Z0-9_.])?';
217 $pm_conditions .= '(' if (scalar(@from_addr) > 1);
218 $pm_conditions .= join('|', @from_addr);
219 $pm_conditions .= ')' if (scalar(@from_addr) > 1);
220 $pm_conditions .= ')' if (scalar(@addr));
222 $pm_conditions .= ')' . "\n";
# Append every 'procmail-condition' entry unchanged, one per line.
225 if (defined($listdef->{$name}{'procmail-condition'})) {
226 foreach $str (@
{$listdef->{$name}{'procmail-condition'}}) {
227 $pm_conditions .= "$str\n";
231 # check if doing monthly or yearly archives
232 $period = lc($listdef->{$name}{'period'}[0]) || 'month';
# The catch archive is always monthly, whatever its 'period' entry says.
233 $period = 'month' if ($name eq $catch_arc);
# NOTE(review): the branch bodies are not visible here; presumably they
# choose the $time_fmt passed to the date helper below -- TODO confirm.
234 if ($period eq 'year') {
240 # check if rule should not be final if matched
241 if ($listdef->{$name}{'final'}) {
247 # check if sender specified no archiving should be honored
248 if ($listdef->{$name}{'check-no-archive'}) {
252 * ^(X-no-archive: yes|Restrict: no-external-archive)
# NOTE(review): the CVS recipe below assigns CMDEXEC_ while the regular
# recipe assigns CMDEXC_; both appear to exist only to run mkdir, so the
# spelling difference looks harmless -- confirm.
259 # check if separating out cvs commits
# CVS handling applies only when 'cvs-commits' is true and the condition
# text built above contains at least one non-whitespace character.
260 if (($check_cvs = $listdef->{$name}{'cvs-commits'}[0]) &&
261 ($pm_conditions =~ /\S/)) {
262 my $cvs_prefix = $listdef->{$name}{'cvs-subject-prefix'}[0] ||
267 $pm_conditions\* ^Subject: $cvs_prefix
270 { CMDEXEC_=`mkdir -m 755 -p \$MBOXROOT/$name.CVS` }
274 MESGDATE=| $extract_date_prg -fmt '$time_fmt'
277 \$MBOXROOT/$name.CVS/\$MESGDATE
287 $pm_conditions\{$noarchive
291 { CMDEXC_=`mkdir -m 755 -p \$MBOXROOT/$name` }
295 MESGDATE=| $extract_date_prg -fmt '$time_fmt'
298 \$MBOXROOT/$name/\$MESGDATE
306 ##======================================================================
307 ## Send unmatched message to $catch_addr
308 ##======================================================================
317 } elsif (!$nocatch) {
319 ##======================================================================
321 ##======================================================================
326 { CMDEXC_=`mkdir -m 755 -p \$MBOXROOT/$catch_arc` }
330 MESGDATE=| $extract_date_prg -fmt '%Y-%m'
333 \$MBOXROOT/$catch_arc/\$MESGDATE
340 ##======================================================================
341 ## Final destination, generally discard since at least one of the
342 ## above rules will have matched. But if paranoid, or for debugging
343 ## you can have all message copied to a real mailbox by the -final-dest
345 ##======================================================================
350 if ($outfh != \
*STDOUT
) {
352 print $Vfh qq/Procmail rcfile written to "$out_file"\n/ if $Verbose;
357 ##--------------------------------------------------------------------------##
362 mk-procmailrc - Generate procmailrc from lists.def
367 mk-procmailrc [options]
371 This program generates the main procmailrc used by the
372 L<filter-spool|filter-spool> program. The procmailrc is generated
373 from C<E<lt>mharc-rootE<gt>/lib/lists.def>.
375 This program is typically invoked by calling C<make> from
376 the mharc root directory with configuration options specified
377 in C<E<lt>mharc-rootE<gt>/lib/config.sh>.
379 =head1 LIST DEFINITION FILE
381 The list definition file, C<E<lt>mharc-rootE<gt>/lib/lists.def>,
382 controls how the procmailrc used by L<filter-spool|filter-spool>
383 is generated. The format of the file is simple and more convenient
384 than writing the procmailrc file yourself.
386 The basic format of the file is as follows:
392 Any blank lines or lines starting with a C<#> are ignored.
396 Lines with the following format:
398 Option-Name: Option-Value
404 =head2 lists.def Supported Options
410 Starts, and defines, the name of an archive. This name serves
411 as the directory name containing archive data and the list title
412 (the C<$LIST-TITLE$> MHonArc resource variable) for archive pages.
413 A common practice is to use the list address, but this is not required,
414 especially if the archive is a combination of multiple lists, or it
415 is preferred to use a more abstract name for simplicity.
417 The name is also used when the L<web-archive> script looks for
418 archive-specific MHonArc resource settings. If the following
419 resource file exists,
421 $MHA_RC_DIR/<name>.mrc
423 where C<$MHA_RC_DIR> is the value of the MHA_RC_DIR C<config.sh> variable
424 and C<E<lt>nameE<gt>> is the name of the archive, L<web-archive> will
425 pass the resource file to MHonArc when processing the HTML archive.
426 This provides a convenient way to provide list-specific customization
431 Mail address of the list.
432 Multiple C<Address> options can be specified
433 for an archive if a list has more than one known address (e.g. due
434 to migration) or the archive is a collection of multiple lists.
436 B<NOTE:> The address is technically treated as part of
437 a procmail regular expression. Take the following as an example:
439 Address: mharc-users@mhonarc.org
441 In regular expressions, the '.' character represents any character.
442 Therefore, the following strings would match the above specification:
444 mharc-users@mhonarc#org
445 mharc-users@mhonarcXorg
446 mharc-users@mhonarc@org
448 In practice, this technicality will generally have no effect, but if
449 you want to be pedantic, do the following:
451 Address: mharc-users@mhonarc\.org
453 The '\' tells procmail to treat the '.' literally.
455 Because the actual address string given is treated as part of a regular
456 expression, you can specify a range of addresses with a single option.
459 mharc-[^@]*@mhonarc\.org
461 will match the following addresses:
463 mharc-users@mhonarc.org
464 mharc-rules@mhonarc.org
465 mharc-rocks@mhonarc.org
466 mharc-is-the-best@mhonarc.org
471 Label to use for Name column of all-lists index. If not specified,
472 the name provided by the C<Name> option is used.
474 =item Check-No-Archive
476 Boolean option (C<0> or C<1>) if author specified archiving permission
477 is honored. The author can request no archiving should be done
478 by defining one of the following header fields:
480 Restrict: no-external-archive
483 If C<Check-No-Archive> is enabled, a message to the list with either
484 field defined will not be archived.
488 Boolean option (C<0> or C<1>) if CVS commit messages should be
491 Use this option for development lists that have CVS project commits
492 mailed to the list, and you want to avoid cluttering regular discussion
495 =item CVS-Subject-Prefix
497 Specifies the C<Subject:> prefix denoting CVS commits to the list.
498 This option is only used if C<CVS-Commits> is specified.
502 Brief description of archive and serves as the main title of archive
507 Boolean option (C<0> or C<1>) if generated rule should be final.
508 I.e. If a message matches, further rules will not be examined.
510 Use this option to short-circuit messages from being stored in multiple
511 archives. For example, you may want to catch messages cross-posted
512 to a special address to only be archived in a special archive and
513 not any of the regular archives.
515 Another example is if you use the special C<Name> "C<.catch>"
516 (or the C<-catch-archive> setting described in L<"OPTIONS">).
517 Using "C<.catch>" is handy for C<Final> definitions to pre-catch
518 messages that should not be placed in any list archive.
522 Mail address of the list. This option is an alternative to C<Address>
523 where a list can only be denoted by the C<From: > field of messages.
524 This is fairly common for one-way lists, like newsletters, where
525 subscribers receive list messages but cannot post to the list.
527 Multiple C<From-Address> options can be specified if a list has more
528 than one known address (e.g. due to migration) or the archive is a
529 collection of multiple lists.
531 =item Hide-From-All-Lists
533 Boolean option (C<0> or C<1>) if archive should not be listed in
535 If set to C<1>, the archive will not be shown.
536 The default value for this option is C<0>.
540 Sets the language/locale of the archive.
542 B<CAUTION:> Only set this option if using a version of MHonArc
543 (v2.6.7, or later) that supports the LANG resource. Otherwise,
544 you will get a runtime error when L<web-archive|web-archive> is
547 See the LANG resource reference page of the MHonArc documentation
548 for more information.
550 =item MHonArc-Options
552 Additional MHonArc command-line options.
556 Boolean option (C<0> or C<1>) if links to raw archives should exist.
557 If set to C<1>, links will not be created or disabled. The
558 default value for this option is C<0>.
560 Use this option if your HTML archives have been customized to obscure
561 addresses to prevent address harvesting.
565 Boolean option (C<0> or C<1>) if searching should be disabled.
566 If set to C<1>, no search index is created and the C<$SEARCH-FORM$>
567 custom MHonArc resource variable will be set to the empty string.
569 B<NOTE:> Disabling searching will disable some navigational features
570 that are dependent upon the search index.
574 If archive is a yearly or monthly archive. Legal values are C<year>
575 or C<month>. If Period is not defined, C<month> is the default.
577 =item Procmail-Condition
579 Additional condition to apply to base address check. The condition
580 must be legal procmailrc syntax and should include any prefixing C<*>,
581 C<!>, et al. This option can be specified multiple times.
583 C<Procmail-Condition> can also be used in place of C<Address> and
584 C<From-Address> to provide arbitrary matching rules for archiving
585 messages. This is useful for rare cases where messages to be archived
586 cannot be determined by recipient or from addresses.
588 B<CAUTION:> Exercise caution when using this option, especially if
589 C<CVS-Commits> is true. When C<CVS-Commits> is true, an additional
590 rule already exists to check for the C<CVS-Subject-Prefix> setting.
594 =head2 lists.def Notes
600 Every archive defined must define at least one C<Address>, C<From-Address>,
601 or C<Procmail-Condition> option.
605 The order of archive definitions is mirrored in the generated procmail
606 rcfile. This is important to properly honor the semantics of archives
607 with the C<Final> option specified.
611 =head2 lists.def Example
613 ## In this definition, we define multiple addresses to check.
615 Description: MHonArc Users
616 Address: mhonarc-users@mhonarc.org
617 Address: mhonarc@ncsa.uiuc.edu
618 Address: mhonarc@rosat.mpe-garching.mpg.de
620 ## This definition defines a list that receives CVS commits and those
621 ## commits should be separated into a special archive as to not
622 ## pollute the discussion messages with cvs commit messages
624 Description: MHonArc Development
625 Address: mhonarc-dev@mhonarc.org
627 CVS-Subject-Prefix: CVS:
631 You should never have to invoke this program with any options since
632 C<config.sh> specifies any options used by this program. However,
633 for advanced uses, or you do not care if you mess things up, the
634 following options are available:
638 =item C<-catch-address> I<mail-address>
640 The name of the email address to forward all unmatched messages to.
641 This is an alternative to C<-catch-archive>, and will supersede
642 C<-catch-archive>, if defined.
643 If this option is not specified, the C<CATCH_ADDRESS> variable in
644 C<config.sh> is used.
646 =item C<-catch-archive> I<name>
648 The name of the I<catch> archive. The I<catch> archive collects
649 all messages that do not match any list rules. If this
650 option is not specified, the C<CATCH_ARCHIVE> variable in
651 C<config.sh> is used, else the name "C<.catch>" is used.
653 B<Note:> If you use this option, it is recommended that the name
654 starts with a C<.> (a dot). This ensures that no search index is built
655 and it will not be listed in the all-lists page.
657 =item C<-disable-catch-archive>
659 If specified, no I<catch> archive will be defined.
661 B<CAUTION:> Care should be used when using this option since any
662 message that does not match a normal rule will be lost.
664 =item C<-final-dest> I<mailbox>
666 The destination of messages that make it to the end of the procmailrc.
667 It is normal for messages to make it to the end since the list matching
668 rules create copies of the message during filtering. Hence, it is
669 normal to see "C<Folder: /dev/null>" destinations in the procmail log
670 and it does not indicate that a message was lost.
672 Message copying is done in order to properly archive a message that has
673 been cross-posted to multiple lists. Message copying is not done for
674 archives with the C<Final> option set to 1, for CVS commit archives,
675 or for messages that are captured by the catch archive.
677 This option is generally only used for debugging purposes.
679 If C<-final-dest> is not specified, the
680 C<FINAL_MSG_DESTINATION> variable in C<config.sh> is used, else
681 C</dev/null> is used.
685 Print out usage information.
687 =item C<-home> I<pathname>
689 B<You should not use this option>.
691 Root pathname of software and archives. If not specified,
692 C<SW_ROOT> variable in C<config.sh> is used, else the parent directory
693 containing this program is used.
697 Print out entire manpage.
699 =item C<-mbox-dir> I<pathname>
701 Root pathname containing raw mailbox archives. If not specified,
702 C<MBOX_DIR> variable in C<config.sh> is used, else C<I<-home>/mbox>
705 =item C<-msgid-cache-size> I<number-of-bytes>
707 The maximum size, in bytes, of the message-id cache. The message-id
708 cache helps avoid processing duplicate messages.
710 If this option is not specified, the C<MSGID_CACHE_SIZE> variable
711 in C<config.sh> is used, else 16384 will be used.
713 =item C<-out> I<pathname>
715 Output filename. If this option is not specified, the C<PROCMAILRC>
716 variable in C<config.sh> is used, else C<I<-home>/procmailrc.mharc>
719 If "-" is the I<pathname>, then the procmailrc will be dumped to
722 =item C<-procmail-path> I<pathname-list>
724 The search path for C<procmail> to use. If this option is not
725 specified, the C<PROCMAIL_PATH> variable in C<config.sh> is used.
733 =item C<E<lt>mharc-rootE<gt>/lib/lists.def>
735 Mailing lists definition file.
737 =item C<E<lt>mharc-rootE<gt>/lib/config.sh>
739 Main configuration file for mharc.
745 $Id: mk-procmailrc,v 1.25 2003/08/09 17:51:04 ehood Exp $
749 Earl Hood, earl@earlhood.com
751 This program is part of the mharc archiving system and comes with
752 ABSOLUTELY NO WARRANTY and may be copied only under the terms of
753 the GNU General Public License, which may be found in the mharc