gitignore some irrelevant dirs
[mharc.git] / bin / mk-procmailrc
CommitLineData
2ea8f66b
IK
1#!/usr/bin/perl
2##--------------------------------------------------------------------------##
3## File:
4## $Id: mk-procmailrc,v 1.25 2003/08/09 17:51:04 ehood Exp $
5## Description:
6## Program to create a procmail recipe file from lists.def.
7## Run script with '-man' option to view manpage for this program.
8##--------------------------------------------------------------------------##
9## Copyright (C) 2001-2002 Earl Hood <earl@earlhood.com>
10##
11## This program is free software; you can redistribute it and/or modify
12## it under the terms of the GNU General Public License as published by
13## the Free Software Foundation; either version 2 of the License, or
14## (at your option) any later version.
15##
16## This program is distributed in the hope that it will be useful,
17## but WITHOUT ANY WARRANTY; without even the implied warranty of
18## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19## GNU General Public License for more details.
20##
21## You should have received a copy of the GNU General Public License
22## along with this program; if not, write to the Free Software
23## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24## 02111-1307, USA
25##--------------------------------------------------------------------------##
26
27package MHArc::mk_procmailrc;
28
29##--------------------------------------------------------------------------##
30# <x-boot-strap>
# Refuse to run when GATEWAY_INTERFACE is set, i.e. when invoked as a CGI.
BEGIN { die qq/CGI use FORBIDDEN!\n/ if (defined($ENV{'GATEWAY_INTERFACE'})); }
# Directory containing this script.  It is resolved at compile time so the
# "use lib" below can see it.  File::Basename is used instead of shelling
# out to dirname(1): no subprocess is spawned and script paths containing
# spaces or shell metacharacters are handled correctly.
my $Dir;
BEGIN {
    require File::Basename;
    $Dir = File::Basename::dirname($0);
}
use lib "$Dir/../lib";	# Add relative lib to search path
34# </x-boot-strap>
35##--------------------------------------------------------------------------##
36# <x-config>
37use MHArc::Config;
38my $config = MHArc::Config->load("$Dir/../lib/config.sh");
39# </x-config>
40##--------------------------------------------------------------------------##
41
42
43use Getopt::Long;
44use MHArc::ListDef;
45use MHArc::Util qw( usage );
46
47my $Verbose = 0;
48my $Vfh = \*STDOUT;
49
##--------------------------------------------------------------------------##
## MAIN: generate the procmailrc.
##
## Steps:
##   1. Parse command-line options; each setting falls back to the matching
##      config.sh variable and then to a built-in default.
##   2. Load the list definition file.
##   3. Open the output (a file, or STDOUT when the output name is "-").
##   4. Print the rcfile header, one recipe per defined list (plus an extra
##      recipe for CVS commits when requested), the catch rule for
##      unmatched mail, and the final destination recipe.
##
## Dies if the output file cannot be created or cannot be flushed on close.
##
MAIN: {
    my %opt = ( );
    my $clstatus = GetOptions(\%opt,
	'catch-address=s',
	'catch-archive=s',
	'disable-catch-archive!',
	'final-dest=s',
	'home=s',
	'mbox-dir=s',
	'msgid-cache-size=i',
	'out=s',
	'procmail-path=s',

	'verbose!',
	'help',
	'man'
    );
    usage(0) unless $clstatus;
    usage(1) if $opt{'help'};
    usage(2) if $opt{'man'};

    $Verbose = $opt{'verbose'};

    ## Resolve settings: command-line, then config.sh, then default
    my $basedir       = $opt{'home'} ||
			$config->{'SW_ROOT'} ||
			"$Dir/..";
    my $mbox_dir      = $opt{'mbox-dir'} ||
			$config->{'MBOX_DIR'} ||
			join('/', $basedir, 'mbox');
    my $out_file      = $opt{'out'} ||
			$config->{'PROCMAILRC'} ||
			join('/', $basedir, 'procmailrc.mharc');
    my $procmail_path = $opt{'procmail-path'} ||
			$config->{'PROCMAIL_PATH'};
    my $cache_size    = $opt{'msgid-cache-size'} ||
			$config->{'MSGID_CACHE_SIZE'} ||
			16384;
    my $catch_addr    = $opt{'catch-address'} ||
			$config->{'CATCH_ADDRESS'} ||
			"";
    my $catch_arc     = $opt{'catch-archive'} ||
			$config->{'CATCH_ARCHIVE'} ||
			'.catch';
    # The negatable option must be tested with defined() so that an
    # explicit -nodisable-catch-archive overrides config.sh.
    my $nocatch       = defined($opt{'disable-catch-archive'}) ?
			    $opt{'disable-catch-archive'} :
			    ($config->{'DISABLE_CATCH_ARCHIVE'} || 0);
    my $final_dest    = $opt{'final-dest'} ||
			$config->{'FINAL_MSG_DESTINATION'} ||
			'/dev/null';

    ## Read lists definition file
    my $file = shift(@ARGV) ||
	       $config->{'LISTS_DEF_FILE'} ||
	       "$basedir/lib/lists.def";
    my $listdef = MHArc::ListDef->new($file);

    my $extract_date_prg = join('/', $basedir, 'bin', 'extract-mesg-date');

    ## Select output handle.  When the rcfile itself goes to STDOUT,
    ## verbose chatter is moved to STDERR so it cannot corrupt the rcfile.
    my $outfh;
    my $to_stdout = (!defined($out_file) ||
		     ($out_file eq "") ||
		     ($out_file eq '-'));
    if ($to_stdout) {
	$outfh = \*STDOUT;
	$Vfh   = \*STDERR;
    } else {
	# Three-argument open: the filename can never be read as a mode.
	open($outfh, '>', $out_file) ||
	    die qq/ERROR: Unable to create "$out_file": $!\n/;
	$Vfh   = \*STDOUT;
    }

    ## Print procmailrc header
    print $outfh <<EOT;
##======================================================================
##!!!!!!!!!!!!!!!!!!!!!! AUTO-CREATED, DO NOT EDIT !!!!!!!!!!!!!!!!!!!!!
##======================================================================
## Procmail resource file for mharc archives generated from
## $file
## by $0
##======================================================================

SHELL=/bin/sh
LINEBUF=4096
UMASK=133
PATH=$procmail_path
BASEDIR=$basedir

## Logging disabled as it grows fast and isn't very useful
LOGFILE=/dev/null
#LOGFILE=\$BASEDIR/log/procmail.log

## Do alot of logging?
#VERBOSE=yes

## Should deliveries be logged?
LOGABSTRACT=yes

## Root path to mail folders
MBOXROOT=$mbox_dir

## Flag if a list was matched
HAVEMATCH=no

## Date of message
MESGDATE=

##======================================================================
## Start Rules
##======================================================================

## Avoid duplicate messages
:0 Wh: \$BASEDIR/msgid.lock
| formail -D $cache_size \$BASEDIR/msgid.cache

EOT

    ## Print recipes, in the order the lists are returned by get_names
    foreach my $name ($listdef->get_names) {
	print $Vfh "Generating rule for $name...\n"  if $Verbose;

	my @addr      = ( );
	my @from_addr = ( );
	if (defined($listdef->{$name}{'address'})) {
	    @addr = @{$listdef->{$name}{'address'}};
	}
	if (defined($listdef->{$name}{'from-address'})) {
	    @from_addr = @{$listdef->{$name}{'from-address'}};
	}

	if (!scalar(@addr) && !scalar(@from_addr) &&
		!defined($listdef->{$name}{'procmail-condition'})) {
	    # nothing to match on, so no recipe can be generated
	    warn qq/Warning: No addresses or conditions defined for '$name'\n/;
	    next;
	}

	my $pm_conditions = '';

	# create procmail regex for list
	if (scalar(@addr) || scalar(@from_addr)) {
	    $pm_conditions .= '* (';
	    if (scalar(@addr)) {
		# Match the recipient headers covered by procmail's ^TO_
		# macro, or an explicit Delivered-To:/List-Post: field.
		my $addr_alt = _alternation(@addr);
		$pm_conditions .= '^TO_' . $addr_alt;
		$pm_conditions .= '|(^(Delivered-To:|List-Post:).*'
				. $addr_alt . ')';
	    }
	    if (scalar(@from_addr)) {
		# The From: test is wrapped in its own group only when it
		# is OR-ed onto a preceding recipient test.
		$pm_conditions .= '|('  if (scalar(@addr));
		$pm_conditions .= '^From:(.*[^-a-zA-Z0-9_.])?'
				. _alternation(@from_addr);
		$pm_conditions .= ')'   if (scalar(@addr));
	    }
	    $pm_conditions .= ')' . "\n";
	}

	# user-supplied conditions are appended verbatim, one per line
	if (defined($listdef->{$name}{'procmail-condition'})) {
	    foreach my $str (@{$listdef->{$name}{'procmail-condition'}}) {
		$pm_conditions .= "$str\n";
	    }
	}

	# check if doing monthly or yearly archives; the catch archive
	# is always monthly
	my $period = lc($listdef->{$name}{'period'}[0] || '') || 'month';
	$period = 'month'  if ($name eq $catch_arc);
	my $time_fmt = ($period eq 'year') ? '%Y' : '%Y-%m';

	# check if rule should not be final if matched: non-final rules
	# deliver a copy so later rules still see the message
	my $pm_copy = $listdef->{$name}{'final'} ? '' : ' c';

	# check if sender specified no archiving should be honored
	my $noarchive = "";
	if ($listdef->{$name}{'check-no-archive'}) {
	    $noarchive =<<'EOT';

 :0
 * ^(X-no-archive: yes|Restrict: no-external-archive)
 /dev/null
EOT
	}

	# check if separating out cvs commits
	my $check_cvs = $listdef->{$name}{'cvs-commits'}[0];
	if ($check_cvs && ($pm_conditions =~ /\S/)) {
	    my $cvs_prefix = $listdef->{$name}{'cvs-subject-prefix'}[0] ||
			     'CVS commit';
	    print $outfh <<EOT;
## $name (CVS)
:0
$pm_conditions\* ^Subject: $cvs_prefix
{$noarchive
 :0
 { CMDEXEC_=`mkdir -m 755 -p \$MBOXROOT/$name.CVS` }

 :0 Wih
 * MESGDATE ?? ^^^^
 MESGDATE=| $extract_date_prg -fmt '$time_fmt'

 :0:
 \$MBOXROOT/$name.CVS/\$MESGDATE
}

EOT
	} # End: $check_cvs

	# address recipe
	print $outfh <<EOT;
## $name
:0
$pm_conditions\{$noarchive
 HAVEMATCH=yes

 :0
 { CMDEXC_=`mkdir -m 755 -p \$MBOXROOT/$name` }

 :0 Wih
 * MESGDATE ?? ^^^^
 MESGDATE=| $extract_date_prg -fmt '$time_fmt'

 :0$pm_copy:
 \$MBOXROOT/$name/\$MESGDATE
}

EOT
    }

    ## Unmatched messages: forwarding to an address takes precedence
    ## over storing them in the catch archive.
    if ($catch_addr) {
	print $outfh <<EOT;
##======================================================================
## Send unmatched message to $catch_addr
##======================================================================
:0
* HAVEMATCH ?? no
{
 :0
 ! $catch_addr
}
EOT

    } elsif (!$nocatch) {
	print $outfh <<EOT;
##======================================================================
## No Matches
##======================================================================
:0
* HAVEMATCH ?? no
{
 :0
 { CMDEXC_=`mkdir -m 755 -p \$MBOXROOT/$catch_arc` }

 :0 Wih
 * MESGDATE ?? ^^^^
 MESGDATE=| $extract_date_prg -fmt '%Y-%m'

 :0
 \$MBOXROOT/$catch_arc/\$MESGDATE
}

EOT
    }

    print $outfh <<EOT;
##======================================================================
## Final destination, generally discard since at least one of the
## above rules will have matched. But if paranoid, or for debugging
## you can have all message copied to a real mailbox by the -final-dest
## option.
##======================================================================
:0
$final_dest
EOT

    if (!$to_stdout) {
	# Buffered write errors (e.g. a full disk) only surface at close,
	# so a failed close means the rcfile may be truncated.
	close($outfh) ||
	    die qq/ERROR: Unable to write "$out_file": $!\n/;
	print $Vfh qq/Procmail rcfile written to "$out_file"\n/  if $Verbose;
    }

} # End: MAIN

##--------------------------------------------------------------------------##
## _alternation(@patterns): join regex fragments with '|'.  The result is
## wrapped in parentheses only when there is more than one fragment, so a
## single pattern is returned unchanged.
##
sub _alternation {
    my @patterns = @_;
    my $joined = join('|', @patterns);
    return (scalar(@patterns) > 1) ? "($joined)" : $joined;
}
356
357##--------------------------------------------------------------------------##
358__END__
359
360=head1 NAME
361
362mk-procmailrc - Generate procmailrc from lists.def
363
364=head1 SYNOPSIS
365
366 mk-procmailrc
367 mk-procmailrc [options]
368
369=head1 DESCRIPTION
370
371This program generates the main procmailrc used by the
372L<filter-spool|filter-spool> program. The procmailrc is generated
373from C<E<lt>mharc-rootE<gt>/lib/lists.def>.
374
375This program is typically invoked by calling C<make> from
376the mharc root directory with configuration options specified
377in C<E<lt>mharc-rootE<gt>/lib/config.sh>.
378
379=head1 LIST DEFINITION FILE
380
381The list definition file, C<E<lt>mharc-rootE<gt>/lib/lists.def>,
382controls how the procmailrc used by L<filter-spool|filter-spool>
383is generated. The format of the file is simple and more convenient
384than writing the procmailrc file yourself.
385
386The basic format of the file is as follows:
387
388=over
389
390=item *
391
392Any blank lines or lines starting with a C<#> are ignored.
393
394=item *
395
396A line with the following format:
397
398 Option-Name: Option-Value
399
400is an option.
401
402=back
403
404=head2 lists.def Supported Options
405
406=over
407
408=item Name
409
410Starts, and defines, the name of an archive. This name serves
411as the directory name containing archive data and the list title
412(the C<$LIST-TITLE$> MHonArc resource variable) for archive pages.
413A common practice is to use the list address, but this is not required,
414especially if the archive is a combination of multiple lists, or it
415is preferred to use a more abstract name for simplicity.
416
417The name also is used when the L<web-archive> script looks for
418archive-specific MHonArc resource settings. If the following
419resource file exists,
420
421 $MHA_RC_DIR/<name>.mrc
422
423where C<$MHA_RC_DIR> is the value of the MHA_RC_DIR C<config.sh> variable
424and C<E<lt>nameE<gt>> is the name of the archive, L<web-archive> will
425pass the resource file to MHonArc when processing the HTML archive.
426This provides a convenient way to provide list-specific customization
427to the archive.
428
429=item Address
430
431Mail address of the list.
432Multiple C<Address> options can be specified
433for an archive if a list has more than one known address (e.g. due
434to migration) or the archive is a collection of multiple lists.
435
436B<NOTE:> The address is technically treated as part of
437a procmail regular expression. Take the following as an example:
438
439 Address: mharc-users@mhonarc.org
440
441In regular expressions, the '.' character represents any character.
442Therefore, the following strings would match the above specification:
443
444 mharc-users@mhonarc#org
445 mharc-users@mhonarcXorg
446 mharc-users@mhonarc@org
447
448In practice, this technicality will generally have no effect, but if
449you want to be pedantic, do the following:
450
451 Address: mharc-users@mhonarc\.org
452
453The '\' tells procmail to treat the '.' literally.
454
455Because the actual address string given is treated as part of a regular
456expression, you can specify a range of addresses with a single option.
457For example,
458
459 mharc-[^@]*@mhonarc\.org
460
461will match the following addresses:
462
463 mharc-users@mhonarc.org
464 mharc-rules@mhonarc.org
465 mharc-rocks@mhonarc.org
466 mharc-is-the-best@mhonarc.org
467 ...
468
469=item All-Lists-Name
470
471Label to use for Name column of all-lists index. If not specified,
472the name provided by the C<Name> option is used.
473
474=item Check-No-Archive
475
476Boolean option (C<0> or C<1>) if author specified archiving permission
477is honored. The author can request no archiving should be done
478by defining one of the following header fields:
479
480 Restrict: no-external-archive
481 X-no-archive: yes
482
483If C<Check-No-Archive> is enabled, a message to the list with either
484field defined will not be archived.
485
486=item CVS-Commits
487
488Boolean option (C<0> or C<1>) if CVS commit messages should be
489archived separately.
490
491Use this option for development lists that have CVS project commits
492mailed to the list, and you want to avoid cluttering regular discussion
493mail.
494
495=item CVS-Subject-Prefix
496
497Specifies the C<Subject:> prefix denoting CVS commits to the list.
498This option is only used if C<CVS-Commits> is specified.
499
500=item Description
501
502Brief description of archive and serves as the main title of archive
503index pages.
504
505=item Final
506
507Boolean option (C<0> or C<1>) if generated rule should be final.
508I.e. If a message matches, further rules will not be examined.
509
510Use this option to short-circuit messages from being stored in multiple
511archives. For example, you may want to catch messages cross-posted
512to a special address to only be archived in a special archive and
513not any of the regular archives.
514
515Another example is if you use the special C<Name> "C<.catch>"
516(or the C<-catch-archive> setting described in L<"OPTIONS">).
517Using "C<.catch>" is handy for C<Final> definitions to pre-catch
518messages that should not be placed in any list archive.
519
520=item From-Address
521
522Mail address of the list. This option is an alternative to C<Address>
523where a list can only be denoted by the C<From: > field of messages.
524This is fairly common for one-way lists, like newsletters, where
525subscribers receive list messages but cannot post to the list.
526
527Multiple C<From-Address> options can be specified if a list has more
528than one known address (e.g. due to migration) or the archive is a
529collection of multiple lists.
530
531=item Hide-From-All-Lists
532
533Boolean option (C<0> or C<1>) if archive should not be listed in
534the all-lists index.
535If set to C<1>, the archive will not be shown.
536The default value for this option is C<0>.
537
538=item Lang
539
540Sets the language/locale of the archive.
541
542B<CAUTION:> Only set this option if using a version of MHonArc
543(v2.6.7, or later) that supports the LANG resource. Otherwise,
544you will get a runtime error when L<web-archive|web-archive> is
545invoked.
546
547See the LANG resource reference page of the MHonArc documentation
548for more information.
549
550=item MHonArc-Options
551
552Additional MHonArc command-line options.
553
554=item No-Raw-Link
555
556Boolean option (C<0> or C<1>) if links to raw archives should be suppressed.
557If set to C<1>, links will either not be created or be disabled. The
558default value for this option is C<0>.
559
560Use this option if your HTML archives have been customized to obscure
561addresses to prevent address harvesting.
562
563=item No-Search
564
565Boolean option (C<0> or C<1>) if searching should be disabled.
566If set to C<1>, no search index is created and the C<$SEARCH-FORM$>
567custom MHonArc resource variable will be set to the empty string.
568
569B<NOTE:> Disabling searching will disable some navigational features
570that are dependent upon the search index.
571
572=item Period
573
574If archive is a yearly or monthly archive. Legal values are C<year>
575or C<month>. If Period is not defined, C<month> is the default.
576
577=item Procmail-Condition
578
579Additional condition to apply to base address check. The condition
580must be legal procmailrc syntax and should include any prefixing C<*>,
581C<!>, et al. This option can be specified multiple times.
582
583C<Procmail-Condition> can also be used in place of C<Address> and
584C<From-Address> to provide arbitrary matching rules for archiving
585messages. This is useful for rare cases where messages to be archived
586cannot be determined by recipient or from addresses.
587
588B<CAUTION:> Exercise caution when using this option, especially if
589C<CVS-Commits> is true. When C<CVS-Commits> is true, an additional
590rule already exists to check for the C<CVS-Subject-Prefix> setting.
591
592=back
593
594=head2 lists.def Notes
595
596=over
597
598=item *
599
600Every archive defined must define at least one C<Address>, C<From-Address>,
601or C<Procmail-Condition> option.
602
603=item *
604
605The order of archive definitions is mirrored in the generated procmail
606rcfile. This is important to properly honor the semantics of archives
607with the C<Final> option specified.
608
609=back
610
611=head2 lists.def Example
612
613 ## In this definition, we define multiple addresses to check.
614 Name: mhonarc-users
615 Description: MHonArc Users
616 Address: mhonarc-users@mhonarc.org
617 Address: mhonarc@ncsa.uiuc.edu
618 Address: mhonarc@rosat.mpe-garching.mpg.de
619
620 ## This definition defines a list that receives CVS commits and those
621 ## commits should be separated into a special archive as to not
622 ## pollute the discussion messages with cvs commit messages
623 Name: mhonarc-dev
624 Description: MHonArc Development
625 Address: mhonarc-dev@mhonarc.org
626 CVS-Commits: 1
627 CVS-Subject-Prefix: CVS:
628
629=head1 OPTIONS
630
631You should never have to invoke this program with any options since
632C<config.sh> specifies any options used by this program. However,
633for advanced uses, or you do not care if you mess things up, the
634following options are available:
635
636=over
637
638=item C<-catch-address> I<mail-address>
639
640The name of the email address to forward all unmatched messages to.
641This is an alternative to C<-catch-archive>, and will supersede
642C<-catch-archive>, if defined.
643If this option is not specified, the C<CATCH_ADDRESS> variable in
644C<config.sh> is used.
645
646=item C<-catch-archive> I<name>
647
648The name of the I<catch> archive. The I<catch> archive collects
649all messages that do not match any list rules. If this
650option is not specified, the C<CATCH_ARCHIVE> variable in
651C<config.sh> is used, else the name "C<.catch>" is used.
652
653B<Note:> If you use this option, it is recommended that the name
654starts with a C<.> (a dot). This ensures that no search index is built
655and it will not be listed in the all-lists page.
656
657=item C<-disable-catch-archive>
658
659If specified, no I<catch> archive will be defined.
660
661B<CAUTION:> Care should be used when using this option since any
662message that does not match a normal rule will be lost.
663
664=item C<-final-dest> I<mailbox>
665
666The destination of messages that make it to the end of the procmailrc.
667It is normal for messages to make it to the end since the list matching
668rules create copies of the message during filtering. Hence, it is
669normal to see "C<Folder: /dev/null>" destinations in the procmail log
670and it does not indicate that a message was lost.
671
672Message copying is done in order to properly archive a message that has
673been cross-posted to multiple lists. Message copying is not done for
674archives with the C<Final> option set to 1, for CVS commit archives,
675or for messages that are captured by the catch archive.
676
677This option is generally only used for debugging purposes.
678
679If C<-final-dest> is not specified, the
680C<FINAL_MSG_DESTINATION> variable in C<config.sh> is used, else
681C</dev/null> is used.
682
683=item C<-help>
684
685Print out usage information.
686
687=item C<-home> I<pathname>
688
689B<You should not use this option>.
690
691Root pathname of software and archives. If not specified,
692C<SW_ROOT> variable in C<config.sh> is used, else the parent of the directory
693containing this program is used.
694
695=item C<-man>
696
697Print out entire manpage.
698
699=item C<-mbox-dir> I<pathname>
700
701Root pathname containing raw mailbox archives. If not specified,
702C<MBOX_DIR> variable in C<config.sh> is used, else C<I<-home>/mbox>
703is used.
704
705=item C<-msgid-cache-size> I<number-of-bytes>
706
707The maximum size, in bytes, of the message-id cache. The message-id
708cache helps avoid processing duplicate messages.
709
710If this option is not specified, the C<MSGID_CACHE_SIZE> variable
711in C<config.sh> is used, else 16384 will be used.
712
713=item C<-out> I<pathname>
714
715Output filename. If this option is not specified, the C<PROCMAILRC>
716variable in C<config.sh> is used, else C<I<-home>/procmailrc.mharc>
717is used.
718
719If "-" is the I<pathname>, then the procmailrc will be dumped to
720standard out.
721
722=item C<-procmail-path> I<pathname-list>
723
724The search path for C<procmail> to use. If this option is not
725specified, the C<PROCMAIL_PATH> variable in C<config.sh> is used.
726
727=back
728
729=head1 FILES
730
731=over
732
733=item C<E<lt>mharc-rootE<gt>/lib/lists.def>
734
735Mailing lists definition file.
736
737=item C<E<lt>mharc-rootE<gt>/lib/config.sh>
738
739Main configuration file for mharc.
740
741=back
742
743=head1 VERSION
744
745$Id: mk-procmailrc,v 1.25 2003/08/09 17:51:04 ehood Exp $
746
747=head1 AUTHOR
748
749Earl Hood, earl@earlhood.com
750
751This program is part of the mharc archiving system and comes with
752ABSOLUTELY NO WARRANTY and may be copied only under the terms of
753the GNU General Public License, which may be found in the mharc
754distribution.
755
756=cut
757