exigrep: add POD and -h, -m
[exim.git] / src / src / exigrep.src
1 #! PERL_COMMAND
2
3 use warnings;
4 use strict;
5 use Pod::Usage;
6 BEGIN { pop @INC if $INC[-1] eq '.' };
7
8 # Copyright (c) 2007-2015 University of Cambridge.
9 # See the file NOTICE for conditions of use and distribution.
10
11 # Except when they appear in comments, the following placeholders in this
12 # source are replaced when it is turned into a runnable script:
13 #
14 # PERL_COMMAND
15 # ZCAT_COMMAND
16 # COMPRESS_SUFFIX
17
18 # PROCESSED_FLAG
19
20 # This is a perl script which extracts from an Exim log all entries
21 # for all messages that have an entry that matches a given pattern.
22 # If *any* entry for a particular message matches the pattern, *all*
23 # entries for that message are displayed.
24
25 # We buffer up information on a per-message basis. It is done this way rather
26 # than reading the input twice so that the input can be a pipe.
27
28 # There must be one argument, which is the pattern. Subsequent arguments
29 # are the files to scan; if none, the standard input is read. If any file
30 # appears to be compressed, it is passed through zcat. We can't just do this
31 # for all files, because zcat chokes on non-compressed files.
32
33 # Performance optimized in 02/02/2007 by Jori Hamalainen
34 # Typical run time acceleration: 4 times
35
36
37 use Getopt::Std qw(getopts);
38 use POSIX qw(mktime);
39
40
41 # This subroutine converts a time/date string from an Exim log line into
42 # the number of seconds since the epoch. It handles optional timezone
43 # information.
44
45 sub seconds {
46 my($year,$month,$day,$hour,$min,$sec,$tzs,$tzh,$tzm) =
47 $_[0] =~ /^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?>\s([+-])(\d\d)(\d\d))?/o;
48
49 my $seconds = mktime $sec, $min, $hour, $day, $month - 1, $year - 1900;
50
51 if (defined $tzs)
52 {
53 $seconds -= $tzh * 3600 + $tzm * 60 if $tzs eq "+";
54 $seconds += $tzh * 3600 + $tzm * 60 if $tzs eq "-";
55 }
56
57 return $seconds;
58 }
59
60
61 # This subroutine processes a single line (in $_) from a log file. Program
62 # defensively against short lines finding their way into the log.
63
64 my (%saved, %id_list, $pattern, $queue_time, $insensitive, $invert);
65
66 # If using "related" option, have to track extra message IDs
67 my $related;
68 my $related_re='';
69 my @Mids = ();
70
71 sub do_line {
72
73 # Convert syslog lines to mainlog format, as in eximstats.
74
75 if (!/^\d{4}-/o) { $_ =~ s/^.*? exim\b.*?: //o; }
76
77 return unless
78 my($date,$id) = /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d (?:[+-]\d{4} )?)(?:\[\d+\] )?(\w{6}\-\w{6}\-\w{2})?/o;
79
80 # Handle the case when the log line belongs to a specific message. We save
81 # lines for specific messages until the message is complete. Then either print
82 # or discard.
83
84 if (defined $id)
85 {
86 $saved{$id} = '' unless defined($saved{$id});
87
88 # Save up the data for this message in case it becomes interesting later.
89
90 $saved{$id} .= $_;
91
92 # Are we interested in this id ? Short circuit if we already were interested.
93
94 if ($invert)
95 {
96 $id_list{$id} = 1 if (!defined($id_list{$id}));
97 $id_list{$id} = 0 if (($insensitive && /$pattern/io) || /$pattern/o);
98 }
99 else
100 {
101 if (defined $id_list{$id} ||
102 ($insensitive && /$pattern/io) || /$pattern/o)
103 {
104 $id_list{$id} = 1;
105 get_related_ids($id) if $related;
106 }
107 elsif ($related && $related_re)
108 {
109 grep_for_related($_, $id);
110 }
111 }
112
113 # See if this is a completion for some message. If it is interesting,
114 # print it, but in any event, throw away what was saved.
115
116 if (index($_, 'Completed') != -1 ||
117 index($_, 'SMTP data timeout') != -1 ||
118 (index($_, 'rejected') != -1 &&
119 /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d (?:[+-]\d{4} )?)(?:\[\d+\] )?\w{6}\-\w{6}\-\w{2} rejected/o))
120 {
121 if ($queue_time != -1 &&
122 $saved{$id} =~ /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d ([+-]\d{4} )?)/o)
123 {
124 my $old_sec = &seconds($1);
125 my $sec = &seconds($date);
126 $id_list{$id} = 0 if $id_list{$id} && $sec - $old_sec <= $queue_time;
127 }
128
129 print "$saved{$id}\n" if ($id_list{$id});
130 delete $id_list{$id};
131 delete $saved{$id};
132 }
133 }
134
135 # Handle the case where the log line does not belong to a specific message.
136 # Print it if it is interesting.
137
138 elsif ( ($invert && (($insensitive && !/$pattern/io) || !/$pattern/o)) ||
139 (!$invert && (($insensitive && /$pattern/io) || /$pattern/o)) )
140 { print "$_\n"; }
141 }
142
143 # Rotated log files are frequently compressed and there are a variety of
144 # formats it could be compressed with. Rather than use just one that is
145 # detected and hardcoded at Exim compile time, detect and use what the
146 # logfile is compressed with on the fly.
147 #
148 # List of known compression extensions and their associated commands:
149 my $compressors = {
150 gz => { cmd => 'zcat', args => '' },
151 bz2 => { cmd => 'bzcat', args => '' },
152 xz => { cmd => 'xzcat', args => '' },
153 lzma => { cmd => 'lzma', args => '-dc' }
154 };
155 my $csearch = 0;
156
157 sub detect_compressor_bin
158 {
159 my $ext = shift();
160 my $c = $compressors->{$ext}->{cmd};
161 $compressors->{$ext}->{bin} = `which $c 2>/dev/null`;
162 chomp($compressors->{$ext}->{bin});
163 }
164
165 sub detect_compressor_capable
166 {
167 my $filename = shift();
168 map { &detect_compressor_bin($_) } keys %$compressors
169 if (!$csearch);
170 $csearch = 1;
171 return undef
172 unless (grep {$filename =~ /\.(?:$_)$/} keys %$compressors);
173 # Loop through them, figure out which one it detected,
174 # and build the commandline.
175 my $cmdline = undef;
176 foreach my $ext (keys %$compressors)
177 {
178 if ($filename =~ /\.(?:$ext)$/)
179 {
180 # Just die if compressor not found; if this occurs in the middle of
181 # two valid files with a lot of matches, error could easily be missed.
182 die("Didn't find $ext decompressor for $filename\n")
183 if ($compressors->{$ext}->{bin} eq '');
184 $cmdline = $compressors->{$ext}->{bin} ." ".
185 $compressors->{$ext}->{args};
186 last;
187 }
188 }
189 return $cmdline;
190 }
191
192 sub grep_for_related {
193 my ($line,$id) = @_;
194 $id_list{$id} = 1 if $line =~ m/$related_re/;
195 }
196
197 sub get_related_ids {
198 my ($id) = @_;
199 push @Mids, $id unless grep /\b$id\b/, @Mids;
200 my $re = join '|', @Mids;
201 $related_re = qr/$re/;
202 }
203
204 # The main program. Extract the pattern and make sure any relevant characters
205 # are quoted if the -l flag is given. The -t flag gives a time-on-queue value
206 # which is an additional condition. The -M flag will also display "related"
207 # loglines (msgid from matched lines is searched in following lines).
208
209 getopts('Ilvt:Mhm',\my %args);
210 $queue_time = $args{'t'}? $args{'t'} : -1;
211 $insensitive = $args{'I'}? 0 : 1;
212 $invert = $args{'v'}? 1 : 0;
213 $related = $args{'M'}? 1 : 0;
214
215 pod2usage(-exit => 0, -verbose => 1) if $args{'h'};
216 pod2usage(-exit => 0, -verbose => 2, -noperldoc => system('perldoc -V 2>/dev/null >/dev/null'))
217 if $args{'m'};
218 pod2usage if not @ARGV;
219
220 $pattern = shift @ARGV;
221 $pattern = quotemeta $pattern if $args{l};
222
223
224 # If file arguments are given, open each one and process according as it is
225 # is compressed or not.
226
227 if (@ARGV)
228 {
229 foreach (@ARGV)
230 {
231 my $filename = $_;
232 if (-x 'ZCAT_COMMAND' && $filename =~ /\.(?:COMPRESS_SUFFIX)$/o)
233 {
234 open(LOG, "ZCAT_COMMAND $filename |") ||
235 die "Unable to zcat $filename: $!\n";
236 }
237 elsif (my $cmdline = &detect_compressor_capable($filename))
238 {
239 open(LOG, "$cmdline $filename |") ||
240 die "Unable to decompress $filename: $!\n";
241 }
242 else
243 {
244 open(LOG, "<$filename") || die "Unable to open $filename: $!\n";
245 }
246 do_line() while (<LOG>);
247 close(LOG);
248 }
249 }
250
251 # If no files are named, process STDIN only
252
253 else { do_line() while (<STDIN>); }
254
255 # At the end of processing all the input, print any uncompleted messages.
256
257 for (keys %id_list)
258 {
259 print "+++ $_ has not completed +++\n$saved{$_}\n";
260 }
261
262 __END__
263
264 =head1 NAME
265
266 exigrep - search Exim's main log
267
268 =head1 SYNOPSIS
269
270 B<exigrep> [options] pattern [log] ...
271
272 =head1 DESCRIPTION
273
274 The B<exigrep> utility is a Perl script that searches one or more main log
275 files for entries that match a given pattern. When it finds a match,
276 it extracts all the log entries for the relevant message, not just
277 those that match the pattern. Thus, B<exigrep> can extract complete log
278 entries for a given message, or all mail for a given user, or for a
279 given host, for example.
280
281 If no file names are given on the command line, the standard input is read.
282
283 For known file extensions indicating compression (F<.gz>, F<.bz2>, F<.xz>, and F<.lzma>)
284 a suitable de-compressor is used, if available.
285
286 =head1 OPTIONS
287
288 =over
289
290 =item B<-l>
291
292 This means 'literal', that is, treat all characters in the
293 pattern as standing for themselves. Otherwise the pattern must be a
294 Perl regular expression. The pattern match is case-insensitive.
295
296 =item B<-t> I<seconds>
297
298 Limit the output to messages that spent at least I<seconds> in the
299 queue.
300
301 =item B<-I>
302
303 Do a case sensitive search.
304
305 =item B<-v>
306
307 Invert the meaning of the search pattern. That is, print message log
308 entries that are not related to that pattern.
309
310 =item B<-M>
311
312 Search for related messages too.
313
314 =item B<-h>
315
316 Print a short reference help. For more detailed help try L<exigrep(8)>,
317 or C<exigrep -m>.
318
319 =item B<-m>
320
321 Print this manual page of B<exigrep>.
322
323 =back
324
325 =head1 SEE ALSO
326
327 L<exim(8)>, L<perlre(1)>, L<Exim|http://exim.org/>
328
329 =head1 AUTHOR
330
331 This manual page was stitched together from spec.txt by Andreas Metzler L<ametzler at downhill.at.eu.org>
332 and updated by Heiko Schlittermann L<hs@schlittermann.de>.
333
334 =cut