abc6f3a31323d1e7d2e7906ee704999818b4ca10
[exim.git] / src / src / exigrep.src
1 #! PERL_COMMAND
2
3 use warnings;
4 use strict;
5 use Pod::Usage;
6 BEGIN { pop @INC if $INC[-1] eq '.' };
7
8 # Copyright (c) 2007-2015 University of Cambridge.
9 # See the file NOTICE for conditions of use and distribution.
10
11 # Except when they appear in comments, the following placeholders in this
12 # source are replaced when it is turned into a runnable script:
13 #
14 # PERL_COMMAND
15 # ZCAT_COMMAND
16 # COMPRESS_SUFFIX
17
18 # PROCESSED_FLAG
19
20 # This is a perl script which extracts from an Exim log all entries
21 # for all messages that have an entry that matches a given pattern.
22 # If *any* entry for a particular message matches the pattern, *all*
23 # entries for that message are displayed.
24
25 # We buffer up information on a per-message basis. It is done this way rather
26 # than reading the input twice so that the input can be a pipe.
27
28 # There must be one argument, which is the pattern. Subsequent arguments
29 # are the files to scan; if none, the standard input is read. If any file
30 # appears to be compressed, it is passed through zcat. We can't just do this
31 # for all files, because zcat chokes on non-compressed files.
32
33 # Performance optimized in 02/02/2007 by Jori Hamalainen
34 # Typical run time acceleration: 4 times
35
36
37 use Getopt::Std qw(getopts);
38 use POSIX qw(mktime);
39
40
41 # This subroutine converts a time/date string from an Exim log line into
42 # the number of seconds since the epoch. It handles optional timezone
43 # information.
44
45 sub seconds {
46 my($year,$month,$day,$hour,$min,$sec,$tzs,$tzh,$tzm) =
47 $_[0] =~ /^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?>\s([+-])(\d\d)(\d\d))?/o;
48
49 my $seconds = mktime $sec, $min, $hour, $day, $month - 1, $year - 1900;
50
51 if (defined $tzs)
52 {
53 $seconds -= $tzh * 3600 + $tzm * 60 if $tzs eq "+";
54 $seconds += $tzh * 3600 + $tzm * 60 if $tzs eq "-";
55 }
56
57 return $seconds;
58 }
59
60
61 # This subroutine processes a single line (in $_) from a log file. Program
62 # defensively against short lines finding their way into the log.
63
64 my (%saved, %id_list, $pattern, $queue_time, $insensitive, $invert);
65
66 # If using "related" option, have to track extra message IDs
67 my $related;
68 my $related_re='';
69 my @Mids = ();
70
71 sub do_line {
72
73 # Convert syslog lines to mainlog format, as in eximstats.
74
75 if (!/^\d{4}-/o) { $_ =~ s/^.*? exim\b.*?: //o; }
76
77 return unless
78 my($date,$id) = /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d (?:[+-]\d{4} )?)(?:\[\d+\] )?(\w{6}\-\w{6}\-\w{2})?/o;
79
80 # Handle the case when the log line belongs to a specific message. We save
81 # lines for specific messages until the message is complete. Then either print
82 # or discard.
83
84 if (defined $id)
85 {
86 $saved{$id} = '' unless defined($saved{$id});
87
88 # Save up the data for this message in case it becomes interesting later.
89
90 $saved{$id} .= $_;
91
92 # Are we interested in this id ? Short circuit if we already were interested.
93
94 if ($invert)
95 {
96 $id_list{$id} = 1 if (!defined($id_list{$id}));
97 $id_list{$id} = 0 if (($insensitive && /$pattern/io) || /$pattern/o);
98 }
99 else
100 {
101 if (defined $id_list{$id} ||
102 ($insensitive && /$pattern/io) || /$pattern/o)
103 {
104 $id_list{$id} = 1;
105 get_related_ids($id) if $related;
106 }
107 elsif ($related && $related_re)
108 {
109 grep_for_related($_, $id);
110 }
111 }
112
113 # See if this is a completion for some message. If it is interesting,
114 # print it, but in any event, throw away what was saved.
115
116 if (index($_, 'Completed') != -1 ||
117 index($_, 'SMTP data timeout') != -1 ||
118 (index($_, 'rejected') != -1 &&
119 /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d (?:[+-]\d{4} )?)(?:\[\d+\] )?\w{6}\-\w{6}\-\w{2} rejected/o))
120 {
121 if ($queue_time != -1 &&
122 $saved{$id} =~ /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d ([+-]\d{4} )?)/o)
123 {
124 my $old_sec = &seconds($1);
125 my $sec = &seconds($date);
126 $id_list{$id} = 0 if $id_list{$id} && $sec - $old_sec <= $queue_time;
127 }
128
129 print "$saved{$id}\n" if ($id_list{$id});
130 delete $id_list{$id};
131 delete $saved{$id};
132 }
133 }
134
135 # Handle the case where the log line does not belong to a specific message.
136 # Print it if it is interesting.
137
138 elsif ( ($invert && (($insensitive && !/$pattern/io) || !/$pattern/o)) ||
139 (!$invert && (($insensitive && /$pattern/io) || /$pattern/o)) )
140 { print "$_\n"; }
141 }
142
143 # Rotated log files are frequently compressed and there are a variety of
144 # formats it could be compressed with. Rather than use just one that is
145 # detected and hardcoded at Exim compile time, detect and use what the
146 # logfile is compressed with on the fly.
147 #
148 # List of known compression extensions and their associated commands:
149 my $compressors = {
150 gz => { cmd => 'zcat', args => '' },
151 bz2 => { cmd => 'bzcat', args => '' },
152 xz => { cmd => 'xzcat', args => '' },
153 lzma => { cmd => 'lzma', args => '-dc' }
154 };
155 my $csearch = 0;
156
157 sub detect_compressor_bin
158 {
159 my $ext = shift();
160 my $c = $compressors->{$ext}->{cmd};
161 $compressors->{$ext}->{bin} = `which $c 2>/dev/null`;
162 chomp($compressors->{$ext}->{bin});
163 }
164
165 sub detect_compressor_capable
166 {
167 my $filename = shift();
168 map { &detect_compressor_bin($_) } keys %$compressors
169 if (!$csearch);
170 $csearch = 1;
171 return undef
172 unless (grep {$filename =~ /\.(?:$_)$/} keys %$compressors);
173 # Loop through them, figure out which one it detected,
174 # and build the commandline.
175 my $cmdline = undef;
176 foreach my $ext (keys %$compressors)
177 {
178 if ($filename =~ /\.(?:$ext)$/)
179 {
180 # Just die if compressor not found; if this occurs in the middle of
181 # two valid files with a lot of matches, error could easily be missed.
182 die("Didn't find $ext decompressor for $filename\n")
183 if ($compressors->{$ext}->{bin} eq '');
184 $cmdline = $compressors->{$ext}->{bin} ." ".
185 $compressors->{$ext}->{args};
186 last;
187 }
188 }
189 return $cmdline;
190 }
191
192 sub grep_for_related {
193 my ($line,$id) = @_;
194 $id_list{$id} = 1 if $line =~ m/$related_re/;
195 }
196
197 sub get_related_ids {
198 my ($id) = @_;
199 push @Mids, $id unless grep /\b$id\b/, @Mids;
200 my $re = join '|', @Mids;
201 $related_re = qr/$re/;
202 }
203
204 # The main program. Extract the pattern and make sure any relevant characters
205 # are quoted if the -l flag is given. The -t flag gives a time-on-queue value
206 # which is an additional condition. The -M flag will also display "related"
207 # loglines (msgid from matched lines is searched in following lines).
208
209 getopts('Ilvt:Mhm',\my %args);
210 $queue_time = $args{'t'}? $args{'t'} : -1;
211 $insensitive = $args{'I'}? 0 : 1;
212 $invert = $args{'v'}? 1 : 0;
213 $related = $args{'M'}? 1 : 0;
214
215 pod2usage(-exit => 0, -verbose => 1) if $args{'h'};
216 pod2usage(-exit => 0, -verbose => 2, -noperldoc => system('perldoc -V 2>/dev/null >/dev/null'))
217 if $args{'m'};
218 pod2usage if not @ARGV;
219
220 $pattern = shift @ARGV;
221 $pattern = quotemeta $pattern if $args{l};
222
223 # Start a pager if output goes to a terminal
224 if (-t 1)
225 {
226 foreach ($ENV{PAGER}//(), 'less', 'more')
227 {
228 open(my $pager, '|-', $_) or next;
229 select $pager;
230 last;
231 }
232 }
233
234 # If file arguments are given, open each one and process according as it is
235 # is compressed or not.
236
237 if (@ARGV)
238 {
239 foreach (@ARGV)
240 {
241 my $filename = $_;
242 if (-x 'ZCAT_COMMAND' && $filename =~ /\.(?:COMPRESS_SUFFIX)$/o)
243 {
244 open(LOG, "ZCAT_COMMAND $filename |") ||
245 die "Unable to zcat $filename: $!\n";
246 }
247 elsif (my $cmdline = &detect_compressor_capable($filename))
248 {
249 open(LOG, "$cmdline $filename |") ||
250 die "Unable to decompress $filename: $!\n";
251 }
252 else
253 {
254 open(LOG, "<$filename") || die "Unable to open $filename: $!\n";
255 }
256 do_line() while (<LOG>);
257 close(LOG);
258 }
259 }
260
261 # If no files are named, process STDIN only
262
263 else { do_line() while (<STDIN>); }
264
265 # At the end of processing all the input, print any uncompleted messages.
266
267 for (keys %id_list)
268 {
269 print "+++ $_ has not completed +++\n$saved{$_}\n";
270 }
271
272 __END__
273
274 =head1 NAME
275
276 exigrep - search Exim's main log
277
278 =head1 SYNOPSIS
279
280 B<exigrep> [options] pattern [log] ...
281
282 =head1 DESCRIPTION
283
284 The B<exigrep> utility is a Perl script that searches one or more main log
285 files for entries that match a given pattern. When it finds a match,
286 it extracts all the log entries for the relevant message, not just
287 those that match the pattern. Thus, B<exigrep> can extract complete log
288 entries for a given message, or all mail for a given user, or for a
289 given host, for example.
290
291 If no file names are given on the command line, the standard input is read.
292
293 For known file extensions indicating compression (F<.gz>, F<.bz2>, F<.xz>, and F<.lzma>)
294 a suitable de-compressor is used, if available.
295
296 =head1 OPTIONS
297
298 =over
299
300 =item B<-l>
301
302 This means 'literal', that is, treat all characters in the
303 pattern as standing for themselves. Otherwise the pattern must be a
304 Perl regular expression. The pattern match is case-insensitive.
305
306 =item B<-t> I<seconds>
307
308 Limit the output to messages that spent at least I<seconds> in the
309 queue.
310
311 =item B<-I>
312
313 Do a case sensitive search.
314
315 =item B<-v>
316
317 Invert the meaning of the search pattern. That is, print message log
318 entries that are not related to that pattern.
319
320 =item B<-M>
321
322 Search for related messages too.
323
324 =item B<-h>
325
326 Print a short reference help. For more detailed help try L<exigrep(8)>,
327 or C<exigrep -m>.
328
329 =item B<-m>
330
331 Print this manual page of B<exigrep>.
332
333 =back
334
335 =head1 SEE ALSO
336
337 L<exim(8)>, L<perlre(1)>, L<Exim|http://exim.org/>
338
339 =head1 AUTHOR
340
341 This manual page was stitched together from spec.txt by Andreas Metzler L<ametzler at downhill.at.eu.org>
342 and updated by Heiko Schlittermann L<hs@schlittermann.de>.
343
344 =cut