More checks on header line length during reception
[exim.git] / src / src / exigrep.src
1 #! PERL_COMMAND
2
3 use warnings;
4 use strict;
5 BEGIN { pop @INC if $INC[-1] eq '.' };
6
7 use Pod::Usage;
8 use Getopt::Long;
9 use File::Basename;
10
11 # Copyright (c) 2007-2017 University of Cambridge.
12 # See the file NOTICE for conditions of use and distribution.
13
14 # Except when they appear in comments, the following placeholders in this
15 # source are replaced when it is turned into a runnable script:
16 #
17 # PERL_COMMAND
18 # ZCAT_COMMAND
19 # COMPRESS_SUFFIX
20
21 # PROCESSED_FLAG
22
23 # This is a perl script which extracts from an Exim log all entries
24 # for all messages that have an entry that matches a given pattern.
25 # If *any* entry for a particular message matches the pattern, *all*
26 # entries for that message are displayed.
27
28 # We buffer up information on a per-message basis. It is done this way rather
29 # than reading the input twice so that the input can be a pipe.
30
31 # There must be one argument, which is the pattern. Subsequent arguments
32 # are the files to scan; if none, the standard input is read. If any file
33 # appears to be compressed, it is passed through zcat. We can't just do this
34 # for all files, because zcat chokes on non-compressed files.
35
36 # Performance optimized in 02/02/2007 by Jori Hamalainen
37 # Typical run time acceleration: 4 times
38
39
40 use POSIX qw(mktime);
41
42
43 # This subroutine converts a time/date string from an Exim log line into
44 # the number of seconds since the epoch. It handles optional timezone
45 # information.
46
47 sub seconds {
48 my($year,$month,$day,$hour,$min,$sec,$tzs,$tzh,$tzm) =
49 $_[0] =~ /^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?:.\d+)?(?>\s([+-])(\d\d)(\d\d))?/o;
50
51 my $seconds = mktime $sec, $min, $hour, $day, $month - 1, $year - 1900;
52
53 if (defined $tzs)
54 {
55 $seconds -= $tzh * 3600 + $tzm * 60 if $tzs eq "+";
56 $seconds += $tzh * 3600 + $tzm * 60 if $tzs eq "-";
57 }
58
59 return $seconds;
60 }
61
62
63 # This subroutine processes a single line (in $_) from a log file. Program
64 # defensively against short lines finding their way into the log.
65
66 my (%saved, %id_list, $pattern);
67
68 my $queue_time = -1;
69 my $insensitive = 1;
70 my $invert = 0;
71 my $related = 0;
72 my $use_pager = 1;
73 my $literal = 0;
74
75
76 # If using "related" option, have to track extra message IDs
77 my $related_re='';
78 my @Mids = ();
79
80 sub do_line {
81
82 # Convert syslog lines to mainlog format, as in eximstats.
83
84 if (!/^\d{4}-/o) { $_ =~ s/^.*? exim\b.*?: //o; }
85
86 return unless
87 my($date,$id) = /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(?:\.\d+)? (?:[+-]\d{4} )?)(?:\[\d+\] )?(\w{6}\-\w{6}\-\w{2})?/o;
88
89 # Handle the case when the log line belongs to a specific message. We save
90 # lines for specific messages until the message is complete. Then either print
91 # or discard.
92
93 if (defined $id)
94 {
95 $saved{$id} = '' unless defined($saved{$id});
96
97 # Save up the data for this message in case it becomes interesting later.
98
99 $saved{$id} .= $_;
100
101 # Are we interested in this id ? Short circuit if we already were interested.
102
103 if ($invert)
104 {
105 $id_list{$id} = 1 if (!defined($id_list{$id}));
106 $id_list{$id} = 0 if (($insensitive && /$pattern/io) || /$pattern/o);
107 }
108 else
109 {
110 if (defined $id_list{$id} ||
111 ($insensitive && /$pattern/io) || /$pattern/o)
112 {
113 $id_list{$id} = 1;
114 get_related_ids($id) if $related;
115 }
116 elsif ($related && $related_re)
117 {
118 grep_for_related($_, $id);
119 }
120 }
121
122 # See if this is a completion for some message. If it is interesting,
123 # print it, but in any event, throw away what was saved.
124
125 if (index($_, 'Completed') != -1 ||
126 index($_, 'SMTP data timeout') != -1 ||
127 (index($_, 'rejected') != -1 &&
128 /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(?:\.\d+)? (?:[+-]\d{4} )?)(?:\[\d+\] )?\w{6}\-\w{6}\-\w{2} rejected/o))
129 {
130 if ($queue_time != -1 &&
131 $saved{$id} =~ /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d ([+-]\d{4} )?)/o)
132 {
133 my $old_sec = &seconds($1);
134 my $sec = &seconds($date);
135 $id_list{$id} = 0 if $id_list{$id} && $sec - $old_sec <= $queue_time;
136 }
137
138 print "$saved{$id}\n" if ($id_list{$id});
139 delete $id_list{$id};
140 delete $saved{$id};
141 }
142 }
143
144 # Handle the case where the log line does not belong to a specific message.
145 # Print it if it is interesting.
146
147 elsif ( ($invert && (($insensitive && !/$pattern/io) || !/$pattern/o)) ||
148 (!$invert && (($insensitive && /$pattern/io) || /$pattern/o)) )
149 { print "$_\n"; }
150 }
151
152 # Rotated log files are frequently compressed and there are a variety of
153 # formats it could be compressed with. Rather than use just one that is
154 # detected and hardcoded at Exim compile time, detect and use what the
155 # logfile is compressed with on the fly.
156 #
157 # List of known compression extensions and their associated commands:
158 my $compressors = {
159 gz => { cmd => 'zcat', args => '' },
160 bz2 => { cmd => 'bzcat', args => '' },
161 xz => { cmd => 'xzcat', args => '' },
162 lzma => { cmd => 'lzma', args => '-dc' }
163 };
164 my $csearch = 0;
165
166 sub detect_compressor_bin
167 {
168 my $ext = shift();
169 my $c = $compressors->{$ext}->{cmd};
170 $compressors->{$ext}->{bin} = `which $c 2>/dev/null`;
171 chomp($compressors->{$ext}->{bin});
172 }
173
174 sub detect_compressor_capable
175 {
176 my $filename = shift();
177 map { &detect_compressor_bin($_) } keys %$compressors
178 if (!$csearch);
179 $csearch = 1;
180 return undef
181 unless (grep {$filename =~ /\.(?:$_)$/} keys %$compressors);
182 # Loop through them, figure out which one it detected,
183 # and build the commandline.
184 my $cmdline = undef;
185 foreach my $ext (keys %$compressors)
186 {
187 if ($filename =~ /\.(?:$ext)$/)
188 {
189 # Just die if compressor not found; if this occurs in the middle of
190 # two valid files with a lot of matches, error could easily be missed.
191 die("Didn't find $ext decompressor for $filename\n")
192 if ($compressors->{$ext}->{bin} eq '');
193 $cmdline = $compressors->{$ext}->{bin} ." ".
194 $compressors->{$ext}->{args};
195 last;
196 }
197 }
198 return $cmdline;
199 }
200
201 sub grep_for_related {
202 my ($line,$id) = @_;
203 $id_list{$id} = 1 if $line =~ m/$related_re/;
204 }
205
206 sub get_related_ids {
207 my ($id) = @_;
208 push @Mids, $id unless grep /\b$id\b/, @Mids;
209 my $re = join '|', @Mids;
210 $related_re = qr/$re/;
211 }
212
213 # The main program. Extract the pattern and make sure any relevant characters
214 # are quoted if the -l flag is given. The -t flag gives a time-on-queue value
215 # which is an additional condition. The -M flag will also display "related"
216 # loglines (msgid from matched lines is searched in following lines).
217
218 GetOptions(
219 'I|sensitive' => sub { $insensitive = 0 },
220 'l|literal' => \$literal,
221 'M|related' => \$related,
222 't|queue-time=i' => \$queue_time,
223 'pager!' => \$use_pager,
224 'v|invert' => \$invert,
225 'h|help' => sub { pod2usage(-exit => 0, -verbose => 1) },
226 'm|man' => sub {
227 pod2usage(
228 -exit => 0,
229 -verbose => 2,
230 -noperldoc => system('perldoc -V 2>/dev/null >&2')
231 );
232 },
233 'version' => sub {
234 print basename($0) . ": $0\n",
235 "build: EXIM_RELEASE_VERSIONEXIM_VARIANT_VERSION\n",
236 "perl(runtime): $]\n";
237 exit 0;
238 },
239 ) and @ARGV or pod2usage;
240
241 $pattern = shift @ARGV;
242 $pattern = quotemeta $pattern if $literal;
243
244 # Start a pager if output goes to a terminal
245 if (-t 1 and $use_pager)
246 {
247 # for perl >= v5.10.x: foreach ($ENV{PAGER}//(), 'less', 'more')
248 foreach (defined $ENV{PAGER} ? $ENV{PAGER} : (), 'less', 'more')
249 {
250 local $ENV{LESS} .= ' --no-init --quit-if-one-screen';
251 open(my $pager, '|-', $_) or next;
252 select $pager;
253 last;
254 }
255 }
256
257 # If file arguments are given, open each one and process according as it is
258 # is compressed or not.
259
260 if (@ARGV)
261 {
262 foreach (@ARGV)
263 {
264 my $filename = $_;
265 if (-x 'ZCAT_COMMAND' && $filename =~ /\.(?:COMPRESS_SUFFIX)$/o)
266 {
267 open(LOG, "ZCAT_COMMAND $filename |") ||
268 die "Unable to zcat $filename: $!\n";
269 }
270 elsif (my $cmdline = &detect_compressor_capable($filename))
271 {
272 open(LOG, "$cmdline $filename |") ||
273 die "Unable to decompress $filename: $!\n";
274 }
275 else
276 {
277 open(LOG, "<$filename") || die "Unable to open $filename: $!\n";
278 }
279 do_line() while (<LOG>);
280 close(LOG);
281 }
282 }
283
284 # If no files are named, process STDIN only
285
286 else { do_line() while (<STDIN>); }
287
288 # At the end of processing all the input, print any uncompleted messages.
289
290 for (keys %id_list)
291 {
292 print "+++ $_ has not completed +++\n$saved{$_}\n";
293 }
294
295 __END__
296
297 =head1 NAME
298
299 exigrep - search Exim's main log
300
301 =head1 SYNOPSIS
302
303 B<exigrep> [options] pattern [log] ...
304
305 =head1 DESCRIPTION
306
307 The B<exigrep> utility is a Perl script that searches one or more main log
308 files for entries that match a given pattern. When it finds a match,
309 it extracts all the log entries for the relevant message, not just
310 those that match the pattern. Thus, B<exigrep> can extract complete log
311 entries for a given message, or all mail for a given user, or for a
312 given host, for example.
313
314 If no file names are given on the command line, the standard input is read.
315
316 For known file extensions indicating compression (F<.gz>, F<.bz2>, F<.xz>, and F<.lzma>)
317 a suitable de-compressor is used, if available.
318
319 The output is sent through a pager if a terminal is connected to STDOUT. As
320 pager are considered: C<$ENV{PAGER}>, C<less>, C<more>.
321
322 =head1 OPTIONS
323
324 =over
325
326 =item B<-l>|B<--literal>
327
328 This means 'literal', that is, treat all characters in the
329 pattern as standing for themselves. Otherwise the pattern must be a
330 Perl regular expression. The pattern match is case-insensitive.
331
332 =item B<-t>|B<--queue-time> I<seconds>
333
334 Limit the output to messages that spent at least I<seconds> in the
335 queue.
336
337 =item B<-I>|B<--sensitive>
338
339 Do a case sensitive search.
340
341 =item B<-v>|B<--invert>
342
343 Invert the meaning of the search pattern. That is, print message log
344 entries that are not related to that pattern.
345
346 =item B<-M>|B<--related>
347
348 Search for related messages too.
349
350 =item B<--no-pager>
351
352 Do not use a pager, even if STDOUT is connected to a terminal.
353
354 =item B<-h>|B<--help>
355
356 Print a short reference help. For more detailed help try L<exigrep(8)>,
357 or C<exigrep -m>.
358
359 =item B<-m>|B<--man>
360
361 Print this manual page of B<exigrep>.
362
363 =back
364
365 =head1 SEE ALSO
366
367 L<exim(8)>, L<perlre(1)>, L<Exim|http://exim.org/>
368
369 =head1 AUTHOR
370
371 This manual page was stitched together from spec.txt by Andreas Metzler L<ametzler at downhill.at.eu.org>
372 and updated by Heiko Schlittermann L<hs@schlittermann.de>.
373
374 =cut