X-Git-Url: https://vcs.fsf.org/?p=exim.git;a=blobdiff_plain;f=src%2Fsrc%2Feximstats.src;h=5e1a0847b58fe8424c630c03e5f298da87a89c8e;hp=dcc26c8a03b9300b0e5d9f7bb5ad340973f42fdb;hb=759502e5af0acfb310b8571f056d2dbf59adb1d3;hpb=37f8a7c915237d565e252f91c047ca88e3b84ab8 diff --git a/src/src/eximstats.src b/src/src/eximstats.src index dcc26c8a0..5e1a0847b 100644 --- a/src/src/eximstats.src +++ b/src/src/eximstats.src @@ -1,7 +1,6 @@ -#!PERL_COMMAND -w -# $Cambridge: exim/src/src/eximstats.src,v 1.17 2007/09/21 08:26:48 steve Exp $ +#!PERL_COMMAND -# Copyright (c) 2001 University of Cambridge. +# Copyright (c) 2001-2017 University of Cambridge. # See the file NOTICE for conditions of use and distribution. # Perl script to generate statistics from one or more Exim log files. @@ -74,7 +73,7 @@ # 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail. # We now generate our main parsing subroutine as an eval statement # which improves performance dramatically when not all the results -# are required. We also cache the last timestamp to time convertion. +# are required. We also cache the last timestamp to time conversion. # # NOTE: 'Top 50 destinations by (message count|volume)' lines are # now 'Top N (host|email|domain) destinations by (message count|volume)' @@ -143,7 +142,7 @@ # in HTML output. Also added code to convert them back with -merge. # Fixed timestamp offsets to convert to seconds rather than minutes. # Updated -merge to work with output files using timezones. -# Added cacheing to speed up the calculation of timezone offsets. +# Added caching to speed up the calculation of timezone offsets. # # 2003-02-07 V1.25 Steve Campbell # Optimised the usage of mktime() in the seconds subroutine. @@ -163,7 +162,7 @@ # Bernard Massot. # # 2003-06-03 V1.28 John Newman -# Added in the ability to skip over the parsing and evaulation of +# Added in the ability to skip over the parsing and evaluation of # specific transports as passed to eximstats via the new "-nt/.../" # command line argument. This new switch allows the viewing of # not more accurate statistics but more applicable statistics when @@ -201,7 +200,7 @@ # Added -xls and the ability to specify output files. # # 2005-04-29 V1.38 Steve Campbell -# Use FileHandles for outputing results. +# Use FileHandles for outputting results. # Allow any combination of xls, txt, and html output. # Fixed display of large numbers with -nvr option # Fixed merging of reports with empty tables. @@ -282,6 +281,11 @@ # 2007-09-20 V1.59 Steve Campbell # Added the -bylocaldomain option # +# 2007-09-20 V1.60 Heiko Schlittermann +# Fix for misinterpreted log lines +# +# 2013-01-14 V1.61 Steve Campbell +# Watch out for senders sending "HELO [IpAddr]" # # # For documentation on the logfile format, see @@ -386,7 +390,7 @@ Useful for finding out which of your mailing lists are receiving mail. Show the delivery times (B
)for all the messages. -Exim must have been configured to use the +delivery_time logging option +Exim must have been configured to use the +deliver_time logging option for this option to work. I is an optional list of times. Eg -show_dt1,2,4,8 will show @@ -529,7 +533,7 @@ about how to create charts from the tables. =head1 AUTHOR -There is a web site at http://www.exim.org - this contains details of the +There is a website at https://www.exim.org - this contains details of the mailing list exim-users@exim.org. =head1 TO DO @@ -537,19 +541,29 @@ mailing list exim-users@exim.org. This program does not perfectly handle messages whose received and delivered log lines are in different files, which can happen when you have multiple mail servers and a message cannot be -immeadiately delivered. Fixing this could be tricky... +immediately delivered. Fixing this could be tricky... Merging of xls files is not (yet) possible. Be free to implement :) =cut +use warnings; use integer; +BEGIN { pop @INC if $INC[-1] eq '.' }; use strict; use IO::File; +use File::Basename; # use Time::Local; # PH/FANF use POSIX; +if (@ARGV and $ARGV[0] eq '--version') { + print basename($0) . ": $0\n", + "build: EXIM_RELEASE_VERSIONEXIM_VARIANT_VERSION\n", + "perl(runtime): $]\n"; + exit 0; +} + use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints $HAVE_Spreadsheet_WriteExcel); eval { require GD::Graph::pie; }; $HAVE_GD_Graph_pie = $@ ? 0 : 1; @@ -580,7 +594,7 @@ use vars qw($WEEK $DAY $HOUR $MINUTE); @days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334); $gig = 1024 * 1024 * 1024; -$VERSION = '1.59'; +$VERSION = '1.61'; # How much space do we allow for the Hosts/Domains/Emails/Edomains column headers? $COLUMN_WIDTHS = 8; @@ -594,9 +608,9 @@ $WEEK = 7 * $DAY; use vars qw($total_received_data $total_received_data_gigs $total_received_count); use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_messages $total_delivered_addresses); use vars qw(%timestamp2time); #Hash of timestamp => time. -use vars qw($last_timestamp $last_time); #The last time convertion done. -use vars qw($last_date $date_seconds); #The last date convertion done. -use vars qw($last_offset $offset_seconds); #The last time offset convertion done. +use vars qw($last_timestamp $last_time); #The last time conversion done. +use vars qw($last_date $date_seconds); #The last date conversion done. +use vars qw($last_offset $offset_seconds); #The last time offset conversion done. use vars qw($localtime_offset); use vars qw($i); #General loop counter. use vars qw($debug); #Debug mode? @@ -610,7 +624,7 @@ use vars qw(%ham_count_by_ip %spam_count_by_ip); use vars qw(%rejected_count_by_ip %rejected_count_by_reason); use vars qw(%temporarily_rejected_count_by_ip %temporarily_rejected_count_by_reason); -#For use in Speadsheed::WriteExcel +#For use in Spreadsheet::WriteExcel use vars qw($workbook $ws_global $ws_relayed $ws_errors); use vars qw($row $col $row_hist $col_hist); use vars qw($run_hist); @@ -753,8 +767,8 @@ sub volume_rounded { } else { # We don't want any rounding to be done. - # and we don't need broken formated output which on one hand avoids numbers from - # being interpreted as string by Spreadsheed Calculators, on the other hand + # and we don't need broken formatted output which on one hand avoids numbers from + # being interpreted as string by Spreadsheet Calculators, on the other hand # breaks if more than 4 digits! -> flexible length instead of fixed length # Format the return value at the output routine! -fh #$rounded = sprintf("%d", ($g * $gig) + $x); @@ -867,10 +881,10 @@ $p; # Eg 3h20m5s => 12005 ####################################################################### sub unformat_time { - my($formated_time) = pop @_; + my($formatted_time) = pop @_; my $time = 0; - while ($formated_time =~ s/^(\d+)([wdhms]?)//) { + while ($formatted_time =~ s/^(\d+)([wdhms]?)//) { $time += $1 if ($2 eq '' || $2 eq 's'); $time += $1 * 60 if ($2 eq 'm'); $time += $1 * 60 * 60 if ($2 eq 'h'); @@ -890,6 +904,7 @@ sub unformat_time { # POSIX::mktime. We expect the timestamp to be of the form # "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12, # and the year to be absolute (we do the necessary conversions). The +# seconds value can be followed by decimals, which we ignore. The # timestamp may be followed with an offset from UTC like "+$hh$mm"; if the # offset is not present, and we have not been told that the log is in UTC # (with the -utc option), then we adjust the time by the current local @@ -913,7 +928,7 @@ sub seconds { # Is the timestamp the same as the last one? return $last_time if ($last_timestamp eq $timestamp); - return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o); + return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)(?:\.\d+)?( ([+-])(\d\d)(\d\d))?/o); unless ($last_date eq $1) { $last_date = $1; @@ -924,8 +939,8 @@ sub seconds { } my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7; - # SC. Use cacheing. Also note we want seconds not minutes. - #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8; + # SC. Use caching. Also note we want seconds not minutes. + #my($this_offset) = ($10 * 60 + $12) * ($9 . "1") if defined $8; if (defined $8 && ($8 ne $last_offset)) { $last_offset = $8; $offset_seconds = ($10 * 60 + $11) * 60; @@ -933,7 +948,7 @@ sub seconds { } - if (defined $7) { + if (defined $8) { #$time -= $this_offset; $time -= $offset_seconds; } elsif (defined $localtime_offset) { @@ -1646,7 +1661,7 @@ sub top_n_sort { # Create a dummy hash entry for the key if required. # Note that setting the dummy_hash value sets it for both href2 & - # href3. Also note that currently we are guarenteed to have a real + # href3. Also note that currently we are guaranteed to have a real # value for href3 if a real value for href2 exists so don't need to # test for it as well. $dummy_hash{$key} = 0 unless exists $href2->{$key}; @@ -1847,12 +1862,23 @@ sub generate_parser { $length = length($_); next if ($length < 38); - next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)( \\[\\d+\\])?/o; - - ($tod,$m_hour,$m_min) = ($1,$2,$3); + next unless /^ + (\\d{4}\\-\\d\\d-\\d\\d\\s # 1: YYYYMMDD HHMMSS + (\\d\\d) # 2: HH + : + (\\d\\d) # 3: MM + :\\d\\d + ) + (\\.\\d+)? # 4: subseconds + (\s[-+]\\d\\d\\d\\d)? # 5: tz-offset + (\s\\[\\d+\\])? # 6: pid + /ox; + + $tod = defined($5) ? $1 . $5 : $1; + ($m_hour,$m_min) = ($2,$3); # PH - watch for GMT offsets in the timestamp. - if (defined($4)) { + if (defined($5)) { $extra = 6; next if ($length < 44); } @@ -1860,9 +1886,15 @@ sub generate_parser { $extra = 0; } + # watch for subsecond precision + if (defined($4)) { + $extra += length($4); + next if ($length < 38 + $extra); + } + # PH - watch for PID added after the timestamp. - if (defined($5)) { - $extra += length($5); + if (defined($6)) { + $extra += length($6); next if ($length < 38 + $extra); } @@ -1922,7 +1954,13 @@ sub generate_parser { # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]". # We do 2 separate matches to keep the matches simple and fast. # Host is local unless otherwise specified. - $ip = (/\\bH=.*?(\\[[^]]+\\])/) ? $1 : "local"; + # Watch out for "H=([IpAddr])" in case they send "[IpAddr]" as their HELO! + $ip = (/\\bH=(?:|.*? )(\\[[^]]+\\])/) ? $1 + # 2008-03-31 06:25:22 Connection from [213.246.33.217]:39456 refused: too many connections from that IP address // .hs + : (/Connection from (\[\S+\])/) ? $1 + # 2008-03-31 06:52:40 SMTP call from mail.cacoshrf.com (ccsd02.ccsd.local) [69.24.118.229]:4511 dropped: too many nonmail commands (last was "RSET") // .hs + : (/SMTP call from .*?(\[\S+\])/) ? $1 + : "local"; $host = (/\\bH=(\\S+)/) ? $1 : "local"; $domain = "localdomain"; #Domain is localdomain unless otherwise specified. @@ -2359,6 +2397,7 @@ sub generate_parser { # 2005-09-23 15:07:49 1EInHJ-0007Ex-Au H=(a.b.c) [10.0.0.1] F=<> rejected after DATA: This message contains a virus: (Eicar-Test-Signature) please scan your system. # 2005-10-06 10:50:07 1ENRS3-0000Nr-Kt => blackhole (DATA ACL discarded recipients): This message contains a virus: (Worm.SomeFool.P) please scan your system. / rejected after DATA: (.*)/ || + / (rejected DATA: .*)/ || /.DATA ACL discarded recipients.: (.*)/ || /rejected after DATA: (unqualified address not permitted)/ || /(VRFY rejected)/ || @@ -2417,6 +2456,14 @@ sub generate_parser { ++$rejected_count_by_reason{"\u$1$2"}; ++$rejected_count_by_ip{$ip}; } + elsif ( + # 2008-03-31 06:25:22 H=mail.densitron.com [216.70.140.224]:45386 temporarily rejected connection in "connect" ACL: too fast reconnects // .hs + # 2008-03-31 06:25:22 H=mail.densitron.com [216.70.140.224]:45386 temporarily rejected connection in "connect" ACL // .hs + /(temporarily rejected connection in .*?ACL:?.*)/ + ) { + ++$temporarily_rejected_count_by_ip{$ip}; + ++$temporarily_rejected_count_by_reason{"\u$1"}; + } else { ++$rejected_count_by_reason{Unknown}; ++$rejected_count_by_ip{$ip}; @@ -2710,7 +2757,7 @@ sub print_grandtotals { if ($messages > 0) { @content = ($total_aref->[0], '', $messages, ''); - #Count the number of distict IPs for the Hosts column. + #Count the number of distinct IPs for the Hosts column. push(@content,scalar(keys %{$total_aref->[1]})) if $do_sender{Host}; #These rows do not have entries for the following columns (if specified) @@ -3341,8 +3388,8 @@ sub parse_old_eximstat_reports { my $previous_seconds_on_queue = 0; if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) { print STDERR "Parsing $_" if $debug; - my($modifier,$formated_time,$count) = ($1,$2,$3); - my $seconds = unformat_time($formated_time); + my($modifier,$formatted_time,$count) = ($1,$2,$3); + my $seconds = unformat_time($formatted_time); my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2; $previous_seconds_on_queue = $seconds; $time_on_queue = $seconds * 2 if ($modifier eq 'Over'); @@ -3657,7 +3704,7 @@ sub update_relayed { # # add_to_totals(\%totals,\@keys,$values); # -# Given a line of space seperated values, add them into the provided hash using @keys +# Given a line of space separated values, add them into the provided hash using @keys # as the hash keys. # # If the value contains a '%', then the value is set rather than added. Otherwise, we @@ -3687,7 +3734,7 @@ sub add_to_totals { # # line_to_hash(\%hash,\@keys,$line); # -# Given a line of space seperated values, set them into the provided hash +# Given a line of space separated values, set them into the provided hash # using @keys as the hash keys. ####################################################################### sub line_to_hash { @@ -3753,7 +3800,7 @@ sub html2txt { # until we've got all of the argument. # # This isn't perfect as all white space gets reduced to one space, -# but it's as good as we can get! If it's esential that spacing +# but it's as good as we can get! If it's essential that spacing # be preserved precisely, then you get that by not using shell # variables. ####################################################################### @@ -3795,7 +3842,7 @@ sub set_worksheet_line { ####################################################################### # @rcpt_times = parse_time_list($string); # -# Parse a comma seperated list of time values in seconds given by +# Parse a comma separated list of time values in seconds given by # the user and fill an array. # # Return a default list if $string is undefined.