Reset locale after calling embedded Perl, in case it was changed.
[exim.git] / src / src / eximstats.src
CommitLineData
059ec3d9 1#!PERL_COMMAND -w
1b4fe9dd 2# $Cambridge: exim/src/src/eximstats.src,v 1.2 2004/11/24 14:43:57 ph10 Exp $
059ec3d9
PH
3
4# Copyright (c) 2001 University of Cambridge.
5# See the file NOTICE for conditions of use and distribution.
6
7# Perl script to generate statistics from one or more Exim log files.
8
9# Usage: eximstats [<options>] <log file> <log file> ...
10
11# 1996-05-21: Ignore lines not starting with valid date/time, just in case
12# these get into a log file.
13# 1996-11-19: Add the -h option to control the size of the histogram,
14# and optionally turn it off.
15# Use some Perl 5 things; it should be everywhere by now.
16# Add the Perl -w option and rewrite so no warnings are given.
17# Add the -t option to control the length of the "top" listing.
18# Add the -ne, -nt options to turn off errors and transport
19# information.
20# Add information about length of time on queue, and -q<list> to
21# control the intervals and turn it off.
22# Add count and percentage of delayed messages to the Received
23# line.
24# Show total number of errors.
25# Add count and percentage of messages with errors to Received
26# line.
27# Add information about relaying and -nr to suppress it.
28# 1997-02-03 Merged in some of the things Nigel Metheringham had done:
29# Re-worded headings
30# Added received histogram as well as delivered
31# Added local senders' league table
32# Added local recipients' league table
33# 1997-03-10 Fixed typo "destinationss"
34# Allow for intermediate address between final and original
35# when testing for relaying
36# Give better message when no input
37# 1997-04-24 Fixed bug in layout of error listing that was depending on
38# text length (output line got repeated).
39# 1997-05-06 Bug in option decoding when only one option.
40# Overflow bug when handling very large volumes.
41# 1997-10-28 Updated to handle revised log format that might show
42# HELO name as well as host name before IP number
43# 1998-01-26 Bugs in the function for calculating the number of seconds
44# since 1970 from a log date
45# 1998-02-02 Delivery to :blackhole: doesn't have a T= entry in the log
46# line; cope with this, thereby avoiding undefined problems
47# Very short log line gave substring error
48# 1998-02-03 A routed delivery to a local transport may not have <> in the
49# log line; terminate the address at white space, not <
50# 1998-09-07 If first line of input was a => line, $thissize was undefined;
51# ensure it is zero.
52# 1998-12-21 Adding of $thissize from => line should have been adding $size.
53# Oops. Should have looked more closely when fixing the previous
54# bug!
55# 1999-11-12 Increased the field widths for printed integers; numbers are
56# bigger than originally envisaged.
57# 2001-03-21 Converted seconds() routine to use Time::Local, fixing a bug
58# whereby seconds($timestamp) - id_seconds($id) gave an
59# incorrect result.
60# Added POD documentation.
61# Moved usage instructions into help() subroutine.
62# Added 'use strict' and declared all global variables.
63# Added '-html' flag and resultant code.
64# Added '-cache' flag and resultant code.
65# Added add_volume() routine and converted all volume variables
66# to use it, fixing the overflow problems for individual hosts
67# on large sites.
68# Converted all volume output to GB/MB/KB as appropriate.
69# Don't store local user stats if -nfl is specified.
70# Modifications done by: Steve Campbell (<steve@computurn.com>)
71# 2001-04-02 Added the -t_remote_users flag. Steve Campbell.
72# 2001-10-15 Added the -domain flag. Steve Campbell.
73# 2001-10-16 Accept files on STDIN or on the command line. Steve Campbell.
74# 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
75# We now generate our main parsing subroutine as an eval statement
76# which improves performance dramatically when not all the results
77# are required. We also cache the last timestamp to time convertion.
78#
79# NOTE: 'Top 50 destinations by (message count|volume)' lines are
80# now 'Top N (host|email|domain) destinations by (message count|volume)'
81# where N is the topcount. Steve Campbell.
82#
83# 2001-10-30 V1.16 Joachim Wieland.
84# Fixed minor bugs in add_volume() when taking over this version
85# for use in Exim 4: -w gave uninitialized value warnings in
86# two situations: for the first addition to a counter, and if
87# there were never any gigabytes, thereby leaving the $gigs
88# value unset.
89# Initialized $last_timestamp to stop a -w uninitialized warning.
90# Minor layout tweak for grand totals (nitpicking).
91# Put the IP addresses for relaying stats in [] and separated by
92# a space from the domain name.
93# Removed the IPv4-specific address test when picking out addresses
94# for relaying. Anything inside [] is OK.
95#
96# 2002-07-02 Philip Hazel
97# Fixed "uninitialized variable" message that occurred for relay
98# messages that arrived from H=[1.2.3.4] hosts (no name shown).
99# This bug didn't affect the output.
100#
101# 2002-04-15 V1.17 Joachim Wieland.
102# Added -charts, -chartdir. -chartrel options which use
103# GD::Graph modules to create graphical charts of the statistics.
104#
105# 2002-04-15 V1.18 Steve Campbell.
106# Added a check for $domain to to stop a -w uninitialized warning.
107# Added -byemaildomain option.
108# Only print HTML header links to included tables!
109#
110# 2002-08-02 V1.19 Steve Campbell.
111# Changed the debug mode to dump the parser onto STDERR rather
112# than STDOUT. Documented the -d flag into the help().
113# Rejoined the divergent 2002-04-15 and 2002-07-02 releases.
114#
115# 2002-08-21 V1.20 Steve Campbell.
116# Added the '-merge' option to allow merging of previous reports.
117# Fixed a missing semicolon when doing -bydomain.
118# Make volume charts plot the data gigs and bytes rather than just bytes.
119# Only process log lines with $flag =~ /<=|=>|->|==|\*\*|Co/
120# Converted Emaildomain to Edomain - the column header was too wide!
121# This changes the text output slightly. You can revert to the old
122# column widths by changing $COLUMN_WIDTHS to 7;
123#
124# 2002-09-04 V1.21 Andreas J Mueller
125# Local deliveries domain now defaults to 'localdomain'.
126# Don't match F=<From> when looking for the user.
127#
128# 2002-09-05 V1.22 Steve Campbell
129# Fixed a perl 5.005 incompatibility problem ('our' variables).
130#
131# 2002-09-11 V1.23 Steve Campbell
132# Stopped -charts option from throwing errors on null data.
133# Don't print out 'Errors encountered' unless there are any.
134
135# 2002-10-21 V1.23a Philip Hazel - patch from Tony Finch put in until
136# Steve's eximstats catches up.
137# Handle log files that include the timezone after the timestamp.
138# Switch to assuming that log timestamps are in local time, with
139# an option for UTC timestamps, as in Exim itself.
140#
141# 2003-02-05 V1.24 Steve Campbell
142# Added in Sergey Sholokh's code to convert '<' and '>' characters
143# in HTML output. Also added code to convert them back with -merge.
144# Fixed timestamp offsets to convert to seconds rather than minutes.
145# Updated -merge to work with output files using timezones.
146# Added cacheing to speed up the calculation of timezone offsets.
147#
148# 2003-02-07 V1.25 Steve Campbell
149# Optimised the usage of mktime() in the seconds subroutine.
150# Removed the now redundant '-cache' option.
151# html2txt() now explicitly matches HTML tags.
152# Implemented a new sorting algorithm - the top_n_sort() routine.
153# Added Danny Carroll's '-nvr' flag and code.
154#
155# 2003-03-13 V1.26 Steve Campbell
156# Implemented HTML compliance changes recommended by Bernard Massot.
157# Bug fix to allow top_n_sort() to handle null keys.
158# Convert all domains and edomains to lowercase.
159# Remove preceding dots from domains.
160#
161# 2003-03-13 V1.27 Steve Campbell
162# Replaced border attributes with 'border=1', as recommended by
163# Bernard Massot.
164#
165# 2003-06-03 V1.28 John Newman
166# Added in the ability to skip over the parsing and evaulation of
167# specific transports as passed to eximstats via the new "-nt/.../"
168# command line argument. This new switch allows the viewing of
169# not more accurate statistics but more applicable statistics when
170# special transports are in use (ie; SpamAssassin). We need to be
171# able to ignore transports such as this otherwise the resulting
172# local deliveries are significantly skewed (doubled)...
173#
174# 2003-11-06 V1.29 Steve Campbell
175# Added the '-pattern "Description" "/pattern/"' option.
176#
177# 2004-02-17 V1.30 Steve Campbell
178# Added warnings if required GD::Graph modules are not available or
179# insufficient -chart* options are specified.
180#
1b4fe9dd 181# 2004-02-20 V1.31 Andrea Balzi
059ec3d9
PH
182# Only show the Local Sender/Destination links if the tables exist.
183#
1b4fe9dd
PH
184# 2004-07-05 V1.32 Steve Campbell
185# Fix '-merge -h0' divide by zero error.
186#
187# 2004-07-15 V1.33 Steve Campbell
188# Documentation update - I've converted the subroutine
189# documentation from POD to comments.
059ec3d9
PH
190
191
192=head1 NAME
193
194eximstats - generates statistics from Exim mainlog files.
195
196=head1 SYNOPSIS
197
198 eximstats [Options] mainlog1 mainlog2 ... > report.txt
1b4fe9dd 199 eximstats -html [Options] mainlog1 mainlog2 ... > report.html
059ec3d9
PH
200 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_report.txt
201
202Options:
203
204=over 4
205
206=item B<-h>I<number>
207
208histogram divisions per hour. The default is 1, and
2090 suppresses histograms. Valid values are:
210
2110, 1, 2, 3, 5, 10, 15, 20, 30 or 60.
212
213=item B<-ne>
214
215Don't display error information.
216
217=item B<-nr>
218
219Don't display relaying information.
220
221=item B<-nr>I</pattern/>
222
223Don't display relaying information that matches.
224
225=item B<-nt>
226
227Don't display transport information.
228
229=item B<-nt>I</pattern/>
230
231Don't display transport information that matches
232
233=item B<-q>I<list>
234
235List of times for queuing information single 0 item suppresses.
236
237=item B<-t>I<number>
238
239Display top <number> sources/destinations
240default is 50, 0 suppresses top listing.
241
242=item B<-tnl>
243
244Omit local sources/destinations in top listing.
245
246=item B<-t_remote_users>
247
248Include remote users in the top source/destination listings.
249
250=item B<-byhost>
251
252Show results by sending host. This may be combined with
253B<-bydomain> and/or B<-byemail> and/or B<-byedomain>. If none of these options
254are specified, then B<-byhost> is assumed as a default.
255
256=item B<-bydomain>
257
258Show results by sending domain.
259May be combined with B<-byhost> and/or B<-byemail> and/or B<-byedomain>.
260
261=item B<-byemail>
262
263Show results by sender's email address.
264May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byedomain>.
265
266=item B<-byemaildomain> or B<-byedomain>
267
268Show results by sender's email domain.
269May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byemail>.
270
271=item B<-pattern> I<Description> I</Pattern/>
272
273Look for the specified pattern and count the number of lines in which it appears.
274This option can be specified multiple times. Eg:
275
276 -pattern 'Refused connections' '/refused connection/'
277
278
279=item B<-merge>
280
281This option allows eximstats to merge old eximstat reports together. Eg:
282
283 eximstats mainlog.sun > report.sun.txt
284 eximstats mainlog.mon > report.mon.txt
285 eximstats mainlog.tue > report.tue.txt
286 eximstats mainlog.wed > report.web.txt
287 eximstats mainlog.thu > report.thu.txt
288 eximstats mainlog.fri > report.fri.txt
289 eximstats mainlog.sat > report.sat.txt
290 eximstats -merge report.*.txt > weekly_report.txt
291 eximstats -merge -html report.*.txt > weekly_report.html
292
293=over 4
294
295=item *
296
297You can merge text or html reports and output the results as text or html.
298
299=item *
300
301You can use all the normal eximstat output options, but only data
302included in the original reports can be shown!
303
304=item *
305
306When merging reports, some loss of accuracy may occur in the top I<n> lists.
307This will be towards the ends of the lists.
308
309=item *
310
311The order of items in the top I<n> lists may vary when the data volumes
312round to the same value.
313
314=back
315
316=item B<-html>
317
318Output the results in HTML.
319
320=item B<-charts>
321
322Create graphical charts to be displayed in HTML output.
323
324This requires the following modules which can be obtained
325from http://www.cpan.org/modules/01modules.index.html
326
327=over 4
328
329=item GD
330
331=item GDTextUtil
332
333=item GDGraph
334
335=back
336
337To install these, download and unpack them, then use the normal perl installation procedure:
338
339 perl Makefile.PL
340 make
341 make test
342 make install
343
344=item B<-chartdir>I <dir>
345
346Create the charts in the directory <dir>
347
348=item B<-chartrel>I <dir>
349
350Specify the relative directory for the "img src=" tags from where to include
351the charts
352
353=item B<-d>
354
355Debug flag. This outputs the eval()'d parser onto STDOUT which makes it
356easier to trap errors in the eval section. Remember to add 1 to the line numbers to allow for the
357title!
358
359=back
360
361=head1 DESCRIPTION
362
363Eximstats parses exim mainlog files and outputs a statistical
364analysis of the messages processed. By default, a text
365analysis is generated, but you can request an html analysis
366by using the B<-html> flag. See the help (B<-help>) to learn
367about how to create charts from the tables.
368
369=head1 AUTHOR
370
371There is a web site at http://www.exim.org - this contains details of the
372mailing list exim-users@exim.org.
373
374=head1 TO DO
375
376This program does not perfectly handle messages whose received
377and delivered log lines are in different files, which can happen
378when you have multiple mail servers and a message cannot be
379immeadiately delivered. Fixing this could be tricky...
380
059ec3d9
PH
381=cut
382
383use integer;
384use strict;
385
386# use Time::Local; # PH/FANF
387use POSIX;
388
389use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints);
390eval { require GD::Graph::pie; };
391$HAVE_GD_Graph_pie = $@ ? 0 : 1;
392eval { require GD::Graph::linespoints; };
393$HAVE_GD_Graph_linespoints = $@ ? 0 : 1;
394
395
396##################################################
397# Static data #
398##################################################
399# 'use vars' instead of 'our' as perl5.005 is still in use out there!
400use vars qw(@tab62 @days_per_month $gig);
401use vars qw($VERSION);
402use vars qw($COLUMN_WIDTHS);
403
404
405@tab62 =
406 (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, # 0-9
407 0,10,11,12,13,14,15,16,17,18,19,20, # A-K
408 21,22,23,24,25,26,27,28,29,30,31,32, # L-W
409 33,34,35, 0, 0, 0, 0, 0, # X-Z
410 0,36,37,38,39,40,41,42,43,44,45,46, # a-k
411 47,48,49,50,51,52,53,54,55,56,57,58, # l-w
412 59,60,61); # x-z
413
414@days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
415$gig = 1024 * 1024 * 1024;
1b4fe9dd 416$VERSION = '1.33';
059ec3d9
PH
417
418# How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
419$COLUMN_WIDTHS = 8;
420
421# Declare global variables.
422use vars qw($total_received_data $total_received_data_gigs $total_received_count);
423use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_count);
424use vars qw(%arrival_time %size %from_host %from_address);
425use vars qw(%timestamp2time); #Hash of timestamp => time.
426use vars qw($last_timestamp $last_time); #The last time convertion done.
427use vars qw($last_date $date_seconds); #The last date convertion done.
428use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
429use vars qw($localtime_offset);
430use vars qw($i); #General loop counter.
431use vars qw($debug); #Debug mode?
432use vars qw($ntopchart); #How many entries should make it into the chart?
433use vars qw($gddirectory); #Where to put files from GD::Graph
434
435$ntopchart = 5;
436
437# The following are parameters whose values are
438# set by command line switches:
439use vars qw($show_errors $show_relay $show_transport $transport_pattern);
440use vars qw($topcount $local_league_table $include_remote_users);
441use vars qw($hist_opt $hist_interval $hist_number $volume_rounding);
442use vars qw($relay_pattern @queue_times $html @user_patterns @user_descriptions);
443
444use vars qw(%do_sender); #Do sender by Host, Domain, Email, and/or Edomain tables.
445use vars qw($charts $chartrel $chartdir $charts_option_specified);
446use vars qw($merge_reports); #Merge old reports ?
447
448# The following are modified in the parse() routine, and
449# referred to in the print_*() routines.
450use vars qw($queue_more_than $delayed_count $relayed_unshown $begin $end);
451use vars qw(%received_count %received_data %received_data_gigs);
452use vars qw(%delivered_count %delivered_data %delivered_data_gigs);
453use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
454use vars qw(%delivered_count_user %delivered_data_user %delivered_data_gigs_user);
455use vars qw(%transported_count %transported_data %transported_data_gigs);
456use vars qw(%remote_delivered %relayed %delayed %had_error %errors_count);
457use vars qw(@queue_bin @remote_queue_bin @received_interval_count @delivered_interval_count);
458use vars qw(@user_pattern_totals);
459
460use vars qw(%report_totals);
461
462
463
464
465##################################################
466# Subroutines #
467##################################################
468
469
1b4fe9dd
PH
470#######################################################################
471# volume_rounded();
472#
473# $rounded_volume = volume_rounded($bytes,$gigabytes);
474#
475# Given a data size in bytes, round it to KB, MB, or GB
476# as appropriate.
477#
478# Eg 12000 => 12KB, 15000000 => 14GB, etc.
479#
480# Note: I've experimented with Math::BigInt and it results in a 33%
481# performance degredation as opposed to storing numbers split into
482# bytes and gigabytes.
483#######################################################################
059ec3d9
PH
484sub volume_rounded {
485 my($x,$g) = @_;
486 $x = 0 unless $x;
487 $g = 0 unless $g;
488 my($rounded);
489
490 while ($x > $gig) {
491 $g++;
492 $x -= $gig;
493 }
494
495 if ($volume_rounding) {
496 # Values < 1 GB
497 if ($g <= 0) {
498 if ($x < 10000) {
499 $rounded = sprintf("%6d", $x);
500 }
501 elsif ($x < 10000000) {
502 $rounded = sprintf("%4dKB", ($x + 512)/1024);
503 }
504 else {
505 $rounded = sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
506 }
507 }
508 # Values between 1GB and 10GB are printed in MB
509 elsif ($g < 10) {
510 $rounded = sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024));
511 }
512 else {
513 # Handle values over 10GB
514 $rounded = sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
515 }
516 }
517 else {
518 # We don't want any rounding to be done.
519 $rounded = sprintf("%4d", ($g * $gig) + $x);
520 }
521
522 return $rounded;
523}
524
525
1b4fe9dd
PH
526#######################################################################
527# un_round();
528#
529# un_round($rounded_volume,\$bytes,\$gigabytes);
530#
531# Given a volume in KB, MB or GB, as generated by volume_rounded(),
532# do the reverse transformation and convert it back into Bytes and Gigabytes.
533# These are added to the $bytes and $gigabytes parameters.
534#
535# Given a data size in bytes, round it to KB, MB, or GB
536# as appropriate.
537#
538# EG: 500 => (500,0), 14GB => (0,14), etc.
539#######################################################################
059ec3d9
PH
540sub un_round {
541 my($rounded,$bytes_sref,$gigabytes_sref) = @_;
542
543 if ($rounded =~ /(\d+)GB/) {
544 $$gigabytes_sref += $1;
545 }
546 elsif ($rounded =~ /(\d+)MB/) {
547 $$gigabytes_sref += $1 / 1024;
548 $$bytes_sref += (($1 % 1024 ) * 1024 * 1024);
549 }
550 elsif ($rounded =~ /(\d+)KB/) {
551 $$gigabytes_sref += $1 / (1024 * 1024);
552 $$bytes_sref += ($1 % (1024 * 1024) * 1024);
553 }
554 elsif ($rounded =~ /(\d+)/) {
555 $$gigabytes_sref += $1 / $gig;
556 $$bytes_sref += $1 % $gig;
557 }
558
559 #Now reduce the bytes down to less than 1GB.
560 add_volume($bytes_sref,$gigabytes_sref,0) if ($$bytes_sref > $gig);
561}
562
563
1b4fe9dd
PH
564#######################################################################
565# add_volume();
566#
567# add_volume(\$bytes,\$gigs,$size);
568#
569# Add $size to $bytes/$gigs where this is a number split into
570# bytes ($bytes) and gigabytes ($gigs). This is significantly
571# faster than using Math::BigInt.
572#######################################################################
059ec3d9 573sub add_volume {
1b4fe9dd
PH
574 my($bytes_ref,$gigs_ref,$size) = @_;
575 $$bytes_ref = 0 if ! defined $$bytes_ref;
576 $$gigs_ref = 0 if ! defined $$gigs_ref;
577 $$bytes_ref += $size;
578 while ($$bytes_ref > $gig) {
579 $$gigs_ref++;
580 $$bytes_ref -= $gig;
059ec3d9
PH
581 }
582}
583
584
1b4fe9dd
PH
585#######################################################################
586# format_time();
587#
588# $formatted_time = format_time($seconds);
589#
590# Given a time in seconds, break it down into
591# weeks, days, hours, minutes, and seconds.
592#
593# Eg 12005 => 3h20m5s
594#######################################################################
059ec3d9
PH
595sub format_time {
596my($t) = pop @_;
597my($s) = $t % 60;
598$t /= 60;
599my($m) = $t % 60;
600$t /= 60;
601my($h) = $t % 24;
602$t /= 24;
603my($d) = $t % 7;
604my($w) = $t/7;
605my($p) = "";
606$p .= "$w"."w" if $w > 0;
607$p .= "$d"."d" if $d > 0;
608$p .= "$h"."h" if $h > 0;
609$p .= "$m"."m" if $m > 0;
610$p .= "$s"."s" if $s > 0 || $p eq "";
611$p;
612}
613
614
1b4fe9dd
PH
615#######################################################################
616# unformat_time();
617#
618# $seconds = unformat_time($formatted_time);
619#
620# Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
621#
622# Eg 3h20m5s => 12005
623#######################################################################
059ec3d9
PH
624sub unformat_time {
625 my($formated_time) = pop @_;
626 my $time = 0;
627
628 while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
629 $time += $1 if ($2 eq '' || $2 eq 's');
630 $time += $1 * 60 if ($2 eq 'm');
631 $time += $1 * 60 * 60 if ($2 eq 'h');
632 $time += $1 * 60 * 60 * 24 if ($2 eq 'd');
633 $time += $1 * 60 * 60 * 24 * 7 if ($2 eq 'w');
634 }
635 $time;
636}
637
638
1b4fe9dd
PH
639#######################################################################
640# seconds();
641#
642# $time = seconds($timestamp);
643#
644# Given a time-of-day timestamp, convert it into a time() value using
645# POSIX::mktime. We expect the timestamp to be of the form
646# "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
647# and the year to be absolute (we do the necessary conversions). The
648# timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
649# offset is not present, and we have not been told that the log is in UTC
650# (with the -utc option), then we adjust the time by the current local
651# time offset so that it can be compared with the time recorded in message
652# IDs, which is UTC.
653#
654# To improve performance, we only use mktime on the date ($year-$mon-$day),
655# and only calculate it if the date is different to the previous time we
656# came here. We then add on seconds for the '$hour:$min:$sec'.
657#
658# We also store the results of the last conversion done, and only
659# recalculate if the date is different.
660#
661# We used to have the '-cache' flag which would store the results of the
662# mktime() call. However, the current way of just using mktime() on the
663# date obsoletes this.
664#######################################################################
059ec3d9
PH
665sub seconds {
666 my($timestamp) = @_;
667
668 # Is the timestamp the same as the last one?
669 return $last_time if ($last_timestamp eq $timestamp);
670
671 return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o);
672
673 unless ($last_date eq $1) {
674 $last_date = $1;
675 my(@timestamp) = (0,0,0,$4,$3,$2);
676 $timestamp[5] -= 1900;
677 $timestamp[4]--;
678 $date_seconds = mktime(@timestamp);
679 }
680 my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
681
682 # SC. Use cacheing. Also note we want seconds not minutes.
683 #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
684 if (defined $8 && ($8 ne $last_offset)) {
685 $last_offset = $8;
686 $offset_seconds = ($10 * 60 + $11) * 60;
687 $offset_seconds = -$offset_seconds if ($9 eq '-');
688 }
689
690
691 if (defined $7) {
692 #$time -= $this_offset;
693 $time -= $offset_seconds;
694 } elsif (defined $localtime_offset) {
695 $time -= $localtime_offset;
696 }
697
698 # Store the last timestamp received.
699 $last_timestamp = $timestamp;
700 $last_time = $time;
701
702 $time;
703}
704
705
1b4fe9dd
PH
706#######################################################################
707# id_seconds();
708#
709# $time = id_seconds($message_id);
710#
711# Given a message ID, convert it into a time() value.
712#######################################################################
059ec3d9
PH
713sub id_seconds {
714my($sub_id) = substr((pop @_), 0, 6);
715my($s) = 0;
716my(@c) = split(//, $sub_id);
717while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] }
718$s;
719}
720
721
722
1b4fe9dd
PH
723#######################################################################
724# calculate_localtime_offset();
725#
726# $localtime_offset = calculate_localtime_offset();
727#
728# Calculate the the localtime offset from gmtime in seconds.
729#
730# $localtime = time() + $localtime_offset.
731#
732# These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
733# (West is negative, East is positive.)
734#######################################################################
059ec3d9
PH
735
736# $localtime = gmtime() + $localtime_offset. OLD COMMENT
737# This subroutine commented out as it's not currently in use.
738
739#sub calculate_localtime_offset {
740# # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
741# my (@sample_date) = (0,0,0,5,5,100);
742# my $localtime = timelocal(@sample_date);
743# my $gmtime = timegm(@sample_date);
744# my $offset = $localtime - $gmtime;
745# return $offset;
746#}
747
748sub calculate_localtime_offset {
749 # Assume that the offset at the moment is valid across the whole
750 # period covered by the logs that we're analysing. This may not
751 # be true around the time the clocks change in spring or autumn.
752 my $utc = time;
753 # mktime works on local time and gmtime works in UTC
754 my $local = mktime(gmtime($utc));
755 return $local - $utc;
756}
757
758
1b4fe9dd
PH
759#######################################################################
760# print_queue_times();
761#
762# $time = print_queue_times($message_type,\@queue_times,$queue_more_than);
763#
764# Given the type of messages being output, the array of message queue times,
765# and the number of messages which exceeded the queue times, print out
766# a table.
767#######################################################################
059ec3d9
PH
768sub print_queue_times {
769no integer;
770my($string,$array,$queue_more_than) = @_;
771my(@chartdatanames);
772my(@chartdatavals);
773
774my $printed_one = 0;
775my $cumulative_percent = 0;
776#$queue_unknown += keys %arrival_time;
777
778my $queue_total = $queue_more_than;
779for ($i = 0; $i <= $#queue_times; $i++) { $queue_total += $$array[$i] }
780
781my $temp = "Time spent on the queue: $string";
782
783my($format);
784if ($html) {
785 print "<hr><a name=\"$string time\"></a><h2>$temp</h2>\n";
786 print "<table border=0 width=\"100%\">\n";
787 print "<tr><td>\n";
788 print "<table border=1>\n";
789 print "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th>\n";
790 $format = "<tr><td align=\"right\">%s %s</td><td align=\"right\">%d</td><td align=\"right\">%5.1f%%</td><td align=\"right\">%5.1f%%</td>\n";
791}
792else
793{
794 printf("%s\n%s\n\n", $temp, "-" x length($temp));
795 $format = "%5s %4s %6d %5.1f%% %5.1f%%\n";
796}
797
798for ($i = 0; $i <= $#queue_times; $i++) {
799 if ($$array[$i] > 0)
800 {
801 my $percent = ($$array[$i] * 100)/$queue_total;
802 $cumulative_percent += $percent;
803 printf($format,
804 $printed_one? " " : "Under",
805 format_time($queue_times[$i]),
806 $$array[$i], $percent, $cumulative_percent);
807 if (!defined($queue_times[$i])) {
808 print "Not defined";
809 }
810 push(@chartdatanames,
811 ($printed_one? "" : "Under") . format_time($queue_times[$i]));
812 push(@chartdatavals, $$array[$i]);
813 $printed_one = 1;
814 }
815}
816
817if ($queue_more_than > 0) {
818 my $percent = ($queue_more_than * 100)/$queue_total;
819 $cumulative_percent += $percent;
820 printf($format,
821 "Over ",
822 format_time($queue_times[$#queue_times]),
823 $queue_more_than, $percent, $cumulative_percent);
824}
825push(@chartdatanames, "Over " . format_time($queue_times[$#queue_times]));
826push(@chartdatavals, $queue_more_than);
827
828#printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;
829if ($html) {
830 print "</table>\n";
831 print "</td><td>\n";
832
833 if ($HAVE_GD_Graph_pie && $charts) {
834 my @data = (
835 \@chartdatanames,
836 \@chartdatavals
837 );
838 my $graph = GD::Graph::pie->new(200, 200);
839 my $pngname;
840 my $title;
841 if ($string =~ /all/) { $pngname = "queue_all.png"; $title = "Queue (all)"; }
842 if ($string =~ /remote/) { $pngname = "queue_rem.png"; $title = "Queue (remote)"; }
843 $graph->set(
844 title => $title,
845 );
846 my $gd = $graph->plot(\@data) or warn($graph->error);
847 if ($gd) {
848 open(IMG, ">$chartdir/$pngname") or die $!;
849 binmode IMG;
850 print IMG $gd->png;
851 close IMG;
852 print "<img src=\"$chartrel/$pngname\">";
853 }
854 }
855 print "</td></tr></table>\n";
856}
857print "\n";
858}
859
860
861
1b4fe9dd
PH
862#######################################################################
863# print_histogram();
864#
865# print_histogram('Deliverieds|Messages received',@interval_count);
866#
867# Print a histogram of the messages delivered/received per time slot
868# (hour by default).
869#######################################################################
059ec3d9
PH
870sub print_histogram {
871my($text) = shift;
872my(@interval_count) = @_;
873my(@chartdatanames);
874my(@chartdatavals);
875my($maxd) = 0;
876for ($i = 0; $i < $hist_number; $i++)
877 { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; }
878
879my $scale = int(($maxd + 25)/50);
880$scale = 1 if $scale == 0;
881
882my($type);
883if ($text eq "Deliveries")
884 {
885 $type = ($scale == 1)? "delivery" : "deliveries";
886 }
887else
888 {
889 $type = ($scale == 1)? "message" : "messages";
890 }
891
892my($title) = sprintf("$text per %s (each dot is $scale $type)",
893 ($hist_interval == 60)? "hour" :
894 ($hist_interval == 1)? "minute" : "$hist_interval minutes");
895
896if ($html) {
897 print "<hr><a name=\"$text\"></a><h2>$title</h2>\n";
898 print "<table border=0 width=\"100%\">\n";
899 print "<tr><td><pre>\n";
900}
901else {
902 printf("%s\n%s\n\n", $title, "-" x length($title));
903}
904
905my $hour = 0;
906my $minutes = 0;
907for ($i = 0; $i < $hist_number; $i++)
908 {
909 my $c = $interval_count[$i];
910
911 # If the interval is an hour (the maximum) print the starting and
912 # ending hours as a label. Otherwise print the starting hour and
913 # minutes, which take up the same space.
914
915 my $temp;
916 if ($hist_opt == 1)
917 {
918 $temp = sprintf("%02d-%02d", $hour, $hour + 1);
919 print $temp;
920 push(@chartdatanames, $temp);
921 $hour++;
922 }
923 else
924 {
925 if ($minutes == 0)
926 { $temp = sprintf("%02d:%02d", $hour, $minutes) }
927 else
928 { $temp = sprintf(" :%02d", $minutes) }
929 print $temp;
930 push(@chartdatanames, $temp);
931 $minutes += $hist_interval;
932 if ($minutes >= 60)
933 {
934 $minutes = 0;
935 $hour++;
936 }
937 }
938 push(@chartdatavals, $c);
939 printf(" %6d %s\n", $c, "." x ($c/$scale));
940 }
941print "\n";
942if ($html)
943 {
944 print "</pre>\n";
945 print "</td><td>\n";
946 if ($HAVE_GD_Graph_linespoints && $charts) {
947 # calculate the graph
948 my @data = (
949 \@chartdatanames,
950 \@chartdatavals
951 );
952 my $graph = GD::Graph::linespoints->new(300, 300);
953 $graph->set(
954 x_label => 'Time',
955 y_label => 'Amount',
956 title => $text,
957 x_labels_vertical => 1
958 );
959 my($pngname);
960 if ($text =~ /Deliveries/) { $pngname = "histogram_del.png"; }
961 if ($text =~ /Messages/) { $pngname = "histogram_mes.png"; }
962 my $gd = $graph->plot(\@data) or warn($graph->error);
963 if ($gd) {
964 open(IMG, ">$chartdir/$pngname") or die $!;
965 binmode IMG;
966 print IMG $gd->png;
967 close IMG;
968 print "<img src=\"$chartrel/$pngname\">";
969 }
970 }
971 print "</td></tr></table>\n";
972}
973}
974
975
976
1b4fe9dd
PH
977#######################################################################
978# print_league_table();
979#
980# print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs);
981#
982# Given hashes of message count and message data, which are keyed by
983# the table type (eg by the sending host), print a league table
984# showing the top $topcount (defaults to 50).
985#######################################################################
059ec3d9
PH
986sub print_league_table {
987my($text,$m_count,$m_data,$m_data_gigs) = @_;
988my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";
989my($temp) = "Top $name by message count";
990my(@chartdatanames) = ();
991my(@chartdatavals) = ();
992my $chartotherval = 0;
993
994my($format);
995if ($html) {
996 print "<hr><a name=\"$text count\"></a><h2>$temp</h2>\n";
997 print "<table border=0 width=\"100%\">\n";
998 print "<tr><td>\n";
999 print "<table border=1>\n";
1000 print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1001
1002 # Align non-local addresses to the right (so all the .com's line up).
1003 # Local addresses are aligned on the left as they are userids.
1004 my $align = ($text !~ /local/i) ? 'right' : 'left';
1005 $format = "<tr><td align=\"right\">%d</td><td align=\"right\">%s</td><td align=\"$align\" nowrap>%s</td>\n";
1006}
1007else {
1008 printf("%s\n%s\n\n", $temp, "-" x length($temp));
1009 $format = "%7d %10s %s\n";
1010}
1011
1012my($key,$htmlkey);
1013foreach $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {
1014 if ($html) {
1015 $htmlkey = $key;
1016 $htmlkey =~ s/>/\&gt\;/g;
1017 $htmlkey =~ s/</\&lt\;/g;
1018 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1019 }
1020 else {
1021 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1022 }
1023 if (scalar @chartdatanames < $ntopchart)
1024 {
1025 push(@chartdatanames, $key);
1026 push(@chartdatavals, $$m_count{$key});
1027 }
1028 else
1029 {
1030 $chartotherval += $$m_count{$key};
1031 }
1032 }
1033push(@chartdatanames, "Other");
1034push(@chartdatavals, $chartotherval);
1035
1036if ($html)
1037 {
1038 print "</table>\n";
1039 print "</td><td>\n";
1040 if ($HAVE_GD_Graph_pie && $charts)
1041 {
1042 # calculate the graph
1043 my @data = (
1044 \@chartdatanames,
1045 \@chartdatavals
1046 );
1047 my $graph = GD::Graph::pie->new(300, 300);
1048 $graph->set(
1049 x_label => 'Name',
1050 y_label => 'Amount',
1051 title => 'By count',
1052 );
1053 my $gd = $graph->plot(\@data) or warn($graph->error);
1054 if ($gd) {
1055 my $temp = $text;
1056 $temp =~ s/ /_/g;
1057 open(IMG, ">$chartdir/${temp}_count.png") or die $!;
1058 binmode IMG;
1059 print IMG $gd->png;
1060 close IMG;
1061 print "<img src=\"$chartrel/${temp}_count.png\">";
1062 }
1063 }
1064 print "</td><td>\n";
1065 print "</td></tr></table>\n";
1066}
1067print "\n";
1068
1069$temp = "Top $name by volume";
1070if ($html) {
1071 print "<hr><a name=\"$text volume\"></a><h2>$temp</h2>\n";
1072 print "<table border=0 width=\"100%\">\n";
1073 print "<tr><td>\n";
1074 print "<table border=1>\n";
1075 print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1076}
1077else {
1078 printf("%s\n%s\n\n", $temp, "-" x length($temp));
1079}
1080
1081@chartdatanames = ();
1082@chartdatavals = ();
1083$chartotherval = 0;
1084foreach $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
1085 if ($html) {
1086 $htmlkey = $key;
1087 $htmlkey =~ s/>/\&gt\;/g;
1088 $htmlkey =~ s/</\&lt\;/g;
1089 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1090 }
1091 else {
1092 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1093 }
1094
1095 if (scalar @chartdatanames < $ntopchart)
1096 {
1097 push(@chartdatanames, $key);
1098 push(@chartdatavals, $$m_count{$key});
1099 }
1100 else
1101 {
1102 $chartotherval += $$m_count{$key};
1103 }
1104 }
1105push(@chartdatanames, "Other");
1106push(@chartdatavals, $chartotherval);
1107
1108if ($html) {
1109 print "</table>\n";
1110 print "</td><td>\n";
1111 if ($HAVE_GD_Graph_pie && $charts) {
1112 # calculate the graph
1113 my @data = (
1114 \@chartdatanames,
1115 \@chartdatavals
1116 );
1117 my $graph = GD::Graph::pie->new(300, 300);
1118 $graph->set(
1119 x_label => 'Name',
1120 y_label => 'Volume',
1121 title => 'By Volume',
1122 );
1123 my $gd = $graph->plot(\@data) or warn($graph->error);
1124 if ($gd) {
1125 my $temp = $text;
1126 $temp =~ s/ /_/g;
1127 open(IMG, ">$chartdir/${temp}_volume.png") or die $!;
1128 binmode IMG;
1129 print IMG $gd->png;
1130 close IMG;
1131 print "<img src=\"$chartrel/${temp}_volume.png\">";
1132 }
1133 }
1134 print "</td><td>\n";
1135 print "</td></tr></table>\n";
1136}
1137
1138print "\n";
1139}
1140
1141
1b4fe9dd
PH
1142#######################################################################
1143# top_n_sort();
1144#
1145# @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
1146#
1147# Given a hash which has numerical values, return the sorted $n keys which
1148# point to the top values. The second and third hashes are used as
1149# tiebreakers. They all must have the same keys.
1150#
1151# The idea behind this routine is that when you only want to see the
1152# top n members of a set, rather than sorting the entire set and then
1153# plucking off the top n, sort through the stack as you go, discarding
1154# any member which is lower than your current n'th highest member.
1155#
1156# This proves to be an order of magnitude faster for large hashes.
1157# On 200,000 lines of mainlog it benchmarked 9 times faster.
1158# On 700,000 lines of mainlog it benchmarked 13.8 times faster.
1159#
1160# We assume the values are > 0.
1161#######################################################################
059ec3d9
PH
1162sub top_n_sort {
1163 my($n,$href1,$href2,$href3) = @_;
1164
1165 # PH's original sort was:
1166 #
1167 # foreach $key (sort
1168 # {
1169 # $$m_count{$b} <=> $$m_count{$a} ||
1170 # $$m_data_gigs{$b} <=> $$m_data_gigs{$a} ||
1171 # $$m_data{$b} <=> $$m_data{$a} ||
1172 # $a cmp $b
1173 # }
1174 # keys %{$m_count})
1175 #
1176
1177 #We use a key of '_' to represent non-existant values, as null keys are valid.
1178 #'_' is not a valid domain, edomain, host, or email.
1179 my(@top_n_keys) = ('_') x $n;
1180 my($minimum_value1,$minimum_value2,$minimum_value3) = (0,0,0);
1181 my $top_n_key = '';
1182 my $n_minus_1 = $n - 1;
1183 my $n_minus_2 = $n - 2;
1184
1b4fe9dd 1185 # Pick out the top $n keys.
059ec3d9
PH
1186 my($key,$value1,$value2,$value3,$i,$comparison,$insert_position);
1187 while (($key,$value1) = each %$href1) {
1188
1189 #print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n";
1b4fe9dd 1190
059ec3d9
PH
1191 # Check to see that the new value is bigger than the lowest of the
1192 # top n keys that we're keeping.
1b4fe9dd 1193 $comparison = $value1 <=> $minimum_value1 ||
059ec3d9
PH
1194 $href2->{$key} <=> $minimum_value2 ||
1195 $href3->{$key} <=> $minimum_value3 ||
1196 $top_n_key cmp $key;
1197 next unless ($comparison == 1);
1198
1199 # As we will be using these values a few times, extract them into scalars.
1200 $value2 = $href2->{$key};
1201 $value3 = $href3->{$key};
1202
1203 # This key is bigger than the bottom n key, so the lowest position we
1204 # will insert it into is $n minus 1 (the bottom of the list).
1205 $insert_position = $n_minus_1;
1206
1207 # Now go through the list, stopping when we find a key that we're
1208 # bigger than, or we come to the penultimate position - we've
1209 # already tested bigger than the last.
1210 #
1211 # Note: we go top down as the list starts off empty.
1212 # Note: stepping through the list in this way benchmarks nearly
1213 # three times faster than doing a sort() on the reduced list.
1214 # I assume this is because the list is already in order, and
1215 # we get a performance boost from not having to do hash lookups
1216 # on the new key.
1217 for ($i = 0; $i < $n_minus_1; $i++) {
1218 $top_n_key = $top_n_keys[$i];
1219 if ( ($top_n_key eq '_') ||
1b4fe9dd 1220 ( ($value1 <=> $href1->{$top_n_key} ||
059ec3d9
PH
1221 $value2 <=> $href2->{$top_n_key} ||
1222 $value3 <=> $href3->{$top_n_key} ||
1223 $top_n_key cmp $key) == 1
1224 )
1225 ) {
1226 $insert_position = $i;
1227 last;
1228 }
1229 }
1230
1231 # Remove the last element, then insert the new one.
1232 $#top_n_keys = $n_minus_2;
1233 splice(@top_n_keys,$insert_position,0,$key);
1234
1235 # Extract our new minimum values.
1236 $top_n_key = $top_n_keys[$n_minus_1];
1237 if ($top_n_key ne '_') {
1238 $minimum_value1 = $href1->{$top_n_key};
1239 $minimum_value2 = $href2->{$top_n_key};
1240 $minimum_value3 = $href3->{$top_n_key};
1241 }
1242 }
1243
1244 # Return the top n list, grepping out non-existant values, just in case
1245 # we didn't have that many values.
1246 return(grep(!/^_$/,@top_n_keys));
1247}
1248
1249
1b4fe9dd
PH
1250#######################################################################
1251# html_header();
1252#
1253# $header = html_header($title);
1254#
1255# Print our HTML header and start the <body> block.
1256#######################################################################
059ec3d9
PH
1257sub html_header {
1258 my($title) = @_;
1259 my $text = << "EoText";
1260<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
1261<html>
1262<head>
1263<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15">
1264<title>$title</title>
1265</head>
1266<body bgcolor="white">
1267<h1>$title</h1>
1268EoText
1269 return $text;
1270}
1271
1272
1273
1b4fe9dd
PH
1274#######################################################################
1275# help();
1276#
1277# help();
1278#
1279# Display usage instructions and exit.
1280#######################################################################
059ec3d9
PH
1281sub help {
1282 print << "EoText";
1283
1284eximstats Version $VERSION
1285
1286Usage: eximstats [Options] mainlog1 mainlog2 ... > report.txt
1287 eximstats -html [Options] mainlog1 mainlog2 ... > report.html
1288 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_rep.txt
1289 eximstats -merge -html [Options] report.1.html ... > weekly_rep.html
1290
1291Parses exim mainlog files and generates a statistical analysis of
1292the messages processed. Valid options are:
1293
1294-h<number> histogram divisions per hour. The default is 1, and
1295 0 suppresses histograms. Other valid values are:
1296 2, 3, 5, 10, 15, 20, 30 or 60.
1297-ne don't display error information
1298-nr don't display relaying information
1299-nr/pattern/ don't display relaying information that matches
1300-nt don't display transport information
1301-nt/pattern/ don't display transport information that matches
1302-nvr don't do volume rounding. Display in bytes, not KB/MB/GB.
1303-q<list> list of times for queuing information
1304 single 0 item suppresses
1305-t<number> display top <number> sources/destinations
1306 default is 50, 0 suppresses top listing
1307-tnl omit local sources/destinations in top listing
1308-t_remote_users show top user sources/destinations from non-local domains
1309
1310-byhost show results by sending host (default unless bydomain or
1311 byemail is specified)
1312-bydomain show results by sending domain.
1313-byemail show results by sender's email address
1314-byedomain show results by sender's email domain
1315
1316-pattern "Description" /pattern/
1317 Count lines matching specified patterns and show them in
1318 the results. It can be specified multiple times. Eg:
1319 -pattern 'Refused connections' '/refused connection/'
1320
1321-merge merge previously generated reports into a new report
1322
1323-html output the results in HTML
1324-charts Create charts (this requires the GD::Graph modules)
1325-chartdir <dir> Create the charts' png files in the directory <dir>
1326-chartrel <dir> Specify the relative directory for the "img src=" tags
1327 from where to include the charts in the html file
1328 -chartdir and -chartrel default to '.'
1329
1330-d Debug mode - dump the eval'ed parser onto STDERR.
1331
1332EoText
1333
1334 exit 1;
1335}
1336
1337
1338
1b4fe9dd
PH
1339#######################################################################
1340# generate_parser();
1341#
1342# $parser = generate_parser();
1343#
1344# This subroutine generates the parsing routine which will be
1345# used to parse the mainlog. We take the base operation, and remove bits not in use.
1346# This improves performance depending on what bits you take out or add.
1347#
1348# I've tested using study(), but this does not improve performance.
1349#
1350# We store our parsing routing in a variable, and process it looking for #IFDEF (Expression)
1351# or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
1352# the expression evaluates to true, then it is included/excluded accordingly.
1353#######################################################################
059ec3d9
PH
1354sub generate_parser {
1355 my $parser = '
1356 my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
1357 my($tod,$m_hour,$m_min,$id,$flag);
1358 while (<$fh>) {
1359 next if length($_) < 38;
1360
1361 # PH/FANF
1362 # next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d)/;
1363 next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o;
1364
1365 ($tod,$m_hour,$m_min) = ($1,$2,$3);
1366
1367 # PH
1368 my($extra) = defined($4)? 6 : 0;
1369 $id = substr($_, 20 + $extra, 16);
1370 $flag = substr($_, 37 + $extra, 2);
1371';
1372
1373 # Watch for user specified patterns.
1374 my $user_pattern_index = 0;
1375 foreach (@user_patterns) {
1376 $user_pattern_totals[$user_pattern_index] = 0;
1377 $parser .= " \$user_pattern_totals[$user_pattern_index]++ if $_;\n";
1378 $user_pattern_index++;
1379 }
1380
1381 $parser .= '
1382 next unless ($flag =~ /<=|=>|->|==|\\*\\*|Co/);
1383
1384 #Strip away the timestamp, ID and flag (which could be "Com" for completed)
1385 #This speeds up the later pattern matches.
1386 # $_ = substr($_, 40);
1387
1388 $_ = substr($_, 40 + $extra); # PH
1389
1390 # JN - Skip over certain transports as specified via the "-nt/.../" command
1391 # line switch (where ... is a perl style regular expression). This is
1392 # required so that transports that skew stats such as SpamAssassin can be
1393 # ignored.
1394 #IFDEF ($transport_pattern)
1395 if (/\\sT=(\\S+)/) {
1396 next if ($1 =~ /$transport_pattern/o) ;
1397 }
1398 #ENDIF ($transport_pattern)
1399
1400
1401 $host = "local"; #Host is local unless otherwise specified.
1402 $domain = "localdomain"; #Domain is localdomain unless otherwise specified.
1403
1404
1405 # Do some pattern matches to get the host and IP address.
1406 # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or
1407 # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
1408 # We do 2 separate matches to keep the matches simple and fast.
1409 if (/\\sH=(\\S+)/) {
1410 $host = $1;
1411
1412 ($ip) = /\\sH=.*?(\\s\\[[^]]+\\])/;
1413 # If there is only an IP address, it will be in $host and $ip will be
1414 # unset. That is OK, because we only use $ip in conjunction with $host
1415 # below. But make it empty to avoid warning messages.
1416 $ip = "" if !defined $ip;
1417
1418 #IFDEF ($do_sender{Domain})
1419 if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
1420 # Remove the host portion from the DNS name. We ensure that we end up with
1421 # at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z".
1422 $domain = lc("$1.$2");
1423 $domain =~ s/^\\.//; #Remove preceding dot.
1424 }
1425 #ENDIF ($do_sender{Domain})
1426
1427 }
1428
1429 #IFDEF ($do_sender{Email})
1430 $email = (/^(\S+)/) ? $1 : "";
1431 #ENDIF ($do_sender{Email})
1432
1433 #IFDEF ($do_sender{Edomain})
1434 $edomain = (/^\S*?\\@(\S+)/) ? lc($1) : "";
1435 #ENDIF ($do_sender{Edomain})
1436
1437 if ($tod lt $begin) {
1438 $begin = $tod;
1439 }
1440 elsif ($tod gt $end) {
1441 $end = $tod;
1442 }
1443
1444
1445 if ($flag eq "<=") {
1446 $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0;
1447 $size{$id} = $thissize;
1448
1449 #IFDEF ($show_relay)
1450 if ($host ne "local") {
1451 # Save incoming information in case it becomes interesting
1452 # later, when delivery lines are read.
1453 my($from) = /^(\\S+)/;
1454 $from_host{$id} = "$host$ip";
1455 $from_address{$id} = $from;
1456 }
1457 #ENDIF ($show_relay)
1458
1459 #IFDEF ($local_league_table || $include_remote_users)
1460 if (/\sU=(\\S+)/) {
1461 my $user = $1;
1462
1463 #IFDEF ($local_league_table && $include_remote_users)
1464 { #Store both local and remote users.
1465 #ENDIF ($local_league_table && $include_remote_users)
1466
1467 #IFDEF ($local_league_table && ! $include_remote_users)
1468 if ($host eq "local") { #Store local users only.
1469 #ENDIF ($local_league_table && ! $include_remote_users)
1470
1471 #IFDEF ($include_remote_users && ! $local_league_table)
1472 if ($host ne "local") { #Store remote users only.
1473 #ENDIF ($include_remote_users && ! $local_league_table)
1474
1475 $received_count_user{$user}++;
1476 add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize);
1477 }
1478 }
1479 #ENDIF ($local_league_table || $include_remote_users)
1480
1481 #IFDEF ($do_sender{Host})
1482 $received_count{Host}{$host}++;
1483 add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize);
1484 #ENDIF ($do_sender{Host})
1485
1486 #IFDEF ($do_sender{Domain})
1487 if ($domain) {
1488 $received_count{Domain}{$domain}++;
1489 add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize);
1490 }
1491 #ENDIF ($do_sender{Domain})
1492
1493 #IFDEF ($do_sender{Email})
1494 $received_count{Email}{$email}++;
1495 add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize);
1496 #ENDIF ($do_sender{Email})
1497
1498 #IFDEF ($do_sender{Edomain})
1499 $received_count{Edomain}{$edomain}++;
1500 add_volume(\\$received_data{Edomain}{$edomain},\\$received_data_gigs{Edomain}{$edomain},$thissize);
1501 #ENDIF ($do_sender{Edomain})
1502
1503 $total_received_count++;
1504 add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize);
1505
1506 #IFDEF ($#queue_times >= 0)
1507 $arrival_time{$id} = $tod;
1508 #ENDIF ($#queue_times >= 0)
1509
1510 #IFDEF ($hist_opt > 0)
1511 $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1512 #ENDIF ($hist_opt > 0)
1513 }
1514
1515 elsif ($flag eq "=>") {
1516 $size = $size{$id} || 0;
1517 if ($host ne "local") {
1518 $remote_delivered{$id} = 1;
1519
1520
1521 #IFDEF ($show_relay)
1522 # Determine relaying address if either only one address listed,
1523 # or two the same. If they are different, it implies a forwarding
1524 # or aliasing, which is not relaying. Note that for multi-aliased
1525 # addresses, there may be a further address between the first
1526 # and last.
1527
1528 if (defined $from_host{$id}) {
1529 if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) {
1530 ($old,$new) = ($1,$2);
1531 }
1532 else {
1533 $old = $new = "";
1534 }
1535
1536 if ("\\L$new" eq "\\L$old") {
1537 ($old) = /^(\\S+)/ if $old eq "";
1538 my $key = "H=\\L$from_host{$id}\\E A=\\L$from_address{$id}\\E => " .
1539 "H=\\L$host\\E$ip A=\\L$old\\E";
1540 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
1541 $relayed{$key} = 0 if !defined $relayed{$key};
1542 $relayed{$key}++;
1543 }
1544 else {
1545 $relayed_unshown++
1546 }
1547 }
1548 }
1549 #ENDIF ($show_relay)
1550
1551 }
1552
1553 #IFDEF ($local_league_table || $include_remote_users)
1554 #IFDEF ($local_league_table && $include_remote_users)
1555 { #Store both local and remote users.
1556 #ENDIF ($local_league_table && $include_remote_users)
1557
1558 #IFDEF ($local_league_table && ! $include_remote_users)
1559 if ($host eq "local") { #Store local users only.
1560 #ENDIF ($local_league_table && ! $include_remote_users)
1561
1562 #IFDEF ($include_remote_users && ! $local_league_table)
1563 if ($host ne "local") { #Store remote users only.
1564 #ENDIF ($include_remote_users && ! $local_league_table)
1565
1566 if (my($user) = split((/\\s</)? " <" : " ", $_)) {
1567 if ($user =~ /^[\\/|]/) {
1568 my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
1569 $user = "$user $parent" if defined $parent;
1570 }
1571 $delivered_count_user{$user}++;
1572 add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size);
1573 }
1574 }
1575 #ENDIF ($local_league_table || $include_remote_users)
1576
1577 #IFDEF ($do_sender{Host})
1578 $delivered_count{Host}{$host}++;
1579 add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size);
1580 #ENDIF ($do_sender{Host})
1581 #IFDEF ($do_sender{Domain})
1582 if ($domain) {
1583 $delivered_count{Domain}{$domain}++;
1584 add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size);
1585 }
1586 #ENDIF ($do_sender{Domain})
1587 #IFDEF ($do_sender{Email})
1588 $delivered_count{Email}{$email}++;
1589 add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size);
1590 #ENDIF ($do_sender{Email})
1591 #IFDEF ($do_sender{Edomain})
1592 $delivered_count{Edomain}{$edomain}++;
1593 add_volume(\\$delivered_data{Edomain}{$edomain},\\$delivered_data_gigs{Edomain}{$edomain},$size);
1594 #ENDIF ($do_sender{Edomain})
1595
1596 $total_delivered_count++;
1597 add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size);
1598
1599 #IFDEF ($show_transport)
1600 my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:";
1601 $transported_count{$transport}++;
1602 add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size);
1603 #ENDIF ($show_transport)
1604
1605 #IFDEF ($hist_opt > 0)
1606 $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1607 #ENDIF ($hist_opt > 0)
1608
1609 }
1610
1611 elsif ($flag eq "==" && defined($size{$id}) && !defined($delayed{$id})) {
1612 $delayed_count++;
1613 $delayed{$id} = 1;
1614 }
1615
1616 elsif ($flag eq "**") {
1617 $had_error{$id} = 1 if defined ($size{$id});
1618
1619 #IFDEF ($show_errors)
1620 $errors_count{$_}++;
1621 #ENDIF ($show_errors)
1622
1623 }
1624
1625 elsif ($flag eq "Co") {
1626 #Completed?
1627 #IFDEF ($#queue_times >= 0)
1628 #Note: id_seconds() benchmarks as 42% slower than seconds() and computing
1629 #the time accounts for a significant portion of the run time.
1630 my($queued);
1631 if (defined $arrival_time{$id}) {
1632 $queued = seconds($tod) - seconds($arrival_time{$id});
1633 delete($arrival_time{$id});
1634 }
1635 else {
1636 $queued = seconds($tod) - id_seconds($id);
1637 }
1638
1639 for ($i = 0; $i <= $#queue_times; $i++) {
1640 if ($queued < $queue_times[$i]) {
1641 $queue_bin[$i]++;
1642 $remote_queue_bin[$i]++ if $remote_delivered{$id};
1643 last;
1644 }
1645 }
1646 $queue_more_than++ if $i > $#queue_times;
1647 #ENDIF ($#queue_times >= 0)
1648
1649 #IFDEF ($show_relay)
1650 delete($from_host{$id});
1651 delete($from_address{$id});
1652 #ENDIF ($show_relay)
1653
1654 }
1655 }';
1656
1657 # We now do a 'C preprocessor style operation on our parser
1658 # to remove bits not in use.
1659 my(%defines_in_operation,$removing_lines,$processed_parser);
1660 foreach (split (/\n/,$parser)) {
1661 if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i && ! eval $1) ||
1662 (/^\s*#\s*IFNDEF\s*\((.*?)\)/i && eval $1) ) {
1663 $defines_in_operation{$1} = 1;
1664 $removing_lines = 1;
1665 }
1666
1667 $processed_parser .= $_."\n" unless $removing_lines;
1668
1669 if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) {
1670 delete $defines_in_operation{$1};
1671 unless (keys %defines_in_operation) {
1672 $removing_lines = 0;
1673 }
1674 }
1675 }
1676 print STDERR "# START OF PARSER:\n$processed_parser\n# END OF PARSER\n\n" if $debug;
1677
1678 return $processed_parser;
1679}
1680
1681
1682
1b4fe9dd
PH
1683#######################################################################
1684# parse();
1685#
1686# parse($parser,\*FILEHANDLE);
1687#
1688# This subroutine accepts a parser and a filehandle from main and parses each
1689# line. We store the results into global variables.
1690#######################################################################
059ec3d9
PH
1691sub parse {
1692 my($parser,$fh) = @_;
1693
1694 if ($merge_reports) {
1695 parse_old_eximstat_reports($fh);
1696 }
1697 else {
1698 eval $parser;
1699 die ($@) if $@;
1700 }
1701
1702}
1703
1704
1705
1b4fe9dd
PH
1706#######################################################################
1707# print_header();
1708#
1709# print_header();
1710#
1711# Print our headers and contents.
1712#######################################################################
059ec3d9
PH
1713sub print_header {
1714
1715 my $title = "Exim statistics from $begin to $end";
1716
1717 if ($html) {
1718 print html_header($title);
1719 print "<ul>\n";
1720 print "<li><a href=\"#grandtotal\">Grand total summary</a>\n";
1721 print "<li><a href=\"#patterns\">User Specified Patterns</a>\n" if @user_patterns;
1722 print "<li><a href=\"#transport\">Deliveries by Transport</a>\n" if $show_transport;
1723 if ($hist_opt) {
1724 print "<li><a href=\"#Messages received\">Messages received per hour</a>\n";
1725 print "<li><a href=\"#Deliveries\">Deliveries per hour</a>\n";
1726 }
1727 if ($#queue_times >= 0) {
1728 print "<li><a href=\"#all messages time\">Time spent on the queue: all messages</a>\n";
1729 print "<li><a href=\"#messages with at least one remote delivery time\">Time spent on the queue: messages with at least one remote delivery</a>\n";
1730 }
1731 print "<li><a href=\"#Relayed messages\">Relayed messages</a>\n" if $show_relay;
1732 if ($topcount) {
1733 foreach ('Host','Domain','Email','Edomain') {
1734 next unless $do_sender{$_};
1735 print "<li><a href=\"#sending \l$_ count\">Top $topcount sending \l${_}s by message count</a>\n";
1736 print "<li><a href=\"#sending \l$_ volume\">Top $topcount sending \l${_}s by volume</a>\n";
1737 }
1738 if ($local_league_table || $include_remote_users) {
1739 print "<li><a href=\"#local sender count\">Top $topcount local senders by message count</a>\n";
1740 print "<li><a href=\"#local sender volume\">Top $topcount local senders by volume</a>\n";
1741 }
1742 foreach ('Host','Domain','Email','Edomain') {
1743 next unless $do_sender{$_};
1744 print "<li><a href=\"#\l$_ destination count\">Top $topcount \l$_ destinations by message count</a>\n";
1745 print "<li><a href=\"#\l$_ destination volume\">Top $topcount \l$_ destinations by volume</a>\n";
1746 }
1747 if ($local_league_table || $include_remote_users) {
1748 print "<li><a href=\"#local destination count\">Top $topcount local destinations by message count</a>\n";
1749 print "<li><a href=\"#local destination volume\">Top $topcount local destinations by volume</a>\n";
1750 }
1751 }
1752 print "<li><a href=\"#errors\">List of errors</a>\n" if %errors_count;
1753 print "</ul>\n<hr>\n";
1754
1755 }
1756 else {
1757 print "\n$title\n";
1758 }
1759}
1760
1761
1b4fe9dd
PH
1762#######################################################################
1763# print_grandtotals();
1764#
1765# print_grandtotals();
1766#
1767# Print the grand totals.
1768#######################################################################
059ec3d9
PH
1769sub print_grandtotals {
1770
1771 # Get the sender by headings and results. This is complicated as we can have
1772 # different numbers of columns.
1773 my($sender_txt_header,$sender_html_header,$sender_txt_format,$sender_html_format);
1774 my(@received_totals,@delivered_totals);
1775 foreach ('Host','Domain','Email','Edomain') {
1776 next unless $do_sender{$_};
1777 if ($merge_reports) {
1778 push(@received_totals, get_report_total($report_totals{Received},"${_}s"));
1779 push(@delivered_totals,get_report_total($report_totals{Delivered},"${_}s"));
1780 }
1781 else {
1782 push(@received_totals,scalar(keys %{$received_data{$_}}));
1783 push(@delivered_totals,scalar(keys %{$delivered_data{$_}}));
1784 }
1785 $sender_html_header .= "<th>${_}s</th>";
1786 $sender_txt_header .= " " x ($COLUMN_WIDTHS - length($_)) . $_ . 's';
1787 $sender_html_format .= "<td align=\"right\">%d</td>";
1788 $sender_txt_format .= " " x ($COLUMN_WIDTHS - 5) . "%6d";
1789 }
1790
1791 my($format1,$format2);
1792 if ($html) {
1793 print << "EoText";
1794<a name="grandtotal"></a>
1795<h2>Grand total summary</h2>
1796<table border=1>
1797<tr><th>TOTAL</th><th>Volume</th><th>Messages</th>$sender_html_header<th colspan=2>At least one addr<br>Delayed</th><th colspan=2>At least one addr<br>Failed</th>
1798EoText
1799
1800 $format1 = "<tr><td>%s</td><td align=\"right\">%s</td>$sender_html_format<td align=\"right\">%d</td>";
1801 $format2 = "<td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td><td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td>";
1802 }
1803 else {
1804 my $sender_spaces = " " x length($sender_txt_header);
1805 print << "EoText";
1806
1807Grand total summary
1808-------------------
1809 $sender_spaces At least one address
1810 TOTAL Volume Messages $sender_txt_header Delayed Failed
1811EoText
1812 $format1 = " %-16s %9s %6d $sender_txt_format";
1813 $format2 = " %6d %4.1f%% %6d %4.1f%%",
1814 }
1815
1816 my($volume,$failed_count);
1817 if ($merge_reports) {
1818 $volume = volume_rounded($report_totals{Received}{Volume}, $report_totals{Received}{'Volume-gigs'});
1819 $total_received_count = get_report_total($report_totals{Received},'Messages');
1820 $failed_count = get_report_total($report_totals{Received},'Failed');
1821 $delayed_count = get_report_total($report_totals{Received},'Delayed');
1822 }
1823 else {
1824 $volume = volume_rounded($total_received_data, $total_received_data_gigs);
1825 $failed_count = keys %had_error;
1826 }
1827
1828 {
1829 no integer;
1830 printf("$format1$format2\n",'Received',$volume,$total_received_count,
1831 @received_totals,$delayed_count,
1832 ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0,
1833 $failed_count,
1834 ($total_received_count) ? ($failed_count*100/$total_received_count) : 0);
1835 }
1836
1837 if ($merge_reports) {
1838 $volume = volume_rounded($report_totals{Delivered}{Volume}, $report_totals{Delivered}{'Volume-gigs'});
1839 $total_delivered_count = get_report_total($report_totals{Delivered},'Messages');
1840 }
1841 else {
1842 $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs);
1843 }
1844 printf("$format1\n\n",'Delivered',$volume,$total_delivered_count,@delivered_totals);
1845 print "</table>\n" if $html;
1846}
1847
1848
1b4fe9dd
PH
1849#######################################################################
1850# print_user_patterns()
1851#
1852# print_user_patterns();
1853#
1854# Print the counts of user specified patterns.
1855#######################################################################
059ec3d9
PH
1856sub print_user_patterns {
1857 my($format1);
1858
1859 if ($html) {
1860 print "<hr><a name=\"patterns\"></a><h2>User Specified Patterns</h2>\n";
1861 print "<table border=0 width=\"100%\">\n";
1862 print "<tr><td>\n";
1863 print "<table border=1>\n";
1864 print "<tr><th>&nbsp;</th><th>Total</th>\n";
1865 $format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";
1866 }
1867 else {
1868 print "User Specified Patterns\n";
1869 print "-----------------------";
1870 print "\n Total\n";
1871 $format1 = " %-18s %6d";
1872 }
1873
1874 my($key);
1875 if ($merge_reports) {
1876 # We are getting our data from previous reports.
1877 foreach $key (@user_descriptions) {
1878 my $count = get_report_total($report_totals{patterns}{$key},'Total');
1879 printf("$format1\n",$key,$count);
1880 }
1881 }
1882 else {
1883 # We are getting our data from mainlog files.
1884 my $user_pattern_index = 0;
1885 foreach $key (@user_descriptions) {
1886 printf("$format1\n",$key,$user_pattern_totals[$user_pattern_index]);
1887 $user_pattern_index++;
1888 }
1889 }
1890 if ($html) {
1891 print "</table>\n";
1892 }
1893 print "\n";
1894}
1895
1896
1b4fe9dd
PH
1897#######################################################################
1898# print_transport();
1899#
1900# print_transport();
1901#
1902# Print totals by transport.
1903#######################################################################
059ec3d9
PH
1904sub print_transport {
1905 my($format1);
1906 my(@chartdatanames);
1907 my(@chartdatavals_count);
1908 my(@chartdatavals_vol);
1909 no integer; #Lose this for charting the data.
1910
1911 if ($html) {
1912 print "<hr><a name=\"transport\"></a><h2>Deliveries by Transport</h2>\n";
1913 print "<table border=0 width=\"100%\">\n";
1914 print "<tr><td>\n";
1915 print "<table border=1>\n";
1916 print "<tr><th>&nbsp;</th><th>Volume</th><th>Messages</th>\n";
1917 $format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%d</td>";
1918 }
1919 else {
1920 print "Deliveries by transport\n";
1921 print "-----------------------";
1922 print "\n Volume Messages\n";
1923 $format1 = " %-18s %6s %6d";
1924 }
1925
1926 my($key);
1927 if ($merge_reports) {
1928 # We are getting our data from previous reports.
1929 foreach $key (sort keys %{$report_totals{transport}}) {
1930 my $count = get_report_total($report_totals{transport}{$key},'Messages');
1931 printf("$format1\n",$key,
1932 volume_rounded($report_totals{transport}{$key}{Volume},$report_totals{transport}{$key}{'Volume-gigs'}),
1933 $count);
1934 push(@chartdatanames, $key);
1935 push(@chartdatavals_count, $count);
1936 push(@chartdatavals_vol, $report_totals{transport}{$key}{'Volume-gigs'}*$gig + $report_totals{transport}{$key}{Volume} );
1937 }
1938 }
1939 else {
1940 # We are getting our data from mainlog files.
1941 foreach $key (sort keys %transported_data) {
1942 printf("$format1\n",$key,
1943 volume_rounded($transported_data{$key},$transported_data_gigs{$key}),
1944 $transported_count{$key});
1945 push(@chartdatanames, $key);
1946 push(@chartdatavals_count, $transported_count{$key});
1947 push(@chartdatavals_vol, $transported_data_gigs{$key}*$gig + $transported_data{$key});
1948 }
1949 }
1950 if ($html) {
1951 print "</table>\n";
1952 print "</td><td>\n";
1953 if ($HAVE_GD_Graph_pie && $charts)
1954 {
1955 # calculate the graph
1956 my @data = (
1957 \@chartdatanames,
1958 \@chartdatavals_count
1959 );
1960 my $graph = GD::Graph::pie->new(200, 200);
1961 $graph->set(
1962 x_label => 'Transport',
1963 y_label => 'Messages',
1964 title => 'By count',
1965 );
1966 my $gd = $graph->plot(\@data) or warn($graph->error);
1967 if ($gd) {
1968 open(IMG, ">$chartdir/transports_count.png") or die $!;
1969 binmode IMG;
1970 print IMG $gd->png;
1971 close IMG;
1972 print "<img src=\"$chartrel/transports_count.png\">";
1973 }
1974 }
1975 print "</td><td>\n";
1976
1977 if ($HAVE_GD_Graph_pie && $charts) {
1978 my @data = (
1979 \@chartdatanames,
1980 \@chartdatavals_vol
1981 );
1982 my $graph = GD::Graph::pie->new(200, 200);
1983 $graph->set(
1984 title => 'By volume',
1985 );
1986 my $gd = $graph->plot(\@data) or warn($graph->error);
1987 if ($gd) {
1988 open(IMG, ">$chartdir/transports_vol.png") or die $!;
1989 binmode IMG;
1990 print IMG $gd->png;
1991 close IMG;
1992 print "<img src=\"$chartrel/transports_vol.png\">";
1993 }
1994 }
1995 print "</td></tr></table>\n";
1996 }
1997 print "\n";
1998}
1999
2000
2001
1b4fe9dd
PH
2002#######################################################################
2003# print_relay();
2004#
2005# print_relay();
2006#
2007# Print our totals by relay.
2008#######################################################################
059ec3d9
PH
2009sub print_relay {
2010 my $temp = "Relayed messages";
2011 print "<hr><a name=\"$temp\"></a><h2>$temp</h2>\n" if $html;
2012 if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) {
2013 my $shown = 0;
2014 my $spacing = "";
2015 my($format);
2016
2017 if ($html) {
2018 print "<table border=1>\n";
2019 print "<tr><th>Count</th><th>From</th><th>To</th>\n";
2020 $format = "<tr><td align=\"right\">%d</td><td>%s</td><td>%s</td>\n";
2021 }
2022 else {
2023 printf("%s\n%s\n\n", $temp, "-" x length($temp));
2024 $format = "%7d %s\n => %s\n";
2025 }
2026
2027 my($key);
2028 foreach $key (sort keys %relayed) {
2029 my $count = $relayed{$key};
2030 $shown += $count;
2031 $key =~ s/[HA]=//g;
2032 my($one,$two) = split(/=> /, $key);
2033 printf($format, $count, $one, $two);
2034 $spacing = "\n";
2035 }
2036 print "</table>\n<p>\n" if $html;
2037 print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n";
2038 }
2039 else {
2040 print "No relayed messages\n";
2041 print "-------------------\n" unless $html;
2042 }
2043 print "\n";
2044}
2045
2046
2047
1b4fe9dd
PH
2048#######################################################################
2049# print_errors();
2050#
2051# print_errors();
2052#
2053# Print our errors. In HTML, we display them as a list rather than a table -
2054# Netscape doesn't like large tables!
2055#######################################################################
059ec3d9
PH
2056sub print_errors {
2057 my $total_errors = 0;
2058
2059 if (scalar(keys %errors_count) != 0) {
2060 my $temp = "List of errors";
2061 my($format);
2062 if ($html) {
2063 print "<hr><a name=\"errors\"></a><h2>$temp</h2>\n";
2064 print "<ul><li><b>Count - Error</b>\n";
2065 $format = "<li>%d - %s\n";
2066 }
2067 else {
2068 printf("%s\n%s\n\n", $temp, "-" x length($temp));
2069 }
2070
2071 my($key);
2072 foreach $key (sort keys %errors_count) {
2073 my $text = $key;
2074 chomp($text);
2075 $text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space.
2076 $total_errors += $errors_count{$key};
2077 if ($html) {
1b4fe9dd 2078
059ec3d9
PH
2079 #Translate HTML tag characters. Sergey Sholokh.
2080 $text =~ s/\</\&lt\;/g;
2081 $text =~ s/\>/\&gt\;/g;
2082
2083 printf($format,$errors_count{$key},$text);
2084 }
2085 else {
2086 printf("%5d ", $errors_count{$key});
2087 while (length($text) > 65) {
2088 my($first,$rest) = $text =~ /(.{50}\S*)\s+(.+)/;
2089 last if !$first;
2090 printf("%s\n ", $first);
2091 $text = $rest;
2092 }
2093 printf("%s\n\n", $text);
2094 }
2095 }
2096 print "</ul>\n<p>\n" if $html;
2097
2098 $temp = "Errors encountered: $total_errors";
2099 print $temp,"\n";
2100 print "-" x length($temp),"\n" unless $html;
2101 }
2102
2103}
2104
2105
1b4fe9dd
PH
2106#######################################################################
2107# parse_old_eximstat_reports();
2108#
2109# parse_old_eximstat_reports($fh);
2110#
2111# Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
2112#
2113# To test that the merging still works after changes, do something like the following.
2114# All the diffs should produce no output.
2115#
2116# options='-bydomain -byemail -byhost -byedomain'
2117# options="$options -pattern 'Completed Messages' /Completed/"
2118# options="$options -pattern 'Received Messages' /<=/"
2119#
2120# ./eximstats $options mainlog > mainlog.txt
2121# ./eximstats $options -merge mainlog.txt > mainlog.2.txt
2122# diff mainlog.txt mainlog.2.txt
2123#
2124# ./eximstats $options -html mainlog > mainlog.html
2125# ./eximstats $options -merge -html mainlog.txt > mainlog.2.html
2126# diff mainlog.html mainlog.2.html
2127#
2128# ./eximstats $options -merge mainlog.html > mainlog.3.txt
2129# diff mainlog.txt mainlog.3.txt
2130#
2131# ./eximstats $options -merge -html mainlog.html > mainlog.3.html
2132# diff mainlog.html mainlog.3.html
2133#
2134# ./eximstats $options -nvr mainlog > mainlog.nvr.txt
2135# ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
2136# diff mainlog.txt mainlog.4.txt
2137#
2138# # double_mainlog.txt should have twice the values that mainlog.txt has.
2139# ./eximstats $options mainlog mainlog > double_mainlog.txt
2140#######################################################################
059ec3d9
PH
2141sub parse_old_eximstat_reports {
2142 my($fh) = @_;
2143
2144 my(%league_table_value_entered, %league_table_value_was_zero, %table_order);
2145
2146 while (<$fh>) {
2147 if (/Exim statistics from ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?) to ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?)/) {
2148 $begin = $1 if ($1 lt $begin);
2149 $end = $3 if ($3 gt $end);
2150 }
2151 elsif (/Grand total summary/) {
2152 # Fill in $report_totals{Received|Delivered}{Volume|Messages|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
2153 my(@fields);
2154 while (<$fh>) {
2155 $_ = html2txt($_); #Convert general HTML markup to text.
2156 s/At least one addr//g; #Another part of the HTML output we don't want.
2157
2158# TOTAL Volume Messages Hosts Domains Delayed Failed
2159# Received 26MB 237 177 23 8 3.4% 28 11.8%
2160# Delivered 13MB 233 99 88
2161 if (/TOTAL\s+(.*?)\s*$/) {
2162 @fields = split(/\s+/,$1);
2163 #Delayed and Failed have two columns each, so add the extra field names in.
2164 splice(@fields,-1,1,'DelayedPercent','Failed','FailedPercent');
2165 }
2166 elsif (/(Received|Delivered)\s+(.*?)\s*$/) {
2167 print STDERR "Parsing $_" if $debug;
2168 add_to_totals($report_totals{$1},\@fields,$2);
2169 }
2170 last if (/Delivered/); #Last line of this section.
2171 }
2172 }
2173
2174 elsif (/User Specified Patterns/i) {
2175#User Specified Patterns
2176#-----------------------
2177# Total
2178# Description 85
2179
2180 while (<$fh>) { last if (/Total/); } #Wait until we get the table headers.
2181 while (<$fh>) {
2182 print STDERR "Parsing $_" if $debug;
2183 $_ = html2txt($_); #Convert general HTML markup to text.
2184 if (/^\s*(.*?)\s+(\d+)\s*$/) {
2185 $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
2186 add_to_totals($report_totals{patterns}{$1},['Total'],$2);
2187 }
2188 last if (/^\s*$/); #Finished if we have a blank line.
2189 }
2190 }
2191
2192 elsif (/Deliveries by transport/i) {
2193#Deliveries by transport
2194#-----------------------
2195# Volume Messages
2196# :blackhole: 70KB 51
2197# address_pipe 655KB 1
2198# smtp 11MB 151
2199
2200 while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers.
2201 while (<$fh>) {
2202 print STDERR "Parsing $_" if $debug;
2203 $_ = html2txt($_); #Convert general HTML markup to text.
2204 if (/(\S+)\s+(\d+\S*\s+\d+)/) {
2205 $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
2206 add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
2207 }
2208 last if (/^\s*$/); #Finished if we have a blank line.
2209 }
2210 }
2211 elsif (/(Messages received|Deliveries) per/) {
2212# Messages received per hour (each dot is 2 messages)
2213#---------------------------------------------------
2214#
2215#00-01 106 .....................................................
2216#01-02 103 ...................................................
2217
2218 # Set a pointer to the interval array so we can use the same code
2219 # block for both messages received and delivered.
2220 my $interval_aref = ($1 eq 'Deliveries') ? \@delivered_interval_count : \@received_interval_count;
2221 my $reached_table = 0;
2222 while (<$fh>) {
2223 $reached_table = 1 if (/^00/);
2224 next unless $reached_table;
2225 print STDERR "Parsing $_" if $debug;
2226 if (/^(\d+):(\d+)\s+(\d+)/) { #hh:mm start time format ?
1b4fe9dd 2227 $$interval_aref[($1*60 + $2)/$hist_interval] += $3 if $hist_opt;
059ec3d9
PH
2228 }
2229 elsif (/^(\d+)-(\d+)\s+(\d+)/) { #hh-hh start-end time format ?
1b4fe9dd 2230 $$interval_aref[($1*60)/$hist_interval] += $3 if $hist_opt;
059ec3d9
PH
2231 }
2232 else { #Finished the table ?
2233 last;
2234 }
2235 }
2236 }
2237
2238 elsif (/Time spent on the queue: (all messages|messages with at least one remote delivery)/) {
2239#Time spent on the queue: all messages
2240#-------------------------------------
2241#
2242#Under 1m 217 91.9% 91.9%
2243# 5m 2 0.8% 92.8%
2244# 3h 8 3.4% 96.2%
2245# 6h 7 3.0% 99.2%
2246# 12h 2 0.8% 100.0%
2247
2248 # Set a pointer to the queue bin so we can use the same code
2249 # block for both all messages and remote deliveries.
2250 my $bin_aref = ($1 eq 'all messages') ? \@queue_bin : \@remote_queue_bin;
2251 my $reached_table = 0;
2252 while (<$fh>) {
2253 $_ = html2txt($_); #Convert general HTML markup to text.
2254 $reached_table = 1 if (/^\s*Under/);
2255 next unless $reached_table;
2256 my $previous_seconds_on_queue = 0;
2257 if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
2258 print STDERR "Parsing $_" if $debug;
2259 my($modifier,$formated_time,$count) = ($1,$2,$3);
2260 my $seconds = unformat_time($formated_time);
2261 my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
2262 $previous_seconds_on_queue = $seconds;
2263 $time_on_queue = $seconds * 2 if ($modifier eq 'Over');
2264 my($i);
2265 for ($i = 0; $i <= $#queue_times; $i++) {
2266 if ($time_on_queue < $queue_times[$i]) {
2267 $$bin_aref[$i] += $count;
2268 last;
2269 }
2270 }
2271 # There's only one counter for messages going over the queue
2272 # times so make sure we only count it once.
2273 $queue_more_than += $count if (($bin_aref == \@queue_bin) && ($i > $#queue_times));
2274 }
2275 else {
2276 last; #Finished the table ?
2277 }
2278 }
2279 }
2280
2281 elsif (/Relayed messages/) {
2282#Relayed messages
2283#----------------
2284#
2285# 1 addr.domain.com [1.2.3.4] a.user@domain.com
2286# => addr2.domain2.com [5.6.7.8] a2.user2@domain2.com
2287#
2288#<tr><td align="right">1</td><td>addr.domain.com [1.2.3.4] a.user@domain.com </td><td>addr2.domain2.com [5.6.7.8] a2.user2@domain2.com</td>
2289
2290 my $reached_table = 0;
2291 my($count,$sender);
2292 while (<$fh>) {
2293 unless ($reached_table) {
2294 last if (/No relayed messages/);
2295 $reached_table = 1 if (/^\s*\d/ || />\d+</);
2296 next unless $reached_table;
2297 }
2298 if (/>(\d+)<.td><td>(.*?) ?<.td><td>(.*?)</) {
2299 update_relayed($1,$2,$3);
2300 }
2301 elsif (/^\s*(\d+)\s+(.*?)\s*$/) {
2302 ($count,$sender) = ($1,$2);
2303 }
2304 elsif (/=>\s+(.*?)\s*$/) {
2305 update_relayed($count,$sender,$1);
2306 }
2307 else {
2308 last; #Finished the table ?
2309 }
2310 }
2311 }
2312
2313 elsif (/Top (.*?) by (message count|volume)/) {
2314#Top 50 sending hosts by message count
2315#-------------------------------------
2316#
2317# 48 1468KB local
2318 my($category,$by_count_or_volume) = ($1,$2);
2319
2320 #As we show 2 views of each table (by count and by volume),
2321 #most (but not all) entries will appear in both tables.
2322 #Set up a hash to record which entries we have already seen
2323 #and one to record which ones we are seeing for the first time.
2324 if ($by_count_or_volume =~ /count/) {
2325 undef %league_table_value_entered;
2326 undef %league_table_value_was_zero;
2327 undef %table_order;
2328 }
2329
2330 #As this section processes multiple different table categories,
2331 #set up pointers to the hashes to be updated.
2332 my($count_href,$data_href,$data_gigs_href);
2333 if ($category =~ /local sender/) {
2334 $count_href = \%received_count_user;
2335 $data_href = \%received_data_user;
2336 $data_gigs_href = \%received_data_gigs_user;
2337 }
2338 elsif ($category =~ /sending (\S+?)s?\b/) {
2339 #Top 50 sending (host|domain|email|edomain)s
2340 #Top sending (host|domain|email|edomain)
2341 $count_href = \%{$received_count{"\u$1"}};
2342 $data_href = \%{$received_data{"\u$1"}};
2343 $data_gigs_href = \%{$received_data_gigs{"\u$1"}};
2344 }
2345 elsif ($category =~ /local destination/) {
2346 $count_href = \%delivered_count_user;
2347 $data_href = \%delivered_data_user;
2348 $data_gigs_href = \%delivered_data_gigs_user;
2349 }
2350 elsif ($category =~ /(\S+) destination/) {
2351 #Top 50 (host|domain|email|edomain) destinations
2352 #Top (host|domain|email|edomain) destination
2353 $count_href = \%{$delivered_count{"\u$1"}};
2354 $data_href = \%{$delivered_data{"\u$1"}};
2355 $data_gigs_href = \%{$delivered_data_gigs{"\u$1"}};
2356 }
2357
2358 my $reached_table = 0;
2359 while (<$fh>) {
2360 $_ = html2txt($_); #Convert general HTML markup to text.
2361 $reached_table = 1 if (/^\s*\d/);
2362 next unless $reached_table;
2363 if (/^\s*(\d+)\s+(\S+)\s*(.*?)\s*$/) {
2364 my($count,$rounded_volume,$entry) = ($1,$2,$3);
2365 #Note: $entry fields can be both null and can contain spaces.
2366
2367 #Add the entry into the %table_order hash if it has a rounded volume (KB/MB/GB).
2368 push(@{$table_order{$rounded_volume}{$by_count_or_volume}},$entry) if ($rounded_volume =~ /\D/);
2369
2370 unless ($league_table_value_entered{$entry}) {
2371 $league_table_value_entered{$entry} = 1;
2372 unless ($$count_href{$entry}) {
2373 $$count_href{$entry} = 0;
2374 $$data_href{$entry} = 0;
2375 $$data_gigs_href{$entry} = 0;
2376 $league_table_value_was_zero{$entry} = 1;
2377 }
2378
2379 $$count_href{$entry} += $count;
2380 #Add the rounded value to the data and data_gigs hashes.
2381 un_round($rounded_volume,\$$data_href{$entry},\$$data_gigs_href{$entry});
2382 print STDERR "$category by $by_count_or_volume: added $count,$rounded_volume to $entry\n" if $debug;
2383 }
2384 }
2385 else { #Finished the table ?
2386 if ($by_count_or_volume =~ /volume/) {
2387 #Add a few bytes to appropriate entries to preserve the order.
2388
2389 my($rounded_volume);
2390 foreach $rounded_volume (keys %table_order) {
2391 #For each rounded volume, we want to create a list which has things
2392 #ordered from the volume table at the front, and additional things
2393 #from the count table ordered at the back.
2394 @{$table_order{$rounded_volume}{volume}} = () unless defined $table_order{$rounded_volume}{volume};
2395 @{$table_order{$rounded_volume}{'message count'}} = () unless defined $table_order{$rounded_volume}{'message count'};
2396 my(@order,%mark);
2397 map {$mark{$_} = 1} @{$table_order{$rounded_volume}{volume}};
2398 @order = @{$table_order{$rounded_volume}{volume}};
2399 map {push(@order,$_)} grep(!$mark{$_},@{$table_order{$rounded_volume}{'message count'}});
2400
2401 my $bonus_bytes = $#order;
2402 $bonus_bytes = 511 if ($bonus_bytes > 511); #Don't go over the half-K boundary!
2403 while (@order and ($bonus_bytes > 0)) {
2404 my $entry = shift(@order);
2405 if ($league_table_value_was_zero{$entry}) {
2406 $$data_href{$entry} += $bonus_bytes;
2407 print STDERR "$category by $by_count_or_volume: added $bonus_bytes bonus bytes to $entry\n" if $debug;
2408 }
2409 $bonus_bytes--;
2410 }
2411 }
2412 }
2413
2414 last;
2415 }
2416 }
2417 }
2418 elsif (/List of errors/) {
2419#List of errors
2420#--------------
2421#
2422# 1 07904931641@one2one.net R=external T=smtp: SMTP error
2423# from remote mailer after RCPT TO:<07904931641@one2one.net>:
2424# host mail.one2one.net [193.133.192.24]: 550 User unknown
2425#
2426#<li>1 - ally.dufc@dunbar.org.uk R=external T=smtp: SMTP error from remote mailer after RCPT TO:<ally.dufc@dunbar.org.uk>: host mail.dunbar.org.uk [216.167.89.88]: 550 Unknown local part ally.dufc in <ally.dufc@dunbar.org.uk>
2427
2428
2429 my $reached_table = 0;
2430 my($count,$error,$blanks);
2431 while (<$fh>) {
2432 $reached_table = 1 if (/^( *|<li>)(\d+)/);
2433 next unless $reached_table;
2434
2435 s/^<li>(\d+) -/$1/; #Convert an HTML line to a text line.
2436 $_ = html2txt($_); #Convert general HTML markup to text.
2437
2438 if (/\t\s*(.*)/) {
2439 $error .= ' ' . $1; #Join a multiline error.
2440 }
2441 elsif (/^\s*(\d+)\s+(.*)/) {
2442 if ($error) {
2443 #Finished with a previous multiline error so save it.
2444 $errors_count{$error} = 0 unless $errors_count{$error};
2445 $errors_count{$error} += $count;
2446 }
2447 ($count,$error) = ($1,$2);
2448 }
2449 elsif (/Errors encountered/) {
2450 if ($error) {
2451 #Finished the section, so save our stored last error.
2452 $errors_count{$error} = 0 unless $errors_count{$error};
2453 $errors_count{$error} += $count;
2454 }
2455 last;
2456 }
2457 }
2458 }
2459
2460 }
2461}
2462
2463
2464
1b4fe9dd
PH
2465#######################################################################
2466# update_relayed();
2467#
2468# update_relayed($count,$sender,$recipient);
2469#
2470# Adds an entry into the %relayed hash. Currently only used when
2471# merging reports.
2472#######################################################################
059ec3d9
PH
2473sub update_relayed {
2474 my($count,$sender,$recipient) = @_;
2475
2476 #When generating the key, put in the 'H=' and 'A=' which can be used
2477 #in searches.
2478 my $key = "H=$sender => H=$recipient";
2479 $key =~ s/ ([^=\s]+\@\S+|<>)/ A=$1/g;
2480 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
2481 $relayed{$key} = 0 if !defined $relayed{$key};
2482 $relayed{$key} += $count;
2483 }
2484 else {
2485 $relayed_unshown += $count;
2486 }
2487}
2488
2489
1b4fe9dd
PH
2490#######################################################################
2491# add_to_totals();
2492#
2493# add_to_totals(\%totals,\@keys,$values);
2494#
2495# Given a line of space seperated values, add them into the provided hash using @keys
2496# as the hash keys.
2497#
2498# If the value contains a '%', then the value is set rather than added. Otherwise, we
2499# convert the value to bytes and gigs. The gigs get added to I<Key>-gigs.
2500#######################################################################
059ec3d9
PH
2501sub add_to_totals {
2502 my($totals_href,$keys_aref,$values) = @_;
2503 my(@values) = split(/\s+/,$values);
2504 my(@keys) = @$keys_aref; #Make a copy as we destroy the one we use.
2505 my($value);
2506 foreach $value (@values) {
2507 my $key = shift(@keys) or next;
2508 if ($value =~ /%/) {
2509 $$totals_href{$key} = $value;
2510 }
2511 else {
2512 $$totals_href{$key} = 0 unless ($$totals_href{$key});
2513 $$totals_href{"$key-gigs"} = 0 unless ($$totals_href{"$key-gigs"});
2514 un_round($value, \$$totals_href{$key}, \$$totals_href{"$key-gigs"});
2515 print STDERR "Added $value to $key - $$totals_href{$key} , " . $$totals_href{"$key-gigs"} . "GB.\n" if $debug;
2516 }
2517 }
2518}
2519
1b4fe9dd
PH
2520#######################################################################
2521# get_report_total();
2522#
2523# $total = get_report_total(\%hash,$key);
2524#
2525# If %hash contains values split into Units and Gigs, we calculate and return
2526#
2527# $hash{$key} + 1024*1024*1024 * $hash{"${key}-gigs"}
2528#######################################################################
059ec3d9
PH
2529sub get_report_total {
2530 no integer;
2531 my($hash_ref,$key) = @_;
2532 if ($$hash_ref{"${key}-gigs"}) {
2533 return $$hash_ref{$key} + $gig * $$hash_ref{"${key}-gigs"};
2534 }
2535 return $$hash_ref{$key} || 0;
2536}
2537
1b4fe9dd
PH
2538#######################################################################
2539# html2txt();
2540#
2541# $text_line = html2txt($html_line);
2542#
2543# Convert a line from html to text. Currently we just convert HTML tags to spaces
2544# and convert &gt;, &lt;, and &nbsp; tags back.
2545#######################################################################
059ec3d9
PH
2546sub html2txt {
2547 ($_) = @_;
2548
2549 # Convert HTML tags to spacing. Note that the reports may contain <Userid> and
2550 # <Userid@Domain> words, so explicitly specify the HTML tags we will remove
2551 # (the ones used by this program). If someone is careless enough to have their
2552 # Userid the same as an HTML tag, there's not much we can do about it.
2553 s/<\/?(html|head|title|body|h\d|ul|li|a\s+|table|tr|td|th|pre|hr|p|br)\b.*?>/ /og;
2554
2555 s/\&lt\;/\</og; #Convert '&lt;' to '<'.
2556 s/\&gt\;/\>/og; #Convert '&gt;' to '>'.
2557 s/\&nbsp\;/ /og; #Convert '&nbsp;' to ' '.
2558 return($_);
2559}
2560
1b4fe9dd
PH
2561#######################################################################
2562# get_next_arg();
2563#
2564# $arg = get_next_arg();
2565#
2566# Because eximstats arguments are often passed as variables,
2567# we can't rely on shell parsing to deal with quotes. This
2568# subroutine returns $ARGV[1] and does a shift. If $ARGV[1]
2569# starts with a quote (' or "), and doesn't end in one, then
2570# we append the next argument to it and shift again. We repeat
2571# until we've got all of the argument.
2572#
2573# This isn't perfect as all white space gets reduced to one space,
2574# but it's as good as we can get! If it's esential that spacing
2575# be preserved precisely, then you get that by not using shell
2576# variables.
2577#######################################################################
059ec3d9
PH
2578sub get_next_arg {
2579 my $arg = '';
2580 my $matched_pattern = 0;
2581 while ($ARGV[1]) {
2582 $arg .= ' ' if $arg;
2583 $arg .= $ARGV[1]; shift(@ARGV);
2584 if ($arg !~ /^['"]/) {
2585 $matched_pattern = 1;
2586 last;
2587 }
2588 if ($arg =~ s/^(['"])(.*)\1$/$2/) {
2589 $matched_pattern = 1;
2590 last;
2591 }
2592 }
2593 die "Mismatched argument quotes - <$arg>.\n" unless $matched_pattern;
2594 return $arg;
2595}
2596
2597
2598
2599##################################################
2600# Main Program #
2601##################################################
2602
2603
2604$last_timestamp = '';
2605$last_date = '';
2606$show_errors = 1;
2607$show_relay = 1;
2608$show_transport = 1;
2609$topcount = 50;
2610$local_league_table = 1;
2611$include_remote_users = 0;
2612$hist_opt = 1;
2613$volume_rounding = 1;
2614$localtime_offset = calculate_localtime_offset(); # PH/FANF
2615
2616$charts = 0;
2617$charts_option_specified = 0;
2618$chartrel = ".";
2619$chartdir = ".";
2620
2621@queue_times = (60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60,
2622 12*60*60, 24*60*60);
2623
2624$last_offset = '';
2625$offset_seconds = 0;
2626
2627# Decode options
2628
2629while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-')
2630 {
2631 if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 }
2632 elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 }
2633 elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/)
2634 {
2635 if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
2636 }
2637 elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/)
2638 {
2639 @queue_times = split(/,/, $1);
2640 my($q);
2641 foreach $q (@queue_times) { $q = eval($q) + 0 }
2642 @queue_times = sort { $a <=> $b } @queue_times;
2643 @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0);
2644 }
2645 elsif ($ARGV[0] =~ /^-nt$/) { $show_transport = 0 }
2646 elsif ($ARGV[0] =~ /^\-nt(.?)(.*)\1$/)
2647 {
2648 if ($1 eq "") { $show_transport = 0 } else { $transport_pattern = $2 }
2649 }
2650 elsif ($ARGV[0] =~ /^-t(\d+)$/) { $topcount = $1 }
2651 elsif ($ARGV[0] =~ /^-tnl$/) { $local_league_table = 0 }
2652 elsif ($ARGV[0] =~ /^-html$/) { $html = 1 }
2653 elsif ($ARGV[0] =~ /^-merge$/) { $merge_reports = 1 }
2654 elsif ($ARGV[0] =~ /^-charts$/) {
2655 $charts = 1;
2656 warn "WARNING: CPAN Module GD::Graph::pie not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_pie;
2657 warn "WARNING: CPAN Module GD::Graph::linespoints not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_linespoints;
2658 }
2659 elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; }
2660 elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; }
2661 elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used.
2662 elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 }
2663 elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 }
2664 elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }
2665 elsif ($ARGV[0] =~ /^-byemaildomain$/) { $do_sender{Edomain} = 1 }
2666 elsif ($ARGV[0] =~ /^-byedomain$/) { $do_sender{Edomain} = 1 }
2667 elsif ($ARGV[0] =~ /^-nvr$/) { $volume_rounding = 0 }
2668 elsif ($ARGV[0] =~ /^-d$/) { $debug = 1 }
2669 elsif ($ARGV[0] =~ /^--?h(elp)?$/){ help() }
2670 elsif ($ARGV[0] =~ /^-t_remote_users$/) { $include_remote_users = 1 }
2671 elsif ($ARGV[0] =~ /^-pattern$/)
2672 {
2673 push(@user_descriptions,get_next_arg());
2674 push(@user_patterns,get_next_arg());
2675 }
2676 elsif ($ARGV[0] =~ /^-utc$/)
2677 {
2678 # We don't need this value if the log is in UTC.
2679 $localtime_offset = undef;
2680 }
2681 else
2682 {
2683 print STDERR "Eximstats: Unknown or malformed option $ARGV[0]\n";
2684 help();
2685 }
2686 shift;
2687 }
2688
2689 # Check that all the charts options are specified.
2690 warn "-charts option not specified. Use -help for help.\n" if ($charts_option_specified && ! $charts);
2691
2692 # Default to display tables by sending Host.
2693 $do_sender{Host} = 1 unless ($do_sender{Domain} || $do_sender{Email} || $do_sender{Edomain});
2694
2695
2696for (my $i = 0; $i <= $#queue_times; $i++) {
2697 $queue_bin[$i] = 0;
2698 $remote_queue_bin[$i] = 0;
2699}
2700
2701# Compute the number of slots for the histogram
2702
2703if ($hist_opt > 0)
2704 {
2705 if ($hist_opt > 60 || 60 % $hist_opt != 0)
2706 {
2707 print "Eximstats: -h must specify a factor of 60\n";
2708 exit 1;
2709 }
2710 $hist_interval = 60/$hist_opt; #Interval in minutes.
2711 $hist_number = (24*60)/$hist_interval; #Number of intervals per day.
2712 @received_interval_count = (0) x $hist_number;
2713 @delivered_interval_count = (0) x $hist_number;
2714 }
2715
2716#$queue_unknown = 0;
2717
2718$total_received_data = 0;
2719$total_received_data_gigs = 0;
2720$total_received_count = 0;
2721
2722$total_delivered_data = 0;
2723$total_delivered_data_gigs = 0;
2724$total_delivered_count = 0;
2725
2726$queue_more_than = 0;
2727$delayed_count = 0;
2728$relayed_unshown = 0;
2729$begin = "9999-99-99 99:99:99";
2730$end = "0000-00-00 00:00:00";
2731my($section,$type);
2732foreach $section ('Received','Delivered') {
2733 foreach $type ('Volume','Messages','Delayed','Failed','Hosts','Domains','Emails','Edomains') {
2734 $report_totals{$section}{$type} = 0;
2735 }
2736}
2737
2738# Generate our parser.
2739my $parser = generate_parser();
2740
2741
2742
2743if (@ARGV) {
2744 # Scan the input files and collect the data
2745 foreach my $file (@ARGV) {
2746 if ($file =~ /\.gz/) {
2747 unless (open(FILE,"gunzip -c $file |")) {
2748 print STDERR "Failed to gunzip -c $file: $!";
2749 next;
2750 }
2751 }
2752 elsif ($file =~ /\.Z/) {
2753 unless (open(FILE,"uncompress -c $file |")) {
2754 print STDERR "Failed to uncompress -c $file: $!";
2755 next;
2756 }
2757 }
2758 else {
2759 unless (open(FILE,$file)) {
2760 print STDERR "Failed to read $file: $!";
2761 next;
2762 }
2763 }
2764 #Now parse the filehandle, updating the global variables.
2765 parse($parser,\*FILE);
2766 close FILE;
2767 }
2768}
2769else {
2770 #No files provided. Parse STDIN, updating the global variables.
2771 parse($parser,\*STDIN);
2772}
2773
2774
2775if ($begin eq "9999-99-99 99:99:99") {
2776 print "**** No valid log lines read\n";
2777 exit 1;
2778}
2779
2780# Output our results.
2781print_header();
2782print_grandtotals();
2783
2784# Print counts of user specified patterns if required.
2785print_user_patterns() if @user_patterns;
2786
2787# Print totals by transport if required.
2788print_transport() if $show_transport;
2789
2790# Print the deliveries per interval as a histogram, unless configured not to.
2791# First find the maximum in one interval and scale accordingly.
2792if ($hist_opt > 0) {
2793 print_histogram("Messages received", @received_interval_count);
2794 print_histogram("Deliveries", @delivered_interval_count);
2795}
2796
2797# Print times on queue if required.
2798if ($#queue_times >= 0) {
2799 print_queue_times("all messages", \@queue_bin,$queue_more_than);
2800 print_queue_times("messages with at least one remote delivery",\@remote_queue_bin,$queue_more_than);
2801}
2802
2803# Print relay information if required.
2804print_relay() if $show_relay;
2805
2806# Print the league tables, if topcount isn't zero.
2807if ($topcount > 0) {
2808 foreach ('Host','Domain','Email','Edomain') {
2809 next unless $do_sender{$_};
2810 print_league_table("sending \l$_", $received_count{$_}, $received_data{$_},$received_data_gigs{$_});
2811 }
2812
2813 print_league_table("local sender", \%received_count_user,
2814 \%received_data_user,\%received_data_gigs_user) if ($local_league_table || $include_remote_users);
2815 foreach ('Host','Domain','Email','Edomain') {
2816 next unless $do_sender{$_};
2817 print_league_table("\l$_ destination", $delivered_count{$_}, $delivered_data{$_},$delivered_data_gigs{$_});
2818 }
2819 print_league_table("local destination", \%delivered_count_user,
2820 \%delivered_data_user,\%delivered_data_gigs_user) if ($local_league_table || $include_remote_users);
2821}
2822
2823# Print the error statistics if required.
2824print_errors() if $show_errors;
2825
2826if ($html) {
2827 print "</body>\n</html>\n"
2828}
2829
2830# End of eximstats