Updated to eximstats version 1.42
[exim.git] / src / src / eximstats.src
CommitLineData
059ec3d9 1#!PERL_COMMAND -w
608bc29d 2# $Cambridge: exim/src/src/eximstats.src,v 1.8 2005/06/29 15:35:09 steve Exp $
059ec3d9
PH
3
4# Copyright (c) 2001 University of Cambridge.
5# See the file NOTICE for conditions of use and distribution.
6
7# Perl script to generate statistics from one or more Exim log files.
8
9# Usage: eximstats [<options>] <log file> <log file> ...
10
11# 1996-05-21: Ignore lines not starting with valid date/time, just in case
12# these get into a log file.
13# 1996-11-19: Add the -h option to control the size of the histogram,
14# and optionally turn it off.
15# Use some Perl 5 things; it should be everywhere by now.
16# Add the Perl -w option and rewrite so no warnings are given.
17# Add the -t option to control the length of the "top" listing.
18# Add the -ne, -nt options to turn off errors and transport
19# information.
20# Add information about length of time on queue, and -q<list> to
21# control the intervals and turn it off.
22# Add count and percentage of delayed messages to the Received
23# line.
24# Show total number of errors.
25# Add count and percentage of messages with errors to Received
26# line.
27# Add information about relaying and -nr to suppress it.
28# 1997-02-03 Merged in some of the things Nigel Metheringham had done:
29# Re-worded headings
30# Added received histogram as well as delivered
31# Added local senders' league table
32# Added local recipients' league table
33# 1997-03-10 Fixed typo "destinationss"
34# Allow for intermediate address between final and original
35# when testing for relaying
36# Give better message when no input
37# 1997-04-24 Fixed bug in layout of error listing that was depending on
38# text length (output line got repeated).
39# 1997-05-06 Bug in option decoding when only one option.
40# Overflow bug when handling very large volumes.
41# 1997-10-28 Updated to handle revised log format that might show
42# HELO name as well as host name before IP number
43# 1998-01-26 Bugs in the function for calculating the number of seconds
44# since 1970 from a log date
45# 1998-02-02 Delivery to :blackhole: doesn't have a T= entry in the log
46# line; cope with this, thereby avoiding undefined problems
47# Very short log line gave substring error
48# 1998-02-03 A routed delivery to a local transport may not have <> in the
49# log line; terminate the address at white space, not <
50# 1998-09-07 If first line of input was a => line, $thissize was undefined;
51# ensure it is zero.
52# 1998-12-21 Adding of $thissize from => line should have been adding $size.
53# Oops. Should have looked more closely when fixing the previous
54# bug!
55# 1999-11-12 Increased the field widths for printed integers; numbers are
56# bigger than originally envisaged.
57# 2001-03-21 Converted seconds() routine to use Time::Local, fixing a bug
58# whereby seconds($timestamp) - id_seconds($id) gave an
59# incorrect result.
60# Added POD documentation.
61# Moved usage instructions into help() subroutine.
62# Added 'use strict' and declared all global variables.
63# Added '-html' flag and resultant code.
64# Added '-cache' flag and resultant code.
65# Added add_volume() routine and converted all volume variables
66# to use it, fixing the overflow problems for individual hosts
67# on large sites.
68# Converted all volume output to GB/MB/KB as appropriate.
69# Don't store local user stats if -nfl is specified.
70# Modifications done by: Steve Campbell (<steve@computurn.com>)
71# 2001-04-02 Added the -t_remote_users flag. Steve Campbell.
72# 2001-10-15 Added the -domain flag. Steve Campbell.
73# 2001-10-16 Accept files on STDIN or on the command line. Steve Campbell.
74# 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
75# We now generate our main parsing subroutine as an eval statement
76# which improves performance dramatically when not all the results
77# are required. We also cache the last timestamp to time convertion.
78#
79# NOTE: 'Top 50 destinations by (message count|volume)' lines are
80# now 'Top N (host|email|domain) destinations by (message count|volume)'
81# where N is the topcount. Steve Campbell.
82#
83# 2001-10-30 V1.16 Joachim Wieland.
84# Fixed minor bugs in add_volume() when taking over this version
85# for use in Exim 4: -w gave uninitialized value warnings in
86# two situations: for the first addition to a counter, and if
87# there were never any gigabytes, thereby leaving the $gigs
88# value unset.
89# Initialized $last_timestamp to stop a -w uninitialized warning.
90# Minor layout tweak for grand totals (nitpicking).
91# Put the IP addresses for relaying stats in [] and separated by
92# a space from the domain name.
93# Removed the IPv4-specific address test when picking out addresses
94# for relaying. Anything inside [] is OK.
95#
96# 2002-07-02 Philip Hazel
97# Fixed "uninitialized variable" message that occurred for relay
98# messages that arrived from H=[1.2.3.4] hosts (no name shown).
99# This bug didn't affect the output.
100#
101# 2002-04-15 V1.17 Joachim Wieland.
102# Added -charts, -chartdir. -chartrel options which use
103# GD::Graph modules to create graphical charts of the statistics.
104#
105# 2002-04-15 V1.18 Steve Campbell.
106# Added a check for $domain to to stop a -w uninitialized warning.
107# Added -byemaildomain option.
108# Only print HTML header links to included tables!
109#
110# 2002-08-02 V1.19 Steve Campbell.
111# Changed the debug mode to dump the parser onto STDERR rather
112# than STDOUT. Documented the -d flag into the help().
113# Rejoined the divergent 2002-04-15 and 2002-07-02 releases.
114#
115# 2002-08-21 V1.20 Steve Campbell.
116# Added the '-merge' option to allow merging of previous reports.
117# Fixed a missing semicolon when doing -bydomain.
118# Make volume charts plot the data gigs and bytes rather than just bytes.
119# Only process log lines with $flag =~ /<=|=>|->|==|\*\*|Co/
120# Converted Emaildomain to Edomain - the column header was too wide!
121# This changes the text output slightly. You can revert to the old
122# column widths by changing $COLUMN_WIDTHS to 7;
123#
124# 2002-09-04 V1.21 Andreas J Mueller
125# Local deliveries domain now defaults to 'localdomain'.
126# Don't match F=<From> when looking for the user.
127#
128# 2002-09-05 V1.22 Steve Campbell
129# Fixed a perl 5.005 incompatibility problem ('our' variables).
130#
131# 2002-09-11 V1.23 Steve Campbell
132# Stopped -charts option from throwing errors on null data.
133# Don't print out 'Errors encountered' unless there are any.
134
135# 2002-10-21 V1.23a Philip Hazel - patch from Tony Finch put in until
136# Steve's eximstats catches up.
137# Handle log files that include the timezone after the timestamp.
138# Switch to assuming that log timestamps are in local time, with
139# an option for UTC timestamps, as in Exim itself.
140#
141# 2003-02-05 V1.24 Steve Campbell
142# Added in Sergey Sholokh's code to convert '<' and '>' characters
143# in HTML output. Also added code to convert them back with -merge.
144# Fixed timestamp offsets to convert to seconds rather than minutes.
145# Updated -merge to work with output files using timezones.
146# Added cacheing to speed up the calculation of timezone offsets.
147#
148# 2003-02-07 V1.25 Steve Campbell
149# Optimised the usage of mktime() in the seconds subroutine.
150# Removed the now redundant '-cache' option.
151# html2txt() now explicitly matches HTML tags.
152# Implemented a new sorting algorithm - the top_n_sort() routine.
153# Added Danny Carroll's '-nvr' flag and code.
154#
155# 2003-03-13 V1.26 Steve Campbell
156# Implemented HTML compliance changes recommended by Bernard Massot.
157# Bug fix to allow top_n_sort() to handle null keys.
158# Convert all domains and edomains to lowercase.
159# Remove preceding dots from domains.
160#
161# 2003-03-13 V1.27 Steve Campbell
162# Replaced border attributes with 'border=1', as recommended by
163# Bernard Massot.
164#
165# 2003-06-03 V1.28 John Newman
166# Added in the ability to skip over the parsing and evaulation of
167# specific transports as passed to eximstats via the new "-nt/.../"
168# command line argument. This new switch allows the viewing of
169# not more accurate statistics but more applicable statistics when
170# special transports are in use (ie; SpamAssassin). We need to be
171# able to ignore transports such as this otherwise the resulting
172# local deliveries are significantly skewed (doubled)...
173#
174# 2003-11-06 V1.29 Steve Campbell
175# Added the '-pattern "Description" "/pattern/"' option.
176#
177# 2004-02-17 V1.30 Steve Campbell
178# Added warnings if required GD::Graph modules are not available or
179# insufficient -chart* options are specified.
180#
8e669ac1 181# 2004-02-20 V1.31 Andrea Balzi
059ec3d9
PH
182# Only show the Local Sender/Destination links if the tables exist.
183#
1b4fe9dd
PH
184# 2004-07-05 V1.32 Steve Campbell
185# Fix '-merge -h0' divide by zero error.
186#
187# 2004-07-15 V1.33 Steve Campbell
188# Documentation update - I've converted the subroutine
189# documentation from POD to comments.
8974000d
SC
190#
191# 2004-12-10 V1.34 Steve Campbell
192# Eximstats can now parse syslog lines as well as mainlog lines.
193#
884c2af8 194# 2004-12-20 V1.35 Wouter Verhelst
d5692f86 195# Pie charts by volume were actually generated by count. Fixed.
50adf73a
SC
196#
197# 2005-02-07 V1.36 Gregor Herrmann / Steve Campbell
198# Added average sizes to HTML Top tables.
d5692f86
SC
199#
200# 2005-04-26 V1.37 Frank Heydlauf
201# Added -xls and the ability to specify output files.
202#
203# 2005-04-29 V1.38 Steve Campbell
204# Use FileHandles for outputing results.
205# Allow any combination of xls, txt, and html output.
206# Fixed display of large numbers with -nvr option
207# Fixed merging of reports with empty tables.
208#
209# 2005-05-27 V1.39 Steve Campbell
210# Added the -include_original_destination flag
211# Removed tabs and trailing whitespace.
212#
608bc29d
SC
213# 2005-06-03 V1.40 Steve Campbell
214# Whilst parsing the mainlog(s), store information about
215# the messages in a hash of arrays rather than using
216# individual hashes. This is a bit cleaner and results in
217# dramatic memory savings, albeit at a slight CPU cost.
218#
219# 2005-06-15 V1.41 Steve Campbell
220# Added the -show_rt<list> flag.
221# Added the -show_dt<list> flag.
222#
223# 2005-06-24 V1.42 Steve Campbell
224# Added Histograms for user specified patterns.
d5692f86
SC
225#
226#
227# For documentation on the logfile format, see
228# http://www.exim.org/exim-html-4.50/doc/html/spec_48.html#IX2793
059ec3d9
PH
229
230=head1 NAME
231
8974000d 232eximstats - generates statistics from Exim mainlog or syslog files.
059ec3d9
PH
233
234=head1 SYNOPSIS
235
d5692f86 236 eximstats [Output] [Options] mainlog1 mainlog2 ...
059ec3d9
PH
237 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_report.txt
238
d5692f86
SC
239=head2 Output:
240
241=over 4
242
243=item B<-txt>
244
245Output the results in plain text to STDOUT.
246
247=item B<-txt>=I<filename>
248
249Output the results in plain text. Filename '-' for STDOUT is accepted.
250
251=item B<-html>
252
253Output the results in HTML to STDOUT.
254
255=item B<-html>=I<filename>
256
257Output the results in HTML. Filename '-' for STDOUT is accepted.
258
259=item B<-xls>
260
261Output the results in Excel compatible Format to STDOUT.
262Requires the Spreadsheet::WriteExcel CPAN module.
263
264=item B<-xls>=I<filename>
265
266Output the results in Excel compatible format. Filename '-' for STDOUT is accepted.
267
268
269=back
270
271=head2 Options:
059ec3d9
PH
272
273=over 4
274
275=item B<-h>I<number>
276
277histogram divisions per hour. The default is 1, and
2780 suppresses histograms. Valid values are:
279
2800, 1, 2, 3, 5, 10, 15, 20, 30 or 60.
281
282=item B<-ne>
283
284Don't display error information.
285
286=item B<-nr>
287
288Don't display relaying information.
289
290=item B<-nr>I</pattern/>
291
292Don't display relaying information that matches.
293
294=item B<-nt>
295
296Don't display transport information.
297
298=item B<-nt>I</pattern/>
299
300Don't display transport information that matches
301
302=item B<-q>I<list>
303
304List of times for queuing information single 0 item suppresses.
305
306=item B<-t>I<number>
307
308Display top <number> sources/destinations
309default is 50, 0 suppresses top listing.
310
311=item B<-tnl>
312
313Omit local sources/destinations in top listing.
314
315=item B<-t_remote_users>
316
317Include remote users in the top source/destination listings.
318
d5692f86
SC
319=item B<-include_original_destination>
320
321Include the original destination email addresses rather than just
322using the final ones.
323Useful for finding out which of your mailing lists are receiving mail.
324
608bc29d
SC
325=item B<-show_dt>I<list>
326
327Show the delivery times (B<DT>)for all the messages.
328
329Exim must have been configured to use the +delivery_time logging option
330for this option to work.
331
332I<list> is an optional list of times. Eg -show_dt1,2,4,8 will show
333the number of messages with delivery times under 1 second, 2 seconds, 4 seconds,
3348 seconds, and over 8 seconds.
335
336=item B<-show_rt>I<list>
337
338Show the receipt times for all the messages. The receipt time is
339defined as the Completed hh:mm:ss - queue_time_overall - the Receipt hh:mm:ss.
340These figures will be skewed by pipelined messages so might not be that useful.
341
342Exim must have been configured to use the +queue_time_overall logging option
343for this option to work.
344
345I<list> is an optional list of times. Eg -show_rt1,2,4,8 will show
346the number of messages with receipt times under 1 second, 2 seconds, 4 seconds,
3478 seconds, and over 8 seconds.
348
059ec3d9
PH
349=item B<-byhost>
350
351Show results by sending host. This may be combined with
352B<-bydomain> and/or B<-byemail> and/or B<-byedomain>. If none of these options
353are specified, then B<-byhost> is assumed as a default.
354
355=item B<-bydomain>
356
357Show results by sending domain.
358May be combined with B<-byhost> and/or B<-byemail> and/or B<-byedomain>.
359
360=item B<-byemail>
361
362Show results by sender's email address.
363May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byedomain>.
364
365=item B<-byemaildomain> or B<-byedomain>
366
367Show results by sender's email domain.
368May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byemail>.
369
370=item B<-pattern> I<Description> I</Pattern/>
371
372Look for the specified pattern and count the number of lines in which it appears.
373This option can be specified multiple times. Eg:
374
375 -pattern 'Refused connections' '/refused connection/'
376
377
378=item B<-merge>
379
380This option allows eximstats to merge old eximstat reports together. Eg:
381
382 eximstats mainlog.sun > report.sun.txt
383 eximstats mainlog.mon > report.mon.txt
384 eximstats mainlog.tue > report.tue.txt
385 eximstats mainlog.wed > report.web.txt
386 eximstats mainlog.thu > report.thu.txt
387 eximstats mainlog.fri > report.fri.txt
388 eximstats mainlog.sat > report.sat.txt
389 eximstats -merge report.*.txt > weekly_report.txt
390 eximstats -merge -html report.*.txt > weekly_report.html
391
392=over 4
393
394=item *
395
396You can merge text or html reports and output the results as text or html.
397
398=item *
399
400You can use all the normal eximstat output options, but only data
401included in the original reports can be shown!
402
403=item *
404
405When merging reports, some loss of accuracy may occur in the top I<n> lists.
406This will be towards the ends of the lists.
407
408=item *
409
410The order of items in the top I<n> lists may vary when the data volumes
411round to the same value.
412
413=back
414
059ec3d9
PH
415=item B<-charts>
416
417Create graphical charts to be displayed in HTML output.
d5692f86 418Only valid in combination with I<-html>.
059ec3d9
PH
419
420This requires the following modules which can be obtained
421from http://www.cpan.org/modules/01modules.index.html
422
423=over 4
424
425=item GD
426
427=item GDTextUtil
428
429=item GDGraph
430
431=back
432
433To install these, download and unpack them, then use the normal perl installation procedure:
434
435 perl Makefile.PL
436 make
437 make test
438 make install
439
440=item B<-chartdir>I <dir>
441
442Create the charts in the directory <dir>
443
444=item B<-chartrel>I <dir>
445
446Specify the relative directory for the "img src=" tags from where to include
447the charts
448
449=item B<-d>
450
451Debug flag. This outputs the eval()'d parser onto STDOUT which makes it
452easier to trap errors in the eval section. Remember to add 1 to the line numbers to allow for the
453title!
454
455=back
456
457=head1 DESCRIPTION
458
8974000d 459Eximstats parses exim mainlog and syslog files to output a statistical
059ec3d9 460analysis of the messages processed. By default, a text
d5692f86
SC
461analysis is generated, but you can request other output formats
462using flags. See the help (B<-help>) to learn
059ec3d9
PH
463about how to create charts from the tables.
464
465=head1 AUTHOR
466
467There is a web site at http://www.exim.org - this contains details of the
468mailing list exim-users@exim.org.
469
470=head1 TO DO
471
472This program does not perfectly handle messages whose received
473and delivered log lines are in different files, which can happen
474when you have multiple mail servers and a message cannot be
475immeadiately delivered. Fixing this could be tricky...
476
d5692f86
SC
477Merging of xls files is not (yet) possible. Be free to implement :)
478
059ec3d9
PH
479=cut
480
481use integer;
482use strict;
d5692f86 483use IO::File;
059ec3d9
PH
484
485# use Time::Local; # PH/FANF
486use POSIX;
487
d5692f86 488use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints $HAVE_Spreadsheet_WriteExcel);
059ec3d9
PH
489eval { require GD::Graph::pie; };
490$HAVE_GD_Graph_pie = $@ ? 0 : 1;
491eval { require GD::Graph::linespoints; };
492$HAVE_GD_Graph_linespoints = $@ ? 0 : 1;
d5692f86
SC
493eval { require Spreadsheet::WriteExcel; };
494$HAVE_Spreadsheet_WriteExcel = $@ ? 0 : 1;
059ec3d9
PH
495
496
497##################################################
498# Static data #
499##################################################
500# 'use vars' instead of 'our' as perl5.005 is still in use out there!
501use vars qw(@tab62 @days_per_month $gig);
502use vars qw($VERSION);
503use vars qw($COLUMN_WIDTHS);
608bc29d 504use vars qw($WEEK $DAY $HOUR $MINUTE);
059ec3d9
PH
505
506
507@tab62 =
508 (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, # 0-9
509 0,10,11,12,13,14,15,16,17,18,19,20, # A-K
510 21,22,23,24,25,26,27,28,29,30,31,32, # L-W
511 33,34,35, 0, 0, 0, 0, 0, # X-Z
512 0,36,37,38,39,40,41,42,43,44,45,46, # a-k
513 47,48,49,50,51,52,53,54,55,56,57,58, # l-w
514 59,60,61); # x-z
515
516@days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
517$gig = 1024 * 1024 * 1024;
608bc29d 518$VERSION = '1.42';
059ec3d9
PH
519
520# How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
521$COLUMN_WIDTHS = 8;
522
608bc29d
SC
523$MINUTE = 60;
524$HOUR = 60 * $MINUTE;
525$DAY = 24 * $HOUR;
526$WEEK = 7 * $DAY;
527
059ec3d9
PH
528# Declare global variables.
529use vars qw($total_received_data $total_received_data_gigs $total_received_count);
530use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_count);
d5692f86
SC
531use vars qw(%timestamp2time); #Hash of timestamp => time.
532use vars qw($last_timestamp $last_time); #The last time convertion done.
533use vars qw($last_date $date_seconds); #The last date convertion done.
534use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
059ec3d9 535use vars qw($localtime_offset);
d5692f86
SC
536use vars qw($i); #General loop counter.
537use vars qw($debug); #Debug mode?
538use vars qw($ntopchart); #How many entries should make it into the chart?
539use vars qw($gddirectory); #Where to put files from GD::Graph
540use vars qw($workbook $ws_global $ws_relayed $ws_top50 $ws_errors ); #For use in Speadsheed::WriteExcel
541use vars qw($row $col $row_hist $col_hist $row_league_table);
542use vars qw($run_hist);
543use vars qw($f_default $f_header1 $f_header2 $f_headertab $f_percent); #Format Header
544
545# Output FileHandles
546use vars qw($txt_fh $htm_fh $xls_fh);
059ec3d9
PH
547
548$ntopchart = 5;
549
550# The following are parameters whose values are
551# set by command line switches:
552use vars qw($show_errors $show_relay $show_transport $transport_pattern);
553use vars qw($topcount $local_league_table $include_remote_users);
554use vars qw($hist_opt $hist_interval $hist_number $volume_rounding);
d5692f86 555use vars qw($relay_pattern @queue_times @user_patterns @user_descriptions);
608bc29d 556use vars qw(@rcpt_times @delivery_times);
d5692f86
SC
557use vars qw($include_original_destination);
558use vars qw($txt_fh $htm_fh $xls_fh);
059ec3d9
PH
559
560use vars qw(%do_sender); #Do sender by Host, Domain, Email, and/or Edomain tables.
561use vars qw($charts $chartrel $chartdir $charts_option_specified);
d5692f86 562use vars qw($merge_reports); #Merge old reports ?
059ec3d9
PH
563
564# The following are modified in the parse() routine, and
565# referred to in the print_*() routines.
608bc29d
SC
566use vars qw($delayed_count $relayed_unshown $begin $end);
567use vars qw(%messages $message_aref);
059ec3d9
PH
568use vars qw(%received_count %received_data %received_data_gigs);
569use vars qw(%delivered_count %delivered_data %delivered_data_gigs);
570use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
571use vars qw(%delivered_count_user %delivered_data_user %delivered_data_gigs_user);
572use vars qw(%transported_count %transported_data %transported_data_gigs);
608bc29d
SC
573use vars qw(%relayed %errors_count $message_errors);
574use vars qw(@qt_all_bin @qt_remote_bin);
575use vars qw($qt_all_overflow $qt_remote_overflow);
576use vars qw(@dt_all_bin @dt_remote_bin %rcpt_times_bin);
577use vars qw($dt_all_overflow $dt_remote_overflow %rcpt_times_overflow);
578use vars qw(@received_interval_count @delivered_interval_count);
579use vars qw(@user_pattern_totals @user_pattern_interval_count);
059ec3d9
PH
580
581use vars qw(%report_totals);
582
608bc29d
SC
583# Enumerations
584use vars qw($SIZE $FROM_HOST $FROM_ADDRESS $ARRIVAL_TIME $REMOTE_DELIVERED $PROTOCOL);
585use vars qw($DELAYED $HAD_ERROR);
586$SIZE = 0;
587$FROM_HOST = 1;
588$FROM_ADDRESS = 2;
589$ARRIVAL_TIME = 3;
590$REMOTE_DELIVERED = 4;
591$DELAYED = 5;
592$HAD_ERROR = 6;
593$PROTOCOL = 7;
059ec3d9
PH
594
595
596
597##################################################
598# Subroutines #
599##################################################
600
d5692f86
SC
601#######################################################################
602# get_filehandle($file,\%output_files);
603# Return a filehandle writing to $file.
604#
605# If %output_files is defined, check that $output_files{$file}
606# doesn't exist and die if it does, or set it if it doesn't.
607#######################################################################
608sub get_filehandle {
609 my($file,$output_files_href) = @_;
610
611 $file = '-' if ($file eq '');
612
613 if (defined $output_files_href) {
614 die "You can only output to '$file' once! Use -h for help.\n" if exists $output_files_href->{$file};
615 $output_files_href->{$file} = 1;
616 }
617
618 if ($file eq '-') {
619 return \*STDOUT;
620 }
621
622 if (-e $file) {
623 unlink $file or die "Failed to rm $file: $!";
624 }
625
626 my $fh = new IO::File $file, O_WRONLY|O_CREAT|O_EXCL;
627 die "new IO::File $file failed: $!" unless (defined $fh);
628 return $fh;
629}
630
059ec3d9 631
1b4fe9dd
PH
632#######################################################################
633# volume_rounded();
634#
635# $rounded_volume = volume_rounded($bytes,$gigabytes);
636#
637# Given a data size in bytes, round it to KB, MB, or GB
638# as appropriate.
639#
640# Eg 12000 => 12KB, 15000000 => 14GB, etc.
641#
642# Note: I've experimented with Math::BigInt and it results in a 33%
643# performance degredation as opposed to storing numbers split into
644# bytes and gigabytes.
645#######################################################################
059ec3d9
PH
646sub volume_rounded {
647 my($x,$g) = @_;
648 $x = 0 unless $x;
649 $g = 0 unless $g;
650 my($rounded);
651
652 while ($x > $gig) {
653 $g++;
654 $x -= $gig;
655 }
656
657 if ($volume_rounding) {
658 # Values < 1 GB
659 if ($g <= 0) {
660 if ($x < 10000) {
661 $rounded = sprintf("%6d", $x);
662 }
663 elsif ($x < 10000000) {
664 $rounded = sprintf("%4dKB", ($x + 512)/1024);
665 }
666 else {
667 $rounded = sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
668 }
669 }
670 # Values between 1GB and 10GB are printed in MB
671 elsif ($g < 10) {
672 $rounded = sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024));
673 }
674 else {
675 # Handle values over 10GB
676 $rounded = sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
677 }
678 }
679 else {
680 # We don't want any rounding to be done.
d5692f86
SC
681 # and we don't need broken formated output which on one hand avoids numbers from
682 # being interpreted as string by Spreadsheed Calculators, on the other hand
683 # breaks if more than 4 digits! -> flexible length instead of fixed length
684 # Format the return value at the output routine! -fh
685 #$rounded = sprintf("%d", ($g * $gig) + $x);
686 no integer;
687 $rounded = sprintf("%.0f", ($g * $gig) + $x);
059ec3d9
PH
688 }
689
690 return $rounded;
691}
692
693
1b4fe9dd
PH
694#######################################################################
695# un_round();
8e669ac1 696#
1b4fe9dd 697# un_round($rounded_volume,\$bytes,\$gigabytes);
8e669ac1 698#
1b4fe9dd
PH
699# Given a volume in KB, MB or GB, as generated by volume_rounded(),
700# do the reverse transformation and convert it back into Bytes and Gigabytes.
701# These are added to the $bytes and $gigabytes parameters.
8e669ac1 702#
1b4fe9dd
PH
703# Given a data size in bytes, round it to KB, MB, or GB
704# as appropriate.
8e669ac1 705#
1b4fe9dd
PH
706# EG: 500 => (500,0), 14GB => (0,14), etc.
707#######################################################################
059ec3d9
PH
708sub un_round {
709 my($rounded,$bytes_sref,$gigabytes_sref) = @_;
710
711 if ($rounded =~ /(\d+)GB/) {
712 $$gigabytes_sref += $1;
713 }
714 elsif ($rounded =~ /(\d+)MB/) {
715 $$gigabytes_sref += $1 / 1024;
716 $$bytes_sref += (($1 % 1024 ) * 1024 * 1024);
717 }
718 elsif ($rounded =~ /(\d+)KB/) {
719 $$gigabytes_sref += $1 / (1024 * 1024);
720 $$bytes_sref += ($1 % (1024 * 1024) * 1024);
721 }
722 elsif ($rounded =~ /(\d+)/) {
d5692f86
SC
723 # We need to turn off integer in case we are merging an -nvr report.
724 no integer;
725 $$gigabytes_sref += int($1 / $gig);
059ec3d9
PH
726 $$bytes_sref += $1 % $gig;
727 }
728
729 #Now reduce the bytes down to less than 1GB.
730 add_volume($bytes_sref,$gigabytes_sref,0) if ($$bytes_sref > $gig);
731}
732
733
1b4fe9dd
PH
734#######################################################################
735# add_volume();
8e669ac1 736#
1b4fe9dd 737# add_volume(\$bytes,\$gigs,$size);
8e669ac1 738#
1b4fe9dd
PH
739# Add $size to $bytes/$gigs where this is a number split into
740# bytes ($bytes) and gigabytes ($gigs). This is significantly
741# faster than using Math::BigInt.
742#######################################################################
059ec3d9 743sub add_volume {
1b4fe9dd
PH
744 my($bytes_ref,$gigs_ref,$size) = @_;
745 $$bytes_ref = 0 if ! defined $$bytes_ref;
746 $$gigs_ref = 0 if ! defined $$gigs_ref;
747 $$bytes_ref += $size;
748 while ($$bytes_ref > $gig) {
749 $$gigs_ref++;
750 $$bytes_ref -= $gig;
059ec3d9
PH
751 }
752}
753
754
1b4fe9dd
PH
755#######################################################################
756# format_time();
8e669ac1 757#
1b4fe9dd 758# $formatted_time = format_time($seconds);
8e669ac1 759#
1b4fe9dd
PH
760# Given a time in seconds, break it down into
761# weeks, days, hours, minutes, and seconds.
8e669ac1 762#
1b4fe9dd
PH
763# Eg 12005 => 3h20m5s
764#######################################################################
059ec3d9
PH
765sub format_time {
766my($t) = pop @_;
767my($s) = $t % 60;
768$t /= 60;
769my($m) = $t % 60;
770$t /= 60;
771my($h) = $t % 24;
772$t /= 24;
773my($d) = $t % 7;
774my($w) = $t/7;
775my($p) = "";
776$p .= "$w"."w" if $w > 0;
777$p .= "$d"."d" if $d > 0;
778$p .= "$h"."h" if $h > 0;
779$p .= "$m"."m" if $m > 0;
780$p .= "$s"."s" if $s > 0 || $p eq "";
781$p;
782}
783
784
1b4fe9dd
PH
785#######################################################################
786# unformat_time();
8e669ac1 787#
1b4fe9dd 788# $seconds = unformat_time($formatted_time);
8e669ac1 789#
1b4fe9dd 790# Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
8e669ac1 791#
1b4fe9dd
PH
792# Eg 3h20m5s => 12005
793#######################################################################
059ec3d9
PH
794sub unformat_time {
795 my($formated_time) = pop @_;
796 my $time = 0;
797
798 while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
799 $time += $1 if ($2 eq '' || $2 eq 's');
800 $time += $1 * 60 if ($2 eq 'm');
801 $time += $1 * 60 * 60 if ($2 eq 'h');
802 $time += $1 * 60 * 60 * 24 if ($2 eq 'd');
803 $time += $1 * 60 * 60 * 24 * 7 if ($2 eq 'w');
804 }
805 $time;
806}
807
808
1b4fe9dd
PH
809#######################################################################
810# seconds();
8e669ac1 811#
1b4fe9dd 812# $time = seconds($timestamp);
8e669ac1 813#
1b4fe9dd
PH
814# Given a time-of-day timestamp, convert it into a time() value using
815# POSIX::mktime. We expect the timestamp to be of the form
816# "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
817# and the year to be absolute (we do the necessary conversions). The
818# timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
819# offset is not present, and we have not been told that the log is in UTC
820# (with the -utc option), then we adjust the time by the current local
821# time offset so that it can be compared with the time recorded in message
822# IDs, which is UTC.
8e669ac1 823#
1b4fe9dd
PH
824# To improve performance, we only use mktime on the date ($year-$mon-$day),
825# and only calculate it if the date is different to the previous time we
826# came here. We then add on seconds for the '$hour:$min:$sec'.
8e669ac1 827#
1b4fe9dd
PH
828# We also store the results of the last conversion done, and only
829# recalculate if the date is different.
8e669ac1 830#
1b4fe9dd
PH
831# We used to have the '-cache' flag which would store the results of the
832# mktime() call. However, the current way of just using mktime() on the
833# date obsoletes this.
834#######################################################################
059ec3d9
PH
835sub seconds {
836 my($timestamp) = @_;
837
838 # Is the timestamp the same as the last one?
839 return $last_time if ($last_timestamp eq $timestamp);
840
841 return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o);
842
843 unless ($last_date eq $1) {
844 $last_date = $1;
845 my(@timestamp) = (0,0,0,$4,$3,$2);
846 $timestamp[5] -= 1900;
847 $timestamp[4]--;
848 $date_seconds = mktime(@timestamp);
849 }
850 my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
851
852 # SC. Use cacheing. Also note we want seconds not minutes.
853 #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
854 if (defined $8 && ($8 ne $last_offset)) {
855 $last_offset = $8;
856 $offset_seconds = ($10 * 60 + $11) * 60;
857 $offset_seconds = -$offset_seconds if ($9 eq '-');
858 }
859
860
861 if (defined $7) {
862 #$time -= $this_offset;
863 $time -= $offset_seconds;
864 } elsif (defined $localtime_offset) {
865 $time -= $localtime_offset;
866 }
867
868 # Store the last timestamp received.
869 $last_timestamp = $timestamp;
870 $last_time = $time;
871
872 $time;
873}
874
875
1b4fe9dd
PH
876#######################################################################
877# id_seconds();
8e669ac1 878#
1b4fe9dd 879# $time = id_seconds($message_id);
8e669ac1 880#
1b4fe9dd
PH
881# Given a message ID, convert it into a time() value.
882#######################################################################
059ec3d9
PH
883sub id_seconds {
884my($sub_id) = substr((pop @_), 0, 6);
885my($s) = 0;
886my(@c) = split(//, $sub_id);
887while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] }
888$s;
889}
890
608bc29d
SC
891#######################################################################
892# wdhms_seconds();
893#
894# $seconds = wdhms_seconds($string);
895#
896# Convert a string in a week/day/hour/minute/second format (eg 4h10s)
897# into seconds.
898#######################################################################
899sub wdhms_seconds {
900 if ($_[0] =~ /^(?:(\d+)w)?(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s)?/) {
901 return((($1||0) * $WEEK) + (($2||0) * $DAY) + (($3||0) * $HOUR) + (($4||0) * $MINUTE) + ($5||0));
902 }
903 return undef;
904}
905
906#######################################################################
907# queue_time();
908#
909# $queued = queue_time($completed_tod, $arrival_time, $id);
910#
911# Given the completed time of day and either the arrival time
912# (preferred), or the message ID, calculate how long the message has
913# been on the queue.
914#
915#######################################################################
916sub queue_time {
917 my($completed_tod, $arrival_time, $id) = @_;
918
919 # Note: id_seconds() benchmarks as 42% slower than seconds()
920 # and computing the time accounts for a significant portion of
921 # the run time.
922 if (defined $arrival_time) {
923 return(seconds($completed_tod) - seconds($arrival_time));
924 }
925 else {
926 return(seconds($completed_tod) - id_seconds($id));
927 }
928}
059ec3d9
PH
929
930
1b4fe9dd
PH
931#######################################################################
932# calculate_localtime_offset();
8e669ac1 933#
1b4fe9dd 934# $localtime_offset = calculate_localtime_offset();
8e669ac1 935#
1b4fe9dd 936# Calculate the the localtime offset from gmtime in seconds.
8e669ac1 937#
1b4fe9dd 938# $localtime = time() + $localtime_offset.
8e669ac1 939#
1b4fe9dd
PH
940# These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
941# (West is negative, East is positive.)
942#######################################################################
059ec3d9
PH
943
944# $localtime = gmtime() + $localtime_offset. OLD COMMENT
945# This subroutine commented out as it's not currently in use.
946
947#sub calculate_localtime_offset {
948# # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
949# my (@sample_date) = (0,0,0,5,5,100);
950# my $localtime = timelocal(@sample_date);
951# my $gmtime = timegm(@sample_date);
952# my $offset = $localtime - $gmtime;
953# return $offset;
954#}
955
956sub calculate_localtime_offset {
957 # Assume that the offset at the moment is valid across the whole
958 # period covered by the logs that we're analysing. This may not
959 # be true around the time the clocks change in spring or autumn.
960 my $utc = time;
961 # mktime works on local time and gmtime works in UTC
962 my $local = mktime(gmtime($utc));
963 return $local - $utc;
964}
965
966
608bc29d 967
1b4fe9dd 968#######################################################################
608bc29d
SC
969# print_duration_table();
970#
971# print_duration_table($title, $message_type, \@times, \@values, $overflow);
972#
973# Print a table showing how long a particular step took for
974# the messages. The parameters are:
975# $title Eg "Time spent on the queue"
976# $message_type Eg "Remote"
977# \@times The maximum time a message took for it to increment
978# the corresponding @values counter.
979# \@values An array of message counters.
980# $overflow The number of messages which exceeded the maximum
981# time.
1b4fe9dd 982#######################################################################
608bc29d 983sub print_duration_table {
059ec3d9 984no integer;
608bc29d 985my($title, $message_type, $times_aref, $values_aref, $overflow) = @_;
059ec3d9
PH
986my(@chartdatanames);
987my(@chartdatavals);
988
989my $printed_one = 0;
990my $cumulative_percent = 0;
059ec3d9 991
608bc29d
SC
992my $queue_total = $overflow;
993map {$queue_total += $_} @$values_aref;
059ec3d9 994
608bc29d 995my $temp = "$title: $message_type";
059ec3d9 996
d5692f86
SC
997
998my $txt_format = "%5s %4s %6d %5.1f%% %5.1f%%\n";
999my $htm_format = "<tr><td align=\"right\">%s %s</td><td align=\"right\">%d</td><td align=\"right\">%5.1f%%</td><td align=\"right\">%5.1f%%</td>\n";
1000
1001# write header
1002printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
1003if ($htm_fh) {
608bc29d 1004 print $htm_fh "<hr><a name=\"$title $message_type\"></a><h2>$temp</h2>\n";
d5692f86
SC
1005 print $htm_fh "<table border=0 width=\"100%\">\n";
1006 print $htm_fh "<tr><td>\n";
1007 print $htm_fh "<table border=1>\n";
1008 print $htm_fh "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th>\n";
059ec3d9 1009}
d5692f86 1010if ($xls_fh)
059ec3d9 1011{
d5692f86 1012
608bc29d 1013 $ws_global->write($row++, $col, "$title: ".$message_type, $f_header2);
d5692f86
SC
1014 my @content=("Time", "Messages", "Percentage", "Cumulative Percentage");
1015 &set_worksheet_line($ws_global, $row++, 1, \@content, $f_headertab);
059ec3d9
PH
1016}
1017
d5692f86 1018
608bc29d
SC
1019for ($i = 0; $i <= $#$times_aref; ++$i) {
1020 if ($$values_aref[$i] > 0)
059ec3d9 1021 {
608bc29d 1022 my $percent = ($values_aref->[$i] * 100)/$queue_total;
059ec3d9 1023 $cumulative_percent += $percent;
d5692f86
SC
1024
1025 my @content=($printed_one? " " : "Under",
608bc29d
SC
1026 format_time($times_aref->[$i]),
1027 $values_aref->[$i], $percent, $cumulative_percent);
d5692f86
SC
1028
1029 if ($htm_fh) {
1030 printf $htm_fh ($htm_format, @content);
608bc29d 1031 if (!defined($values_aref->[$i])) {
d5692f86
SC
1032 print $htm_fh "Not defined";
1033 }
1034 }
1035 if ($txt_fh) {
1036 printf $txt_fh ($txt_format, @content);
608bc29d 1037 if (!defined($times_aref->[$i])) {
d5692f86
SC
1038 print $txt_fh "Not defined";
1039 }
1040 }
1041 if ($xls_fh)
1042 {
1043 no integer;
1044 &set_worksheet_line($ws_global, $row, 0, [@content[0,1,2]], $f_default);
1045 &set_worksheet_line($ws_global, $row++, 3, [$content[3]/100,$content[4]/100], $f_percent);
1046
608bc29d 1047 if (!defined($times_aref->[$i])) {
d5692f86
SC
1048 $col=0;
1049 $ws_global->write($row++, $col, "Not defined" );
1050 }
059ec3d9 1051 }
d5692f86 1052
059ec3d9 1053 push(@chartdatanames,
608bc29d
SC
1054 ($printed_one? "" : "Under") . format_time($times_aref->[$i]));
1055 push(@chartdatavals, $$values_aref[$i]);
059ec3d9
PH
1056 $printed_one = 1;
1057 }
1058}
1059
608bc29d
SC
1060if ($overflow && $overflow > 0) {
1061 my $percent = ($overflow * 100)/$queue_total;
059ec3d9 1062 $cumulative_percent += $percent;
d5692f86 1063
608bc29d
SC
1064 my @content = ("Over ", format_time($times_aref->[-1]),
1065 $overflow, $percent, $cumulative_percent);
d5692f86
SC
1066
1067 printf $txt_fh ($txt_format, @content) if $txt_fh;
1068 printf $htm_fh ($htm_format, @content) if $htm_fh;
1069 if ($xls_fh)
1070 {
1071 &set_worksheet_line($ws_global, $row, 0, [@content[0,1,2]], $f_default);
1072 &set_worksheet_line($ws_global, $row++, 3, [$content[3]/100,$content[4]/100], $f_percent);
1073 }
1074
059ec3d9 1075}
d5692f86 1076
608bc29d
SC
1077push(@chartdatanames, "Over " . format_time($times_aref->[-1]));
1078push(@chartdatavals, $overflow);
059ec3d9
PH
1079
1080#printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;
d5692f86
SC
1081if ($htm_fh) {
1082 print $htm_fh "</table>\n";
1083 print $htm_fh "</td><td>\n";
059ec3d9 1084
608bc29d 1085 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0)) {
059ec3d9
PH
1086 my @data = (
1087 \@chartdatanames,
1088 \@chartdatavals
1089 );
1090 my $graph = GD::Graph::pie->new(200, 200);
608bc29d
SC
1091 my $pngname = "$title-$message_type.png";
1092 $pngname =~ s/[^\w\-\.]/_/;
1093
1094 my $graph_title = "$title ($message_type)";
1095 $graph->set(title => $graph_title) if (length($graph_title) < 21);
1096
059ec3d9
PH
1097 my $gd = $graph->plot(\@data) or warn($graph->error);
1098 if ($gd) {
d5692f86 1099 open(IMG, ">$chartdir/$pngname") or die "Could not write $chartdir/$pngname: $!\n";
059ec3d9
PH
1100 binmode IMG;
1101 print IMG $gd->png;
1102 close IMG;
d5692f86 1103 print $htm_fh "<img src=\"$chartrel/$pngname\">";
059ec3d9
PH
1104 }
1105 }
d5692f86
SC
1106 print $htm_fh "</td></tr></table>\n";
1107}
1108
1109if ($xls_fh)
1110{
1111 $row++;
059ec3d9 1112}
d5692f86
SC
1113print $txt_fh "\n" if $txt_fh;
1114print $htm_fh "\n" if $htm_fh;
1115
059ec3d9
PH
1116}
1117
1118
1b4fe9dd
PH
1119#######################################################################
1120# print_histogram();
8e669ac1 1121#
608bc29d 1122# print_histogram('Deliveries|Messages received|$pattern', $unit, @interval_count);
8e669ac1 1123#
1b4fe9dd
PH
1124# Print a histogram of the messages delivered/received per time slot
1125# (hour by default).
1126#######################################################################
059ec3d9 1127sub print_histogram {
608bc29d 1128my($text, $unit, @interval_count) = @_;
059ec3d9
PH
1129my(@chartdatanames);
1130my(@chartdatavals);
1131my($maxd) = 0;
d5692f86
SC
1132
1133if (!$run_hist) # save first row of print_histogram for xls output
1134{
1135 $row_hist = $row;
1136}
1137else
1138{
1139 $row = $row_hist;
1140}
1141
059ec3d9
PH
1142for ($i = 0; $i < $hist_number; $i++)
1143 { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; }
1144
1145my $scale = int(($maxd + 25)/50);
1146$scale = 1 if $scale == 0;
1147
608bc29d
SC
1148if ($scale != 1) {
1149 if ($unit !~ s/y$/ies/) {
1150 $unit .= 's';
1151 }
d5692f86
SC
1152}
1153
1154# make and output title
1155my $title = sprintf("$text per %s",
1156 ($hist_interval == 60)? "hour" :
1157 ($hist_interval == 1)? "minute" : "$hist_interval minutes");
059ec3d9 1158
608bc29d 1159my $txt_htm_title = $title . " (each dot is $scale $unit)";
059ec3d9 1160
d5692f86
SC
1161printf $txt_fh ("%s\n%s\n\n", $txt_htm_title, "-" x length($txt_htm_title)) if $txt_fh;
1162
1163if ($htm_fh) {
1164 print $htm_fh "<hr><a name=\"$text\"></a><h2>$txt_htm_title</h2>\n";
1165 print $htm_fh "<table border=0 width=\"100%\">\n";
1166 print $htm_fh "<tr><td><pre>\n";
059ec3d9 1167}
d5692f86
SC
1168
1169if ($xls_fh)
1170{
1171 $title =~ s/Messages/Msg/ ;
1172 $ws_global->write($row++, $col_hist+1, $title, $f_headertab);
059ec3d9
PH
1173}
1174
d5692f86 1175
059ec3d9
PH
1176my $hour = 0;
1177my $minutes = 0;
1178for ($i = 0; $i < $hist_number; $i++)
d5692f86 1179{
059ec3d9
PH
1180 my $c = $interval_count[$i];
1181
1182 # If the interval is an hour (the maximum) print the starting and
1183 # ending hours as a label. Otherwise print the starting hour and
1184 # minutes, which take up the same space.
1185
1186 my $temp;
1187 if ($hist_opt == 1)
d5692f86 1188 {
059ec3d9 1189 $temp = sprintf("%02d-%02d", $hour, $hour + 1);
d5692f86
SC
1190
1191 print $txt_fh $temp if $txt_fh;
1192 print $htm_fh $temp if $htm_fh;
1193
1194 if ($xls_fh)
1195 {
1196 if ($run_hist==0) # only on first run
1197 {
1198 &set_worksheet_line($ws_global, $row, 0, [$temp], $f_default);
1199 }
1200 }
1201
059ec3d9
PH
1202 push(@chartdatanames, $temp);
1203 $hour++;
d5692f86 1204 }
059ec3d9 1205 else
d5692f86 1206 {
059ec3d9
PH
1207 if ($minutes == 0)
1208 { $temp = sprintf("%02d:%02d", $hour, $minutes) }
1209 else
1210 { $temp = sprintf(" :%02d", $minutes) }
d5692f86
SC
1211
1212 print $txt_fh $temp if $txt_fh;
1213 print $htm_fh $temp if $htm_fh;
1214 if (($xls_fh) and ($run_hist==0)) # only on first run
1215 {
1216 $temp = sprintf("%02d:%02d", $hour, $minutes);
1217 &set_worksheet_line($ws_global, $row, 0, [$temp], $f_default);
1218 }
1219
059ec3d9
PH
1220 push(@chartdatanames, $temp);
1221 $minutes += $hist_interval;
1222 if ($minutes >= 60)
1223 {
1224 $minutes = 0;
1225 $hour++;
1226 }
059ec3d9 1227 }
d5692f86
SC
1228 push(@chartdatavals, $c);
1229
1230 printf $txt_fh (" %6d %s\n", $c, "." x ($c/$scale)) if $txt_fh;
1231 printf $htm_fh (" %6d %s\n", $c, "." x ($c/$scale)) if $htm_fh;
1232 if ($xls_fh)
059ec3d9 1233 {
d5692f86
SC
1234 &set_worksheet_line($ws_global, $row++, $col_hist+1, [$c], $f_default);
1235 }
1236
1237} #end for
1238
1239printf $txt_fh "\n" if $txt_fh;
1240printf $htm_fh "\n" if $htm_fh;
1241
1242if ($htm_fh)
1243{
1244 print $htm_fh "</pre>\n";
1245 print $htm_fh "</td><td>\n";
608bc29d 1246 if ($HAVE_GD_Graph_linespoints && $charts && ($#chartdatavals > 0)) {
059ec3d9
PH
1247 # calculate the graph
1248 my @data = (
1249 \@chartdatanames,
1250 \@chartdatavals
1251 );
1252 my $graph = GD::Graph::linespoints->new(300, 300);
1253 $graph->set(
1254 x_label => 'Time',
1255 y_label => 'Amount',
1256 title => $text,
1257 x_labels_vertical => 1
1258 );
608bc29d
SC
1259 my $pngname = "histogram_$text.png";
1260 $pngname =~ s/[^\w\._]/_/g;
1261
059ec3d9
PH
1262 my $gd = $graph->plot(\@data) or warn($graph->error);
1263 if ($gd) {
d5692f86 1264 open(IMG, ">$chartdir/$pngname") or die "Could not write $chartdir/$pngname: $!\n";
059ec3d9
PH
1265 binmode IMG;
1266 print IMG $gd->png;
1267 close IMG;
d5692f86 1268 print $htm_fh "<img src=\"$chartrel/$pngname\">";
059ec3d9
PH
1269 }
1270 }
d5692f86 1271 print $htm_fh "</td></tr></table>\n";
059ec3d9 1272}
d5692f86
SC
1273
1274$col_hist++; # where to continue next times
1275
1276$row+=2; # leave some space after history block
1277$run_hist=1; # we have done this once or more
059ec3d9
PH
1278}
1279
1280
1281
1b4fe9dd
PH
1282#######################################################################
1283# print_league_table();
8e669ac1 1284#
1b4fe9dd 1285# print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs);
8e669ac1 1286#
1b4fe9dd
PH
1287# Given hashes of message count and message data, which are keyed by
1288# the table type (eg by the sending host), print a league table
1289# showing the top $topcount (defaults to 50).
1290#######################################################################
059ec3d9
PH
1291sub print_league_table {
1292my($text,$m_count,$m_data,$m_data_gigs) = @_;
1293my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";
1294my($temp) = "Top $name by message count";
1295my(@chartdatanames) = ();
1296my(@chartdatavals) = ();
1297my $chartotherval = 0;
1298
d5692f86
SC
1299my $htm_format;
1300my $txt_format = "%7d %10s %s\n";
1301
1302# write header
1303printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
1304if ($htm_fh) {
1305 print $htm_fh "<hr><a name=\"$text count\"></a><h2>$temp</h2>\n";
1306 print $htm_fh "<table border=0 width=\"100%\">\n";
1307 print $htm_fh "<tr><td>\n";
1308 print $htm_fh "<table border=1>\n";
1309 print $htm_fh "<tr><th>Messages</th><th>Bytes</th><th>Average</th><th>\u$text</th>\n";
059ec3d9
PH
1310
1311 # Align non-local addresses to the right (so all the .com's line up).
1312 # Local addresses are aligned on the left as they are userids.
1313 my $align = ($text !~ /local/i) ? 'right' : 'left';
d5692f86 1314 $htm_format = "<tr><td align=\"right\">%d</td><td align=\"right\">%s</td><td align=\"right\">%s</td><td align=\"$align\" nowrap>%s</td>\n";
059ec3d9 1315}
d5692f86
SC
1316if ($xls_fh)
1317{
1318 $ws_top50->write($row_league_table++, 0, $temp, $f_header2);
1319 &set_worksheet_line($ws_top50, $row_league_table++, 0, ["Messages", "Bytes", "Average", $text], $f_headertab );
059ec3d9
PH
1320}
1321
d5692f86
SC
1322
1323# write content
50adf73a 1324my($key,$htmlkey,$rounded_volume,$rounded_average,$count,$data,$gigs);
059ec3d9 1325foreach $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {
d5692f86
SC
1326
1327 # When displaying the average figures, we calculate the average of
1328 # the rounded data, as the user would calculate it. This reduces
1329 # the accuracy slightly, but we have to do it this way otherwise
1330 # when using -merge to convert results from text to HTML and
1331 # vice-versa discrepencies would occur.
1332 $rounded_volume = volume_rounded($$m_data{$key},$$m_data_gigs{$key});
1333 $data = $gigs = 0;
1334 un_round($rounded_volume,\$data,\$gigs);
1335 $count = $$m_count{$key};
1336 $rounded_average = volume_rounded($data/$count,$gigs/$count);
1337 my @content=( $count, $rounded_volume, $rounded_average);
1338
1339 # write content
1340 # any reason not to include rounded_average in txt-output? -fh
1341 printf $txt_fh ($txt_format, $count, $rounded_volume, $key) if $txt_fh;
1342
1343 if ($htm_fh) {
059ec3d9
PH
1344 $htmlkey = $key;
1345 $htmlkey =~ s/>/\&gt\;/g;
1346 $htmlkey =~ s/</\&lt\;/g;
d5692f86 1347 printf $htm_fh ($htm_format, @content, $htmlkey);
059ec3d9 1348 }
d5692f86
SC
1349 if ($xls_fh)
1350 {
1351 &set_worksheet_line($ws_top50, $row_league_table++, 0, [@content, $key], $f_default);
059ec3d9 1352 }
d5692f86 1353
059ec3d9 1354 if (scalar @chartdatanames < $ntopchart)
d5692f86 1355 {
059ec3d9
PH
1356 push(@chartdatanames, $key);
1357 push(@chartdatavals, $$m_count{$key});
d5692f86 1358 }
059ec3d9 1359 else
d5692f86 1360 {
059ec3d9 1361 $chartotherval += $$m_count{$key};
059ec3d9 1362 }
d5692f86
SC
1363}
1364
059ec3d9
PH
1365push(@chartdatanames, "Other");
1366push(@chartdatavals, $chartotherval);
1367
d5692f86
SC
1368print $txt_fh "\n" if $txt_fh;
1369if ($htm_fh)
1370{
1371 print $htm_fh "</table>\n";
1372 print $htm_fh "</td><td>\n";
608bc29d 1373 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0))
059ec3d9
PH
1374 {
1375 # calculate the graph
1376 my @data = (
1377 \@chartdatanames,
1378 \@chartdatavals
1379 );
1380 my $graph = GD::Graph::pie->new(300, 300);
1381 $graph->set(
1382 x_label => 'Name',
1383 y_label => 'Amount',
1384 title => 'By count',
1385 );
1386 my $gd = $graph->plot(\@data) or warn($graph->error);
1387 if ($gd) {
1388 my $temp = $text;
1389 $temp =~ s/ /_/g;
d5692f86 1390 open(IMG, ">$chartdir/${temp}_count.png") or die "Could not write $chartdir/${temp}_count.png: $!\n";
059ec3d9
PH
1391 binmode IMG;
1392 print IMG $gd->png;
1393 close IMG;
d5692f86 1394 print $htm_fh "<img src=\"$chartrel/${temp}_count.png\">";
059ec3d9
PH
1395 }
1396 }
d5692f86
SC
1397 print $htm_fh "</td><td>\n";
1398 print $htm_fh "</td></tr></table>\n\n";
1399}
1400if ($xls_fh)
1401{
1402 $row_league_table++;
059ec3d9 1403}
d5692f86
SC
1404
1405
1406# write header
059ec3d9
PH
1407
1408$temp = "Top $name by volume";
d5692f86
SC
1409
1410printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
1411if ($htm_fh) {
1412 print $htm_fh "<hr><a name=\"$text volume\"></a><h2>$temp</h2>\n";
1413 print $htm_fh "<table border=0 width=\"100%\">\n";
1414 print $htm_fh "<tr><td>\n";
1415 print $htm_fh "<table border=1>\n";
1416 print $htm_fh "<tr><th>Messages</th><th>Bytes</th><th>Average</th><th>\u$text</th>\n";
059ec3d9 1417}
d5692f86
SC
1418if ($xls_fh)
1419{
1420 $ws_top50->write($row_league_table++, 0, $temp, $f_header2);
1421 &set_worksheet_line($ws_top50, $row_league_table++, 0, ["Messages", "Bytes", "Average", $text], $f_headertab);
059ec3d9
PH
1422}
1423
1424@chartdatanames = ();
1425@chartdatavals = ();
1426$chartotherval = 0;
8974000d 1427my $use_gig = 0;
059ec3d9 1428foreach $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
884c2af8
SC
1429 # The largest volume will be the first (top of the list).
1430 # If it has at least 1 gig, then just use gigabytes to avoid
8e669ac1 1431 # risking an integer overflow when generating the pie charts.
8974000d
SC
1432 if ($$m_data_gigs{$key}) {
1433 $use_gig = 1;
1434 }
d5692f86
SC
1435
1436 $rounded_volume = volume_rounded($$m_data{$key},$$m_data_gigs{$key});
1437 $data = $gigs = 0;
1438 un_round($rounded_volume,\$data,\$gigs);
1439 $count = $$m_count{$key};
1440 $rounded_average = volume_rounded($data/$count,$gigs/$count);
1441 my @content=($count, $rounded_volume, $rounded_average );
1442
1443 # write content
1444 # any reasons for not including rounded_average in the txt-version?? -fh
1445 printf $txt_fh ($txt_format, $count, $rounded_volume, $key) if $txt_fh;
1446 if ($htm_fh) {
059ec3d9
PH
1447 $htmlkey = $key;
1448 $htmlkey =~ s/>/\&gt\;/g;
1449 $htmlkey =~ s/</\&lt\;/g;
d5692f86 1450 printf $htm_fh ($htm_format, @content, $htmlkey);
059ec3d9 1451 }
d5692f86
SC
1452 if ($xls_fh)
1453 {
1454 &set_worksheet_line($ws_top50, $row_league_table++, 0, [@content, $key], $f_default);
059ec3d9
PH
1455 }
1456
d5692f86 1457
8974000d
SC
1458 if (scalar @chartdatanames < $ntopchart) {
1459 if ($use_gig) {
1460 if ($$m_data_gigs{$key}) {
d5692f86
SC
1461 push(@chartdatanames, $key);
1462 push(@chartdatavals, $$m_data_gigs{$key});
8974000d 1463 }
059ec3d9 1464 }
8974000d
SC
1465 else {
1466 push(@chartdatanames, $key);
1467 push(@chartdatavals, $$m_data{$key});
059ec3d9
PH
1468 }
1469 }
8974000d
SC
1470 else {
1471 $chartotherval += ($use_gig) ? $$m_data_gigs{$key} : $$m_data{$key};
1472 }
1473}
059ec3d9
PH
1474push(@chartdatanames, "Other");
1475push(@chartdatavals, $chartotherval);
1476
d5692f86
SC
1477print $txt_fh "\n" if $txt_fh;
1478if ($htm_fh) {
1479 print $htm_fh "</table>\n";
1480 print $htm_fh "</td><td>\n";
608bc29d 1481 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0)) {
059ec3d9
PH
1482 # calculate the graph
1483 my @data = (
1484 \@chartdatanames,
1485 \@chartdatavals
1486 );
1487 my $graph = GD::Graph::pie->new(300, 300);
1488 $graph->set(
1489 x_label => 'Name',
8974000d 1490 y_label => 'Volume' ,
059ec3d9
PH
1491 title => 'By Volume',
1492 );
1493 my $gd = $graph->plot(\@data) or warn($graph->error);
1494 if ($gd) {
d5692f86 1495 $temp = $text;
059ec3d9 1496 $temp =~ s/ /_/g;
d5692f86 1497 open(IMG, ">$chartdir/${temp}_volume.png") or die "Could not write $chartdir/${temp}_volume.png: $!\n";
059ec3d9
PH
1498 binmode IMG;
1499 print IMG $gd->png;
1500 close IMG;
d5692f86 1501 print $htm_fh "<img src=\"$chartrel/${temp}_volume.png\">";
059ec3d9
PH
1502 }
1503 }
d5692f86
SC
1504 print $htm_fh "</td><td>\n";
1505 print $htm_fh "</td></tr></table>\n\n";
059ec3d9 1506}
d5692f86
SC
1507if ($xls_fh)
1508{
1509 ++$row_league_table;
059ec3d9
PH
1510}
1511
d5692f86 1512}
059ec3d9 1513
1b4fe9dd
PH
1514#######################################################################
1515# top_n_sort();
8e669ac1 1516#
1b4fe9dd 1517# @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
8e669ac1 1518#
1b4fe9dd
PH
1519# Given a hash which has numerical values, return the sorted $n keys which
1520# point to the top values. The second and third hashes are used as
1521# tiebreakers. They all must have the same keys.
8e669ac1 1522#
1b4fe9dd
PH
1523# The idea behind this routine is that when you only want to see the
1524# top n members of a set, rather than sorting the entire set and then
1525# plucking off the top n, sort through the stack as you go, discarding
1526# any member which is lower than your current n'th highest member.
8e669ac1 1527#
1b4fe9dd
PH
1528# This proves to be an order of magnitude faster for large hashes.
1529# On 200,000 lines of mainlog it benchmarked 9 times faster.
1530# On 700,000 lines of mainlog it benchmarked 13.8 times faster.
8e669ac1 1531#
1b4fe9dd
PH
1532# We assume the values are > 0.
1533#######################################################################
059ec3d9
PH
1534sub top_n_sort {
1535 my($n,$href1,$href2,$href3) = @_;
1536
1537 # PH's original sort was:
1538 #
1539 # foreach $key (sort
1540 # {
1541 # $$m_count{$b} <=> $$m_count{$a} ||
1542 # $$m_data_gigs{$b} <=> $$m_data_gigs{$a} ||
1543 # $$m_data{$b} <=> $$m_data{$a} ||
1544 # $a cmp $b
1545 # }
1546 # keys %{$m_count})
1547 #
1548
1549 #We use a key of '_' to represent non-existant values, as null keys are valid.
1550 #'_' is not a valid domain, edomain, host, or email.
1551 my(@top_n_keys) = ('_') x $n;
1552 my($minimum_value1,$minimum_value2,$minimum_value3) = (0,0,0);
1553 my $top_n_key = '';
1554 my $n_minus_1 = $n - 1;
1555 my $n_minus_2 = $n - 2;
1556
8e669ac1 1557 # Pick out the top $n keys.
059ec3d9
PH
1558 my($key,$value1,$value2,$value3,$i,$comparison,$insert_position);
1559 while (($key,$value1) = each %$href1) {
1560
1561 #print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n";
8e669ac1 1562
059ec3d9
PH
1563 # Check to see that the new value is bigger than the lowest of the
1564 # top n keys that we're keeping.
8e669ac1 1565 $comparison = $value1 <=> $minimum_value1 ||
d5692f86
SC
1566 $href2->{$key} <=> $minimum_value2 ||
1567 $href3->{$key} <=> $minimum_value3 ||
1568 $top_n_key cmp $key;
059ec3d9
PH
1569 next unless ($comparison == 1);
1570
1571 # As we will be using these values a few times, extract them into scalars.
1572 $value2 = $href2->{$key};
1573 $value3 = $href3->{$key};
1574
1575 # This key is bigger than the bottom n key, so the lowest position we
1576 # will insert it into is $n minus 1 (the bottom of the list).
1577 $insert_position = $n_minus_1;
1578
1579 # Now go through the list, stopping when we find a key that we're
1580 # bigger than, or we come to the penultimate position - we've
1581 # already tested bigger than the last.
1582 #
1583 # Note: we go top down as the list starts off empty.
1584 # Note: stepping through the list in this way benchmarks nearly
1585 # three times faster than doing a sort() on the reduced list.
1586 # I assume this is because the list is already in order, and
1587 # we get a performance boost from not having to do hash lookups
1588 # on the new key.
1589 for ($i = 0; $i < $n_minus_1; $i++) {
1590 $top_n_key = $top_n_keys[$i];
1591 if ( ($top_n_key eq '_') ||
d5692f86 1592 ( ($value1 <=> $href1->{$top_n_key} ||
059ec3d9 1593 $value2 <=> $href2->{$top_n_key} ||
d5692f86
SC
1594 $value3 <=> $href3->{$top_n_key} ||
1595 $top_n_key cmp $key) == 1
1596 )
1597 ) {
1598 $insert_position = $i;
1599 last;
059ec3d9
PH
1600 }
1601 }
1602
1603 # Remove the last element, then insert the new one.
1604 $#top_n_keys = $n_minus_2;
1605 splice(@top_n_keys,$insert_position,0,$key);
1606
1607 # Extract our new minimum values.
1608 $top_n_key = $top_n_keys[$n_minus_1];
1609 if ($top_n_key ne '_') {
1610 $minimum_value1 = $href1->{$top_n_key};
1611 $minimum_value2 = $href2->{$top_n_key};
1612 $minimum_value3 = $href3->{$top_n_key};
1613 }
1614 }
1615
1616 # Return the top n list, grepping out non-existant values, just in case
1617 # we didn't have that many values.
1618 return(grep(!/^_$/,@top_n_keys));
1619}
1620
1621
1b4fe9dd
PH
1622#######################################################################
1623# html_header();
8e669ac1 1624#
1b4fe9dd 1625# $header = html_header($title);
8e669ac1 1626#
1b4fe9dd
PH
1627# Print our HTML header and start the <body> block.
1628#######################################################################
059ec3d9
PH
1629sub html_header {
1630 my($title) = @_;
1631 my $text = << "EoText";
1632<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
1633<html>
1634<head>
1635<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15">
1636<title>$title</title>
1637</head>
1638<body bgcolor="white">
1639<h1>$title</h1>
1640EoText
1641 return $text;
1642}
1643
1644
1645
1b4fe9dd
PH
1646#######################################################################
1647# help();
8e669ac1 1648#
1b4fe9dd 1649# help();
8e669ac1 1650#
1b4fe9dd
PH
1651# Display usage instructions and exit.
1652#######################################################################
059ec3d9
PH
1653sub help {
1654 print << "EoText";
1655
1656eximstats Version $VERSION
1657
d5692f86
SC
1658Usage:
1659 eximstats [Output] [Options] mainlog1 mainlog2 ...
1660 eximstats -merge -html [Options] report.1.html ... > weekly_rep.html
1661
1662Examples:
1663 eximstats -html=eximstats.html mainlog1 mainlog2 ...
1664 eximstats mainlog1 mainlog2 ... > report.txt
059ec3d9 1665
8974000d 1666Parses exim mainlog or syslog files and generates a statistical analysis
d5692f86
SC
1667of the messages processed.
1668
1669Valid output types are:
1670-txt[=<file>] plain text (default unless no other type is specified)
1671-html[=<file>] HTML
1672-xls[=<file>] Excel
1673With no type and file given, defaults to -txt and STDOUT.
059ec3d9 1674
d5692f86 1675Valid options are:
059ec3d9
PH
1676-h<number> histogram divisions per hour. The default is 1, and
1677 0 suppresses histograms. Other valid values are:
d5692f86 1678 2, 3, 5, 10, 15, 20, 30 or 60.
059ec3d9
PH
1679-ne don't display error information
1680-nr don't display relaying information
1681-nr/pattern/ don't display relaying information that matches
1682-nt don't display transport information
1683-nt/pattern/ don't display transport information that matches
d5692f86 1684-nvr don't do volume rounding. Display in bytes, not KB/MB/GB.
059ec3d9
PH
1685-t<number> display top <number> sources/destinations
1686 default is 50, 0 suppresses top listing
1687-tnl omit local sources/destinations in top listing
1688-t_remote_users show top user sources/destinations from non-local domains
608bc29d
SC
1689-q<list> list of times for queuing information. -q0 suppresses.
1690-show_rt<list> Show the receipt times for all the messages.
1691-show_dt<list> Show the delivery times for all the messages.
1692 <list> is an optional list of times in seconds.
1693 Eg -show_rt1,2,4,8.
1694
d5692f86
SC
1695-include_original_destination show both the final and original
1696 destinations in the results rather than just the final ones.
059ec3d9 1697
d5692f86 1698-byhost show results by sending host (default unless bydomain or
059ec3d9 1699 byemail is specified)
d5692f86
SC
1700-bydomain show results by sending domain.
1701-byemail show results by sender's email address
1702-byedomain show results by sender's email domain
059ec3d9
PH
1703
1704-pattern "Description" /pattern/
1705 Count lines matching specified patterns and show them in
d5692f86
SC
1706 the results. It can be specified multiple times. Eg:
1707 -pattern 'Refused connections' '/refused connection/'
059ec3d9
PH
1708
1709-merge merge previously generated reports into a new report
1710
d5692f86
SC
1711-charts Create charts (this requires the GD::Graph modules).
1712 Only valid with -html.
059ec3d9
PH
1713-chartdir <dir> Create the charts' png files in the directory <dir>
1714-chartrel <dir> Specify the relative directory for the "img src=" tags
1715 from where to include the charts in the html file
d5692f86 1716 -chartdir and -chartrel default to '.'
059ec3d9 1717
d5692f86 1718-d Debug mode - dump the eval'ed parser onto STDERR.
059ec3d9
PH
1719
1720EoText
1721
1722 exit 1;
1723}
1724
1725
1726
1b4fe9dd
PH
1727#######################################################################
1728# generate_parser();
8e669ac1 1729#
1b4fe9dd 1730# $parser = generate_parser();
8e669ac1 1731#
1b4fe9dd
PH
1732# This subroutine generates the parsing routine which will be
1733# used to parse the mainlog. We take the base operation, and remove bits not in use.
1734# This improves performance depending on what bits you take out or add.
8e669ac1 1735#
1b4fe9dd 1736# I've tested using study(), but this does not improve performance.
8e669ac1 1737#
1b4fe9dd
PH
1738# We store our parsing routing in a variable, and process it looking for #IFDEF (Expression)
1739# or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
1740# the expression evaluates to true, then it is included/excluded accordingly.
1741#######################################################################
059ec3d9
PH
1742sub generate_parser {
1743 my $parser = '
1744 my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
1745 my($tod,$m_hour,$m_min,$id,$flag);
608bc29d 1746 my($seconds,$queued,$rcpt_time);
059ec3d9 1747 while (<$fh>) {
059ec3d9 1748
8974000d
SC
1749 # Convert syslog lines to mainlog format.
1750 if (! /^\\d{4}/) {
1751 next unless s/^.*? exim\\b.*?: //;
1752 }
1753
1754 next if length($_) < 38;
059ec3d9
PH
1755 next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o;
1756
1757 ($tod,$m_hour,$m_min) = ($1,$2,$3);
1758
1759 # PH
1760 my($extra) = defined($4)? 6 : 0;
1761 $id = substr($_, 20 + $extra, 16);
1762 $flag = substr($_, 37 + $extra, 2);
1763';
1764
1765 # Watch for user specified patterns.
1766 my $user_pattern_index = 0;
1767 foreach (@user_patterns) {
1768 $user_pattern_totals[$user_pattern_index] = 0;
608bc29d
SC
1769 $parser .= <<EoText;
1770 if ($_) {
1771 \$user_pattern_totals[$user_pattern_index]++ if $_;
1772 \$user_pattern_interval_count[$user_pattern_index][(\$m_hour*60 + \$m_min)/$hist_interval]++;
1773 }
1774EoText
059ec3d9
PH
1775 $user_pattern_index++;
1776 }
1777
1778 $parser .= '
1779 next unless ($flag =~ /<=|=>|->|==|\\*\\*|Co/);
1780
1781 #Strip away the timestamp, ID and flag (which could be "Com" for completed)
1782 #This speeds up the later pattern matches.
1783 # $_ = substr($_, 40);
1784
1785 $_ = substr($_, 40 + $extra); # PH
1786
608bc29d
SC
1787 # Get a pointer to an array of information about the message.
1788 # This minimises the number of calls to hash functions.
1789 $messages{$id} = [] unless exists $messages{$id};
1790 $message_aref = $messages{$id};
1791
1792
059ec3d9
PH
1793 # JN - Skip over certain transports as specified via the "-nt/.../" command
1794 # line switch (where ... is a perl style regular expression). This is
1795 # required so that transports that skew stats such as SpamAssassin can be
1796 # ignored.
1797 #IFDEF ($transport_pattern)
1798 if (/\\sT=(\\S+)/) {
1799 next if ($1 =~ /$transport_pattern/o) ;
1800 }
1801 #ENDIF ($transport_pattern)
1802
1803
d5692f86
SC
1804 $host = "local"; #Host is local unless otherwise specified.
1805 $domain = "localdomain"; #Domain is localdomain unless otherwise specified.
059ec3d9
PH
1806
1807
1808 # Do some pattern matches to get the host and IP address.
1809 # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or
1810 # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
1811 # We do 2 separate matches to keep the matches simple and fast.
1812 if (/\\sH=(\\S+)/) {
1813 $host = $1;
1814
1815 ($ip) = /\\sH=.*?(\\s\\[[^]]+\\])/;
1816 # If there is only an IP address, it will be in $host and $ip will be
1817 # unset. That is OK, because we only use $ip in conjunction with $host
1818 # below. But make it empty to avoid warning messages.
1819 $ip = "" if !defined $ip;
1820
1821 #IFDEF ($do_sender{Domain})
1822 if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
d5692f86
SC
1823 # Remove the host portion from the DNS name. We ensure that we end up
1824 # with at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z".
1825 $domain = lc("$1.$2");
1826 $domain =~ s/^\\.//; #Remove preceding dot.
059ec3d9
PH
1827 }
1828 #ENDIF ($do_sender{Domain})
1829
1830 }
1831
1832 #IFDEF ($do_sender{Email})
d5692f86
SC
1833 #IFDEF ($include_original_destination)
1834 # Catch both "a@b.com <c@d.com>" and "e@f.com"
1835 #$email = (/^(\S+) (<(\S*?)>)?/) ? $3 || $1 : "";
1836 $email = (/^(\S+ (<[^@>]+@?[^>]*>)?)/) ? $1 : "";
1837 chomp($email);
1838 #ENDIF ($include_original_destination)
1839
1840 #IFNDEF ($include_original_destination)
1841 $email = (/^(\S+)/) ? $1 : "";
1842 #ENDIF ($include_original_destination)
059ec3d9
PH
1843 #ENDIF ($do_sender{Email})
1844
1845 #IFDEF ($do_sender{Edomain})
d5692f86
SC
1846 #IFDEF ($include_original_destination)
1847 #$edomain = (/^(\S+) (<\S*?\\@(\S+)>)?/) ? $3 || $1 : "";
1848 $edomain = (/^(\S+ (<\S*?\\@(\S+?)>)?)/) ? $1 : "";
1849 chomp($edomain);
1850 lc($edomain);
1851 #ENDIF ($include_original_destination)
1852
1853 #IFNDEF ($include_original_destination)
1854 $edomain = (/^\S*?\\@(\S+)/) ? lc($1) : "";
1855 #ENDIF ($include_original_destination)
059ec3d9
PH
1856 #ENDIF ($do_sender{Edomain})
1857
1858 if ($tod lt $begin) {
1859 $begin = $tod;
1860 }
1861 elsif ($tod gt $end) {
1862 $end = $tod;
1863 }
1864
1865
1866 if ($flag eq "<=") {
1867 $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0;
608bc29d
SC
1868 $message_aref->[$SIZE] = $thissize;
1869 $message_aref->[$PROTOCOL] = (/ P=(\S+)/) ? $1 : undef;
059ec3d9
PH
1870
1871 #IFDEF ($show_relay)
1872 if ($host ne "local") {
d5692f86
SC
1873 # Save incoming information in case it becomes interesting
1874 # later, when delivery lines are read.
1875 my($from) = /^(\\S+)/;
608bc29d
SC
1876 $message_aref->[$FROM_HOST] = "$host$ip";
1877 $message_aref->[$FROM_ADDRESS] = $from;
059ec3d9
PH
1878 }
1879 #ENDIF ($show_relay)
1880
1881 #IFDEF ($local_league_table || $include_remote_users)
d5692f86
SC
1882 if (/\sU=(\\S+)/) {
1883 my $user = $1;
059ec3d9 1884
d5692f86
SC
1885 #IFDEF ($local_league_table && $include_remote_users)
1886 { #Store both local and remote users.
1887 #ENDIF ($local_league_table && $include_remote_users)
059ec3d9 1888
d5692f86
SC
1889 #IFDEF ($local_league_table && ! $include_remote_users)
1890 if ($host eq "local") { #Store local users only.
1891 #ENDIF ($local_league_table && ! $include_remote_users)
059ec3d9 1892
d5692f86
SC
1893 #IFDEF ($include_remote_users && ! $local_league_table)
1894 if ($host ne "local") { #Store remote users only.
1895 #ENDIF ($include_remote_users && ! $local_league_table)
059ec3d9 1896
608bc29d 1897 ++$received_count_user{$user};
d5692f86 1898 add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize);
059ec3d9 1899 }
d5692f86 1900 }
059ec3d9
PH
1901 #ENDIF ($local_league_table || $include_remote_users)
1902
1903 #IFDEF ($do_sender{Host})
608bc29d 1904 ++$received_count{Host}{$host};
d5692f86 1905 add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize);
059ec3d9
PH
1906 #ENDIF ($do_sender{Host})
1907
1908 #IFDEF ($do_sender{Domain})
1909 if ($domain) {
608bc29d 1910 ++$received_count{Domain}{$domain};
d5692f86
SC
1911 add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize);
1912 }
059ec3d9
PH
1913 #ENDIF ($do_sender{Domain})
1914
1915 #IFDEF ($do_sender{Email})
608bc29d 1916 ++$received_count{Email}{$email};
d5692f86 1917 add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize);
059ec3d9
PH
1918 #ENDIF ($do_sender{Email})
1919
1920 #IFDEF ($do_sender{Edomain})
608bc29d 1921 ++$received_count{Edomain}{$edomain};
d5692f86 1922 add_volume(\\$received_data{Edomain}{$edomain},\\$received_data_gigs{Edomain}{$edomain},$thissize);
059ec3d9
PH
1923 #ENDIF ($do_sender{Edomain})
1924
608bc29d 1925 ++$total_received_count;
059ec3d9
PH
1926 add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize);
1927
608bc29d
SC
1928 #IFDEF ($#queue_times >= 0 || $#rcpt_times >= 0)
1929 $message_aref->[$ARRIVAL_TIME] = $tod;
1930 #ENDIF ($#queue_times >= 0 || $#rcpt_times >= 0)
059ec3d9
PH
1931
1932 #IFDEF ($hist_opt > 0)
d5692f86 1933 $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
059ec3d9
PH
1934 #ENDIF ($hist_opt > 0)
1935 }
1936
1937 elsif ($flag eq "=>") {
608bc29d 1938 $size = $message_aref->[$SIZE] || 0;
059ec3d9 1939 if ($host ne "local") {
608bc29d 1940 $message_aref->[$REMOTE_DELIVERED] = 1;
059ec3d9
PH
1941
1942
1943 #IFDEF ($show_relay)
1944 # Determine relaying address if either only one address listed,
1945 # or two the same. If they are different, it implies a forwarding
1946 # or aliasing, which is not relaying. Note that for multi-aliased
1947 # addresses, there may be a further address between the first
1948 # and last.
1949
608bc29d 1950 if (defined $message_aref->[$FROM_HOST]) {
059ec3d9
PH
1951 if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) {
1952 ($old,$new) = ($1,$2);
d5692f86 1953 }
059ec3d9 1954 else {
d5692f86
SC
1955 $old = $new = "";
1956 }
059ec3d9
PH
1957
1958 if ("\\L$new" eq "\\L$old") {
1959 ($old) = /^(\\S+)/ if $old eq "";
608bc29d 1960 my $key = "H=\\L$message_aref->[$FROM_HOST]\\E A=\\L$message_aref->[$FROM_ADDRESS]\\E => " .
059ec3d9
PH
1961 "H=\\L$host\\E$ip A=\\L$old\\E";
1962 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
1963 $relayed{$key} = 0 if !defined $relayed{$key};
608bc29d 1964 ++$relayed{$key};
d5692f86 1965 }
059ec3d9 1966 else {
608bc29d 1967 ++$relayed_unshown;
059ec3d9
PH
1968 }
1969 }
1970 }
1971 #ENDIF ($show_relay)
1972
1973 }
1974
1975 #IFDEF ($local_league_table || $include_remote_users)
d5692f86
SC
1976 #IFDEF ($local_league_table && $include_remote_users)
1977 { #Store both local and remote users.
1978 #ENDIF ($local_league_table && $include_remote_users)
1979
1980 #IFDEF ($local_league_table && ! $include_remote_users)
1981 if ($host eq "local") { #Store local users only.
1982 #ENDIF ($local_league_table && ! $include_remote_users)
1983
1984 #IFDEF ($include_remote_users && ! $local_league_table)
1985 if ($host ne "local") { #Store remote users only.
1986 #ENDIF ($include_remote_users && ! $local_league_table)
1987
1988 if (my($user) = split((/\\s</)? " <" : " ", $_)) {
1989 #IFDEF ($include_original_destination)
1990 {
1991 #ENDIF ($include_original_destination)
1992 #IFNDEF ($include_original_destination)
1993 if ($user =~ /^[\\/|]/) {
1994 #ENDIF ($include_original_destination)
1995 my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
1996 $user = "$user $parent" if defined $parent;
1997 }
608bc29d 1998 ++$delivered_count_user{$user};
d5692f86
SC
1999 add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size);
2000 }
2001 }
059ec3d9
PH
2002 #ENDIF ($local_league_table || $include_remote_users)
2003
2004 #IFDEF ($do_sender{Host})
d5692f86
SC
2005 $delivered_count{Host}{$host}++;
2006 add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size);
059ec3d9
PH
2007 #ENDIF ($do_sender{Host})
2008 #IFDEF ($do_sender{Domain})
2009 if ($domain) {
608bc29d 2010 ++$delivered_count{Domain}{$domain};
d5692f86
SC
2011 add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size);
2012 }
059ec3d9
PH
2013 #ENDIF ($do_sender{Domain})
2014 #IFDEF ($do_sender{Email})
608bc29d 2015 ++$delivered_count{Email}{$email};
d5692f86 2016 add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size);
059ec3d9
PH
2017 #ENDIF ($do_sender{Email})
2018 #IFDEF ($do_sender{Edomain})
608bc29d 2019 ++$delivered_count{Edomain}{$edomain};
d5692f86 2020 add_volume(\\$delivered_data{Edomain}{$edomain},\\$delivered_data_gigs{Edomain}{$edomain},$size);
059ec3d9
PH
2021 #ENDIF ($do_sender{Edomain})
2022
608bc29d 2023 ++$total_delivered_count;
059ec3d9
PH
2024 add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size);
2025
2026 #IFDEF ($show_transport)
2027 my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:";
608bc29d 2028 ++$transported_count{$transport};
059ec3d9
PH
2029 add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size);
2030 #ENDIF ($show_transport)
2031
2032 #IFDEF ($hist_opt > 0)
2033 $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
2034 #ENDIF ($hist_opt > 0)
2035
608bc29d
SC
2036 #IFDEF ($#delivery_times > 0)
2037 if (/ DT=(\S+)/) {
2038 $seconds = wdhms_seconds($1);
2039 for ($i = 0; $i <= $#delivery_times; $i++) {
2040 if ($seconds < $delivery_times[$i]) {
2041 ++$dt_all_bin[$i];
2042 ++$dt_remote_bin[$i] if $message_aref->[$REMOTE_DELIVERED];
2043 last;
2044 }
2045 }
2046 if ($i > $#delivery_times) {
2047 ++$dt_all_overflow;
2048 ++$dt_remote_overflow if $message_aref->[$REMOTE_DELIVERED];
2049 }
2050 }
2051 #ENDIF ($#delivery_times > 0)
2052
059ec3d9
PH
2053 }
2054
608bc29d
SC
2055 elsif ($flag eq "==" && defined($message_aref->[$SIZE]) && !defined($message_aref->[$DELAYED])) {
2056 ++$delayed_count;
2057 $message_aref->[$DELAYED] = 1;
059ec3d9
PH
2058 }
2059
2060 elsif ($flag eq "**") {
608bc29d
SC
2061 if (defined ($message_aref->[$SIZE])) {
2062 unless (defined $message_aref->[$HAD_ERROR]) {
2063 ++$message_errors;
2064 $message_aref->[$HAD_ERROR] = 1;
2065 }
2066 }
059ec3d9
PH
2067
2068 #IFDEF ($show_errors)
608bc29d 2069 ++$errors_count{$_};
059ec3d9
PH
2070 #ENDIF ($show_errors)
2071
2072 }
2073
2074 elsif ($flag eq "Co") {
2075 #Completed?
2076 #IFDEF ($#queue_times >= 0)
608bc29d 2077 $queued = queue_time($tod, $message_aref->[$ARRIVAL_TIME], $id);
059ec3d9
PH
2078
2079 for ($i = 0; $i <= $#queue_times; $i++) {
2080 if ($queued < $queue_times[$i]) {
608bc29d
SC
2081 ++$qt_all_bin[$i];
2082 ++$qt_remote_bin[$i] if $message_aref->[$REMOTE_DELIVERED];
059ec3d9 2083 last;
d5692f86
SC
2084 }
2085 }
608bc29d
SC
2086 if ($i > $#queue_times) {
2087 ++$qt_all_overflow;
2088 ++$qt_remote_overflow if $message_aref->[$REMOTE_DELIVERED];
2089 }
059ec3d9
PH
2090 #ENDIF ($#queue_times >= 0)
2091
608bc29d
SC
2092 #IFDEF ($#rcpt_times >= 0)
2093 if (/ QT=(\S+)/) {
2094 $seconds = wdhms_seconds($1);
2095 #Calculate $queued if not previously calculated above.
2096 #IFNDEF ($#queue_times >= 0)
2097 $queued = queue_time($tod, $message_aref->[$ARRIVAL_TIME], $id);
2098 #ENDIF ($#queue_times >= 0)
2099 $rcpt_time = $seconds - $queued;
2100 my($protocol);
2101
2102 if (defined $message_aref->[$PROTOCOL]) {
2103 $protocol = $message_aref->[$PROTOCOL];
2104
2105 # Create the bin if its not already defined.
2106 unless (exists $rcpt_times_bin{$protocol}) {
2107 initialise_rcpt_times($protocol);
2108 }
2109 }
2110
2111
2112 for ($i = 0; $i <= $#rcpt_times; ++$i) {
2113 if ($rcpt_time < $rcpt_times[$i]) {
2114 ++$rcpt_times_bin{all}[$i];
2115 ++$rcpt_times_bin{$protocol}[$i] if defined $protocol;
2116 last;
2117 }
2118 }
059ec3d9 2119
608bc29d
SC
2120 if ($i > $#rcpt_times) {
2121 ++$rcpt_times_overflow{all};
2122 ++$rcpt_times_overflow{$protocol} if defined $protocol;
2123 }
2124 }
2125 #ENDIF ($#rcpt_times >= 0)
2126
2127 delete($messages{$id});
059ec3d9
PH
2128 }
2129 }';
2130
2131 # We now do a 'C preprocessor style operation on our parser
2132 # to remove bits not in use.
2133 my(%defines_in_operation,$removing_lines,$processed_parser);
2134 foreach (split (/\n/,$parser)) {
2135 if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i && ! eval $1) ||
d5692f86 2136 (/^\s*#\s*IFNDEF\s*\((.*?)\)/i && eval $1) ) {
059ec3d9
PH
2137 $defines_in_operation{$1} = 1;
2138 $removing_lines = 1;
2139 }
2140
608bc29d
SC
2141 # Convert constants.
2142 while (/(\$[A-Z][A-Z_]*)\b/) {
2143 my $constant = eval $1;
2144 s/(\$[A-Z][A-Z_]*)\b/$constant/;
2145 }
2146
059ec3d9
PH
2147 $processed_parser .= $_."\n" unless $removing_lines;
2148
2149 if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) {
2150 delete $defines_in_operation{$1};
2151 unless (keys %defines_in_operation) {
d5692f86 2152 $removing_lines = 0;
059ec3d9
PH
2153 }
2154 }
2155 }
608bc29d 2156 print STDERR "# START OF PARSER:$processed_parser\n# END OF PARSER\n\n" if $debug;
059ec3d9
PH
2157
2158 return $processed_parser;
2159}
2160
2161
2162
1b4fe9dd
PH
2163#######################################################################
2164# parse();
8e669ac1 2165#
1b4fe9dd 2166# parse($parser,\*FILEHANDLE);
8e669ac1 2167#
1b4fe9dd
PH
2168# This subroutine accepts a parser and a filehandle from main and parses each
2169# line. We store the results into global variables.
2170#######################################################################
059ec3d9
PH
2171sub parse {
2172 my($parser,$fh) = @_;
2173
2174 if ($merge_reports) {
2175 parse_old_eximstat_reports($fh);
2176 }
2177 else {
2178 eval $parser;
2179 die ($@) if $@;
2180 }
2181
2182}
2183
2184
2185
1b4fe9dd
PH
2186#######################################################################
2187# print_header();
8e669ac1 2188#
1b4fe9dd 2189# print_header();
8e669ac1 2190#
1b4fe9dd
PH
2191# Print our headers and contents.
2192#######################################################################
059ec3d9
PH
2193sub print_header {
2194
d5692f86 2195
059ec3d9
PH
2196 my $title = "Exim statistics from $begin to $end";
2197
d5692f86
SC
2198 print $txt_fh "\n$title\n" if $txt_fh;
2199 if ($htm_fh) {
2200 print $htm_fh html_header($title);
2201 print $htm_fh "<ul>\n";
2202 print $htm_fh "<li><a href=\"#grandtotal\">Grand total summary</a>\n";
2203 print $htm_fh "<li><a href=\"#patterns\">User Specified Patterns</a>\n" if @user_patterns;
2204 print $htm_fh "<li><a href=\"#transport\">Deliveries by Transport</a>\n" if $show_transport;
059ec3d9 2205 if ($hist_opt) {
d5692f86
SC
2206 print $htm_fh "<li><a href=\"#Messages received\">Messages received per hour</a>\n";
2207 print $htm_fh "<li><a href=\"#Deliveries\">Deliveries per hour</a>\n";
059ec3d9 2208 }
608bc29d 2209
059ec3d9 2210 if ($#queue_times >= 0) {
608bc29d
SC
2211 print $htm_fh "<li><a href=\"#Time spent on the queue all messages\">Time spent on the queue: all messages</a>\n";
2212 print $htm_fh "<li><a href=\"#Time spent on the queue messages with at least one remote delivery\">Time spent on the queue: messages with at least one remote delivery</a>\n";
2213 }
2214
2215 if ($#delivery_times >= 0) {
2216 print $htm_fh "<li><a href=\"#Delivery times all messages\">Delivery times: all messages</a>\n";
2217 print $htm_fh "<li><a href=\"#Delivery times messages with at least one remote delivery\">Delivery times: messages with at least one remote delivery</a>\n";
2218 }
2219
2220 if ($#rcpt_times >= 0) {
2221 print $htm_fh "<li><a href=\"#Receipt times all messages\">Receipt times</a>\n";
059ec3d9 2222 }
608bc29d 2223
d5692f86 2224 print $htm_fh "<li><a href=\"#Relayed messages\">Relayed messages</a>\n" if $show_relay;
059ec3d9
PH
2225 if ($topcount) {
2226 foreach ('Host','Domain','Email','Edomain') {
d5692f86
SC
2227 next unless $do_sender{$_};
2228 print $htm_fh "<li><a href=\"#sending \l$_ count\">Top $topcount sending \l${_}s by message count</a>\n";
2229 print $htm_fh "<li><a href=\"#sending \l$_ volume\">Top $topcount sending \l${_}s by volume</a>\n";
059ec3d9
PH
2230 }
2231 if ($local_league_table || $include_remote_users) {
d5692f86
SC
2232 print $htm_fh "<li><a href=\"#local sender count\">Top $topcount local senders by message count</a>\n";
2233 print $htm_fh "<li><a href=\"#local sender volume\">Top $topcount local senders by volume</a>\n";
059ec3d9
PH
2234 }
2235 foreach ('Host','Domain','Email','Edomain') {
d5692f86
SC
2236 next unless $do_sender{$_};
2237 print $htm_fh "<li><a href=\"#\l$_ destination count\">Top $topcount \l$_ destinations by message count</a>\n";
2238 print $htm_fh "<li><a href=\"#\l$_ destination volume\">Top $topcount \l$_ destinations by volume</a>\n";
059ec3d9
PH
2239 }
2240 if ($local_league_table || $include_remote_users) {
d5692f86
SC
2241 print $htm_fh "<li><a href=\"#local destination count\">Top $topcount local destinations by message count</a>\n";
2242 print $htm_fh "<li><a href=\"#local destination volume\">Top $topcount local destinations by volume</a>\n";
059ec3d9
PH
2243 }
2244 }
d5692f86
SC
2245 print $htm_fh "<li><a href=\"#errors\">List of errors</a>\n" if %errors_count;
2246 print $htm_fh "</ul>\n<hr>\n";
059ec3d9 2247 }
d5692f86
SC
2248 if ($xls_fh)
2249 {
2250 $ws_global->write($row++, $col+0, "Exim Statistics", $f_header1);
2251 &set_worksheet_line($ws_global, $row, $col, ["from:", $begin, "to:", $end], $f_default);
2252 $row+=2;
059ec3d9
PH
2253 }
2254}
2255
2256
1b4fe9dd
PH
2257#######################################################################
2258# print_grandtotals();
8e669ac1 2259#
1b4fe9dd 2260# print_grandtotals();
8e669ac1 2261#
1b4fe9dd
PH
2262# Print the grand totals.
2263#######################################################################
059ec3d9
PH
2264sub print_grandtotals {
2265
2266 # Get the sender by headings and results. This is complicated as we can have
2267 # different numbers of columns.
2268 my($sender_txt_header,$sender_html_header,$sender_txt_format,$sender_html_format);
2269 my(@received_totals,@delivered_totals);
d5692f86
SC
2270 my($row_tablehead, $row_max);
2271
059ec3d9
PH
2272 foreach ('Host','Domain','Email','Edomain') {
2273 next unless $do_sender{$_};
2274 if ($merge_reports) {
2275 push(@received_totals, get_report_total($report_totals{Received},"${_}s"));
2276 push(@delivered_totals,get_report_total($report_totals{Delivered},"${_}s"));
2277 }
2278 else {
2279 push(@received_totals,scalar(keys %{$received_data{$_}}));
2280 push(@delivered_totals,scalar(keys %{$delivered_data{$_}}));
2281 }
2282 $sender_html_header .= "<th>${_}s</th>";
2283 $sender_txt_header .= " " x ($COLUMN_WIDTHS - length($_)) . $_ . 's';
2284 $sender_html_format .= "<td align=\"right\">%d</td>";
2285 $sender_txt_format .= " " x ($COLUMN_WIDTHS - 5) . "%6d";
2286 }
2287
d5692f86
SC
2288 my $txt_format1 = " %-16s %9s %6d $sender_txt_format";
2289 my $txt_format2 = " %6d %4.1f%% %6d %4.1f%%",
2290 my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%s</td>$sender_html_format<td align=\"right\">%d</td>";
2291 my $htm_format2 = "<td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td><td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td>";
059ec3d9 2292
d5692f86 2293 if ($txt_fh) {
059ec3d9 2294 my $sender_spaces = " " x length($sender_txt_header);
d5692f86
SC
2295 print $txt_fh "\n";
2296 print $txt_fh "Grand total summary\n";
2297 print $txt_fh "-------------------\n";
2298 print $txt_fh " $sender_spaces At least one address\n";
2299 print $txt_fh " TOTAL Volume Messages $sender_txt_header Delayed Failed\n";
2300 }
2301 if ($htm_fh) {
2302 print $htm_fh "<a name=\"grandtotal\"></a>\n";
2303 print $htm_fh "<h2>Grand total summary</h2>\n";
2304 print $htm_fh "<table border=1>\n";
2305 print $htm_fh "<tr><th>TOTAL</th><th>Volume</th><th>Messages</th>$sender_html_header<th colspan=2>At least one addr<br>Delayed</th><th colspan=2>At least one addr<br>Failed</th>\n";
2306 }
2307 if ($xls_fh)
2308 {
2309 $ws_global->write($row++, $col, "Grand total summary", $f_header2);
059ec3d9 2310
d5692f86
SC
2311 $row_tablehead = $row+1; # header-row of TOTALS table
2312
2313 &set_worksheet_line($ws_global, $row_tablehead, 0, ['Received', 'Delivered', 'TOTAL'], $f_headertab);
2314
2315 my @content= (
2316 "Volume",
2317 "Messages",
2318 $sender_txt_header,
2319 "At least one address Delayed (Total)",
2320 "At least one address Delayed (Percent)",
2321 "At least one address Failed (Total)",
2322 "At least one address Failed (Percent)"
2323 );
2324
2325 for (my $i=0; $i < scalar(@content); $i++)
2326 {
2327 $ws_global->write($row_tablehead+$i+1, 2, $content[$i], $f_default);
2328 $row++;
2329 }
2330 $row_max = $row_tablehead+scalar(@content)+2; # continue from this row
059ec3d9
PH
2331 }
2332
d5692f86
SC
2333
2334
059ec3d9
PH
2335 my($volume,$failed_count);
2336 if ($merge_reports) {
2337 $volume = volume_rounded($report_totals{Received}{Volume}, $report_totals{Received}{'Volume-gigs'});
2338 $total_received_count = get_report_total($report_totals{Received},'Messages');
2339 $failed_count = get_report_total($report_totals{Received},'Failed');
2340 $delayed_count = get_report_total($report_totals{Received},'Delayed');
2341 }
2342 else {
2343 $volume = volume_rounded($total_received_data, $total_received_data_gigs);
608bc29d 2344 $failed_count = $message_errors;
059ec3d9
PH
2345 }
2346
2347 {
2348 no integer;
059ec3d9 2349
d5692f86
SC
2350 my @content=(
2351 $volume,$total_received_count,
2352 @received_totals,
2353 $delayed_count,
2354 ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0,
2355 $failed_count,
2356 ($total_received_count) ? ($failed_count*100/$total_received_count) : 0
2357 );
2358
2359 printf $txt_fh ("$txt_format1$txt_format2\n", 'Received', @content) if $txt_fh;
2360 printf $htm_fh ("$htm_format1$htm_format2\n", 'Received', @content) if $htm_fh;
2361 if ($xls_fh)
2362 {
2363 $row = $row_tablehead+1;
2364 for (my $i=0; $i < scalar(@content); $i++)
2365 {
2366 if ($i == 4 || $i == 6) {
2367 $ws_global->write($row+$i, 0, $content[$i]/100, $f_percent);
2368 }
2369 else {
2370 $ws_global->write($row+$i, 0, $content[$i], $f_default);
2371 }
2372 }
2373 }
2374 }
059ec3d9
PH
2375 if ($merge_reports) {
2376 $volume = volume_rounded($report_totals{Delivered}{Volume}, $report_totals{Delivered}{'Volume-gigs'});
2377 $total_delivered_count = get_report_total($report_totals{Delivered},'Messages');
2378 }
2379 else {
2380 $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs);
2381 }
d5692f86
SC
2382
2383 my @content=($volume, $total_delivered_count, @delivered_totals);
2384 printf $txt_fh ("$txt_format1\n\n", 'Delivered', @content) if $txt_fh;
2385 printf $htm_fh ("$htm_format1\n\n", 'Delivered', @content) if $htm_fh;
2386 printf $htm_fh "</table>\n" if $htm_fh;
2387 if ($xls_fh)
2388 {
2389
2390 $row = $row_tablehead+1;
2391 for (my $i=0; $i < scalar(@content); $i++)
2392 {
2393 $ws_global->write($row+$i, 1, $content[$i], $f_default);
2394 }
2395 $row = $row_max;
2396 }
059ec3d9
PH
2397}
2398
2399
1b4fe9dd
PH
2400#######################################################################
2401# print_user_patterns()
8e669ac1 2402#
1b4fe9dd 2403# print_user_patterns();
8e669ac1 2404#
1b4fe9dd
PH
2405# Print the counts of user specified patterns.
2406#######################################################################
059ec3d9 2407sub print_user_patterns {
d5692f86
SC
2408 my $txt_format1 = " %-18s %6d";
2409 my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";
059ec3d9 2410
d5692f86
SC
2411 if ($txt_fh) {
2412 print $txt_fh "User Specified Patterns\n";
2413 print $txt_fh "-----------------------";
2414 print $txt_fh "\n Total\n";
059ec3d9 2415 }
d5692f86
SC
2416 if ($htm_fh) {
2417 print $htm_fh "<hr><a name=\"patterns\"></a><h2>User Specified Patterns</h2>\n";
2418 print $htm_fh "<table border=0 width=\"100%\">\n";
2419 print $htm_fh "<tr><td>\n";
2420 print $htm_fh "<table border=1>\n";
2421 print $htm_fh "<tr><th>&nbsp;</th><th>Total</th>\n";
2422 }
2423 if ($xls_fh) {
2424 $ws_global->write($row++, $col, "User Specified Patterns", $f_header2);
2425 &set_worksheet_line($ws_global, $row++, 1, ["Total"], $f_headertab);
059ec3d9
PH
2426 }
2427
d5692f86 2428
059ec3d9
PH
2429 my($key);
2430 if ($merge_reports) {
2431 # We are getting our data from previous reports.
2432 foreach $key (@user_descriptions) {
2433 my $count = get_report_total($report_totals{patterns}{$key},'Total');
d5692f86
SC
2434 printf $txt_fh ("$txt_format1\n",$key,$count) if $txt_fh;
2435 printf $htm_fh ("$htm_format1\n",$key,$count) if $htm_fh;
2436 if ($xls_fh)
2437 {
2438 &set_worksheet_line($ws_global, $row++, 0, [$key,$count], $f_default);
2439 }
059ec3d9
PH
2440 }
2441 }
2442 else {
2443 # We are getting our data from mainlog files.
2444 my $user_pattern_index = 0;
2445 foreach $key (@user_descriptions) {
d5692f86
SC
2446 printf $txt_fh ("$txt_format1\n",$key,$user_pattern_totals[$user_pattern_index]) if $txt_fh;
2447 printf $htm_fh ("$htm_format1\n",$key,$user_pattern_totals[$user_pattern_index]) if $htm_fh;
2448 if ($xls_fh)
2449 {
2450 &set_worksheet_line($ws_global, $row++, 0, [$key,$user_pattern_totals[$user_pattern_index]]);
2451 }
059ec3d9
PH
2452 $user_pattern_index++;
2453 }
2454 }
d5692f86
SC
2455 print $txt_fh "\n" if $txt_fh;
2456 print $htm_fh "</table>\n\n" if $htm_fh;
2457 if ($xls_fh)
2458 {
2459 ++$row;
059ec3d9 2460 }
608bc29d
SC
2461
2462 if ($hist_opt > 0) {
2463 my $user_pattern_index = 0;
2464 foreach $key (@user_descriptions) {
2465 print_histogram($key, 'occurence', @{$user_pattern_interval_count[$user_pattern_index]});
2466 $user_pattern_index++;
2467 }
2468 }
059ec3d9
PH
2469}
2470
2471
1b4fe9dd
PH
2472#######################################################################
2473# print_transport();
8e669ac1 2474#
1b4fe9dd 2475# print_transport();
8e669ac1 2476#
1b4fe9dd
PH
2477# Print totals by transport.
2478#######################################################################
059ec3d9 2479sub print_transport {
059ec3d9
PH
2480 my(@chartdatanames);
2481 my(@chartdatavals_count);
2482 my(@chartdatavals_vol);
d5692f86
SC
2483 no integer; #Lose this for charting the data.
2484
2485 my $txt_format1 = " %-18s %6s %6d";
2486 my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%d</td>";
059ec3d9 2487
d5692f86
SC
2488 if ($txt_fh) {
2489 print $txt_fh "Deliveries by transport\n";
2490 print $txt_fh "-----------------------";
2491 print $txt_fh "\n Volume Messages\n";
059ec3d9 2492 }
d5692f86
SC
2493 if ($htm_fh) {
2494 print $htm_fh "<hr><a name=\"transport\"></a><h2>Deliveries by Transport</h2>\n";
2495 print $htm_fh "<table border=0 width=\"100%\">\n";
2496 print $htm_fh "<tr><td>\n";
2497 print $htm_fh "<table border=1>\n";
2498 print $htm_fh "<tr><th>&nbsp;</th><th>Volume</th><th>Messages</th>\n";
2499 }
2500 if ($xls_fh) {
2501 $ws_global->write($row++, $col, "Deliveries by transport", $f_header2);
2502 &set_worksheet_line($ws_global, $row++, 1, ["Volume", "Messages"], $f_headertab);
059ec3d9
PH
2503 }
2504
2505 my($key);
2506 if ($merge_reports) {
2507 # We are getting our data from previous reports.
2508 foreach $key (sort keys %{$report_totals{transport}}) {
2509 my $count = get_report_total($report_totals{transport}{$key},'Messages');
d5692f86
SC
2510 my @content=($key, volume_rounded($report_totals{transport}{$key}{Volume},
2511 $report_totals{transport}{$key}{'Volume-gigs'}), $count);
059ec3d9
PH
2512 push(@chartdatanames, $key);
2513 push(@chartdatavals_count, $count);
2514 push(@chartdatavals_vol, $report_totals{transport}{$key}{'Volume-gigs'}*$gig + $report_totals{transport}{$key}{Volume} );
d5692f86
SC
2515 printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
2516 printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
2517 if ($xls_fh) {
2518 &set_worksheet_line($ws_global, $row++, 0, \@content, $f_default);
2519 }
059ec3d9
PH
2520 }
2521 }
2522 else {
2523 # We are getting our data from mainlog files.
2524 foreach $key (sort keys %transported_data) {
d5692f86
SC
2525 my @content=($key, volume_rounded($transported_data{$key},$transported_data_gigs{$key}),
2526 $transported_count{$key});
059ec3d9
PH
2527 push(@chartdatanames, $key);
2528 push(@chartdatavals_count, $transported_count{$key});
2529 push(@chartdatavals_vol, $transported_data_gigs{$key}*$gig + $transported_data{$key});
d5692f86
SC
2530 printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
2531 printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
2532 if ($xls_fh) {
2533 &set_worksheet_line($ws_global, $row++, 0, \@content);
2534 }
059ec3d9
PH
2535 }
2536 }
d5692f86
SC
2537 print $txt_fh "\n" if $txt_fh;
2538 if ($htm_fh) {
2539 print $htm_fh "</table>\n";
2540 print $htm_fh "</td><td>\n";
608bc29d 2541 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals_count > 0))
059ec3d9
PH
2542 {
2543 # calculate the graph
2544 my @data = (
2545 \@chartdatanames,
2546 \@chartdatavals_count
2547 );
2548 my $graph = GD::Graph::pie->new(200, 200);
2549 $graph->set(
2550 x_label => 'Transport',
2551 y_label => 'Messages',
2552 title => 'By count',
2553 );
2554 my $gd = $graph->plot(\@data) or warn($graph->error);
2555 if ($gd) {
d5692f86
SC
2556 open(IMG, ">$chartdir/transports_count.png") or die "Could not write $chartdir/transports_count.png: $!\n";
2557 binmode IMG;
2558 print IMG $gd->png;
2559 close IMG;
2560 print $htm_fh "<img src=\"$chartrel/transports_count.png\">";
059ec3d9
PH
2561 }
2562 }
d5692f86 2563 print $htm_fh "</td><td>\n";
059ec3d9 2564
608bc29d 2565 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals_vol > 0)) {
059ec3d9
PH
2566 my @data = (
2567 \@chartdatanames,
2568 \@chartdatavals_vol
2569 );
2570 my $graph = GD::Graph::pie->new(200, 200);
2571 $graph->set(
2572 title => 'By volume',
2573 );
2574 my $gd = $graph->plot(\@data) or warn($graph->error);
2575 if ($gd) {
d5692f86
SC
2576 open(IMG, ">$chartdir/transports_vol.png") or die "Could not write $chartdir/transports_count.png: $!\n";
2577 binmode IMG;
2578 print IMG $gd->png;
2579 close IMG;
2580 print $htm_fh "<img src=\"$chartrel/transports_vol.png\">";
059ec3d9
PH
2581 }
2582 }
d5692f86
SC
2583 print $htm_fh "</td></tr></table>\n\n";
2584 }
2585 if ($xls_fh) {
2586 $row++;
059ec3d9 2587 }
d5692f86 2588
059ec3d9
PH
2589}
2590
2591
2592
1b4fe9dd
PH
2593#######################################################################
2594# print_relay();
8e669ac1 2595#
1b4fe9dd 2596# print_relay();
8e669ac1 2597#
1b4fe9dd
PH
2598# Print our totals by relay.
2599#######################################################################
059ec3d9 2600sub print_relay {
d5692f86 2601 my $row_print_relay=1;
059ec3d9 2602 my $temp = "Relayed messages";
d5692f86 2603 print $htm_fh "<hr><a name=\"$temp\"></a><h2>$temp</h2>\n" if $htm_fh;
059ec3d9
PH
2604 if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) {
2605 my $shown = 0;
2606 my $spacing = "";
d5692f86
SC
2607 my $txt_format = "%7d %s\n => %s\n";
2608 my $htm_format = "<tr><td align=\"right\">%d</td><td>%s</td><td>%s</td>\n";
059ec3d9 2609
d5692f86
SC
2610 printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
2611 if ($htm_fh) {
2612 print $htm_fh "<table border=1>\n";
2613 print $htm_fh "<tr><th>Count</th><th>From</th><th>To</th>\n";
059ec3d9 2614 }
d5692f86
SC
2615 if ($xls_fh) {
2616 $ws_relayed->write($row_print_relay++, $col, $temp, $f_header2);
2617 &set_worksheet_line($ws_relayed, $row_print_relay++, 0, ["Count", "From", "To"], $f_headertab);
059ec3d9
PH
2618 }
2619
d5692f86 2620
059ec3d9
PH
2621 my($key);
2622 foreach $key (sort keys %relayed) {
2623 my $count = $relayed{$key};
2624 $shown += $count;
2625 $key =~ s/[HA]=//g;
2626 my($one,$two) = split(/=> /, $key);
d5692f86
SC
2627 my @content=($count, $one, $two);
2628 printf $txt_fh ($txt_format, @content) if $txt_fh;
2629 printf $htm_fh ($htm_format, @content) if $htm_fh;
2630 if ($xls_fh)
2631 {
2632 &set_worksheet_line($ws_relayed, $row_print_relay++, 0, \@content);
2633 }
059ec3d9
PH
2634 $spacing = "\n";
2635 }
d5692f86
SC
2636
2637 print $htm_fh "</table>\n<p>\n" if $htm_fh;
2638 print $txt_fh "${spacing}Total: $shown (plus $relayed_unshown unshown)\n\n" if $txt_fh;
2639 print $htm_fh "${spacing}Total: $shown (plus $relayed_unshown unshown)\n\n" if $htm_fh;
2640 if ($xls_fh)
2641 {
2642 &set_worksheet_line($ws_relayed, $row_print_relay++, 0, [$shown, "Sum of shown" ]);
2643 &set_worksheet_line($ws_relayed, $row_print_relay++, 0, [$relayed_unshown, "unshown"]);
2644 $row_print_relay++;
2645 }
059ec3d9
PH
2646 }
2647 else {
d5692f86
SC
2648 print $txt_fh "No relayed messages\n-------------------\n\n" if $txt_fh;
2649 print $htm_fh "No relayed messages\n\n" if $htm_fh;
2650 if ($xls_fh)
2651 {
2652 $row_print_relay++;
2653 }
059ec3d9 2654 }
059ec3d9
PH
2655}
2656
2657
2658
1b4fe9dd
PH
2659#######################################################################
2660# print_errors();
8e669ac1 2661#
1b4fe9dd 2662# print_errors();
8e669ac1 2663#
1b4fe9dd
PH
2664# Print our errors. In HTML, we display them as a list rather than a table -
2665# Netscape doesn't like large tables!
2666#######################################################################
059ec3d9
PH
2667sub print_errors {
2668 my $total_errors = 0;
d5692f86 2669 $row=1;
059ec3d9
PH
2670
2671 if (scalar(keys %errors_count) != 0) {
2672 my $temp = "List of errors";
d5692f86
SC
2673 my $htm_format = "<li>%d - %s\n";
2674
2675 printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
2676 if ($htm_fh) {
2677 print $htm_fh "<hr><a name=\"errors\"></a><h2>$temp</h2>\n";
2678 print $htm_fh "<ul><li><b>Count - Error</b>\n";
059ec3d9 2679 }
d5692f86
SC
2680 if ($xls_fh)
2681 {
2682 $ws_errors->write($row++, 0, $temp, $f_header2);
2683 &set_worksheet_line($ws_errors, $row++, 0, ["Count", "Error"], $f_headertab);
059ec3d9
PH
2684 }
2685
d5692f86 2686
059ec3d9
PH
2687 my($key);
2688 foreach $key (sort keys %errors_count) {
2689 my $text = $key;
2690 chomp($text);
d5692f86 2691 $text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space.
059ec3d9 2692 $total_errors += $errors_count{$key};
d5692f86
SC
2693
2694 if ($txt_fh) {
2695 printf $txt_fh ("%5d ", $errors_count{$key});
2696 my $text_remaining = $text;
2697 while (length($text_remaining) > 65) {
2698 my($first,$rest) = $text_remaining =~ /(.{50}\S*)\s+(.+)/;
2699 last if !$first;
2700 printf $txt_fh ("%s\n\t ", $first);
2701 $text_remaining = $rest;
2702 }
2703 printf $txt_fh ("%s\n\n", $text_remaining);
2704 }
2705
2706 if ($htm_fh) {
8e669ac1 2707
059ec3d9
PH
2708 #Translate HTML tag characters. Sergey Sholokh.
2709 $text =~ s/\</\&lt\;/g;
2710 $text =~ s/\>/\&gt\;/g;
2711
d5692f86 2712 printf $htm_fh ($htm_format,$errors_count{$key},$text);
059ec3d9 2713 }
d5692f86
SC
2714 if ($xls_fh)
2715 {
2716 &set_worksheet_line($ws_errors, $row++, 0, [$errors_count{$key},$text]);
059ec3d9
PH
2717 }
2718 }
059ec3d9
PH
2719
2720 $temp = "Errors encountered: $total_errors";
d5692f86
SC
2721
2722 if ($txt_fh) {
2723 print $txt_fh $temp, "\n";
2724 print $txt_fh "-" x length($temp),"\n";
2725 }
2726 if ($htm_fh) {
2727 print $htm_fh "</ul>\n<p>\n";
2728 print $htm_fh $temp, "\n";
2729 }
2730 if ($xls_fh)
2731 {
2732 &set_worksheet_line($ws_errors, $row++, 0, [$total_errors, "Sum of Errors encountered"]);
2733 }
059ec3d9
PH
2734 }
2735
2736}
2737
2738
1b4fe9dd
PH
2739#######################################################################
2740# parse_old_eximstat_reports();
8e669ac1 2741#
1b4fe9dd 2742# parse_old_eximstat_reports($fh);
8e669ac1 2743#
1b4fe9dd 2744# Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
8e669ac1 2745#
1b4fe9dd
PH
2746# To test that the merging still works after changes, do something like the following.
2747# All the diffs should produce no output.
8e669ac1 2748#
1b4fe9dd 2749# options='-bydomain -byemail -byhost -byedomain'
608bc29d 2750# options="$options -show_rt1,2,4 -show_dt 1,2,4"
1b4fe9dd
PH
2751# options="$options -pattern 'Completed Messages' /Completed/"
2752# options="$options -pattern 'Received Messages' /<=/"
8e669ac1 2753#
1b4fe9dd
PH
2754# ./eximstats $options mainlog > mainlog.txt
2755# ./eximstats $options -merge mainlog.txt > mainlog.2.txt
2756# diff mainlog.txt mainlog.2.txt
8e669ac1 2757#
1b4fe9dd
PH
2758# ./eximstats $options -html mainlog > mainlog.html
2759# ./eximstats $options -merge -html mainlog.txt > mainlog.2.html
2760# diff mainlog.html mainlog.2.html
8e669ac1 2761#
1b4fe9dd
PH
2762# ./eximstats $options -merge mainlog.html > mainlog.3.txt
2763# diff mainlog.txt mainlog.3.txt
8e669ac1 2764#
1b4fe9dd
PH
2765# ./eximstats $options -merge -html mainlog.html > mainlog.3.html
2766# diff mainlog.html mainlog.3.html
8e669ac1 2767#
1b4fe9dd
PH
2768# ./eximstats $options -nvr mainlog > mainlog.nvr.txt
2769# ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
2770# diff mainlog.txt mainlog.4.txt
8e669ac1 2771#
1b4fe9dd
PH
2772# # double_mainlog.txt should have twice the values that mainlog.txt has.
2773# ./eximstats $options mainlog mainlog > double_mainlog.txt
2774#######################################################################
059ec3d9
PH
2775sub parse_old_eximstat_reports {
2776 my($fh) = @_;
2777
2778 my(%league_table_value_entered, %league_table_value_was_zero, %table_order);
2779
608bc29d
SC
2780 my(%user_pattern_index);
2781 my $user_pattern_index = 0;
2782 map {$user_pattern_index{$_} = $user_pattern_index++} @user_descriptions;
2783 my $user_pattern_keys = join('|', @user_descriptions);
2784
059ec3d9 2785 while (<$fh>) {
d5692f86 2786 PARSE_OLD_REPORT_LINE:
059ec3d9
PH
2787 if (/Exim statistics from ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?) to ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?)/) {
2788 $begin = $1 if ($1 lt $begin);
2789 $end = $3 if ($3 gt $end);
2790 }
2791 elsif (/Grand total summary/) {
2792 # Fill in $report_totals{Received|Delivered}{Volume|Messages|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
2793 my(@fields);
2794 while (<$fh>) {
d5692f86
SC
2795 $_ = html2txt($_); #Convert general HTML markup to text.
2796 s/At least one addr//g; #Another part of the HTML output we don't want.
059ec3d9
PH
2797
2798# TOTAL Volume Messages Hosts Domains Delayed Failed
2799# Received 26MB 237 177 23 8 3.4% 28 11.8%
2800# Delivered 13MB 233 99 88
d5692f86
SC
2801 if (/TOTAL\s+(.*?)\s*$/) {
2802 @fields = split(/\s+/,$1);
059ec3d9 2803 #Delayed and Failed have two columns each, so add the extra field names in.
d5692f86
SC
2804 splice(@fields,-1,1,'DelayedPercent','Failed','FailedPercent');
2805 }
2806 elsif (/(Received|Delivered)\s+(.*?)\s*$/) {
2807 print STDERR "Parsing $_" if $debug;
2808 add_to_totals($report_totals{$1},\@fields,$2);
2809 }
2810 last if (/Delivered/); #Last line of this section.
059ec3d9
PH
2811 }
2812 }
2813
2814 elsif (/User Specified Patterns/i) {
2815#User Specified Patterns
2816#-----------------------
2817# Total
2818# Description 85
2819
d5692f86 2820 while (<$fh>) { last if (/Total/); } #Wait until we get the table headers.
059ec3d9 2821 while (<$fh>) {
d5692f86
SC
2822 print STDERR "Parsing $_" if $debug;
2823 $_ = html2txt($_); #Convert general HTML markup to text.
2824 if (/^\s*(.*?)\s+(\d+)\s*$/) {
2825 $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
2826 add_to_totals($report_totals{patterns}{$1},['Total'],$2);
2827 }
2828 last if (/^\s*$/); #Finished if we have a blank line.
059ec3d9
PH
2829 }
2830 }
2831
608bc29d
SC
2832 elsif (/(^|<h2>)($user_pattern_keys) per /o) {
2833 # Parse User defined pattern histograms if they exist.
2834 parse_histogram($fh, $user_pattern_interval_count[$user_pattern_index{$2}] );
2835 }
2836
2837
059ec3d9
PH
2838 elsif (/Deliveries by transport/i) {
2839#Deliveries by transport
2840#-----------------------
2841# Volume Messages
2842# :blackhole: 70KB 51
2843# address_pipe 655KB 1
2844# smtp 11MB 151
2845
d5692f86 2846 while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers.
059ec3d9 2847 while (<$fh>) {
d5692f86
SC
2848 print STDERR "Parsing $_" if $debug;
2849 $_ = html2txt($_); #Convert general HTML markup to text.
2850 if (/(\S+)\s+(\d+\S*\s+\d+)/) {
2851 $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
2852 add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
2853 }
2854 last if (/^\s*$/); #Finished if we have a blank line.
059ec3d9
PH
2855 }
2856 }
608bc29d
SC
2857 elsif (/Messages received per/) {
2858 parse_histogram($fh, \@received_interval_count);
2859 }
2860 elsif (/Deliveries per/) {
2861 parse_histogram($fh, \@delivered_interval_count);
059ec3d9
PH
2862 }
2863
608bc29d
SC
2864 #elsif (/Time spent on the queue: (all messages|messages with at least one remote delivery)/) {
2865 elsif (/(Time spent on the queue|Delivery times|Receipt times): ((\S+) messages|messages with at least one remote delivery)((<[^>]*>)*\s*)$/) {
059ec3d9
PH
2866#Time spent on the queue: all messages
2867#-------------------------------------
2868#
2869#Under 1m 217 91.9% 91.9%
2870# 5m 2 0.8% 92.8%
2871# 3h 8 3.4% 96.2%
2872# 6h 7 3.0% 99.2%
2873# 12h 2 0.8% 100.0%
2874
2875 # Set a pointer to the queue bin so we can use the same code
2876 # block for both all messages and remote deliveries.
608bc29d
SC
2877 #my $bin_aref = ($1 eq 'all messages') ? \@qt_all_bin : \@qt_remote_bin;
2878 my($bin_aref, $times_aref, $overflow_sref);
2879 if ($1 eq 'Time spent on the queue') {
2880 $times_aref = \@queue_times;
2881 if ($2 eq 'all messages') {
2882 $bin_aref = \@qt_all_bin;
2883 $overflow_sref = \$qt_all_overflow;
2884 }
2885 else {
2886 $bin_aref = \@qt_remote_bin;
2887 $overflow_sref = \$qt_remote_overflow;
2888 }
2889 }
2890 elsif ($1 eq 'Delivery times') {
2891 $times_aref = \@delivery_times;
2892 if ($2 eq 'all messages') {
2893 $bin_aref = \@dt_all_bin;
2894 $overflow_sref = \$dt_all_overflow;
2895 }
2896 else {
2897 $bin_aref = \@dt_remote_bin;
2898 $overflow_sref = \$dt_remote_overflow;
2899 }
2900 }
2901 else {
2902 unless (exists $rcpt_times_bin{$3}) {
2903 initialise_rcpt_times($3);
2904 }
2905 $bin_aref = $rcpt_times_bin{$3};
2906 $times_aref = \@rcpt_times;
2907 $overflow_sref = \$rcpt_times_overflow{$3};
2908 }
2909
2910
059ec3d9
PH
2911 my $reached_table = 0;
2912 while (<$fh>) {
d5692f86
SC
2913 $_ = html2txt($_); #Convert general HTML markup to text.
2914 $reached_table = 1 if (/^\s*Under/);
2915 next unless $reached_table;
2916 my $previous_seconds_on_queue = 0;
2917 if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
2918 print STDERR "Parsing $_" if $debug;
2919 my($modifier,$formated_time,$count) = ($1,$2,$3);
2920 my $seconds = unformat_time($formated_time);
2921 my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
2922 $previous_seconds_on_queue = $seconds;
2923 $time_on_queue = $seconds * 2 if ($modifier eq 'Over');
2924 my($i);
608bc29d
SC
2925 for ($i = 0; $i <= $#$times_aref; $i++) {
2926 if ($time_on_queue < $times_aref->[$i]) {
d5692f86
SC
2927 $$bin_aref[$i] += $count;
2928 last;
2929 }
2930 }
608bc29d
SC
2931 $$overflow_sref += $count if ($i > $#$times_aref);
2932
d5692f86
SC
2933 }
2934 else {
2935 last; #Finished the table ?
2936 }
059ec3d9
PH
2937 }
2938 }
2939
2940 elsif (/Relayed messages/) {
2941#Relayed messages
2942#----------------
2943#
2944# 1 addr.domain.com [1.2.3.4] a.user@domain.com
2945# => addr2.domain2.com [5.6.7.8] a2.user2@domain2.com
2946#
2947#<tr><td align="right">1</td><td>addr.domain.com [1.2.3.4] a.user@domain.com </td><td>addr2.domain2.com [5.6.7.8] a2.user2@domain2.com</td>
2948
2949 my $reached_table = 0;
2950 my($count,$sender);
2951 while (<$fh>) {
d5692f86
SC
2952 unless ($reached_table) {
2953 last if (/No relayed messages/);
2954 $reached_table = 1 if (/^\s*\d/ || />\d+</);
2955 next unless $reached_table;
2956 }
2957 if (/>(\d+)<.td><td>(.*?) ?<.td><td>(.*?)</) {
2958 update_relayed($1,$2,$3);
2959 }
2960 elsif (/^\s*(\d+)\s+(.*?)\s*$/) {
2961 ($count,$sender) = ($1,$2);
2962 }
2963 elsif (/=>\s+(.*?)\s*$/) {
2964 update_relayed($count,$sender,$1);
2965 }
2966 else {
2967 last; #Finished the table ?
2968 }
059ec3d9
PH
2969 }
2970 }
2971
2972 elsif (/Top (.*?) by (message count|volume)/) {
2973#Top 50 sending hosts by message count
2974#-------------------------------------
2975#
2976# 48 1468KB local
50adf73a
SC
2977# Could also have average values for HTML output.
2978# 48 1468KB 30KB local
2979
059ec3d9
PH
2980 my($category,$by_count_or_volume) = ($1,$2);
2981
2982 #As we show 2 views of each table (by count and by volume),
2983 #most (but not all) entries will appear in both tables.
2984 #Set up a hash to record which entries we have already seen
2985 #and one to record which ones we are seeing for the first time.
2986 if ($by_count_or_volume =~ /count/) {
d5692f86
SC
2987 undef %league_table_value_entered;
2988 undef %league_table_value_was_zero;
2989 undef %table_order;
059ec3d9
PH
2990 }
2991
2992 #As this section processes multiple different table categories,
2993 #set up pointers to the hashes to be updated.
2994 my($count_href,$data_href,$data_gigs_href);
2995 if ($category =~ /local sender/) {
2996 $count_href = \%received_count_user;
d5692f86
SC
2997 $data_href = \%received_data_user;
2998 $data_gigs_href = \%received_data_gigs_user;
059ec3d9
PH
2999 }
3000 elsif ($category =~ /sending (\S+?)s?\b/) {
3001 #Top 50 sending (host|domain|email|edomain)s
3002 #Top sending (host|domain|email|edomain)
3003 $count_href = \%{$received_count{"\u$1"}};
d5692f86
SC
3004 $data_href = \%{$received_data{"\u$1"}};
3005 $data_gigs_href = \%{$received_data_gigs{"\u$1"}};
059ec3d9
PH
3006 }
3007 elsif ($category =~ /local destination/) {
3008 $count_href = \%delivered_count_user;
d5692f86
SC
3009 $data_href = \%delivered_data_user;
3010 $data_gigs_href = \%delivered_data_gigs_user;
059ec3d9
PH
3011 }
3012 elsif ($category =~ /(\S+) destination/) {
3013 #Top 50 (host|domain|email|edomain) destinations
3014 #Top (host|domain|email|edomain) destination
3015 $count_href = \%{$delivered_count{"\u$1"}};
d5692f86
SC
3016 $data_href = \%{$delivered_data{"\u$1"}};
3017 $data_gigs_href = \%{$delivered_data_gigs{"\u$1"}};
059ec3d9
PH
3018 }
3019
3020 my $reached_table = 0;
3021 while (<$fh>) {
d5692f86
SC
3022 # Watch out for empty tables.
3023 goto PARSE_OLD_REPORT_LINE if (/<h2>/ or /^[a-zA-Z]/);
3024
3025 $_ = html2txt($_); #Convert general HTML markup to text.
3026
3027
3028 $reached_table = 1 if (/^\s*\d/);
3029 next unless $reached_table;
50adf73a
SC
3030
3031 # Remove optional 'average value' column.
d5692f86 3032 s/^\s*(\d+)\s+(\S+)\s+(\d+(KB|MB|GB|\b)\s+)/$1 $2 /;
50adf73a 3033
d5692f86
SC
3034 if (/^\s*(\d+)\s+(\S+)\s*(.*?)\s*$/) {
3035 my($count,$rounded_volume,$entry) = ($1,$2,$3);
059ec3d9
PH
3036 #Note: $entry fields can be both null and can contain spaces.
3037
d5692f86
SC
3038 #Add the entry into the %table_order hash if it has a rounded volume (KB/MB/GB).
3039 push(@{$table_order{$rounded_volume}{$by_count_or_volume}},$entry) if ($rounded_volume =~ /\D/);
059ec3d9
PH
3040
3041 unless ($league_table_value_entered{$entry}) {
d5692f86
SC
3042 $league_table_value_entered{$entry} = 1;
3043 unless ($$count_href{$entry}) {
3044 $$count_href{$entry} = 0;
3045 $$data_href{$entry} = 0;
3046 $$data_gigs_href{$entry} = 0;
3047 $league_table_value_was_zero{$entry} = 1;
3048 }
3049
3050 $$count_href{$entry} += $count;
059ec3d9 3051 #Add the rounded value to the data and data_gigs hashes.
d5692f86
SC
3052 un_round($rounded_volume,\$$data_href{$entry},\$$data_gigs_href{$entry});
3053 print STDERR "$category by $by_count_or_volume: added $count,$rounded_volume to $entry\n" if $debug;
3054 }
3055 }
3056 else { #Finished the table ?
3057 if ($by_count_or_volume =~ /volume/) {
3058 #Add a few bytes to appropriate entries to preserve the order.
3059
3060 my($rounded_volume);
3061 foreach $rounded_volume (keys %table_order) {
3062 #For each rounded volume, we want to create a list which has things
3063 #ordered from the volume table at the front, and additional things
3064 #from the count table ordered at the back.
3065 @{$table_order{$rounded_volume}{volume}} = () unless defined $table_order{$rounded_volume}{volume};
3066 @{$table_order{$rounded_volume}{'message count'}} = () unless defined $table_order{$rounded_volume}{'message count'};
3067 my(@order,%mark);
3068 map {$mark{$_} = 1} @{$table_order{$rounded_volume}{volume}};
3069 @order = @{$table_order{$rounded_volume}{volume}};
3070 map {push(@order,$_)} grep(!$mark{$_},@{$table_order{$rounded_volume}{'message count'}});
3071
3072 my $bonus_bytes = $#order;
3073 $bonus_bytes = 511 if ($bonus_bytes > 511); #Don't go over the half-K boundary!
3074 while (@order and ($bonus_bytes > 0)) {
3075 my $entry = shift(@order);
3076 if ($league_table_value_was_zero{$entry}) {
3077 $$data_href{$entry} += $bonus_bytes;
3078 print STDERR "$category by $by_count_or_volume: added $bonus_bytes bonus bytes to $entry\n" if $debug;
3079 }
3080 $bonus_bytes--;
3081 }
3082 }
3083 }
3084
3085 last;
3086 }
059ec3d9
PH
3087 }
3088 }
3089 elsif (/List of errors/) {
3090#List of errors
3091#--------------
3092#
3093# 1 07904931641@one2one.net R=external T=smtp: SMTP error
3094# from remote mailer after RCPT TO:<07904931641@one2one.net>:
3095# host mail.one2one.net [193.133.192.24]: 550 User unknown
3096#
3097#<li>1 - ally.dufc@dunbar.org.uk R=external T=smtp: SMTP error from remote mailer after RCPT TO:<ally.dufc@dunbar.org.uk>: host mail.dunbar.org.uk [216.167.89.88]: 550 Unknown local part ally.dufc in <ally.dufc@dunbar.org.uk>
3098
3099
3100 my $reached_table = 0;
3101 my($count,$error,$blanks);
3102 while (<$fh>) {
d5692f86
SC
3103 $reached_table = 1 if (/^( *|<li>)(\d+)/);
3104 next unless $reached_table;
059ec3d9 3105
d5692f86
SC
3106 s/^<li>(\d+) -/$1/; #Convert an HTML line to a text line.
3107 $_ = html2txt($_); #Convert general HTML markup to text.
059ec3d9 3108
d5692f86
SC
3109 if (/\t\s*(.*)/) {
3110 $error .= ' ' . $1; #Join a multiline error.
3111 }
3112 elsif (/^\s*(\d+)\s+(.*)/) {
3113 if ($error) {
059ec3d9 3114 #Finished with a previous multiline error so save it.
d5692f86
SC
3115 $errors_count{$error} = 0 unless $errors_count{$error};
3116 $errors_count{$error} += $count;
3117 }
3118 ($count,$error) = ($1,$2);
3119 }
3120 elsif (/Errors encountered/) {
3121 if ($error) {
059ec3d9 3122 #Finished the section, so save our stored last error.
d5692f86
SC
3123 $errors_count{$error} = 0 unless $errors_count{$error};
3124 $errors_count{$error} += $count;
3125 }
3126 last;
3127 }
059ec3d9
PH
3128 }
3129 }
3130
3131 }
3132}
3133
608bc29d
SC
3134#######################################################################
3135# parse_histogram($fh, \@delivered_interval_count);
3136# Parse a histogram into the provided array of counters.
3137#######################################################################
3138sub parse_histogram {
3139 my($fh, $counters_aref) = @_;
3140
3141 # Messages received per hour (each dot is 2 messages)
3142 #---------------------------------------------------
3143 #
3144 #00-01 106 .....................................................
3145 #01-02 103 ...................................................
3146
3147 my $reached_table = 0;
3148 while (<$fh>) {
3149 $reached_table = 1 if (/^00/);
3150 next unless $reached_table;
3151 print STDERR "Parsing $_" if $debug;
3152 if (/^(\d+):(\d+)\s+(\d+)/) { #hh:mm start time format ?
3153 $$counters_aref[($1*60 + $2)/$hist_interval] += $3 if $hist_opt;
3154 }
3155 elsif (/^(\d+)-(\d+)\s+(\d+)/) { #hh-hh start-end time format ?
3156 $$counters_aref[($1*60)/$hist_interval] += $3 if $hist_opt;
3157 }
3158 else { #Finished the table ?
3159 last;
3160 }
3161 }
3162}
059ec3d9
PH
3163
3164
1b4fe9dd
PH
3165#######################################################################
3166# update_relayed();
8e669ac1 3167#
1b4fe9dd 3168# update_relayed($count,$sender,$recipient);
8e669ac1 3169#
1b4fe9dd
PH
3170# Adds an entry into the %relayed hash. Currently only used when
3171# merging reports.
3172#######################################################################
059ec3d9
PH
3173sub update_relayed {
3174 my($count,$sender,$recipient) = @_;
3175
3176 #When generating the key, put in the 'H=' and 'A=' which can be used
3177 #in searches.
3178 my $key = "H=$sender => H=$recipient";
3179 $key =~ s/ ([^=\s]+\@\S+|<>)/ A=$1/g;
3180 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
3181 $relayed{$key} = 0 if !defined $relayed{$key};
3182 $relayed{$key} += $count;
3183 }
3184 else {
3185 $relayed_unshown += $count;
3186 }
3187}
3188
3189
1b4fe9dd
PH
3190#######################################################################
3191# add_to_totals();
8e669ac1 3192#
1b4fe9dd 3193# add_to_totals(\%totals,\@keys,$values);
8e669ac1 3194#
1b4fe9dd
PH
3195# Given a line of space seperated values, add them into the provided hash using @keys
3196# as the hash keys.
8e669ac1 3197#
1b4fe9dd
PH
3198# If the value contains a '%', then the value is set rather than added. Otherwise, we
3199# convert the value to bytes and gigs. The gigs get added to I<Key>-gigs.
3200#######################################################################
059ec3d9
PH
3201sub add_to_totals {
3202 my($totals_href,$keys_aref,$values) = @_;
3203 my(@values) = split(/\s+/,$values);
d5692f86 3204 my(@keys) = @$keys_aref; #Make a copy as we destroy the one we use.
059ec3d9
PH
3205 my($value);
3206 foreach $value (@values) {
3207 my $key = shift(@keys) or next;
3208 if ($value =~ /%/) {
3209 $$totals_href{$key} = $value;
3210 }
3211 else {
3212 $$totals_href{$key} = 0 unless ($$totals_href{$key});
3213 $$totals_href{"$key-gigs"} = 0 unless ($$totals_href{"$key-gigs"});
3214 un_round($value, \$$totals_href{$key}, \$$totals_href{"$key-gigs"});
3215 print STDERR "Added $value to $key - $$totals_href{$key} , " . $$totals_href{"$key-gigs"} . "GB.\n" if $debug;
3216 }
3217 }
3218}
3219
1b4fe9dd
PH
3220#######################################################################
3221# get_report_total();
8e669ac1 3222#
1b4fe9dd 3223# $total = get_report_total(\%hash,$key);
8e669ac1 3224#
1b4fe9dd 3225# If %hash contains values split into Units and Gigs, we calculate and return
8e669ac1 3226#
1b4fe9dd
PH
3227# $hash{$key} + 1024*1024*1024 * $hash{"${key}-gigs"}
3228#######################################################################
059ec3d9
PH
3229sub get_report_total {
3230 no integer;
3231 my($hash_ref,$key) = @_;
3232 if ($$hash_ref{"${key}-gigs"}) {
3233 return $$hash_ref{$key} + $gig * $$hash_ref{"${key}-gigs"};
3234 }
3235 return $$hash_ref{$key} || 0;
3236}
3237
1b4fe9dd
PH
3238#######################################################################
3239# html2txt();
8e669ac1 3240#
1b4fe9dd 3241# $text_line = html2txt($html_line);
8e669ac1 3242#
1b4fe9dd
PH
3243# Convert a line from html to text. Currently we just convert HTML tags to spaces
3244# and convert &gt;, &lt;, and &nbsp; tags back.
3245#######################################################################
059ec3d9
PH
3246sub html2txt {
3247 ($_) = @_;
3248
3249 # Convert HTML tags to spacing. Note that the reports may contain <Userid> and
3250 # <Userid@Domain> words, so explicitly specify the HTML tags we will remove
3251 # (the ones used by this program). If someone is careless enough to have their
3252 # Userid the same as an HTML tag, there's not much we can do about it.
608bc29d 3253 s/<\/?(html|head|title|body|h\d|ul|li|a\s+|table|tr|td|th|pre|hr|p|br)\b.*?>/ /g;
059ec3d9
PH
3254
3255 s/\&lt\;/\</og; #Convert '&lt;' to '<'.
3256 s/\&gt\;/\>/og; #Convert '&gt;' to '>'.
3257 s/\&nbsp\;/ /og; #Convert '&nbsp;' to ' '.
3258 return($_);
3259}
3260
1b4fe9dd
PH
3261#######################################################################
3262# get_next_arg();
8e669ac1 3263#
1b4fe9dd 3264# $arg = get_next_arg();
8e669ac1 3265#
1b4fe9dd
PH
3266# Because eximstats arguments are often passed as variables,
3267# we can't rely on shell parsing to deal with quotes. This
3268# subroutine returns $ARGV[1] and does a shift. If $ARGV[1]
3269# starts with a quote (' or "), and doesn't end in one, then
3270# we append the next argument to it and shift again. We repeat
3271# until we've got all of the argument.
8e669ac1 3272#
1b4fe9dd
PH
3273# This isn't perfect as all white space gets reduced to one space,
3274# but it's as good as we can get! If it's esential that spacing
3275# be preserved precisely, then you get that by not using shell
3276# variables.
3277#######################################################################
059ec3d9
PH
3278sub get_next_arg {
3279 my $arg = '';
3280 my $matched_pattern = 0;
3281 while ($ARGV[1]) {
3282 $arg .= ' ' if $arg;
3283 $arg .= $ARGV[1]; shift(@ARGV);
3284 if ($arg !~ /^['"]/) {
3285 $matched_pattern = 1;
3286 last;
3287 }
3288 if ($arg =~ s/^(['"])(.*)\1$/$2/) {
3289 $matched_pattern = 1;
3290 last;
3291 }
3292 }
3293 die "Mismatched argument quotes - <$arg>.\n" unless $matched_pattern;
3294 return $arg;
3295}
3296
d5692f86
SC
3297#######################################################################
3298# set_worksheet_line($ws_global, $startrow, $startcol, \@content, $format);
3299#
3300# set values to a sequence of cells in a row.
3301#
3302#######################################################################
3303sub set_worksheet_line {
3304 my ($worksheet, $row, $col, $content, $format) = @_;
3305
3306 foreach my $token (@$content)
3307 {
3308 $worksheet->write($row, $col++, $token, $format );
3309 }
059ec3d9 3310
d5692f86 3311}
059ec3d9 3312
608bc29d
SC
3313#######################################################################
3314# @rcpt_times = parse_time_list($string);
3315#
3316# Parse a comma seperated list of time values in seconds given by
3317# the user and fill an array.
3318#
3319# Return a default list if $string is undefined.
3320# Return () if $string eq '0'.
3321#######################################################################
3322sub parse_time_list {
3323 my($string) = @_;
3324 if (! defined $string) {
3325 return(60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60, 12*60*60, 24*60*60);
3326 }
3327 my(@times) = split(/,/, $string);
3328 foreach my $q (@times) { $q = eval($q) + 0 }
3329 @times = sort { $a <=> $b } @times;
3330 @times = () if ($#times == 0 && $times[0] == 0);
3331 return(@times);
3332}
3333
3334
3335#######################################################################
3336# initialise_rcpt_times($protocol);
3337# Initialise an array of rcpt_times to 0 for the specified protocol.
3338#######################################################################
3339sub initialise_rcpt_times {
3340 my($protocol) = @_;
3341 for (my $i = 0; $i <= $#rcpt_times; ++$i) {
3342 $rcpt_times_bin{$protocol}[$i] = 0;
3343 }
3344 $rcpt_times_overflow{$protocol} = 0;
3345}
3346
3347
059ec3d9
PH
3348##################################################
3349# Main Program #
3350##################################################
3351
3352
3353$last_timestamp = '';
3354$last_date = '';
3355$show_errors = 1;
3356$show_relay = 1;
3357$show_transport = 1;
3358$topcount = 50;
3359$local_league_table = 1;
3360$include_remote_users = 0;
d5692f86 3361$include_original_destination = 0;
059ec3d9
PH
3362$hist_opt = 1;
3363$volume_rounding = 1;
3364$localtime_offset = calculate_localtime_offset(); # PH/FANF
3365
3366$charts = 0;
3367$charts_option_specified = 0;
3368$chartrel = ".";
3369$chartdir = ".";
3370
608bc29d
SC
3371@queue_times = parse_time_list();
3372@rcpt_times = ();
3373@delivery_times = ();
059ec3d9
PH
3374
3375$last_offset = '';
3376$offset_seconds = 0;
3377
d5692f86
SC
3378$row=1;
3379$row_league_table=1;
3380$col=0;
3381$col_hist=0;
3382$run_hist=0;
3383my(%output_files); # What output files have been specified?
3384
059ec3d9
PH
3385# Decode options
3386
608bc29d 3387while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-') {
059ec3d9
PH
3388 if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 }
3389 elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 }
608bc29d 3390 elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/) {
059ec3d9 3391 if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
608bc29d
SC
3392 }
3393 elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/) { @queue_times = parse_time_list($1) }
059ec3d9
PH
3394 elsif ($ARGV[0] =~ /^-nt$/) { $show_transport = 0 }
3395 elsif ($ARGV[0] =~ /^\-nt(.?)(.*)\1$/)
3396 {
3397 if ($1 eq "") { $show_transport = 0 } else { $transport_pattern = $2 }
3398 }
3399 elsif ($ARGV[0] =~ /^-t(\d+)$/) { $topcount = $1 }
3400 elsif ($ARGV[0] =~ /^-tnl$/) { $local_league_table = 0 }
d5692f86
SC
3401 elsif ($ARGV[0] =~ /^-txt=?(\S*)$/) { $txt_fh = get_filehandle($1,\%output_files) }
3402 elsif ($ARGV[0] =~ /^-html=?(\S*)$/) { $htm_fh = get_filehandle($1,\%output_files) }
3403 elsif ($ARGV[0] =~ /^-xls=?(\S*)$/) {
3404 if ($HAVE_Spreadsheet_WriteExcel) {
3405 $xls_fh = get_filehandle($1,\%output_files);
3406 }
3407 else {
3408 warn "WARNING: CPAN Module Spreadsheet::WriteExcel not installed. Obtain from www.cpan.org\n";
3409 }
3410 }
059ec3d9
PH
3411 elsif ($ARGV[0] =~ /^-merge$/) { $merge_reports = 1 }
3412 elsif ($ARGV[0] =~ /^-charts$/) {
3413 $charts = 1;
3414 warn "WARNING: CPAN Module GD::Graph::pie not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_pie;
3415 warn "WARNING: CPAN Module GD::Graph::linespoints not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_linespoints;
3416 }
3417 elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; }
3418 elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; }
d5692f86
SC
3419 elsif ($ARGV[0] =~ /^-include_original_destination$/) { $include_original_destination = 1 }
3420 elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used.
059ec3d9
PH
3421 elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 }
3422 elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 }
3423 elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }
3424 elsif ($ARGV[0] =~ /^-byemaildomain$/) { $do_sender{Edomain} = 1 }
3425 elsif ($ARGV[0] =~ /^-byedomain$/) { $do_sender{Edomain} = 1 }
3426 elsif ($ARGV[0] =~ /^-nvr$/) { $volume_rounding = 0 }
608bc29d
SC
3427 elsif ($ARGV[0] =~ /^-show_rt([,\d\+\-\*\/]+)?$/) { @rcpt_times = parse_time_list($1) }
3428 elsif ($ARGV[0] =~ /^-show_dt([,\d\+\-\*\/]+)?$/) { @delivery_times = parse_time_list($1) }
059ec3d9
PH
3429 elsif ($ARGV[0] =~ /^-d$/) { $debug = 1 }
3430 elsif ($ARGV[0] =~ /^--?h(elp)?$/){ help() }
3431 elsif ($ARGV[0] =~ /^-t_remote_users$/) { $include_remote_users = 1 }
3432 elsif ($ARGV[0] =~ /^-pattern$/)
3433 {
3434 push(@user_descriptions,get_next_arg());
3435 push(@user_patterns,get_next_arg());
3436 }
3437 elsif ($ARGV[0] =~ /^-utc$/)
3438 {
3439 # We don't need this value if the log is in UTC.
3440 $localtime_offset = undef;
3441 }
3442 else
3443 {
3444 print STDERR "Eximstats: Unknown or malformed option $ARGV[0]\n";
3445 help();
3446 }
3447 shift;
3448 }
3449
d5692f86
SC
3450 # keep old default behaviour
3451 if (! ($xls_fh or $htm_fh or $txt_fh)) {
3452 $txt_fh = \*STDOUT;
3453 }
3454
059ec3d9
PH
3455 # Check that all the charts options are specified.
3456 warn "-charts option not specified. Use -help for help.\n" if ($charts_option_specified && ! $charts);
3457
3458 # Default to display tables by sending Host.
3459 $do_sender{Host} = 1 unless ($do_sender{Domain} || $do_sender{Email} || $do_sender{Edomain});
3460
d5692f86
SC
3461 # prepare xls Excel Workbook
3462 if (defined $xls_fh)
3463 {
3464
3465 # Create a new Excel workbook
3466 $workbook = Spreadsheet::WriteExcel->new($xls_fh);
3467
3468 # Add worksheets
3469 $ws_global = $workbook->addworksheet('Exim Statistik');
3470 # show $ws_global as initial sheet
3471 $ws_global->set_first_sheet();
3472 $ws_global->activate();
3473
3474 if ($show_relay) {
3475 $ws_relayed = $workbook->addworksheet('Relayed Messages');
3476 $ws_relayed->set_column(1, 2, 80);
3477 }
3478 if ($topcount) {
3479 $ws_top50 = $workbook->addworksheet('Deliveries');
3480 }
3481 if ($show_errors) {
3482 $ws_errors = $workbook->addworksheet('Errors');
3483 }
3484
3485
3486 # set column widths
3487 $ws_global->set_column(0, 2, 20); # Columns B-D width set to 30
3488 $ws_global->set_column(3, 3, 15); # Columns B-D width set to 30
3489 $ws_global->set_column(4, 4, 25); # Columns B-D width set to 30
3490
3491 # Define Formats
3492 $f_default = $workbook->add_format();
3493
3494 $f_header1 = $workbook->add_format();
3495 $f_header1->set_bold();
3496 #$f_header1->set_color('red');
3497 $f_header1->set_size('15');
3498 $f_header1->set_valign();
3499 # $f_header1->set_align('center');
3500 # $ws_global->write($row++, 2, "Testing Headers 1", $f_header1);
3501
3502 $f_header2 = $workbook->add_format();
3503 $f_header2->set_bold();
3504 $f_header2->set_size('12');
3505 $f_header2->set_valign();
3506 # $ws_global->write($row++, 2, "Testing Headers 2", $f_header2);
3507
3508 $f_percent = $workbook->add_format();
3509 $f_percent->set_num_format('0.0%');
3510
3511 $f_headertab = $workbook->add_format();
3512 $f_headertab->set_bold();
3513 $f_headertab->set_valign();
3514 # $ws_global->write($row++, 2, "Testing Headers tab", $f_headertab);
3515
3516 }
3517
059ec3d9 3518
608bc29d 3519# Initialise the queue/delivery/rcpt time counters.
059ec3d9 3520for (my $i = 0; $i <= $#queue_times; $i++) {
608bc29d
SC
3521 $qt_all_bin[$i] = 0;
3522 $qt_remote_bin[$i] = 0;
3523}
3524for (my $i = 0; $i <= $#delivery_times; $i++) {
3525 $dt_all_bin[$i] = 0;
3526 $dt_remote_bin[$i] = 0;
059ec3d9 3527}
608bc29d 3528initialise_rcpt_times('all');
059ec3d9 3529
059ec3d9 3530
608bc29d 3531# Compute the number of slots for the histogram
059ec3d9
PH
3532if ($hist_opt > 0)
3533 {
3534 if ($hist_opt > 60 || 60 % $hist_opt != 0)
3535 {
d5692f86 3536 print STDERR "Eximstats: -h must specify a factor of 60\n";
059ec3d9
PH
3537 exit 1;
3538 }
d5692f86
SC
3539 $hist_interval = 60/$hist_opt; #Interval in minutes.
3540 $hist_number = (24*60)/$hist_interval; #Number of intervals per day.
059ec3d9
PH
3541 @received_interval_count = (0) x $hist_number;
3542 @delivered_interval_count = (0) x $hist_number;
608bc29d
SC
3543 my $user_pattern_index = 0;
3544 for (my $user_pattern_index = 0; $user_pattern_index <= $#user_patterns; ++$user_pattern_index) {
3545 @{$user_pattern_interval_count[$user_pattern_index]} = (0) x $hist_number;
3546 }
3547 @dt_all_bin = (0) x $hist_number;
3548 @dt_remote_bin = (0) x $hist_number;
059ec3d9
PH
3549 }
3550
3551#$queue_unknown = 0;
3552
3553$total_received_data = 0;
3554$total_received_data_gigs = 0;
3555$total_received_count = 0;
3556
3557$total_delivered_data = 0;
3558$total_delivered_data_gigs = 0;
3559$total_delivered_count = 0;
3560
608bc29d
SC
3561$qt_all_overflow = 0;
3562$qt_remote_overflow = 0;
3563$dt_all_overflow = 0;
3564$dt_remote_overflow = 0;
059ec3d9
PH
3565$delayed_count = 0;
3566$relayed_unshown = 0;
608bc29d 3567$message_errors = 0;
059ec3d9
PH
3568$begin = "9999-99-99 99:99:99";
3569$end = "0000-00-00 00:00:00";
3570my($section,$type);
3571foreach $section ('Received','Delivered') {
3572 foreach $type ('Volume','Messages','Delayed','Failed','Hosts','Domains','Emails','Edomains') {
3573 $report_totals{$section}{$type} = 0;
3574 }
3575}
3576
3577# Generate our parser.
3578my $parser = generate_parser();
3579
3580
3581
3582if (@ARGV) {
3583 # Scan the input files and collect the data
3584 foreach my $file (@ARGV) {
3585 if ($file =~ /\.gz/) {
3586 unless (open(FILE,"gunzip -c $file |")) {
d5692f86
SC
3587 print STDERR "Failed to gunzip -c $file: $!";
3588 next;
059ec3d9
PH
3589 }
3590 }
3591 elsif ($file =~ /\.Z/) {
3592 unless (open(FILE,"uncompress -c $file |")) {
d5692f86
SC
3593 print STDERR "Failed to uncompress -c $file: $!";
3594 next;
059ec3d9
PH
3595 }
3596 }
3597 else {
3598 unless (open(FILE,$file)) {
d5692f86
SC
3599 print STDERR "Failed to read $file: $!";
3600 next;
059ec3d9
PH
3601 }
3602 }
3603 #Now parse the filehandle, updating the global variables.
3604 parse($parser,\*FILE);
3605 close FILE;
3606 }
3607}
3608else {
3609 #No files provided. Parse STDIN, updating the global variables.
3610 parse($parser,\*STDIN);
3611}
3612
3613
3614if ($begin eq "9999-99-99 99:99:99") {
d5692f86 3615 print STDERR "**** No valid log lines read\n";
059ec3d9
PH
3616 exit 1;
3617}
3618
3619# Output our results.
3620print_header();
3621print_grandtotals();
3622
3623# Print counts of user specified patterns if required.
3624print_user_patterns() if @user_patterns;
3625
3626# Print totals by transport if required.
3627print_transport() if $show_transport;
3628
3629# Print the deliveries per interval as a histogram, unless configured not to.
3630# First find the maximum in one interval and scale accordingly.
3631if ($hist_opt > 0) {
608bc29d
SC
3632 print_histogram("Messages received", 'message', @received_interval_count);
3633 print_histogram("Deliveries", 'delivery', @delivered_interval_count);
059ec3d9
PH
3634}
3635
3636# Print times on queue if required.
3637if ($#queue_times >= 0) {
608bc29d
SC
3638 print_duration_table("Time spent on the queue", "all messages", \@queue_times, \@qt_all_bin,$qt_all_overflow);
3639 print_duration_table("Time spent on the queue", "messages with at least one remote delivery", \@queue_times, \@qt_remote_bin,$qt_remote_overflow);
3640}
3641
3642# Print delivery times if required.
3643if ($#delivery_times >= 0) {
3644 print_duration_table("Delivery times", "all messages", \@delivery_times, \@dt_all_bin,$dt_all_overflow);
3645 print_duration_table("Delivery times", "messages with at least one remote delivery", \@delivery_times, \@dt_remote_bin,$dt_remote_overflow);
3646}
3647
3648# Print rcpt times if required.
3649if ($#rcpt_times >= 0) {
3650 foreach my $protocol ('all', grep(!/^all$/, sort keys %rcpt_times_bin)) {
3651 print_duration_table("Receipt times", "$protocol messages", \@rcpt_times, $rcpt_times_bin{$protocol}, $rcpt_times_overflow{$protocol});
3652 }
059ec3d9
PH
3653}
3654
3655# Print relay information if required.
3656print_relay() if $show_relay;
3657
3658# Print the league tables, if topcount isn't zero.
3659if ($topcount > 0) {
3660 foreach ('Host','Domain','Email','Edomain') {
3661 next unless $do_sender{$_};
3662 print_league_table("sending \l$_", $received_count{$_}, $received_data{$_},$received_data_gigs{$_});
3663 }
3664
3665 print_league_table("local sender", \%received_count_user,
3666 \%received_data_user,\%received_data_gigs_user) if ($local_league_table || $include_remote_users);
3667 foreach ('Host','Domain','Email','Edomain') {
3668 next unless $do_sender{$_};
3669 print_league_table("\l$_ destination", $delivered_count{$_}, $delivered_data{$_},$delivered_data_gigs{$_});
3670 }
3671 print_league_table("local destination", \%delivered_count_user,
3672 \%delivered_data_user,\%delivered_data_gigs_user) if ($local_league_table || $include_remote_users);
3673}
3674
3675# Print the error statistics if required.
3676print_errors() if $show_errors;
3677
d5692f86
SC
3678print $htm_fh "</body>\n</html>\n" if $htm_fh;
3679
3680
3681$txt_fh->close if $txt_fh;
3682$htm_fh->close if $htm_fh;
3683
3684if ($xls_fh) {
3685 # close Excel Workbook
3686 $ws_global->set_first_sheet();
3687 # FIXME: whyever - activate does not work :-/
3688 $ws_global->activate();
3689 $workbook->close();
059ec3d9
PH
3690}
3691
d5692f86 3692
059ec3d9 3693# End of eximstats
d5692f86 3694
608bc29d 3695
d5692f86 3696# FIXME: Doku