typoes
[exim.git] / src / src / eximstats.src
CommitLineData
059ec3d9 1#!PERL_COMMAND -w
059ec3d9 2
5a66c31b 3# Copyright (c) 2001-2014 University of Cambridge.
059ec3d9
PH
4# See the file NOTICE for conditions of use and distribution.
5
6# Perl script to generate statistics from one or more Exim log files.
7
8# Usage: eximstats [<options>] <log file> <log file> ...
9
10# 1996-05-21: Ignore lines not starting with valid date/time, just in case
11# these get into a log file.
12# 1996-11-19: Add the -h option to control the size of the histogram,
13# and optionally turn it off.
14# Use some Perl 5 things; it should be everywhere by now.
15# Add the Perl -w option and rewrite so no warnings are given.
16# Add the -t option to control the length of the "top" listing.
17# Add the -ne, -nt options to turn off errors and transport
18# information.
19# Add information about length of time on queue, and -q<list> to
20# control the intervals and turn it off.
21# Add count and percentage of delayed messages to the Received
22# line.
23# Show total number of errors.
24# Add count and percentage of messages with errors to Received
25# line.
26# Add information about relaying and -nr to suppress it.
27# 1997-02-03 Merged in some of the things Nigel Metheringham had done:
28# Re-worded headings
29# Added received histogram as well as delivered
30# Added local senders' league table
31# Added local recipients' league table
32# 1997-03-10 Fixed typo "destinationss"
33# Allow for intermediate address between final and original
34# when testing for relaying
35# Give better message when no input
36# 1997-04-24 Fixed bug in layout of error listing that was depending on
37# text length (output line got repeated).
38# 1997-05-06 Bug in option decoding when only one option.
39# Overflow bug when handling very large volumes.
40# 1997-10-28 Updated to handle revised log format that might show
41# HELO name as well as host name before IP number
42# 1998-01-26 Bugs in the function for calculating the number of seconds
43# since 1970 from a log date
44# 1998-02-02 Delivery to :blackhole: doesn't have a T= entry in the log
45# line; cope with this, thereby avoiding undefined problems
46# Very short log line gave substring error
47# 1998-02-03 A routed delivery to a local transport may not have <> in the
48# log line; terminate the address at white space, not <
49# 1998-09-07 If first line of input was a => line, $thissize was undefined;
50# ensure it is zero.
51# 1998-12-21 Adding of $thissize from => line should have been adding $size.
52# Oops. Should have looked more closely when fixing the previous
53# bug!
54# 1999-11-12 Increased the field widths for printed integers; numbers are
55# bigger than originally envisaged.
56# 2001-03-21 Converted seconds() routine to use Time::Local, fixing a bug
57# whereby seconds($timestamp) - id_seconds($id) gave an
58# incorrect result.
59# Added POD documentation.
60# Moved usage instructions into help() subroutine.
61# Added 'use strict' and declared all global variables.
62# Added '-html' flag and resultant code.
63# Added '-cache' flag and resultant code.
64# Added add_volume() routine and converted all volume variables
65# to use it, fixing the overflow problems for individual hosts
66# on large sites.
67# Converted all volume output to GB/MB/KB as appropriate.
68# Don't store local user stats if -nfl is specified.
69# Modifications done by: Steve Campbell (<steve@computurn.com>)
70# 2001-04-02 Added the -t_remote_users flag. Steve Campbell.
71# 2001-10-15 Added the -domain flag. Steve Campbell.
72# 2001-10-16 Accept files on STDIN or on the command line. Steve Campbell.
73# 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
74# We now generate our main parsing subroutine as an eval statement
75# which improves performance dramatically when not all the results
76# are required. We also cache the last timestamp to time convertion.
77#
78# NOTE: 'Top 50 destinations by (message count|volume)' lines are
79# now 'Top N (host|email|domain) destinations by (message count|volume)'
80# where N is the topcount. Steve Campbell.
81#
82# 2001-10-30 V1.16 Joachim Wieland.
83# Fixed minor bugs in add_volume() when taking over this version
84# for use in Exim 4: -w gave uninitialized value warnings in
85# two situations: for the first addition to a counter, and if
86# there were never any gigabytes, thereby leaving the $gigs
87# value unset.
88# Initialized $last_timestamp to stop a -w uninitialized warning.
89# Minor layout tweak for grand totals (nitpicking).
90# Put the IP addresses for relaying stats in [] and separated by
91# a space from the domain name.
92# Removed the IPv4-specific address test when picking out addresses
93# for relaying. Anything inside [] is OK.
94#
95# 2002-07-02 Philip Hazel
96# Fixed "uninitialized variable" message that occurred for relay
97# messages that arrived from H=[1.2.3.4] hosts (no name shown).
98# This bug didn't affect the output.
99#
100# 2002-04-15 V1.17 Joachim Wieland.
101# Added -charts, -chartdir. -chartrel options which use
102# GD::Graph modules to create graphical charts of the statistics.
103#
104# 2002-04-15 V1.18 Steve Campbell.
105# Added a check for $domain to to stop a -w uninitialized warning.
106# Added -byemaildomain option.
107# Only print HTML header links to included tables!
108#
109# 2002-08-02 V1.19 Steve Campbell.
110# Changed the debug mode to dump the parser onto STDERR rather
111# than STDOUT. Documented the -d flag into the help().
112# Rejoined the divergent 2002-04-15 and 2002-07-02 releases.
113#
114# 2002-08-21 V1.20 Steve Campbell.
115# Added the '-merge' option to allow merging of previous reports.
116# Fixed a missing semicolon when doing -bydomain.
117# Make volume charts plot the data gigs and bytes rather than just bytes.
118# Only process log lines with $flag =~ /<=|=>|->|==|\*\*|Co/
119# Converted Emaildomain to Edomain - the column header was too wide!
120# This changes the text output slightly. You can revert to the old
121# column widths by changing $COLUMN_WIDTHS to 7;
122#
123# 2002-09-04 V1.21 Andreas J Mueller
124# Local deliveries domain now defaults to 'localdomain'.
125# Don't match F=<From> when looking for the user.
126#
127# 2002-09-05 V1.22 Steve Campbell
128# Fixed a perl 5.005 incompatibility problem ('our' variables).
129#
130# 2002-09-11 V1.23 Steve Campbell
131# Stopped -charts option from throwing errors on null data.
132# Don't print out 'Errors encountered' unless there are any.
133
134# 2002-10-21 V1.23a Philip Hazel - patch from Tony Finch put in until
135# Steve's eximstats catches up.
136# Handle log files that include the timezone after the timestamp.
137# Switch to assuming that log timestamps are in local time, with
138# an option for UTC timestamps, as in Exim itself.
139#
140# 2003-02-05 V1.24 Steve Campbell
141# Added in Sergey Sholokh's code to convert '<' and '>' characters
142# in HTML output. Also added code to convert them back with -merge.
143# Fixed timestamp offsets to convert to seconds rather than minutes.
144# Updated -merge to work with output files using timezones.
145# Added cacheing to speed up the calculation of timezone offsets.
146#
147# 2003-02-07 V1.25 Steve Campbell
148# Optimised the usage of mktime() in the seconds subroutine.
149# Removed the now redundant '-cache' option.
150# html2txt() now explicitly matches HTML tags.
151# Implemented a new sorting algorithm - the top_n_sort() routine.
152# Added Danny Carroll's '-nvr' flag and code.
153#
154# 2003-03-13 V1.26 Steve Campbell
155# Implemented HTML compliance changes recommended by Bernard Massot.
156# Bug fix to allow top_n_sort() to handle null keys.
157# Convert all domains and edomains to lowercase.
158# Remove preceding dots from domains.
159#
160# 2003-03-13 V1.27 Steve Campbell
161# Replaced border attributes with 'border=1', as recommended by
162# Bernard Massot.
163#
164# 2003-06-03 V1.28 John Newman
165# Added in the ability to skip over the parsing and evaulation of
166# specific transports as passed to eximstats via the new "-nt/.../"
167# command line argument. This new switch allows the viewing of
168# not more accurate statistics but more applicable statistics when
169# special transports are in use (ie; SpamAssassin). We need to be
170# able to ignore transports such as this otherwise the resulting
171# local deliveries are significantly skewed (doubled)...
172#
173# 2003-11-06 V1.29 Steve Campbell
174# Added the '-pattern "Description" "/pattern/"' option.
175#
176# 2004-02-17 V1.30 Steve Campbell
177# Added warnings if required GD::Graph modules are not available or
178# insufficient -chart* options are specified.
179#
8e669ac1 180# 2004-02-20 V1.31 Andrea Balzi
059ec3d9
PH
181# Only show the Local Sender/Destination links if the tables exist.
182#
1b4fe9dd
PH
183# 2004-07-05 V1.32 Steve Campbell
184# Fix '-merge -h0' divide by zero error.
185#
186# 2004-07-15 V1.33 Steve Campbell
187# Documentation update - I've converted the subroutine
188# documentation from POD to comments.
8974000d
SC
189#
190# 2004-12-10 V1.34 Steve Campbell
191# Eximstats can now parse syslog lines as well as mainlog lines.
192#
884c2af8 193# 2004-12-20 V1.35 Wouter Verhelst
d5692f86 194# Pie charts by volume were actually generated by count. Fixed.
50adf73a
SC
195#
196# 2005-02-07 V1.36 Gregor Herrmann / Steve Campbell
197# Added average sizes to HTML Top tables.
d5692f86
SC
198#
199# 2005-04-26 V1.37 Frank Heydlauf
200# Added -xls and the ability to specify output files.
201#
202# 2005-04-29 V1.38 Steve Campbell
203# Use FileHandles for outputing results.
204# Allow any combination of xls, txt, and html output.
205# Fixed display of large numbers with -nvr option
206# Fixed merging of reports with empty tables.
207#
208# 2005-05-27 V1.39 Steve Campbell
209# Added the -include_original_destination flag
210# Removed tabs and trailing whitespace.
211#
608bc29d
SC
212# 2005-06-03 V1.40 Steve Campbell
213# Whilst parsing the mainlog(s), store information about
214# the messages in a hash of arrays rather than using
215# individual hashes. This is a bit cleaner and results in
216# dramatic memory savings, albeit at a slight CPU cost.
217#
218# 2005-06-15 V1.41 Steve Campbell
219# Added the -show_rt<list> flag.
220# Added the -show_dt<list> flag.
221#
222# 2005-06-24 V1.42 Steve Campbell
223# Added Histograms for user specified patterns.
d5692f86 224#
81aad8c9
SC
225# 2005-06-30 V1.43 Steve Campbell
226# Bug fix for V1.42 with -h0 specified. Spotted by Chris Lear.
227#
a83c7e95
SC
228# 2005-07-26 V1.44 Steve Campbell
229# Use a glob alias rather than an array ref in the generated
230# parser. This improves both readability and performance.
231#
232# 2005-09-30 V1.45 Marco Gaiarin / Steve Campbell
233# Collect SpamAssassin and rejection statistics.
234# Don't display local sender or destination tables unless
235# there is data to show.
236# Added average volumes into the top table text output.
237#
238# 2006-02-07 V1.46 Steve Campbell
239# Collect data on the number of addresses (recipients)
240# as well as the number of messages.
241#
242# 2006-05-05 V1.47 Steve Campbell
243# Added 'Message too big' to the list of mail rejection
244# reasons (thanks to Marco Gaiarin).
245#
246# 2006-06-05 V1.48 Steve Campbell
247# Mainlog lines which have GMT offsets and are too short to
248# have a flag are now skipped.
249#
250# 2006-11-10 V1.49 Alain Williams
251# Added the -emptyok flag.
252#
253# 2006-11-16 V1.50 Steve Campbell
254# Fixes for obtaining the IP address from reject messages.
255#
e2f7a0d2
SC
256# 2006-11-27 V1.51 Steve Campbell
257# Another update for obtaining the IP address from reject messages.
258#
259# 2006-11-27 V1.52 Steve Campbell
260# Tally any reject message containing SpamAssassin.
261#
f3f065bb
PH
262# 2007-01-31 V1.53 Philip Hazel
263# Allow for [pid] after date in log lines
264#
a797be69
SC
265# 2007-02-14 V1.54 Daniel Tiefnig
266# Improved the '($parent) =' pattern match.
267#
30e9ac3f
SC
268# 2007-03-19 V1.55 Steve Campbell
269# Differentiate between permanent and temporary rejects.
270#
271# 2007-03-29 V1.56 Jez Hancock
272# Fixed some broken HTML links and added missing column headers.
273#
274# 2007-03-30 V1.57 Steve Campbell
275# Fixed Grand Total Summary Domains, Edomains, and Email columns
276# for Rejects, Temp Rejects, Ham, and Spam rows.
277#
5e6e6734
SC
278# 2007-04-11 V1.58 Steve Campbell
279# Fix to get <> and blackhole to show in edomain tables.
280#
37f8a7c9
SC
281# 2007-09-20 V1.59 Steve Campbell
282# Added the -bylocaldomain option
283#
d5c13d66
NM
284# 2007-09-20 V1.60 Heiko Schlittermann
285# Fix for misinterpreted log lines
286#
514ee161
SC
287# 2013-01-14 V1.61 Steve Campbell
288# Watch out for senders sending "HELO [IpAddr]"
a83c7e95 289#
d5692f86
SC
290#
291# For documentation on the logfile format, see
292# http://www.exim.org/exim-html-4.50/doc/html/spec_48.html#IX2793
059ec3d9
PH
293
294=head1 NAME
295
8974000d 296eximstats - generates statistics from Exim mainlog or syslog files.
059ec3d9
PH
297
298=head1 SYNOPSIS
299
d5692f86 300 eximstats [Output] [Options] mainlog1 mainlog2 ...
059ec3d9
PH
301 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_report.txt
302
d5692f86
SC
303=head2 Output:
304
305=over 4
306
307=item B<-txt>
308
309Output the results in plain text to STDOUT.
310
311=item B<-txt>=I<filename>
312
313Output the results in plain text. Filename '-' for STDOUT is accepted.
314
315=item B<-html>
316
317Output the results in HTML to STDOUT.
318
319=item B<-html>=I<filename>
320
321Output the results in HTML. Filename '-' for STDOUT is accepted.
322
323=item B<-xls>
324
325Output the results in Excel compatible Format to STDOUT.
326Requires the Spreadsheet::WriteExcel CPAN module.
327
328=item B<-xls>=I<filename>
329
330Output the results in Excel compatible format. Filename '-' for STDOUT is accepted.
331
332
333=back
334
335=head2 Options:
059ec3d9
PH
336
337=over 4
338
339=item B<-h>I<number>
340
341histogram divisions per hour. The default is 1, and
3420 suppresses histograms. Valid values are:
343
3440, 1, 2, 3, 5, 10, 15, 20, 30 or 60.
345
346=item B<-ne>
347
348Don't display error information.
349
350=item B<-nr>
351
352Don't display relaying information.
353
354=item B<-nr>I</pattern/>
355
356Don't display relaying information that matches.
357
358=item B<-nt>
359
360Don't display transport information.
361
362=item B<-nt>I</pattern/>
363
364Don't display transport information that matches
365
366=item B<-q>I<list>
367
368List of times for queuing information single 0 item suppresses.
369
370=item B<-t>I<number>
371
372Display top <number> sources/destinations
373default is 50, 0 suppresses top listing.
374
375=item B<-tnl>
376
377Omit local sources/destinations in top listing.
378
379=item B<-t_remote_users>
380
381Include remote users in the top source/destination listings.
382
d5692f86
SC
383=item B<-include_original_destination>
384
385Include the original destination email addresses rather than just
386using the final ones.
387Useful for finding out which of your mailing lists are receiving mail.
388
608bc29d
SC
389=item B<-show_dt>I<list>
390
391Show the delivery times (B<DT>)for all the messages.
392
14eee748 393Exim must have been configured to use the +deliver_time logging option
608bc29d
SC
394for this option to work.
395
396I<list> is an optional list of times. Eg -show_dt1,2,4,8 will show
397the number of messages with delivery times under 1 second, 2 seconds, 4 seconds,
3988 seconds, and over 8 seconds.
399
400=item B<-show_rt>I<list>
401
402Show the receipt times for all the messages. The receipt time is
403defined as the Completed hh:mm:ss - queue_time_overall - the Receipt hh:mm:ss.
404These figures will be skewed by pipelined messages so might not be that useful.
405
406Exim must have been configured to use the +queue_time_overall logging option
407for this option to work.
408
409I<list> is an optional list of times. Eg -show_rt1,2,4,8 will show
410the number of messages with receipt times under 1 second, 2 seconds, 4 seconds,
4118 seconds, and over 8 seconds.
412
059ec3d9
PH
413=item B<-byhost>
414
415Show results by sending host. This may be combined with
416B<-bydomain> and/or B<-byemail> and/or B<-byedomain>. If none of these options
417are specified, then B<-byhost> is assumed as a default.
418
419=item B<-bydomain>
420
421Show results by sending domain.
422May be combined with B<-byhost> and/or B<-byemail> and/or B<-byedomain>.
423
424=item B<-byemail>
425
426Show results by sender's email address.
427May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byedomain>.
428
429=item B<-byemaildomain> or B<-byedomain>
430
431Show results by sender's email domain.
432May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byemail>.
433
434=item B<-pattern> I<Description> I</Pattern/>
435
436Look for the specified pattern and count the number of lines in which it appears.
437This option can be specified multiple times. Eg:
438
439 -pattern 'Refused connections' '/refused connection/'
440
441
442=item B<-merge>
443
444This option allows eximstats to merge old eximstat reports together. Eg:
445
446 eximstats mainlog.sun > report.sun.txt
447 eximstats mainlog.mon > report.mon.txt
448 eximstats mainlog.tue > report.tue.txt
449 eximstats mainlog.wed > report.web.txt
450 eximstats mainlog.thu > report.thu.txt
451 eximstats mainlog.fri > report.fri.txt
452 eximstats mainlog.sat > report.sat.txt
453 eximstats -merge report.*.txt > weekly_report.txt
454 eximstats -merge -html report.*.txt > weekly_report.html
455
456=over 4
457
458=item *
459
460You can merge text or html reports and output the results as text or html.
461
462=item *
463
464You can use all the normal eximstat output options, but only data
465included in the original reports can be shown!
466
467=item *
468
469When merging reports, some loss of accuracy may occur in the top I<n> lists.
470This will be towards the ends of the lists.
471
472=item *
473
474The order of items in the top I<n> lists may vary when the data volumes
475round to the same value.
476
477=back
478
059ec3d9
PH
479=item B<-charts>
480
481Create graphical charts to be displayed in HTML output.
d5692f86 482Only valid in combination with I<-html>.
059ec3d9
PH
483
484This requires the following modules which can be obtained
485from http://www.cpan.org/modules/01modules.index.html
486
487=over 4
488
489=item GD
490
491=item GDTextUtil
492
493=item GDGraph
494
495=back
496
497To install these, download and unpack them, then use the normal perl installation procedure:
498
499 perl Makefile.PL
500 make
501 make test
502 make install
503
504=item B<-chartdir>I <dir>
505
506Create the charts in the directory <dir>
507
508=item B<-chartrel>I <dir>
509
510Specify the relative directory for the "img src=" tags from where to include
511the charts
512
a83c7e95
SC
513=item B<-emptyok>
514
515Specify that it's OK to not find any valid log lines. Without this
516we will output an error message if we don't find any.
517
059ec3d9
PH
518=item B<-d>
519
520Debug flag. This outputs the eval()'d parser onto STDOUT which makes it
521easier to trap errors in the eval section. Remember to add 1 to the line numbers to allow for the
522title!
523
524=back
525
526=head1 DESCRIPTION
527
8974000d 528Eximstats parses exim mainlog and syslog files to output a statistical
059ec3d9 529analysis of the messages processed. By default, a text
d5692f86
SC
530analysis is generated, but you can request other output formats
531using flags. See the help (B<-help>) to learn
059ec3d9
PH
532about how to create charts from the tables.
533
534=head1 AUTHOR
535
536There is a web site at http://www.exim.org - this contains details of the
537mailing list exim-users@exim.org.
538
539=head1 TO DO
540
541This program does not perfectly handle messages whose received
542and delivered log lines are in different files, which can happen
543when you have multiple mail servers and a message cannot be
43236f35 544immediately delivered. Fixing this could be tricky...
059ec3d9 545
d5692f86
SC
546Merging of xls files is not (yet) possible. Be free to implement :)
547
059ec3d9
PH
548=cut
549
550use integer;
551use strict;
d5692f86 552use IO::File;
059ec3d9
PH
553
554# use Time::Local; # PH/FANF
555use POSIX;
556
d5692f86 557use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints $HAVE_Spreadsheet_WriteExcel);
059ec3d9
PH
558eval { require GD::Graph::pie; };
559$HAVE_GD_Graph_pie = $@ ? 0 : 1;
560eval { require GD::Graph::linespoints; };
561$HAVE_GD_Graph_linespoints = $@ ? 0 : 1;
d5692f86
SC
562eval { require Spreadsheet::WriteExcel; };
563$HAVE_Spreadsheet_WriteExcel = $@ ? 0 : 1;
059ec3d9
PH
564
565
566##################################################
567# Static data #
568##################################################
569# 'use vars' instead of 'our' as perl5.005 is still in use out there!
570use vars qw(@tab62 @days_per_month $gig);
571use vars qw($VERSION);
572use vars qw($COLUMN_WIDTHS);
608bc29d 573use vars qw($WEEK $DAY $HOUR $MINUTE);
059ec3d9
PH
574
575
576@tab62 =
577 (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, # 0-9
578 0,10,11,12,13,14,15,16,17,18,19,20, # A-K
579 21,22,23,24,25,26,27,28,29,30,31,32, # L-W
580 33,34,35, 0, 0, 0, 0, 0, # X-Z
581 0,36,37,38,39,40,41,42,43,44,45,46, # a-k
582 47,48,49,50,51,52,53,54,55,56,57,58, # l-w
583 59,60,61); # x-z
584
585@days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
586$gig = 1024 * 1024 * 1024;
514ee161 587$VERSION = '1.61';
059ec3d9
PH
588
589# How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
590$COLUMN_WIDTHS = 8;
591
608bc29d
SC
592$MINUTE = 60;
593$HOUR = 60 * $MINUTE;
594$DAY = 24 * $HOUR;
595$WEEK = 7 * $DAY;
596
059ec3d9
PH
597# Declare global variables.
598use vars qw($total_received_data $total_received_data_gigs $total_received_count);
a83c7e95 599use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_messages $total_delivered_addresses);
d5692f86
SC
600use vars qw(%timestamp2time); #Hash of timestamp => time.
601use vars qw($last_timestamp $last_time); #The last time convertion done.
602use vars qw($last_date $date_seconds); #The last date convertion done.
603use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
059ec3d9 604use vars qw($localtime_offset);
d5692f86
SC
605use vars qw($i); #General loop counter.
606use vars qw($debug); #Debug mode?
607use vars qw($ntopchart); #How many entries should make it into the chart?
608use vars qw($gddirectory); #Where to put files from GD::Graph
a83c7e95
SC
609
610# SpamAssassin variables
611use vars qw($spam_score $spam_score_gigs);
612use vars qw($ham_score $ham_score_gigs);
613use vars qw(%ham_count_by_ip %spam_count_by_ip);
614use vars qw(%rejected_count_by_ip %rejected_count_by_reason);
30e9ac3f 615use vars qw(%temporarily_rejected_count_by_ip %temporarily_rejected_count_by_reason);
a83c7e95
SC
616
617#For use in Speadsheed::WriteExcel
618use vars qw($workbook $ws_global $ws_relayed $ws_errors);
619use vars qw($row $col $row_hist $col_hist);
d5692f86 620use vars qw($run_hist);
a83c7e95 621use vars qw($f_default $f_header1 $f_header2 $f_header2_m $f_headertab $f_percent); #Format Header
d5692f86
SC
622
623# Output FileHandles
624use vars qw($txt_fh $htm_fh $xls_fh);
059ec3d9
PH
625
626$ntopchart = 5;
627
628# The following are parameters whose values are
629# set by command line switches:
630use vars qw($show_errors $show_relay $show_transport $transport_pattern);
37f8a7c9 631use vars qw($topcount $local_league_table $include_remote_users $do_local_domain);
a83c7e95 632use vars qw($hist_opt $hist_interval $hist_number $volume_rounding $emptyOK);
d5692f86 633use vars qw($relay_pattern @queue_times @user_patterns @user_descriptions);
608bc29d 634use vars qw(@rcpt_times @delivery_times);
d5692f86
SC
635use vars qw($include_original_destination);
636use vars qw($txt_fh $htm_fh $xls_fh);
059ec3d9
PH
637
638use vars qw(%do_sender); #Do sender by Host, Domain, Email, and/or Edomain tables.
639use vars qw($charts $chartrel $chartdir $charts_option_specified);
d5692f86 640use vars qw($merge_reports); #Merge old reports ?
059ec3d9
PH
641
642# The following are modified in the parse() routine, and
643# referred to in the print_*() routines.
608bc29d 644use vars qw($delayed_count $relayed_unshown $begin $end);
a83c7e95 645use vars qw(%messages @message);
059ec3d9 646use vars qw(%received_count %received_data %received_data_gigs);
a83c7e95 647use vars qw(%delivered_messages %delivered_data %delivered_data_gigs %delivered_addresses);
059ec3d9 648use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
a83c7e95 649use vars qw(%delivered_messages_user %delivered_addresses_user %delivered_data_user %delivered_data_gigs_user);
37f8a7c9 650use vars qw(%delivered_messages_local_domain %delivered_addresses_local_domain %delivered_data_local_domain %delivered_data_gigs_local_domain);
059ec3d9 651use vars qw(%transported_count %transported_data %transported_data_gigs);
608bc29d
SC
652use vars qw(%relayed %errors_count $message_errors);
653use vars qw(@qt_all_bin @qt_remote_bin);
654use vars qw($qt_all_overflow $qt_remote_overflow);
655use vars qw(@dt_all_bin @dt_remote_bin %rcpt_times_bin);
656use vars qw($dt_all_overflow $dt_remote_overflow %rcpt_times_overflow);
657use vars qw(@received_interval_count @delivered_interval_count);
658use vars qw(@user_pattern_totals @user_pattern_interval_count);
059ec3d9
PH
659
660use vars qw(%report_totals);
661
608bc29d
SC
662# Enumerations
663use vars qw($SIZE $FROM_HOST $FROM_ADDRESS $ARRIVAL_TIME $REMOTE_DELIVERED $PROTOCOL);
664use vars qw($DELAYED $HAD_ERROR);
665$SIZE = 0;
666$FROM_HOST = 1;
667$FROM_ADDRESS = 2;
668$ARRIVAL_TIME = 3;
669$REMOTE_DELIVERED = 4;
670$DELAYED = 5;
671$HAD_ERROR = 6;
672$PROTOCOL = 7;
059ec3d9
PH
673
674
675
676##################################################
677# Subroutines #
678##################################################
679
d5692f86
SC
680#######################################################################
681# get_filehandle($file,\%output_files);
682# Return a filehandle writing to $file.
683#
684# If %output_files is defined, check that $output_files{$file}
685# doesn't exist and die if it does, or set it if it doesn't.
686#######################################################################
687sub get_filehandle {
688 my($file,$output_files_href) = @_;
689
690 $file = '-' if ($file eq '');
691
692 if (defined $output_files_href) {
693 die "You can only output to '$file' once! Use -h for help.\n" if exists $output_files_href->{$file};
694 $output_files_href->{$file} = 1;
695 }
696
697 if ($file eq '-') {
698 return \*STDOUT;
699 }
700
701 if (-e $file) {
702 unlink $file or die "Failed to rm $file: $!";
703 }
704
705 my $fh = new IO::File $file, O_WRONLY|O_CREAT|O_EXCL;
706 die "new IO::File $file failed: $!" unless (defined $fh);
707 return $fh;
708}
709
059ec3d9 710
1b4fe9dd
PH
711#######################################################################
712# volume_rounded();
713#
714# $rounded_volume = volume_rounded($bytes,$gigabytes);
715#
716# Given a data size in bytes, round it to KB, MB, or GB
717# as appropriate.
718#
719# Eg 12000 => 12KB, 15000000 => 14GB, etc.
720#
721# Note: I've experimented with Math::BigInt and it results in a 33%
722# performance degredation as opposed to storing numbers split into
723# bytes and gigabytes.
724#######################################################################
059ec3d9
PH
725sub volume_rounded {
726 my($x,$g) = @_;
727 $x = 0 unless $x;
728 $g = 0 unless $g;
729 my($rounded);
730
731 while ($x > $gig) {
732 $g++;
733 $x -= $gig;
734 }
735
736 if ($volume_rounding) {
737 # Values < 1 GB
738 if ($g <= 0) {
739 if ($x < 10000) {
740 $rounded = sprintf("%6d", $x);
741 }
742 elsif ($x < 10000000) {
743 $rounded = sprintf("%4dKB", ($x + 512)/1024);
744 }
745 else {
746 $rounded = sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
747 }
748 }
749 # Values between 1GB and 10GB are printed in MB
750 elsif ($g < 10) {
751 $rounded = sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024));
752 }
753 else {
754 # Handle values over 10GB
755 $rounded = sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
756 }
757 }
758 else {
759 # We don't want any rounding to be done.
968178ac 760 # and we don't need broken formatted output which on one hand avoids numbers from
d5692f86
SC
761 # being interpreted as string by Spreadsheed Calculators, on the other hand
762 # breaks if more than 4 digits! -> flexible length instead of fixed length
763 # Format the return value at the output routine! -fh
764 #$rounded = sprintf("%d", ($g * $gig) + $x);
765 no integer;
766 $rounded = sprintf("%.0f", ($g * $gig) + $x);
059ec3d9
PH
767 }
768
769 return $rounded;
770}
771
772
1b4fe9dd
PH
773#######################################################################
774# un_round();
8e669ac1 775#
1b4fe9dd 776# un_round($rounded_volume,\$bytes,\$gigabytes);
8e669ac1 777#
1b4fe9dd
PH
778# Given a volume in KB, MB or GB, as generated by volume_rounded(),
779# do the reverse transformation and convert it back into Bytes and Gigabytes.
780# These are added to the $bytes and $gigabytes parameters.
8e669ac1 781#
1b4fe9dd
PH
782# Given a data size in bytes, round it to KB, MB, or GB
783# as appropriate.
8e669ac1 784#
1b4fe9dd
PH
785# EG: 500 => (500,0), 14GB => (0,14), etc.
786#######################################################################
059ec3d9
PH
787sub un_round {
788 my($rounded,$bytes_sref,$gigabytes_sref) = @_;
789
790 if ($rounded =~ /(\d+)GB/) {
791 $$gigabytes_sref += $1;
792 }
793 elsif ($rounded =~ /(\d+)MB/) {
794 $$gigabytes_sref += $1 / 1024;
795 $$bytes_sref += (($1 % 1024 ) * 1024 * 1024);
796 }
797 elsif ($rounded =~ /(\d+)KB/) {
798 $$gigabytes_sref += $1 / (1024 * 1024);
799 $$bytes_sref += ($1 % (1024 * 1024) * 1024);
800 }
801 elsif ($rounded =~ /(\d+)/) {
d5692f86
SC
802 # We need to turn off integer in case we are merging an -nvr report.
803 no integer;
804 $$gigabytes_sref += int($1 / $gig);
059ec3d9
PH
805 $$bytes_sref += $1 % $gig;
806 }
807
808 #Now reduce the bytes down to less than 1GB.
809 add_volume($bytes_sref,$gigabytes_sref,0) if ($$bytes_sref > $gig);
810}
811
812
1b4fe9dd
PH
813#######################################################################
814# add_volume();
8e669ac1 815#
1b4fe9dd 816# add_volume(\$bytes,\$gigs,$size);
8e669ac1 817#
1b4fe9dd
PH
818# Add $size to $bytes/$gigs where this is a number split into
819# bytes ($bytes) and gigabytes ($gigs). This is significantly
820# faster than using Math::BigInt.
821#######################################################################
059ec3d9 822sub add_volume {
1b4fe9dd
PH
823 my($bytes_ref,$gigs_ref,$size) = @_;
824 $$bytes_ref = 0 if ! defined $$bytes_ref;
825 $$gigs_ref = 0 if ! defined $$gigs_ref;
826 $$bytes_ref += $size;
827 while ($$bytes_ref > $gig) {
828 $$gigs_ref++;
829 $$bytes_ref -= $gig;
059ec3d9
PH
830 }
831}
832
833
1b4fe9dd
PH
834#######################################################################
835# format_time();
8e669ac1 836#
1b4fe9dd 837# $formatted_time = format_time($seconds);
8e669ac1 838#
1b4fe9dd
PH
839# Given a time in seconds, break it down into
840# weeks, days, hours, minutes, and seconds.
8e669ac1 841#
1b4fe9dd
PH
842# Eg 12005 => 3h20m5s
843#######################################################################
059ec3d9
PH
844sub format_time {
845my($t) = pop @_;
846my($s) = $t % 60;
847$t /= 60;
848my($m) = $t % 60;
849$t /= 60;
850my($h) = $t % 24;
851$t /= 24;
852my($d) = $t % 7;
853my($w) = $t/7;
854my($p) = "";
855$p .= "$w"."w" if $w > 0;
856$p .= "$d"."d" if $d > 0;
857$p .= "$h"."h" if $h > 0;
858$p .= "$m"."m" if $m > 0;
859$p .= "$s"."s" if $s > 0 || $p eq "";
860$p;
861}
862
863
1b4fe9dd
PH
864#######################################################################
865# unformat_time();
8e669ac1 866#
1b4fe9dd 867# $seconds = unformat_time($formatted_time);
8e669ac1 868#
1b4fe9dd 869# Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
8e669ac1 870#
1b4fe9dd
PH
871# Eg 3h20m5s => 12005
872#######################################################################
059ec3d9 873sub unformat_time {
968178ac 874 my($formatted_time) = pop @_;
059ec3d9
PH
875 my $time = 0;
876
968178ac 877 while ($formatted_time =~ s/^(\d+)([wdhms]?)//) {
059ec3d9
PH
878 $time += $1 if ($2 eq '' || $2 eq 's');
879 $time += $1 * 60 if ($2 eq 'm');
880 $time += $1 * 60 * 60 if ($2 eq 'h');
881 $time += $1 * 60 * 60 * 24 if ($2 eq 'd');
882 $time += $1 * 60 * 60 * 24 * 7 if ($2 eq 'w');
883 }
884 $time;
885}
886
887
1b4fe9dd
PH
888#######################################################################
889# seconds();
8e669ac1 890#
1b4fe9dd 891# $time = seconds($timestamp);
8e669ac1 892#
1b4fe9dd
PH
893# Given a time-of-day timestamp, convert it into a time() value using
894# POSIX::mktime. We expect the timestamp to be of the form
895# "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
896# and the year to be absolute (we do the necessary conversions). The
897# timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
898# offset is not present, and we have not been told that the log is in UTC
899# (with the -utc option), then we adjust the time by the current local
900# time offset so that it can be compared with the time recorded in message
901# IDs, which is UTC.
8e669ac1 902#
1b4fe9dd
PH
903# To improve performance, we only use mktime on the date ($year-$mon-$day),
904# and only calculate it if the date is different to the previous time we
905# came here. We then add on seconds for the '$hour:$min:$sec'.
8e669ac1 906#
1b4fe9dd
PH
907# We also store the results of the last conversion done, and only
908# recalculate if the date is different.
8e669ac1 909#
1b4fe9dd
PH
910# We used to have the '-cache' flag which would store the results of the
911# mktime() call. However, the current way of just using mktime() on the
912# date obsoletes this.
913#######################################################################
059ec3d9
PH
914sub seconds {
915 my($timestamp) = @_;
916
917 # Is the timestamp the same as the last one?
918 return $last_time if ($last_timestamp eq $timestamp);
919
920 return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o);
921
922 unless ($last_date eq $1) {
923 $last_date = $1;
924 my(@timestamp) = (0,0,0,$4,$3,$2);
925 $timestamp[5] -= 1900;
926 $timestamp[4]--;
927 $date_seconds = mktime(@timestamp);
928 }
929 my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
930
931 # SC. Use cacheing. Also note we want seconds not minutes.
932 #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
933 if (defined $8 && ($8 ne $last_offset)) {
934 $last_offset = $8;
935 $offset_seconds = ($10 * 60 + $11) * 60;
936 $offset_seconds = -$offset_seconds if ($9 eq '-');
937 }
938
939
940 if (defined $7) {
941 #$time -= $this_offset;
942 $time -= $offset_seconds;
943 } elsif (defined $localtime_offset) {
944 $time -= $localtime_offset;
945 }
946
947 # Store the last timestamp received.
948 $last_timestamp = $timestamp;
949 $last_time = $time;
950
951 $time;
952}
953
954
1b4fe9dd
PH
955#######################################################################
956# id_seconds();
8e669ac1 957#
1b4fe9dd 958# $time = id_seconds($message_id);
8e669ac1 959#
1b4fe9dd
PH
960# Given a message ID, convert it into a time() value.
961#######################################################################
059ec3d9
PH
962sub id_seconds {
963my($sub_id) = substr((pop @_), 0, 6);
964my($s) = 0;
965my(@c) = split(//, $sub_id);
966while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] }
967$s;
968}
969
608bc29d
SC
970#######################################################################
971# wdhms_seconds();
972#
973# $seconds = wdhms_seconds($string);
974#
975# Convert a string in a week/day/hour/minute/second format (eg 4h10s)
976# into seconds.
977#######################################################################
978sub wdhms_seconds {
979 if ($_[0] =~ /^(?:(\d+)w)?(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s)?/) {
980 return((($1||0) * $WEEK) + (($2||0) * $DAY) + (($3||0) * $HOUR) + (($4||0) * $MINUTE) + ($5||0));
981 }
982 return undef;
983}
984
985#######################################################################
986# queue_time();
987#
988# $queued = queue_time($completed_tod, $arrival_time, $id);
989#
990# Given the completed time of day and either the arrival time
991# (preferred), or the message ID, calculate how long the message has
992# been on the queue.
993#
994#######################################################################
995sub queue_time {
996 my($completed_tod, $arrival_time, $id) = @_;
997
998 # Note: id_seconds() benchmarks as 42% slower than seconds()
999 # and computing the time accounts for a significant portion of
1000 # the run time.
1001 if (defined $arrival_time) {
1002 return(seconds($completed_tod) - seconds($arrival_time));
1003 }
1004 else {
1005 return(seconds($completed_tod) - id_seconds($id));
1006 }
1007}
059ec3d9
PH
1008
1009
1b4fe9dd
PH
1010#######################################################################
1011# calculate_localtime_offset();
8e669ac1 1012#
1b4fe9dd 1013# $localtime_offset = calculate_localtime_offset();
8e669ac1 1014#
1b4fe9dd 1015# Calculate the the localtime offset from gmtime in seconds.
8e669ac1 1016#
1b4fe9dd 1017# $localtime = time() + $localtime_offset.
8e669ac1 1018#
1b4fe9dd
PH
1019# These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
1020# (West is negative, East is positive.)
1021#######################################################################
059ec3d9
PH
1022
1023# $localtime = gmtime() + $localtime_offset. OLD COMMENT
1024# This subroutine commented out as it's not currently in use.
1025
1026#sub calculate_localtime_offset {
1027# # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
1028# my (@sample_date) = (0,0,0,5,5,100);
1029# my $localtime = timelocal(@sample_date);
1030# my $gmtime = timegm(@sample_date);
1031# my $offset = $localtime - $gmtime;
1032# return $offset;
1033#}
1034
1035sub calculate_localtime_offset {
1036 # Assume that the offset at the moment is valid across the whole
1037 # period covered by the logs that we're analysing. This may not
1038 # be true around the time the clocks change in spring or autumn.
1039 my $utc = time;
1040 # mktime works on local time and gmtime works in UTC
1041 my $local = mktime(gmtime($utc));
1042 return $local - $utc;
1043}
1044
1045
608bc29d 1046
1b4fe9dd 1047#######################################################################
608bc29d
SC
1048# print_duration_table();
1049#
1050# print_duration_table($title, $message_type, \@times, \@values, $overflow);
1051#
1052# Print a table showing how long a particular step took for
1053# the messages. The parameters are:
1054# $title Eg "Time spent on the queue"
1055# $message_type Eg "Remote"
1056# \@times The maximum time a message took for it to increment
1057# the corresponding @values counter.
1058# \@values An array of message counters.
1059# $overflow The number of messages which exceeded the maximum
1060# time.
1b4fe9dd 1061#######################################################################
608bc29d 1062sub print_duration_table {
059ec3d9 1063no integer;
608bc29d 1064my($title, $message_type, $times_aref, $values_aref, $overflow) = @_;
059ec3d9
PH
1065my(@chartdatanames);
1066my(@chartdatavals);
1067
1068my $printed_one = 0;
1069my $cumulative_percent = 0;
059ec3d9 1070
608bc29d
SC
1071my $queue_total = $overflow;
1072map {$queue_total += $_} @$values_aref;
059ec3d9 1073
608bc29d 1074my $temp = "$title: $message_type";
059ec3d9 1075
d5692f86
SC
1076
1077my $txt_format = "%5s %4s %6d %5.1f%% %5.1f%%\n";
1078my $htm_format = "<tr><td align=\"right\">%s %s</td><td align=\"right\">%d</td><td align=\"right\">%5.1f%%</td><td align=\"right\">%5.1f%%</td>\n";
1079
1080# write header
1081printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
1082if ($htm_fh) {
608bc29d 1083 print $htm_fh "<hr><a name=\"$title $message_type\"></a><h2>$temp</h2>\n";
a83c7e95 1084 print $htm_fh "<table border=0 width=\"100%\"><tr><td><table border=1>\n";
d5692f86 1085 print $htm_fh "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th>\n";
059ec3d9 1086}
a83c7e95 1087if ($xls_fh) {
608bc29d 1088 $ws_global->write($row++, $col, "$title: ".$message_type, $f_header2);
d5692f86
SC
1089 my @content=("Time", "Messages", "Percentage", "Cumulative Percentage");
1090 &set_worksheet_line($ws_global, $row++, 1, \@content, $f_headertab);
059ec3d9
PH
1091}
1092
d5692f86 1093
608bc29d
SC
1094for ($i = 0; $i <= $#$times_aref; ++$i) {
1095 if ($$values_aref[$i] > 0)
059ec3d9 1096 {
608bc29d 1097 my $percent = ($values_aref->[$i] * 100)/$queue_total;
059ec3d9 1098 $cumulative_percent += $percent;
d5692f86
SC
1099
1100 my @content=($printed_one? " " : "Under",
608bc29d
SC
1101 format_time($times_aref->[$i]),
1102 $values_aref->[$i], $percent, $cumulative_percent);
d5692f86
SC
1103
1104 if ($htm_fh) {
1105 printf $htm_fh ($htm_format, @content);
608bc29d 1106 if (!defined($values_aref->[$i])) {
d5692f86
SC
1107 print $htm_fh "Not defined";
1108 }
1109 }
1110 if ($txt_fh) {
1111 printf $txt_fh ($txt_format, @content);
608bc29d 1112 if (!defined($times_aref->[$i])) {
d5692f86
SC
1113 print $txt_fh "Not defined";
1114 }
1115 }
1116 if ($xls_fh)
1117 {
1118 no integer;
1119 &set_worksheet_line($ws_global, $row, 0, [@content[0,1,2]], $f_default);
1120 &set_worksheet_line($ws_global, $row++, 3, [$content[3]/100,$content[4]/100], $f_percent);
1121
608bc29d 1122 if (!defined($times_aref->[$i])) {
d5692f86
SC
1123 $col=0;
1124 $ws_global->write($row++, $col, "Not defined" );
1125 }
059ec3d9 1126 }
d5692f86 1127
059ec3d9 1128 push(@chartdatanames,
608bc29d
SC
1129 ($printed_one? "" : "Under") . format_time($times_aref->[$i]));
1130 push(@chartdatavals, $$values_aref[$i]);
059ec3d9
PH
1131 $printed_one = 1;
1132 }
1133}
1134
608bc29d
SC
1135if ($overflow && $overflow > 0) {
1136 my $percent = ($overflow * 100)/$queue_total;
059ec3d9 1137 $cumulative_percent += $percent;
d5692f86 1138
608bc29d
SC
1139 my @content = ("Over ", format_time($times_aref->[-1]),
1140 $overflow, $percent, $cumulative_percent);
d5692f86
SC
1141
1142 printf $txt_fh ($txt_format, @content) if $txt_fh;
1143 printf $htm_fh ($htm_format, @content) if $htm_fh;
1144 if ($xls_fh)
1145 {
1146 &set_worksheet_line($ws_global, $row, 0, [@content[0,1,2]], $f_default);
1147 &set_worksheet_line($ws_global, $row++, 3, [$content[3]/100,$content[4]/100], $f_percent);
1148 }
1149
059ec3d9 1150}
d5692f86 1151
608bc29d
SC
1152push(@chartdatanames, "Over " . format_time($times_aref->[-1]));
1153push(@chartdatavals, $overflow);
059ec3d9
PH
1154
1155#printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;
d5692f86 1156if ($htm_fh) {
a83c7e95 1157 print $htm_fh "</table></td><td>";
059ec3d9 1158
608bc29d 1159 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0)) {
059ec3d9
PH
1160 my @data = (
1161 \@chartdatanames,
1162 \@chartdatavals
1163 );
1164 my $graph = GD::Graph::pie->new(200, 200);
608bc29d
SC
1165 my $pngname = "$title-$message_type.png";
1166 $pngname =~ s/[^\w\-\.]/_/;
1167
1168 my $graph_title = "$title ($message_type)";
1169 $graph->set(title => $graph_title) if (length($graph_title) < 21);
1170
059ec3d9
PH
1171 my $gd = $graph->plot(\@data) or warn($graph->error);
1172 if ($gd) {
d5692f86 1173 open(IMG, ">$chartdir/$pngname") or die "Could not write $chartdir/$pngname: $!\n";
059ec3d9
PH
1174 binmode IMG;
1175 print IMG $gd->png;
1176 close IMG;
d5692f86 1177 print $htm_fh "<img src=\"$chartrel/$pngname\">";
059ec3d9
PH
1178 }
1179 }
d5692f86
SC
1180 print $htm_fh "</td></tr></table>\n";
1181}
1182
1183if ($xls_fh)
1184{
1185 $row++;
059ec3d9 1186}
d5692f86
SC
1187print $txt_fh "\n" if $txt_fh;
1188print $htm_fh "\n" if $htm_fh;
1189
059ec3d9
PH
1190}
1191
1192
1b4fe9dd
PH
1193#######################################################################
1194# print_histogram();
8e669ac1 1195#
608bc29d 1196# print_histogram('Deliveries|Messages received|$pattern', $unit, @interval_count);
8e669ac1 1197#
1b4fe9dd
PH
1198# Print a histogram of the messages delivered/received per time slot
1199# (hour by default).
1200#######################################################################
059ec3d9 1201sub print_histogram {
608bc29d 1202my($text, $unit, @interval_count) = @_;
059ec3d9
PH
1203my(@chartdatanames);
1204my(@chartdatavals);
1205my($maxd) = 0;
d5692f86 1206
a83c7e95
SC
1207# save first row of print_histogram for xls output
1208if (!$run_hist) {
d5692f86
SC
1209 $row_hist = $row;
1210}
a83c7e95 1211else {
d5692f86
SC
1212 $row = $row_hist;
1213}
1214
059ec3d9
PH
1215for ($i = 0; $i < $hist_number; $i++)
1216 { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; }
1217
1218my $scale = int(($maxd + 25)/50);
1219$scale = 1 if $scale == 0;
1220
608bc29d
SC
1221if ($scale != 1) {
1222 if ($unit !~ s/y$/ies/) {
1223 $unit .= 's';
1224 }
d5692f86
SC
1225}
1226
1227# make and output title
1228my $title = sprintf("$text per %s",
1229 ($hist_interval == 60)? "hour" :
1230 ($hist_interval == 1)? "minute" : "$hist_interval minutes");
059ec3d9 1231
608bc29d 1232my $txt_htm_title = $title . " (each dot is $scale $unit)";
059ec3d9 1233
d5692f86
SC
1234printf $txt_fh ("%s\n%s\n\n", $txt_htm_title, "-" x length($txt_htm_title)) if $txt_fh;
1235
1236if ($htm_fh) {
1237 print $htm_fh "<hr><a name=\"$text\"></a><h2>$txt_htm_title</h2>\n";
1238 print $htm_fh "<table border=0 width=\"100%\">\n";
1239 print $htm_fh "<tr><td><pre>\n";
059ec3d9 1240}
d5692f86 1241
a83c7e95 1242if ($xls_fh) {
d5692f86 1243 $title =~ s/Messages/Msg/ ;
a83c7e95 1244 $row += 2;
d5692f86 1245 $ws_global->write($row++, $col_hist+1, $title, $f_headertab);
059ec3d9
PH
1246}
1247
d5692f86 1248
059ec3d9
PH
1249my $hour = 0;
1250my $minutes = 0;
a83c7e95 1251for ($i = 0; $i < $hist_number; $i++) {
059ec3d9
PH
1252 my $c = $interval_count[$i];
1253
1254 # If the interval is an hour (the maximum) print the starting and
1255 # ending hours as a label. Otherwise print the starting hour and
1256 # minutes, which take up the same space.
1257
1258 my $temp;
a83c7e95 1259 if ($hist_opt == 1) {
059ec3d9 1260 $temp = sprintf("%02d-%02d", $hour, $hour + 1);
d5692f86
SC
1261
1262 print $txt_fh $temp if $txt_fh;
1263 print $htm_fh $temp if $htm_fh;
1264
a83c7e95
SC
1265 if ($xls_fh) {
1266 if ($run_hist==0) {
1267 # only on first run
1268 $ws_global->write($row, 0, [$temp], $f_default);
d5692f86
SC
1269 }
1270 }
1271
059ec3d9
PH
1272 push(@chartdatanames, $temp);
1273 $hour++;
d5692f86 1274 }
a83c7e95 1275 else {
059ec3d9
PH
1276 if ($minutes == 0)
1277 { $temp = sprintf("%02d:%02d", $hour, $minutes) }
1278 else
1279 { $temp = sprintf(" :%02d", $minutes) }
d5692f86
SC
1280
1281 print $txt_fh $temp if $txt_fh;
1282 print $htm_fh $temp if $htm_fh;
a83c7e95
SC
1283 if (($xls_fh) and ($run_hist==0)) {
1284 # only on first run
d5692f86 1285 $temp = sprintf("%02d:%02d", $hour, $minutes);
a83c7e95 1286 $ws_global->write($row, 0, [$temp], $f_default);
d5692f86
SC
1287 }
1288
059ec3d9
PH
1289 push(@chartdatanames, $temp);
1290 $minutes += $hist_interval;
a83c7e95 1291 if ($minutes >= 60) {
059ec3d9
PH
1292 $minutes = 0;
1293 $hour++;
a83c7e95 1294 }
059ec3d9 1295 }
d5692f86
SC
1296 push(@chartdatavals, $c);
1297
1298 printf $txt_fh (" %6d %s\n", $c, "." x ($c/$scale)) if $txt_fh;
1299 printf $htm_fh (" %6d %s\n", $c, "." x ($c/$scale)) if $htm_fh;
a83c7e95 1300 $ws_global->write($row++, $col_hist+1, [$c], $f_default) if $xls_fh;
d5692f86
SC
1301
1302} #end for
1303
1304printf $txt_fh "\n" if $txt_fh;
1305printf $htm_fh "\n" if $htm_fh;
1306
1307if ($htm_fh)
1308{
1309 print $htm_fh "</pre>\n";
1310 print $htm_fh "</td><td>\n";
608bc29d 1311 if ($HAVE_GD_Graph_linespoints && $charts && ($#chartdatavals > 0)) {
059ec3d9
PH
1312 # calculate the graph
1313 my @data = (
1314 \@chartdatanames,
1315 \@chartdatavals
1316 );
1317 my $graph = GD::Graph::linespoints->new(300, 300);
1318 $graph->set(
1319 x_label => 'Time',
1320 y_label => 'Amount',
1321 title => $text,
1322 x_labels_vertical => 1
1323 );
608bc29d
SC
1324 my $pngname = "histogram_$text.png";
1325 $pngname =~ s/[^\w\._]/_/g;
1326
059ec3d9
PH
1327 my $gd = $graph->plot(\@data) or warn($graph->error);
1328 if ($gd) {
d5692f86 1329 open(IMG, ">$chartdir/$pngname") or die "Could not write $chartdir/$pngname: $!\n";
059ec3d9
PH
1330 binmode IMG;
1331 print IMG $gd->png;
1332 close IMG;
d5692f86 1333 print $htm_fh "<img src=\"$chartrel/$pngname\">";
059ec3d9
PH
1334 }
1335 }
d5692f86 1336 print $htm_fh "</td></tr></table>\n";
059ec3d9 1337}
d5692f86
SC
1338
1339$col_hist++; # where to continue next times
1340
1341$row+=2; # leave some space after history block
1342$run_hist=1; # we have done this once or more
059ec3d9
PH
1343}
1344
1345
1346
1b4fe9dd
PH
1347#######################################################################
1348# print_league_table();
8e669ac1 1349#
a83c7e95 1350# print_league_table($league_table_type,\%message_count,\%address_count,\%message_data,\%message_data_gigs, $spreadsheet, $row_sref);
8e669ac1 1351#
a83c7e95
SC
1352# Given hashes of message count, address count, and message data,
1353# which are keyed by the table type (eg by the sending host), print a
1354# league table showing the top $topcount (defaults to 50).
1b4fe9dd 1355#######################################################################
059ec3d9 1356sub print_league_table {
a83c7e95
SC
1357 my($text,$m_count,$a_count,$m_data,$m_data_gigs,$spreadsheet, $row_sref) = @_;
1358 my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";
1359 my($title) = "Top $name by message count";
1360 my(@chartdatanames) = ();
1361 my(@chartdatavals) = ();
1362 my $chartotherval = 0;
1363 $text = ucfirst($text);
059ec3d9
PH
1364
1365 # Align non-local addresses to the right (so all the .com's line up).
1366 # Local addresses are aligned on the left as they are userids.
1367 my $align = ($text !~ /local/i) ? 'right' : 'left';
059ec3d9 1368
d5692f86 1369
a83c7e95
SC
1370 ################################################
1371 # Generate the printf formats and table headers.
1372 ################################################
1373 my(@headers) = ('Messages');
30e9ac3f
SC
1374 #push(@headers,'Addresses') if defined $a_count;
1375 push(@headers,'Addresses') if defined $a_count && %$a_count;
a83c7e95 1376 push(@headers,'Bytes','Average') if defined $m_data;
d5692f86 1377
a83c7e95
SC
1378 my $txt_format = "%10s " x @headers . " %s\n";
1379 my $txt_col_headers = sprintf $txt_format, @headers, $text;
1380 my $htm_format = "<tr>" . '<td align="right">%s</td>'x@headers . "<td align=\"$align\" nowrap>%s</td></tr>\n";
1381 my $htm_col_headers = sprintf $htm_format, @headers, $text;
1382 $htm_col_headers =~ s/(<\/?)td/$1th/g; #Convert <td>'s to <th>'s for the header.
d5692f86 1383
a83c7e95
SC
1384
1385 ################################################
1386 # Write the table headers
1387 ################################################
1388 printf $txt_fh ("%s\n%s\n%s", $title, "-" x length($title),$txt_col_headers) if $txt_fh;
d5692f86
SC
1389
1390 if ($htm_fh) {
a83c7e95
SC
1391 print $htm_fh <<EoText;
1392<hr><a name="$text count"></a><h2>$title</h2>
1393<table border=0 width="100%">
1394<tr><td>
1395<table border=1>
1396EoText
30e9ac3f 1397 print $htm_fh $htm_col_headers
059ec3d9 1398 }
d5692f86 1399
a83c7e95
SC
1400 if ($xls_fh) {
1401 $spreadsheet->write(${$row_sref}++, 0, $title, $f_header2);
1402 $spreadsheet->write(${$row_sref}++, 0, [@headers, $text], $f_headertab);
d5692f86 1403 }
a83c7e95
SC
1404
1405
1406 # write content
1407 foreach my $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {
1408
1409 # When displaying the average figures, we calculate the average of
1410 # the rounded data, as the user would calculate it. This reduces
1411 # the accuracy slightly, but we have to do it this way otherwise
1412 # when using -merge to convert results from text to HTML and
1413 # vice-versa discrepencies would occur.
1414 my $messages = $$m_count{$key};
1415 my @content = ($messages);
1416 push(@content, $$a_count{$key}) if defined $a_count;
1417 if (defined $m_data) {
1418 my $rounded_volume = volume_rounded($$m_data{$key},$$m_data_gigs{$key});
1419 my($data,$gigs) = (0,0);
1420 un_round($rounded_volume,\$data,\$gigs);
1421 my $rounded_average = volume_rounded($data/$messages,$gigs/$messages);
1422 push(@content, $rounded_volume, $rounded_average);
1423 }
1424
1425 # write content
1426 printf $txt_fh ($txt_format, @content, $key) if $txt_fh;
1427
1428 if ($htm_fh) {
1429 my $htmlkey = $key;
1430 $htmlkey =~ s/>/\&gt\;/g;
1431 $htmlkey =~ s/</\&lt\;/g;
1432 printf $htm_fh ($htm_format, @content, $htmlkey);
1433 }
1434 $spreadsheet->write(${$row_sref}++, 0, [@content, $key], $f_default) if $xls_fh;
1435
1436 if (scalar @chartdatanames < $ntopchart) {
1437 push(@chartdatanames, $key);
1438 push(@chartdatavals, $$m_count{$key});
1439 }
1440 else {
1441 $chartotherval += $$m_count{$key};
1442 }
059ec3d9 1443 }
d5692f86 1444
a83c7e95
SC
1445 push(@chartdatanames, "Other");
1446 push(@chartdatavals, $chartotherval);
059ec3d9 1447
a83c7e95
SC
1448 print $txt_fh "\n" if $txt_fh;
1449 if ($htm_fh) {
1450 print $htm_fh "</table>\n";
1451 print $htm_fh "</td><td>\n";
1452 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0))
1453 {
1454 # calculate the graph
1455 my @data = (
1456 \@chartdatanames,
1457 \@chartdatavals
1458 );
1459 my $graph = GD::Graph::pie->new(300, 300);
1460 $graph->set(
1461 x_label => 'Name',
1462 y_label => 'Amount',
1463 title => 'By count',
1464 );
1465 my $gd = $graph->plot(\@data) or warn($graph->error);
1466 if ($gd) {
1467 my $temp = $text;
1468 $temp =~ s/ /_/g;
1469 open(IMG, ">$chartdir/${temp}_count.png") or die "Could not write $chartdir/${temp}_count.png: $!\n";
1470 binmode IMG;
1471 print IMG $gd->png;
1472 close IMG;
1473 print $htm_fh "<img src=\"$chartrel/${temp}_count.png\">";
1474 }
059ec3d9 1475 }
a83c7e95
SC
1476 print $htm_fh "</td><td>\n";
1477 print $htm_fh "</td></tr></table>\n\n";
059ec3d9 1478 }
a83c7e95 1479 ++${$row_sref} if $xls_fh;
d5692f86
SC
1480
1481
a83c7e95
SC
1482 if (defined $m_data) {
1483 # write header
059ec3d9 1484
a83c7e95 1485 $title = "Top $name by volume";
d5692f86 1486
a83c7e95 1487 printf $txt_fh ("%s\n%s\n%s", $title, "-" x length($title),$txt_col_headers) if $txt_fh;
059ec3d9 1488
a83c7e95
SC
1489 if ($htm_fh) {
1490 print $htm_fh <<EoText;
1491<hr><a name="$text volume"></a><h2>$title</h2>
1492<table border=0 width="100%">
1493<tr><td>
1494<table border=1>
1495EoText
30e9ac3f 1496 print $htm_fh $htm_col_headers;
a83c7e95
SC
1497 }
1498 if ($xls_fh) {
1499 $spreadsheet->write(${$row_sref}++, 0, $title, $f_header2);
1500 $spreadsheet->write(${$row_sref}++, 0, [@headers, $text], $f_headertab);
1501 }
1502
1503 @chartdatanames = ();
1504 @chartdatavals = ();
1505 $chartotherval = 0;
1506 my $use_gig = 0;
1507 foreach my $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
1508 # The largest volume will be the first (top of the list).
1509 # If it has at least 1 gig, then just use gigabytes to avoid
1510 # risking an integer overflow when generating the pie charts.
1511 if ($$m_data_gigs{$key}) {
1512 $use_gig = 1;
1513 }
d5692f86 1514
a83c7e95
SC
1515 my $messages = $$m_count{$key};
1516 my @content = ($messages);
1517 push(@content, $$a_count{$key}) if defined $a_count;
1518 my $rounded_volume = volume_rounded($$m_data{$key},$$m_data_gigs{$key});
1519 my($data ,$gigs) = (0,0);
1520 un_round($rounded_volume,\$data,\$gigs);
1521 my $rounded_average = volume_rounded($data/$messages,$gigs/$messages);
1522 push(@content, $rounded_volume, $rounded_average );
1523
1524 # write content
1525 printf $txt_fh ($txt_format, @content, $key) if $txt_fh;
1526 if ($htm_fh) {
1527 my $htmlkey = $key;
1528 $htmlkey =~ s/>/\&gt\;/g;
1529 $htmlkey =~ s/</\&lt\;/g;
1530 printf $htm_fh ($htm_format, @content, $htmlkey);
1531 }
1532 $spreadsheet->write(${$row_sref}++, 0, [@content, $key], $f_default) if $xls_fh;
059ec3d9 1533
d5692f86 1534
a83c7e95
SC
1535 if (scalar @chartdatanames < $ntopchart) {
1536 if ($use_gig) {
1537 if ($$m_data_gigs{$key}) {
1538 push(@chartdatanames, $key);
1539 push(@chartdatavals, $$m_data_gigs{$key});
1540 }
1541 }
1542 else {
1543 push(@chartdatanames, $key);
1544 push(@chartdatavals, $$m_data{$key});
1545 }
1546 }
1547 else {
1548 $chartotherval += ($use_gig) ? $$m_data_gigs{$key} : $$m_data{$key};
8974000d 1549 }
059ec3d9 1550 }
a83c7e95
SC
1551 push(@chartdatanames, "Other");
1552 push(@chartdatavals, $chartotherval);
059ec3d9 1553
a83c7e95
SC
1554 print $txt_fh "\n" if $txt_fh;
1555 if ($htm_fh) {
1556 print $htm_fh "</table>\n";
1557 print $htm_fh "</td><td>\n";
1558 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0)) {
1559 # calculate the graph
1560 my @data = (
1561 \@chartdatanames,
1562 \@chartdatavals
1563 );
1564 my $graph = GD::Graph::pie->new(300, 300);
1565 $graph->set(
1566 x_label => 'Name',
1567 y_label => 'Volume' ,
1568 title => 'By Volume',
1569 );
1570 my $gd = $graph->plot(\@data) or warn($graph->error);
1571 if ($gd) {
1572 my $temp = $text;
1573 $temp =~ s/ /_/g;
1574 open(IMG, ">$chartdir/${temp}_volume.png") or die "Could not write $chartdir/${temp}_volume.png: $!\n";
1575 binmode IMG;
1576 print IMG $gd->png;
1577 close IMG;
1578 print $htm_fh "<img src=\"$chartrel/${temp}_volume.png\">";
1579 }
1580 }
1581 print $htm_fh "</td><td>\n";
1582 print $htm_fh "</td></tr></table>\n\n";
059ec3d9 1583 }
a83c7e95
SC
1584
1585 ++${$row_sref} if $xls_fh;
059ec3d9 1586 }
059ec3d9
PH
1587}
1588
1589
1b4fe9dd
PH
1590#######################################################################
1591# top_n_sort();
8e669ac1 1592#
1b4fe9dd 1593# @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
8e669ac1 1594#
1b4fe9dd
PH
1595# Given a hash which has numerical values, return the sorted $n keys which
1596# point to the top values. The second and third hashes are used as
1597# tiebreakers. They all must have the same keys.
8e669ac1 1598#
1b4fe9dd
PH
1599# The idea behind this routine is that when you only want to see the
1600# top n members of a set, rather than sorting the entire set and then
1601# plucking off the top n, sort through the stack as you go, discarding
1602# any member which is lower than your current n'th highest member.
8e669ac1 1603#
1b4fe9dd
PH
1604# This proves to be an order of magnitude faster for large hashes.
1605# On 200,000 lines of mainlog it benchmarked 9 times faster.
1606# On 700,000 lines of mainlog it benchmarked 13.8 times faster.
8e669ac1 1607#
1b4fe9dd
PH
1608# We assume the values are > 0.
1609#######################################################################
059ec3d9
PH
1610sub top_n_sort {
1611 my($n,$href1,$href2,$href3) = @_;
1612
1613 # PH's original sort was:
1614 #
1615 # foreach $key (sort
1616 # {
1617 # $$m_count{$b} <=> $$m_count{$a} ||
1618 # $$m_data_gigs{$b} <=> $$m_data_gigs{$a} ||
1619 # $$m_data{$b} <=> $$m_data{$a} ||
1620 # $a cmp $b
1621 # }
1622 # keys %{$m_count})
1623 #
1624
1625 #We use a key of '_' to represent non-existant values, as null keys are valid.
1626 #'_' is not a valid domain, edomain, host, or email.
1627 my(@top_n_keys) = ('_') x $n;
1628 my($minimum_value1,$minimum_value2,$minimum_value3) = (0,0,0);
1629 my $top_n_key = '';
1630 my $n_minus_1 = $n - 1;
1631 my $n_minus_2 = $n - 2;
1632
a83c7e95
SC
1633 # Create a dummy hash incase the user has not provided us with
1634 # tiebreaker hashes.
1635 my(%dummy_hash);
1636 $href2 = \%dummy_hash unless defined $href2;
1637 $href3 = \%dummy_hash unless defined $href3;
1638
8e669ac1 1639 # Pick out the top $n keys.
059ec3d9
PH
1640 my($key,$value1,$value2,$value3,$i,$comparison,$insert_position);
1641 while (($key,$value1) = each %$href1) {
1642
1643 #print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n";
8e669ac1 1644
059ec3d9 1645 # Check to see that the new value is bigger than the lowest of the
a83c7e95
SC
1646 # top n keys that we're keeping. We test the main key first, because
1647 # for the majority of cases we can skip creating dummy hash values
1648 # should the user have not provided real tie-breaking hashes.
1649 next unless $value1 >= $minimum_value1;
1650
1651 # Create a dummy hash entry for the key if required.
1652 # Note that setting the dummy_hash value sets it for both href2 &
1653 # href3. Also note that currently we are guarenteed to have a real
1654 # value for href3 if a real value for href2 exists so don't need to
1655 # test for it as well.
1656 $dummy_hash{$key} = 0 unless exists $href2->{$key};
1657
8e669ac1 1658 $comparison = $value1 <=> $minimum_value1 ||
d5692f86
SC
1659 $href2->{$key} <=> $minimum_value2 ||
1660 $href3->{$key} <=> $minimum_value3 ||
1661 $top_n_key cmp $key;
059ec3d9
PH
1662 next unless ($comparison == 1);
1663
1664 # As we will be using these values a few times, extract them into scalars.
1665 $value2 = $href2->{$key};
1666 $value3 = $href3->{$key};
1667
1668 # This key is bigger than the bottom n key, so the lowest position we
1669 # will insert it into is $n minus 1 (the bottom of the list).
1670 $insert_position = $n_minus_1;
1671
1672 # Now go through the list, stopping when we find a key that we're
1673 # bigger than, or we come to the penultimate position - we've
1674 # already tested bigger than the last.
1675 #
1676 # Note: we go top down as the list starts off empty.
1677 # Note: stepping through the list in this way benchmarks nearly
1678 # three times faster than doing a sort() on the reduced list.
1679 # I assume this is because the list is already in order, and
1680 # we get a performance boost from not having to do hash lookups
1681 # on the new key.
1682 for ($i = 0; $i < $n_minus_1; $i++) {
1683 $top_n_key = $top_n_keys[$i];
1684 if ( ($top_n_key eq '_') ||
d5692f86 1685 ( ($value1 <=> $href1->{$top_n_key} ||
059ec3d9 1686 $value2 <=> $href2->{$top_n_key} ||
d5692f86
SC
1687 $value3 <=> $href3->{$top_n_key} ||
1688 $top_n_key cmp $key) == 1
1689 )
1690 ) {
1691 $insert_position = $i;
1692 last;
059ec3d9
PH
1693 }
1694 }
1695
1696 # Remove the last element, then insert the new one.
1697 $#top_n_keys = $n_minus_2;
1698 splice(@top_n_keys,$insert_position,0,$key);
1699
1700 # Extract our new minimum values.
1701 $top_n_key = $top_n_keys[$n_minus_1];
1702 if ($top_n_key ne '_') {
1703 $minimum_value1 = $href1->{$top_n_key};
1704 $minimum_value2 = $href2->{$top_n_key};
1705 $minimum_value3 = $href3->{$top_n_key};
1706 }
1707 }
1708
1709 # Return the top n list, grepping out non-existant values, just in case
1710 # we didn't have that many values.
1711 return(grep(!/^_$/,@top_n_keys));
1712}
1713
1714
a83c7e95 1715
1b4fe9dd
PH
1716#######################################################################
1717# html_header();
8e669ac1 1718#
1b4fe9dd 1719# $header = html_header($title);
8e669ac1 1720#
1b4fe9dd
PH
1721# Print our HTML header and start the <body> block.
1722#######################################################################
059ec3d9
PH
1723sub html_header {
1724 my($title) = @_;
1725 my $text = << "EoText";
1726<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
1727<html>
1728<head>
1729<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15">
1730<title>$title</title>
1731</head>
1732<body bgcolor="white">
1733<h1>$title</h1>
1734EoText
1735 return $text;
1736}
1737
1738
1739
1b4fe9dd
PH
1740#######################################################################
1741# help();
8e669ac1 1742#
1b4fe9dd 1743# help();
8e669ac1 1744#
1b4fe9dd
PH
1745# Display usage instructions and exit.
1746#######################################################################
059ec3d9
PH
1747sub help {
1748 print << "EoText";
1749
1750eximstats Version $VERSION
1751
d5692f86
SC
1752Usage:
1753 eximstats [Output] [Options] mainlog1 mainlog2 ...
1754 eximstats -merge -html [Options] report.1.html ... > weekly_rep.html
1755
1756Examples:
1757 eximstats -html=eximstats.html mainlog1 mainlog2 ...
1758 eximstats mainlog1 mainlog2 ... > report.txt
059ec3d9 1759
8974000d 1760Parses exim mainlog or syslog files and generates a statistical analysis
d5692f86
SC
1761of the messages processed.
1762
1763Valid output types are:
1764-txt[=<file>] plain text (default unless no other type is specified)
1765-html[=<file>] HTML
1766-xls[=<file>] Excel
1767With no type and file given, defaults to -txt and STDOUT.
059ec3d9 1768
d5692f86 1769Valid options are:
059ec3d9
PH
1770-h<number> histogram divisions per hour. The default is 1, and
1771 0 suppresses histograms. Other valid values are:
d5692f86 1772 2, 3, 5, 10, 15, 20, 30 or 60.
059ec3d9
PH
1773-ne don't display error information
1774-nr don't display relaying information
1775-nr/pattern/ don't display relaying information that matches
1776-nt don't display transport information
1777-nt/pattern/ don't display transport information that matches
d5692f86 1778-nvr don't do volume rounding. Display in bytes, not KB/MB/GB.
059ec3d9
PH
1779-t<number> display top <number> sources/destinations
1780 default is 50, 0 suppresses top listing
1781-tnl omit local sources/destinations in top listing
1782-t_remote_users show top user sources/destinations from non-local domains
608bc29d
SC
1783-q<list> list of times for queuing information. -q0 suppresses.
1784-show_rt<list> Show the receipt times for all the messages.
1785-show_dt<list> Show the delivery times for all the messages.
1786 <list> is an optional list of times in seconds.
1787 Eg -show_rt1,2,4,8.
1788
d5692f86
SC
1789-include_original_destination show both the final and original
1790 destinations in the results rather than just the final ones.
059ec3d9 1791
d5692f86 1792-byhost show results by sending host (default unless bydomain or
059ec3d9 1793 byemail is specified)
d5692f86
SC
1794-bydomain show results by sending domain.
1795-byemail show results by sender's email address
1796-byedomain show results by sender's email domain
37f8a7c9 1797-bylocaldomain show results by local domain
059ec3d9
PH
1798
1799-pattern "Description" /pattern/
1800 Count lines matching specified patterns and show them in
d5692f86
SC
1801 the results. It can be specified multiple times. Eg:
1802 -pattern 'Refused connections' '/refused connection/'
059ec3d9
PH
1803
1804-merge merge previously generated reports into a new report
1805
d5692f86
SC
1806-charts Create charts (this requires the GD::Graph modules).
1807 Only valid with -html.
059ec3d9
PH
1808-chartdir <dir> Create the charts' png files in the directory <dir>
1809-chartrel <dir> Specify the relative directory for the "img src=" tags
1810 from where to include the charts in the html file
d5692f86 1811 -chartdir and -chartrel default to '.'
059ec3d9 1812
a83c7e95
SC
1813-emptyok It is OK if there is no valid input, don't print an error.
1814
d5692f86 1815-d Debug mode - dump the eval'ed parser onto STDERR.
059ec3d9
PH
1816
1817EoText
1818
1819 exit 1;
1820}
1821
1822
1823
1b4fe9dd
PH
1824#######################################################################
1825# generate_parser();
8e669ac1 1826#
1b4fe9dd 1827# $parser = generate_parser();
8e669ac1 1828#
1b4fe9dd
PH
1829# This subroutine generates the parsing routine which will be
1830# used to parse the mainlog. We take the base operation, and remove bits not in use.
1831# This improves performance depending on what bits you take out or add.
8e669ac1 1832#
1b4fe9dd 1833# I've tested using study(), but this does not improve performance.
8e669ac1 1834#
1b4fe9dd
PH
1835# We store our parsing routing in a variable, and process it looking for #IFDEF (Expression)
1836# or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
1837# the expression evaluates to true, then it is included/excluded accordingly.
1838#######################################################################
059ec3d9
PH
1839sub generate_parser {
1840 my $parser = '
1841 my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
a83c7e95 1842 my($tod,$m_hour,$m_min,$id,$flag,$extra,$length);
37f8a7c9 1843 my($seconds,$queued,$rcpt_time,$local_domain);
a83c7e95 1844 my $rej_id = 0;
059ec3d9 1845 while (<$fh>) {
059ec3d9 1846
8974000d
SC
1847 # Convert syslog lines to mainlog format.
1848 if (! /^\\d{4}/) {
1849 next unless s/^.*? exim\\b.*?: //;
1850 }
1851
a83c7e95
SC
1852 $length = length($_);
1853 next if ($length < 38);
f3f065bb 1854 next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)( \\[\\d+\\])?/o;
059ec3d9
PH
1855
1856 ($tod,$m_hour,$m_min) = ($1,$2,$3);
1857
a83c7e95
SC
1858 # PH - watch for GMT offsets in the timestamp.
1859 if (defined($4)) {
1860 $extra = 6;
1861 next if ($length < 44);
1862 }
1863 else {
1864 $extra = 0;
1865 }
1866
f3f065bb
PH
1867 # PH - watch for PID added after the timestamp.
1868 if (defined($5)) {
1869 $extra += length($5);
1870 next if ($length < 38 + $extra);
1871 }
1872
059ec3d9
PH
1873 $id = substr($_, 20 + $extra, 16);
1874 $flag = substr($_, 37 + $extra, 2);
a83c7e95
SC
1875
1876 if ($flag !~ /^([<>=*-]+|SA)$/ && /rejected|refused|dropped/) {
1877 $flag = "Re";
1878 $extra -= 3;
1879 }
1880
1881 # Rejects can have no MSGID...
1882 if ($flag eq "Re" && $id !~ /^[-0-9a-zA-Z]+$/) {
1883 $id = "reject:" . ++$rej_id;
1884 $extra -= 17;
1885 }
059ec3d9
PH
1886';
1887
1888 # Watch for user specified patterns.
1889 my $user_pattern_index = 0;
1890 foreach (@user_patterns) {
1891 $user_pattern_totals[$user_pattern_index] = 0;
f2b67a5b
SC
1892 $parser .= " if ($_) {\n";
1893 $parser .= " \$user_pattern_totals[$user_pattern_index]++;\n";
1894 $parser .= " \$user_pattern_interval_count[$user_pattern_index][(\$m_hour*60 + \$m_min)/$hist_interval]++;\n" if ($hist_opt > 0);
1895 $parser .= " }\n";
059ec3d9
PH
1896 $user_pattern_index++;
1897 }
1898
1899 $parser .= '
a83c7e95 1900 next unless ($flag =~ /<=|=>|->|==|\\*\\*|Co|SA|Re/);
059ec3d9 1901
a83c7e95
SC
1902 #Strip away the timestamp, ID and flag to speed up later pattern matches.
1903 #The flags include Co (Completed), Re (Rejected), and SA (SpamAssassin).
059ec3d9
PH
1904 $_ = substr($_, 40 + $extra); # PH
1905
a83c7e95 1906 # Alias @message to the array of information about the message.
608bc29d
SC
1907 # This minimises the number of calls to hash functions.
1908 $messages{$id} = [] unless exists $messages{$id};
a83c7e95 1909 *message = $messages{$id};
608bc29d
SC
1910
1911
059ec3d9
PH
1912 # JN - Skip over certain transports as specified via the "-nt/.../" command
1913 # line switch (where ... is a perl style regular expression). This is
1914 # required so that transports that skew stats such as SpamAssassin can be
1915 # ignored.
1916 #IFDEF ($transport_pattern)
1917 if (/\\sT=(\\S+)/) {
1918 next if ($1 =~ /$transport_pattern/o) ;
1919 }
1920 #ENDIF ($transport_pattern)
1921
1922
059ec3d9
PH
1923
1924 # Do some pattern matches to get the host and IP address.
1925 # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or
1926 # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
1927 # We do 2 separate matches to keep the matches simple and fast.
a83c7e95 1928 # Host is local unless otherwise specified.
514ee161
SC
1929 # Watch out for "H=([IpAddr])" in case they send "[IpAddr]" as their HELO!
1930 $ip = (/\\bH=(?:|.*? )(\\[[^]]+\\])/) ? $1
d5c13d66
NM
1931 # 2008-03-31 06:25:22 Connection from [213.246.33.217]:39456 refused: too many connections from that IP address // .hs
1932 : (/Connection from (\[\S+\])/) ? $1
1933 # 2008-03-31 06:52:40 SMTP call from mail.cacoshrf.com (ccsd02.ccsd.local) [69.24.118.229]:4511 dropped: too many nonmail commands (last was "RSET") // .hs
1934 : (/SMTP call from .*?(\[\S+\])/) ? $1
1935 : "local";
a83c7e95 1936 $host = (/\\bH=(\\S+)/) ? $1 : "local";
059ec3d9 1937
a83c7e95 1938 $domain = "localdomain"; #Domain is localdomain unless otherwise specified.
059ec3d9 1939
a83c7e95 1940 #IFDEF ($do_sender{Domain})
37f8a7c9
SC
1941 if ($host =~ /^\\[/ || $host =~ /^[\\d\\.]+$/) {
1942 # Host is just an IP address.
1943 $domain = $host;
1944 }
1945 elsif ($host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
a83c7e95
SC
1946 # Remove the host portion from the DNS name. We ensure that we end up
1947 # with at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z".
1948 $domain = lc("$1.$2");
1949 $domain =~ s/^\\.//; #Remove preceding dot.
059ec3d9 1950 }
a83c7e95 1951 #ENDIF ($do_sender{Domain})
059ec3d9
PH
1952
1953 #IFDEF ($do_sender{Email})
d5692f86
SC
1954 #IFDEF ($include_original_destination)
1955 # Catch both "a@b.com <c@d.com>" and "e@f.com"
1956 #$email = (/^(\S+) (<(\S*?)>)?/) ? $3 || $1 : "";
1957 $email = (/^(\S+ (<[^@>]+@?[^>]*>)?)/) ? $1 : "";
1958 chomp($email);
1959 #ENDIF ($include_original_destination)
1960
1961 #IFNDEF ($include_original_destination)
1962 $email = (/^(\S+)/) ? $1 : "";
1963 #ENDIF ($include_original_destination)
059ec3d9
PH
1964 #ENDIF ($do_sender{Email})
1965
1966 #IFDEF ($do_sender{Edomain})
5e6e6734
SC
1967 if (/^(<>|blackhole)/) {
1968 $edomain = $1;
1969 }
d5692f86 1970 #IFDEF ($include_original_destination)
5e6e6734
SC
1971 elsif (/^(\S+ (<\S*?\\@(\S+?)>)?)/) {
1972 $edomain = $1;
1973 chomp($edomain);
1974 $edomain =~ s/@(\S+?)>/"@" . lc($1) . ">"/e;
1975 }
d5692f86 1976 #ENDIF ($include_original_destination)
d5692f86 1977 #IFNDEF ($include_original_destination)
5e6e6734
SC
1978 elsif (/^\S*?\\@(\S+)/) {
1979 $edomain = lc($1);
1980 }
d5692f86 1981 #ENDIF ($include_original_destination)
5e6e6734
SC
1982 else {
1983 $edomain = "";
1984 }
1985
059ec3d9
PH
1986 #ENDIF ($do_sender{Edomain})
1987
1988 if ($tod lt $begin) {
1989 $begin = $tod;
1990 }
1991 elsif ($tod gt $end) {
1992 $end = $tod;
1993 }
1994
1995
1996 if ($flag eq "<=") {
1997 $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0;
a83c7e95
SC
1998 $message[$SIZE] = $thissize;
1999 $message[$PROTOCOL] = (/ P=(\S+)/) ? $1 : undef;
059ec3d9
PH
2000
2001 #IFDEF ($show_relay)
2002 if ($host ne "local") {
d5692f86
SC
2003 # Save incoming information in case it becomes interesting
2004 # later, when delivery lines are read.
2005 my($from) = /^(\\S+)/;
a83c7e95
SC
2006 $message[$FROM_HOST] = "$host$ip";
2007 $message[$FROM_ADDRESS] = $from;
059ec3d9
PH
2008 }
2009 #ENDIF ($show_relay)
2010
2011 #IFDEF ($local_league_table || $include_remote_users)
d5692f86
SC
2012 if (/\sU=(\\S+)/) {
2013 my $user = $1;
059ec3d9 2014
d5692f86
SC
2015 #IFDEF ($local_league_table && $include_remote_users)
2016 { #Store both local and remote users.
2017 #ENDIF ($local_league_table && $include_remote_users)
059ec3d9 2018
d5692f86
SC
2019 #IFDEF ($local_league_table && ! $include_remote_users)
2020 if ($host eq "local") { #Store local users only.
2021 #ENDIF ($local_league_table && ! $include_remote_users)
059ec3d9 2022
d5692f86
SC
2023 #IFDEF ($include_remote_users && ! $local_league_table)
2024 if ($host ne "local") { #Store remote users only.
2025 #ENDIF ($include_remote_users && ! $local_league_table)
059ec3d9 2026
608bc29d 2027 ++$received_count_user{$user};
d5692f86 2028 add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize);
059ec3d9 2029 }
d5692f86 2030 }
059ec3d9
PH
2031 #ENDIF ($local_league_table || $include_remote_users)
2032
2033 #IFDEF ($do_sender{Host})
608bc29d 2034 ++$received_count{Host}{$host};
d5692f86 2035 add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize);
059ec3d9
PH
2036 #ENDIF ($do_sender{Host})
2037
2038 #IFDEF ($do_sender{Domain})
2039 if ($domain) {
608bc29d 2040 ++$received_count{Domain}{$domain};
d5692f86
SC
2041 add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize);
2042 }
059ec3d9
PH
2043 #ENDIF ($do_sender{Domain})
2044
2045 #IFDEF ($do_sender{Email})
608bc29d 2046 ++$received_count{Email}{$email};
d5692f86 2047 add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize);
059ec3d9
PH
2048 #ENDIF ($do_sender{Email})
2049
2050 #IFDEF ($do_sender{Edomain})
608bc29d 2051 ++$received_count{Edomain}{$edomain};
d5692f86 2052 add_volume(\\$received_data{Edomain}{$edomain},\\$received_data_gigs{Edomain}{$edomain},$thissize);
059ec3d9
PH
2053 #ENDIF ($do_sender{Edomain})
2054
608bc29d 2055 ++$total_received_count;
059ec3d9
PH
2056 add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize);
2057
608bc29d 2058 #IFDEF ($#queue_times >= 0 || $#rcpt_times >= 0)
a83c7e95 2059 $message[$ARRIVAL_TIME] = $tod;
608bc29d 2060 #ENDIF ($#queue_times >= 0 || $#rcpt_times >= 0)
059ec3d9
PH
2061
2062 #IFDEF ($hist_opt > 0)
d5692f86 2063 $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
059ec3d9
PH
2064 #ENDIF ($hist_opt > 0)
2065 }
2066
2067 elsif ($flag eq "=>") {
a83c7e95 2068 $size = $message[$SIZE] || 0;
059ec3d9 2069 if ($host ne "local") {
a83c7e95 2070 $message[$REMOTE_DELIVERED] = 1;
059ec3d9
PH
2071
2072
2073 #IFDEF ($show_relay)
2074 # Determine relaying address if either only one address listed,
2075 # or two the same. If they are different, it implies a forwarding
2076 # or aliasing, which is not relaying. Note that for multi-aliased
2077 # addresses, there may be a further address between the first
2078 # and last.
2079
a83c7e95 2080 if (defined $message[$FROM_HOST]) {
059ec3d9
PH
2081 if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) {
2082 ($old,$new) = ($1,$2);
d5692f86 2083 }
059ec3d9 2084 else {
d5692f86
SC
2085 $old = $new = "";
2086 }
059ec3d9
PH
2087
2088 if ("\\L$new" eq "\\L$old") {
2089 ($old) = /^(\\S+)/ if $old eq "";
a83c7e95 2090 my $key = "H=\\L$message[$FROM_HOST]\\E A=\\L$message[$FROM_ADDRESS]\\E => " .
059ec3d9
PH
2091 "H=\\L$host\\E$ip A=\\L$old\\E";
2092 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
2093 $relayed{$key} = 0 if !defined $relayed{$key};
608bc29d 2094 ++$relayed{$key};
d5692f86 2095 }
059ec3d9 2096 else {
608bc29d 2097 ++$relayed_unshown;
059ec3d9
PH
2098 }
2099 }
2100 }
2101 #ENDIF ($show_relay)
2102
2103 }
2104
2105 #IFDEF ($local_league_table || $include_remote_users)
d5692f86
SC
2106 #IFDEF ($local_league_table && $include_remote_users)
2107 { #Store both local and remote users.
2108 #ENDIF ($local_league_table && $include_remote_users)
2109
2110 #IFDEF ($local_league_table && ! $include_remote_users)
2111 if ($host eq "local") { #Store local users only.
2112 #ENDIF ($local_league_table && ! $include_remote_users)
2113
2114 #IFDEF ($include_remote_users && ! $local_league_table)
2115 if ($host ne "local") { #Store remote users only.
2116 #ENDIF ($include_remote_users && ! $local_league_table)
2117
2118 if (my($user) = split((/\\s</)? " <" : " ", $_)) {
2119 #IFDEF ($include_original_destination)
2120 {
2121 #ENDIF ($include_original_destination)
2122 #IFNDEF ($include_original_destination)
2123 if ($user =~ /^[\\/|]/) {
2124 #ENDIF ($include_original_destination)
a797be69
SC
2125 #my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
2126 my($parent) = $_ =~ / (<.+?>) /; #DT 1.54
37f8a7c9
SC
2127 if (defined $parent) {
2128 $user = "$user $parent";
2129 #IFDEF ($do_local_domain)
2130 if ($parent =~ /\\@(.+)>/) {
2131 $local_domain = lc($1);
2132 ++$delivered_messages_local_domain{$local_domain};
2133 ++$delivered_addresses_local_domain{$local_domain};
2134 add_volume(\\$delivered_data_local_domain{$local_domain},\\$delivered_data_gigs_local_domain{$local_domain},$size);
2135 }
2136 #ENDIF ($do_local_domain)
2137 }
d5692f86 2138 }
a83c7e95
SC
2139 ++$delivered_messages_user{$user};
2140 ++$delivered_addresses_user{$user};
d5692f86
SC
2141 add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size);
2142 }
2143 }
059ec3d9
PH
2144 #ENDIF ($local_league_table || $include_remote_users)
2145
2146 #IFDEF ($do_sender{Host})
a83c7e95
SC
2147 $delivered_messages{Host}{$host}++;
2148 $delivered_addresses{Host}{$host}++;
d5692f86 2149 add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size);
059ec3d9
PH
2150 #ENDIF ($do_sender{Host})
2151 #IFDEF ($do_sender{Domain})
2152 if ($domain) {
a83c7e95
SC
2153 ++$delivered_messages{Domain}{$domain};
2154 ++$delivered_addresses{Domain}{$domain};
d5692f86
SC
2155 add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size);
2156 }
059ec3d9
PH
2157 #ENDIF ($do_sender{Domain})
2158 #IFDEF ($do_sender{Email})
a83c7e95
SC
2159 ++$delivered_messages{Email}{$email};
2160 ++$delivered_addresses{Email}{$email};
d5692f86 2161 add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size);
059ec3d9
PH
2162 #ENDIF ($do_sender{Email})
2163 #IFDEF ($do_sender{Edomain})
a83c7e95
SC
2164 ++$delivered_messages{Edomain}{$edomain};
2165 ++$delivered_addresses{Edomain}{$edomain};
d5692f86 2166 add_volume(\\$delivered_data{Edomain}{$edomain},\\$delivered_data_gigs{Edomain}{$edomain},$size);
059ec3d9
PH
2167 #ENDIF ($do_sender{Edomain})
2168
a83c7e95
SC
2169 ++$total_delivered_messages;
2170 ++$total_delivered_addresses;
059ec3d9
PH
2171 add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size);
2172
2173 #IFDEF ($show_transport)
2174 my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:";
608bc29d 2175 ++$transported_count{$transport};
059ec3d9
PH
2176 add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size);
2177 #ENDIF ($show_transport)
2178
2179 #IFDEF ($hist_opt > 0)
2180 $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
2181 #ENDIF ($hist_opt > 0)
2182
608bc29d
SC
2183 #IFDEF ($#delivery_times > 0)
2184 if (/ DT=(\S+)/) {
2185 $seconds = wdhms_seconds($1);
2186 for ($i = 0; $i <= $#delivery_times; $i++) {
2187 if ($seconds < $delivery_times[$i]) {
2188 ++$dt_all_bin[$i];
a83c7e95 2189 ++$dt_remote_bin[$i] if $message[$REMOTE_DELIVERED];
608bc29d
SC
2190 last;
2191 }
2192 }
2193 if ($i > $#delivery_times) {
2194 ++$dt_all_overflow;
a83c7e95 2195 ++$dt_remote_overflow if $message[$REMOTE_DELIVERED];
608bc29d
SC
2196 }
2197 }
2198 #ENDIF ($#delivery_times > 0)
2199
059ec3d9
PH
2200 }
2201
a83c7e95
SC
2202 elsif ($flag eq "->") {
2203
2204 #IFDEF ($local_league_table || $include_remote_users)
2205 #IFDEF ($local_league_table && $include_remote_users)
2206 { #Store both local and remote users.
2207 #ENDIF ($local_league_table && $include_remote_users)
2208
2209 #IFDEF ($local_league_table && ! $include_remote_users)
2210 if ($host eq "local") { #Store local users only.
2211 #ENDIF ($local_league_table && ! $include_remote_users)
2212
2213 #IFDEF ($include_remote_users && ! $local_league_table)
2214 if ($host ne "local") { #Store remote users only.
2215 #ENDIF ($include_remote_users && ! $local_league_table)
2216
2217 if (my($user) = split((/\\s</)? " <" : " ", $_)) {
2218 #IFDEF ($include_original_destination)
2219 {
2220 #ENDIF ($include_original_destination)
2221 #IFNDEF ($include_original_destination)
2222 if ($user =~ /^[\\/|]/) {
2223 #ENDIF ($include_original_destination)
a797be69
SC
2224 #my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
2225 my($parent) = $_ =~ / (<.+?>) /; #DT 1.54
a83c7e95
SC
2226 $user = "$user $parent" if defined $parent;
2227 }
2228 ++$delivered_addresses_user{$user};
2229 }
2230 }
2231 #ENDIF ($local_league_table || $include_remote_users)
2232
2233 #IFDEF ($do_sender{Host})
2234 $delivered_addresses{Host}{$host}++;
2235 #ENDIF ($do_sender{Host})
2236 #IFDEF ($do_sender{Domain})
2237 if ($domain) {
2238 ++$delivered_addresses{Domain}{$domain};
2239 }
2240 #ENDIF ($do_sender{Domain})
2241 #IFDEF ($do_sender{Email})
2242 ++$delivered_addresses{Email}{$email};
2243 #ENDIF ($do_sender{Email})
2244 #IFDEF ($do_sender{Edomain})
2245 ++$delivered_addresses{Edomain}{$edomain};
2246 #ENDIF ($do_sender{Edomain})
2247
2248 ++$total_delivered_addresses;
2249 }
2250
2251 elsif ($flag eq "==" && defined($message[$SIZE]) && !defined($message[$DELAYED])) {
608bc29d 2252 ++$delayed_count;
a83c7e95 2253 $message[$DELAYED] = 1;
059ec3d9
PH
2254 }
2255
2256 elsif ($flag eq "**") {
a83c7e95
SC
2257 if (defined ($message[$SIZE])) {
2258 unless (defined $message[$HAD_ERROR]) {
608bc29d 2259 ++$message_errors;
a83c7e95 2260 $message[$HAD_ERROR] = 1;
608bc29d
SC
2261 }
2262 }
059ec3d9
PH
2263
2264 #IFDEF ($show_errors)
608bc29d 2265 ++$errors_count{$_};
059ec3d9
PH
2266 #ENDIF ($show_errors)
2267
2268 }
2269
2270 elsif ($flag eq "Co") {
2271 #Completed?
2272 #IFDEF ($#queue_times >= 0)
a83c7e95 2273 $queued = queue_time($tod, $message[$ARRIVAL_TIME], $id);
059ec3d9
PH
2274
2275 for ($i = 0; $i <= $#queue_times; $i++) {
2276 if ($queued < $queue_times[$i]) {
608bc29d 2277 ++$qt_all_bin[$i];
a83c7e95 2278 ++$qt_remote_bin[$i] if $message[$REMOTE_DELIVERED];
059ec3d9 2279 last;
d5692f86
SC
2280 }
2281 }
608bc29d
SC
2282 if ($i > $#queue_times) {
2283 ++$qt_all_overflow;
a83c7e95 2284 ++$qt_remote_overflow if $message[$REMOTE_DELIVERED];
608bc29d 2285 }
059ec3d9
PH
2286 #ENDIF ($#queue_times >= 0)
2287
608bc29d
SC
2288 #IFDEF ($#rcpt_times >= 0)
2289 if (/ QT=(\S+)/) {
2290 $seconds = wdhms_seconds($1);
2291 #Calculate $queued if not previously calculated above.
2292 #IFNDEF ($#queue_times >= 0)
a83c7e95 2293 $queued = queue_time($tod, $message[$ARRIVAL_TIME], $id);
608bc29d
SC
2294 #ENDIF ($#queue_times >= 0)
2295 $rcpt_time = $seconds - $queued;
2296 my($protocol);
2297
a83c7e95
SC
2298 if (defined $message[$PROTOCOL]) {
2299 $protocol = $message[$PROTOCOL];
608bc29d
SC
2300
2301 # Create the bin if its not already defined.
2302 unless (exists $rcpt_times_bin{$protocol}) {
2303 initialise_rcpt_times($protocol);
2304 }
2305 }
2306
2307
2308 for ($i = 0; $i <= $#rcpt_times; ++$i) {
2309 if ($rcpt_time < $rcpt_times[$i]) {
2310 ++$rcpt_times_bin{all}[$i];
2311 ++$rcpt_times_bin{$protocol}[$i] if defined $protocol;
2312 last;
2313 }
2314 }
059ec3d9 2315
608bc29d
SC
2316 if ($i > $#rcpt_times) {
2317 ++$rcpt_times_overflow{all};
2318 ++$rcpt_times_overflow{$protocol} if defined $protocol;
2319 }
2320 }
2321 #ENDIF ($#rcpt_times >= 0)
2322
2323 delete($messages{$id});
059ec3d9 2324 }
a83c7e95
SC
2325 elsif ($flag eq "SA") {
2326 $ip = (/From.*?(\\[[^]]+\\])/ || /\\((local)\\)/) ? $1 : "";
2327 #SpamAssassin message
2328 if (/Action: ((permanently|temporarily) rejected message|flagged as Spam but accepted): score=(\d+\.\d)/) {
2329 #add_volume(\\$spam_score,\\$spam_score_gigs,$3);
2330 ++$spam_count_by_ip{$ip};
2331 } elsif (/Action: scanned but message isn\'t spam: score=(-?\d+\.\d)/) {
2332 #add_volume(\\$ham_score,\\$ham_score_gigs,$1);
2333 ++$ham_count_by_ip{$ip};
2334 } elsif (/(Not running SA because SAEximRunCond expanded to false|check skipped due to message size)/) {
2335 ++$ham_count_by_ip{$ip};
2336 }
2337 }
2338
2339 # Look for Reject messages or blackholed messages (deliveries
2340 # without a transport)
2341 if ($flag eq "Re" || ($flag eq "=>" && ! /\\sT=\\S+/)) {
2342 # Correct the IP address for rejects:
2343 # rejected EHLO from my.test.net [10.0.0.5]: syntactically invalid argument(s):
e2f7a0d2
SC
2344 # rejected EHLO from [10.0.0.6]: syntactically invalid argument(s):
2345 $ip = $1 if ($ip eq "local" && /^rejected [HE][HE]LO from .*?(\[.+?\]):/);
e2f7a0d2
SC
2346 if (/SpamAssassin/) {
2347 ++$rejected_count_by_reason{"Rejected by SpamAssassin"};
30e9ac3f
SC
2348 ++$rejected_count_by_ip{$ip};
2349 }
2350 elsif (
2351 /(temporarily rejected [A-Z]*) .*?(: .*?)(:|\s*$)/
2352 ) {
2353 ++$temporarily_rejected_count_by_reason{"\u$1$2"};
2354 ++$temporarily_rejected_count_by_ip{$ip};
2355 }
2356 elsif (
2357 /(temporarily refused connection)/
2358 ) {
2359 ++$temporarily_rejected_count_by_reason{"\u$1"};
2360 ++$temporarily_rejected_count_by_ip{$ip};
e2f7a0d2
SC
2361 }
2362 elsif (
a83c7e95
SC
2363 /(listed at [^ ]+)/ ||
2364 /(Forged IP detected in HELO)/ ||
2365 /(Invalid domain or IP given in HELO\/EHLO)/ ||
2366 /(unqualified recipient rejected)/ ||
2367 /(closed connection (after|in response) .*?)\s*$/ ||
2368 /(sender rejected)/ ||
2369 # 2005-09-23 15:07:49 1EInHJ-0007Ex-Au H=(a.b.c) [10.0.0.1] F=<> rejected after DATA: This message contains a virus: (Eicar-Test-Signature) please scan your system.
2370 # 2005-10-06 10:50:07 1ENRS3-0000Nr-Kt => blackhole (DATA ACL discarded recipients): This message contains a virus: (Worm.SomeFool.P) please scan your system.
2371 / rejected after DATA: (.*)/ ||
b322aac8 2372 / (rejected DATA: .*)/ ||
a83c7e95
SC
2373 /.DATA ACL discarded recipients.: (.*)/ ||
2374 /rejected after DATA: (unqualified address not permitted)/ ||
2375 /(VRFY rejected)/ ||
2376# /(sender verify (defer|fail))/i ||
2377 /(too many recipients)/ ||
2378 /(refused relay.*?) to/ ||
2379 /(rejected by non-SMTP ACL: .*)/ ||
2380 /(rejected by local_scan.*)/ ||
2381 # SMTP call from %s dropped: too many syntax or protocol errors (last command was "%s"
2382 # SMTP call from %s dropped: too many nonmail commands
2383 /(dropped: too many ((nonmail|unrecognized) commands|syntax or protocol errors))/ ||
2384
2385 # local_scan() function crashed with signal %d - message temporarily rejected
2386 # local_scan() function timed out - message temporarily rejected
2387 /(local_scan.. function .* - message temporarily rejected)/ ||
a83c7e95
SC
2388 # SMTP protocol synchronization error (input sent without waiting for greeting): rejected connection from %s
2389 /(SMTP protocol .*?(error|violation))/ ||
2390 /(message too big)/
2391 ) {
2392 ++$rejected_count_by_reason{"\u$1"};
30e9ac3f 2393 ++$rejected_count_by_ip{$ip};
a83c7e95
SC
2394 }
2395 elsif (/rejected [HE][HE]LO from [^:]*: syntactically invalid argument/) {
2396 ++$rejected_count_by_reason{"Rejected HELO/EHLO: syntactically invalid argument"};
30e9ac3f 2397 ++$rejected_count_by_ip{$ip};
a83c7e95
SC
2398 }
2399 elsif (/response to "RCPT TO.*? was: (.*)/) {
2400 ++$rejected_count_by_reason{"Response to RCPT TO was: $1"};
30e9ac3f 2401 ++$rejected_count_by_ip{$ip};
a83c7e95
SC
2402 }
2403 elsif (
2404 /(lookup of host )\S+ (failed)/ ||
2405
2406 # rejected from <%s>%s%s%s%s: message too big:
2407 /(rejected [A-Z]*) .*?(: .*?)(:|\s*$)/ ||
2408 # refused connection from %s (host_reject_connection)
2409 # refused connection from %s (tcp wrappers)
2410 /(refused connection )from.*? (\(.*)/ ||
2411
2412 # error from remote mailer after RCPT TO:<a@b.c>: host a.b.c [10.0.0.1]: 450 <a@b.c>: Recipient address rejected: Greylisted for 60 seconds
2413 # error from remote mailer after MAIL FROM:<> SIZE=3468: host a.b.c [10.0.0.1]: 421 a.b.c has refused your connection because your server did not have a PTR record.
2414 /(error from remote mailer after .*?:).*(: .*?)(:|\s*$)/ ||
2415
2416 # a.b.c F=<a@b.c> rejected after DATA: "@" or "." expected after "Undisclosed-Recipient": failing address in "To" header is: <Undisclosed-Recipient:;>
2417 /rejected after DATA: ("." or "." expected).*?(: failing address in .*? header)/ ||
2418
2419 # connection from %s refused load average = %.2f
2420 /(Connection )from.*? (refused: load average)/ ||
2421 # connection from %s refused (IP options)
2422 # Connection from %s refused: too many connections
2423 # connection from %s refused
2424 /([Cc]onnection )from.*? (refused.*)/ ||
2425 # [10.0.0.1]: connection refused
2426 /: (Connection refused)()/
2427 ) {
2428 ++$rejected_count_by_reason{"\u$1$2"};
30e9ac3f 2429 ++$rejected_count_by_ip{$ip};
a83c7e95 2430 }
d5c13d66
NM
2431 elsif (
2432 # 2008-03-31 06:25:22 H=mail.densitron.com [216.70.140.224]:45386 temporarily rejected connection in "connect" ACL: too fast reconnects // .hs
2433 # 2008-03-31 06:25:22 H=mail.densitron.com [216.70.140.224]:45386 temporarily rejected connection in "connect" ACL // .hs
2434 /(temporarily rejected connection in .*?ACL:?.*)/
2435 ) {
2436 ++$temporarily_rejected_count_by_ip{$ip};
2437 ++$temporarily_rejected_count_by_reason{"\u$1"};
2438 }
a83c7e95
SC
2439 else {
2440 ++$rejected_count_by_reason{Unknown};
30e9ac3f 2441 ++$rejected_count_by_ip{$ip};
a83c7e95
SC
2442 print STDERR "Unknown rejection: $_" if $debug;
2443 }
2444 }
059ec3d9
PH
2445 }';
2446
2447 # We now do a 'C preprocessor style operation on our parser
2448 # to remove bits not in use.
2449 my(%defines_in_operation,$removing_lines,$processed_parser);
2450 foreach (split (/\n/,$parser)) {
2451 if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i && ! eval $1) ||
d5692f86 2452 (/^\s*#\s*IFNDEF\s*\((.*?)\)/i && eval $1) ) {
059ec3d9
PH
2453 $defines_in_operation{$1} = 1;
2454 $removing_lines = 1;
2455 }
2456
608bc29d
SC
2457 # Convert constants.
2458 while (/(\$[A-Z][A-Z_]*)\b/) {
2459 my $constant = eval $1;
2460 s/(\$[A-Z][A-Z_]*)\b/$constant/;
2461 }
2462
059ec3d9
PH
2463 $processed_parser .= $_."\n" unless $removing_lines;
2464
2465 if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) {
2466 delete $defines_in_operation{$1};
2467 unless (keys %defines_in_operation) {
d5692f86 2468 $removing_lines = 0;
059ec3d9
PH
2469 }
2470 }
2471 }
608bc29d 2472 print STDERR "# START OF PARSER:$processed_parser\n# END OF PARSER\n\n" if $debug;
059ec3d9
PH
2473
2474 return $processed_parser;
2475}
2476
2477
2478
1b4fe9dd
PH
2479#######################################################################
2480# parse();
8e669ac1 2481#
1b4fe9dd 2482# parse($parser,\*FILEHANDLE);
8e669ac1 2483#
1b4fe9dd
PH
2484# This subroutine accepts a parser and a filehandle from main and parses each
2485# line. We store the results into global variables.
2486#######################################################################
059ec3d9
PH
2487sub parse {
2488 my($parser,$fh) = @_;
2489
2490 if ($merge_reports) {
2491 parse_old_eximstat_reports($fh);
2492 }
2493 else {
2494 eval $parser;
2495 die ($@) if $@;
2496 }
2497
2498}
2499
2500
2501
1b4fe9dd
PH
2502#######################################################################
2503# print_header();
8e669ac1 2504#
1b4fe9dd 2505# print_header();
8e669ac1 2506#
1b4fe9dd
PH
2507# Print our headers and contents.
2508#######################################################################
059ec3d9
PH
2509sub print_header {
2510
d5692f86 2511
059ec3d9
PH
2512 my $title = "Exim statistics from $begin to $end";
2513
d5692f86
SC
2514 print $txt_fh "\n$title\n" if $txt_fh;
2515 if ($htm_fh) {
2516 print $htm_fh html_header($title);
2517 print $htm_fh "<ul>\n";
30e9ac3f
SC
2518 print $htm_fh "<li><a href=\"#Grandtotal\">Grand total summary</a>\n";
2519 print $htm_fh "<li><a href=\"#Patterns\">User Specified Patterns</a>\n" if @user_patterns;
2520 print $htm_fh "<li><a href=\"#Transport\">Deliveries by Transport</a>\n" if $show_transport;
059ec3d9 2521 if ($hist_opt) {
d5692f86
SC
2522 print $htm_fh "<li><a href=\"#Messages received\">Messages received per hour</a>\n";
2523 print $htm_fh "<li><a href=\"#Deliveries\">Deliveries per hour</a>\n";
059ec3d9 2524 }
608bc29d 2525
059ec3d9 2526 if ($#queue_times >= 0) {
608bc29d
SC
2527 print $htm_fh "<li><a href=\"#Time spent on the queue all messages\">Time spent on the queue: all messages</a>\n";
2528 print $htm_fh "<li><a href=\"#Time spent on the queue messages with at least one remote delivery\">Time spent on the queue: messages with at least one remote delivery</a>\n";
2529 }
2530
2531 if ($#delivery_times >= 0) {
2532 print $htm_fh "<li><a href=\"#Delivery times all messages\">Delivery times: all messages</a>\n";
2533 print $htm_fh "<li><a href=\"#Delivery times messages with at least one remote delivery\">Delivery times: messages with at least one remote delivery</a>\n";
2534 }
2535
2536 if ($#rcpt_times >= 0) {
2537 print $htm_fh "<li><a href=\"#Receipt times all messages\">Receipt times</a>\n";
059ec3d9 2538 }
608bc29d 2539
d5692f86 2540 print $htm_fh "<li><a href=\"#Relayed messages\">Relayed messages</a>\n" if $show_relay;
059ec3d9 2541 if ($topcount) {
30e9ac3f 2542 print $htm_fh "<li><a href=\"#Mail rejection reason count\">Top $topcount mail rejection reasons by message count</a>\n" if %rejected_count_by_reason;
059ec3d9 2543 foreach ('Host','Domain','Email','Edomain') {
d5692f86 2544 next unless $do_sender{$_};
30e9ac3f
SC
2545 print $htm_fh "<li><a href=\"#Sending \l$_ count\">Top $topcount sending \l${_}s by message count</a>\n";
2546 print $htm_fh "<li><a href=\"#Sending \l$_ volume\">Top $topcount sending \l${_}s by volume</a>\n";
059ec3d9 2547 }
a83c7e95 2548 if (($local_league_table || $include_remote_users) && %received_count_user) {
30e9ac3f
SC
2549 print $htm_fh "<li><a href=\"#Local sender count\">Top $topcount local senders by message count</a>\n";
2550 print $htm_fh "<li><a href=\"#Local sender volume\">Top $topcount local senders by volume</a>\n";
059ec3d9
PH
2551 }
2552 foreach ('Host','Domain','Email','Edomain') {
d5692f86 2553 next unless $do_sender{$_};
30e9ac3f
SC
2554 print $htm_fh "<li><a href=\"#$_ destination count\">Top $topcount \l$_ destinations by message count</a>\n";
2555 print $htm_fh "<li><a href=\"#$_ destination volume\">Top $topcount \l$_ destinations by volume</a>\n";
059ec3d9 2556 }
a83c7e95 2557 if (($local_league_table || $include_remote_users) && %delivered_messages_user) {
30e9ac3f
SC
2558 print $htm_fh "<li><a href=\"#Local destination count\">Top $topcount local destinations by message count</a>\n";
2559 print $htm_fh "<li><a href=\"#Local destination volume\">Top $topcount local destinations by volume</a>\n";
059ec3d9 2560 }
37f8a7c9
SC
2561 if (($local_league_table || $include_remote_users) && %delivered_messages_local_domain) {
2562 print $htm_fh "<li><a href=\"#Local domain destination count\">Top $topcount local domain destinations by message count</a>\n";
2563 print $htm_fh "<li><a href=\"#Local domain destination volume\">Top $topcount local domain destinations by volume</a>\n";
2564 }
a83c7e95 2565
30e9ac3f
SC
2566 print $htm_fh "<li><a href=\"#Rejected ip count\">Top $topcount rejected ips by message count</a>\n" if %rejected_count_by_ip;
2567 print $htm_fh "<li><a href=\"#Temporarily rejected ip count\">Top $topcount temporarily rejected ips by message count</a>\n" if %temporarily_rejected_count_by_ip;
2568 print $htm_fh "<li><a href=\"#Non-rejected spamming ip count\">Top $topcount non-rejected spamming ips by message count</a>\n" if %spam_count_by_ip;
a83c7e95 2569
059ec3d9 2570 }
d5692f86
SC
2571 print $htm_fh "<li><a href=\"#errors\">List of errors</a>\n" if %errors_count;
2572 print $htm_fh "</ul>\n<hr>\n";
059ec3d9 2573 }
d5692f86
SC
2574 if ($xls_fh)
2575 {
2576 $ws_global->write($row++, $col+0, "Exim Statistics", $f_header1);
2577 &set_worksheet_line($ws_global, $row, $col, ["from:", $begin, "to:", $end], $f_default);
2578 $row+=2;
059ec3d9
PH
2579 }
2580}
2581
2582
1b4fe9dd
PH
2583#######################################################################
2584# print_grandtotals();
8e669ac1 2585#
1b4fe9dd 2586# print_grandtotals();
8e669ac1 2587#
1b4fe9dd
PH
2588# Print the grand totals.
2589#######################################################################
059ec3d9
PH
2590sub print_grandtotals {
2591
2592 # Get the sender by headings and results. This is complicated as we can have
2593 # different numbers of columns.
a83c7e95 2594 my($sender_txt_header,$sender_txt_format,$sender_html_format);
059ec3d9 2595 my(@received_totals,@delivered_totals);
d5692f86 2596 my($row_tablehead, $row_max);
a83c7e95 2597 my(@col_headers) = ('TOTAL', 'Volume', 'Messages', 'Addresses');
d5692f86 2598
059ec3d9
PH
2599 foreach ('Host','Domain','Email','Edomain') {
2600 next unless $do_sender{$_};
2601 if ($merge_reports) {
2602 push(@received_totals, get_report_total($report_totals{Received},"${_}s"));
2603 push(@delivered_totals,get_report_total($report_totals{Delivered},"${_}s"));
2604 }
2605 else {
2606 push(@received_totals,scalar(keys %{$received_data{$_}}));
2607 push(@delivered_totals,scalar(keys %{$delivered_data{$_}}));
2608 }
059ec3d9 2609 $sender_txt_header .= " " x ($COLUMN_WIDTHS - length($_)) . $_ . 's';
30e9ac3f
SC
2610 $sender_html_format .= "<td align=\"right\">%s</td>";
2611 $sender_txt_format .= " " x ($COLUMN_WIDTHS - 5) . "%6s";
a83c7e95 2612 push(@col_headers,"${_}s");
059ec3d9
PH
2613 }
2614
a83c7e95 2615 my $txt_format1 = " %-16s %9s %6d %6s $sender_txt_format";
d5692f86 2616 my $txt_format2 = " %6d %4.1f%% %6d %4.1f%%",
a83c7e95 2617 my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%s</td><td align=\"right\">%s</td>$sender_html_format";
d5692f86 2618 my $htm_format2 = "<td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td><td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td>";
059ec3d9 2619
d5692f86 2620 if ($txt_fh) {
059ec3d9 2621 my $sender_spaces = " " x length($sender_txt_header);
d5692f86
SC
2622 print $txt_fh "\n";
2623 print $txt_fh "Grand total summary\n";
2624 print $txt_fh "-------------------\n";
a83c7e95
SC
2625 print $txt_fh " $sender_spaces At least one address\n";
2626 print $txt_fh " TOTAL Volume Messages Addresses $sender_txt_header Delayed Failed\n";
d5692f86
SC
2627 }
2628 if ($htm_fh) {
30e9ac3f 2629 print $htm_fh "<a name=\"Grandtotal\"></a>\n";
d5692f86
SC
2630 print $htm_fh "<h2>Grand total summary</h2>\n";
2631 print $htm_fh "<table border=1>\n";
a83c7e95 2632 print $htm_fh "<tr><th>" . join('</th><th>',@col_headers) . "</th><th colspan=2>At least one addr<br>Delayed</th><th colspan=2>At least one addr<br>Failed</th>\n";
d5692f86 2633 }
a83c7e95
SC
2634 if ($xls_fh) {
2635 $ws_global->write($row++, 0, "Grand total summary", $f_header2);
2636 $ws_global->write($row, 0, \@col_headers, $f_header2);
2637 $ws_global->merge_range($row, scalar(@col_headers), $row, scalar(@col_headers)+1, "At least one addr Delayed", $f_header2_m);
2638 $ws_global->merge_range($row, scalar(@col_headers)+2, $row, scalar(@col_headers)+3, "At least one addr Failed", $f_header2_m);
2639 #$ws_global->write(++$row, scalar(@col_headers), ['Total','Percent','Total','Percent'], $f_header2);
059ec3d9
PH
2640 }
2641
d5692f86 2642
059ec3d9
PH
2643 my($volume,$failed_count);
2644 if ($merge_reports) {
2645 $volume = volume_rounded($report_totals{Received}{Volume}, $report_totals{Received}{'Volume-gigs'});
2646 $total_received_count = get_report_total($report_totals{Received},'Messages');
2647 $failed_count = get_report_total($report_totals{Received},'Failed');
2648 $delayed_count = get_report_total($report_totals{Received},'Delayed');
2649 }
2650 else {
2651 $volume = volume_rounded($total_received_data, $total_received_data_gigs);
608bc29d 2652 $failed_count = $message_errors;
059ec3d9
PH
2653 }
2654
2655 {
2656 no integer;
059ec3d9 2657
d5692f86 2658 my @content=(
a83c7e95 2659 $volume,$total_received_count,'',
d5692f86
SC
2660 @received_totals,
2661 $delayed_count,
2662 ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0,
2663 $failed_count,
2664 ($total_received_count) ? ($failed_count*100/$total_received_count) : 0
2665 );
2666
2667 printf $txt_fh ("$txt_format1$txt_format2\n", 'Received', @content) if $txt_fh;
2668 printf $htm_fh ("$htm_format1$htm_format2\n", 'Received', @content) if $htm_fh;
a83c7e95
SC
2669 if ($xls_fh) {
2670 $ws_global->write(++$row, 0, 'Received', $f_default);
2671 for (my $i=0; $i < scalar(@content); $i++) {
d5692f86 2672 if ($i == 4 || $i == 6) {
a83c7e95 2673 $ws_global->write($row, $i+1, $content[$i]/100, $f_percent);
d5692f86
SC
2674 }
2675 else {
a83c7e95 2676 $ws_global->write($row, $i+1, $content[$i], $f_default);
d5692f86
SC
2677 }
2678 }
2679 }
2680 }
a83c7e95 2681
059ec3d9
PH
2682 if ($merge_reports) {
2683 $volume = volume_rounded($report_totals{Delivered}{Volume}, $report_totals{Delivered}{'Volume-gigs'});
a83c7e95
SC
2684 $total_delivered_messages = get_report_total($report_totals{Delivered},'Messages');
2685 $total_delivered_addresses = get_report_total($report_totals{Delivered},'Addresses');
059ec3d9
PH
2686 }
2687 else {
2688 $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs);
2689 }
d5692f86 2690
a83c7e95
SC
2691 my @content=($volume, $total_delivered_messages, $total_delivered_addresses, @delivered_totals);
2692 printf $txt_fh ("$txt_format1\n", 'Delivered', @content) if $txt_fh;
2693 printf $htm_fh ("$htm_format1\n", 'Delivered', @content) if $htm_fh;
d5692f86 2694
a83c7e95
SC
2695 if ($xls_fh) {
2696 $ws_global->write(++$row, 0, 'Delivered', $f_default);
2697 for (my $i=0; $i < scalar(@content); $i++) {
2698 $ws_global->write($row, $i+1, $content[$i], $f_default);
2699 }
2700 }
2701
2702 if ($merge_reports) {
30e9ac3f 2703 foreach ('Rejects', 'Temp Rejects', 'Ham', 'Spam') {
a83c7e95
SC
2704 my $messages = get_report_total($report_totals{$_},'Messages');
2705 my $addresses = get_report_total($report_totals{$_},'Addresses');
2706 if ($messages) {
2707 @content = ($_, '', $messages, '');
2708 push(@content,get_report_total($report_totals{$_},'Hosts')) if $do_sender{Host};
30e9ac3f
SC
2709 #These rows do not have entries for the following columns (if specified)
2710 foreach ('Domain','Email','Edomain') {
2711 push(@content,'') if $do_sender{$_};
2712 }
2713
a83c7e95
SC
2714 printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
2715 printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
2716 $ws_global->write(++$row, 0, \@content) if $xls_fh;
2717 }
2718 }
2719 }
2720 else {
2721 foreach my $total_aref (['Rejects',\%rejected_count_by_ip],
30e9ac3f 2722 ['Temp Rejects',\%temporarily_rejected_count_by_ip],
a83c7e95
SC
2723 ['Ham',\%ham_count_by_ip],
2724 ['Spam',\%spam_count_by_ip]) {
30e9ac3f 2725 #Count the number of messages of this type.
a83c7e95
SC
2726 my $messages = 0;
2727 map {$messages += $_} values %{$total_aref->[1]};
2728
2729 if ($messages > 0) {
2730 @content = ($total_aref->[0], '', $messages, '');
30e9ac3f
SC
2731
2732 #Count the number of distict IPs for the Hosts column.
a83c7e95
SC
2733 push(@content,scalar(keys %{$total_aref->[1]})) if $do_sender{Host};
2734
30e9ac3f
SC
2735 #These rows do not have entries for the following columns (if specified)
2736 foreach ('Domain','Email','Edomain') {
2737 push(@content,'') if $do_sender{$_};
2738 }
2739
a83c7e95
SC
2740 printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
2741 printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
2742 $ws_global->write(++$row, 0, \@content) if $xls_fh;
d5692f86 2743 }
a83c7e95 2744 }
d5692f86 2745 }
a83c7e95
SC
2746
2747 printf $txt_fh "\n" if $txt_fh;
2748 printf $htm_fh "</table>\n" if $htm_fh;
2749 ++$row;
059ec3d9
PH
2750}
2751
2752
1b4fe9dd
PH
2753#######################################################################
2754# print_user_patterns()
8e669ac1 2755#
1b4fe9dd 2756# print_user_patterns();
8e669ac1 2757#
1b4fe9dd
PH
2758# Print the counts of user specified patterns.
2759#######################################################################
059ec3d9 2760sub print_user_patterns {
d5692f86
SC
2761 my $txt_format1 = " %-18s %6d";
2762 my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";
059ec3d9 2763
d5692f86
SC
2764 if ($txt_fh) {
2765 print $txt_fh "User Specified Patterns\n";
2766 print $txt_fh "-----------------------";
2767 print $txt_fh "\n Total\n";
059ec3d9 2768 }
d5692f86 2769 if ($htm_fh) {
30e9ac3f 2770 print $htm_fh "<hr><a name=\"Patterns\"></a><h2>User Specified Patterns</h2>\n";
d5692f86
SC
2771 print $htm_fh "<table border=0 width=\"100%\">\n";
2772 print $htm_fh "<tr><td>\n";
2773 print $htm_fh "<table border=1>\n";
2774 print $htm_fh "<tr><th>&nbsp;</th><th>Total</th>\n";
2775 }
2776 if ($xls_fh) {
2777 $ws_global->write($row++, $col, "User Specified Patterns", $f_header2);
2778 &set_worksheet_line($ws_global, $row++, 1, ["Total"], $f_headertab);
059ec3d9
PH
2779 }
2780
d5692f86 2781
059ec3d9
PH
2782 my($key);
2783 if ($merge_reports) {
2784 # We are getting our data from previous reports.
2785 foreach $key (@user_descriptions) {
2786 my $count = get_report_total($report_totals{patterns}{$key},'Total');
d5692f86
SC
2787 printf $txt_fh ("$txt_format1\n",$key,$count) if $txt_fh;
2788 printf $htm_fh ("$htm_format1\n",$key,$count) if $htm_fh;
2789 if ($xls_fh)
2790 {
2791 &set_worksheet_line($ws_global, $row++, 0, [$key,$count], $f_default);
2792 }
059ec3d9
PH
2793 }
2794 }
2795 else {
2796 # We are getting our data from mainlog files.
2797 my $user_pattern_index = 0;
2798 foreach $key (@user_descriptions) {
d5692f86
SC
2799 printf $txt_fh ("$txt_format1\n",$key,$user_pattern_totals[$user_pattern_index]) if $txt_fh;
2800 printf $htm_fh ("$htm_format1\n",$key,$user_pattern_totals[$user_pattern_index]) if $htm_fh;
a83c7e95 2801 $ws_global->write($row++, 0, [$key,$user_pattern_totals[$user_pattern_index]]) if $xls_fh;
059ec3d9
PH
2802 $user_pattern_index++;
2803 }
2804 }
d5692f86
SC
2805 print $txt_fh "\n" if $txt_fh;
2806 print $htm_fh "</table>\n\n" if $htm_fh;
2807 if ($xls_fh)
2808 {
2809 ++$row;
059ec3d9 2810 }
608bc29d
SC
2811
2812 if ($hist_opt > 0) {
2813 my $user_pattern_index = 0;
2814 foreach $key (@user_descriptions) {
2815 print_histogram($key, 'occurence', @{$user_pattern_interval_count[$user_pattern_index]});
2816 $user_pattern_index++;
2817 }
2818 }
059ec3d9
PH
2819}
2820
a83c7e95
SC
2821#######################################################################
2822# print_rejects()
2823#
2824# print_rejects();
2825#
2826# Print statistics about rejected mail.
2827#######################################################################
2828sub print_rejects {
2829 my($format1,$reason);
2830
2831 my $txt_format1 = " %-40s %6d";
2832 my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";
2833
2834 if ($txt_fh) {
2835 print $txt_fh "Rejected mail by reason\n";
2836 print $txt_fh "-----------------------";
2837 print $txt_fh "\n Total\n";
2838 }
2839 if ($htm_fh) {
2840 print $htm_fh "<hr><a name=\"patterns\"></a><h2>Rejected mail by reason</h2>\n";
2841 print $htm_fh "<table border=0 width=\"100%\"><tr><td><table border=1>\n";
2842 print $htm_fh "<tr><th>&nbsp;</th><th>Total</th>\n";
2843 }
2844 if ($xls_fh) {
2845 $ws_global->write($row++, $col, "Rejected mail by reason", $f_header2);
2846 &set_worksheet_line($ws_global, $row++, 1, ["Total"], $f_headertab);
2847 }
2848
2849
2850 my $href = ($merge_reports) ? $report_totals{rejected_mail_by_reason} : \%rejected_count_by_reason;
2851 my(@chartdatanames, @chartdatavals_count);
2852
2853 foreach $reason (top_n_sort($topcount, $href, undef, undef)) {
2854 printf $txt_fh ("$txt_format1\n",$reason,$href->{$reason}) if $txt_fh;
2855 printf $htm_fh ("$htm_format1\n",$reason,$href->{$reason}) if $htm_fh;
2856 set_worksheet_line($ws_global, $row++, 0, [$reason,$href->{$reason}], $f_default) if $xls_fh;
2857 push(@chartdatanames, $reason);
2858 push(@chartdatavals_count, $href->{$reason});
2859 }
2860
2861 $row++ if $xls_fh;
2862 print $txt_fh "\n" if $txt_fh;
2863
2864 if ($htm_fh) {
2865 print $htm_fh "</tr></table></td><td>";
2866 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals_count > 0)) {
2867 # calculate the graph
2868 my @data = (
2869 \@chartdatanames,
2870 \@chartdatavals_count
2871 );
2872 my $graph = GD::Graph::pie->new(200, 200);
2873 $graph->set(
2874 x_label => 'Rejection Reasons',
2875 y_label => 'Messages',
2876 title => 'By count',
2877 );
2878 my $gd = $graph->plot(\@data) or warn($graph->error);
2879 if ($gd) {
2880 open(IMG, ">$chartdir/rejections_count.png") or die "Could not write $chartdir/rejections_count.png: $!\n";
2881 binmode IMG;
2882 print IMG $gd->png;
2883 close IMG;
2884 print $htm_fh "<img src=\"$chartrel/rejections_count.png\">";
2885 }
2886 }
2887 print $htm_fh "</td></tr></table>\n\n";
2888 }
2889}
2890
2891
2892
2893
059ec3d9 2894
1b4fe9dd
PH
2895#######################################################################
2896# print_transport();
8e669ac1 2897#
1b4fe9dd 2898# print_transport();
8e669ac1 2899#
1b4fe9dd
PH
2900# Print totals by transport.
2901#######################################################################
059ec3d9 2902sub print_transport {
059ec3d9
PH
2903 my(@chartdatanames);
2904 my(@chartdatavals_count);
2905 my(@chartdatavals_vol);
d5692f86
SC
2906 no integer; #Lose this for charting the data.
2907
2908 my $txt_format1 = " %-18s %6s %6d";
2909 my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%d</td>";
059ec3d9 2910
d5692f86
SC
2911 if ($txt_fh) {
2912 print $txt_fh "Deliveries by transport\n";
2913 print $txt_fh "-----------------------";
2914 print $txt_fh "\n Volume Messages\n";
059ec3d9 2915 }
d5692f86 2916 if ($htm_fh) {
30e9ac3f 2917 print $htm_fh "<hr><a name=\"Transport\"></a><h2>Deliveries by Transport</h2>\n";
a83c7e95 2918 print $htm_fh "<table border=0 width=\"100%\"><tr><td><table border=1>\n";
d5692f86
SC
2919 print $htm_fh "<tr><th>&nbsp;</th><th>Volume</th><th>Messages</th>\n";
2920 }
2921 if ($xls_fh) {
a83c7e95
SC
2922 $ws_global->write(++$row, $col, "Deliveries by transport", $f_header2);
2923 $ws_global->write(++$row, 1, ["Volume", "Messages"], $f_headertab);
059ec3d9
PH
2924 }
2925
2926 my($key);
2927 if ($merge_reports) {
2928 # We are getting our data from previous reports.
2929 foreach $key (sort keys %{$report_totals{transport}}) {
2930 my $count = get_report_total($report_totals{transport}{$key},'Messages');
d5692f86
SC
2931 my @content=($key, volume_rounded($report_totals{transport}{$key}{Volume},
2932 $report_totals{transport}{$key}{'Volume-gigs'}), $count);
059ec3d9
PH
2933 push(@chartdatanames, $key);
2934 push(@chartdatavals_count, $count);
2935 push(@chartdatavals_vol, $report_totals{transport}{$key}{'Volume-gigs'}*$gig + $report_totals{transport}{$key}{Volume} );
d5692f86
SC
2936 printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
2937 printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
a83c7e95 2938 $ws_global->write(++$row, 0, \@content) if $xls_fh;
059ec3d9
PH
2939 }
2940 }
2941 else {
2942 # We are getting our data from mainlog files.
2943 foreach $key (sort keys %transported_data) {
d5692f86
SC
2944 my @content=($key, volume_rounded($transported_data{$key},$transported_data_gigs{$key}),
2945 $transported_count{$key});
059ec3d9
PH
2946 push(@chartdatanames, $key);
2947 push(@chartdatavals_count, $transported_count{$key});
2948 push(@chartdatavals_vol, $transported_data_gigs{$key}*$gig + $transported_data{$key});
d5692f86
SC
2949 printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
2950 printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
a83c7e95 2951 $ws_global->write(++$row, 0, \@content) if $xls_fh;
059ec3d9
PH
2952 }
2953 }
d5692f86
SC
2954 print $txt_fh "\n" if $txt_fh;
2955 if ($htm_fh) {
a83c7e95
SC
2956 print $htm_fh "</tr></table></td><td>";
2957
608bc29d 2958 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals_count > 0))
059ec3d9
PH
2959 {
2960 # calculate the graph
2961 my @data = (
2962 \@chartdatanames,
2963 \@chartdatavals_count
2964 );
2965 my $graph = GD::Graph::pie->new(200, 200);
2966 $graph->set(
2967 x_label => 'Transport',
2968 y_label => 'Messages',
2969 title => 'By count',
2970 );
2971 my $gd = $graph->plot(\@data) or warn($graph->error);
2972 if ($gd) {
d5692f86
SC
2973 open(IMG, ">$chartdir/transports_count.png") or die "Could not write $chartdir/transports_count.png: $!\n";
2974 binmode IMG;
2975 print IMG $gd->png;
2976 close IMG;
2977 print $htm_fh "<img src=\"$chartrel/transports_count.png\">";
059ec3d9
PH
2978 }
2979 }
a83c7e95 2980 print $htm_fh "</td><td>";
059ec3d9 2981
608bc29d 2982 if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals_vol > 0)) {
059ec3d9
PH
2983 my @data = (
2984 \@chartdatanames,
2985 \@chartdatavals_vol
2986 );
2987 my $graph = GD::Graph::pie->new(200, 200);
2988 $graph->set(
2989 title => 'By volume',
2990 );
2991 my $gd = $graph->plot(\@data) or warn($graph->error);
2992 if ($gd) {
a83c7e95 2993 open(IMG, ">$chartdir/transports_vol.png") or die "Could not write $chartdir/transports_vol.png: $!\n";
d5692f86
SC
2994 binmode IMG;
2995 print IMG $gd->png;
2996 close IMG;
2997 print $htm_fh "<img src=\"$chartrel/transports_vol.png\">";
059ec3d9
PH
2998 }
2999 }
a83c7e95 3000
d5692f86
SC
3001 print $htm_fh "</td></tr></table>\n\n";
3002 }
059ec3d9
PH
3003}
3004
3005
3006
1b4fe9dd
PH
3007#######################################################################
3008# print_relay();
8e669ac1 3009#
1b4fe9dd 3010# print_relay();
8e669ac1 3011#
1b4fe9dd
PH
3012# Print our totals by relay.
3013#######################################################################
059ec3d9 3014sub print_relay {
d5692f86 3015 my $row_print_relay=1;
059ec3d9 3016 my $temp = "Relayed messages";
d5692f86 3017 print $htm_fh "<hr><a name=\"$temp\"></a><h2>$temp</h2>\n" if $htm_fh;
059ec3d9
PH
3018 if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) {
3019 my $shown = 0;
3020 my $spacing = "";
d5692f86
SC
3021 my $txt_format = "%7d %s\n => %s\n";
3022 my $htm_format = "<tr><td align=\"right\">%d</td><td>%s</td><td>%s</td>\n";
059ec3d9 3023
d5692f86
SC
3024 printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
3025 if ($htm_fh) {
3026 print $htm_fh "<table border=1>\n";
3027 print $htm_fh "<tr><th>Count</th><th>From</th><th>To</th>\n";
059ec3d9 3028 }
d5692f86
SC
3029 if ($xls_fh) {
3030 $ws_relayed->write($row_print_relay++, $col, $temp, $f_header2);
3031 &set_worksheet_line($ws_relayed, $row_print_relay++, 0, ["Count", "From", "To"], $f_headertab);
059ec3d9
PH
3032 }
3033
d5692f86 3034
059ec3d9
PH
3035 my($key);
3036 foreach $key (sort keys %relayed) {
3037 my $count = $relayed{$key};
3038 $shown += $count;
3039 $key =~ s/[HA]=//g;
3040 my($one,$two) = split(/=> /, $key);
d5692f86
SC
3041 my @content=($count, $one, $two);
3042 printf $txt_fh ($txt_format, @content) if $txt_fh;
3043 printf $htm_fh ($htm_format, @content) if $htm_fh;
3044 if ($xls_fh)
3045 {
3046 &set_worksheet_line($ws_relayed, $row_print_relay++, 0, \@content);
3047 }
059ec3d9
PH
3048 $spacing = "\n";
3049 }
d5692f86
SC
3050
3051 print $htm_fh "</table>\n<p>\n" if $htm_fh;
3052 print $txt_fh "${spacing}Total: $shown (plus $relayed_unshown unshown)\n\n" if $txt_fh;
3053 print $htm_fh "${spacing}Total: $shown (plus $relayed_unshown unshown)\n\n" if $htm_fh;
3054 if ($xls_fh)
3055 {
3056 &set_worksheet_line($ws_relayed, $row_print_relay++, 0, [$shown, "Sum of shown" ]);
3057 &set_worksheet_line($ws_relayed, $row_print_relay++, 0, [$relayed_unshown, "unshown"]);
3058 $row_print_relay++;
3059 }
059ec3d9
PH
3060 }
3061 else {
d5692f86
SC
3062 print $txt_fh "No relayed messages\n-------------------\n\n" if $txt_fh;
3063 print $htm_fh "No relayed messages\n\n" if $htm_fh;
3064 if ($xls_fh)
3065 {
3066 $row_print_relay++;
3067 }
059ec3d9 3068 }
059ec3d9
PH
3069}
3070
3071
3072
1b4fe9dd
PH
3073#######################################################################
3074# print_errors();
8e669ac1 3075#
1b4fe9dd 3076# print_errors();
8e669ac1 3077#
1b4fe9dd
PH
3078# Print our errors. In HTML, we display them as a list rather than a table -
3079# Netscape doesn't like large tables!
3080#######################################################################
059ec3d9
PH
3081sub print_errors {
3082 my $total_errors = 0;
d5692f86 3083 $row=1;
059ec3d9
PH
3084
3085 if (scalar(keys %errors_count) != 0) {
3086 my $temp = "List of errors";
d5692f86
SC
3087 my $htm_format = "<li>%d - %s\n";
3088
3089 printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
3090 if ($htm_fh) {
3091 print $htm_fh "<hr><a name=\"errors\"></a><h2>$temp</h2>\n";
3092 print $htm_fh "<ul><li><b>Count - Error</b>\n";
059ec3d9 3093 }
d5692f86
SC
3094 if ($xls_fh)
3095 {
3096 $ws_errors->write($row++, 0, $temp, $f_header2);
3097 &set_worksheet_line($ws_errors, $row++, 0, ["Count", "Error"], $f_headertab);
059ec3d9
PH
3098 }
3099
d5692f86 3100
059ec3d9
PH
3101 my($key);
3102 foreach $key (sort keys %errors_count) {
3103 my $text = $key;
3104 chomp($text);
d5692f86 3105 $text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space.
059ec3d9 3106 $total_errors += $errors_count{$key};
d5692f86
SC
3107
3108 if ($txt_fh) {
3109 printf $txt_fh ("%5d ", $errors_count{$key});
3110 my $text_remaining = $text;
3111 while (length($text_remaining) > 65) {
3112 my($first,$rest) = $text_remaining =~ /(.{50}\S*)\s+(.+)/;
3113 last if !$first;
3114 printf $txt_fh ("%s\n\t ", $first);
3115 $text_remaining = $rest;
3116 }
3117 printf $txt_fh ("%s\n\n", $text_remaining);
3118 }
3119
3120 if ($htm_fh) {
8e669ac1 3121
059ec3d9
PH
3122 #Translate HTML tag characters. Sergey Sholokh.
3123 $text =~ s/\</\&lt\;/g;
3124 $text =~ s/\>/\&gt\;/g;
3125
d5692f86 3126 printf $htm_fh ($htm_format,$errors_count{$key},$text);
059ec3d9 3127 }
d5692f86
SC
3128 if ($xls_fh)
3129 {
3130 &set_worksheet_line($ws_errors, $row++, 0, [$errors_count{$key},$text]);
059ec3d9
PH
3131 }
3132 }
059ec3d9
PH
3133
3134 $temp = "Errors encountered: $total_errors";
d5692f86
SC
3135
3136 if ($txt_fh) {
3137 print $txt_fh $temp, "\n";
3138 print $txt_fh "-" x length($temp),"\n";
3139 }
3140 if ($htm_fh) {
3141 print $htm_fh "</ul>\n<p>\n";
3142 print $htm_fh $temp, "\n";
3143 }
3144 if ($xls_fh)
3145 {
3146 &set_worksheet_line($ws_errors, $row++, 0, [$total_errors, "Sum of Errors encountered"]);
3147 }
059ec3d9
PH
3148 }
3149
3150}
3151
3152
1b4fe9dd
PH
3153#######################################################################
3154# parse_old_eximstat_reports();
8e669ac1 3155#
1b4fe9dd 3156# parse_old_eximstat_reports($fh);
8e669ac1 3157#
1b4fe9dd 3158# Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
8e669ac1 3159#
1b4fe9dd
PH
3160# To test that the merging still works after changes, do something like the following.
3161# All the diffs should produce no output.
8e669ac1 3162#
1b4fe9dd 3163# options='-bydomain -byemail -byhost -byedomain'
608bc29d 3164# options="$options -show_rt1,2,4 -show_dt 1,2,4"
1b4fe9dd
PH
3165# options="$options -pattern 'Completed Messages' /Completed/"
3166# options="$options -pattern 'Received Messages' /<=/"
8e669ac1 3167#
1b4fe9dd
PH
3168# ./eximstats $options mainlog > mainlog.txt
3169# ./eximstats $options -merge mainlog.txt > mainlog.2.txt
3170# diff mainlog.txt mainlog.2.txt
8e669ac1 3171#
1b4fe9dd
PH
3172# ./eximstats $options -html mainlog > mainlog.html
3173# ./eximstats $options -merge -html mainlog.txt > mainlog.2.html
3174# diff mainlog.html mainlog.2.html
8e669ac1 3175#
1b4fe9dd
PH
3176# ./eximstats $options -merge mainlog.html > mainlog.3.txt
3177# diff mainlog.txt mainlog.3.txt
8e669ac1 3178#
1b4fe9dd
PH
3179# ./eximstats $options -merge -html mainlog.html > mainlog.3.html
3180# diff mainlog.html mainlog.3.html
8e669ac1 3181#
1b4fe9dd
PH
3182# ./eximstats $options -nvr mainlog > mainlog.nvr.txt
3183# ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
3184# diff mainlog.txt mainlog.4.txt
8e669ac1 3185#
1b4fe9dd
PH
3186# # double_mainlog.txt should have twice the values that mainlog.txt has.
3187# ./eximstats $options mainlog mainlog > double_mainlog.txt
3188#######################################################################
059ec3d9
PH
3189sub parse_old_eximstat_reports {
3190 my($fh) = @_;
3191
3192 my(%league_table_value_entered, %league_table_value_was_zero, %table_order);
3193
608bc29d
SC
3194 my(%user_pattern_index);
3195 my $user_pattern_index = 0;
3196 map {$user_pattern_index{$_} = $user_pattern_index++} @user_descriptions;
3197 my $user_pattern_keys = join('|', @user_descriptions);
3198
059ec3d9 3199 while (<$fh>) {
d5692f86 3200 PARSE_OLD_REPORT_LINE:
059ec3d9
PH
3201 if (/Exim statistics from ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?) to ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?)/) {
3202 $begin = $1 if ($1 lt $begin);
3203 $end = $3 if ($3 gt $end);
3204 }
3205 elsif (/Grand total summary/) {
a83c7e95
SC
3206 # Fill in $report_totals{Received|Delivered}{Volume|Messages|Addresses|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
3207 my(@fields, @delivered_fields);
3208 my $doing_table = 0;
059ec3d9 3209 while (<$fh>) {
d5692f86
SC
3210 $_ = html2txt($_); #Convert general HTML markup to text.
3211 s/At least one addr//g; #Another part of the HTML output we don't want.
059ec3d9 3212
a83c7e95
SC
3213# TOTAL Volume Messages Addresses Hosts Domains Delayed Failed
3214# Received 26MB 237 177 23 8 3.4% 28 11.8%
3215# Delivered 13MB 233 250 99 88
d5692f86 3216 if (/TOTAL\s+(.*?)\s*$/) {
a83c7e95
SC
3217 $doing_table = 1;
3218 @delivered_fields = split(/\s+/,$1);
3219
059ec3d9 3220 #Delayed and Failed have two columns each, so add the extra field names in.
a83c7e95
SC
3221 splice(@delivered_fields,-1,1,'DelayedPercent','Failed','FailedPercent');
3222
3223 # Addresses only figure in the Delivered row, so remove them from the
3224 # normal fields.
3225 @fields = grep !/Addresses/, @delivered_fields;
d5692f86 3226 }
a83c7e95 3227 elsif (/(Received)\s+(.*?)\s*$/) {
d5692f86
SC
3228 print STDERR "Parsing $_" if $debug;
3229 add_to_totals($report_totals{$1},\@fields,$2);
3230 }
a83c7e95
SC
3231 elsif (/(Delivered)\s+(.*?)\s*$/) {
3232 print STDERR "Parsing $_" if $debug;
3233 add_to_totals($report_totals{$1},\@delivered_fields,$2);
3234 my $data = $2;
3235 # If we're merging an old report which doesn't include addresses,
3236 # then use the Messages field instead.
3237 unless (grep(/Addresses/, @delivered_fields)) {
3238 my %tmp;
3239 line_to_hash(\%tmp,\@delivered_fields,$data);
3240 add_to_totals($report_totals{Delivered},['Addresses'],$tmp{Messages});
3241 }
3242 }
30e9ac3f 3243 elsif (/(Temp Rejects|Rejects|Ham|Spam)\s+(.*?)\s*$/) {
a83c7e95
SC
3244 print STDERR "Parsing $_" if $debug;
3245 add_to_totals($report_totals{$1},['Messages','Hosts'],$2);
3246 }
3247 else {
3248 last if $doing_table;
3249 }
059ec3d9
PH
3250 }
3251 }
3252
3253 elsif (/User Specified Patterns/i) {
3254#User Specified Patterns
3255#-----------------------
3256# Total
3257# Description 85
3258
d5692f86 3259 while (<$fh>) { last if (/Total/); } #Wait until we get the table headers.
059ec3d9 3260 while (<$fh>) {
d5692f86
SC
3261 print STDERR "Parsing $_" if $debug;
3262 $_ = html2txt($_); #Convert general HTML markup to text.
3263 if (/^\s*(.*?)\s+(\d+)\s*$/) {
3264 $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
3265 add_to_totals($report_totals{patterns}{$1},['Total'],$2);
3266 }
3267 last if (/^\s*$/); #Finished if we have a blank line.
059ec3d9
PH
3268 }
3269 }
3270
608bc29d
SC
3271 elsif (/(^|<h2>)($user_pattern_keys) per /o) {
3272 # Parse User defined pattern histograms if they exist.
3273 parse_histogram($fh, $user_pattern_interval_count[$user_pattern_index{$2}] );
3274 }
3275
3276
059ec3d9
PH
3277 elsif (/Deliveries by transport/i) {
3278#Deliveries by transport
3279#-----------------------
3280# Volume Messages
3281# :blackhole: 70KB 51
3282# address_pipe 655KB 1
3283# smtp 11MB 151
3284
d5692f86 3285 while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers.
059ec3d9 3286 while (<$fh>) {
d5692f86
SC
3287 print STDERR "Parsing $_" if $debug;
3288 $_ = html2txt($_); #Convert general HTML markup to text.
3289 if (/(\S+)\s+(\d+\S*\s+\d+)/) {
3290 $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
3291 add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
3292 }
3293 last if (/^\s*$/); #Finished if we have a blank line.
059ec3d9
PH
3294 }
3295 }
608bc29d
SC
3296 elsif (/Messages received per/) {
3297 parse_histogram($fh, \@received_interval_count);
3298 }
3299 elsif (/Deliveries per/) {
3300 parse_histogram($fh, \@delivered_interval_count);
059ec3d9
PH
3301 }
3302
608bc29d
SC
3303 #elsif (/Time spent on the queue: (all messages|messages with at least one remote delivery)/) {
3304 elsif (/(Time spent on the queue|Delivery times|Receipt times): ((\S+) messages|messages with at least one remote delivery)((<[^>]*>)*\s*)$/) {
059ec3d9
PH
3305#Time spent on the queue: all messages
3306#-------------------------------------
3307#
3308#Under 1m 217 91.9% 91.9%
3309# 5m 2 0.8% 92.8%
3310# 3h 8 3.4% 96.2%
3311# 6h 7 3.0% 99.2%
3312# 12h 2 0.8% 100.0%
3313
3314 # Set a pointer to the queue bin so we can use the same code
3315 # block for both all messages and remote deliveries.
608bc29d
SC
3316 #my $bin_aref = ($1 eq 'all messages') ? \@qt_all_bin : \@qt_remote_bin;
3317 my($bin_aref, $times_aref, $overflow_sref);
3318 if ($1 eq 'Time spent on the queue') {
3319 $times_aref = \@queue_times;
3320 if ($2 eq 'all messages') {
3321 $bin_aref = \@qt_all_bin;
3322 $overflow_sref = \$qt_all_overflow;
3323 }
3324 else {
3325 $bin_aref = \@qt_remote_bin;
3326 $overflow_sref = \$qt_remote_overflow;
3327 }
3328 }
3329 elsif ($1 eq 'Delivery times') {
3330 $times_aref = \@delivery_times;
3331 if ($2 eq 'all messages') {
3332 $bin_aref = \@dt_all_bin;
3333 $overflow_sref = \$dt_all_overflow;
3334 }
3335 else {
3336 $bin_aref = \@dt_remote_bin;
3337 $overflow_sref = \$dt_remote_overflow;
3338 }
3339 }
3340 else {
3341 unless (exists $rcpt_times_bin{$3}) {
3342 initialise_rcpt_times($3);
3343 }
3344 $bin_aref = $rcpt_times_bin{$3};
3345 $times_aref = \@rcpt_times;
3346 $overflow_sref = \$rcpt_times_overflow{$3};
3347 }
3348
3349
a83c7e95 3350 my ($blank_lines, $reached_table) = (0,0);
059ec3d9 3351 while (<$fh>) {
d5692f86 3352 $_ = html2txt($_); #Convert general HTML markup to text.
a83c7e95
SC
3353 # The table is preceded by one blank line, and has one blank line
3354 # following it. As the table may be empty, the best way to determine
3355 # that we've finished it is to look for the second blank line.
3356 ++$blank_lines if /^\s*$/;
3357 last if ($blank_lines >=2); #Finished the table ?
3358 $reached_table = 1 if (/\d/);
d5692f86
SC
3359 next unless $reached_table;
3360 my $previous_seconds_on_queue = 0;
3361 if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
3362 print STDERR "Parsing $_" if $debug;
968178ac
AM
3363 my($modifier,$formatted_time,$count) = ($1,$2,$3);
3364 my $seconds = unformat_time($formatted_time);
d5692f86
SC
3365 my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
3366 $previous_seconds_on_queue = $seconds;
3367 $time_on_queue = $seconds * 2 if ($modifier eq 'Over');
3368 my($i);
608bc29d
SC
3369 for ($i = 0; $i <= $#$times_aref; $i++) {
3370 if ($time_on_queue < $times_aref->[$i]) {
d5692f86
SC
3371 $$bin_aref[$i] += $count;
3372 last;
3373 }
3374 }
608bc29d
SC
3375 $$overflow_sref += $count if ($i > $#$times_aref);
3376
d5692f86 3377 }
059ec3d9
PH
3378 }
3379 }
3380
3381 elsif (/Relayed messages/) {
3382#Relayed messages
3383#----------------
3384#
3385# 1 addr.domain.com [1.2.3.4] a.user@domain.com
3386# => addr2.domain2.com [5.6.7.8] a2.user2@domain2.com
3387#
3388#<tr><td align="right">1</td><td>addr.domain.com [1.2.3.4] a.user@domain.com </td><td>addr2.domain2.com [5.6.7.8] a2.user2@domain2.com</td>
3389
3390 my $reached_table = 0;
3391 my($count,$sender);
3392 while (<$fh>) {
d5692f86
SC
3393 unless ($reached_table) {
3394 last if (/No relayed messages/);
3395 $reached_table = 1 if (/^\s*\d/ || />\d+</);
3396 next unless $reached_table;
3397 }
3398 if (/>(\d+)<.td><td>(.*?) ?<.td><td>(.*?)</) {
3399 update_relayed($1,$2,$3);
3400 }
3401 elsif (/^\s*(\d+)\s+(.*?)\s*$/) {
3402 ($count,$sender) = ($1,$2);
3403 }
3404 elsif (/=>\s+(.*?)\s*$/) {
3405 update_relayed($count,$sender,$1);
3406 }
3407 else {
3408 last; #Finished the table ?
3409 }
059ec3d9
PH
3410 }
3411 }
3412
3413 elsif (/Top (.*?) by (message count|volume)/) {
3414#Top 50 sending hosts by message count
3415#-------------------------------------
3416#
3417# 48 1468KB local
50adf73a
SC
3418# Could also have average values for HTML output.
3419# 48 1468KB 30KB local
3420
059ec3d9
PH
3421 my($category,$by_count_or_volume) = ($1,$2);
3422
3423 #As we show 2 views of each table (by count and by volume),
3424 #most (but not all) entries will appear in both tables.
3425 #Set up a hash to record which entries we have already seen
3426 #and one to record which ones we are seeing for the first time.
3427 if ($by_count_or_volume =~ /count/) {
d5692f86
SC
3428 undef %league_table_value_entered;
3429 undef %league_table_value_was_zero;
3430 undef %table_order;
059ec3d9
PH
3431 }
3432
3433 #As this section processes multiple different table categories,
3434 #set up pointers to the hashes to be updated.
a83c7e95 3435 my($messages_href,$addresses_href,$data_href,$data_gigs_href);
059ec3d9