Add ${rfc2047d: expansion.
[exim.git] / src / src / eximstats.src
CommitLineData
059ec3d9 1#!PERL_COMMAND -w
e2f7a0d2 2# $Cambridge: exim/src/src/eximstats.src,v 1.12 2007/01/22 15:14:01 steve Exp $
059ec3d9
PH
3
4# Copyright (c) 2001 University of Cambridge.
5# See the file NOTICE for conditions of use and distribution.
6
7# Perl script to generate statistics from one or more Exim log files.
8
9# Usage: eximstats [<options>] <log file> <log file> ...
10
11# 1996-05-21: Ignore lines not starting with valid date/time, just in case
12# these get into a log file.
13# 1996-11-19: Add the -h option to control the size of the histogram,
14# and optionally turn it off.
15# Use some Perl 5 things; it should be everywhere by now.
16# Add the Perl -w option and rewrite so no warnings are given.
17# Add the -t option to control the length of the "top" listing.
18# Add the -ne, -nt options to turn off errors and transport
19# information.
20# Add information about length of time on queue, and -q<list> to
21# control the intervals and turn it off.
22# Add count and percentage of delayed messages to the Received
23# line.
24# Show total number of errors.
25# Add count and percentage of messages with errors to Received
26# line.
27# Add information about relaying and -nr to suppress it.
28# 1997-02-03 Merged in some of the things Nigel Metheringham had done:
29# Re-worded headings
30# Added received histogram as well as delivered
31# Added local senders' league table
32# Added local recipients' league table
33# 1997-03-10 Fixed typo "destinationss"
34# Allow for intermediate address between final and original
35# when testing for relaying
36# Give better message when no input
37# 1997-04-24 Fixed bug in layout of error listing that was depending on
38# text length (output line got repeated).
39# 1997-05-06 Bug in option decoding when only one option.
40# Overflow bug when handling very large volumes.
41# 1997-10-28 Updated to handle revised log format that might show
42# HELO name as well as host name before IP number
43# 1998-01-26 Bugs in the function for calculating the number of seconds
44# since 1970 from a log date
45# 1998-02-02 Delivery to :blackhole: doesn't have a T= entry in the log
46# line; cope with this, thereby avoiding undefined problems
47# Very short log line gave substring error
48# 1998-02-03 A routed delivery to a local transport may not have <> in the
49# log line; terminate the address at white space, not <
50# 1998-09-07 If first line of input was a => line, $thissize was undefined;
51# ensure it is zero.
52# 1998-12-21 Adding of $thissize from => line should have been adding $size.
53# Oops. Should have looked more closely when fixing the previous
54# bug!
55# 1999-11-12 Increased the field widths for printed integers; numbers are
56# bigger than originally envisaged.
57# 2001-03-21 Converted seconds() routine to use Time::Local, fixing a bug
58# whereby seconds($timestamp) - id_seconds($id) gave an
59# incorrect result.
60# Added POD documentation.
61# Moved usage instructions into help() subroutine.
62# Added 'use strict' and declared all global variables.
63# Added '-html' flag and resultant code.
64# Added '-cache' flag and resultant code.
65# Added add_volume() routine and converted all volume variables
66# to use it, fixing the overflow problems for individual hosts
67# on large sites.
68# Converted all volume output to GB/MB/KB as appropriate.
69# Don't store local user stats if -nfl is specified.
70# Modifications done by: Steve Campbell (<steve@computurn.com>)
71# 2001-04-02 Added the -t_remote_users flag. Steve Campbell.
72# 2001-10-15 Added the -domain flag. Steve Campbell.
73# 2001-10-16 Accept files on STDIN or on the command line. Steve Campbell.
74# 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
75# We now generate our main parsing subroutine as an eval statement
76# which improves performance dramatically when not all the results
77# are required. We also cache the last timestamp to time conversion.
78#
79# NOTE: 'Top 50 destinations by (message count|volume)' lines are
80# now 'Top N (host|email|domain) destinations by (message count|volume)'
81# where N is the topcount. Steve Campbell.
82#
83# 2001-10-30 V1.16 Joachim Wieland.
84# Fixed minor bugs in add_volume() when taking over this version
85# for use in Exim 4: -w gave uninitialized value warnings in
86# two situations: for the first addition to a counter, and if
87# there were never any gigabytes, thereby leaving the $gigs
88# value unset.
89# Initialized $last_timestamp to stop a -w uninitialized warning.
90# Minor layout tweak for grand totals (nitpicking).
91# Put the IP addresses for relaying stats in [] and separated by
92# a space from the domain name.
93# Removed the IPv4-specific address test when picking out addresses
94# for relaying. Anything inside [] is OK.
95#
96# 2002-07-02 Philip Hazel
97# Fixed "uninitialized variable" message that occurred for relay
98# messages that arrived from H=[1.2.3.4] hosts (no name shown).
99# This bug didn't affect the output.
100#
101# 2002-04-15 V1.17 Joachim Wieland.
102# Added -charts, -chartdir. -chartrel options which use
103# GD::Graph modules to create graphical charts of the statistics.
104#
105# 2002-04-15 V1.18 Steve Campbell.
106# Added a check for $domain to stop a -w uninitialized warning.
107# Added -byemaildomain option.
108# Only print HTML header links to included tables!
109#
110# 2002-08-02 V1.19 Steve Campbell.
111# Changed the debug mode to dump the parser onto STDERR rather
112# than STDOUT. Documented the -d flag into the help().
113# Rejoined the divergent 2002-04-15 and 2002-07-02 releases.
114#
115# 2002-08-21 V1.20 Steve Campbell.
116# Added the '-merge' option to allow merging of previous reports.
117# Fixed a missing semicolon when doing -bydomain.
118# Make volume charts plot the data gigs and bytes rather than just bytes.
119# Only process log lines with $flag =~ /<=|=>|->|==|\*\*|Co/
120# Converted Emaildomain to Edomain - the column header was too wide!
121# This changes the text output slightly. You can revert to the old
122# column widths by changing $COLUMN_WIDTHS to 7;
123#
124# 2002-09-04 V1.21 Andreas J Mueller
125# Local deliveries domain now defaults to 'localdomain'.
126# Don't match F=<From> when looking for the user.
127#
128# 2002-09-05 V1.22 Steve Campbell
129# Fixed a perl 5.005 incompatibility problem ('our' variables).
130#
131# 2002-09-11 V1.23 Steve Campbell
132# Stopped -charts option from throwing errors on null data.
133# Don't print out 'Errors encountered' unless there are any.
134
135# 2002-10-21 V1.23a Philip Hazel - patch from Tony Finch put in until
136# Steve's eximstats catches up.
137# Handle log files that include the timezone after the timestamp.
138# Switch to assuming that log timestamps are in local time, with
139# an option for UTC timestamps, as in Exim itself.
140#
141# 2003-02-05 V1.24 Steve Campbell
142# Added in Sergey Sholokh's code to convert '<' and '>' characters
143# in HTML output. Also added code to convert them back with -merge.
144# Fixed timestamp offsets to convert to seconds rather than minutes.
145# Updated -merge to work with output files using timezones.
146# Added cacheing to speed up the calculation of timezone offsets.
147#
148# 2003-02-07 V1.25 Steve Campbell
149# Optimised the usage of mktime() in the seconds subroutine.
150# Removed the now redundant '-cache' option.
151# html2txt() now explicitly matches HTML tags.
152# Implemented a new sorting algorithm - the top_n_sort() routine.
153# Added Danny Carroll's '-nvr' flag and code.
154#
155# 2003-03-13 V1.26 Steve Campbell
156# Implemented HTML compliance changes recommended by Bernard Massot.
157# Bug fix to allow top_n_sort() to handle null keys.
158# Convert all domains and edomains to lowercase.
159# Remove preceding dots from domains.
160#
161# 2003-03-13 V1.27 Steve Campbell
162# Replaced border attributes with 'border=1', as recommended by
163# Bernard Massot.
164#
165# 2003-06-03 V1.28 John Newman
166# Added in the ability to skip over the parsing and evaluation of
167# specific transports as passed to eximstats via the new "-nt/.../"
168# command line argument. This new switch allows the viewing of
169# not more accurate statistics but more applicable statistics when
170# special transports are in use (ie; SpamAssassin). We need to be
171# able to ignore transports such as this otherwise the resulting
172# local deliveries are significantly skewed (doubled)...
173#
174# 2003-11-06 V1.29 Steve Campbell
175# Added the '-pattern "Description" "/pattern/"' option.
176#
177# 2004-02-17 V1.30 Steve Campbell
178# Added warnings if required GD::Graph modules are not available or
179# insufficient -chart* options are specified.
180#
8e669ac1 181# 2004-02-20 V1.31 Andrea Balzi
059ec3d9
PH
182# Only show the Local Sender/Destination links if the tables exist.
183#
1b4fe9dd
PH
184# 2004-07-05 V1.32 Steve Campbell
185# Fix '-merge -h0' divide by zero error.
186#
187# 2004-07-15 V1.33 Steve Campbell
188# Documentation update - I've converted the subroutine
189# documentation from POD to comments.
8974000d
SC
190#
191# 2004-12-10 V1.34 Steve Campbell
192# Eximstats can now parse syslog lines as well as mainlog lines.
193#
884c2af8 194# 2004-12-20 V1.35 Wouter Verhelst
d5692f86 195# Pie charts by volume were actually generated by count. Fixed.
50adf73a
SC
196#
197# 2005-02-07 V1.36 Gregor Herrmann / Steve Campbell
198# Added average sizes to HTML Top tables.
d5692f86
SC
199#
200# 2005-04-26 V1.37 Frank Heydlauf
201# Added -xls and the ability to specify output files.
202#
203# 2005-04-29 V1.38 Steve Campbell
204# Use FileHandles for outputting results.
205# Allow any combination of xls, txt, and html output.
206# Fixed display of large numbers with -nvr option
207# Fixed merging of reports with empty tables.
208#
209# 2005-05-27 V1.39 Steve Campbell
210# Added the -include_original_destination flag
211# Removed tabs and trailing whitespace.
212#
608bc29d
SC
213# 2005-06-03 V1.40 Steve Campbell
214# Whilst parsing the mainlog(s), store information about
215# the messages in a hash of arrays rather than using
216# individual hashes. This is a bit cleaner and results in
217# dramatic memory savings, albeit at a slight CPU cost.
218#
219# 2005-06-15 V1.41 Steve Campbell
220# Added the -show_rt<list> flag.
221# Added the -show_dt<list> flag.
222#
223# 2005-06-24 V1.42 Steve Campbell
224# Added Histograms for user specified patterns.
d5692f86 225#
81aad8c9
SC
226# 2005-06-30 V1.43 Steve Campbell
227# Bug fix for V1.42 with -h0 specified. Spotted by Chris Lear.
228#
a83c7e95
SC
229# 2005-07-26 V1.44 Steve Campbell
230# Use a glob alias rather than an array ref in the generated
231# parser. This improves both readability and performance.
232#
233# 2005-09-30 V1.45 Marco Gaiarin / Steve Campbell
234# Collect SpamAssassin and rejection statistics.
235# Don't display local sender or destination tables unless
236# there is data to show.
237# Added average volumes into the top table text output.
238#
239# 2006-02-07 V1.46 Steve Campbell
240# Collect data on the number of addresses (recipients)
241# as well as the number of messages.
242#
243# 2006-05-05 V1.47 Steve Campbell
244# Added 'Message too big' to the list of mail rejection
245# reasons (thanks to Marco Gaiarin).
246#
247# 2006-06-05 V1.48 Steve Campbell
248# Mainlog lines which have GMT offsets and are too short to
249# have a flag are now skipped.
250#
251# 2006-11-10 V1.49 Alain Williams
252# Added the -emptyok flag.
253#
254# 2006-11-16 V1.50 Steve Campbell
255# Fixes for obtaining the IP address from reject messages.
256#
e2f7a0d2
SC
257# 2006-11-27 V1.51 Steve Campbell
258# Another update for obtaining the IP address from reject messages.
259#
260# 2006-11-27 V1.52 Steve Campbell
261# Tally any reject message containing SpamAssassin.
262#
a83c7e95 263#
d5692f86
SC
264#
265# For documentation on the logfile format, see
266# http://www.exim.org/exim-html-4.50/doc/html/spec_48.html#IX2793
059ec3d9
PH
267
268=head1 NAME
269
8974000d 270eximstats - generates statistics from Exim mainlog or syslog files.
059ec3d9
PH
271
272=head1 SYNOPSIS
273
d5692f86 274 eximstats [Output] [Options] mainlog1 mainlog2 ...
059ec3d9
PH
275 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_report.txt
276
d5692f86
SC
277=head2 Output:
278
279=over 4
280
281=item B<-txt>
282
283Output the results in plain text to STDOUT.
284
285=item B<-txt>=I<filename>
286
287Output the results in plain text. Filename '-' for STDOUT is accepted.
288
289=item B<-html>
290
291Output the results in HTML to STDOUT.
292
293=item B<-html>=I<filename>
294
295Output the results in HTML. Filename '-' for STDOUT is accepted.
296
297=item B<-xls>
298
299Output the results in Excel compatible Format to STDOUT.
300Requires the Spreadsheet::WriteExcel CPAN module.
301
302=item B<-xls>=I<filename>
303
304Output the results in Excel compatible format. Filename '-' for STDOUT is accepted.
305
306
307=back
308
309=head2 Options:
059ec3d9
PH
310
311=over 4
312
313=item B<-h>I<number>
314
315histogram divisions per hour. The default is 1, and
3160 suppresses histograms. Valid values are:
317
3180, 1, 2, 3, 5, 10, 15, 20, 30 or 60.
319
320=item B<-ne>
321
322Don't display error information.
323
324=item B<-nr>
325
326Don't display relaying information.
327
328=item B<-nr>I</pattern/>
329
330Don't display relaying information that matches.
331
332=item B<-nt>
333
334Don't display transport information.
335
336=item B<-nt>I</pattern/>
337
338Don't display transport information that matches.
339
340=item B<-q>I<list>
341
342List of times for queuing information; a single 0 item suppresses it.
343
344=item B<-t>I<number>
345
346Display top <number> sources/destinations
347default is 50, 0 suppresses top listing.
348
349=item B<-tnl>
350
351Omit local sources/destinations in top listing.
352
353=item B<-t_remote_users>
354
355Include remote users in the top source/destination listings.
356
d5692f86
SC
357=item B<-include_original_destination>
358
359Include the original destination email addresses rather than just
360using the final ones.
361Useful for finding out which of your mailing lists are receiving mail.
362
608bc29d
SC
363=item B<-show_dt>I<list>
364
365Show the delivery times (B<DT>) for all the messages.
366
367Exim must have been configured to use the +delivery_time logging option
368for this option to work.
369
370I<list> is an optional list of times. Eg -show_dt1,2,4,8 will show
371the number of messages with delivery times under 1 second, 2 seconds, 4 seconds,
3728 seconds, and over 8 seconds.
373
374=item B<-show_rt>I<list>
375
376Show the receipt times for all the messages. The receipt time is
377defined as the Completed hh:mm:ss - queue_time_overall - the Receipt hh:mm:ss.
378These figures will be skewed by pipelined messages so might not be that useful.
379
380Exim must have been configured to use the +queue_time_overall logging option
381for this option to work.
382
383I<list> is an optional list of times. Eg -show_rt1,2,4,8 will show
384the number of messages with receipt times under 1 second, 2 seconds, 4 seconds,
3858 seconds, and over 8 seconds.
386
059ec3d9
PH
387=item B<-byhost>
388
389Show results by sending host. This may be combined with
390B<-bydomain> and/or B<-byemail> and/or B<-byedomain>. If none of these options
391are specified, then B<-byhost> is assumed as a default.
392
393=item B<-bydomain>
394
395Show results by sending domain.
396May be combined with B<-byhost> and/or B<-byemail> and/or B<-byedomain>.
397
398=item B<-byemail>
399
400Show results by sender's email address.
401May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byedomain>.
402
403=item B<-byemaildomain> or B<-byedomain>
404
405Show results by sender's email domain.
406May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byemail>.
407
408=item B<-pattern> I<Description> I</Pattern/>
409
410Look for the specified pattern and count the number of lines in which it appears.
411This option can be specified multiple times. Eg:
412
413 -pattern 'Refused connections' '/refused connection/'
414
415
416=item B<-merge>
417
418This option allows eximstats to merge old eximstat reports together. Eg:
419
420 eximstats mainlog.sun > report.sun.txt
421 eximstats mainlog.mon > report.mon.txt
422 eximstats mainlog.tue > report.tue.txt
423 eximstats mainlog.wed > report.wed.txt
424 eximstats mainlog.thu > report.thu.txt
425 eximstats mainlog.fri > report.fri.txt
426 eximstats mainlog.sat > report.sat.txt
427 eximstats -merge report.*.txt > weekly_report.txt
428 eximstats -merge -html report.*.txt > weekly_report.html
429
430=over 4
431
432=item *
433
434You can merge text or html reports and output the results as text or html.
435
436=item *
437
438You can use all the normal eximstat output options, but only data
439included in the original reports can be shown!
440
441=item *
442
443When merging reports, some loss of accuracy may occur in the top I<n> lists.
444This will be towards the ends of the lists.
445
446=item *
447
448The order of items in the top I<n> lists may vary when the data volumes
449round to the same value.
450
451=back
452
059ec3d9
PH
453=item B<-charts>
454
455Create graphical charts to be displayed in HTML output.
d5692f86 456Only valid in combination with I<-html>.
059ec3d9
PH
457
458This requires the following modules which can be obtained
459from http://www.cpan.org/modules/01modules.index.html
460
461=over 4
462
463=item GD
464
465=item GDTextUtil
466
467=item GDGraph
468
469=back
470
471To install these, download and unpack them, then use the normal perl installation procedure:
472
473 perl Makefile.PL
474 make
475 make test
476 make install
477
478=item B<-chartdir> I<dir>
479
480Create the charts in the directory <dir>
481
482=item B<-chartrel> I<dir>
483
484Specify the relative directory for the "img src=" tags from where to include
485the charts
486
a83c7e95
SC
487=item B<-emptyok>
488
489Specify that it's OK to not find any valid log lines. Without this
490we will output an error message if we don't find any.
491
059ec3d9
PH
492=item B<-d>
493
494Debug flag. This outputs the eval()'d parser onto STDERR which makes it
495easier to trap errors in the eval section. Remember to add 1 to the line numbers to allow for the
496title!
497
498=back
499
500=head1 DESCRIPTION
501
8974000d 502Eximstats parses exim mainlog and syslog files to output a statistical
059ec3d9 503analysis of the messages processed. By default, a text
d5692f86
SC
504analysis is generated, but you can request other output formats
505using flags. See the help (B<-help>) to learn
059ec3d9
PH
506about how to create charts from the tables.
507
508=head1 AUTHOR
509
510There is a web site at http://www.exim.org - this contains details of the
511mailing list exim-users@exim.org.
512
513=head1 TO DO
514
515This program does not perfectly handle messages whose received
516and delivered log lines are in different files, which can happen
517when you have multiple mail servers and a message cannot be
518immediately delivered. Fixing this could be tricky...
519
d5692f86
SC
520Merging of xls files is not (yet) possible. Be free to implement :)
521
059ec3d9
PH
522=cut
523
524use integer;
525use strict;
d5692f86 526use IO::File;
059ec3d9
PH
527
528# use Time::Local; # PH/FANF
529use POSIX;
530
d5692f86 531use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints $HAVE_Spreadsheet_WriteExcel);
059ec3d9
PH
# Probe for the optional modules. Each flag ends up as 1 when the module
# loads and 0 when the require fails; a failed require is not fatal here.
$HAVE_GD_Graph_pie           = eval { require GD::Graph::pie;          1 } ? 1 : 0;
$HAVE_GD_Graph_linespoints   = eval { require GD::Graph::linespoints;  1 } ? 1 : 0;
$HAVE_Spreadsheet_WriteExcel = eval { require Spreadsheet::WriteExcel; 1 } ? 1 : 0;
059ec3d9
PH
538
539
540##################################################
541# Static data #
542##################################################
543# 'use vars' instead of 'our' as perl5.005 is still in use out there!
544use vars qw(@tab62 @days_per_month $gig);
545use vars qw($VERSION);
546use vars qw($COLUMN_WIDTHS);
608bc29d 547use vars qw($WEEK $DAY $HOUR $MINUTE);
059ec3d9
PH
548
549
# Base-62 digit values. The entry for a character lives at index
# ord($char) - ord('0'): '0'-'9' => 0-9, 'A'-'Z' => 10-35, 'a'-'z' => 36-61.
# The punctuation characters that fall between those ranges map to 0.
# NOTE(review): presumably used to decode message IDs - confirm against
# the code that reads @tab62.
@tab62 = (0) x (ord('z') - ord('0') + 1);
{
  my $digit_value = 0;
  foreach my $char ('0' .. '9', 'A' .. 'Z', 'a' .. 'z') {
    $tab62[ord($char) - ord('0')] = $digit_value++;
  }
}

# Number of days that precede the first of each month (January first),
# for a non-leap year.
@days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);

# One gigabyte, in bytes.
$gig = 1 << 30;

$VERSION = '1.52';

# How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
$COLUMN_WIDTHS = 8;

# Handy durations, all expressed in seconds.
($MINUTE, $HOUR, $DAY, $WEEK) = (60, 60 * 60, 24 * 60 * 60, 7 * 24 * 60 * 60);
570
059ec3d9
PH
571# Declare global variables.
572use vars qw($total_received_data $total_received_data_gigs $total_received_count);
a83c7e95 573use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_messages $total_delivered_addresses);
d5692f86
SC
574use vars qw(%timestamp2time); #Hash of timestamp => time.
575use vars qw($last_timestamp $last_time); #The last time convertion done.
576use vars qw($last_date $date_seconds); #The last date convertion done.
577use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
059ec3d9 578use vars qw($localtime_offset);
d5692f86
SC
579use vars qw($i); #General loop counter.
580use vars qw($debug); #Debug mode?
581use vars qw($ntopchart); #How many entries should make it into the chart?
582use vars qw($gddirectory); #Where to put files from GD::Graph
a83c7e95
SC
583
584# SpamAssassin variables
585use vars qw($spam_score $spam_score_gigs);
586use vars qw($ham_score $ham_score_gigs);
587use vars qw(%ham_count_by_ip %spam_count_by_ip);
588use vars qw(%rejected_count_by_ip %rejected_count_by_reason);
589
590#For use in Spreadsheet::WriteExcel
591use vars qw($workbook $ws_global $ws_relayed $ws_errors);
592use vars qw($row $col $row_hist $col_hist);
d5692f86 593use vars qw($run_hist);
a83c7e95 594use vars qw($f_default $f_header1 $f_header2 $f_header2_m $f_headertab $f_percent); #Format Header
d5692f86
SC
595
596# Output FileHandles
597use vars qw($txt_fh $htm_fh $xls_fh);
059ec3d9
PH
598
599$ntopchart = 5;
600
601# The following are parameters whose values are
602# set by command line switches:
603use vars qw($show_errors $show_relay $show_transport $transport_pattern);
604use vars qw($topcount $local_league_table $include_remote_users);
a83c7e95 605use vars qw($hist_opt $hist_interval $hist_number $volume_rounding $emptyOK);
d5692f86 606use vars qw($relay_pattern @queue_times @user_patterns @user_descriptions);
608bc29d 607use vars qw(@rcpt_times @delivery_times);
d5692f86
SC
608use vars qw($include_original_destination);
609use vars qw($txt_fh $htm_fh $xls_fh);
059ec3d9
PH
610
611use vars qw(%do_sender); #Do sender by Host, Domain, Email, and/or Edomain tables.
612use vars qw($charts $chartrel $chartdir $charts_option_specified);
d5692f86 613use vars qw($merge_reports); #Merge old reports ?
059ec3d9
PH
614
615# The following are modified in the parse() routine, and
616# referred to in the print_*() routines.
608bc29d 617use vars qw($delayed_count $relayed_unshown $begin $end);
a83c7e95 618use vars qw(%messages @message);
059ec3d9 619use vars qw(%received_count %received_data %received_data_gigs);
a83c7e95 620use vars qw(%delivered_messages %delivered_data %delivered_data_gigs %delivered_addresses);
059ec3d9 621use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
a83c7e95 622use vars qw(%delivered_messages_user %delivered_addresses_user %delivered_data_user %delivered_data_gigs_user);
059ec3d9 623use vars qw(%transported_count %transported_data %transported_data_gigs);
608bc29d
SC
624use vars qw(%relayed %errors_count $message_errors);
625use vars qw(@qt_all_bin @qt_remote_bin);
626use vars qw($qt_all_overflow $qt_remote_overflow);
627use vars qw(@dt_all_bin @dt_remote_bin %rcpt_times_bin);
628use vars qw($dt_all_overflow $dt_remote_overflow %rcpt_times_overflow);
629use vars qw(@received_interval_count @delivered_interval_count);
630use vars qw(@user_pattern_totals @user_pattern_interval_count);
059ec3d9
PH
631
632use vars qw(%report_totals);
633
608bc29d
SC
634# Enumerations
635use vars qw($SIZE $FROM_HOST $FROM_ADDRESS $ARRIVAL_TIME $REMOTE_DELIVERED $PROTOCOL);
636use vars qw($DELAYED $HAD_ERROR);
# Number the fields consecutively from zero, in this fixed order.
# NOTE(review): presumably these index the per-message arrays introduced
# in V1.40 - confirm against the parser.
($SIZE, $FROM_HOST, $FROM_ADDRESS, $ARRIVAL_TIME,
 $REMOTE_DELIVERED, $DELAYED, $HAD_ERROR, $PROTOCOL) = (0 .. 7);
059ec3d9
PH
645
646
647
648##################################################
649# Subroutines #
650##################################################
651
d5692f86
SC
652#######################################################################
653# get_filehandle($file,\%output_files);
654# Return a filehandle writing to $file.
655#
656# If %output_files is defined, check that $output_files{$file}
657# doesn't exist and die if it does, or set it if it doesn't.
658#######################################################################
sub get_filehandle {
  my($file,$output_files_href) = @_;

  # An empty filename is shorthand for STDOUT.
  $file = '-' if ($file eq '');

  # When the caller supplies a registry of destinations, refuse to hand
  # out the same destination twice and record this one as taken.
  if (defined $output_files_href) {
    if (exists $output_files_href->{$file}) {
      die "You can only output to '$file' once! Use -h for help.\n";
    }
    $output_files_href->{$file} = 1;
  }

  return \*STDOUT if ($file eq '-');

  # Remove any existing file first: the O_EXCL open below only succeeds
  # when it creates the file itself.
  if (-e $file && !unlink($file)) {
    die "Failed to rm $file: $!";
  }

  my $fh = IO::File->new($file, O_WRONLY|O_CREAT|O_EXCL);
  defined $fh or die "new IO::File $file failed: $!";
  return $fh;
}
681
059ec3d9 682
1b4fe9dd
PH
683#######################################################################
684# volume_rounded();
685#
686# $rounded_volume = volume_rounded($bytes,$gigabytes);
687#
688# Given a data size in bytes, round it to KB, MB, or GB
689# as appropriate.
690#
691# Eg 12000 => 12KB, 15000000 => 14MB, etc.
692#
693# Note: I've experimented with Math::BigInt and it results in a 33%
694# performance degredation as opposed to storing numbers split into
695# bytes and gigabytes.
696#######################################################################
059ec3d9
PH
sub volume_rounded {
  my($x,$g) = @_;
  $x ||= 0;
  $g ||= 0;

  # Carry whole gigabytes out of the byte count. The arithmetic here and
  # in the rounded cases below runs under the file-wide 'use integer'.
  until ($x <= $gig) {
    $x -= $gig;
    $g++;
  }

  unless ($volume_rounding) {
    # No rounding wanted: return the plain byte total with no padding, so
    # that spreadsheet programs read it as a number and long values are
    # not squeezed into a fixed width. Any further formatting is left to
    # the output routine. Floating point is used so that totals beyond
    # the native integer range still print.
    no integer;
    return sprintf("%.0f", ($g * $gig) + $x);
  }

  # Values < 1 GB: plain bytes, KB or MB depending on magnitude.
  if ($g <= 0) {
    return sprintf("%6d", $x) if ($x < 10000);
    return sprintf("%4dKB", ($x + 512)/1024) if ($x < 10000000);
    return sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
  }

  # Values between 1GB and 10GB are printed in MB.
  return sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024)) if ($g < 10);

  # Handle values over 10GB.
  return sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
}
743
744
1b4fe9dd
PH
745#######################################################################
746# un_round();
8e669ac1 747#
1b4fe9dd 748# un_round($rounded_volume,\$bytes,\$gigabytes);
8e669ac1 749#
1b4fe9dd
PH
750# Given a volume in KB, MB or GB, as generated by volume_rounded(),
751# do the reverse transformation and convert it back into Bytes and Gigabytes.
752# These are added to the $bytes and $gigabytes parameters.
8e669ac1 753#
1b4fe9dd
PH
754# This is the inverse of volume_rounded(), which rounds a data size
755# in bytes to KB, MB, or GB as appropriate.
8e669ac1 756#
1b4fe9dd
PH
757# EG: 500 => (500,0), 14GB => (0,14), etc.
758#######################################################################
059ec3d9
PH
sub un_round {
  my($rounded,$bytes_sref,$gigabytes_sref) = @_;

  # The unit suffix decides how the number is split between the two
  # counters. The first three cases use the file-wide integer arithmetic.
  if ($rounded =~ /(\d+)GB/) {
    my $gigabytes = $1;
    $$gigabytes_sref += $gigabytes;
  }
  elsif ($rounded =~ /(\d+)MB/) {
    my $megabytes = $1;
    $$gigabytes_sref += $megabytes / 1024;
    $$bytes_sref += (($megabytes % 1024) * 1024 * 1024);
  }
  elsif ($rounded =~ /(\d+)KB/) {
    my $kilobytes = $1;
    $$gigabytes_sref += $kilobytes / (1024 * 1024);
    $$bytes_sref += ($kilobytes % (1024 * 1024) * 1024);
  }
  elsif ($rounded =~ /(\d+)/) {
    # We need to turn off integer in case we are merging an -nvr report.
    no integer;
    my $plain_bytes = $1;
    $$gigabytes_sref += int($plain_bytes / $gig);
    $$bytes_sref += $plain_bytes % $gig;
  }

  # Now reduce the bytes down to less than 1GB; adding zero via
  # add_volume() performs the carry into the gigabyte counter.
  $$bytes_sref > $gig and add_volume($bytes_sref,$gigabytes_sref,0);
}
783
784
1b4fe9dd
PH
785#######################################################################
786# add_volume();
8e669ac1 787#
1b4fe9dd 788# add_volume(\$bytes,\$gigs,$size);
8e669ac1 789#
1b4fe9dd
PH
790# Add $size to $bytes/$gigs where this is a number split into
791# bytes ($bytes) and gigabytes ($gigs). This is significantly
792# faster than using Math::BigInt.
793#######################################################################
sub add_volume {
  my($bytes_ref,$gigs_ref,$size) = @_;

  # A counter that has never been touched starts from zero; initialising
  # it explicitly avoids uninitialized-value warnings under -w.
  foreach my $counter_ref ($bytes_ref, $gigs_ref) {
    $$counter_ref = 0 unless defined $$counter_ref;
  }

  $$bytes_ref += $size;

  # Carry whole gigabytes out of the byte counter.
  until ($$bytes_ref <= $gig) {
    $$bytes_ref -= $gig;
    $$gigs_ref++;
  }
}
804
805
1b4fe9dd
PH
806#######################################################################
807# format_time();
8e669ac1 808#
1b4fe9dd 809# $formatted_time = format_time($seconds);
8e669ac1 810#
1b4fe9dd
PH
811# Given a time in seconds, break it down into
812# weeks, days, hours, minutes, and seconds.
8e669ac1 813#
1b4fe9dd
PH
814# Eg 12005 => 3h20m5s
815#######################################################################
059ec3d9
PH
sub format_time {
  my $left = pop @_;

  # Peel off seconds, minutes, hours and days in turn; what is left over
  # is the number of whole weeks. The divisions rely on the file-wide
  # 'use integer' to discard remainders.
  my %amount;
  foreach my $step (['s', 60], ['m', 60], ['h', 24], ['d', 7]) {
    my($unit, $radix) = @$step;
    $amount{$unit} = $left % $radix;
    $left /= $radix;
  }
  $amount{'w'} = $left;

  # Only non-zero units are shown, largest first.
  my $text = join '',
             map { $amount{$_} > 0 ? $amount{$_} . $_ : () } qw(w d h m);

  # Seconds are also shown when nothing else was, so the result is never
  # an empty string.
  $text .= $amount{'s'} . 's' if ($amount{'s'} > 0 || $text eq "");
  return $text;
}
834
835
1b4fe9dd
PH
836#######################################################################
837# unformat_time();
8e669ac1 838#
1b4fe9dd 839# $seconds = unformat_time($formatted_time);
8e669ac1 840#
1b4fe9dd 841# Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
8e669ac1 842#
1b4fe9dd
PH
843# Eg 3h20m5s => 12005
844#######################################################################
059ec3d9
PH
sub unformat_time {
  my($formated_time) = pop @_;

  # Seconds represented by each unit suffix; a bare number counts as seconds.
  my %seconds_per = (
    ''  => 1,
    's' => 1,
    'm' => 60,
    'h' => 60 * 60,
    'd' => 60 * 60 * 24,
    'w' => 60 * 60 * 24 * 7,
  );

  # Consume "<number><unit>" groups from the front of the string until
  # the next character is not a digit.
  my $total = 0;
  while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
    $total += $1 * $seconds_per{$2};
  }
  return $total;
}
858
859
1b4fe9dd
PH
860#######################################################################
861# seconds();
8e669ac1 862#
1b4fe9dd 863# $time = seconds($timestamp);
8e669ac1 864#
1b4fe9dd
PH
865# Given a time-of-day timestamp, convert it into a time() value using
866# POSIX::mktime. We expect the timestamp to be of the form
867# "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
868# and the year to be absolute (we do the necessary conversions). The
869# timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
870# offset is not present, and we have not been told that the log is in UTC
871# (with the -utc option), then we adjust the time by the current local
872# time offset so that it can be compared with the time recorded in message
873# IDs, which is UTC.
8e669ac1 874#
1b4fe9dd
PH
875# To improve performance, we only use mktime on the date ($year-$mon-$day),
876# and only calculate it if the date is different to the previous time we
877# came here. We then add on seconds for the '$hour:$min:$sec'.
8e669ac1 878#
1b4fe9dd
PH
879# We also store the results of the last conversion done, and only
880# recalculate if the date is different.
8e669ac1 881#
1b4fe9dd
PH
882# We used to have the '-cache' flag which would store the results of the
883# mktime() call. However, the current way of just using mktime() on the
884# date obsoletes this.
885#######################################################################
059ec3d9
PH
sub seconds {
  # $time = seconds($timestamp);
  #
  # Convert a log timestamp ("YYYY-MM-DD HH:MM:SS", optionally followed by
  # " +HHMM" or " -HHMM") into a number of seconds. Returns 0 if the
  # timestamp does not have that form.
  #
  # State kept between calls (in variables owned by the file, not by this
  # sub): the last timestamp and its result, the last date and its
  # mktime() value, and the last offset string and its value in seconds.
  my($timestamp) = @_;

  # Identical to the previous timestamp? Then the answer is too.
  if ($last_timestamp eq $timestamp) {
    return $last_time;
  }

  # Pull the fields out by name; an empty list means "no match".
  my($date, $year, $mon, $day, $hour, $min, $sec,
     $offset, $sign, $offset_hh, $offset_mm) =
    $timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o
      or return 0;

  # mktime() is only called when the date part changes; the time of day
  # is added arithmetically below.
  if ($last_date ne $date) {
    $last_date = $date;
    # mktime wants a 0-based month and a year relative to 1900.
    $date_seconds = mktime(0, 0, 0, $day, $mon - 1, $year - 1900);
  }
  my $time = $date_seconds + ($hour * 3600) + ($min * 60) + $sec;

  # Recompute the offset (in seconds, not minutes) only when the offset
  # text differs from the one seen last time.
  if (defined $offset && ($offset ne $last_offset)) {
    $last_offset = $offset;
    $offset_seconds = ($offset_hh * 60 + $offset_mm) * 60;
    $offset_seconds = -$offset_seconds if ($sign eq '-');
  }

  # NOTE(review): the seconds field is always defined once the pattern has
  # matched, so the first branch is always taken and the $localtime_offset
  # branch is never reached; a timestamp without an offset therefore has
  # the most recently cached $offset_seconds applied. This looks as if it
  # was meant to test the offset field instead - confirm before changing,
  # since the behaviour is preserved here exactly as it was.
  if (defined $sec) {
    $time -= $offset_seconds;
  }
  elsif (defined $localtime_offset) {
    $time -= $localtime_offset;
  }

  # Remember this conversion for the fast path above.
  $last_timestamp = $timestamp;
  $last_time = $time;

  return $time;
}
925
926
1b4fe9dd
PH
927#######################################################################
928# id_seconds();
8e669ac1 929#
1b4fe9dd 930# $time = id_seconds($message_id);
8e669ac1 931#
1b4fe9dd
PH
932# Given a message ID, convert it into a time() value.
933#######################################################################
059ec3d9
PH
sub id_seconds {
  # $time = id_seconds($message_id);
  #
  # Decode the first six characters of a message ID as a base-62 number,
  # using the file's @tab62 table to map each character to its digit value.
  # The ID is taken from the LAST argument.
  my $message_id = pop @_;

  my $seconds = 0;
  foreach my $char (split(//, substr($message_id, 0, 6))) {
    # @tab62 is indexed by the character's distance from '0'.
    $seconds = $seconds * 62 + $tab62[ord($char) - ord('0')];
  }
  return $seconds;
}
941
608bc29d
SC
942#######################################################################
943# wdhms_seconds();
944#
945# $seconds = wdhms_seconds($string);
946#
947# Convert a string in a week/day/hour/minute/second format (eg 4h10s)
948# into seconds.
949#######################################################################
sub wdhms_seconds {
  # $seconds = wdhms_seconds($string);
  #
  # Convert a leading week/day/hour/minute/second string (eg 4h10s) into
  # seconds. Every component is optional, and a missing one counts as 0.
  my($weeks, $days, $hours, $minutes, $secs) =
    $_[0] =~ /^(?:(\d+)w)?(?:(\d+)d)?(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s)?/
      or return undef;

  return(  (($weeks   || 0) * $WEEK)
         + (($days    || 0) * $DAY)
         + (($hours   || 0) * $HOUR)
         + (($minutes || 0) * $MINUTE)
         +  ($secs    || 0));
}
956
957#######################################################################
958# queue_time();
959#
960# $queued = queue_time($completed_tod, $arrival_time, $id);
961#
962# Given the completed time of day and either the arrival time
963# (preferred), or the message ID, calculate how long the message has
964# been on the queue.
965#
966#######################################################################
sub queue_time {
  # $queued = queue_time($completed_tod, $arrival_time, $id);
  #
  # Work out how long a message was on the queue: the completion time
  # minus the arrival time if one is given, otherwise minus the time
  # decoded from the message ID.
  my($completed_tod, $arrival_time, $id) = @_;

  # The completion time is converted first, exactly as before: seconds()
  # keeps state between calls, so the call order is deliberately kept.
  my $completed = seconds($completed_tod);

  # Note: id_seconds() benchmarks as 42% slower than seconds(), and
  # computing the time accounts for a significant portion of the run
  # time, so the arrival time is preferred when it is available.
  my $arrived = (defined $arrival_time) ? seconds($arrival_time)
                                        : id_seconds($id);

  return($completed - $arrived);
}
059ec3d9
PH
980
981
1b4fe9dd
PH
982#######################################################################
983# calculate_localtime_offset();
8e669ac1 984#
1b4fe9dd 985# $localtime_offset = calculate_localtime_offset();
8e669ac1 986#
1b4fe9dd 987# Calculate the localtime offset from gmtime in seconds.
8e669ac1 988#
1b4fe9dd 989# $localtime = time() + $localtime_offset.
8e669ac1 990#
1b4fe9dd
PH
991# These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
992# (West is negative, East is positive.)
993#######################################################################
059ec3d9
PH
994
995# $localtime = gmtime() + $localtime_offset. OLD COMMENT
996# This subroutine commented out as it's not currently in use.
997
998#sub calculate_localtime_offset {
999# # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
1000# my (@sample_date) = (0,0,0,5,5,100);
1001# my $localtime = timelocal(@sample_date);
1002# my $gmtime = timegm(@sample_date);
1003# my $offset = $localtime - $gmtime;
1004# return $offset;
1005#}
1006
sub calculate_localtime_offset {
  # $localtime_offset = calculate_localtime_offset();
  #
  # Take the current time, break it down with gmtime() (which works in
  # UTC), feed those fields back through mktime() (which works on local
  # time), and return the difference from the time we started with.
  #
  # We assume that the offset at this moment holds for the whole period
  # covered by the logs being analysed. That may not be true around the
  # time the clocks change in spring or autumn.
  my $now = time;
  return mktime(gmtime($now)) - $now;
}
1016
1017
608bc29d 1018
1b4fe9dd 1019#######################################################################
608bc29d
SC
1020# print_duration_table();
1021#
1022# print_duration_table($title, $message_type, \@times, \@values, $overflow);
1023#
1024# Print a table showing how long a particular step took for
1025# the messages. The parameters are:
1026# $title Eg "Time spent on the queue"
1027# $message_type Eg "Remote"
1028# \@times The maximum time a message took for it to increment
1029# the corresponding @values counter.
1030# \@values An array of message counters.
1031# $overflow The number of messages which exceeded the maximum
1032# time.
1b4fe9dd 1033#######################################################################
sub print_duration_table {
  # print_duration_table($title, $message_type, \@times, \@values, $overflow);
  #
  # Print a table (to whichever of the text, HTML and spreadsheet outputs
  # are active) showing how many messages fell into each time bucket,
  # with per-bucket and cumulative percentages, followed by an "Over"
  # line for messages beyond the last bucket. For HTML output with
  # charts enabled, a pie chart is written as a PNG and linked in.
  #
  #   $title         Eg "Time spent on the queue"
  #   $message_type  Eg "Remote"
  #   \@times        Upper limit (in seconds) of each bucket
  #   \@values       Message count for each bucket
  #   $overflow      Count of messages beyond the last bucket
  no integer;   # percentages need real division
  my($title, $message_type, $times_aref, $values_aref, $overflow) = @_;
  my(@chartdatanames);
  my(@chartdatavals);

  my $printed_one = 0;
  my $cumulative_percent = 0;

  my $queue_total = $overflow;
  $queue_total += $_ foreach @$values_aref;

  my $temp = "$title: $message_type";

  my $txt_format = "%5s %4s %6d %5.1f%% %5.1f%%\n";
  my $htm_format = "<tr><td align=\"right\">%s %s</td><td align=\"right\">%d</td><td align=\"right\">%5.1f%%</td><td align=\"right\">%5.1f%%</td>\n";

  # write header
  printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
  if ($htm_fh) {
    print $htm_fh "<hr><a name=\"$title $message_type\"></a><h2>$temp</h2>\n";
    print $htm_fh "<table border=0 width=\"100%\"><tr><td><table border=1>\n";
    print $htm_fh "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th>\n";
  }
  if ($xls_fh) {
    $ws_global->write($row++, $col, "$title: ".$message_type, $f_header2);
    my @content=("Time", "Messages", "Percentage", "Cumulative Percentage");
    set_worksheet_line($ws_global, $row++, 1, \@content, $f_headertab);
  }

  # One line per non-empty bucket. Only the first line printed is
  # labelled "Under".
  foreach my $i (0 .. $#$times_aref) {
    next unless $values_aref->[$i] > 0;

    my $percent = ($values_aref->[$i] * 100)/$queue_total;
    $cumulative_percent += $percent;

    my @content=($printed_one? " " : "Under",
        format_time($times_aref->[$i]),
        $values_aref->[$i], $percent, $cumulative_percent);

    if ($htm_fh) {
      printf $htm_fh ($htm_format, @content);
      # Kept as it was; note that the value is known to be > 0 here.
      if (!defined($values_aref->[$i])) {
        print $htm_fh "Not defined";
      }
    }
    if ($txt_fh) {
      printf $txt_fh ($txt_format, @content);
      if (!defined($times_aref->[$i])) {
        print $txt_fh "Not defined";
      }
    }
    if ($xls_fh) {
      set_worksheet_line($ws_global, $row, 0, [@content[0,1,2]], $f_default);
      # The spreadsheet gets fractions, formatted as percentages.
      set_worksheet_line($ws_global, $row++, 3, [$content[3]/100,$content[4]/100], $f_percent);

      if (!defined($times_aref->[$i])) {
        $col=0;
        $ws_global->write($row++, $col, "Not defined" );
      }
    }

    push(@chartdatanames,
      ($printed_one? "" : "Under") . format_time($times_aref->[$i]));
    push(@chartdatavals, $values_aref->[$i]);
    $printed_one = 1;
  }

  if ($overflow && $overflow > 0) {
    my $percent = ($overflow * 100)/$queue_total;
    $cumulative_percent += $percent;

    my @content = ("Over ", format_time($times_aref->[-1]),
        $overflow, $percent, $cumulative_percent);

    printf $txt_fh ($txt_format, @content) if $txt_fh;
    printf $htm_fh ($htm_format, @content) if $htm_fh;
    if ($xls_fh) {
      set_worksheet_line($ws_global, $row, 0, [@content[0,1,2]], $f_default);
      set_worksheet_line($ws_global, $row++, 3, [$content[3]/100,$content[4]/100], $f_percent);
    }
  }

  # The chart always gets an "Over" slice, even when it is empty.
  push(@chartdatanames, "Over " . format_time($times_aref->[-1]));
  push(@chartdatavals, $overflow);

  #printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;
  if ($htm_fh) {
    print $htm_fh "</table></td><td>";

    if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0)) {
      my @data = (
         \@chartdatanames,
         \@chartdatavals
      );
      my $graph = GD::Graph::pie->new(200, 200);

      # Replace EVERY character that is unsafe in a file name or URL.
      # (Without /g only the first one was replaced, so names built from
      # multi-word titles kept their remaining spaces.)
      my $pngname = "$title-$message_type.png";
      $pngname =~ s/[^\w\-\.]/_/g;

      # Only short titles are put on the chart.
      my $graph_title = "$title ($message_type)";
      $graph->set(title => $graph_title) if (length($graph_title) < 21);

      my $gd = $graph->plot(\@data) or warn($graph->error);
      if ($gd) {
        open(my $img, '>', "$chartdir/$pngname") or die "Could not write $chartdir/$pngname: $!\n";
        binmode $img;
        print $img $gd->png;
        close $img;
        print $htm_fh "<img src=\"$chartrel/$pngname\">";
      }
    }
    print $htm_fh "</td></tr></table>\n";
  }

  if ($xls_fh) {
    $row++;
  }
  print $txt_fh "\n" if $txt_fh;
  print $htm_fh "\n" if $htm_fh;
}
1163
1164
1b4fe9dd
PH
1165#######################################################################
1166# print_histogram();
8e669ac1 1167#
608bc29d 1168# print_histogram('Deliveries|Messages received|$pattern', $unit, @interval_count);
8e669ac1 1169#
1b4fe9dd
PH
1170# Print a histogram of the messages delivered/received per time slot
1171# (hour by default).
1172#######################################################################
sub print_histogram {
  # print_histogram('Deliveries|Messages received|$pattern', $unit, @interval_count);
  #
  # Print a histogram of the counts in @interval_count, one line per time
  # slot, to whichever of the text, HTML and spreadsheet outputs are
  # active. $text is the heading (it may be a user-supplied pattern
  # description) and $unit is the singular name of the thing counted.
  # For HTML output with charts enabled, a line chart is written as a
  # PNG and linked in.
  my($text, $unit, @interval_count) = @_;
  my(@chartdatanames);
  my(@chartdatavals);
  my($maxd) = 0;

  # The first histogram fixes the spreadsheet start row; later ones go
  # back to that row and are written in the next column along.
  if (!$run_hist) {
    $row_hist = $row;
  }
  else {
    $row = $row_hist;
  }

  foreach my $i (0 .. $hist_number - 1) {
    $maxd = $interval_count[$i] if $interval_count[$i] > $maxd;
  }

  # Scale so that the longest line is about 50 dots.
  my $scale = int(($maxd + 25)/50);
  $scale = 1 if $scale == 0;

  # Pluralise the unit when a dot stands for more than one.
  if ($scale != 1) {
    if ($unit !~ s/y$/ies/) {
      $unit .= 's';
    }
  }

  # Make and output the title. $text is passed as an ARGUMENT to sprintf,
  # never as part of the format: it can be a user-supplied description,
  # and any '%' in it would otherwise be read as a conversion.
  my $title = sprintf("%s per %s", $text,
    ($hist_interval == 60)? "hour" :
    ($hist_interval == 1)?  "minute" : "$hist_interval minutes");

  my $txt_htm_title = $title . " (each dot is $scale $unit)";

  printf $txt_fh ("%s\n%s\n\n", $txt_htm_title, "-" x length($txt_htm_title)) if $txt_fh;

  if ($htm_fh) {
    print $htm_fh "<hr><a name=\"$text\"></a><h2>$txt_htm_title</h2>\n";
    print $htm_fh "<table border=0 width=\"100%\">\n";
    print $htm_fh "<tr><td><pre>\n";
  }

  if ($xls_fh) {
    $title =~ s/Messages/Msg/ ;
    $row += 2;
    $ws_global->write($row++, $col_hist+1, $title, $f_headertab);
  }

  my $hour = 0;
  my $minutes = 0;
  foreach my $i (0 .. $hist_number - 1) {
    my $c = $interval_count[$i];

    # If the interval is an hour (the maximum) print the starting and
    # ending hours as a label. Otherwise print the starting hour and
    # minutes, which take up the same space.
    my $temp;
    if ($hist_opt == 1) {
      $temp = sprintf("%02d-%02d", $hour, $hour + 1);

      print $txt_fh $temp if $txt_fh;
      print $htm_fh $temp if $htm_fh;

      # The row labels are only written for the first histogram.
      if ($xls_fh && $run_hist == 0) {
        $ws_global->write($row, 0, [$temp], $f_default);
      }

      push(@chartdatanames, $temp);
      $hour++;
    }
    else {
      if ($minutes == 0) {
        $temp = sprintf("%02d:%02d", $hour, $minutes);
      }
      else {
        $temp = sprintf(" :%02d", $minutes);
      }

      print $txt_fh $temp if $txt_fh;
      print $htm_fh $temp if $htm_fh;

      # The spreadsheet (first histogram only) always gets the full
      # HH:MM label, and so does the chart in that case.
      if (($xls_fh) and ($run_hist==0)) {
        $temp = sprintf("%02d:%02d", $hour, $minutes);
        $ws_global->write($row, 0, [$temp], $f_default);
      }

      push(@chartdatanames, $temp);
      $minutes += $hist_interval;
      if ($minutes >= 60) {
        $minutes = 0;
        $hour++;
      }
    }
    push(@chartdatavals, $c);

    printf $txt_fh (" %6d %s\n", $c, "." x ($c/$scale)) if $txt_fh;
    printf $htm_fh (" %6d %s\n", $c, "." x ($c/$scale)) if $htm_fh;
    $ws_global->write($row++, $col_hist+1, [$c], $f_default) if $xls_fh;
  }

  print $txt_fh "\n" if $txt_fh;
  print $htm_fh "\n" if $htm_fh;

  if ($htm_fh) {
    print $htm_fh "</pre>\n";
    print $htm_fh "</td><td>\n";
    if ($HAVE_GD_Graph_linespoints && $charts && ($#chartdatavals > 0)) {
      # calculate the graph
      my @data = (
         \@chartdatanames,
         \@chartdatavals
      );
      my $graph = GD::Graph::linespoints->new(300, 300);
      $graph->set(
          x_label           => 'Time',
          y_label           => 'Amount',
          title             => $text,
          x_labels_vertical => 1
      );
      my $pngname = "histogram_$text.png";
      $pngname =~ s/[^\w\._]/_/g;

      my $gd = $graph->plot(\@data) or warn($graph->error);
      if ($gd) {
        open(my $img, '>', "$chartdir/$pngname") or die "Could not write $chartdir/$pngname: $!\n";
        binmode $img;
        print $img $gd->png;
        close $img;
        print $htm_fh "<img src=\"$chartrel/$pngname\">";
      }
    }
    print $htm_fh "</td></tr></table>\n";
  }

  $col_hist++;   # where to continue next time

  $row+=2;       # leave some space after the histogram block
  $run_hist=1;   # we have done this once or more
}
1316
1317
1318
1b4fe9dd
PH
1319#######################################################################
1320# print_league_table();
8e669ac1 1321#
a83c7e95 1322# print_league_table($league_table_type,\%message_count,\%address_count,\%message_data,\%message_data_gigs, $spreadsheet, $row_sref);
8e669ac1 1323#
a83c7e95
SC
1324# Given hashes of message count, address count, and message data,
1325# which are keyed by the table type (eg by the sending host), print a
1326# league table showing the top $topcount (defaults to 50).
1b4fe9dd 1327#######################################################################
sub print_league_table {
  # print_league_table($league_table_type,\%message_count,\%address_count,
  #                    \%message_data,\%message_data_gigs,$spreadsheet,$row_sref);
  #
  # Given hashes of message count, address count, and message data, all
  # keyed by the table type (eg by the sending host), print a league
  # table of the top $topcount entries by message count and, if message
  # data was supplied, a second one by volume. Output goes to whichever
  # of the text, HTML and spreadsheet outputs are active; $row_sref is a
  # reference to the spreadsheet row counter, which is advanced here.
  # \%address_count and \%message_data may be undef, in which case their
  # columns (and the volume table) are left out.
  my($text,$m_count,$a_count,$m_data,$m_data_gigs,$spreadsheet, $row_sref) = @_;
  my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";
  my($title) = "Top $name by message count";
  my(@chartdatanames) = ();
  my(@chartdatavals) = ();
  my $chartotherval = 0;
  $text = ucfirst($text);

  # Align non-local addresses to the right (so all the .com's line up).
  # Local addresses are aligned on the left as they are userids.
  my $align = ($text !~ /local/i) ? 'right' : 'left';


  ################################################
  # Generate the printf formats and table headers.
  ################################################
  my(@headers) = ('Messages');
  push(@headers,'Addresses') if defined $a_count;
  push(@headers,'Bytes','Average') if defined $m_data;

  my $txt_format = "%10s " x @headers . " %s\n";
  my $txt_col_headers = sprintf $txt_format, @headers, $text;
  my $htm_format = "<tr>" . '<td align="right">%s</td>'x@headers . "<td align=\"$align\" nowrap>%s</td></tr>\n";
  my $htm_col_headers = sprintf $htm_format, @headers, $text;
  $htm_col_headers =~ s/(<\/?)td/$1th/g; #Convert <td>'s to <th>'s for the header.


  ################################################
  # Write the table headers
  ################################################
  printf $txt_fh ("%s\n%s\n%s", $title, "-" x length($title),$txt_col_headers) if $txt_fh;

  if ($htm_fh) {
    print $htm_fh <<EoText;
<hr><a name="$text count"></a><h2>$title</h2>
<table border=0 width="100%">
<tr><td>
<table border=1>
EoText
    # The column headers belong to the HTML report, so they must go to
    # the HTML handle and not to whatever handle is currently selected.
    print $htm_fh $htm_col_headers;
  }

  if ($xls_fh) {
    $spreadsheet->write(${$row_sref}++, 0, $title, $f_header2);
    $spreadsheet->write(${$row_sref}++, 0, [@headers, $text], $f_headertab);
  }


  # write content
  foreach my $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {

    # When displaying the average figures, we calculate the average of
    # the rounded data, as the user would calculate it. This reduces
    # the accuracy slightly, but we have to do it this way otherwise
    # when using -merge to convert results from text to HTML and
    # vice-versa discrepancies would occur.
    my $messages = $$m_count{$key};
    my @content = ($messages);
    push(@content, $$a_count{$key}) if defined $a_count;
    if (defined $m_data) {
      my $rounded_volume = volume_rounded($$m_data{$key},$$m_data_gigs{$key});
      my($data,$gigs) = (0,0);
      un_round($rounded_volume,\$data,\$gigs);
      my $rounded_average = volume_rounded($data/$messages,$gigs/$messages);
      push(@content, $rounded_volume, $rounded_average);
    }

    # write content
    printf $txt_fh ($txt_format, @content, $key) if $txt_fh;

    if ($htm_fh) {
      my $htmlkey = $key;
      $htmlkey =~ s/>/\&gt\;/g;
      $htmlkey =~ s/</\&lt\;/g;
      printf $htm_fh ($htm_format, @content, $htmlkey);
    }
    $spreadsheet->write(${$row_sref}++, 0, [@content, $key], $f_default) if $xls_fh;

    # The first $ntopchart entries get their own pie slice; the rest are
    # lumped together as "Other".
    if (scalar @chartdatanames < $ntopchart) {
      push(@chartdatanames, $key);
      push(@chartdatavals, $$m_count{$key});
    }
    else {
      $chartotherval += $$m_count{$key};
    }
  }

  push(@chartdatanames, "Other");
  push(@chartdatavals, $chartotherval);

  print $txt_fh "\n" if $txt_fh;
  if ($htm_fh) {
    print $htm_fh "</table>\n";
    print $htm_fh "</td><td>\n";
    if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0)) {
      # calculate the graph
      my @data = (
         \@chartdatanames,
         \@chartdatavals
      );
      my $graph = GD::Graph::pie->new(300, 300);
      $graph->set(
          x_label           => 'Name',
          y_label           => 'Amount',
          title             => 'By count',
      );
      my $gd = $graph->plot(\@data) or warn($graph->error);
      if ($gd) {
        my $temp = $text;
        $temp =~ s/ /_/g;
        open(my $img, '>', "$chartdir/${temp}_count.png") or die "Could not write $chartdir/${temp}_count.png: $!\n";
        binmode $img;
        print $img $gd->png;
        close $img;
        print $htm_fh "<img src=\"$chartrel/${temp}_count.png\">";
      }
    }
    print $htm_fh "</td><td>\n";
    print $htm_fh "</td></tr></table>\n\n";
  }
  ++${$row_sref} if $xls_fh;


  if (defined $m_data) {
    # write header

    $title = "Top $name by volume";

    printf $txt_fh ("%s\n%s\n%s", $title, "-" x length($title),$txt_col_headers) if $txt_fh;

    if ($htm_fh) {
      print $htm_fh <<EoText;
<hr><a name="$text volume"></a><h2>$title</h2>
<table border=0 width="100%">
<tr><td>
<table border=1>
EoText
      # As above: send the headers to the HTML handle explicitly.
      print $htm_fh $htm_col_headers;
    }
    if ($xls_fh) {
      $spreadsheet->write(${$row_sref}++, 0, $title, $f_header2);
      $spreadsheet->write(${$row_sref}++, 0, [@headers, $text], $f_headertab);
    }

    @chartdatanames = ();
    @chartdatavals = ();
    $chartotherval = 0;
    my $use_gig = 0;
    foreach my $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
      # The largest volume will be the first (top of the list).
      # If it has at least 1 gig, then just use gigabytes to avoid
      # risking an integer overflow when generating the pie charts.
      if ($$m_data_gigs{$key}) {
        $use_gig = 1;
      }

      my $messages = $$m_count{$key};
      my @content = ($messages);
      push(@content, $$a_count{$key}) if defined $a_count;
      my $rounded_volume = volume_rounded($$m_data{$key},$$m_data_gigs{$key});
      my($data ,$gigs) = (0,0);
      un_round($rounded_volume,\$data,\$gigs);
      my $rounded_average = volume_rounded($data/$messages,$gigs/$messages);
      push(@content, $rounded_volume, $rounded_average );

      # write content
      printf $txt_fh ($txt_format, @content, $key) if $txt_fh;
      if ($htm_fh) {
        my $htmlkey = $key;
        $htmlkey =~ s/>/\&gt\;/g;
        $htmlkey =~ s/</\&lt\;/g;
        printf $htm_fh ($htm_format, @content, $htmlkey);
      }
      $spreadsheet->write(${$row_sref}++, 0, [@content, $key], $f_default) if $xls_fh;


      if (scalar @chartdatanames < $ntopchart) {
        if ($use_gig) {
          # When charting in gigabytes, entries under a gigabyte are
          # left off the chart altogether.
          if ($$m_data_gigs{$key}) {
            push(@chartdatanames, $key);
            push(@chartdatavals, $$m_data_gigs{$key});
          }
        }
        else {
          push(@chartdatanames, $key);
          push(@chartdatavals, $$m_data{$key});
        }
      }
      else {
        $chartotherval += ($use_gig) ? $$m_data_gigs{$key} : $$m_data{$key};
      }
    }
    push(@chartdatanames, "Other");
    push(@chartdatavals, $chartotherval);

    print $txt_fh "\n" if $txt_fh;
    if ($htm_fh) {
      print $htm_fh "</table>\n";
      print $htm_fh "</td><td>\n";
      if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals > 0)) {
        # calculate the graph
        my @data = (
           \@chartdatanames,
           \@chartdatavals
        );
        my $graph = GD::Graph::pie->new(300, 300);
        $graph->set(
            x_label           => 'Name',
            y_label           => 'Volume' ,
            title             => 'By Volume',
        );
        my $gd = $graph->plot(\@data) or warn($graph->error);
        if ($gd) {
          my $temp = $text;
          $temp =~ s/ /_/g;
          open(my $img, '>', "$chartdir/${temp}_volume.png") or die "Could not write $chartdir/${temp}_volume.png: $!\n";
          binmode $img;
          print $img $gd->png;
          close $img;
          print $htm_fh "<img src=\"$chartrel/${temp}_volume.png\">";
        }
      }
      print $htm_fh "</td><td>\n";
      print $htm_fh "</td></tr></table>\n\n";
    }

    ++${$row_sref} if $xls_fh;
  }
}
1559
1560
1b4fe9dd
PH
1561#######################################################################
1562# top_n_sort();
8e669ac1 1563#
1b4fe9dd 1564# @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
8e669ac1 1565#
1b4fe9dd
PH
1566# Given a hash which has numerical values, return the sorted $n keys which
1567# point to the top values. The second and third hashes are used as
1568# tiebreakers. They all must have the same keys.
8e669ac1 1569#
1b4fe9dd
PH
1570# The idea behind this routine is that when you only want to see the
1571# top n members of a set, rather than sorting the entire set and then
1572# plucking off the top n, sort through the stack as you go, discarding
1573# any member which is lower than your current n'th highest member.
8e669ac1 1574#
1b4fe9dd
PH
1575# This proves to be an order of magnitude faster for large hashes.
1576# On 200,000 lines of mainlog it benchmarked 9 times faster.
1577# On 700,000 lines of mainlog it benchmarked 13.8 times faster.
8e669ac1 1578#
1b4fe9dd
PH
1579# We assume the values are > 0.
1580#######################################################################
059ec3d9
PH
sub top_n_sort {
  # @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
  #
  # Return the keys of %$href1 that have the $n highest values, best
  # first. Ties are broken by the value in %$href2, then by the value in
  # %$href3 (both optional), and finally by the key itself in ascending
  # string order.
  #
  # Rather than sorting every key and keeping the first $n, a running
  # list of the best $n seen so far is maintained, and anything that
  # cannot beat the current bottom entry is thrown away at once.
  #
  # We assume the values are > 0.
  my($n,$href1,$href2,$href3) = @_;

  # Empty slots are marked with '_', because the null string is a valid
  # key. '_' is not a valid domain, edomain, host, or email.
  my @leaders = ('_') x $n;
  my $last_slot = $n - 1;

  # The values and key of the entry currently in the bottom slot.
  my @floor = (0, 0, 0);
  my $floor_key = '';

  # Stand-in for whichever tiebreaker hashes the caller left out.
  my %no_tiebreak;
  $href2 = \%no_tiebreak unless defined $href2;
  $href3 = \%no_tiebreak unless defined $href3;

  while (my($candidate, $primary) = each %$href1) {

    # Cheap test on the main value first: most keys are rejected here,
    # before any stand-in tiebreaker entry has to be created.
    next unless $primary >= $floor[0];

    # Give the key a stand-in tiebreaker value if it has none. When both
    # tiebreakers are stand-ins this one entry serves for both of them.
    $no_tiebreak{$candidate} = 0 unless exists $href2->{$candidate};

    # Full comparison against the bottom entry.
    my $beats_floor = $primary              <=> $floor[0]
                   || $href2->{$candidate}  <=> $floor[1]
                   || $href3->{$candidate}  <=> $floor[2]
                   || $floor_key            cmp $candidate;
    next unless ($beats_floor == 1);

    my $second = $href2->{$candidate};
    my $third  = $href3->{$candidate};

    # The candidate beats the bottom entry, so at worst it takes the
    # bottom slot. Scan from the top for the first slot that is empty or
    # holds something the candidate beats; the bottom slot itself need
    # not be looked at again.
    my $slot = $last_slot;
    foreach my $pos (0 .. $last_slot - 1) {
      my $rival = $leaders[$pos];
      if ( ($rival eq '_') ||
           ( ($primary <=> $href1->{$rival} ||
              $second  <=> $href2->{$rival} ||
              $third   <=> $href3->{$rival} ||
              $rival   cmp $candidate) == 1
           )
         ) {
        $slot = $pos;
        last;
      }
    }

    # Drop the bottom entry, then slide the candidate into place.
    $#leaders = $last_slot - 1;
    splice(@leaders, $slot, 0, $candidate);

    # Whatever is at the bottom now sets the bar for later keys, unless
    # the bottom slot is still empty.
    $floor_key = $leaders[$last_slot];
    if ($floor_key ne '_') {
      @floor = ($href1->{$floor_key}, $href2->{$floor_key}, $href3->{$floor_key});
    }
  }

  # Leave out any slots that were never filled.
  return(grep(!/^_$/,@leaders));
}
1684
1685
a83c7e95 1686
1b4fe9dd
PH
1687#######################################################################
1688# html_header();
8e669ac1 1689#
1b4fe9dd 1690# $header = html_header($title);
8e669ac1 1691#
1b4fe9dd
PH
1692# Print our HTML header and start the <body> block.
1693#######################################################################
059ec3d9
PH
sub html_header {
  # $header = html_header($title);
  #
  # Return (not print) the opening of the HTML report: the doctype, a
  # <head> carrying the given title, the opening <body> tag, and the
  # title again as a top-level heading.
  my($title) = @_;

  my @lines = (
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">',
    '<html>',
    '<head>',
    '<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15">',
    "<title>$title</title>",
    '</head>',
    '<body bgcolor="white">',
    "<h1>$title</h1>",
  );

  # Every line, the last one included, ends with a newline.
  return join("\n", @lines) . "\n";
}
1708
1709
1710
1b4fe9dd
PH
1711#######################################################################
1712# help();
8e669ac1 1713#
1b4fe9dd 1714# help();
8e669ac1 1715#
1b4fe9dd
PH
1716# Display usage instructions and exit.
1717#######################################################################
059ec3d9
PH
sub help {
  # help();
  #
  # Print the usage instructions (which include the program version) to
  # the currently selected output handle, then exit with status 1.
  my $usage = <<"EoText";

eximstats Version $VERSION

Usage:
 eximstats [Output] [Options] mainlog1 mainlog2 ...
 eximstats -merge -html [Options] report.1.html ... > weekly_rep.html

Examples:
 eximstats -html=eximstats.html mainlog1 mainlog2 ...
 eximstats mainlog1 mainlog2 ... > report.txt

Parses exim mainlog or syslog files and generates a statistical analysis
of the messages processed.

Valid output types are:
-txt[=<file>] plain text (default unless no other type is specified)
-html[=<file>] HTML
-xls[=<file>] Excel
With no type and file given, defaults to -txt and STDOUT.

Valid options are:
-h<number> histogram divisions per hour. The default is 1, and
 0 suppresses histograms. Other valid values are:
 2, 3, 5, 10, 15, 20, 30 or 60.
-ne don't display error information
-nr don't display relaying information
-nr/pattern/ don't display relaying information that matches
-nt don't display transport information
-nt/pattern/ don't display transport information that matches
-nvr don't do volume rounding. Display in bytes, not KB/MB/GB.
-t<number> display top <number> sources/destinations
 default is 50, 0 suppresses top listing
-tnl omit local sources/destinations in top listing
-t_remote_users show top user sources/destinations from non-local domains
-q<list> list of times for queuing information. -q0 suppresses.
-show_rt<list> Show the receipt times for all the messages.
-show_dt<list> Show the delivery times for all the messages.
 <list> is an optional list of times in seconds.
 Eg -show_rt1,2,4,8.

-include_original_destination show both the final and original
 destinations in the results rather than just the final ones.

-byhost show results by sending host (default unless bydomain or
 byemail is specified)
-bydomain show results by sending domain.
-byemail show results by sender's email address
-byedomain show results by sender's email domain

-pattern "Description" /pattern/
 Count lines matching specified patterns and show them in
 the results. It can be specified multiple times. Eg:
 -pattern 'Refused connections' '/refused connection/'

-merge merge previously generated reports into a new report

-charts Create charts (this requires the GD::Graph modules).
 Only valid with -html.
-chartdir <dir> Create the charts' png files in the directory <dir>
-chartrel <dir> Specify the relative directory for the "img src=" tags
 from where to include the charts in the html file
 -chartdir and -chartrel default to '.'

-emptyok It is OK if there is no valid input, don't print an error.

-d Debug mode - dump the eval'ed parser onto STDERR.

EoText

  print $usage;
  exit 1;
}
1791
1792
1793
1b4fe9dd
PH
1794#######################################################################
1795# generate_parser();
8e669ac1 1796#
1b4fe9dd 1797# $parser = generate_parser();
8e669ac1 1798#
1b4fe9dd
PH
1799# This subroutine generates the parsing routine which will be
1800# used to parse the mainlog. We take the base operation, and remove bits not in use.
1801# This improves performance depending on what bits you take out or add.
8e669ac1 1802#
1b4fe9dd 1803# I've tested using study(), but this does not improve performance.
8e669ac1 1804#
1b4fe9dd
PH
1805# We store our parsing routine in a variable, and process it looking for #IFDEF (Expression)
1806# or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
1807# the expression evaluates to true, then it is included/excluded accordingly.
1808#######################################################################
059ec3d9
PH
sub generate_parser {
  # Build and return the source text of the log-parsing loop that parse()
  # later runs with a string eval.
  #
  # The text is assembled from three pieces:
  #   1. a fixed prologue that reads a line from $fh and extracts the
  #      timestamp, message id and flag;
  #   2. one generated "if" statement per entry in @user_patterns;
  #   3. the main body, which contains #IFDEF/#IFNDEF/#ENDIF markers.
  # The assembled text is then run through a small preprocessor (at the end
  # of this subroutine) that drops the sections whose conditions do not hold
  # for the current options and replaces $UPPER_CASE constant names with
  # their values.
  #
  # NOTE: the pieces below are single-quoted strings. Within them "\\"
  # yields a single backslash and "\'" a single quote; every other
  # backslash sequence is passed through unchanged to the generated code.
  my $parser = '
  my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
  my($tod,$m_hour,$m_min,$id,$flag,$extra,$length);
  my($seconds,$queued,$rcpt_time);
  my $rej_id = 0;
  while (<$fh>) {

    # Convert syslog lines to mainlog format.
    if (! /^\\d{4}/) {
      next unless s/^.*? exim\\b.*?: //;
    }

    $length = length($_);
    next if ($length < 38);
    next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o;

    ($tod,$m_hour,$m_min) = ($1,$2,$3);

    # PH - watch for GMT offsets in the timestamp.
    if (defined($4)) {
      $extra = 6;
      next if ($length < 44);
    }
    else {
      $extra = 0;
    }

    $id   = substr($_, 20 + $extra, 16);
    $flag = substr($_, 37 + $extra, 2);

    if ($flag !~ /^([<>=*-]+|SA)$/ && /rejected|refused|dropped/) {
      $flag = "Re";
      $extra -= 3;
    }

    # Rejects can have no MSGID...
    if ($flag eq "Re" && $id !~ /^[-0-9a-zA-Z]+$/) {
      $id = "reject:" . ++$rej_id;
      $extra -= 17;
    }
';

  # Watch for user specified patterns.
  # Each pattern becomes an "if" in the generated loop that bumps its total
  # and, when histograms are enabled, its per-interval counter.
  my $user_pattern_index = 0;
  foreach (@user_patterns) {
    $user_pattern_totals[$user_pattern_index] = 0;
    $parser .= "    if ($_) {\n";
    $parser .= "      \$user_pattern_totals[$user_pattern_index]++;\n";
    $parser .= "      \$user_pattern_interval_count[$user_pattern_index][(\$m_hour*60 + \$m_min)/$hist_interval]++;\n" if ($hist_opt > 0);
    $parser .= "    }\n";
    $user_pattern_index++;
  }

  $parser .= '
    next unless ($flag =~ /<=|=>|->|==|\\*\\*|Co|SA|Re/);

    #Strip away the timestamp, ID and flag to speed up later pattern matches.
    #The flags include Co (Completed), Re (Rejected), and SA (SpamAssassin).

    $_ = substr($_, 40 + $extra);  # PH

    # Alias @message to the array of information about the message.
    # This minimises the number of calls to hash functions.
    $messages{$id} = [] unless exists $messages{$id};
    *message = $messages{$id};


    # JN - Skip over certain transports as specified via the "-nt/.../" command
    # line switch (where ... is a perl style regular expression).  This is
    # required so that transports that skew stats such as SpamAssassin can be
    # ignored.
    #IFDEF ($transport_pattern)
    if (/\\sT=(\\S+)/) {
      next if ($1 =~ /$transport_pattern/o) ;
    }
    #ENDIF ($transport_pattern)



    # Do some pattern matches to get the host and IP address.
    # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or
    # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
    # We do 2 separate matches to keep the matches simple and fast.
    # Host is local unless otherwise specified.
    $ip = (/\\bH=.*?(\\[[^]]+\\])/) ? $1 : "local";
    $host = (/\\bH=(\\S+)/) ? $1 : "local";

    $domain = "localdomain";  #Domain is localdomain unless otherwise specified.

    #IFDEF ($do_sender{Domain})
    if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
      # Remove the host portion from the DNS name. We ensure that we end up
      # with at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z".
      $domain = lc("$1.$2");
      $domain =~ s/^\\.//;        #Remove preceding dot.
    }
    #ENDIF ($do_sender{Domain})

    #IFDEF ($do_sender{Email})
      #IFDEF ($include_original_destination)
      # Catch both "a@b.com <c@d.com>" and "e@f.com"
      #$email = (/^(\S+) (<(\S*?)>)?/) ? $3 || $1 : "";
      $email = (/^(\S+ (<[^@>]+@?[^>]*>)?)/) ? $1 : "";
      chomp($email);
      #ENDIF ($include_original_destination)

      #IFNDEF ($include_original_destination)
      $email = (/^(\S+)/) ? $1 : "";
      #ENDIF ($include_original_destination)
    #ENDIF ($do_sender{Email})

    #IFDEF ($do_sender{Edomain})
      #IFDEF ($include_original_destination)
      #$edomain = (/^(\S+) (<\S*?\\@(\S+)>)?/) ? $3 || $1 : "";
      $edomain = (/^(\S+ (<\S*?\\@(\S+?)>)?)/) ? $1 : "";
      chomp($edomain);
      $edomain = lc($edomain);
      #ENDIF ($include_original_destination)

      #IFNDEF ($include_original_destination)
      $edomain = (/^\S*?\\@(\S+)/) ? lc($1) : "";
      #ENDIF ($include_original_destination)
    #ENDIF ($do_sender{Edomain})

    if ($tod lt $begin) {
      $begin = $tod;
    }
    elsif ($tod gt $end) {
      $end   = $tod;
    }


    if ($flag eq "<=") {
      $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0;
      $message[$SIZE] = $thissize;
      $message[$PROTOCOL] = (/ P=(\S+)/) ? $1 : undef;

      #IFDEF ($show_relay)
      if ($host ne "local") {
        # Save incoming information in case it becomes interesting
        # later, when delivery lines are read.
        my($from) = /^(\\S+)/;
        $message[$FROM_HOST] = "$host$ip";
        $message[$FROM_ADDRESS] = $from;
      }
      #ENDIF ($show_relay)

      #IFDEF ($local_league_table || $include_remote_users)
        if (/\sU=(\\S+)/) {
          my $user = $1;

          #IFDEF ($local_league_table && $include_remote_users)
          {                         #Store both local and remote users.
          #ENDIF ($local_league_table && $include_remote_users)

          #IFDEF ($local_league_table && ! $include_remote_users)
          if ($host eq "local") {   #Store local users only.
          #ENDIF ($local_league_table && ! $include_remote_users)

          #IFDEF ($include_remote_users && ! $local_league_table)
          if ($host ne "local") {   #Store remote users only.
          #ENDIF ($include_remote_users && ! $local_league_table)

            ++$received_count_user{$user};
            add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize);
          }
        }
      #ENDIF ($local_league_table || $include_remote_users)

      #IFDEF ($do_sender{Host})
        ++$received_count{Host}{$host};
        add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize);
      #ENDIF ($do_sender{Host})

      #IFDEF ($do_sender{Domain})
        if ($domain) {
          ++$received_count{Domain}{$domain};
          add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize);
        }
      #ENDIF ($do_sender{Domain})

      #IFDEF ($do_sender{Email})
        ++$received_count{Email}{$email};
        add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize);
      #ENDIF ($do_sender{Email})

      #IFDEF ($do_sender{Edomain})
        ++$received_count{Edomain}{$edomain};
        add_volume(\\$received_data{Edomain}{$edomain},\\$received_data_gigs{Edomain}{$edomain},$thissize);
      #ENDIF ($do_sender{Edomain})

      ++$total_received_count;
      add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize);

      #IFDEF ($#queue_times >= 0 || $#rcpt_times >= 0)
        $message[$ARRIVAL_TIME] = $tod;
      #ENDIF ($#queue_times >= 0 || $#rcpt_times >= 0)

      #IFDEF ($hist_opt > 0)
        $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
      #ENDIF ($hist_opt > 0)
    }

    elsif ($flag eq "=>") {
      $size = $message[$SIZE] || 0;
      if ($host ne "local") {
        $message[$REMOTE_DELIVERED] = 1;


        #IFDEF ($show_relay)
        # Determine relaying address if either only one address listed,
        # or two the same. If they are different, it implies a forwarding
        # or aliasing, which is not relaying. Note that for multi-aliased
        # addresses, there may be a further address between the first
        # and last.

        if (defined $message[$FROM_HOST]) {
          if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) {
            ($old,$new) = ($1,$2);
          }
          else {
            $old = $new = "";
          }

          if ("\\L$new" eq "\\L$old") {
            ($old) = /^(\\S+)/ if $old eq "";
            my $key = "H=\\L$message[$FROM_HOST]\\E A=\\L$message[$FROM_ADDRESS]\\E => " .
              "H=\\L$host\\E$ip A=\\L$old\\E";
            if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
              $relayed{$key} = 0 if !defined $relayed{$key};
              ++$relayed{$key};
            }
            else {
              ++$relayed_unshown;
            }
          }
        }
        #ENDIF ($show_relay)

      }

      #IFDEF ($local_league_table || $include_remote_users)
        #IFDEF ($local_league_table && $include_remote_users)
        {                         #Store both local and remote users.
        #ENDIF ($local_league_table && $include_remote_users)

        #IFDEF ($local_league_table && ! $include_remote_users)
        if ($host eq "local") {   #Store local users only.
        #ENDIF ($local_league_table && ! $include_remote_users)

        #IFDEF ($include_remote_users && ! $local_league_table)
        if ($host ne "local") {   #Store remote users only.
        #ENDIF ($include_remote_users && ! $local_league_table)

          if (my($user) = split((/\\s</)? " <" : " ", $_)) {
            #IFDEF ($include_original_destination)
            {
            #ENDIF ($include_original_destination)
            #IFNDEF ($include_original_destination)
            if ($user =~ /^[\\/|]/) {
            #ENDIF ($include_original_destination)
              my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
              $user = "$user $parent" if defined $parent;
            }
            ++$delivered_messages_user{$user};
            ++$delivered_addresses_user{$user};
            add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size);
          }
        }
      #ENDIF ($local_league_table || $include_remote_users)

      #IFDEF ($do_sender{Host})
        $delivered_messages{Host}{$host}++;
        $delivered_addresses{Host}{$host}++;
        add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size);
      #ENDIF ($do_sender{Host})
      #IFDEF ($do_sender{Domain})
        if ($domain) {
          ++$delivered_messages{Domain}{$domain};
          ++$delivered_addresses{Domain}{$domain};
          add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size);
        }
      #ENDIF ($do_sender{Domain})
      #IFDEF ($do_sender{Email})
        ++$delivered_messages{Email}{$email};
        ++$delivered_addresses{Email}{$email};
        add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size);
      #ENDIF ($do_sender{Email})
      #IFDEF ($do_sender{Edomain})
        ++$delivered_messages{Edomain}{$edomain};
        ++$delivered_addresses{Edomain}{$edomain};
        add_volume(\\$delivered_data{Edomain}{$edomain},\\$delivered_data_gigs{Edomain}{$edomain},$size);
      #ENDIF ($do_sender{Edomain})

      ++$total_delivered_messages;
      ++$total_delivered_addresses;
      add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size);

      #IFDEF ($show_transport)
        my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:";
        ++$transported_count{$transport};
        add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size);
      #ENDIF ($show_transport)

      #IFDEF ($hist_opt > 0)
        $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
      #ENDIF ($hist_opt > 0)

      #IFDEF ($#delivery_times >= 0)
        if (/ DT=(\S+)/) {
          $seconds = wdhms_seconds($1);
          for ($i = 0; $i <= $#delivery_times; $i++) {
            if ($seconds < $delivery_times[$i]) {
              ++$dt_all_bin[$i];
              ++$dt_remote_bin[$i] if $message[$REMOTE_DELIVERED];
              last;
            }
          }
          if ($i > $#delivery_times) {
            ++$dt_all_overflow;
            ++$dt_remote_overflow if $message[$REMOTE_DELIVERED];
          }
        }
      #ENDIF ($#delivery_times >= 0)

    }

    elsif ($flag eq "->") {

      #IFDEF ($local_league_table || $include_remote_users)
        #IFDEF ($local_league_table && $include_remote_users)
        {                         #Store both local and remote users.
        #ENDIF ($local_league_table && $include_remote_users)

        #IFDEF ($local_league_table && ! $include_remote_users)
        if ($host eq "local") {   #Store local users only.
        #ENDIF ($local_league_table && ! $include_remote_users)

        #IFDEF ($include_remote_users && ! $local_league_table)
        if ($host ne "local") {   #Store remote users only.
        #ENDIF ($include_remote_users && ! $local_league_table)

          if (my($user) = split((/\\s</)? " <" : " ", $_)) {
            #IFDEF ($include_original_destination)
            {
            #ENDIF ($include_original_destination)
            #IFNDEF ($include_original_destination)
            if ($user =~ /^[\\/|]/) {
            #ENDIF ($include_original_destination)
              my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
              $user = "$user $parent" if defined $parent;
            }
            ++$delivered_addresses_user{$user};
          }
        }
      #ENDIF ($local_league_table || $include_remote_users)

      #IFDEF ($do_sender{Host})
        $delivered_addresses{Host}{$host}++;
      #ENDIF ($do_sender{Host})
      #IFDEF ($do_sender{Domain})
        if ($domain) {
          ++$delivered_addresses{Domain}{$domain};
        }
      #ENDIF ($do_sender{Domain})
      #IFDEF ($do_sender{Email})
        ++$delivered_addresses{Email}{$email};
      #ENDIF ($do_sender{Email})
      #IFDEF ($do_sender{Edomain})
        ++$delivered_addresses{Edomain}{$edomain};
      #ENDIF ($do_sender{Edomain})

      ++$total_delivered_addresses;
    }

    elsif ($flag eq "==" && defined($message[$SIZE]) && !defined($message[$DELAYED])) {
      ++$delayed_count;
      $message[$DELAYED] = 1;
    }

    elsif ($flag eq "**") {
      if (defined ($message[$SIZE])) {
        unless (defined $message[$HAD_ERROR]) {
          ++$message_errors;
          $message[$HAD_ERROR] = 1;
        }
      }

      #IFDEF ($show_errors)
        ++$errors_count{$_};
      #ENDIF ($show_errors)

    }

    elsif ($flag eq "Co") {
      #Completed?
      #IFDEF ($#queue_times >= 0)
        $queued = queue_time($tod, $message[$ARRIVAL_TIME], $id);

        for ($i = 0; $i <= $#queue_times; $i++) {
          if ($queued < $queue_times[$i]) {
            ++$qt_all_bin[$i];
            ++$qt_remote_bin[$i] if $message[$REMOTE_DELIVERED];
            last;
          }
        }
        if ($i > $#queue_times) {
          ++$qt_all_overflow;
          ++$qt_remote_overflow if $message[$REMOTE_DELIVERED];
        }
      #ENDIF ($#queue_times >= 0)

      #IFDEF ($#rcpt_times >= 0)
        if (/ QT=(\S+)/) {
          $seconds = wdhms_seconds($1);
          #Calculate $queued if not previously calculated above.
          #IFNDEF ($#queue_times >= 0)
            $queued = queue_time($tod, $message[$ARRIVAL_TIME], $id);
          #ENDIF ($#queue_times >= 0)
          $rcpt_time = $seconds - $queued;
          my($protocol);

          if (defined $message[$PROTOCOL]) {
            $protocol = $message[$PROTOCOL];

            # Create the bin if its not already defined.
            unless (exists $rcpt_times_bin{$protocol}) {
              initialise_rcpt_times($protocol);
            }
          }


          for ($i = 0; $i <= $#rcpt_times; ++$i) {
            if ($rcpt_time < $rcpt_times[$i]) {
              ++$rcpt_times_bin{all}[$i];
              ++$rcpt_times_bin{$protocol}[$i] if defined $protocol;
              last;
            }
          }

          if ($i > $#rcpt_times) {
            ++$rcpt_times_overflow{all};
            ++$rcpt_times_overflow{$protocol} if defined $protocol;
          }
        }
      #ENDIF ($#rcpt_times >= 0)

      delete($messages{$id});
    }
    elsif ($flag eq "SA") {
      $ip = (/From.*?(\\[[^]]+\\])/ || /\\((local)\\)/) ? $1 : "";
      #SpamAssassin message
      if (/Action: ((permanently|temporarily) rejected message|flagged as Spam but accepted): score=(\d+\.\d)/) {
        #add_volume(\\$spam_score,\\$spam_score_gigs,$3);
        ++$spam_count_by_ip{$ip};
      } elsif (/Action: scanned but message isn\'t spam: score=(-?\d+\.\d)/) {
        #add_volume(\\$ham_score,\\$ham_score_gigs,$1);
        ++$ham_count_by_ip{$ip};
      } elsif (/(Not running SA because SAEximRunCond expanded to false|check skipped due to message size)/) {
        ++$ham_count_by_ip{$ip};
      }
    }

    # Look for Reject messages or blackholed messages (deliveries
    # without a transport)
    if ($flag eq "Re" || ($flag eq "=>" && ! /\\sT=\\S+/)) {
      # Correct the IP address for rejects:
      # rejected EHLO from my.test.net [10.0.0.5]: syntactically invalid argument(s):
      # rejected EHLO from [10.0.0.6]: syntactically invalid argument(s):
      $ip = $1 if ($ip eq "local" && /^rejected [HE][HE]LO from .*?(\[.+?\]):/);
      ++$rejected_count_by_ip{$ip};
      if (/SpamAssassin/) {
        ++$rejected_count_by_reason{"Rejected by SpamAssassin"};
      }
      elsif (
        /(listed at [^ ]+)/ ||
        /(Forged IP detected in HELO)/ ||
        /(Invalid domain or IP given in HELO\/EHLO)/ ||
        /(unqualified recipient rejected)/ ||
        /(closed connection (after|in response) .*?)\s*$/ ||
        /(sender rejected)/ ||
        # 2005-09-23 15:07:49 1EInHJ-0007Ex-Au H=(a.b.c) [10.0.0.1] F=<> rejected after DATA: This message contains a virus: (Eicar-Test-Signature) please scan your system.
        # 2005-10-06 10:50:07 1ENRS3-0000Nr-Kt => blackhole (DATA ACL discarded recipients): This message contains a virus: (Worm.SomeFool.P) please scan your system.
        / rejected after DATA: (.*)/ ||
        /.DATA ACL discarded recipients.: (.*)/ ||
        /rejected after DATA: (unqualified address not permitted)/ ||
        /(VRFY rejected)/ ||
#        /(sender verify (defer|fail))/i ||
        /(too many recipients)/ ||
        /(refused relay.*?) to/ ||
        /(rejected by non-SMTP ACL: .*)/ ||
        /(rejected by local_scan.*)/ ||
        # SMTP call from %s dropped: too many syntax or protocol errors (last command was "%s"
        # SMTP call from %s dropped: too many nonmail commands
        /(dropped: too many ((nonmail|unrecognized) commands|syntax or protocol errors))/ ||

        # local_scan() function crashed with signal %d - message temporarily rejected
        # local_scan() function timed out - message temporarily rejected
        /(local_scan.. function .* - message temporarily rejected)/ ||
        /(temporarily refused connection)/ ||
        # SMTP protocol synchronization error (input sent without waiting for greeting): rejected connection from %s
        /(SMTP protocol .*?(error|violation))/ ||
        /(message too big)/
        ) {
        ++$rejected_count_by_reason{"\u$1"};
      }
      elsif (/rejected [HE][HE]LO from [^:]*: syntactically invalid argument/) {
        ++$rejected_count_by_reason{"Rejected HELO/EHLO: syntactically invalid argument"};
      }
      elsif (/response to "RCPT TO.*? was: (.*)/) {
        ++$rejected_count_by_reason{"Response to RCPT TO was: $1"};
      }
      elsif (
        /(lookup of host )\S+ (failed)/ ||

        # rejected from <%s>%s%s%s%s: message too big:
        /(rejected [A-Z]*) .*?(: .*?)(:|\s*$)/ ||
        # refused connection from %s (host_reject_connection)
        # refused connection from %s (tcp wrappers)
        /(refused connection )from.*? (\(.*)/ ||

        # error from remote mailer after RCPT TO:<a@b.c>: host a.b.c [10.0.0.1]: 450 <a@b.c>: Recipient address rejected: Greylisted for 60 seconds
        # error from remote mailer after MAIL FROM:<> SIZE=3468: host a.b.c [10.0.0.1]: 421 a.b.c has refused your connection because your server did not have a PTR record.
        /(error from remote mailer after .*?:).*(: .*?)(:|\s*$)/ ||

        # a.b.c F=<a@b.c> rejected after DATA: "@" or "." expected after "Undisclosed-Recipient": failing address in "To" header is: <Undisclosed-Recipient:;>
        /rejected after DATA: ("." or "." expected).*?(: failing address in .*? header)/ ||

        # connection from %s refused load average = %.2f
        /(Connection )from.*? (refused: load average)/ ||
        # connection from %s refused (IP options)
        # Connection from %s refused: too many connections
        # connection from %s refused
        /([Cc]onnection )from.*? (refused.*)/ ||
        # [10.0.0.1]: connection refused
        /: (Connection refused)()/
        ) {
        ++$rejected_count_by_reason{"\u$1$2"};
      }
      else {
        ++$rejected_count_by_reason{Unknown};
        print STDERR "Unknown rejection: $_" if $debug;
      }
    }
  }';

  # We now do a 'C preprocessor style operation on our parser
  # to remove bits not in use.
  #
  # %defines_in_operation holds the condition text of every open
  # #IFDEF/#IFNDEF whose test failed; lines are dropped for as long as that
  # hash is non-empty, which lets excluded sections nest.
  my(%defines_in_operation,$removing_lines,$processed_parser);
  foreach (split (/\n/,$parser)) {
    # Start dropping lines when an #IFDEF expression is false or an
    # #IFNDEF expression is true. The expression is evaluated here, in the
    # scope of this file, against the current option settings.
    if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i  && ! eval $1) ||
        (/^\s*#\s*IFNDEF\s*\((.*?)\)/i &&   eval $1)    ) {
      $defines_in_operation{$1} = 1;
      $removing_lines = 1;
    }

    # Convert constants.
    # Every $UPPER_CASE name on the line is replaced by its current value.
    while (/(\$[A-Z][A-Z_]*)\b/) {
      my $constant = eval $1;
      s/(\$[A-Z][A-Z_]*)\b/$constant/;
    }

    $processed_parser .= $_."\n" unless $removing_lines;

    # An #ENDIF closes the section with the same expression text; output
    # resumes only once no excluded section remains open.
    if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) {
      delete $defines_in_operation{$1};
      unless (keys %defines_in_operation) {
        $removing_lines = 0;
      }
    }
  }
  print STDERR "# START OF PARSER:$processed_parser\n# END OF PARSER\n\n" if $debug;

  return $processed_parser;
}
2384
2385
2386
1b4fe9dd
PH
2387#######################################################################
2388# parse();
8e669ac1 2389#
1b4fe9dd 2390# parse($parser,\*FILEHANDLE);
8e669ac1 2391#
1b4fe9dd
PH
2392# This subroutine accepts a parser and a filehandle from main and parses each
2393# line. We store the results into global variables.
2394#######################################################################
059ec3d9
PH
sub parse {
  # Arguments: the parser source text built by generate_parser() and the
  # filehandle to read from.
  # NOTE: the lexical must be called $fh - the generated parser text reads
  # from a variable of that name when it is eval'ed below.
  my ($parser, $fh) = @_;

  # Merging works on previously generated reports, not on raw log lines.
  return parse_old_eximstat_reports($fh) if $merge_reports;

  # Compile and run the generated parser; re-raise any failure it reports.
  eval $parser;
  die($@) if $@;
}
2407
2408
2409
1b4fe9dd
PH
2410#######################################################################
2411# print_header();
8e669ac1 2412#
1b4fe9dd 2413# print_header();
8e669ac1 2414#
1b4fe9dd
PH
2415# Print our headers and contents.
2416#######################################################################
059ec3d9
PH
sub print_header {
  # Write the report title to every open output: a plain title for text,
  # the HTML preamble plus a table of contents for HTML, and a title block
  # on the global worksheet for Excel.
  my $title = "Exim statistics from $begin to $end";

  if ($txt_fh) {
    print $txt_fh "\n$title\n";
  }

  if ($htm_fh) {
    # Emit a single table-of-contents entry pointing at a named anchor.
    my $toc_entry = sub {
      my ($anchor, $label) = @_;
      print $htm_fh "<li><a href=\"#$anchor\">$label</a>\n";
    };

    print $htm_fh html_header($title);
    print $htm_fh "<ul>\n";

    $toc_entry->('grandtotal', 'Grand total summary');
    $toc_entry->('patterns', 'User Specified Patterns') if @user_patterns;
    $toc_entry->('transport', 'Deliveries by Transport') if $show_transport;

    if ($hist_opt) {
      $toc_entry->('Messages received', 'Messages received per hour');
      $toc_entry->('Deliveries', 'Deliveries per hour');
    }

    if (@queue_times) {
      $toc_entry->('Time spent on the queue all messages',
                   'Time spent on the queue: all messages');
      $toc_entry->('Time spent on the queue messages with at least one remote delivery',
                   'Time spent on the queue: messages with at least one remote delivery');
    }

    if (@delivery_times) {
      $toc_entry->('Delivery times all messages',
                   'Delivery times: all messages');
      $toc_entry->('Delivery times messages with at least one remote delivery',
                   'Delivery times: messages with at least one remote delivery');
    }

    if (@rcpt_times) {
      $toc_entry->('Receipt times all messages', 'Receipt times');
    }

    $toc_entry->('Relayed messages', 'Relayed messages') if $show_relay;

    if ($topcount) {
      my @categories = ('Host', 'Domain', 'Email', 'Edomain');
      my $want_users = $local_league_table || $include_remote_users;

      if (%rejected_count_by_reason) {
        $toc_entry->('mail rejection reason count',
                     "Top $topcount mail rejection reasons by message count");
      }

      # Sender league tables, one pair per enabled category.
      for my $category (@categories) {
        next unless $do_sender{$category};
        my $kind = lcfirst $category;
        $toc_entry->("sending $kind count",  "Top $topcount sending ${kind}s by message count");
        $toc_entry->("sending $kind volume", "Top $topcount sending ${kind}s by volume");
      }
      if ($want_users && %received_count_user) {
        $toc_entry->('local sender count',  "Top $topcount local senders by message count");
        $toc_entry->('local sender volume', "Top $topcount local senders by volume");
      }

      # Destination league tables, one pair per enabled category.
      for my $category (@categories) {
        next unless $do_sender{$category};
        my $kind = lcfirst $category;
        $toc_entry->("$kind destination count",  "Top $topcount $kind destinations by message count");
        $toc_entry->("$kind destination volume", "Top $topcount $kind destinations by volume");
      }
      if ($want_users && %delivered_messages_user) {
        $toc_entry->('local destination count',  "Top $topcount local destinations by message count");
        $toc_entry->('local destination volume', "Top $topcount local destinations by volume");
      }

      if (%rejected_count_by_ip) {
        $toc_entry->('rejected ip count',
                     "Top $topcount rejected ips by message count");
      }
      if (%spam_count_by_ip) {
        $toc_entry->('non-rejected spamming ip count',
                     "Top $topcount non-rejected spamming ips by message count");
      }
    }

    $toc_entry->('errors', 'List of errors') if %errors_count;
    print $htm_fh "</ul>\n<hr>\n";
  }

  if ($xls_fh) {
    # Title cell first, then the from/to line on the following row; leave
    # the row counter two rows further down for whatever is written next.
    $ws_global->write($row++, $col+0, "Exim Statistics", $f_header1);
    &set_worksheet_line($ws_global, $row, $col, ["from:", $begin, "to:", $end], $f_default);
    $row += 2;
  }
}
2484
2485
1b4fe9dd
PH
2486#######################################################################
2487# print_grandtotals();
8e669ac1 2488#
1b4fe9dd 2489# print_grandtotals();
8e669ac1 2490#
1b4fe9dd
PH
2491# Print the grand totals.
2492#######################################################################
059ec3d9
PH
sub print_grandtotals {
  # Write the "Grand total summary" table to every enabled output: the text
  # report ($txt_fh), the HTML report ($htm_fh) and, when $xls_fh is set, the
  # $ws_global worksheet. The table has a Received row, a Delivered row and
  # one row for each of Rejects/Ham/Spam that has a non-zero message count.
  #
  # Takes no arguments; all input comes from file-level variables. The
  # worksheet row counter $row is advanced as rows are written. In merge mode
  # the figures are read from %report_totals and the global counters
  # ($total_received_count, $delayed_count, $total_delivered_messages,
  # $total_delivered_addresses) are overwritten with the merged values.

  # The per-sender columns are optional, so their headings and formats are
  # built at run time. They start as empty strings so that nothing undefined
  # is interpolated or measured when no sender category is enabled.
  my $sender_txt_header = '';
  my $sender_txt_format = '';
  my $sender_html_format = '';
  my(@received_totals,@delivered_totals);
  my(@col_headers) = ('TOTAL', 'Volume', 'Messages', 'Addresses');

  foreach my $category ('Host','Domain','Email','Edomain') {
    next unless $do_sender{$category};
    if ($merge_reports) {
      push(@received_totals, get_report_total($report_totals{Received},"${category}s"));
      push(@delivered_totals,get_report_total($report_totals{Delivered},"${category}s"));
    }
    else {
      push(@received_totals,scalar(keys %{$received_data{$category}}));
      push(@delivered_totals,scalar(keys %{$delivered_data{$category}}));
    }

    $sender_txt_header .= " " x ($COLUMN_WIDTHS - length($category)) . $category . 's';
    $sender_html_format .= "<td align=\"right\">%d</td>";
    $sender_txt_format .= " " x ($COLUMN_WIDTHS - 5) . "%6d";
    push(@col_headers,"${category}s");
  }

  # Format 1 covers the label, volume, messages, addresses and sender columns;
  # format 2 is appended for the Received row only (delayed/failed figures).
  my $txt_format1 = " %-16s %9s %6d %6s $sender_txt_format";
  my $txt_format2 = " %6d %4.1f%% %6d %4.1f%%";
  my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%s</td><td align=\"right\">%s</td>$sender_html_format";
  my $htm_format2 = "<td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td><td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td>";

  if ($txt_fh) {
    my $sender_spaces = " " x length($sender_txt_header);
    print $txt_fh "\n";
    print $txt_fh "Grand total summary\n";
    print $txt_fh "-------------------\n";
    print $txt_fh " $sender_spaces At least one address\n";
    print $txt_fh " TOTAL Volume Messages Addresses $sender_txt_header Delayed Failed\n";
  }
  if ($htm_fh) {
    print $htm_fh "<a name=\"grandtotal\"></a>\n";
    print $htm_fh "<h2>Grand total summary</h2>\n";
    print $htm_fh "<table border=1>\n";
    print $htm_fh "<tr><th>" . join('</th><th>',@col_headers) . "</th><th colspan=2>At least one addr<br>Delayed</th><th colspan=2>At least one addr<br>Failed</th>\n";
  }
  if ($xls_fh) {
    $ws_global->write($row++, 0, "Grand total summary", $f_header2);
    $ws_global->write($row, 0, \@col_headers, $f_header2);
    $ws_global->merge_range($row, scalar(@col_headers), $row, scalar(@col_headers)+1, "At least one addr Delayed", $f_header2_m);
    $ws_global->merge_range($row, scalar(@col_headers)+2, $row, scalar(@col_headers)+3, "At least one addr Failed", $f_header2_m);
    #$ws_global->write(++$row, scalar(@col_headers), ['Total','Percent','Total','Percent'], $f_header2);
  }

  my($volume,$failed_count);
  if ($merge_reports) {
    $volume = volume_rounded($report_totals{Received}{Volume}, $report_totals{Received}{'Volume-gigs'});
    $total_received_count = get_report_total($report_totals{Received},'Messages');
    $failed_count = get_report_total($report_totals{Received},'Failed');
    $delayed_count = get_report_total($report_totals{Received},'Delayed');
  }
  else {
    $volume = volume_rounded($total_received_data, $total_received_data_gigs);
    $failed_count = $message_errors;
  }

  {
    # The percentages need real (non-integer) division.
    no integer;

    # The empty string fills the Addresses column, which only applies to the
    # Delivered row. The last four entries are always:
    # delayed count, delayed %, failed count, failed %.
    my @received_row = (
      $volume,$total_received_count,'',
      @received_totals,
      $delayed_count,
      ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0,
      $failed_count,
      ($total_received_count) ? ($failed_count*100/$total_received_count) : 0
    );

    printf $txt_fh ("$txt_format1$txt_format2\n", 'Received', @received_row) if $txt_fh;
    printf $htm_fh ("$htm_format1$htm_format2\n", 'Received', @received_row) if $htm_fh;
    if ($xls_fh) {
      $ws_global->write(++$row, 0, 'Received', $f_default);
      for my $i (0 .. $#received_row) {
        # Locate the two percentage cells from the end of the row, so the
        # choice stays correct however many sender columns precede them.
        if ($i == $#received_row - 2 || $i == $#received_row) {
          $ws_global->write($row, $i+1, $received_row[$i]/100, $f_percent);
        }
        else {
          $ws_global->write($row, $i+1, $received_row[$i], $f_default);
        }
      }
    }
  }

  if ($merge_reports) {
    $volume = volume_rounded($report_totals{Delivered}{Volume}, $report_totals{Delivered}{'Volume-gigs'});
    $total_delivered_messages = get_report_total($report_totals{Delivered},'Messages');
    $total_delivered_addresses = get_report_total($report_totals{Delivered},'Addresses');
  }
  else {
    $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs);
  }

  my @content=($volume, $total_delivered_messages, $total_delivered_addresses, @delivered_totals);
  printf $txt_fh ("$txt_format1\n", 'Delivered', @content) if $txt_fh;
  printf $htm_fh ("$htm_format1\n", 'Delivered', @content) if $htm_fh;

  if ($xls_fh) {
    $ws_global->write(++$row, 0, 'Delivered', $f_default);
    for my $i (0 .. $#content) {
      $ws_global->write($row, $i+1, $content[$i], $f_default);
    }
  }

  # Rejects/Ham/Spam rows: label, no volume, message count, no addresses,
  # plus a host count when the Host column is enabled.
  if ($merge_reports) {
    foreach my $type ('Rejects', 'Ham', 'Spam') {
      my $messages = get_report_total($report_totals{$type},'Messages');
      if ($messages) {
        @content = ($type, '', $messages, '');
        push(@content,get_report_total($report_totals{$type},'Hosts')) if $do_sender{Host};
        printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
        printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
        $ws_global->write(++$row, 0, \@content) if $xls_fh;
      }
    }
  }
  else {
    foreach my $total_aref (['Rejects',\%rejected_count_by_ip],
                            ['Ham',\%ham_count_by_ip],
                            ['Spam',\%spam_count_by_ip]) {
      my($label, $count_by_ip) = @$total_aref;
      my $messages = 0;
      $messages += $_ foreach values %$count_by_ip;

      if ($messages > 0) {
        @content = ($label, '', $messages, '');
        push(@content,scalar(keys %$count_by_ip)) if $do_sender{Host};

        printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
        printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
        $ws_global->write(++$row, 0, \@content) if $xls_fh;
      }
    }
  }

  print $txt_fh "\n" if $txt_fh;
  print $htm_fh "</table>\n" if $htm_fh;
  ++$row;
}
2640
2641
1b4fe9dd
PH
2642#######################################################################
2643# print_user_patterns()
8e669ac1 2644#
1b4fe9dd 2645# print_user_patterns();
8e669ac1 2646#
1b4fe9dd
PH
2647# Print the counts of user specified patterns.
2648#######################################################################
sub print_user_patterns {
  # Report how often each user-specified pattern matched, as a two-column
  # table (description, total) on every enabled output, followed by one
  # histogram per pattern when $hist_opt is positive.
  #
  # Takes no arguments. Totals come from %report_totals when merging earlier
  # reports, otherwise from @user_pattern_totals (same order as
  # @user_descriptions). The worksheet row counter $row is advanced.
  my $txt_row_format = " %-18s %6d";
  my $htm_row_format = "<tr><td>%s</td><td align=\"right\">%d</td>";

  # Headings.
  if ($txt_fh) {
    print $txt_fh "User Specified Patterns\n";
    print $txt_fh "-----------------------";
    print $txt_fh "\n Total\n";
  }
  if ($htm_fh) {
    print $htm_fh "<hr><a name=\"patterns\"></a><h2>User Specified Patterns</h2>\n";
    print $htm_fh "<table border=0 width=\"100%\">\n";
    print $htm_fh "<tr><td>\n";
    print $htm_fh "<table border=1>\n";
    print $htm_fh "<tr><th>&nbsp;</th><th>Total</th>\n";
  }
  if ($xls_fh) {
    $ws_global->write($row++, $col, "User Specified Patterns", $f_header2);
    set_worksheet_line($ws_global, $row++, 1, ["Total"], $f_headertab);
  }

  # One row per pattern description.
  for my $index (0 .. $#user_descriptions) {
    my $description = $user_descriptions[$index];

    # Merged data comes from previous reports, otherwise from the mainlog
    # counters gathered in this run.
    my $total = ($merge_reports)
      ? get_report_total($report_totals{patterns}{$description},'Total')
      : $user_pattern_totals[$index];

    printf $txt_fh ("$txt_row_format\n",$description,$total) if $txt_fh;
    printf $htm_fh ("$htm_row_format\n",$description,$total) if $htm_fh;
    next unless $xls_fh;

    if ($merge_reports) {
      set_worksheet_line($ws_global, $row++, 0, [$description,$total], $f_default);
    }
    else {
      $ws_global->write($row++, 0, [$description,$total]);
    }
  }

  # Close the table.
  print $txt_fh "\n" if $txt_fh;
  print $htm_fh "</table>\n\n" if $htm_fh;
  ++$row if $xls_fh;

  if ($hist_opt > 0) {
    for my $index (0 .. $#user_descriptions) {
      print_histogram($user_descriptions[$index], 'occurence', @{$user_pattern_interval_count[$index]});
    }
  }
}
2709
a83c7e95
SC
2710#######################################################################
2711# print_rejects()
2712#
2713# print_rejects();
2714#
2715# Print statistics about rejected mail.
2716#######################################################################
sub print_rejects {
  # Report the rejected-mail counts grouped by rejection reason: a two-column
  # table (reason, total) on every enabled output and, for HTML when
  # GD::Graph::pie is available and charts are enabled, a pie chart written
  # to $chartdir/rejections_count.png and referenced via $chartrel.
  #
  # Takes no arguments. Counts come from
  # $report_totals{rejected_mail_by_reason} when merging earlier reports,
  # otherwise from %rejected_count_by_reason. The rows shown are those
  # returned by top_n_sort() for $topcount. Dies if the chart file cannot be
  # written.
  my $txt_format1 = " %-40s %6d";
  my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";

  if ($txt_fh) {
    print $txt_fh "Rejected mail by reason\n";
    print $txt_fh "-----------------------";
    print $txt_fh "\n Total\n";
  }
  if ($htm_fh) {
    print $htm_fh "<hr><a name=\"patterns\"></a><h2>Rejected mail by reason</h2>\n";
    print $htm_fh "<table border=0 width=\"100%\"><tr><td><table border=1>\n";
    print $htm_fh "<tr><th>&nbsp;</th><th>Total</th>\n";
  }
  if ($xls_fh) {
    $ws_global->write($row++, $col, "Rejected mail by reason", $f_header2);
    set_worksheet_line($ws_global, $row++, 1, ["Total"], $f_headertab);
  }

  my $href = ($merge_reports) ? $report_totals{rejected_mail_by_reason} : \%rejected_count_by_reason;
  my(@chartdatanames, @chartdatavals_count);

  foreach my $reason (top_n_sort($topcount, $href, undef, undef)) {
    printf $txt_fh ("$txt_format1\n",$reason,$href->{$reason}) if $txt_fh;
    printf $htm_fh ("$htm_format1\n",$reason,$href->{$reason}) if $htm_fh;
    set_worksheet_line($ws_global, $row++, 0, [$reason,$href->{$reason}], $f_default) if $xls_fh;
    push(@chartdatanames, $reason);
    push(@chartdatavals_count, $href->{$reason});
  }

  $row++ if $xls_fh;
  print $txt_fh "\n" if $txt_fh;

  if ($htm_fh) {
    print $htm_fh "</tr></table></td><td>";
    # A chart is only drawn when there are at least two reasons to compare.
    if ($HAVE_GD_Graph_pie && $charts && ($#chartdatavals_count > 0)) {
      my @data = (
        \@chartdatanames,
        \@chartdatavals_count
      );
      my $graph = GD::Graph::pie->new(200, 200);
      $graph->set(
        x_label => 'Rejection Reasons',
        y_label => 'Messages',
        title => 'By count',
      );
      my $gd = $graph->plot(\@data) or warn($graph->error);
      if ($gd) {
        # Three-argument open on a lexical handle: the directory name can
        # never be read as part of the open mode, and no global handle is
        # shared with the other chart writers.
        my $png_file = "$chartdir/rejections_count.png";
        open(my $img_fh, '>', $png_file) or die "Could not write $png_file: $!\n";
        binmode $img_fh;
        print $img_fh $gd->png;
        # Buffered write errors only surface when the handle is closed.
        close($img_fh) or die "Could not write $png_file: $!\n";
        print $htm_fh "<img src=\"$chartrel/rejections_count.png\">";
      }
    }
    print $htm_fh "</td></tr></table>\n\n";
  }
}
2779
2780
2781
2782
059ec3d9 2783
1b4fe9dd
PH
2784#######################################################################
2785# print_transport();
8e669ac1 2786#
1b4fe9dd 2787# print_transport();
8e669ac1 2788#
1b4fe9dd
PH
2789# Print totals by transport.
2790#######################################################################
sub print_transport {
  # Report deliveries grouped by transport: a three-column table (transport,
  # volume, messages) on every enabled output and, for HTML when
  # GD::Graph::pie is available and charts are enabled, two pie charts
  # ($chartdir/transports_count.png and $chartdir/transports_vol.png).
  #
  # Takes no arguments. Data comes from $report_totals{transport} when
  # merging earlier reports, otherwise from %transported_data,
  # %transported_data_gigs and %transported_count. The worksheet row counter
  # $row is advanced. Dies if a chart file cannot be written.
  my(@chartdatanames);
  my(@chartdatavals_count);
  my(@chartdatavals_vol);
  no integer; #Lose this for charting the data.

  my $txt_format1 = " %-18s %6s %6d";
  my $htm_format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%d</td>";

  if ($txt_fh) {
    print $txt_fh "Deliveries by transport\n";
    print $txt_fh "-----------------------";
    print $txt_fh "\n Volume Messages\n";
  }
  if ($htm_fh) {
    print $htm_fh "<hr><a name=\"transport\"></a><h2>Deliveries by Transport</h2>\n";
    print $htm_fh "<table border=0 width=\"100%\"><tr><td><table border=1>\n";
    print $htm_fh "<tr><th>&nbsp;</th><th>Volume</th><th>Messages</th>\n";
  }
  if ($xls_fh) {
    $ws_global->write(++$row, $col, "Deliveries by transport", $f_header2);
    $ws_global->write(++$row, 1, ["Volume", "Messages"], $f_headertab);
  }

  if ($merge_reports) {
    # We are getting our data from previous reports.
    foreach my $key (sort keys %{$report_totals{transport}}) {
      my $count = get_report_total($report_totals{transport}{$key},'Messages');
      my @content=($key, volume_rounded($report_totals{transport}{$key}{Volume},
        $report_totals{transport}{$key}{'Volume-gigs'}), $count);
      push(@chartdatanames, $key);
      push(@chartdatavals_count, $count);
      push(@chartdatavals_vol, $report_totals{transport}{$key}{'Volume-gigs'}*$gig + $report_totals{transport}{$key}{Volume} );
      printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
      printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
      $ws_global->write(++$row, 0, \@content) if $xls_fh;
    }
  }
  else {
    # We are getting our data from mainlog files.
    foreach my $key (sort keys %transported_data) {
      my @content=($key, volume_rounded($transported_data{$key},$transported_data_gigs{$key}),
        $transported_count{$key});
      push(@chartdatanames, $key);
      push(@chartdatavals_count, $transported_count{$key});
      push(@chartdatavals_vol, $transported_data_gigs{$key}*$gig + $transported_data{$key});
      printf $txt_fh ("$txt_format1\n", @content) if $txt_fh;
      printf $htm_fh ("$htm_format1\n", @content) if $htm_fh;
      $ws_global->write(++$row, 0, \@content) if $xls_fh;
    }
  }
  print $txt_fh "\n" if $txt_fh;

  if ($htm_fh) {
    # Draw one pie chart of the given values (labelled with the transport
    # names), save it as $chartdir/$png_name and emit the <img> tag.
    # Nothing is drawn unless charting is available and enabled and there
    # are at least two values to compare.
    my $emit_pie_chart = sub {
      my($values_aref, $png_name, @graph_options) = @_;
      return unless $HAVE_GD_Graph_pie && $charts && ($#$values_aref > 0);

      my $graph = GD::Graph::pie->new(200, 200);
      $graph->set(@graph_options);
      my $gd = $graph->plot([\@chartdatanames, $values_aref]) or warn($graph->error);
      return unless $gd;

      # Three-argument open on a lexical handle: the directory name can
      # never be read as part of the open mode, and no global handle is
      # shared between the chart writers.
      my $png_file = "$chartdir/$png_name";
      open(my $img_fh, '>', $png_file) or die "Could not write $png_file: $!\n";
      binmode $img_fh;
      print $img_fh $gd->png;
      # Buffered write errors only surface when the handle is closed.
      close($img_fh) or die "Could not write $png_file: $!\n";
      print $htm_fh "<img src=\"$chartrel/$png_name\">";
    };

    print $htm_fh "</tr></table></td><td>";
    $emit_pie_chart->(\@chartdatavals_count, 'transports_count.png',
      x_label => 'Transport',
      y_label => 'Messages',
      title => 'By count',
    );
    print $htm_fh "</td><td>";
    $emit_pie_chart->(\@chartdatavals_vol, 'transports_vol.png',
      title => 'By volume',
    );
    print $htm_fh "</td></tr></table>\n\n";
  }
}
2893
2894
2895
1b4fe9dd
PH
2896#######################################################################
2897# print_relay();
8e669ac1 2898#
1b4fe9dd 2899# print_relay();
8e669ac1 2900#
1b4fe9dd
PH
2901# Print our totals by relay.
2902#######################################################################
sub print_relay {
  # Report relayed messages: one line per sender/destination pair found in
  # %relayed with its count, followed by the total shown and the number left
  # unshown ($relayed_unshown). Output goes to the text and HTML reports and,
  # when $xls_fh is set, to the $ws_relayed worksheet. Takes no arguments.
  my $title = "Relayed messages";
  my $xls_row = 1;   # Next row to fill on the relay worksheet.

  print $htm_fh "<hr><a name=\"$title\"></a><h2>$title</h2>\n" if $htm_fh;

  if (!(scalar(keys %relayed) > 0 || $relayed_unshown > 0)) {
    # Nothing was relayed at all.
    print $txt_fh "No relayed messages\n-------------------\n\n" if $txt_fh;
    print $htm_fh "No relayed messages\n\n" if $htm_fh;
    $xls_row++ if $xls_fh;
  }
  else {
    my $txt_row_format = "%7d %s\n => %s\n";
    my $htm_row_format = "<tr><td align=\"right\">%d</td><td>%s</td><td>%s</td>\n";
    my $shown_total = 0;
    my $separator = "";

    # Headings.
    printf $txt_fh ("%s\n%s\n\n", $title, "-" x length($title)) if $txt_fh;
    if ($htm_fh) {
      print $htm_fh "<table border=1>\n";
      print $htm_fh "<tr><th>Count</th><th>From</th><th>To</th>\n";
    }
    if ($xls_fh) {
      $ws_relayed->write($xls_row++, $col, $title, $f_header2);
      set_worksheet_line($ws_relayed, $xls_row++, 0, ["Count", "From", "To"], $f_headertab);
    }

    foreach my $relay_key (sort keys %relayed) {
      my $count = $relayed{$relay_key};
      $shown_total += $count;

      # Drop the H= and A= markers, then split into the two sides of "=> ".
      (my $route = $relay_key) =~ s/[HA]=//g;
      my($from, $to) = split(/=> /, $route);
      my @cells = ($count, $from, $to);

      printf $txt_fh ($txt_row_format, @cells) if $txt_fh;
      printf $htm_fh ($htm_row_format, @cells) if $htm_fh;
      set_worksheet_line($ws_relayed, $xls_row++, 0, \@cells) if $xls_fh;

      # A blank line precedes the total only when at least one row was shown.
      $separator = "\n";
    }

    print $htm_fh "</table>\n<p>\n" if $htm_fh;
    foreach my $fh ($txt_fh, $htm_fh) {
      print $fh "${separator}Total: $shown_total (plus $relayed_unshown unshown)\n\n" if $fh;
    }
    if ($xls_fh) {
      set_worksheet_line($ws_relayed, $xls_row++, 0, [$shown_total, "Sum of shown" ]);
      set_worksheet_line($ws_relayed, $xls_row++, 0, [$relayed_unshown, "unshown"]);
      $xls_row++;
    }
  }
}
2959
2960
2961
1b4fe9dd
PH
2962#######################################################################
2963# print_errors();
8e669ac1 2964#
1b4fe9dd 2965# print_errors();
8e669ac1 2966#
1b4fe9dd
PH
2967# Print our errors. In HTML, we display them as a list rather than a table -
2968# Netscape doesn't like large tables!
2969#######################################################################
059ec3d9
PH
sub print_errors {
  # List every distinct error in %errors_count with its count, followed by
  # the total number of errors. Nothing at all is printed when there are no
  # errors. Output goes to the text and HTML reports and, when $xls_fh is
  # set, to the $ws_errors worksheet. Takes no arguments.
  #
  # Side effect: the file-level worksheet row counter $row is reset to 1
  # before anything is written.
  my $total_errors = 0;
  $row=1;

  if (scalar(keys %errors_count) != 0) {
    my $temp = "List of errors";
    my $htm_format = "<li>%d - %s\n";

    printf $txt_fh ("%s\n%s\n\n", $temp, "-" x length($temp)) if $txt_fh;
    if ($htm_fh) {
      print $htm_fh "<hr><a name=\"errors\"></a><h2>$temp</h2>\n";
      print $htm_fh "<ul><li><b>Count - Error</b>\n";
    }
    if ($xls_fh) {
      $ws_errors->write($row++, 0, $temp, $f_header2);
      set_worksheet_line($ws_errors, $row++, 0, ["Count", "Error"], $f_headertab);
    }

    foreach my $key (sort keys %errors_count) {
      my $text = $key;
      chomp($text);
      $text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space.
      $total_errors += $errors_count{$key};

      if ($txt_fh) {
        printf $txt_fh ("%5d ", $errors_count{$key});
        # Wrap long messages: break at the first whitespace found after at
        # least 50 characters, and continue on an indented line.
        my $text_remaining = $text;
        while (length($text_remaining) > 65) {
          my($first,$rest) = $text_remaining =~ /(.{50}\S*)\s+(.+)/;
          last if !$first;
          printf $txt_fh ("%s\n\t ", $first);
          $text_remaining = $rest;
        }
        printf $txt_fh ("%s\n\n", $text_remaining);
      }

      if ($htm_fh) {
        #Translate HTML tag characters. Sergey Sholokh.
        # Work on a copy, so that the spreadsheet row written below still
        # receives the plain text rather than the HTML entities.
        my $htm_text = $text;
        $htm_text =~ s/</&lt;/g;
        $htm_text =~ s/>/&gt;/g;

        printf $htm_fh ($htm_format,$errors_count{$key},$htm_text);
      }
      if ($xls_fh) {
        set_worksheet_line($ws_errors, $row++, 0, [$errors_count{$key},$text]);
      }
    }

    $temp = "Errors encountered: $total_errors";

    if ($txt_fh) {
      print $txt_fh $temp, "\n";
      print $txt_fh "-" x length($temp),"\n";
    }
    if ($htm_fh) {
      print $htm_fh "</ul>\n<p>\n";
      print $htm_fh $temp, "\n";
    }
    if ($xls_fh) {
      set_worksheet_line($ws_errors, $row++, 0, [$total_errors, "Sum of Errors encountered"]);
    }
  }

}
3040
3041
1b4fe9dd
PH
3042#######################################################################
3043# parse_old_eximstat_reports();
8e669ac1 3044#
1b4fe9dd 3045# parse_old_eximstat_reports($fh);
8e669ac1 3046#
1b4fe9dd 3047# Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
8e669ac1 3048#
1b4fe9dd
PH
3049# To test that the merging still works after changes, do something like the following.
3050# All the diffs should produce no output.
8e669ac1 3051#
1b4fe9dd 3052# options='-bydomain -byemail -byhost -byedomain'
608bc29d 3053# options="$options -show_rt1,2,4 -show_dt 1,2,4"
1b4fe9dd
PH
3054# options="$options -pattern 'Completed Messages' /Completed/"
3055# options="$options -pattern 'Received Messages' /<=/"
8e669ac1 3056#
1b4fe9dd
PH
3057# ./eximstats $options mainlog > mainlog.txt
3058# ./eximstats $options -merge mainlog.txt > mainlog.2.txt
3059# diff mainlog.txt mainlog.2.txt
8e669ac1 3060#
1b4fe9dd
PH
3061# ./eximstats $options -html mainlog > mainlog.html
3062# ./eximstats $options -merge -html mainlog.txt > mainlog.2.html
3063# diff mainlog.html mainlog.2.html
8e669ac1 3064#
1b4fe9dd
PH
3065# ./eximstats $options -merge mainlog.html > mainlog.3.txt
3066# diff mainlog.txt mainlog.3.txt
8e669ac1 3067#
1b4fe9dd
PH
3068# ./eximstats $options -merge -html mainlog.html > mainlog.3.html
3069# diff mainlog.html mainlog.3.html
8e669ac1 3070#
1b4fe9dd
PH
3071# ./eximstats $options -nvr mainlog > mainlog.nvr.txt
3072# ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
3073# diff mainlog.txt mainlog.4.txt
8e669ac1 3074#
1b4fe9dd
PH
3075# # double_mainlog.txt should have twice the values that mainlog.txt has.
3076# ./eximstats $options mainlog mainlog > double_mainlog.txt
3077#######################################################################
059ec3d9
PH
3078sub parse_old_eximstat_reports {
3079 my($fh) = @_;
3080
3081 my(%league_table_value_entered, %league_table_value_was_zero, %table_order);
3082
608bc29d
SC
3083 my(%user_pattern_index);
3084 my $user_pattern_index = 0;
3085 map {$user_pattern_index{$_} = $user_pattern_index++} @user_descriptions;
3086 my $user_pattern_keys = join('|', @user_descriptions);
3087
059ec3d9 3088 while (<$fh>) {
d5692f86 3089 PARSE_OLD_REPORT_LINE:
059ec3d9
PH
3090 if (/Exim statistics from ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?) to ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?)/) {
3091 $begin = $1 if ($1 lt $begin);
3092 $end = $3 if ($3 gt $end);
3093 }
3094 elsif (/Grand total summary/) {
a83c7e95
SC
3095 # Fill in $report_totals{Received|Delivered}{Volume|Messages|Addresses|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
3096 my(@fields, @delivered_fields);
3097 my $doing_table = 0;
059ec3d9 3098 while (<$fh>) {
d5692f86
SC
3099 $_ = html2txt($_); #Convert general HTML markup to text.
3100 s/At least one addr//g; #Another part of the HTML output we don't want.
059ec3d9 3101
a83c7e95
SC
3102# TOTAL Volume Messages Addresses Hosts Domains Delayed Failed
3103# Received 26MB 237 177 23 8 3.4% 28 11.8%
3104# Delivered 13MB 233 250 99 88
d5692f86 3105 if (/TOTAL\s+(.*?)\s*$/) {
a83c7e95
SC
3106 $doing_table = 1;
3107 @delivered_fields = split(/\s+/,$1);
3108
059ec3d9 3109 #Delayed and Failed have two columns each, so add the extra field names in.
a83c7e95
SC
3110 splice(@delivered_fields,-1,1,'DelayedPercent','Failed','FailedPercent');
3111
3112 # Addresses only figure in the Delivered row, so remove them from the
3113 # normal fields.
3114 @fields = grep !/Addresses/, @delivered_fields;
d5692f86 3115 }
a83c7e95 3116 elsif (/(Received)\s+(.*?)\s*$/) {
d5692f86
SC
3117 print STDERR "Parsing $_" if $debug;
3118 add_to_totals($report_totals{$1},\@fields,$2);
3119 }
a83c7e95
SC
3120 elsif (/(Delivered)\s+(.*?)\s*$/) {
3121 print STDERR "Parsing $_" if $debug;
3122 add_to_totals($report_totals{$1},\@delivered_fields,$2);
3123 my $data = $2;
3124 # If we're merging an old report which doesn't include addresses,
3125 # then use the Messages field instead.
3126 unless (grep(/Addresses/, @delivered_fields)) {
3127 my %tmp;
3128 line_to_hash(\%tmp,\@delivered_fields,$data);
3129 add_to_totals($report_totals{Delivered},['Addresses'],$tmp{Messages});
3130 }
3131 }
3132 elsif (/(Rejects|Ham|Spam)\s+(.*?)\s*$/) {
3133 print STDERR "Parsing $_" if $debug;
3134 add_to_totals($report_totals{$1},['Messages','Hosts'],$2);
3135 }
3136 else {
3137 last if $doing_table;
3138 }
059ec3d9
PH
3139 }
3140 }
3141
3142 elsif (/User Specified Patterns/i) {
3143#User Specified Patterns
3144#-----------------------
3145# Total
3146# Description 85
3147
d5692f86 3148 while (<$fh>) { last if (/Total/); } #Wait until we get the table headers.
059ec3d9 3149 while (<$fh>) {
d5692f86
SC
3150 print STDERR "Parsing $_" if $debug;
3151 $_ = html2txt($_); #Convert general HTML markup to text.
3152 if (/^\s*(.*?)\s+(\d+)\s*$/) {
3153 $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
3154 add_to_totals($report_totals{patterns}{$1},['Total'],$2);
3155 }
3156 last if (/^\s*$/); #Finished if we have a blank line.
059ec3d9
PH
3157 }
3158 }
3159
608bc29d
SC
3160 elsif (/(^|<h2>)($user_pattern_keys) per /o) {
3161 # Parse User defined pattern histograms if they exist.
3162 parse_histogram($fh, $user_pattern_interval_count[$user_pattern_index{$2}] );
3163 }
3164
3165
059ec3d9
PH
3166 elsif (/Deliveries by transport/i) {
3167#Deliveries by transport
3168#-----------------------
3169# Volume Messages
3170# :blackhole: 70KB 51
3171# address_pipe 655KB 1
3172# smtp 11MB 151
3173
d5692f86 3174 while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers.
059ec3d9 3175 while (<$fh>) {
d5692f86
SC
3176 print STDERR "Parsing $_" if $debug;
3177 $_ = html2txt($_); #Convert general HTML markup to text.
3178 if (/(\S+)\s+(\d+\S*\s+\d+)/) {
3179 $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
3180 add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
3181 }
3182 last if (/^\s*$/); #Finished if we have a blank line.
059ec3d9
PH
3183 }
3184 }
608bc29d
SC
3185 elsif (/Messages received per/) {
3186 parse_histogram($fh, \@received_interval_count);
3187 }
3188 elsif (/Deliveries per/) {
3189 parse_histogram($fh, \@delivered_interval_count);
059ec3d9
PH
3190 }
3191
608bc29d
SC
3192 #elsif (/Time spent on the queue: (all messages|messages with at least one remote delivery)/) {
3193 elsif (/(Time spent on the queue|Delivery times|Receipt times): ((\S+) messages|messages with at least one remote delivery)((<[^>]*>)*\s*)$/) {
059ec3d9
PH
3194#Time spent on the queue: all messages
3195#-------------------------------------
3196#
3197#Under 1m 217 91.9% 91.9%
3198# 5m 2 0.8% 92.8%
3199# 3h 8 3.4% 96.2%
3200# 6h 7 3.0% 99.2%
3201# 12h 2 0.8% 100.0%
3202
3203 # Set a pointer to the queue bin so we can use the same code
3204 # block for both all messages and remote deliveries.
608bc29d
SC
3205 #my $bin_aref = ($1 eq 'all messages') ? \@qt_all_bin : \@qt_remote_bin;
3206 my($bin_aref, $times_aref, $overflow_sref);
3207 if ($1 eq 'Time spent on the queue') {
3208 $times_aref = \@queue_times;
3209 if ($2 eq 'all messages') {
3210 $bin_aref = \@qt_all_bin;
3211 $overflow_sref = \$qt_all_overflow;
3212 }
3213 else {
3214 $bin_aref = \@qt_remote_bin;
3215 $overflow_sref = \$qt_remote_overflow;
3216 }
3217 }
3218 elsif ($1 eq 'Delivery times') {
3219 $times_aref = \@delivery_times;
3220 if ($2 eq 'all messages') {
3221 $bin_aref = \@dt_all_bin;
3222 $overflow_sref = \$dt_all_overflow;
3223 }
3224 else {
3225 $bin_aref = \@dt_remote_bin;
3226 $overflow_sref = \$dt_remote_overflow;
3227 }
3228 }
3229 else {
3230 unless (exists $rcpt_times_bin{$3}) {
3231 initialise_rcpt_times($3);
3232 }
3233 $bin_aref = $rcpt_times_bin{$3};
3234 $times_aref = \@rcpt_times;
3235 $overflow_sref = \$rcpt_times_overflow{$3};
3236 }
3237
3238
a83c7e95 3239 my ($blank_lines, $reached_table) = (0,0);
059ec3d9 3240 while (<$fh>) {
d5692f86 3241 $_ = html2txt($_); #Convert general HTML markup to text.
a83c7e95
SC
3242 # The table is preceded by one blank line, and has one blank line
3243 # following it. As the table may be empty, the best way to determine
3244 # that we've finished it is to look for the second blank line.
3245 ++$blank_lines if /^\s*$/;
3246 last if ($blank_lines >=2); #Finished the table ?
3247 $reached_table = 1 if (/\d/);
d5692f86
SC
3248 next unless $reached_table;
3249 my $previous_seconds_on_queue = 0;
3250 if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
3251 print STDERR "Parsing $_" if $debug;
3252 my($modifier,$formated_time,$count) = ($1,$2,$3);
3253 my $seconds = unformat_time($formated_time);
3254 my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
3255 $previous_seconds_on_queue = $seconds;
3256 $time_on_queue = $seconds * 2 if ($modifier eq 'Over');
3257 my($i);
608bc29d
SC
3258 for ($i = 0; $i <= $#$times_aref; $i++) {
3259 if ($time_on_queue < $times_aref->[$i]) {
d5692f86
SC
3260 $$bin_aref[$i] += $count;
3261 last;
3262 }
3263 }
608bc29d
SC
3264 $$overflow_sref += $count if ($i > $#$times_aref);
3265
d5692f86 3266 }
059ec3d9
PH
3267 }
3268 }
3269
3270 elsif (/Relayed messages/) {
3271#Relayed messages
3272#----------------
3273#
3274# 1 addr.domain.com [1.2.3.4] a.user@domain.com
3275# => addr2.domain2.com [5.6.7.8] a2.user2@domain2.com
3276#
3277#<tr><td align="right">1</td><td>addr.domain.com [1.2.3.4] a.user@domain.com </td><td>addr2.domain2.com [5.6.7.8] a2.user2@domain2.com</td>
3278
3279 my $reached_table = 0;
3280 my($count,$sender);
3281 while (<$fh>) {
d5692f86
SC
3282 unless ($reached_table) {
3283 last if (/No relayed messages/);
3284 $reached_table = 1 if (/^\s*\d/ || />\d+</);
3285 next unless $reached_table;
3286 }
3287 if (/>(\d+)<.td><td>(.*?) ?<.td><td>(.*?)</) {
3288 update_relayed($1,$2,$3);
3289 }
3290 elsif (/^\s*(\d+)\s+(.*?)\s*$/) {
3291 ($count,$sender) = ($1,$2);
3292 }
3293 elsif (/=>\s+(.*?)\s*$/) {
3294 update_relayed($count,$sender,$1);
3295 }
3296 else {
3297 last; #Finished the table ?
3298 }
059ec3d9
PH
3299 }
3300 }
3301
3302 elsif (/Top (.*?) by (message count|volume)/) {
3303#Top 50 sending hosts by message count
3304#-------------------------------------
3305#
3306# 48 1468KB local
50adf73a
SC
3307# Could also have average values for HTML output.
3308# 48 1468KB 30KB local
3309
059ec3d9
PH
3310 my($category,$by_count_or_volume) = ($1,$2);
3311
3312 #As we show 2 views of each table (by count and by volume),
3313 #most (but not all) entries will appear in both tables.
3314 #Set up a hash to record which entries we have already seen
3315 #and one to record which ones we are seeing for the first time.
3316 if ($by_count_or_volume =~ /count/) {
d5692f86
SC
3317 undef %league_table_value_entered;
3318 undef %league_table_value_was_zero;
3319 undef %table_order;
059ec3d9
PH
3320 }
3321
3322 #As this section processes multiple different table categories,
3323 #set up pointers to the hashes to be updated.
a83c7e95 3324 my($messages_href,$addresses_href,$data_href,$data_gigs_href);
059ec3d9 3325 if ($category =~ /local sender/) {
a83c7e95
SC
3326 $messages_href = \%received_count_user;
3327 $addresses_href = undef;
d5692f86
SC
3328 $data_href = \%received_data_user;
3329 $data_gigs_href = \%received_data_gigs_user;
059ec3d9
PH
3330 }
3331 elsif ($category =~ /sending (\S+?)s?\b/) {
3332 #Top 50 sending (host|domain|email|edomain)s
3333 #Top sending (host|domain|email|edomain)
a83c7e95 3334 $messages_href = \%{$received_count{"\u$1"}};
d5692f86
SC
3335 $data_href = \%{$received_data{"\u$1"}};
3336 $data_gigs_href = \%{$received_data_gigs{"\u$1"}};
059ec3d9
PH
3337 }
3338 elsif ($category =~ /local destination/) {
a83c7e95
SC
3339 $messages_href = \%delivered_messages_user;
3340 $addresses_href = \%delivered_addresses_user;
d5692f86
SC
3341 $data_href = \%delivered_data_user;
3342 $data_gigs_href = \%delivered_data_gigs_user;
059ec3d9
PH
3343 }
3344 elsif ($category =~ /(\S+) destination/) {
3345 #Top 50 (host|domain|email|edomain) destinations
3346 #Top (host|domain|email|edomain) destination
a83c7e95
SC
3347 $messages_href = \%{$delivered_messages{"\u$1"}};
3348 $addresses_href = \%{$delivered_addresses{"\u$1"}};
d5692f86
SC
3349 $data_href = \%{$delivered_data{"\u$1"}};
3350 $data_gigs_href = \%{$delivered_data_gigs{"\u$1"}};
059ec3d9 3351 }
a83c7e95
SC
3352 elsif ($category =~ /rejected ips/) {
3353 $messages_href = \%rejected_count_by_ip;
3354 }
3355 elsif ($category =~ /non-rejected spamming ips/) {
3356 $messages_href = \%spam_count_by_ip;
3357 }
3358 elsif ($category =~ /mail rejection reasons/) {
3359 $messages_href = \%rejected_count_by_reason;
3360 }
059ec3d9
PH
3361
3362 my $reached_table = 0;
a83c7e95 3363 my $row_re;
059ec3d9 3364 while (<$fh>) {
d5692f86 3365 # Watch out for empty tables.
a83c7e95 3366 goto PARSE_OLD_REPORT_LINE if (/<h2>/ or (/^\s*[a-zA-Z]/ && !/^\s*Messages/));
d5692f86
SC
3367
3368 $_ = html2txt($_); #Convert general HTML markup to text.
3369
a83c7e95
SC
3370 # Messages Addresses Bytes Average
3371 if (/^\s*Messages/) {
3372 my $pattern = '^\s*(\d+)';
3373 $pattern .= (/Addresses/) ? '\s+(\d+)' : '()';
3374 $pattern .= (/Bytes/) ? '\s+([\dKMGB]+)' : '()';
3375 $pattern .= (/Average/) ? '\s+[\dKMGB]+' : '';
3376 $pattern .= '\s+(.*?)\s*$';
3377 $row_re = qr/$pattern/;
3378 $reached_table = 1;
3379 next;
3380 }
d5692f86 3381 next unless $reached_table;
50adf73a 3382
a83c7e95 3383 my($messages, $addresses, $rounded_volume, $entry);
059ec3d9 3384
a83c7e95
SC
3385 if (/$row_re/) {
3386 ($messages, $addresses, $rounded_volume, $entry) = ($1, $2, $3, $4);
d5692f86 3387 }
a83c7e95
SC
3388 else {
3389 #Else we have finished the table and we may need to do some
3390 #kludging to retain the order of the entries.
3391
d5692f86
SC
3392 if ($by_count_or_volume =~ /volume/) {
3393 #Add a few bytes to appropriate entries to preserve the order.
d5692f86
SC
3394 foreach $rounded_volume (keys %table_order) {
3395 #For each rounded volume, we want to create a list which has things
3396 #ordered from the volume table at the front, and additional things
3397 #from the count table ordered at the back.
3398 @{$table_order{$rounded_volume}{volume}} = () unless defined $table_order{$rounded_volume}{volume};
3399 @{$table_order{$rounded_volume}{'message count'}} = () unless defined $table_order{$rounded_volume}{'message count'};
3400 my(@order,%mark);
3401 map {$mark{$_} = 1} @{$table_order{$rounded_volume}{volume}};
3402 @order = @{$table_order{$rounded_volume}{volume}};
3403 map {push(@order,$_)} grep(!$mark{$_},@{$table_order{$rounded_volume}{'message count'}});
3404
3405 my $bonus_bytes = $#order;
3406 $bonus_bytes = 511 if ($bonus_bytes > 511); #Don't go over the half-K boundary!
3407 while (@order and ($bonus_bytes > 0)) {
3408 my $entry = shift(@order);
3409 if ($league_table_value_was_zero{$entry}) {
3410 $$data_href{$entry} += $bonus_bytes;
3411 print STDERR "$category by $by_count_or_volume: added $bonus_bytes bonus bytes to $entry\n" if $debug;
3412 }
3413 $bonus_bytes--;
3414 }
3415 }
3416 }
d5692f86
SC
3417 last;
3418 }
a83c7e95
SC
3419
3420 # Store a new table entry.
3421
3422 # Add the entry into the %table_order hash if it has a rounded
3423 # volume (KB/MB/GB).
3424 push(@{$table_order{$rounded_volume}{$by_count_or_volume}},$entry) if ($rounded_volume =~ /\D/);
3425
3426 unless ($league_table_value_entered{$entry}) {
3427 $league_table_value_entered{$entry} = 1;
3428 unless ($$messages_href{$entry}) {
3429 $$messages_href{$entry} = 0;
3430 $$addresses_href{$entry} = 0;
3431 $$data_href{$entry} = 0;
3432 $$data_gigs_href{$entry} = 0;
3433 $league_table_value_was_zero{$entry} = 1;
3434 }
3435
3436 $$messages_href{$entry} += $messages;
3437
3438 # When adding the addresses, be aware that we could be merging
3439 # an old report which does not include addresses. In this case,
3440 # we add the messages instead.
3441 $$addresses_href{$entry} += ($addresses) ? $addresses : $messages;
3442
3443 #Add the rounded value to the data and data_gigs hashes.
3444 un_round($rounded_volume,\$$data_href{$entry},\$$data_gigs_href{$entry}) if $rounded_volume;
3445 print STDERR "$category by $by_count_or_volume: added $messages,$rounded_volume to $entry\n" if $debug;
3446 }
3447
059ec3d9
PH
3448 }
3449 }
3450 elsif (/List of errors/) {
3451#List of errors
3452#--------------
3453#
3454# 1 07904931641@one2one.net R=external T=smtp: SMTP error
3455# from remote mailer after RCPT TO:<07904931641@one2one.net>:
3456# host mail.one2one.net [193.133.192.24]: 550 User unknown
3457#
3458#<li>1 - ally.dufc@dunbar.org.uk R=external T=smtp: SMTP error from remote mailer after RCPT TO:<ally.dufc@dunbar.org.uk>: host mail.dunbar.org.uk [216.167.89.88]: 550 Unknown local part ally.dufc in <ally.dufc@dunbar.org.uk>
3459
3460
3461 my $reached_table = 0;
3462 my($count,$error,$blanks);
3463 while (<$fh>) {
d5692f86
SC
3464 $reached_table = 1 if (/^( *|<li>)(\d+)/);
3465 next unless $reached_table;
059ec3d9 3466
d5692f86
SC
3467 s/^<li>(\d+) -/$1/; #Convert an HTML line to a text line.
3468 $_ = html2txt($_); #Convert general HTML markup to text.
059ec3d9 3469
d5692f86
SC
3470 if (/\t\s*(.*)/) {
3471 $error .= ' ' . $1; #Join a multiline error.
3472 }
3473 elsif (/^\s*(\d+)\s+(.*)/) {
3474 if ($error) {
059ec3d9 3475 #Finished with a previous multiline error so save it.
d5692f86
SC
3476 $errors_count{$error} = 0 unless $errors_count{$error};
3477 $errors_count{$error} += $count;
3478 }
3479 ($count,$error) = ($1,$2);
3480 }
3481 elsif (/Errors encountered/) {
3482 if ($error) {
059ec3d9 3483 #Finished the section, so save our stored last error.
d5692f86
SC
3484 $errors_count{$error} = 0 unless $errors_count{$error};
3485 $errors_count{$error} += $count;
3486 }
3487 last;
3488 }
059ec3d9
PH
3489 }
3490 }
3491
3492 }
3493}
3494
608bc29d
SC
#######################################################################
# parse_histogram($fh, \@delivered_interval_count);
#
# Read a text histogram from a previously generated report and add the
# per-interval counts into the supplied array of counters.
#
# Lines are skipped until the first one that starts with "00". Rows
# may be in "hh:mm count" or "hh-hh count" form. The first line that is
# in neither form ends the table (that line has been read from $fh).
# Counts are only added when $hist_opt is true. $_ is used as the line
# buffer, exactly as a plain while (<$fh>) loop would.
#######################################################################
sub parse_histogram {
  my($fh, $counters_aref) = @_;

  # Messages received per hour (each dot is 2 messages)
  #---------------------------------------------------
  #
  #00-01    106 .....................................................
  #01-02    103 ...................................................

  my $in_table = 0;
  LINE: while (defined($_ = <$fh>)) {
    unless ($in_table) {
      next LINE unless /^00/;
      $in_table = 1;
    }
    print STDERR "Parsing $_" if $debug;

    my($start_minutes, $count);
    if (/^(\d+):(\d+)\s+(\d+)/) {        #hh:mm start time format ?
      ($start_minutes, $count) = ($1*60 + $2, $3);
    }
    elsif (/^(\d+)-(\d+)\s+(\d+)/) {     #hh-hh start-end time format ?
      ($start_minutes, $count) = ($1*60, $3);
    }
    else {                               #Finished the table ?
      last LINE;
    }

    # $hist_interval is only consulted when histograms are enabled.
    next LINE unless $hist_opt;
    $$counters_aref[$start_minutes/$hist_interval] += $count;
  }
}
059ec3d9
PH
3524
3525
1b4fe9dd
PH
#######################################################################
# update_relayed();
#
#  update_relayed($count,$sender,$recipient);
#
# Adds $count to the %relayed entry for this sender/recipient pair.
# Currently only used when merging reports.
#
# If $relay_pattern is defined and the generated key matches it, the
# count is added to $relayed_unshown instead.
#######################################################################
sub update_relayed {
  my($count,$sender,$recipient) = @_;

  #The key carries 'H=' and 'A=' prefixes which can be used in searches:
  #every space-preceded address (or '<>') gets an 'A=' in front of it.
  (my $key = "H=$sender => H=$recipient") =~ s/ ([^=\s]+\@\S+|<>)/ A=$1/g;

  return $relayed_unshown += $count
    if (defined $relay_pattern && $key =~ /$relay_pattern/o);

  $relayed{$key} = 0 unless defined $relayed{$key};
  return $relayed{$key} += $count;
}
3549
3550
1b4fe9dd
PH
#######################################################################
# add_to_totals();
#
#  add_to_totals(\%totals,\@keys,$values);
#
# Given a line of space separated values, add them into the provided
# hash using @keys as the hash keys. Values and keys are paired by
# position; surplus values or surplus keys are ignored.
#
# If a value contains a '%', then the value is set rather than added.
# Otherwise the value is handed to un_round() together with references
# to the I<Key> and I<Key>-gigs entries of the hash.
#######################################################################
sub add_to_totals {
  my($totals_href,$keys_aref,$values) = @_;
  my @fields = split(/\s+/,$values);

  my $last = ($#fields < $#$keys_aref) ? $#fields : $#$keys_aref;
  foreach my $idx (0 .. $last) {
    my($key, $value) = ($keys_aref->[$idx], $fields[$idx]);

    if ($value =~ /%/) {
      #Percentages replace whatever was there.
      $totals_href->{$key} = $value;
      next;
    }

    #Make sure both accumulators exist before un_round() updates them.
    my $gigs_key = "$key-gigs";
    $totals_href->{$key} ||= 0;
    $totals_href->{$gigs_key} ||= 0;
    un_round($value, \$totals_href->{$key}, \$totals_href->{$gigs_key});
    print STDERR "Added $value to $key - $$totals_href{$key} , " . $$totals_href{$gigs_key} . "GB.\n" if $debug;
  }
}
3579
a83c7e95
SC
3580
#######################################################################
# line_to_hash();
#
#  line_to_hash(\%hash,\@keys,$line);
#
# Given a line of space separated values, set them into the provided
# hash using @keys as the hash keys. Values and keys are paired by
# position; whichever list is longer has its surplus ignored.
#######################################################################
sub line_to_hash {
  my($href,$keys_aref,$values) = @_;
  my @fields = split(/\s+/,$values);

  my $pairs = (scalar(@fields) < scalar(@$keys_aref)) ? scalar(@fields) : scalar(@$keys_aref);
  return unless $pairs;

  #Assign all the pairs in one go with a hash slice.
  @$href{ @$keys_aref[0 .. $pairs - 1] } = @fields[0 .. $pairs - 1];
  return;
}
3596
3597
1b4fe9dd
PH
#######################################################################
# get_report_total();
#
#  $total = get_report_total(\%hash,$key);
#
# If %hash contains a true "${key}-gigs" entry, return
#
#   $hash{$key} + $gig * $hash{"${key}-gigs"}
#
# otherwise return $hash{$key}, or 0 if that is false. Integer
# arithmetic is switched off for the calculation.
#######################################################################
sub get_report_total {
  no integer;
  my($hash_ref,$key) = @_;

  my $units = $hash_ref->{$key};
  my $gigs  = $hash_ref->{"${key}-gigs"};

  return $units + $gig * $gigs if $gigs;
  return $units || 0;
}
3615
1b4fe9dd
PH
#######################################################################
# html2txt();
#
#  $text_line = html2txt($html_line);
#
# Convert a line from html to text. Currently we just convert HTML tags
# to spaces and convert &gt;, &lt;, and &nbsp; entities back.
#
# The conversion is done on a private copy of the argument: neither the
# caller's $_ nor the variable that was passed in is modified.
#######################################################################
sub html2txt {
  my($text) = @_;

  # Convert HTML tags to spacing. Note that the reports may contain <Userid> and
  # <Userid@Domain> words, so explicitly specify the HTML tags we will remove
  # (the ones used by this program). If someone is careless enough to have their
  # Userid the same as an HTML tag, there's not much we can do about it.
  # This runs before the entity conversion so that an address written as
  # &lt;user@domain&gt; cannot be mistaken for a tag.
  $text =~ s/<\/?(html|head|title|body|h\d|ul|li|a\s+|table|tr|td|th|pre|hr|p|br)\b.*?>/ /g;

  $text =~ s/\&lt\;/\</g;      #Convert '&lt;' to '<'.
  $text =~ s/\&gt\;/\>/g;      #Convert '&gt;' to '>'.
  $text =~ s/\&nbsp\;/ /g;     #Convert '&nbsp;' to ' '.
  return($text);
}
3638
1b4fe9dd
PH
#######################################################################
# get_next_arg();
#
#  $arg = get_next_arg();
#
# Because eximstats arguments are often passed as variables,
# we can't rely on shell parsing to deal with quotes. This
# subroutine returns $ARGV[1] and does a shift. If $ARGV[1]
# starts with a quote (' or "), and doesn't end in one, then
# we append the next argument to it and shift again. We repeat
# until we've got all of the argument.
#
# This isn't perfect as all white space gets reduced to one space,
# but it's as good as we can get! If it's essential that spacing
# be preserved precisely, then you get that by not using shell
# variables.
#
# Dies with "Mismatched argument quotes" if the arguments run out
# (or an empty argument is reached) before a complete value is found.
#######################################################################
sub get_next_arg {
  my $arg = '';
  my $matched_pattern = 0;

  # Test for presence rather than truth, so that an argument of "0"
  # (on its own or as a word inside a quoted value) is not mistaken
  # for the end of the argument list.
  while (defined $ARGV[1] && $ARGV[1] ne '') {
    $arg .= ' ' if $arg ne '';
    $arg .= $ARGV[1]; shift(@ARGV);

    # An unquoted word is a complete argument.
    if ($arg !~ /^['"]/) {
      $matched_pattern = 1;
      last;
    }

    # A quoted value is complete once the closing quote matches the
    # opening one; the quotes themselves are removed.
    if ($arg =~ s/^(['"])(.*)\1$/$2/) {
      $matched_pattern = 1;
      last;
    }
  }
  die "Mismatched argument quotes - <$arg>.\n" unless $matched_pattern;
  return $arg;
}
3674
d5692f86
SC
#######################################################################
# set_worksheet_line($ws_global, $startrow, $startcol, \@content, $format);
#
# Write the elements of @content into consecutive cells of one row,
# starting at ($startrow, $startcol) and moving one column to the right
# for each element. Every cell is written with the same $format.
#######################################################################
sub set_worksheet_line {
  my ($worksheet, $row, $col, $content, $format) = @_;

  for my $offset (0 .. $#$content) {
    $worksheet->write($row, $col + $offset, $content->[$offset], $format);
  }
}
059ec3d9 3690
608bc29d
SC
#######################################################################
#  @rcpt_times = parse_time_list($string);
#
# Parse a comma separated list of time values in seconds given by
# the user and fill an array. Each item may be a number or a simple
# arithmetic expression such as 5*60. The result is sorted numerically.
#
# Return a default list if $string is undefined.
# Return () if $string eq '0'.
#
# Items are evaluated with a string eval, so only text made up purely
# of digits and the operators + - * / is ever evaluated. Anything else,
# and anything that fails to evaluate (e.g. 5/0), counts as 0.
#######################################################################
sub parse_time_list {
  my($string) = @_;
  if (! defined $string) {
    return(60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60, 12*60*60, 24*60*60);
  }

  my @times;
  foreach my $item (split(/,/, $string)) {
    my $seconds;
    # Never hand arbitrary text to eval: this value comes straight
    # from the command line.
    $seconds = eval($item) if ($item =~ /^[\d\+\-\*\/]+$/);
    push(@times, (defined $seconds) ? $seconds + 0 : 0);
  }

  @times = sort { $a <=> $b } @times;
  @times = () if ($#times == 0 && $times[0] == 0);
  return(@times);
}
3711
3712
#######################################################################
# initialise_rcpt_times($protocol);
#
# Zero the receipt time counters for the specified protocol: one
# $rcpt_times_bin{$protocol}[$i] for every entry in @rcpt_times, plus
# $rcpt_times_overflow{$protocol}.
#######################################################################
sub initialise_rcpt_times {
  my($protocol) = @_;
  $rcpt_times_bin{$protocol}[$_] = 0 foreach (0 .. $#rcpt_times);
  $rcpt_times_overflow{$protocol} = 0;
}
3724
3725
059ec3d9
PH
3726##################################################
3727# Main Program #
3728##################################################
3729
3730
# Default settings. Most of these can be changed by the command line
# options decoded below.

$last_timestamp = $last_date = '';

# Everything that is shown by default.
$show_errors = $show_relay = $show_transport = $local_league_table = 1;
$topcount = 50;
$include_remote_users = $include_original_destination = 0;
$hist_opt = $volume_rounding = 1;
$localtime_offset = calculate_localtime_offset(); # PH/FANF

# Chart generation is off unless -charts is given.
$charts = $charts_option_specified = 0;
$chartrel = $chartdir = ".";

# Queue times get the default list; receipt and delivery time tables
# are off until asked for.
@queue_times = parse_time_list();
@rcpt_times = @delivery_times = ();

$last_offset = '';
$offset_seconds = 0;

$row = 1;
$col = $col_hist = $run_hist = 0;
my %output_files;     # What output files have been specified?
3761
059ec3d9
PH
# Decode options
#
# Leading arguments that start with '-' are consumed one at a time; the
# first argument that does not start with '-' ends option processing and
# whatever is left in @ARGV is treated as the list of log files.
# The order of the tests below matters: the first matching pattern wins.

while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-') {
  if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 }
  elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 }
  # -nr on its own turns relay information off; -nr<d>pattern<d>, where
  # <d> is any one delimiter character, sets $relay_pattern instead.
  elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/) {
    if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
  }
  elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/) { @queue_times = parse_time_list($1) }
  elsif ($ARGV[0] =~ /^-nt$/) { $show_transport = 0 }
  # Same delimiter convention as -nr, but for $transport_pattern.
  elsif ($ARGV[0] =~ /^\-nt(.?)(.*)\1$/)
    {
    if ($1 eq "") { $show_transport = 0 } else { $transport_pattern = $2 }
    }
  elsif ($ARGV[0] =~ /^-t(\d+)$/) { $topcount = $1 }
  elsif ($ARGV[0] =~ /^-tnl$/) { $local_league_table = 0 }
  # Output selection. The optional "=name" part is handed to
  # get_filehandle() together with the record of outputs chosen so far.
  elsif ($ARGV[0] =~ /^-txt=?(\S*)$/) { $txt_fh = get_filehandle($1,\%output_files) }
  elsif ($ARGV[0] =~ /^-html=?(\S*)$/) { $htm_fh = get_filehandle($1,\%output_files) }
  elsif ($ARGV[0] =~ /^-xls=?(\S*)$/) {
    if ($HAVE_Spreadsheet_WriteExcel) {
      $xls_fh = get_filehandle($1,\%output_files);
    }
    else {
      warn "WARNING: CPAN Module Spreadsheet::WriteExcel not installed. Obtain from www.cpan.org\n";
    }
  }
  elsif ($ARGV[0] =~ /^-merge$/) { $merge_reports = 1 }
  # -charts is accepted even when the GD modules are missing; we only warn.
  elsif ($ARGV[0] =~ /^-charts$/) {
    $charts = 1;
    warn "WARNING: CPAN Module GD::Graph::pie not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_pie;
    warn "WARNING: CPAN Module GD::Graph::linespoints not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_linespoints;
  }
  # These two take their value from the following argument, which is
  # shifted off here in addition to the shift at the bottom of the loop.
  elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; }
  elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; }
  elsif ($ARGV[0] =~ /^-include_original_destination$/) { $include_original_destination = 1 }
  elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used.
  # Which league tables to produce. -byemaildomain and -byedomain are
  # two spellings of the same option.
  elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 }
  elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 }
  elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }
  elsif ($ARGV[0] =~ /^-byemaildomain$/) { $do_sender{Edomain} = 1 }
  elsif ($ARGV[0] =~ /^-byedomain$/) { $do_sender{Edomain} = 1 }
  elsif ($ARGV[0] =~ /^-emptyok$/) { $emptyOK = 1 }
  elsif ($ARGV[0] =~ /^-nvr$/) { $volume_rounding = 0 }
  # With no list attached, $1 is undefined and parse_time_list()
  # returns its default list of times.
  elsif ($ARGV[0] =~ /^-show_rt([,\d\+\-\*\/]+)?$/) { @rcpt_times = parse_time_list($1) }
  elsif ($ARGV[0] =~ /^-show_dt([,\d\+\-\*\/]+)?$/) { @delivery_times = parse_time_list($1) }
  elsif ($ARGV[0] =~ /^-d$/) { $debug = 1 }
  elsif ($ARGV[0] =~ /^--?h(elp)?$/){ help() }
  elsif ($ARGV[0] =~ /^-t_remote_users$/) { $include_remote_users = 1 }
  # -pattern takes two further arguments, a description and a pattern,
  # each fetched (and shifted) by get_next_arg().
  elsif ($ARGV[0] =~ /^-pattern$/)
    {
    push(@user_descriptions,get_next_arg());
    push(@user_patterns,get_next_arg());
    }
  elsif ($ARGV[0] =~ /^-utc$/)
    {
    # We don't need this value if the log is in UTC.
    $localtime_offset = undef;
    }
  else
    {
    print STDERR "Eximstats: Unknown or malformed option $ARGV[0]\n";
    help();
    }
  shift;
  }
3827
d5692f86
SC
# keep old default behaviour: with no output format requested, write
# the text report to STDOUT.
$txt_fh = \*STDOUT unless ($xls_fh or $htm_fh or $txt_fh);

# Check that all the charts options are specified.
if ($charts_option_specified && ! $charts) {
  warn "-charts option not specified. Use -help for help.\n";
}

# Default to display tables by sending Host.
unless ($do_sender{Domain} || $do_sender{Email} || $do_sender{Edomain}) {
  $do_sender{Host} = 1;
}
3838
# prepare xls Excel Workbook
# Only done when -xls was accepted above; creates the workbook, its
# fixed worksheets and the cell formats used by the rest of the program.
if (defined $xls_fh) {

  # Create a new Excel workbook
  $workbook = Spreadsheet::WriteExcel->new($xls_fh);

  # Add worksheets
  $ws_global = $workbook->addworksheet('Exim Statistik');
  # show $ws_global as initial sheet
  $ws_global->set_first_sheet();
  $ws_global->activate();

  # The relay and error sheets are only created if those sections are shown.
  if ($show_relay) {
    $ws_relayed = $workbook->addworksheet('Relayed Messages');
    $ws_relayed->set_column(1, 2, 80);   # Columns B-C width set to 80
  }
  if ($show_errors) {
    $ws_errors = $workbook->addworksheet('Errors');
  }


  # set column widths (set_column takes zero-based first and last column)
  $ws_global->set_column(0, 2, 20); # Columns A-C width set to 20
  $ws_global->set_column(3, 3, 15); # Column D width set to 15
  $ws_global->set_column(4, 4, 25); # Column E width set to 25

  # Define Formats
  $f_default = $workbook->add_format();

  # Bold, size 15.
  $f_header1 = $workbook->add_format();
  $f_header1->set_bold();
  #$f_header1->set_color('red');
  $f_header1->set_size('15');
  $f_header1->set_valign();
  # $f_header1->set_align('center');
  # $ws_global->write($row++, 2, "Testing Headers 1", $f_header1);

  # Bold, size 12.
  $f_header2 = $workbook->add_format();
  $f_header2->set_bold();
  $f_header2->set_size('12');
  $f_header2->set_valign();
  # $ws_global->write($row++, 2, "Testing Headers 2", $f_header2);

  # Create another header2 for use in merged cells (bold, size 8, centred).
  $f_header2_m = $workbook->add_format();
  $f_header2_m->set_bold();
  $f_header2_m->set_size('8');
  $f_header2_m->set_valign();
  $f_header2_m->set_align('center');

  # Percentages with one decimal place.
  $f_percent = $workbook->add_format();
  $f_percent->set_num_format('0.0%');

  # Bold table headings.
  $f_headertab = $workbook->add_format();
  $f_headertab->set_bold();
  $f_headertab->set_valign();
  # $ws_global->write($row++, 2, "Testing Headers tab", $f_headertab);

}
3898
059ec3d9 3899
# Initialise the queue/delivery/rcpt time counters: one "all" and one
# "remote" bin per configured time, all starting at zero.
foreach my $slot (0 .. $#queue_times) {
  ($qt_all_bin[$slot], $qt_remote_bin[$slot]) = (0, 0);
}
foreach my $slot (0 .. $#delivery_times) {
  ($dt_all_bin[$slot], $dt_remote_bin[$slot]) = (0, 0);
}
initialise_rcpt_times('all');
059ec3d9 3910
059ec3d9 3911
# Compute the number of slots for the histogram
# $hist_opt is the number of histogram divisions per hour; it must
# divide 60 exactly. A value of 0 disables the histograms.
if ($hist_opt > 0)
  {
  if ($hist_opt > 60 || 60 % $hist_opt != 0)
    {
    print STDERR "Eximstats: -h must specify a factor of 60\n";
    exit 1;
    }
  $hist_interval = 60/$hist_opt;                #Interval in minutes.
  $hist_number = (24*60)/$hist_interval;        #Number of intervals per day.
  @received_interval_count = (0) x $hist_number;
  @delivered_interval_count = (0) x $hist_number;
  for (my $user_pattern_index = 0; $user_pattern_index <= $#user_patterns; ++$user_pattern_index) {
    @{$user_pattern_interval_count[$user_pattern_index]} = (0) x $hist_number;
  }
  # The delivery time bins (@dt_all_bin, @dt_remote_bin) are deliberately
  # left alone here: they have one entry per @delivery_times value, not
  # one per histogram slot, and are initialised with the other time bins.
}
059ec3d9
PH
3931
#$queue_unknown = 0;

# Grand totals start at zero.
$total_received_data = $total_received_data_gigs = $total_received_count = 0;

$total_delivered_data = $total_delivered_data_gigs = 0;
$total_delivered_messages = $total_delivered_addresses = 0;

$qt_all_overflow = $qt_remote_overflow = 0;
$dt_all_overflow = $dt_remote_overflow = 0;
$delayed_count = $relayed_unshown = $message_errors = 0;

# Sentinel values for the period covered by the logs. $begin still
# holding its initial value after parsing means no valid line was read.
$begin = "9999-99-99 99:99:99";
$end = "0000-00-00 00:00:00";

# Every section/type combination of the report totals starts at zero.
foreach my $section (qw(Received Delivered Rejects Ham Spam)) {
  $report_totals{$section}{$_} = 0
    foreach (qw(Volume Messages Delayed Failed Hosts Domains Emails Edomains));
}

# Generate our parser.
my $parser = generate_parser();
3961
3962
3963
if (@ARGV) {
  # Scan the input files and collect the data
  foreach my $file (@ARGV) {
    # Files whose names contain .gz or .Z are read through the matching
    # decompressor; .gz is tested first, as before.
    my $decompressor;
    if ($file =~ /\.gz/) {
      $decompressor = 'gunzip';
    }
    elsif ($file =~ /\.Z/) {
      $decompressor = 'uncompress';
    }

    if (defined $decompressor) {
      # The command line is run by the shell, so quote the file name:
      # names containing spaces or shell metacharacters must reach the
      # decompressor as a single literal argument.
      (my $quoted = $file) =~ s/'/'\\''/g;
      unless (open(FILE,"$decompressor -c '$quoted' |")) {
        print STDERR "Failed to $decompressor -c $file: $!\n";
        next;
      }
    }
    else {
      # NOTE(review): left as a two-argument open so that existing
      # invocations behave exactly as before.
      unless (open(FILE,$file)) {
        print STDERR "Failed to read $file: $!\n";
        next;
      }
    }
    #Now parse the filehandle, updating the global variables.
    parse($parser,\*FILE);
    close FILE;
  }
}
else {
  #No files provided. Parse STDIN, updating the global variables.
  parse($parser,\*STDIN);
}
3994
3995
# $begin still holds its initial sentinel value if no log line updated
# it. That is an error unless -emptyok was given.
if ($begin eq "9999-99-99 99:99:99" && ! $emptyOK) {
  print STDERR "**** No valid log lines read\n";
  exit 1;
}

# Output our results.
# The sections below are produced in this fixed order.
print_header();
print_grandtotals();

# Print counts of user specified patterns if required.
print_user_patterns() if @user_patterns;

# Print rejection reasons.
# print_rejects();

# Print totals by transport if required.
print_transport() if $show_transport;

# Print the deliveries per interval as a histogram, unless configured not to.
# First find the maximum in one interval and scale accordingly.
if ($hist_opt > 0) {
  print_histogram("Messages received", 'message', @received_interval_count);
  print_histogram("Deliveries", 'delivery', @delivered_interval_count);
}

# Print times on queue if required.
if ($#queue_times >= 0) {
  print_duration_table("Time spent on the queue", "all messages", \@queue_times, \@qt_all_bin,$qt_all_overflow);
  print_duration_table("Time spent on the queue", "messages with at least one remote delivery", \@queue_times, \@qt_remote_bin,$qt_remote_overflow);
}

# Print delivery times if required.
if ($#delivery_times >= 0) {
  print_duration_table("Delivery times", "all messages", \@delivery_times, \@dt_all_bin,$dt_all_overflow);
  print_duration_table("Delivery times", "messages with at least one remote delivery", \@delivery_times, \@dt_remote_bin,$dt_remote_overflow);
}

# Print rcpt times if required.
# The 'all' table comes first, followed by the other protocols seen,
# in sorted order.
if ($#rcpt_times >= 0) {
  foreach my $protocol ('all', grep(!/^all$/, sort keys %rcpt_times_bin)) {
    print_duration_table("Receipt times", "$protocol messages", \@rcpt_times, $rcpt_times_bin{$protocol}, $rcpt_times_overflow{$protocol});
  }
}

# Print relay information if required.
print_relay() if $show_relay;

# Print the league tables, if topcount isn't zero.
if ($topcount > 0) {
  # Worksheets for the Excel output, each with its own current-row
  # counter which is passed by reference to print_league_table().
  my($ws_rej, $ws_top50, $ws_rej_row, $ws_top50_row);
  $ws_rej_row = $ws_top50_row = 0;
  if ($xls_fh) {
    $ws_top50 = $workbook->addworksheet('Deliveries');
    # The 'Rejections' sheet is only created if there is something to put on it.
    $ws_rej = $workbook->addworksheet('Rejections') if (%rejected_count_by_reason || %rejected_count_by_ip || %spam_count_by_ip);
  }

  print_league_table("mail rejection reason", \%rejected_count_by_reason, undef, undef, undef, $ws_rej, \$ws_rej_row) if %rejected_count_by_reason;

  # Sender tables, one per selected -by* option.
  foreach ('Host','Domain','Email','Edomain') {
    next unless $do_sender{$_};
    print_league_table("sending \l$_", $received_count{$_}, undef, $received_data{$_},$received_data_gigs{$_}, $ws_top50, \$ws_top50_row);
  }

  print_league_table("local sender", \%received_count_user, undef,
    \%received_data_user,\%received_data_gigs_user, $ws_top50, \$ws_top50_row) if (($local_league_table || $include_remote_users) && %received_count_user);
  # Destination tables are selected by the same %do_sender settings.
  foreach ('Host','Domain','Email','Edomain') {
    next unless $do_sender{$_};
    print_league_table("\l$_ destination", $delivered_messages{$_}, $delivered_addresses{$_}, $delivered_data{$_},$delivered_data_gigs{$_}, $ws_top50, \$ws_top50_row);
  }
  print_league_table("local destination", \%delivered_messages_user, \%delivered_addresses_user, \%delivered_data_user,\%delivered_data_gigs_user, $ws_top50, \$ws_top50_row) if (($local_league_table || $include_remote_users) && %delivered_messages_user);

  print_league_table("rejected ip", \%rejected_count_by_ip, undef, undef, undef, $ws_rej, \$ws_rej_row) if %rejected_count_by_ip;
  print_league_table("non-rejected spamming ip", \%spam_count_by_ip, undef, undef, undef, $ws_rej, \$ws_rej_row) if %spam_count_by_ip;

}

# Print the error statistics if required.
print_errors() if $show_errors;

# Finish off the HTML document.
print $htm_fh "</body>\n</html>\n" if $htm_fh;


# Close the output handles that are in use.
$txt_fh->close if $txt_fh && ref $txt_fh;
$htm_fh->close if $htm_fh;

if ($xls_fh) {
  # close Excel Workbook
  $ws_global->set_first_sheet();
  # FIXME: whyever - activate does not work :-/
  $ws_global->activate();
  $workbook->close();
}
4088
d5692f86 4089
059ec3d9 4090# End of eximstats