Start
[exim.git] / src / src / eximstats.src
1 #!PERL_COMMAND -w
2 # $Cambridge: exim/src/src/eximstats.src,v 1.1 2004/10/07 10:39:01 ph10 Exp $
3
4 # Copyright (c) 2001 University of Cambridge.
5 # See the file NOTICE for conditions of use and distribution.
6
7 # Perl script to generate statistics from one or more Exim log files.
8
9 # Usage: eximstats [<options>] <log file> <log file> ...
10
11 # 1996-05-21: Ignore lines not starting with valid date/time, just in case
12 # these get into a log file.
13 # 1996-11-19: Add the -h option to control the size of the histogram,
14 # and optionally turn it off.
15 # Use some Perl 5 things; it should be everywhere by now.
16 # Add the Perl -w option and rewrite so no warnings are given.
17 # Add the -t option to control the length of the "top" listing.
18 # Add the -ne, -nt options to turn off errors and transport
19 # information.
20 # Add information about length of time on queue, and -q<list> to
21 # control the intervals and turn it off.
22 # Add count and percentage of delayed messages to the Received
23 # line.
24 # Show total number of errors.
25 # Add count and percentage of messages with errors to Received
26 # line.
27 # Add information about relaying and -nr to suppress it.
28 # 1997-02-03 Merged in some of the things Nigel Metheringham had done:
29 # Re-worded headings
30 # Added received histogram as well as delivered
31 # Added local senders' league table
32 # Added local recipients' league table
33 # 1997-03-10 Fixed typo "destinationss"
34 # Allow for intermediate address between final and original
35 # when testing for relaying
36 # Give better message when no input
37 # 1997-04-24 Fixed bug in layout of error listing that was depending on
38 # text length (output line got repeated).
39 # 1997-05-06 Bug in option decoding when only one option.
40 # Overflow bug when handling very large volumes.
41 # 1997-10-28 Updated to handle revised log format that might show
42 # HELO name as well as host name before IP number
43 # 1998-01-26 Bugs in the function for calculating the number of seconds
44 # since 1970 from a log date
45 # 1998-02-02 Delivery to :blackhole: doesn't have a T= entry in the log
46 # line; cope with this, thereby avoiding undefined problems
47 # Very short log line gave substring error
48 # 1998-02-03 A routed delivery to a local transport may not have <> in the
49 # log line; terminate the address at white space, not <
50 # 1998-09-07 If first line of input was a => line, $thissize was undefined;
51 # ensure it is zero.
52 # 1998-12-21 Adding of $thissize from => line should have been adding $size.
53 # Oops. Should have looked more closely when fixing the previous
54 # bug!
55 # 1999-11-12 Increased the field widths for printed integers; numbers are
56 # bigger than originally envisaged.
57 # 2001-03-21 Converted seconds() routine to use Time::Local, fixing a bug
58 # whereby seconds($timestamp) - id_seconds($id) gave an
59 # incorrect result.
60 # Added POD documentation.
61 # Moved usage instructions into help() subroutine.
62 # Added 'use strict' and declared all global variables.
63 # Added '-html' flag and resultant code.
64 # Added '-cache' flag and resultant code.
65 # Added add_volume() routine and converted all volume variables
66 # to use it, fixing the overflow problems for individual hosts
67 # on large sites.
68 # Converted all volume output to GB/MB/KB as appropriate.
69 # Don't store local user stats if -nfl is specified.
70 # Modifications done by: Steve Campbell (<steve@computurn.com>)
71 # 2001-04-02 Added the -t_remote_users flag. Steve Campbell.
72 # 2001-10-15 Added the -domain flag. Steve Campbell.
73 # 2001-10-16 Accept files on STDIN or on the command line. Steve Campbell.
74 # 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
75 # We now generate our main parsing subroutine as an eval statement
76 # which improves performance dramatically when not all the results
77 # are required. We also cache the last timestamp to time convertion.
78 #
79 # NOTE: 'Top 50 destinations by (message count|volume)' lines are
80 # now 'Top N (host|email|domain) destinations by (message count|volume)'
81 # where N is the topcount. Steve Campbell.
82 #
83 # 2001-10-30 V1.16 Joachim Wieland.
84 # Fixed minor bugs in add_volume() when taking over this version
85 # for use in Exim 4: -w gave uninitialized value warnings in
86 # two situations: for the first addition to a counter, and if
87 # there were never any gigabytes, thereby leaving the $gigs
88 # value unset.
89 # Initialized $last_timestamp to stop a -w uninitialized warning.
90 # Minor layout tweak for grand totals (nitpicking).
91 # Put the IP addresses for relaying stats in [] and separated by
92 # a space from the domain name.
93 # Removed the IPv4-specific address test when picking out addresses
94 # for relaying. Anything inside [] is OK.
95 #
96 # 2002-07-02 Philip Hazel
97 # Fixed "uninitialized variable" message that occurred for relay
98 # messages that arrived from H=[1.2.3.4] hosts (no name shown).
99 # This bug didn't affect the output.
100 #
101 # 2002-04-15 V1.17 Joachim Wieland.
102 # Added -charts, -chartdir. -chartrel options which use
103 # GD::Graph modules to create graphical charts of the statistics.
104 #
105 # 2002-04-15 V1.18 Steve Campbell.
106 # Added a check for $domain to to stop a -w uninitialized warning.
107 # Added -byemaildomain option.
108 # Only print HTML header links to included tables!
109 #
110 # 2002-08-02 V1.19 Steve Campbell.
111 # Changed the debug mode to dump the parser onto STDERR rather
112 # than STDOUT. Documented the -d flag into the help().
113 # Rejoined the divergent 2002-04-15 and 2002-07-02 releases.
114 #
115 # 2002-08-21 V1.20 Steve Campbell.
116 # Added the '-merge' option to allow merging of previous reports.
117 # Fixed a missing semicolon when doing -bydomain.
118 # Make volume charts plot the data gigs and bytes rather than just bytes.
119 # Only process log lines with $flag =~ /<=|=>|->|==|\*\*|Co/
120 # Converted Emaildomain to Edomain - the column header was too wide!
121 # This changes the text output slightly. You can revert to the old
122 # column widths by changing $COLUMN_WIDTHS to 7;
123 #
124 # 2002-09-04 V1.21 Andreas J Mueller
125 # Local deliveries domain now defaults to 'localdomain'.
126 # Don't match F=<From> when looking for the user.
127 #
128 # 2002-09-05 V1.22 Steve Campbell
129 # Fixed a perl 5.005 incompatibility problem ('our' variables).
130 #
131 # 2002-09-11 V1.23 Steve Campbell
132 # Stopped -charts option from throwing errors on null data.
133 # Don't print out 'Errors encountered' unless there are any.
134
135 # 2002-10-21 V1.23a Philip Hazel - patch from Tony Finch put in until
136 # Steve's eximstats catches up.
137 # Handle log files that include the timezone after the timestamp.
138 # Switch to assuming that log timestamps are in local time, with
139 # an option for UTC timestamps, as in Exim itself.
140 #
141 # 2003-02-05 V1.24 Steve Campbell
142 # Added in Sergey Sholokh's code to convert '<' and '>' characters
143 # in HTML output. Also added code to convert them back with -merge.
144 # Fixed timestamp offsets to convert to seconds rather than minutes.
145 # Updated -merge to work with output files using timezones.
146 # Added cacheing to speed up the calculation of timezone offsets.
147 #
148 # 2003-02-07 V1.25 Steve Campbell
149 # Optimised the usage of mktime() in the seconds subroutine.
150 # Removed the now redundant '-cache' option.
151 # html2txt() now explicitly matches HTML tags.
152 # Implemented a new sorting algorithm - the top_n_sort() routine.
153 # Added Danny Carroll's '-nvr' flag and code.
154 #
155 # 2003-03-13 V1.26 Steve Campbell
156 # Implemented HTML compliance changes recommended by Bernard Massot.
157 # Bug fix to allow top_n_sort() to handle null keys.
158 # Convert all domains and edomains to lowercase.
159 # Remove preceding dots from domains.
160 #
161 # 2003-03-13 V1.27 Steve Campbell
162 # Replaced border attributes with 'border=1', as recommended by
163 # Bernard Massot.
164 #
165 # 2003-06-03 V1.28 John Newman
166 # Added in the ability to skip over the parsing and evaulation of
167 # specific transports as passed to eximstats via the new "-nt/.../"
168 # command line argument. This new switch allows the viewing of
169 # not more accurate statistics but more applicable statistics when
170 # special transports are in use (ie; SpamAssassin). We need to be
171 # able to ignore transports such as this otherwise the resulting
172 # local deliveries are significantly skewed (doubled)...
173 #
174 # 2003-11-06 V1.29 Steve Campbell
175 # Added the '-pattern "Description" "/pattern/"' option.
176 #
177 # 2004-02-17 V1.30 Steve Campbell
178 # Added warnings if required GD::Graph modules are not available or
179 # insufficient -chart* options are specified.
180 #
181 # 2004-02-20 V1.31 Andrea Balzi
182 # Only show the Local Sender/Destination links if the tables exist.
183 #
184
185
186 =head1 NAME
187
188 eximstats - generates statistics from Exim mainlog files.
189
190 =head1 SYNOPSIS
191
192 eximstats [Options] mainlog1 mainlog2 ... > report.txt
193 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_report.txt
194
195 Options:
196
197 =over 4
198
199 =item B<-h>I<number>
200
201 histogram divisions per hour. The default is 1, and
202 0 suppresses histograms. Valid values are:
203
204 0, 1, 2, 3, 5, 10, 15, 20, 30 or 60.
205
206 =item B<-ne>
207
208 Don't display error information.
209
210 =item B<-nr>
211
212 Don't display relaying information.
213
214 =item B<-nr>I</pattern/>
215
216 Don't display relaying information that matches.
217
218 =item B<-nt>
219
220 Don't display transport information.
221
222 =item B<-nt>I</pattern/>
223
224 Don't display transport information that matches
225
226 =item B<-q>I<list>
227
228 List of times for queuing information single 0 item suppresses.
229
230 =item B<-t>I<number>
231
232 Display top <number> sources/destinations
233 default is 50, 0 suppresses top listing.
234
235 =item B<-tnl>
236
237 Omit local sources/destinations in top listing.
238
239 =item B<-t_remote_users>
240
241 Include remote users in the top source/destination listings.
242
243 =item B<-byhost>
244
245 Show results by sending host. This may be combined with
246 B<-bydomain> and/or B<-byemail> and/or B<-byedomain>. If none of these options
247 are specified, then B<-byhost> is assumed as a default.
248
249 =item B<-bydomain>
250
251 Show results by sending domain.
252 May be combined with B<-byhost> and/or B<-byemail> and/or B<-byedomain>.
253
254 =item B<-byemail>
255
256 Show results by sender's email address.
257 May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byedomain>.
258
259 =item B<-byemaildomain> or B<-byedomain>
260
261 Show results by sender's email domain.
262 May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byemail>.
263
264 =item B<-pattern> I<Description> I</Pattern/>
265
266 Look for the specified pattern and count the number of lines in which it appears.
267 This option can be specified multiple times. Eg:
268
269 -pattern 'Refused connections' '/refused connection/'
270
271
272 =item B<-merge>
273
274 This option allows eximstats to merge old eximstat reports together. Eg:
275
276 eximstats mainlog.sun > report.sun.txt
277 eximstats mainlog.mon > report.mon.txt
278 eximstats mainlog.tue > report.tue.txt
279 eximstats mainlog.wed > report.web.txt
280 eximstats mainlog.thu > report.thu.txt
281 eximstats mainlog.fri > report.fri.txt
282 eximstats mainlog.sat > report.sat.txt
283 eximstats -merge report.*.txt > weekly_report.txt
284 eximstats -merge -html report.*.txt > weekly_report.html
285
286 =over 4
287
288 =item *
289
290 You can merge text or html reports and output the results as text or html.
291
292 =item *
293
294 You can use all the normal eximstat output options, but only data
295 included in the original reports can be shown!
296
297 =item *
298
299 When merging reports, some loss of accuracy may occur in the top I<n> lists.
300 This will be towards the ends of the lists.
301
302 =item *
303
304 The order of items in the top I<n> lists may vary when the data volumes
305 round to the same value.
306
307 =back
308
309 =item B<-html>
310
311 Output the results in HTML.
312
313 =item B<-charts>
314
315 Create graphical charts to be displayed in HTML output.
316
317 This requires the following modules which can be obtained
318 from http://www.cpan.org/modules/01modules.index.html
319
320 =over 4
321
322 =item GD
323
324 =item GDTextUtil
325
326 =item GDGraph
327
328 =back
329
330 To install these, download and unpack them, then use the normal perl installation procedure:
331
332 perl Makefile.PL
333 make
334 make test
335 make install
336
337 =item B<-chartdir>I <dir>
338
339 Create the charts in the directory <dir>
340
341 =item B<-chartrel>I <dir>
342
343 Specify the relative directory for the "img src=" tags from where to include
344 the charts
345
346 =item B<-d>
347
348 Debug flag. This outputs the eval()'d parser onto STDOUT which makes it
349 easier to trap errors in the eval section. Remember to add 1 to the line numbers to allow for the
350 title!
351
352 =back
353
354 =head1 DESCRIPTION
355
356 Eximstats parses exim mainlog files and outputs a statistical
357 analysis of the messages processed. By default, a text
358 analysis is generated, but you can request an html analysis
359 by using the B<-html> flag. See the help (B<-help>) to learn
360 about how to create charts from the tables.
361
362 =head1 AUTHOR
363
364 There is a web site at http://www.exim.org - this contains details of the
365 mailing list exim-users@exim.org.
366
367 =head1 TO DO
368
369 This program does not perfectly handle messages whose received
370 and delivered log lines are in different files, which can happen
371 when you have multiple mail servers and a message cannot be
372 immeadiately delivered. Fixing this could be tricky...
373
374 =head1 SUBROUTINES
375
376 The following section will only be of interest to the
377 program maintainers:
378
379 =cut
380
381 use integer;
382 use strict;
383
384 # use Time::Local; # PH/FANF
385 use POSIX;
386
387 use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints);
388 eval { require GD::Graph::pie; };
389 $HAVE_GD_Graph_pie = $@ ? 0 : 1;
390 eval { require GD::Graph::linespoints; };
391 $HAVE_GD_Graph_linespoints = $@ ? 0 : 1;
392
393
394 ##################################################
395 # Static data #
396 ##################################################
397 # 'use vars' instead of 'our' as perl5.005 is still in use out there!
398 use vars qw(@tab62 @days_per_month $gig);
399 use vars qw($VERSION);
400 use vars qw($COLUMN_WIDTHS);
401
402
403 @tab62 =
404 (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, # 0-9
405 0,10,11,12,13,14,15,16,17,18,19,20, # A-K
406 21,22,23,24,25,26,27,28,29,30,31,32, # L-W
407 33,34,35, 0, 0, 0, 0, 0, # X-Z
408 0,36,37,38,39,40,41,42,43,44,45,46, # a-k
409 47,48,49,50,51,52,53,54,55,56,57,58, # l-w
410 59,60,61); # x-z
411
412 @days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
413 $gig = 1024 * 1024 * 1024;
414 $VERSION = '1.31';
415
416 # How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
417 $COLUMN_WIDTHS = 8;
418
419 # Declare global variables.
420 use vars qw($total_received_data $total_received_data_gigs $total_received_count);
421 use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_count);
422 use vars qw(%arrival_time %size %from_host %from_address);
423 use vars qw(%timestamp2time); #Hash of timestamp => time.
424 use vars qw($last_timestamp $last_time); #The last time convertion done.
425 use vars qw($last_date $date_seconds); #The last date convertion done.
426 use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
427 use vars qw($localtime_offset);
428 use vars qw($i); #General loop counter.
429 use vars qw($debug); #Debug mode?
430 use vars qw($ntopchart); #How many entries should make it into the chart?
431 use vars qw($gddirectory); #Where to put files from GD::Graph
432
433 $ntopchart = 5;
434
435 # The following are parameters whose values are
436 # set by command line switches:
437 use vars qw($show_errors $show_relay $show_transport $transport_pattern);
438 use vars qw($topcount $local_league_table $include_remote_users);
439 use vars qw($hist_opt $hist_interval $hist_number $volume_rounding);
440 use vars qw($relay_pattern @queue_times $html @user_patterns @user_descriptions);
441
442 use vars qw(%do_sender); #Do sender by Host, Domain, Email, and/or Edomain tables.
443 use vars qw($charts $chartrel $chartdir $charts_option_specified);
444 use vars qw($merge_reports); #Merge old reports ?
445
446 # The following are modified in the parse() routine, and
447 # referred to in the print_*() routines.
448 use vars qw($queue_more_than $delayed_count $relayed_unshown $begin $end);
449 use vars qw(%received_count %received_data %received_data_gigs);
450 use vars qw(%delivered_count %delivered_data %delivered_data_gigs);
451 use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
452 use vars qw(%delivered_count_user %delivered_data_user %delivered_data_gigs_user);
453 use vars qw(%transported_count %transported_data %transported_data_gigs);
454 use vars qw(%remote_delivered %relayed %delayed %had_error %errors_count);
455 use vars qw(@queue_bin @remote_queue_bin @received_interval_count @delivered_interval_count);
456 use vars qw(@user_pattern_totals);
457
458 use vars qw(%report_totals);
459
460
461
462
463 ##################################################
464 # Subroutines #
465 ##################################################
466
467
468 =head2 volume_rounded();
469
470 $rounded_volume = volume_rounded($bytes,$gigabytes);
471
472 Given a data size in bytes, round it to KB, MB, or GB
473 as appropriate.
474
475 Eg 12000 => 12KB, 15000000 => 14GB, etc.
476
477 Note: I've experimented with Math::BigInt and it results in a 33%
478 performance degredation as opposed to storing numbers split into
479 bytes and gigabytes.
480
481 =cut
482
483 sub volume_rounded {
484 my($x,$g) = @_;
485 $x = 0 unless $x;
486 $g = 0 unless $g;
487 my($rounded);
488
489 while ($x > $gig) {
490 $g++;
491 $x -= $gig;
492 }
493
494 if ($volume_rounding) {
495 # Values < 1 GB
496 if ($g <= 0) {
497 if ($x < 10000) {
498 $rounded = sprintf("%6d", $x);
499 }
500 elsif ($x < 10000000) {
501 $rounded = sprintf("%4dKB", ($x + 512)/1024);
502 }
503 else {
504 $rounded = sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
505 }
506 }
507 # Values between 1GB and 10GB are printed in MB
508 elsif ($g < 10) {
509 $rounded = sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024));
510 }
511 else {
512 # Handle values over 10GB
513 $rounded = sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
514 }
515 }
516 else {
517 # We don't want any rounding to be done.
518 $rounded = sprintf("%4d", ($g * $gig) + $x);
519 }
520
521 return $rounded;
522 }
523
524
525 =head2 un_round();
526
527 un_round($rounded_volume,\$bytes,\$gigabytes);
528
529 Given a volume in KB, MB or GB, as generated by volume_rounded(),
530 do the reverse transformation and convert it back into Bytes and Gigabytes.
531 These are added to the $bytes and $gigabytes parameters.
532
533 Given a data size in bytes, round it to KB, MB, or GB
534 as appropriate.
535
536 EG: 500 => (500,0), 14GB => (0,14), etc.
537
538 =cut
539
540 sub un_round {
541 my($rounded,$bytes_sref,$gigabytes_sref) = @_;
542
543 if ($rounded =~ /(\d+)GB/) {
544 $$gigabytes_sref += $1;
545 }
546 elsif ($rounded =~ /(\d+)MB/) {
547 $$gigabytes_sref += $1 / 1024;
548 $$bytes_sref += (($1 % 1024 ) * 1024 * 1024);
549 }
550 elsif ($rounded =~ /(\d+)KB/) {
551 $$gigabytes_sref += $1 / (1024 * 1024);
552 $$bytes_sref += ($1 % (1024 * 1024) * 1024);
553 }
554 elsif ($rounded =~ /(\d+)/) {
555 $$gigabytes_sref += $1 / $gig;
556 $$bytes_sref += $1 % $gig;
557 }
558
559 #Now reduce the bytes down to less than 1GB.
560 add_volume($bytes_sref,$gigabytes_sref,0) if ($$bytes_sref > $gig);
561 }
562
563
564 =head2 add_volume();
565
566 add_volume(\$bytes,\$gigs,$size);
567
568 Add $size to $bytes/$gigs where this is a number split into
569 bytes ($bytes) and gigabytes ($gigs). This is significantly
570 faster than using Math::BigInt.
571
572 =cut
573
574 sub add_volume {
575 my($bytes_ref,$gigs_ref,$size) = @_;
576 $$bytes_ref = 0 if ! defined $$bytes_ref;
577 $$gigs_ref = 0 if ! defined $$gigs_ref;
578 $$bytes_ref += $size;
579 while ($$bytes_ref > $gig)
580 {
581 $$gigs_ref++;
582 $$bytes_ref -= $gig;
583 }
584 }
585
586
587 =head2 format_time();
588
589 $formatted_time = format_time($seconds);
590
591 Given a time in seconds, break it down into
592 weeks, days, hours, minutes, and seconds.
593
594 Eg 12005 => 3h20m5s
595
596 =cut
597
598 sub format_time {
599 my($t) = pop @_;
600 my($s) = $t % 60;
601 $t /= 60;
602 my($m) = $t % 60;
603 $t /= 60;
604 my($h) = $t % 24;
605 $t /= 24;
606 my($d) = $t % 7;
607 my($w) = $t/7;
608 my($p) = "";
609 $p .= "$w"."w" if $w > 0;
610 $p .= "$d"."d" if $d > 0;
611 $p .= "$h"."h" if $h > 0;
612 $p .= "$m"."m" if $m > 0;
613 $p .= "$s"."s" if $s > 0 || $p eq "";
614 $p;
615 }
616
617
618 =head2 unformat_time();
619
620 $seconds = unformat_time($formatted_time);
621
622 Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
623
624 Eg 3h20m5s => 12005
625
626 =cut
627
628 sub unformat_time {
629 my($formated_time) = pop @_;
630 my $time = 0;
631
632 while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
633 $time += $1 if ($2 eq '' || $2 eq 's');
634 $time += $1 * 60 if ($2 eq 'm');
635 $time += $1 * 60 * 60 if ($2 eq 'h');
636 $time += $1 * 60 * 60 * 24 if ($2 eq 'd');
637 $time += $1 * 60 * 60 * 24 * 7 if ($2 eq 'w');
638 }
639 $time;
640 }
641
642
643 =head2 seconds();
644
645 $time = seconds($timestamp);
646
647 Given a time-of-day timestamp, convert it into a time() value using
648 POSIX::mktime. We expect the timestamp to be of the form
649 "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
650 and the year to be absolute (we do the necessary conversions). The
651 timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
652 offset is not present, and we have not been told that the log is in UTC
653 (with the -utc option), then we adjust the time by the current local
654 time offset so that it can be compared with the time recorded in message
655 IDs, which is UTC.
656
657 To improve performance, we only use mktime on the date ($year-$mon-$day),
658 and only calculate it if the date is different to the previous time we
659 came here. We then add on seconds for the '$hour:$min:$sec'.
660
661 We also store the results of the last conversion done, and only
662 recalculate if the date is different.
663
664 We used to have the '-cache' flag which would store the results of the
665 mktime() call. However, the current way of just using mktime() on the
666 date obsoletes this.
667
668 =cut
669
670 sub seconds {
671 my($timestamp) = @_;
672
673 # Is the timestamp the same as the last one?
674 return $last_time if ($last_timestamp eq $timestamp);
675
676 return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o);
677
678 unless ($last_date eq $1) {
679 $last_date = $1;
680 my(@timestamp) = (0,0,0,$4,$3,$2);
681 $timestamp[5] -= 1900;
682 $timestamp[4]--;
683 $date_seconds = mktime(@timestamp);
684 }
685 my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
686
687 # SC. Use cacheing. Also note we want seconds not minutes.
688 #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
689 if (defined $8 && ($8 ne $last_offset)) {
690 $last_offset = $8;
691 $offset_seconds = ($10 * 60 + $11) * 60;
692 $offset_seconds = -$offset_seconds if ($9 eq '-');
693 }
694
695
696 if (defined $7) {
697 #$time -= $this_offset;
698 $time -= $offset_seconds;
699 } elsif (defined $localtime_offset) {
700 $time -= $localtime_offset;
701 }
702
703 # Store the last timestamp received.
704 $last_timestamp = $timestamp;
705 $last_time = $time;
706
707 $time;
708 }
709
710
711 =head2 id_seconds();
712
713 $time = id_seconds($message_id);
714
715 Given a message ID, convert it into a time() value.
716
717 =cut
718
719 sub id_seconds {
720 my($sub_id) = substr((pop @_), 0, 6);
721 my($s) = 0;
722 my(@c) = split(//, $sub_id);
723 while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] }
724 $s;
725 }
726
727
728
729 =head2 calculate_localtime_offset();
730
731 $localtime_offset = calculate_localtime_offset();
732
733 Calculate the the localtime offset from gmtime in seconds.
734
735 $localtime = time() + $localtime_offset.
736
737 These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
738 (West is negative, East is positive.)
739
740 =cut
741
742 # $localtime = gmtime() + $localtime_offset. OLD COMMENT
743 # This subroutine commented out as it's not currently in use.
744
745 #sub calculate_localtime_offset {
746 # # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
747 # my (@sample_date) = (0,0,0,5,5,100);
748 # my $localtime = timelocal(@sample_date);
749 # my $gmtime = timegm(@sample_date);
750 # my $offset = $localtime - $gmtime;
751 # return $offset;
752 #}
753
754 sub calculate_localtime_offset {
755 # Assume that the offset at the moment is valid across the whole
756 # period covered by the logs that we're analysing. This may not
757 # be true around the time the clocks change in spring or autumn.
758 my $utc = time;
759 # mktime works on local time and gmtime works in UTC
760 my $local = mktime(gmtime($utc));
761 return $local - $utc;
762 }
763
764
765 =head2 print_queue_times();
766
767 $time = print_queue_times($message_type,\@queue_times,$queue_more_than);
768
769 Given the type of messages being output, the array of message queue times,
770 and the number of messages which exceeded the queue times, print out
771 a table.
772
773 =cut
774
775 sub print_queue_times {
776 no integer;
777 my($string,$array,$queue_more_than) = @_;
778 my(@chartdatanames);
779 my(@chartdatavals);
780
781 my $printed_one = 0;
782 my $cumulative_percent = 0;
783 #$queue_unknown += keys %arrival_time;
784
785 my $queue_total = $queue_more_than;
786 for ($i = 0; $i <= $#queue_times; $i++) { $queue_total += $$array[$i] }
787
788 my $temp = "Time spent on the queue: $string";
789
790 my($format);
791 if ($html) {
792 print "<hr><a name=\"$string time\"></a><h2>$temp</h2>\n";
793 print "<table border=0 width=\"100%\">\n";
794 print "<tr><td>\n";
795 print "<table border=1>\n";
796 print "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th>\n";
797 $format = "<tr><td align=\"right\">%s %s</td><td align=\"right\">%d</td><td align=\"right\">%5.1f%%</td><td align=\"right\">%5.1f%%</td>\n";
798 }
799 else
800 {
801 printf("%s\n%s\n\n", $temp, "-" x length($temp));
802 $format = "%5s %4s %6d %5.1f%% %5.1f%%\n";
803 }
804
805 for ($i = 0; $i <= $#queue_times; $i++) {
806 if ($$array[$i] > 0)
807 {
808 my $percent = ($$array[$i] * 100)/$queue_total;
809 $cumulative_percent += $percent;
810 printf($format,
811 $printed_one? " " : "Under",
812 format_time($queue_times[$i]),
813 $$array[$i], $percent, $cumulative_percent);
814 if (!defined($queue_times[$i])) {
815 print "Not defined";
816 }
817 push(@chartdatanames,
818 ($printed_one? "" : "Under") . format_time($queue_times[$i]));
819 push(@chartdatavals, $$array[$i]);
820 $printed_one = 1;
821 }
822 }
823
824 if ($queue_more_than > 0) {
825 my $percent = ($queue_more_than * 100)/$queue_total;
826 $cumulative_percent += $percent;
827 printf($format,
828 "Over ",
829 format_time($queue_times[$#queue_times]),
830 $queue_more_than, $percent, $cumulative_percent);
831 }
832 push(@chartdatanames, "Over " . format_time($queue_times[$#queue_times]));
833 push(@chartdatavals, $queue_more_than);
834
835 #printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;
836 if ($html) {
837 print "</table>\n";
838 print "</td><td>\n";
839
840 if ($HAVE_GD_Graph_pie && $charts) {
841 my @data = (
842 \@chartdatanames,
843 \@chartdatavals
844 );
845 my $graph = GD::Graph::pie->new(200, 200);
846 my $pngname;
847 my $title;
848 if ($string =~ /all/) { $pngname = "queue_all.png"; $title = "Queue (all)"; }
849 if ($string =~ /remote/) { $pngname = "queue_rem.png"; $title = "Queue (remote)"; }
850 $graph->set(
851 title => $title,
852 );
853 my $gd = $graph->plot(\@data) or warn($graph->error);
854 if ($gd) {
855 open(IMG, ">$chartdir/$pngname") or die $!;
856 binmode IMG;
857 print IMG $gd->png;
858 close IMG;
859 print "<img src=\"$chartrel/$pngname\">";
860 }
861 }
862 print "</td></tr></table>\n";
863 }
864 print "\n";
865 }
866
867
868
869 =head2 print_histogram();
870
871 print_histogram('Deliverieds|Messages received',@interval_count);
872
873 Print a histogram of the messages delivered/received per time slot
874 (hour by default).
875
876 =cut
877
878 sub print_histogram {
879 my($text) = shift;
880 my(@interval_count) = @_;
881 my(@chartdatanames);
882 my(@chartdatavals);
883 my($maxd) = 0;
884 for ($i = 0; $i < $hist_number; $i++)
885 { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; }
886
887 my $scale = int(($maxd + 25)/50);
888 $scale = 1 if $scale == 0;
889
890 my($type);
891 if ($text eq "Deliveries")
892 {
893 $type = ($scale == 1)? "delivery" : "deliveries";
894 }
895 else
896 {
897 $type = ($scale == 1)? "message" : "messages";
898 }
899
900 my($title) = sprintf("$text per %s (each dot is $scale $type)",
901 ($hist_interval == 60)? "hour" :
902 ($hist_interval == 1)? "minute" : "$hist_interval minutes");
903
904 if ($html) {
905 print "<hr><a name=\"$text\"></a><h2>$title</h2>\n";
906 print "<table border=0 width=\"100%\">\n";
907 print "<tr><td><pre>\n";
908 }
909 else {
910 printf("%s\n%s\n\n", $title, "-" x length($title));
911 }
912
913 my $hour = 0;
914 my $minutes = 0;
915 for ($i = 0; $i < $hist_number; $i++)
916 {
917 my $c = $interval_count[$i];
918
919 # If the interval is an hour (the maximum) print the starting and
920 # ending hours as a label. Otherwise print the starting hour and
921 # minutes, which take up the same space.
922
923 my $temp;
924 if ($hist_opt == 1)
925 {
926 $temp = sprintf("%02d-%02d", $hour, $hour + 1);
927 print $temp;
928 push(@chartdatanames, $temp);
929 $hour++;
930 }
931 else
932 {
933 if ($minutes == 0)
934 { $temp = sprintf("%02d:%02d", $hour, $minutes) }
935 else
936 { $temp = sprintf(" :%02d", $minutes) }
937 print $temp;
938 push(@chartdatanames, $temp);
939 $minutes += $hist_interval;
940 if ($minutes >= 60)
941 {
942 $minutes = 0;
943 $hour++;
944 }
945 }
946 push(@chartdatavals, $c);
947 printf(" %6d %s\n", $c, "." x ($c/$scale));
948 }
949 print "\n";
950 if ($html)
951 {
952 print "</pre>\n";
953 print "</td><td>\n";
954 if ($HAVE_GD_Graph_linespoints && $charts) {
955 # calculate the graph
956 my @data = (
957 \@chartdatanames,
958 \@chartdatavals
959 );
960 my $graph = GD::Graph::linespoints->new(300, 300);
961 $graph->set(
962 x_label => 'Time',
963 y_label => 'Amount',
964 title => $text,
965 x_labels_vertical => 1
966 );
967 my($pngname);
968 if ($text =~ /Deliveries/) { $pngname = "histogram_del.png"; }
969 if ($text =~ /Messages/) { $pngname = "histogram_mes.png"; }
970 my $gd = $graph->plot(\@data) or warn($graph->error);
971 if ($gd) {
972 open(IMG, ">$chartdir/$pngname") or die $!;
973 binmode IMG;
974 print IMG $gd->png;
975 close IMG;
976 print "<img src=\"$chartrel/$pngname\">";
977 }
978 }
979 print "</td></tr></table>\n";
980 }
981 }
982
983
984
985 =head2 print_league_table();
986
987 print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs);
988
989 Given hashes of message count and message data, which are keyed by
990 the table type (eg by the sending host), print a league table
991 showing the top $topcount (defaults to 50).
992
993 =cut
994
995 sub print_league_table {
996 my($text,$m_count,$m_data,$m_data_gigs) = @_;
997 my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";
998 my($temp) = "Top $name by message count";
999 my(@chartdatanames) = ();
1000 my(@chartdatavals) = ();
1001 my $chartotherval = 0;
1002
1003 my($format);
1004 if ($html) {
1005 print "<hr><a name=\"$text count\"></a><h2>$temp</h2>\n";
1006 print "<table border=0 width=\"100%\">\n";
1007 print "<tr><td>\n";
1008 print "<table border=1>\n";
1009 print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1010
1011 # Align non-local addresses to the right (so all the .com's line up).
1012 # Local addresses are aligned on the left as they are userids.
1013 my $align = ($text !~ /local/i) ? 'right' : 'left';
1014 $format = "<tr><td align=\"right\">%d</td><td align=\"right\">%s</td><td align=\"$align\" nowrap>%s</td>\n";
1015 }
1016 else {
1017 printf("%s\n%s\n\n", $temp, "-" x length($temp));
1018 $format = "%7d %10s %s\n";
1019 }
1020
1021 my($key,$htmlkey);
1022 foreach $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {
1023 if ($html) {
1024 $htmlkey = $key;
1025 $htmlkey =~ s/>/\&gt\;/g;
1026 $htmlkey =~ s/</\&lt\;/g;
1027 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1028 }
1029 else {
1030 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1031 }
1032 if (scalar @chartdatanames < $ntopchart)
1033 {
1034 push(@chartdatanames, $key);
1035 push(@chartdatavals, $$m_count{$key});
1036 }
1037 else
1038 {
1039 $chartotherval += $$m_count{$key};
1040 }
1041 }
1042 push(@chartdatanames, "Other");
1043 push(@chartdatavals, $chartotherval);
1044
1045 if ($html)
1046 {
1047 print "</table>\n";
1048 print "</td><td>\n";
1049 if ($HAVE_GD_Graph_pie && $charts)
1050 {
1051 # calculate the graph
1052 my @data = (
1053 \@chartdatanames,
1054 \@chartdatavals
1055 );
1056 my $graph = GD::Graph::pie->new(300, 300);
1057 $graph->set(
1058 x_label => 'Name',
1059 y_label => 'Amount',
1060 title => 'By count',
1061 );
1062 my $gd = $graph->plot(\@data) or warn($graph->error);
1063 if ($gd) {
1064 my $temp = $text;
1065 $temp =~ s/ /_/g;
1066 open(IMG, ">$chartdir/${temp}_count.png") or die $!;
1067 binmode IMG;
1068 print IMG $gd->png;
1069 close IMG;
1070 print "<img src=\"$chartrel/${temp}_count.png\">";
1071 }
1072 }
1073 print "</td><td>\n";
1074 print "</td></tr></table>\n";
1075 }
1076 print "\n";
1077
1078 $temp = "Top $name by volume";
1079 if ($html) {
1080 print "<hr><a name=\"$text volume\"></a><h2>$temp</h2>\n";
1081 print "<table border=0 width=\"100%\">\n";
1082 print "<tr><td>\n";
1083 print "<table border=1>\n";
1084 print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1085 }
1086 else {
1087 printf("%s\n%s\n\n", $temp, "-" x length($temp));
1088 }
1089
1090 @chartdatanames = ();
1091 @chartdatavals = ();
1092 $chartotherval = 0;
1093 foreach $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
1094 if ($html) {
1095 $htmlkey = $key;
1096 $htmlkey =~ s/>/\&gt\;/g;
1097 $htmlkey =~ s/</\&lt\;/g;
1098 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1099 }
1100 else {
1101 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1102 }
1103
1104 if (scalar @chartdatanames < $ntopchart)
1105 {
1106 push(@chartdatanames, $key);
1107 push(@chartdatavals, $$m_count{$key});
1108 }
1109 else
1110 {
1111 $chartotherval += $$m_count{$key};
1112 }
1113 }
1114 push(@chartdatanames, "Other");
1115 push(@chartdatavals, $chartotherval);
1116
1117 if ($html) {
1118 print "</table>\n";
1119 print "</td><td>\n";
1120 if ($HAVE_GD_Graph_pie && $charts) {
1121 # calculate the graph
1122 my @data = (
1123 \@chartdatanames,
1124 \@chartdatavals
1125 );
1126 my $graph = GD::Graph::pie->new(300, 300);
1127 $graph->set(
1128 x_label => 'Name',
1129 y_label => 'Volume',
1130 title => 'By Volume',
1131 );
1132 my $gd = $graph->plot(\@data) or warn($graph->error);
1133 if ($gd) {
1134 my $temp = $text;
1135 $temp =~ s/ /_/g;
1136 open(IMG, ">$chartdir/${temp}_volume.png") or die $!;
1137 binmode IMG;
1138 print IMG $gd->png;
1139 close IMG;
1140 print "<img src=\"$chartrel/${temp}_volume.png\">";
1141 }
1142 }
1143 print "</td><td>\n";
1144 print "</td></tr></table>\n";
1145 }
1146
1147 print "\n";
1148 }
1149
1150
1151 =head2 top_n_sort();
1152
1153 @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
1154
1155 Given a hash which has numerical values, return the sorted $n keys which
1156 point to the top values. The second and third hashes are used as
1157 tiebreakers. They all must have the same keys.
1158
1159 The idea behind this routine is that when you only want to see the
1160 top n members of a set, rather than sorting the entire set and then
1161 plucking off the top n, sort through the stack as you go, discarding
1162 any member which is lower than your current n'th highest member.
1163
1164 This proves to be an order of magnitude faster for large hashes.
1165 On 200,000 lines of mainlog it benchmarked 9 times faster.
1166 On 700,000 lines of mainlog it benchmarked 13.8 times faster.
1167
1168 We assume the values are > 0.
1169
1170 =cut
1171
1172 sub top_n_sort {
1173 my($n,$href1,$href2,$href3) = @_;
1174
1175 # PH's original sort was:
1176 #
1177 # foreach $key (sort
1178 # {
1179 # $$m_count{$b} <=> $$m_count{$a} ||
1180 # $$m_data_gigs{$b} <=> $$m_data_gigs{$a} ||
1181 # $$m_data{$b} <=> $$m_data{$a} ||
1182 # $a cmp $b
1183 # }
1184 # keys %{$m_count})
1185 #
1186
1187 #We use a key of '_' to represent non-existant values, as null keys are valid.
1188 #'_' is not a valid domain, edomain, host, or email.
1189 my(@top_n_keys) = ('_') x $n;
1190 my($minimum_value1,$minimum_value2,$minimum_value3) = (0,0,0);
1191 my $top_n_key = '';
1192 my $n_minus_1 = $n - 1;
1193 my $n_minus_2 = $n - 2;
1194
1195 # Pick out the top $n keys.
1196 my($key,$value1,$value2,$value3,$i,$comparison,$insert_position);
1197 while (($key,$value1) = each %$href1) {
1198
1199 #print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n";
1200
1201 # Check to see that the new value is bigger than the lowest of the
1202 # top n keys that we're keeping.
1203 $comparison = $value1 <=> $minimum_value1 ||
1204 $href2->{$key} <=> $minimum_value2 ||
1205 $href3->{$key} <=> $minimum_value3 ||
1206 $top_n_key cmp $key;
1207 next unless ($comparison == 1);
1208
1209 # As we will be using these values a few times, extract them into scalars.
1210 $value2 = $href2->{$key};
1211 $value3 = $href3->{$key};
1212
1213 # This key is bigger than the bottom n key, so the lowest position we
1214 # will insert it into is $n minus 1 (the bottom of the list).
1215 $insert_position = $n_minus_1;
1216
1217 # Now go through the list, stopping when we find a key that we're
1218 # bigger than, or we come to the penultimate position - we've
1219 # already tested bigger than the last.
1220 #
1221 # Note: we go top down as the list starts off empty.
1222 # Note: stepping through the list in this way benchmarks nearly
1223 # three times faster than doing a sort() on the reduced list.
1224 # I assume this is because the list is already in order, and
1225 # we get a performance boost from not having to do hash lookups
1226 # on the new key.
1227 for ($i = 0; $i < $n_minus_1; $i++) {
1228 $top_n_key = $top_n_keys[$i];
1229 if ( ($top_n_key eq '_') ||
1230 ( ($value1 <=> $href1->{$top_n_key} ||
1231 $value2 <=> $href2->{$top_n_key} ||
1232 $value3 <=> $href3->{$top_n_key} ||
1233 $top_n_key cmp $key) == 1
1234 )
1235 ) {
1236 $insert_position = $i;
1237 last;
1238 }
1239 }
1240
1241 # Remove the last element, then insert the new one.
1242 $#top_n_keys = $n_minus_2;
1243 splice(@top_n_keys,$insert_position,0,$key);
1244
1245 # Extract our new minimum values.
1246 $top_n_key = $top_n_keys[$n_minus_1];
1247 if ($top_n_key ne '_') {
1248 $minimum_value1 = $href1->{$top_n_key};
1249 $minimum_value2 = $href2->{$top_n_key};
1250 $minimum_value3 = $href3->{$top_n_key};
1251 }
1252 }
1253
1254 # Return the top n list, grepping out non-existant values, just in case
1255 # we didn't have that many values.
1256 return(grep(!/^_$/,@top_n_keys));
1257 }
1258
1259
1260 =head2 html_header();
1261
1262 $header = html_header($title);
1263
1264 Print our HTML header and start the <body> block.
1265
1266 =cut
1267
1268 sub html_header {
1269 my($title) = @_;
1270 my $text = << "EoText";
1271 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
1272 <html>
1273 <head>
1274 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15">
1275 <title>$title</title>
1276 </head>
1277 <body bgcolor="white">
1278 <h1>$title</h1>
1279 EoText
1280 return $text;
1281 }
1282
1283
1284
1285 =head2 help();
1286
1287 help();
1288
1289 Display usage instructions and exit.
1290
1291 =cut
1292
1293 sub help {
1294 print << "EoText";
1295
1296 eximstats Version $VERSION
1297
1298 Usage: eximstats [Options] mainlog1 mainlog2 ... > report.txt
1299 eximstats -html [Options] mainlog1 mainlog2 ... > report.html
1300 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_rep.txt
1301 eximstats -merge -html [Options] report.1.html ... > weekly_rep.html
1302
1303 Parses exim mainlog files and generates a statistical analysis of
1304 the messages processed. Valid options are:
1305
1306 -h<number> histogram divisions per hour. The default is 1, and
1307 0 suppresses histograms. Other valid values are:
1308 2, 3, 5, 10, 15, 20, 30 or 60.
1309 -ne don't display error information
1310 -nr don't display relaying information
1311 -nr/pattern/ don't display relaying information that matches
1312 -nt don't display transport information
1313 -nt/pattern/ don't display transport information that matches
1314 -nvr don't do volume rounding. Display in bytes, not KB/MB/GB.
1315 -q<list> list of times for queuing information
1316 single 0 item suppresses
1317 -t<number> display top <number> sources/destinations
1318 default is 50, 0 suppresses top listing
1319 -tnl omit local sources/destinations in top listing
1320 -t_remote_users show top user sources/destinations from non-local domains
1321
1322 -byhost show results by sending host (default unless bydomain or
1323 byemail is specified)
1324 -bydomain show results by sending domain.
1325 -byemail show results by sender's email address
1326 -byedomain show results by sender's email domain
1327
1328 -pattern "Description" /pattern/
1329 Count lines matching specified patterns and show them in
1330 the results. It can be specified multiple times. Eg:
1331 -pattern 'Refused connections' '/refused connection/'
1332
1333 -merge merge previously generated reports into a new report
1334
1335 -html output the results in HTML
1336 -charts Create charts (this requires the GD::Graph modules)
1337 -chartdir <dir> Create the charts' png files in the directory <dir>
1338 -chartrel <dir> Specify the relative directory for the "img src=" tags
1339 from where to include the charts in the html file
1340 -chartdir and -chartrel default to '.'
1341
1342 -d Debug mode - dump the eval'ed parser onto STDERR.
1343
1344 EoText
1345
1346 exit 1;
1347 }
1348
1349
1350
1351 =head2 generate_parser();
1352
1353 $parser = generate_parser();
1354
1355 This subroutine generates the parsing routine which will be
1356 used to parse the mainlog. We take the base operation, and remove bits not in use.
1357 This improves performance depending on what bits you take out or add.
1358
1359 I've tested using study(), but this does not improve performance.
1360
1361 We store our parsing routing in a variable, and process it looking for #IFDEF (Expression)
1362 or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
1363 the expression evaluates to true, then it is included/excluded accordingly.
1364
1365 =cut
1366
1367 sub generate_parser {
1368 my $parser = '
1369 my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
1370 my($tod,$m_hour,$m_min,$id,$flag);
1371 while (<$fh>) {
1372 next if length($_) < 38;
1373
1374 # PH/FANF
1375 # next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d)/;
1376 next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o;
1377
1378 ($tod,$m_hour,$m_min) = ($1,$2,$3);
1379
1380 # PH
1381 my($extra) = defined($4)? 6 : 0;
1382 $id = substr($_, 20 + $extra, 16);
1383 $flag = substr($_, 37 + $extra, 2);
1384 ';
1385
1386 # Watch for user specified patterns.
1387 my $user_pattern_index = 0;
1388 foreach (@user_patterns) {
1389 $user_pattern_totals[$user_pattern_index] = 0;
1390 $parser .= " \$user_pattern_totals[$user_pattern_index]++ if $_;\n";
1391 $user_pattern_index++;
1392 }
1393
1394 $parser .= '
1395 next unless ($flag =~ /<=|=>|->|==|\\*\\*|Co/);
1396
1397 #Strip away the timestamp, ID and flag (which could be "Com" for completed)
1398 #This speeds up the later pattern matches.
1399 # $_ = substr($_, 40);
1400
1401 $_ = substr($_, 40 + $extra); # PH
1402
1403 # JN - Skip over certain transports as specified via the "-nt/.../" command
1404 # line switch (where ... is a perl style regular expression). This is
1405 # required so that transports that skew stats such as SpamAssassin can be
1406 # ignored.
1407 #IFDEF ($transport_pattern)
1408 if (/\\sT=(\\S+)/) {
1409 next if ($1 =~ /$transport_pattern/o) ;
1410 }
1411 #ENDIF ($transport_pattern)
1412
1413
1414 $host = "local"; #Host is local unless otherwise specified.
1415 $domain = "localdomain"; #Domain is localdomain unless otherwise specified.
1416
1417
1418 # Do some pattern matches to get the host and IP address.
1419 # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or
1420 # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
1421 # We do 2 separate matches to keep the matches simple and fast.
1422 if (/\\sH=(\\S+)/) {
1423 $host = $1;
1424
1425 ($ip) = /\\sH=.*?(\\s\\[[^]]+\\])/;
1426 # If there is only an IP address, it will be in $host and $ip will be
1427 # unset. That is OK, because we only use $ip in conjunction with $host
1428 # below. But make it empty to avoid warning messages.
1429 $ip = "" if !defined $ip;
1430
1431 #IFDEF ($do_sender{Domain})
1432 if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
1433 # Remove the host portion from the DNS name. We ensure that we end up with
1434 # at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z".
1435 $domain = lc("$1.$2");
1436 $domain =~ s/^\\.//; #Remove preceding dot.
1437 }
1438 #ENDIF ($do_sender{Domain})
1439
1440 }
1441
1442 #IFDEF ($do_sender{Email})
1443 $email = (/^(\S+)/) ? $1 : "";
1444 #ENDIF ($do_sender{Email})
1445
1446 #IFDEF ($do_sender{Edomain})
1447 $edomain = (/^\S*?\\@(\S+)/) ? lc($1) : "";
1448 #ENDIF ($do_sender{Edomain})
1449
1450 if ($tod lt $begin) {
1451 $begin = $tod;
1452 }
1453 elsif ($tod gt $end) {
1454 $end = $tod;
1455 }
1456
1457
1458 if ($flag eq "<=") {
1459 $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0;
1460 $size{$id} = $thissize;
1461
1462 #IFDEF ($show_relay)
1463 if ($host ne "local") {
1464 # Save incoming information in case it becomes interesting
1465 # later, when delivery lines are read.
1466 my($from) = /^(\\S+)/;
1467 $from_host{$id} = "$host$ip";
1468 $from_address{$id} = $from;
1469 }
1470 #ENDIF ($show_relay)
1471
1472 #IFDEF ($local_league_table || $include_remote_users)
1473 if (/\sU=(\\S+)/) {
1474 my $user = $1;
1475
1476 #IFDEF ($local_league_table && $include_remote_users)
1477 { #Store both local and remote users.
1478 #ENDIF ($local_league_table && $include_remote_users)
1479
1480 #IFDEF ($local_league_table && ! $include_remote_users)
1481 if ($host eq "local") { #Store local users only.
1482 #ENDIF ($local_league_table && ! $include_remote_users)
1483
1484 #IFDEF ($include_remote_users && ! $local_league_table)
1485 if ($host ne "local") { #Store remote users only.
1486 #ENDIF ($include_remote_users && ! $local_league_table)
1487
1488 $received_count_user{$user}++;
1489 add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize);
1490 }
1491 }
1492 #ENDIF ($local_league_table || $include_remote_users)
1493
1494 #IFDEF ($do_sender{Host})
1495 $received_count{Host}{$host}++;
1496 add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize);
1497 #ENDIF ($do_sender{Host})
1498
1499 #IFDEF ($do_sender{Domain})
1500 if ($domain) {
1501 $received_count{Domain}{$domain}++;
1502 add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize);
1503 }
1504 #ENDIF ($do_sender{Domain})
1505
1506 #IFDEF ($do_sender{Email})
1507 $received_count{Email}{$email}++;
1508 add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize);
1509 #ENDIF ($do_sender{Email})
1510
1511 #IFDEF ($do_sender{Edomain})
1512 $received_count{Edomain}{$edomain}++;
1513 add_volume(\\$received_data{Edomain}{$edomain},\\$received_data_gigs{Edomain}{$edomain},$thissize);
1514 #ENDIF ($do_sender{Edomain})
1515
1516 $total_received_count++;
1517 add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize);
1518
1519 #IFDEF ($#queue_times >= 0)
1520 $arrival_time{$id} = $tod;
1521 #ENDIF ($#queue_times >= 0)
1522
1523 #IFDEF ($hist_opt > 0)
1524 $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1525 #ENDIF ($hist_opt > 0)
1526 }
1527
1528 elsif ($flag eq "=>") {
1529 $size = $size{$id} || 0;
1530 if ($host ne "local") {
1531 $remote_delivered{$id} = 1;
1532
1533
1534 #IFDEF ($show_relay)
1535 # Determine relaying address if either only one address listed,
1536 # or two the same. If they are different, it implies a forwarding
1537 # or aliasing, which is not relaying. Note that for multi-aliased
1538 # addresses, there may be a further address between the first
1539 # and last.
1540
1541 if (defined $from_host{$id}) {
1542 if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) {
1543 ($old,$new) = ($1,$2);
1544 }
1545 else {
1546 $old = $new = "";
1547 }
1548
1549 if ("\\L$new" eq "\\L$old") {
1550 ($old) = /^(\\S+)/ if $old eq "";
1551 my $key = "H=\\L$from_host{$id}\\E A=\\L$from_address{$id}\\E => " .
1552 "H=\\L$host\\E$ip A=\\L$old\\E";
1553 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
1554 $relayed{$key} = 0 if !defined $relayed{$key};
1555 $relayed{$key}++;
1556 }
1557 else {
1558 $relayed_unshown++
1559 }
1560 }
1561 }
1562 #ENDIF ($show_relay)
1563
1564 }
1565
1566 #IFDEF ($local_league_table || $include_remote_users)
1567 #IFDEF ($local_league_table && $include_remote_users)
1568 { #Store both local and remote users.
1569 #ENDIF ($local_league_table && $include_remote_users)
1570
1571 #IFDEF ($local_league_table && ! $include_remote_users)
1572 if ($host eq "local") { #Store local users only.
1573 #ENDIF ($local_league_table && ! $include_remote_users)
1574
1575 #IFDEF ($include_remote_users && ! $local_league_table)
1576 if ($host ne "local") { #Store remote users only.
1577 #ENDIF ($include_remote_users && ! $local_league_table)
1578
1579 if (my($user) = split((/\\s</)? " <" : " ", $_)) {
1580 if ($user =~ /^[\\/|]/) {
1581 my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
1582 $user = "$user $parent" if defined $parent;
1583 }
1584 $delivered_count_user{$user}++;
1585 add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size);
1586 }
1587 }
1588 #ENDIF ($local_league_table || $include_remote_users)
1589
1590 #IFDEF ($do_sender{Host})
1591 $delivered_count{Host}{$host}++;
1592 add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size);
1593 #ENDIF ($do_sender{Host})
1594 #IFDEF ($do_sender{Domain})
1595 if ($domain) {
1596 $delivered_count{Domain}{$domain}++;
1597 add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size);
1598 }
1599 #ENDIF ($do_sender{Domain})
1600 #IFDEF ($do_sender{Email})
1601 $delivered_count{Email}{$email}++;
1602 add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size);
1603 #ENDIF ($do_sender{Email})
1604 #IFDEF ($do_sender{Edomain})
1605 $delivered_count{Edomain}{$edomain}++;
1606 add_volume(\\$delivered_data{Edomain}{$edomain},\\$delivered_data_gigs{Edomain}{$edomain},$size);
1607 #ENDIF ($do_sender{Edomain})
1608
1609 $total_delivered_count++;
1610 add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size);
1611
1612 #IFDEF ($show_transport)
1613 my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:";
1614 $transported_count{$transport}++;
1615 add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size);
1616 #ENDIF ($show_transport)
1617
1618 #IFDEF ($hist_opt > 0)
1619 $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1620 #ENDIF ($hist_opt > 0)
1621
1622 }
1623
1624 elsif ($flag eq "==" && defined($size{$id}) && !defined($delayed{$id})) {
1625 $delayed_count++;
1626 $delayed{$id} = 1;
1627 }
1628
1629 elsif ($flag eq "**") {
1630 $had_error{$id} = 1 if defined ($size{$id});
1631
1632 #IFDEF ($show_errors)
1633 $errors_count{$_}++;
1634 #ENDIF ($show_errors)
1635
1636 }
1637
1638 elsif ($flag eq "Co") {
1639 #Completed?
1640 #IFDEF ($#queue_times >= 0)
1641 #Note: id_seconds() benchmarks as 42% slower than seconds() and computing
1642 #the time accounts for a significant portion of the run time.
1643 my($queued);
1644 if (defined $arrival_time{$id}) {
1645 $queued = seconds($tod) - seconds($arrival_time{$id});
1646 delete($arrival_time{$id});
1647 }
1648 else {
1649 $queued = seconds($tod) - id_seconds($id);
1650 }
1651
1652 for ($i = 0; $i <= $#queue_times; $i++) {
1653 if ($queued < $queue_times[$i]) {
1654 $queue_bin[$i]++;
1655 $remote_queue_bin[$i]++ if $remote_delivered{$id};
1656 last;
1657 }
1658 }
1659 $queue_more_than++ if $i > $#queue_times;
1660 #ENDIF ($#queue_times >= 0)
1661
1662 #IFDEF ($show_relay)
1663 delete($from_host{$id});
1664 delete($from_address{$id});
1665 #ENDIF ($show_relay)
1666
1667 }
1668 }';
1669
1670 # We now do a 'C preprocessor style operation on our parser
1671 # to remove bits not in use.
1672 my(%defines_in_operation,$removing_lines,$processed_parser);
1673 foreach (split (/\n/,$parser)) {
1674 if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i && ! eval $1) ||
1675 (/^\s*#\s*IFNDEF\s*\((.*?)\)/i && eval $1) ) {
1676 $defines_in_operation{$1} = 1;
1677 $removing_lines = 1;
1678 }
1679
1680 $processed_parser .= $_."\n" unless $removing_lines;
1681
1682 if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) {
1683 delete $defines_in_operation{$1};
1684 unless (keys %defines_in_operation) {
1685 $removing_lines = 0;
1686 }
1687 }
1688 }
1689 print STDERR "# START OF PARSER:\n$processed_parser\n# END OF PARSER\n\n" if $debug;
1690
1691 return $processed_parser;
1692 }
1693
1694
1695
1696 =head2 parse();
1697
1698 parse($parser,\*FILEHANDLE);
1699
1700 This subroutine accepts a parser and a filehandle from main and parses each
1701 line. We store the results into global variables.
1702
1703 =cut
1704
1705 sub parse {
1706 my($parser,$fh) = @_;
1707
1708 if ($merge_reports) {
1709 parse_old_eximstat_reports($fh);
1710 }
1711 else {
1712 eval $parser;
1713 die ($@) if $@;
1714 }
1715
1716 }
1717
1718
1719
1720 =head2 print_header();
1721
1722 print_header();
1723
1724 Print our headers and contents.
1725
1726 =cut
1727
1728 sub print_header {
1729
1730 my $title = "Exim statistics from $begin to $end";
1731
1732 if ($html) {
1733 print html_header($title);
1734 print "<ul>\n";
1735 print "<li><a href=\"#grandtotal\">Grand total summary</a>\n";
1736 print "<li><a href=\"#patterns\">User Specified Patterns</a>\n" if @user_patterns;
1737 print "<li><a href=\"#transport\">Deliveries by Transport</a>\n" if $show_transport;
1738 if ($hist_opt) {
1739 print "<li><a href=\"#Messages received\">Messages received per hour</a>\n";
1740 print "<li><a href=\"#Deliveries\">Deliveries per hour</a>\n";
1741 }
1742 if ($#queue_times >= 0) {
1743 print "<li><a href=\"#all messages time\">Time spent on the queue: all messages</a>\n";
1744 print "<li><a href=\"#messages with at least one remote delivery time\">Time spent on the queue: messages with at least one remote delivery</a>\n";
1745 }
1746 print "<li><a href=\"#Relayed messages\">Relayed messages</a>\n" if $show_relay;
1747 if ($topcount) {
1748 foreach ('Host','Domain','Email','Edomain') {
1749 next unless $do_sender{$_};
1750 print "<li><a href=\"#sending \l$_ count\">Top $topcount sending \l${_}s by message count</a>\n";
1751 print "<li><a href=\"#sending \l$_ volume\">Top $topcount sending \l${_}s by volume</a>\n";
1752 }
1753 if ($local_league_table || $include_remote_users) {
1754 print "<li><a href=\"#local sender count\">Top $topcount local senders by message count</a>\n";
1755 print "<li><a href=\"#local sender volume\">Top $topcount local senders by volume</a>\n";
1756 }
1757 foreach ('Host','Domain','Email','Edomain') {
1758 next unless $do_sender{$_};
1759 print "<li><a href=\"#\l$_ destination count\">Top $topcount \l$_ destinations by message count</a>\n";
1760 print "<li><a href=\"#\l$_ destination volume\">Top $topcount \l$_ destinations by volume</a>\n";
1761 }
1762 if ($local_league_table || $include_remote_users) {
1763 print "<li><a href=\"#local destination count\">Top $topcount local destinations by message count</a>\n";
1764 print "<li><a href=\"#local destination volume\">Top $topcount local destinations by volume</a>\n";
1765 }
1766 }
1767 print "<li><a href=\"#errors\">List of errors</a>\n" if %errors_count;
1768 print "</ul>\n<hr>\n";
1769
1770 }
1771 else {
1772 print "\n$title\n";
1773 }
1774 }
1775
1776
1777 =head2 print_grandtotals();
1778
1779 print_grandtotals();
1780
1781 Print the grand totals.
1782
1783 =cut
1784
1785 sub print_grandtotals {
1786
1787 # Get the sender by headings and results. This is complicated as we can have
1788 # different numbers of columns.
1789 my($sender_txt_header,$sender_html_header,$sender_txt_format,$sender_html_format);
1790 my(@received_totals,@delivered_totals);
1791 foreach ('Host','Domain','Email','Edomain') {
1792 next unless $do_sender{$_};
1793 if ($merge_reports) {
1794 push(@received_totals, get_report_total($report_totals{Received},"${_}s"));
1795 push(@delivered_totals,get_report_total($report_totals{Delivered},"${_}s"));
1796 }
1797 else {
1798 push(@received_totals,scalar(keys %{$received_data{$_}}));
1799 push(@delivered_totals,scalar(keys %{$delivered_data{$_}}));
1800 }
1801 $sender_html_header .= "<th>${_}s</th>";
1802 $sender_txt_header .= " " x ($COLUMN_WIDTHS - length($_)) . $_ . 's';
1803 $sender_html_format .= "<td align=\"right\">%d</td>";
1804 $sender_txt_format .= " " x ($COLUMN_WIDTHS - 5) . "%6d";
1805 }
1806
1807 my($format1,$format2);
1808 if ($html) {
1809 print << "EoText";
1810 <a name="grandtotal"></a>
1811 <h2>Grand total summary</h2>
1812 <table border=1>
1813 <tr><th>TOTAL</th><th>Volume</th><th>Messages</th>$sender_html_header<th colspan=2>At least one addr<br>Delayed</th><th colspan=2>At least one addr<br>Failed</th>
1814 EoText
1815
1816 $format1 = "<tr><td>%s</td><td align=\"right\">%s</td>$sender_html_format<td align=\"right\">%d</td>";
1817 $format2 = "<td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td><td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td>";
1818 }
1819 else {
1820 my $sender_spaces = " " x length($sender_txt_header);
1821 print << "EoText";
1822
1823 Grand total summary
1824 -------------------
1825 $sender_spaces At least one address
1826 TOTAL Volume Messages $sender_txt_header Delayed Failed
1827 EoText
1828 $format1 = " %-16s %9s %6d $sender_txt_format";
1829 $format2 = " %6d %4.1f%% %6d %4.1f%%",
1830 }
1831
1832 my($volume,$failed_count);
1833 if ($merge_reports) {
1834 $volume = volume_rounded($report_totals{Received}{Volume}, $report_totals{Received}{'Volume-gigs'});
1835 $total_received_count = get_report_total($report_totals{Received},'Messages');
1836 $failed_count = get_report_total($report_totals{Received},'Failed');
1837 $delayed_count = get_report_total($report_totals{Received},'Delayed');
1838 }
1839 else {
1840 $volume = volume_rounded($total_received_data, $total_received_data_gigs);
1841 $failed_count = keys %had_error;
1842 }
1843
1844 {
1845 no integer;
1846 printf("$format1$format2\n",'Received',$volume,$total_received_count,
1847 @received_totals,$delayed_count,
1848 ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0,
1849 $failed_count,
1850 ($total_received_count) ? ($failed_count*100/$total_received_count) : 0);
1851 }
1852
1853 if ($merge_reports) {
1854 $volume = volume_rounded($report_totals{Delivered}{Volume}, $report_totals{Delivered}{'Volume-gigs'});
1855 $total_delivered_count = get_report_total($report_totals{Delivered},'Messages');
1856 }
1857 else {
1858 $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs);
1859 }
1860 printf("$format1\n\n",'Delivered',$volume,$total_delivered_count,@delivered_totals);
1861 print "</table>\n" if $html;
1862 }
1863
1864
1865 =head2 print_user_patterns()
1866
1867 print_user_patterns();
1868
1869 Print the counts of user specified patterns.
1870
1871 =cut
1872
1873 sub print_user_patterns {
1874 my($format1);
1875
1876 if ($html) {
1877 print "<hr><a name=\"patterns\"></a><h2>User Specified Patterns</h2>\n";
1878 print "<table border=0 width=\"100%\">\n";
1879 print "<tr><td>\n";
1880 print "<table border=1>\n";
1881 print "<tr><th>&nbsp;</th><th>Total</th>\n";
1882 $format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";
1883 }
1884 else {
1885 print "User Specified Patterns\n";
1886 print "-----------------------";
1887 print "\n Total\n";
1888 $format1 = " %-18s %6d";
1889 }
1890
1891 my($key);
1892 if ($merge_reports) {
1893 # We are getting our data from previous reports.
1894 foreach $key (@user_descriptions) {
1895 my $count = get_report_total($report_totals{patterns}{$key},'Total');
1896 printf("$format1\n",$key,$count);
1897 }
1898 }
1899 else {
1900 # We are getting our data from mainlog files.
1901 my $user_pattern_index = 0;
1902 foreach $key (@user_descriptions) {
1903 printf("$format1\n",$key,$user_pattern_totals[$user_pattern_index]);
1904 $user_pattern_index++;
1905 }
1906 }
1907 if ($html) {
1908 print "</table>\n";
1909 }
1910 print "\n";
1911 }
1912
1913
1914 =head2 print_transport();
1915
1916 print_transport();
1917
1918 Print totals by transport.
1919
1920 =cut
1921
1922 sub print_transport {
1923 my($format1);
1924 my(@chartdatanames);
1925 my(@chartdatavals_count);
1926 my(@chartdatavals_vol);
1927 no integer; #Lose this for charting the data.
1928
1929 if ($html) {
1930 print "<hr><a name=\"transport\"></a><h2>Deliveries by Transport</h2>\n";
1931 print "<table border=0 width=\"100%\">\n";
1932 print "<tr><td>\n";
1933 print "<table border=1>\n";
1934 print "<tr><th>&nbsp;</th><th>Volume</th><th>Messages</th>\n";
1935 $format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%d</td>";
1936 }
1937 else {
1938 print "Deliveries by transport\n";
1939 print "-----------------------";
1940 print "\n Volume Messages\n";
1941 $format1 = " %-18s %6s %6d";
1942 }
1943
1944 my($key);
1945 if ($merge_reports) {
1946 # We are getting our data from previous reports.
1947 foreach $key (sort keys %{$report_totals{transport}}) {
1948 my $count = get_report_total($report_totals{transport}{$key},'Messages');
1949 printf("$format1\n",$key,
1950 volume_rounded($report_totals{transport}{$key}{Volume},$report_totals{transport}{$key}{'Volume-gigs'}),
1951 $count);
1952 push(@chartdatanames, $key);
1953 push(@chartdatavals_count, $count);
1954 push(@chartdatavals_vol, $report_totals{transport}{$key}{'Volume-gigs'}*$gig + $report_totals{transport}{$key}{Volume} );
1955 }
1956 }
1957 else {
1958 # We are getting our data from mainlog files.
1959 foreach $key (sort keys %transported_data) {
1960 printf("$format1\n",$key,
1961 volume_rounded($transported_data{$key},$transported_data_gigs{$key}),
1962 $transported_count{$key});
1963 push(@chartdatanames, $key);
1964 push(@chartdatavals_count, $transported_count{$key});
1965 push(@chartdatavals_vol, $transported_data_gigs{$key}*$gig + $transported_data{$key});
1966 }
1967 }
1968 if ($html) {
1969 print "</table>\n";
1970 print "</td><td>\n";
1971 if ($HAVE_GD_Graph_pie && $charts)
1972 {
1973 # calculate the graph
1974 my @data = (
1975 \@chartdatanames,
1976 \@chartdatavals_count
1977 );
1978 my $graph = GD::Graph::pie->new(200, 200);
1979 $graph->set(
1980 x_label => 'Transport',
1981 y_label => 'Messages',
1982 title => 'By count',
1983 );
1984 my $gd = $graph->plot(\@data) or warn($graph->error);
1985 if ($gd) {
1986 open(IMG, ">$chartdir/transports_count.png") or die $!;
1987 binmode IMG;
1988 print IMG $gd->png;
1989 close IMG;
1990 print "<img src=\"$chartrel/transports_count.png\">";
1991 }
1992 }
1993 print "</td><td>\n";
1994
1995 if ($HAVE_GD_Graph_pie && $charts) {
1996 my @data = (
1997 \@chartdatanames,
1998 \@chartdatavals_vol
1999 );
2000 my $graph = GD::Graph::pie->new(200, 200);
2001 $graph->set(
2002 title => 'By volume',
2003 );
2004 my $gd = $graph->plot(\@data) or warn($graph->error);
2005 if ($gd) {
2006 open(IMG, ">$chartdir/transports_vol.png") or die $!;
2007 binmode IMG;
2008 print IMG $gd->png;
2009 close IMG;
2010 print "<img src=\"$chartrel/transports_vol.png\">";
2011 }
2012 }
2013 print "</td></tr></table>\n";
2014 }
2015 print "\n";
2016 }
2017
2018
2019
2020 =head2 print_relay();
2021
2022 print_relay();
2023
2024 Print our totals by relay.
2025
2026 =cut
2027
2028 sub print_relay {
2029 my $temp = "Relayed messages";
2030 print "<hr><a name=\"$temp\"></a><h2>$temp</h2>\n" if $html;
2031 if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) {
2032 my $shown = 0;
2033 my $spacing = "";
2034 my($format);
2035
2036 if ($html) {
2037 print "<table border=1>\n";
2038 print "<tr><th>Count</th><th>From</th><th>To</th>\n";
2039 $format = "<tr><td align=\"right\">%d</td><td>%s</td><td>%s</td>\n";
2040 }
2041 else {
2042 printf("%s\n%s\n\n", $temp, "-" x length($temp));
2043 $format = "%7d %s\n => %s\n";
2044 }
2045
2046 my($key);
2047 foreach $key (sort keys %relayed) {
2048 my $count = $relayed{$key};
2049 $shown += $count;
2050 $key =~ s/[HA]=//g;
2051 my($one,$two) = split(/=> /, $key);
2052 printf($format, $count, $one, $two);
2053 $spacing = "\n";
2054 }
2055 print "</table>\n<p>\n" if $html;
2056 print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n";
2057 }
2058 else {
2059 print "No relayed messages\n";
2060 print "-------------------\n" unless $html;
2061 }
2062 print "\n";
2063 }
2064
2065
2066
2067 =head2 print_errors();
2068
2069 print_errors();
2070
2071 Print our errors. In HTML, we display them as a list rather than a table -
2072 Netscape doesn't like large tables!
2073
2074 =cut
2075
2076 sub print_errors {
2077 my $total_errors = 0;
2078
2079 if (scalar(keys %errors_count) != 0) {
2080 my $temp = "List of errors";
2081 my($format);
2082 if ($html) {
2083 print "<hr><a name=\"errors\"></a><h2>$temp</h2>\n";
2084 print "<ul><li><b>Count - Error</b>\n";
2085 $format = "<li>%d - %s\n";
2086 }
2087 else {
2088 printf("%s\n%s\n\n", $temp, "-" x length($temp));
2089 }
2090
2091 my($key);
2092 foreach $key (sort keys %errors_count) {
2093 my $text = $key;
2094 chomp($text);
2095 $text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space.
2096 $total_errors += $errors_count{$key};
2097 if ($html) {
2098
2099 #Translate HTML tag characters. Sergey Sholokh.
2100 $text =~ s/\</\&lt\;/g;
2101 $text =~ s/\>/\&gt\;/g;
2102
2103 printf($format,$errors_count{$key},$text);
2104 }
2105 else {
2106 printf("%5d ", $errors_count{$key});
2107 while (length($text) > 65) {
2108 my($first,$rest) = $text =~ /(.{50}\S*)\s+(.+)/;
2109 last if !$first;
2110 printf("%s\n ", $first);
2111 $text = $rest;
2112 }
2113 printf("%s\n\n", $text);
2114 }
2115 }
2116 print "</ul>\n<p>\n" if $html;
2117
2118 $temp = "Errors encountered: $total_errors";
2119 print $temp,"\n";
2120 print "-" x length($temp),"\n" unless $html;
2121 }
2122
2123 }
2124
2125
2126 =head2 parse_old_eximstat_reports();
2127
2128 parse_old_eximstat_reports($fh);
2129
2130 Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
2131
2132 To test that the merging still works after changes, do something like the following.
2133 All the diffs should produce no output.
2134
2135 options='-bydomain -byemail -byhost -byedomain'
2136 options="$options -pattern 'Completed Messages' /Completed/"
2137 options="$options -pattern 'Received Messages' /<=/"
2138
2139 ./eximstats $options mainlog > mainlog.txt
2140 ./eximstats $options -merge mainlog.txt > mainlog.2.txt
2141 diff mainlog.txt mainlog.2.txt
2142
2143 ./eximstats $options -html mainlog > mainlog.html
2144 ./eximstats $options -merge -html mainlog.txt > mainlog.2.html
2145 diff mainlog.html mainlog.2.html
2146
2147 ./eximstats $options -merge mainlog.html > mainlog.3.txt
2148 diff mainlog.txt mainlog.3.txt
2149
2150 ./eximstats $options -merge -html mainlog.html > mainlog.3.html
2151 diff mainlog.html mainlog.3.html
2152
2153 ./eximstats $options -nvr mainlog > mainlog.nvr.txt
2154 ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
2155 diff mainlog.txt mainlog.4.txt
2156
2157 # double_mainlog.txt should have twice the values that mainlog.txt has.
2158 ./eximstats $options mainlog mainlog > double_mainlog.txt
2159
2160 =cut
2161
2162 sub parse_old_eximstat_reports {
2163 my($fh) = @_;
2164
2165 my(%league_table_value_entered, %league_table_value_was_zero, %table_order);
2166
2167 while (<$fh>) {
2168 if (/Exim statistics from ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?) to ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?)/) {
2169 $begin = $1 if ($1 lt $begin);
2170 $end = $3 if ($3 gt $end);
2171 }
2172 elsif (/Grand total summary/) {
2173 # Fill in $report_totals{Received|Delivered}{Volume|Messages|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
2174 my(@fields);
2175 while (<$fh>) {
2176 $_ = html2txt($_); #Convert general HTML markup to text.
2177 s/At least one addr//g; #Another part of the HTML output we don't want.
2178
2179 # TOTAL Volume Messages Hosts Domains Delayed Failed
2180 # Received 26MB 237 177 23 8 3.4% 28 11.8%
2181 # Delivered 13MB 233 99 88
2182 if (/TOTAL\s+(.*?)\s*$/) {
2183 @fields = split(/\s+/,$1);
2184 #Delayed and Failed have two columns each, so add the extra field names in.
2185 splice(@fields,-1,1,'DelayedPercent','Failed','FailedPercent');
2186 }
2187 elsif (/(Received|Delivered)\s+(.*?)\s*$/) {
2188 print STDERR "Parsing $_" if $debug;
2189 add_to_totals($report_totals{$1},\@fields,$2);
2190 }
2191 last if (/Delivered/); #Last line of this section.
2192 }
2193 }
2194
2195 elsif (/User Specified Patterns/i) {
2196 #User Specified Patterns
2197 #-----------------------
2198 # Total
2199 # Description 85
2200
2201 while (<$fh>) { last if (/Total/); } #Wait until we get the table headers.
2202 while (<$fh>) {
2203 print STDERR "Parsing $_" if $debug;
2204 $_ = html2txt($_); #Convert general HTML markup to text.
2205 if (/^\s*(.*?)\s+(\d+)\s*$/) {
2206 $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
2207 add_to_totals($report_totals{patterns}{$1},['Total'],$2);
2208 }
2209 last if (/^\s*$/); #Finished if we have a blank line.
2210 }
2211 }
2212
2213 elsif (/Deliveries by transport/i) {
2214 #Deliveries by transport
2215 #-----------------------
2216 # Volume Messages
2217 # :blackhole: 70KB 51
2218 # address_pipe 655KB 1
2219 # smtp 11MB 151
2220
2221 while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers.
2222 while (<$fh>) {
2223 print STDERR "Parsing $_" if $debug;
2224 $_ = html2txt($_); #Convert general HTML markup to text.
2225 if (/(\S+)\s+(\d+\S*\s+\d+)/) {
2226 $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
2227 add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
2228 }
2229 last if (/^\s*$/); #Finished if we have a blank line.
2230 }
2231 }
2232 elsif (/(Messages received|Deliveries) per/) {
2233 # Messages received per hour (each dot is 2 messages)
2234 #---------------------------------------------------
2235 #
2236 #00-01 106 .....................................................
2237 #01-02 103 ...................................................
2238
2239 # Set a pointer to the interval array so we can use the same code
2240 # block for both messages received and delivered.
2241 my $interval_aref = ($1 eq 'Deliveries') ? \@delivered_interval_count : \@received_interval_count;
2242 my $reached_table = 0;
2243 while (<$fh>) {
2244 $reached_table = 1 if (/^00/);
2245 next unless $reached_table;
2246 print STDERR "Parsing $_" if $debug;
2247 if (/^(\d+):(\d+)\s+(\d+)/) { #hh:mm start time format ?
2248 $$interval_aref[($1*60 + $2)/$hist_interval] += $3;
2249 }
2250 elsif (/^(\d+)-(\d+)\s+(\d+)/) { #hh-hh start-end time format ?
2251 $$interval_aref[($1*60)/$hist_interval] += $3;
2252 }
2253 else { #Finished the table ?
2254 last;
2255 }
2256 }
2257 }
2258
2259 elsif (/Time spent on the queue: (all messages|messages with at least one remote delivery)/) {
2260 #Time spent on the queue: all messages
2261 #-------------------------------------
2262 #
2263 #Under 1m 217 91.9% 91.9%
2264 # 5m 2 0.8% 92.8%
2265 # 3h 8 3.4% 96.2%
2266 # 6h 7 3.0% 99.2%
2267 # 12h 2 0.8% 100.0%
2268
2269 # Set a pointer to the queue bin so we can use the same code
2270 # block for both all messages and remote deliveries.
2271 my $bin_aref = ($1 eq 'all messages') ? \@queue_bin : \@remote_queue_bin;
2272 my $reached_table = 0;
2273 while (<$fh>) {
2274 $_ = html2txt($_); #Convert general HTML markup to text.
2275 $reached_table = 1 if (/^\s*Under/);
2276 next unless $reached_table;
2277 my $previous_seconds_on_queue = 0;
2278 if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
2279 print STDERR "Parsing $_" if $debug;
2280 my($modifier,$formated_time,$count) = ($1,$2,$3);
2281 my $seconds = unformat_time($formated_time);
2282 my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
2283 $previous_seconds_on_queue = $seconds;
2284 $time_on_queue = $seconds * 2 if ($modifier eq 'Over');
2285 my($i);
2286 for ($i = 0; $i <= $#queue_times; $i++) {
2287 if ($time_on_queue < $queue_times[$i]) {
2288 $$bin_aref[$i] += $count;
2289 last;
2290 }
2291 }
2292 # There's only one counter for messages going over the queue
2293 # times so make sure we only count it once.
2294 $queue_more_than += $count if (($bin_aref == \@queue_bin) && ($i > $#queue_times));
2295 }
2296 else {
2297 last; #Finished the table ?
2298 }
2299 }
2300 }
2301
2302 elsif (/Relayed messages/) {
2303 #Relayed messages
2304 #----------------
2305 #
2306 # 1 addr.domain.com [1.2.3.4] a.user@domain.com
2307 # => addr2.domain2.com [5.6.7.8] a2.user2@domain2.com
2308 #
2309 #<tr><td align="right">1</td><td>addr.domain.com [1.2.3.4] a.user@domain.com </td><td>addr2.domain2.com [5.6.7.8] a2.user2@domain2.com</td>
2310
2311 my $reached_table = 0;
2312 my($count,$sender);
2313 while (<$fh>) {
2314 unless ($reached_table) {
2315 last if (/No relayed messages/);
2316 $reached_table = 1 if (/^\s*\d/ || />\d+</);
2317 next unless $reached_table;
2318 }
2319 if (/>(\d+)<.td><td>(.*?) ?<.td><td>(.*?)</) {
2320 update_relayed($1,$2,$3);
2321 }
2322 elsif (/^\s*(\d+)\s+(.*?)\s*$/) {
2323 ($count,$sender) = ($1,$2);
2324 }
2325 elsif (/=>\s+(.*?)\s*$/) {
2326 update_relayed($count,$sender,$1);
2327 }
2328 else {
2329 last; #Finished the table ?
2330 }
2331 }
2332 }
2333
2334 elsif (/Top (.*?) by (message count|volume)/) {
2335 #Top 50 sending hosts by message count
2336 #-------------------------------------
2337 #
2338 # 48 1468KB local
2339 my($category,$by_count_or_volume) = ($1,$2);
2340
2341 #As we show 2 views of each table (by count and by volume),
2342 #most (but not all) entries will appear in both tables.
2343 #Set up a hash to record which entries we have already seen
2344 #and one to record which ones we are seeing for the first time.
2345 if ($by_count_or_volume =~ /count/) {
2346 undef %league_table_value_entered;
2347 undef %league_table_value_was_zero;
2348 undef %table_order;
2349 }
2350
2351 #As this section processes multiple different table categories,
2352 #set up pointers to the hashes to be updated.
2353 my($count_href,$data_href,$data_gigs_href);
2354 if ($category =~ /local sender/) {
2355 $count_href = \%received_count_user;
2356 $data_href = \%received_data_user;
2357 $data_gigs_href = \%received_data_gigs_user;
2358 }
2359 elsif ($category =~ /sending (\S+?)s?\b/) {
2360 #Top 50 sending (host|domain|email|edomain)s
2361 #Top sending (host|domain|email|edomain)
2362 $count_href = \%{$received_count{"\u$1"}};
2363 $data_href = \%{$received_data{"\u$1"}};
2364 $data_gigs_href = \%{$received_data_gigs{"\u$1"}};
2365 }
2366 elsif ($category =~ /local destination/) {
2367 $count_href = \%delivered_count_user;
2368 $data_href = \%delivered_data_user;
2369 $data_gigs_href = \%delivered_data_gigs_user;
2370 }
2371 elsif ($category =~ /(\S+) destination/) {
2372 #Top 50 (host|domain|email|edomain) destinations
2373 #Top (host|domain|email|edomain) destination
2374 $count_href = \%{$delivered_count{"\u$1"}};
2375 $data_href = \%{$delivered_data{"\u$1"}};
2376 $data_gigs_href = \%{$delivered_data_gigs{"\u$1"}};
2377 }
2378
2379 my $reached_table = 0;
2380 while (<$fh>) {
2381 $_ = html2txt($_); #Convert general HTML markup to text.
2382 $reached_table = 1 if (/^\s*\d/);
2383 next unless $reached_table;
2384 if (/^\s*(\d+)\s+(\S+)\s*(.*?)\s*$/) {
2385 my($count,$rounded_volume,$entry) = ($1,$2,$3);
2386 #Note: $entry fields can be both null and can contain spaces.
2387
2388 #Add the entry into the %table_order hash if it has a rounded volume (KB/MB/GB).
2389 push(@{$table_order{$rounded_volume}{$by_count_or_volume}},$entry) if ($rounded_volume =~ /\D/);
2390
2391 unless ($league_table_value_entered{$entry}) {
2392 $league_table_value_entered{$entry} = 1;
2393 unless ($$count_href{$entry}) {
2394 $$count_href{$entry} = 0;
2395 $$data_href{$entry} = 0;
2396 $$data_gigs_href{$entry} = 0;
2397 $league_table_value_was_zero{$entry} = 1;
2398 }
2399
2400 $$count_href{$entry} += $count;
2401 #Add the rounded value to the data and data_gigs hashes.
2402 un_round($rounded_volume,\$$data_href{$entry},\$$data_gigs_href{$entry});
2403 print STDERR "$category by $by_count_or_volume: added $count,$rounded_volume to $entry\n" if $debug;
2404 }
2405 }
2406 else { #Finished the table ?
2407 if ($by_count_or_volume =~ /volume/) {
2408 #Add a few bytes to appropriate entries to preserve the order.
2409
2410 my($rounded_volume);
2411 foreach $rounded_volume (keys %table_order) {
2412 #For each rounded volume, we want to create a list which has things
2413 #ordered from the volume table at the front, and additional things
2414 #from the count table ordered at the back.
2415 @{$table_order{$rounded_volume}{volume}} = () unless defined $table_order{$rounded_volume}{volume};
2416 @{$table_order{$rounded_volume}{'message count'}} = () unless defined $table_order{$rounded_volume}{'message count'};
2417 my(@order,%mark);
2418 map {$mark{$_} = 1} @{$table_order{$rounded_volume}{volume}};
2419 @order = @{$table_order{$rounded_volume}{volume}};
2420 map {push(@order,$_)} grep(!$mark{$_},@{$table_order{$rounded_volume}{'message count'}});
2421
2422 my $bonus_bytes = $#order;
2423 $bonus_bytes = 511 if ($bonus_bytes > 511); #Don't go over the half-K boundary!
2424 while (@order and ($bonus_bytes > 0)) {
2425 my $entry = shift(@order);
2426 if ($league_table_value_was_zero{$entry}) {
2427 $$data_href{$entry} += $bonus_bytes;
2428 print STDERR "$category by $by_count_or_volume: added $bonus_bytes bonus bytes to $entry\n" if $debug;
2429 }
2430 $bonus_bytes--;
2431 }
2432 }
2433 }
2434
2435 last;
2436 }
2437 }
2438 }
2439 elsif (/List of errors/) {
2440 #List of errors
2441 #--------------
2442 #
2443 # 1 07904931641@one2one.net R=external T=smtp: SMTP error
2444 # from remote mailer after RCPT TO:<07904931641@one2one.net>:
2445 # host mail.one2one.net [193.133.192.24]: 550 User unknown
2446 #
2447 #<li>1 - ally.dufc@dunbar.org.uk R=external T=smtp: SMTP error from remote mailer after RCPT TO:<ally.dufc@dunbar.org.uk>: host mail.dunbar.org.uk [216.167.89.88]: 550 Unknown local part ally.dufc in <ally.dufc@dunbar.org.uk>
2448
2449
2450 my $reached_table = 0;
2451 my($count,$error,$blanks);
2452 while (<$fh>) {
2453 $reached_table = 1 if (/^( *|<li>)(\d+)/);
2454 next unless $reached_table;
2455
2456 s/^<li>(\d+) -/$1/; #Convert an HTML line to a text line.
2457 $_ = html2txt($_); #Convert general HTML markup to text.
2458
2459 if (/\t\s*(.*)/) {
2460 $error .= ' ' . $1; #Join a multiline error.
2461 }
2462 elsif (/^\s*(\d+)\s+(.*)/) {
2463 if ($error) {
2464 #Finished with a previous multiline error so save it.
2465 $errors_count{$error} = 0 unless $errors_count{$error};
2466 $errors_count{$error} += $count;
2467 }
2468 ($count,$error) = ($1,$2);
2469 }
2470 elsif (/Errors encountered/) {
2471 if ($error) {
2472 #Finished the section, so save our stored last error.
2473 $errors_count{$error} = 0 unless $errors_count{$error};
2474 $errors_count{$error} += $count;
2475 }
2476 last;
2477 }
2478 }
2479 }
2480
2481 }
2482 }
2483
2484
2485
2486 =head2 update_relayed();
2487
2488 update_relayed($count,$sender,$recipient);
2489
2490 Adds an entry into the %relayed hash. Currently only used when
2491 merging reports.
2492
2493 =cut
2494
2495 sub update_relayed {
2496 my($count,$sender,$recipient) = @_;
2497
2498 #When generating the key, put in the 'H=' and 'A=' which can be used
2499 #in searches.
2500 my $key = "H=$sender => H=$recipient";
2501 $key =~ s/ ([^=\s]+\@\S+|<>)/ A=$1/g;
2502 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
2503 $relayed{$key} = 0 if !defined $relayed{$key};
2504 $relayed{$key} += $count;
2505 }
2506 else {
2507 $relayed_unshown += $count;
2508 }
2509 }
2510
2511
2512 =head2 add_to_totals();
2513
2514 add_to_totals(\%totals,\@keys,$values);
2515
2516 Given a line of space seperated values, add them into the provided hash using @keys
2517 as the hash keys.
2518
2519 If the value contains a '%', then the value is set rather than added. Otherwise, we
2520 convert the value to bytes and gigs. The gigs get added to I<Key>-gigs.
2521
2522 =cut
2523
2524 sub add_to_totals {
2525 my($totals_href,$keys_aref,$values) = @_;
2526 my(@values) = split(/\s+/,$values);
2527 my(@keys) = @$keys_aref; #Make a copy as we destroy the one we use.
2528 my($value);
2529 foreach $value (@values) {
2530 my $key = shift(@keys) or next;
2531 if ($value =~ /%/) {
2532 $$totals_href{$key} = $value;
2533 }
2534 else {
2535 $$totals_href{$key} = 0 unless ($$totals_href{$key});
2536 $$totals_href{"$key-gigs"} = 0 unless ($$totals_href{"$key-gigs"});
2537 un_round($value, \$$totals_href{$key}, \$$totals_href{"$key-gigs"});
2538 print STDERR "Added $value to $key - $$totals_href{$key} , " . $$totals_href{"$key-gigs"} . "GB.\n" if $debug;
2539 }
2540 }
2541 }
2542
2543 =head2 get_report_total();
2544
2545 $total = get_report_total(\%hash,$key);
2546
2547 If %hash contains values split into Units and Gigs, we calculate and return
2548
2549 $hash{$key} + 1024*1024*1024 * $hash{"${key}-gigs"}
2550
2551 =cut
2552
2553 sub get_report_total {
2554 no integer;
2555 my($hash_ref,$key) = @_;
2556 if ($$hash_ref{"${key}-gigs"}) {
2557 return $$hash_ref{$key} + $gig * $$hash_ref{"${key}-gigs"};
2558 }
2559 return $$hash_ref{$key} || 0;
2560 }
2561
2562 =head2 html2txt();
2563
2564 $text_line = html2txt($html_line);
2565
2566 Convert a line from html to text. Currently we just convert HTML tags to spaces
2567 and convert &gt;, &lt;, and &nbsp; tags back.
2568
2569 =cut
2570
2571 sub html2txt {
2572 ($_) = @_;
2573
2574 # Convert HTML tags to spacing. Note that the reports may contain <Userid> and
2575 # <Userid@Domain> words, so explicitly specify the HTML tags we will remove
2576 # (the ones used by this program). If someone is careless enough to have their
2577 # Userid the same as an HTML tag, there's not much we can do about it.
2578 s/<\/?(html|head|title|body|h\d|ul|li|a\s+|table|tr|td|th|pre|hr|p|br)\b.*?>/ /og;
2579
2580 s/\&lt\;/\</og; #Convert '&lt;' to '<'.
2581 s/\&gt\;/\>/og; #Convert '&gt;' to '>'.
2582 s/\&nbsp\;/ /og; #Convert '&nbsp;' to ' '.
2583 return($_);
2584 }
2585
2586 =head2 get_next_arg();
2587
2588 $arg = get_next_arg();
2589
2590 Because eximstats arguments are often passed as variables,
2591 we can't rely on shell parsing to deal with quotes. This
2592 subroutine returns $ARGV[1] and does a shift. If $ARGV[1]
2593 starts with a quote (' or "), and doesn't end in one, then
2594 we append the next argument to it and shift again. We repeat
2595 until we've got all of the argument.
2596
2597 This isn't perfect as all white space gets reduced to one space,
2598 but it's as good as we can get! If it's esential that spacing
2599 be preserved precisely, then you get that by not using shell
2600 variables.
2601
2602 =cut
2603
2604 sub get_next_arg {
2605 my $arg = '';
2606 my $matched_pattern = 0;
2607 while ($ARGV[1]) {
2608 $arg .= ' ' if $arg;
2609 $arg .= $ARGV[1]; shift(@ARGV);
2610 if ($arg !~ /^['"]/) {
2611 $matched_pattern = 1;
2612 last;
2613 }
2614 if ($arg =~ s/^(['"])(.*)\1$/$2/) {
2615 $matched_pattern = 1;
2616 last;
2617 }
2618 }
2619 die "Mismatched argument quotes - <$arg>.\n" unless $matched_pattern;
2620 return $arg;
2621 }
2622
2623
2624
2625 ##################################################
2626 # Main Program #
2627 ##################################################
2628
2629
2630 $last_timestamp = '';
2631 $last_date = '';
2632 $show_errors = 1;
2633 $show_relay = 1;
2634 $show_transport = 1;
2635 $topcount = 50;
2636 $local_league_table = 1;
2637 $include_remote_users = 0;
2638 $hist_opt = 1;
2639 $volume_rounding = 1;
2640 $localtime_offset = calculate_localtime_offset(); # PH/FANF
2641
2642 $charts = 0;
2643 $charts_option_specified = 0;
2644 $chartrel = ".";
2645 $chartdir = ".";
2646
2647 @queue_times = (60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60,
2648 12*60*60, 24*60*60);
2649
2650 $last_offset = '';
2651 $offset_seconds = 0;
2652
2653 # Decode options
2654
2655 while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-')
2656 {
2657 if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 }
2658 elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 }
2659 elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/)
2660 {
2661 if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
2662 }
2663 elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/)
2664 {
2665 @queue_times = split(/,/, $1);
2666 my($q);
2667 foreach $q (@queue_times) { $q = eval($q) + 0 }
2668 @queue_times = sort { $a <=> $b } @queue_times;
2669 @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0);
2670 }
2671 elsif ($ARGV[0] =~ /^-nt$/) { $show_transport = 0 }
2672 elsif ($ARGV[0] =~ /^\-nt(.?)(.*)\1$/)
2673 {
2674 if ($1 eq "") { $show_transport = 0 } else { $transport_pattern = $2 }
2675 }
2676 elsif ($ARGV[0] =~ /^-t(\d+)$/) { $topcount = $1 }
2677 elsif ($ARGV[0] =~ /^-tnl$/) { $local_league_table = 0 }
2678 elsif ($ARGV[0] =~ /^-html$/) { $html = 1 }
2679 elsif ($ARGV[0] =~ /^-merge$/) { $merge_reports = 1 }
2680 elsif ($ARGV[0] =~ /^-charts$/) {
2681 $charts = 1;
2682 warn "WARNING: CPAN Module GD::Graph::pie not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_pie;
2683 warn "WARNING: CPAN Module GD::Graph::linespoints not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_linespoints;
2684 }
2685 elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; }
2686 elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; }
2687 elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used.
2688 elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 }
2689 elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 }
2690 elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }
2691 elsif ($ARGV[0] =~ /^-byemaildomain$/) { $do_sender{Edomain} = 1 }
2692 elsif ($ARGV[0] =~ /^-byedomain$/) { $do_sender{Edomain} = 1 }
2693 elsif ($ARGV[0] =~ /^-nvr$/) { $volume_rounding = 0 }
2694 elsif ($ARGV[0] =~ /^-d$/) { $debug = 1 }
2695 elsif ($ARGV[0] =~ /^--?h(elp)?$/){ help() }
2696 elsif ($ARGV[0] =~ /^-t_remote_users$/) { $include_remote_users = 1 }
2697 elsif ($ARGV[0] =~ /^-pattern$/)
2698 {
2699 push(@user_descriptions,get_next_arg());
2700 push(@user_patterns,get_next_arg());
2701 }
2702 elsif ($ARGV[0] =~ /^-utc$/)
2703 {
2704 # We don't need this value if the log is in UTC.
2705 $localtime_offset = undef;
2706 }
2707 else
2708 {
2709 print STDERR "Eximstats: Unknown or malformed option $ARGV[0]\n";
2710 help();
2711 }
2712 shift;
2713 }
2714
2715 # Check that all the charts options are specified.
2716 warn "-charts option not specified. Use -help for help.\n" if ($charts_option_specified && ! $charts);
2717
2718 # Default to display tables by sending Host.
2719 $do_sender{Host} = 1 unless ($do_sender{Domain} || $do_sender{Email} || $do_sender{Edomain});
2720
2721
2722 for (my $i = 0; $i <= $#queue_times; $i++) {
2723 $queue_bin[$i] = 0;
2724 $remote_queue_bin[$i] = 0;
2725 }
2726
2727 # Compute the number of slots for the histogram
2728
2729 if ($hist_opt > 0)
2730 {
2731 if ($hist_opt > 60 || 60 % $hist_opt != 0)
2732 {
2733 print "Eximstats: -h must specify a factor of 60\n";
2734 exit 1;
2735 }
2736 $hist_interval = 60/$hist_opt; #Interval in minutes.
2737 $hist_number = (24*60)/$hist_interval; #Number of intervals per day.
2738 @received_interval_count = (0) x $hist_number;
2739 @delivered_interval_count = (0) x $hist_number;
2740 }
2741
2742 #$queue_unknown = 0;
2743
2744 $total_received_data = 0;
2745 $total_received_data_gigs = 0;
2746 $total_received_count = 0;
2747
2748 $total_delivered_data = 0;
2749 $total_delivered_data_gigs = 0;
2750 $total_delivered_count = 0;
2751
2752 $queue_more_than = 0;
2753 $delayed_count = 0;
2754 $relayed_unshown = 0;
2755 $begin = "9999-99-99 99:99:99";
2756 $end = "0000-00-00 00:00:00";
2757 my($section,$type);
2758 foreach $section ('Received','Delivered') {
2759 foreach $type ('Volume','Messages','Delayed','Failed','Hosts','Domains','Emails','Edomains') {
2760 $report_totals{$section}{$type} = 0;
2761 }
2762 }
2763
2764 # Generate our parser.
2765 my $parser = generate_parser();
2766
2767
2768
2769 if (@ARGV) {
2770 # Scan the input files and collect the data
2771 foreach my $file (@ARGV) {
2772 if ($file =~ /\.gz/) {
2773 unless (open(FILE,"gunzip -c $file |")) {
2774 print STDERR "Failed to gunzip -c $file: $!";
2775 next;
2776 }
2777 }
2778 elsif ($file =~ /\.Z/) {
2779 unless (open(FILE,"uncompress -c $file |")) {
2780 print STDERR "Failed to uncompress -c $file: $!";
2781 next;
2782 }
2783 }
2784 else {
2785 unless (open(FILE,$file)) {
2786 print STDERR "Failed to read $file: $!";
2787 next;
2788 }
2789 }
2790 #Now parse the filehandle, updating the global variables.
2791 parse($parser,\*FILE);
2792 close FILE;
2793 }
2794 }
2795 else {
2796 #No files provided. Parse STDIN, updating the global variables.
2797 parse($parser,\*STDIN);
2798 }
2799
2800
2801 if ($begin eq "9999-99-99 99:99:99") {
2802 print "**** No valid log lines read\n";
2803 exit 1;
2804 }
2805
2806 # Output our results.
2807 print_header();
2808 print_grandtotals();
2809
2810 # Print counts of user specified patterns if required.
2811 print_user_patterns() if @user_patterns;
2812
2813 # Print totals by transport if required.
2814 print_transport() if $show_transport;
2815
2816 # Print the deliveries per interval as a histogram, unless configured not to.
2817 # First find the maximum in one interval and scale accordingly.
2818 if ($hist_opt > 0) {
2819 print_histogram("Messages received", @received_interval_count);
2820 print_histogram("Deliveries", @delivered_interval_count);
2821 }
2822
2823 # Print times on queue if required.
2824 if ($#queue_times >= 0) {
2825 print_queue_times("all messages", \@queue_bin,$queue_more_than);
2826 print_queue_times("messages with at least one remote delivery",\@remote_queue_bin,$queue_more_than);
2827 }
2828
2829 # Print relay information if required.
2830 print_relay() if $show_relay;
2831
2832 # Print the league tables, if topcount isn't zero.
2833 if ($topcount > 0) {
2834 foreach ('Host','Domain','Email','Edomain') {
2835 next unless $do_sender{$_};
2836 print_league_table("sending \l$_", $received_count{$_}, $received_data{$_},$received_data_gigs{$_});
2837 }
2838
2839 print_league_table("local sender", \%received_count_user,
2840 \%received_data_user,\%received_data_gigs_user) if ($local_league_table || $include_remote_users);
2841 foreach ('Host','Domain','Email','Edomain') {
2842 next unless $do_sender{$_};
2843 print_league_table("\l$_ destination", $delivered_count{$_}, $delivered_data{$_},$delivered_data_gigs{$_});
2844 }
2845 print_league_table("local destination", \%delivered_count_user,
2846 \%delivered_data_user,\%delivered_data_gigs_user) if ($local_league_table || $include_remote_users);
2847 }
2848
2849 # Print the error statistics if required.
2850 print_errors() if $show_errors;
2851
2852 if ($html) {
2853 print "</body>\n</html>\n"
2854 }
2855
2856 # End of eximstats