src/src/eximstats.src

   1 #!PERL_COMMAND -w
   2 # $Cambridge: exim/src/src/eximstats.src,v 1.1 2004/10/07 10:39:01 ph10 Exp $
   3
   4 # Copyright (c) 2001 University of Cambridge.
   5 # See the file NOTICE for conditions of use and distribution.
   6
   7 # Perl script to generate statistics from one or more Exim log files.
   8
   9 # Usage: eximstats [<options>] <log file> <log file> ...
  10
  11 # 1996-05-21: Ignore lines not starting with valid date/time, just in case
  12 #               these get into a log file.
  13 # 1996-11-19: Add the -h option to control the size of the histogram,
  14 #               and optionally turn it off.
  15 #             Use some Perl 5 things; it should be everywhere by now.
  16 #             Add the Perl -w option and rewrite so no warnings are given.
  17 #             Add the -t option to control the length of the "top" listing.
  18 #             Add the -ne, -nt options to turn off errors and transport
  19 #               information.
  20 #             Add information about length of time on queue, and -q<list> to
  21 #               control the intervals and turn it off.
  22 #             Add count and percentage of delayed messages to the Received
  23 #               line.
  24 #             Show total number of errors.
  25 #             Add count and percentage of messages with errors to Received
  26 #               line.
  27 #             Add information about relaying and -nr to suppress it.
  28 # 1997-02-03  Merged in some of the things Nigel Metheringham had done:
  29 #               Re-worded headings
  30 #               Added received histogram as well as delivered
  31 #               Added local senders' league table
  32 #               Added local recipients' league table
  33 # 1997-03-10  Fixed typo "destinationss"
  34 #             Allow for intermediate address between final and original
  35 #               when testing for relaying
  36 #             Give better message when no input
  37 # 1997-04-24  Fixed bug in layout of error listing that was depending on
  38 #               text length (output line got repeated).
  39 # 1997-05-06  Bug in option decoding when only one option.
  40 #             Overflow bug when handling very large volumes.
  41 # 1997-10-28  Updated to handle revised log format that might show
  42 #               HELO name as well as host name before IP number
  43 # 1998-01-26  Bugs in the function for calculating the number of seconds
  44 #               since 1970 from a log date
  45 # 1998-02-02  Delivery to :blackhole: doesn't have a T= entry in the log
  46 #               line; cope with this, thereby avoiding undefined problems
  47 #             Very short log line gave substring error
  48 # 1998-02-03  A routed delivery to a local transport may not have <> in the
  49 #               log line; terminate the address at white space, not <
  50 # 1998-09-07  If first line of input was a => line, $thissize was undefined;
  51 #               ensure it is zero.
  52 # 1998-12-21  Adding of $thissize from => line should have been adding $size.
  53 #             Oops. Should have looked more closely when fixing the previous
  54 #               bug!
  55 # 1999-11-12  Increased the field widths for printed integers; numbers are
  56 #               bigger than originally envisaged.
  57 # 2001-03-21  Converted seconds() routine to use Time::Local, fixing a bug
  58 #               whereby seconds($timestamp) - id_seconds($id) gave an
  59 #               incorrect result.
  60 #             Added POD documentation.
  61 #             Moved usage instructions into help() subroutine.
  62 #             Added 'use strict' and declared all global variables.
  63 #             Added '-html' flag and resultant code.
  64 #             Added '-cache' flag and resultant code.
  65 #             Added add_volume() routine and converted all volume variables
  66 #               to use it, fixing the overflow problems for individual hosts
  67 #               on large sites.
  68 #             Converted all volume output to GB/MB/KB as appropriate.
  69 #             Don't store local user stats if -nfl is specified.
  70 #             Modifications done by: Steve Campbell (<steve@computurn.com>)
  71 # 2001-04-02  Added the -t_remote_users flag. Steve Campbell.
  72 # 2001-10-15  Added the -domain flag. Steve Campbell.
  73 # 2001-10-16  Accept files on STDIN or on the command line. Steve Campbell.
  74 # 2001-10-21  Removed -domain flag and added -bydomain, -byhost, and -byemail.
  75 #             We now generate our main parsing subroutine as an eval statement
  76 #             which improves performance dramatically when not all the results
  77 #             are required. We also cache the last timestamp to time convertion.
  78 #
  79 #             NOTE: 'Top 50 destinations by (message count|volume)' lines are
  80 #             now 'Top N (host|email|domain) destinations by (message count|volume)'
  81 #             where N is the topcount. Steve Campbell.
  82 #
  83 # 2001-10-30  V1.16 Joachim Wieland.
  84 #            Fixed minor bugs in add_volume() when taking over this version
  85 #               for use in Exim 4: -w gave uninitialized value warnings in
  86 #               two situations: for the first addition to a counter, and if
  87 #               there were never any gigabytes, thereby leaving the $gigs
  88 #               value unset.
  89 #             Initialized $last_timestamp to stop a -w uninitialized warning.
  90 #             Minor layout tweak for grand totals (nitpicking).
  91 #             Put the IP addresses for relaying stats in [] and separated by
  92 #               a space from the domain name.
  93 #             Removed the IPv4-specific address test when picking out addresses
  94 #               for relaying. Anything inside [] is OK.
  95 #
  96 # 2002-07-02  Philip Hazel
  97 #             Fixed "uninitialized variable" message that occurred for relay
  98 #               messages that arrived from H=[1.2.3.4] hosts (no name shown).
  99 #               This bug didn't affect the output.
 100 #
 101 # 2002-04-15  V1.17 Joachim Wieland.
 102 #             Added -charts, -chartdir. -chartrel options which use
 103 #             GD::Graph modules to create graphical charts of the statistics.
 104 #
 105 # 2002-04-15  V1.18 Steve Campbell.
 106 #             Added a check for $domain to to stop a -w uninitialized warning.
 107 #             Added -byemaildomain option.
 108 #             Only print HTML header links to included tables!
 109 #
 110 # 2002-08-02  V1.19 Steve Campbell.
 111 #             Changed the debug mode to dump the parser onto STDERR rather
 112 #             than STDOUT. Documented the -d flag into the help().
 113 #             Rejoined the divergent 2002-04-15 and 2002-07-02 releases.
 114 #
 115 # 2002-08-21  V1.20 Steve Campbell.
 116 #             Added the '-merge' option to allow merging of previous reports.
 117 #             Fixed a missing semicolon when doing -bydomain.
 118 #             Make volume charts plot the data gigs and bytes rather than just bytes.
 119 #             Only process log lines with $flag =~ /<=|=>|->|==|\*\*|Co/
 120 #             Converted Emaildomain to Edomain - the column header was too wide!
 121 #             This changes the text output slightly. You can revert to the old
 122 #             column widths by changing $COLUMN_WIDTHS to 7;
 123 #
 124 # 2002-09-04  V1.21 Andreas J Mueller
 125 #             Local deliveries domain now defaults to 'localdomain'.
 126 #             Don't match F=<From> when looking for the user.
 127 #
 128 # 2002-09-05  V1.22 Steve Campbell
 129 #             Fixed a perl 5.005 incompatibility problem ('our' variables).
 130 #
 131 # 2002-09-11  V1.23 Steve Campbell
 132 #             Stopped -charts option from throwing errors on null data.
 133 #             Don't print out 'Errors encountered' unless there are any.
 134
 135 # 2002-10-21  V1.23a Philip Hazel - patch from Tony Finch put in until
 136 #               Steve's eximstats catches up.
 137 #             Handle log files that include the timezone after the timestamp.
 138 #             Switch to assuming that log timestamps are in local time, with
 139 #               an option for UTC timestamps, as in Exim itself.
 140 #
 141 # 2003-02-05  V1.24 Steve Campbell
 142 #             Added in Sergey Sholokh's code to convert '<' and '>' characters
 143 #             in HTML output. Also added code to convert them back with -merge.
 144 #             Fixed timestamp offsets to convert to seconds rather than minutes.
 145 #             Updated -merge to work with output files using timezones.
 146 #             Added cacheing to speed up the calculation of timezone offsets.
 147 #
 148 # 2003-02-07  V1.25 Steve Campbell
 149 #             Optimised the usage of mktime() in the seconds subroutine.
 150 #             Removed the now redundant '-cache' option.
 151 #             html2txt() now explicitly matches HTML tags.
 152 #             Implemented a new sorting algorithm - the top_n_sort() routine.
 153 #             Added Danny Carroll's '-nvr' flag and code.
 154 #
 155 # 2003-03-13  V1.26 Steve Campbell
 156 #             Implemented HTML compliance changes recommended by Bernard Massot.
 157 #             Bug fix to allow top_n_sort() to handle null keys.
 158 #             Convert all domains and edomains to lowercase.
 159 #             Remove preceding dots from domains.
 160 #
 161 # 2003-03-13  V1.27 Steve Campbell
 162 #             Replaced border attributes with 'border=1', as recommended by
 163 #             Bernard Massot.
 164 #
 165 # 2003-06-03  V1.28 John Newman
 166 #             Added in the ability to skip over the parsing and evaulation of
 167 #             specific transports as passed to eximstats via the new "-nt/.../"
 168 #             command line argument.  This new switch allows the viewing of
 169 #             not more accurate statistics but more applicable statistics when
 170 #             special transports are in use (ie; SpamAssassin).  We need to be
 171 #             able to ignore transports such as this otherwise the resulting
 172 #             local deliveries are significantly skewed (doubled)...
 173 #
 174 # 2003-11-06  V1.29 Steve Campbell
 175 #             Added the '-pattern "Description" "/pattern/"' option.
 176 #
 177 # 2004-02-17  V1.30 Steve Campbell
 178 #             Added warnings if required GD::Graph modules are not available or
 179 #             insufficient -chart* options are specified.
 180 #
 181 # 2004-02-20  V1.31 Andrea Balzi
 182 #             Only show the Local Sender/Destination links if the tables exist.
 183 #
 184
 185
 186 =head1 NAME
 187
 188 eximstats - generates statistics from Exim mainlog files.
 189
 190 =head1 SYNOPSIS
 191
 192  eximstats [Options] mainlog1 mainlog2 ... > report.txt
 193  eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_report.txt
 194
 195 Options:
 196
 197 =over 4
 198
 199 =item B<-h>I<number>
 200
 201 histogram divisions per hour. The default is 1, and
 202 0 suppresses histograms. Valid values are:
 203
 204 0, 1, 2, 3, 5, 10, 15, 20, 30 or 60.
 205
 206 =item B<-ne>
 207
 208 Don't display error information.
 209
 210 =item B<-nr>
 211
 212 Don't display relaying information.
 213
 214 =item B<-nr>I</pattern/>
 215
 216 Don't display relaying information that matches.
 217
 218 =item B<-nt>
 219
 220 Don't display transport information.
 221
 222 =item B<-nt>I</pattern/>
 223
 224 Don't display transport information that matches
 225
 226 =item B<-q>I<list>
 227
 228 List of times for queuing information single 0 item suppresses.
 229
 230 =item B<-t>I<number>
 231
 232 Display top <number> sources/destinations
 233 default is 50, 0 suppresses top listing.
 234
 235 =item B<-tnl>
 236
 237 Omit local sources/destinations in top listing.
 238
 239 =item B<-t_remote_users>
 240
 241 Include remote users in the top source/destination listings.
 242
 243 =item B<-byhost>
 244
 245 Show results by sending host. This may be combined with
 246 B<-bydomain> and/or B<-byemail> and/or B<-byedomain>. If none of these options
 247 are specified, then B<-byhost> is assumed as a default.
 248
 249 =item B<-bydomain>
 250
 251 Show results by sending domain.
 252 May be combined with B<-byhost> and/or B<-byemail> and/or B<-byedomain>.
 253
 254 =item B<-byemail>
 255
 256 Show results by sender's email address.
 257 May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byedomain>.
 258
 259 =item B<-byemaildomain> or B<-byedomain>
 260
 261 Show results by sender's email domain.
 262 May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byemail>.
 263
 264 =item B<-pattern> I<Description> I</Pattern/>
 265
 266 Look for the specified pattern and count the number of lines in which it appears.
 267 This option can be specified multiple times. Eg:
 268
 269  -pattern 'Refused connections' '/refused connection/'
 270
 271
 272 =item B<-merge>
 273
 274 This option allows eximstats to merge old eximstat reports together. Eg:
 275
 276  eximstats mainlog.sun > report.sun.txt
 277  eximstats mainlog.mon > report.mon.txt
 278  eximstats mainlog.tue > report.tue.txt
 279  eximstats mainlog.wed > report.web.txt
 280  eximstats mainlog.thu > report.thu.txt
 281  eximstats mainlog.fri > report.fri.txt
 282  eximstats mainlog.sat > report.sat.txt
 283  eximstats -merge       report.*.txt > weekly_report.txt
 284  eximstats -merge -html report.*.txt > weekly_report.html
 285
 286 =over 4
 287
 288 =item *
 289
 290 You can merge text or html reports and output the results as text or html.
 291
 292 =item *
 293
 294 You can use all the normal eximstat output options, but only data
 295 included in the original reports can be shown!
 296
 297 =item *
 298
 299 When merging reports, some loss of accuracy may occur in the top I<n> lists.
 300 This will be towards the ends of the lists.
 301
 302 =item *
 303
 304 The order of items in the top I<n> lists may vary when the data volumes
 305 round to the same value.
 306
 307 =back
 308
 309 =item B<-html>
 310
 311 Output the results in HTML.
 312
 313 =item B<-charts>
 314
 315 Create graphical charts to be displayed in HTML output.
 316
 317 This requires the following modules which can be obtained
 318 from http://www.cpan.org/modules/01modules.index.html
 319
 320 =over 4
 321
 322 =item GD
 323
 324 =item GDTextUtil
 325
 326 =item GDGraph
 327
 328 =back
 329
 330 To install these, download and unpack them, then use the normal perl installation procedure:
 331
 332  perl Makefile.PL
 333  make
 334  make test
 335  make install
 336
 337 =item B<-chartdir>I <dir>
 338
 339 Create the charts in the directory <dir>
 340
 341 =item B<-chartrel>I <dir>
 342
 343 Specify the relative directory for the "img src=" tags from where to include
 344 the charts
 345
 346 =item B<-d>
 347
 348 Debug flag. This outputs the eval()'d parser onto STDOUT which makes it
 349 easier to trap errors in the eval section. Remember to add 1 to the line numbers to allow for the
 350 title!
 351
 352 =back
 353
 354 =head1 DESCRIPTION
 355
 356 Eximstats parses exim mainlog files and outputs a statistical
 357 analysis of the messages processed. By default, a text
 358 analysis is generated, but you can request an html analysis
 359 by using the B<-html> flag. See the help (B<-help>) to learn
 360 about how to create charts from the tables.
 361
 362 =head1 AUTHOR
 363
 364 There is a web site at http://www.exim.org - this contains details of the
 365 mailing list exim-users@exim.org.
 366
 367 =head1 TO DO
 368
 369 This program does not perfectly handle messages whose received
 370 and delivered log lines are in different files, which can happen
 371 when you have multiple mail servers and a message cannot be
 372 immeadiately delivered. Fixing this could be tricky...
 373
 374 =head1 SUBROUTINES
 375
 376 The following section will only be of interest to the
 377 program maintainers:
 378
 379 =cut
 380
 381 use integer;
 382 use strict;
 383
 384 # use Time::Local;  # PH/FANF
 385 use POSIX;
 386
 387 use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints);
 388 eval { require GD::Graph::pie; };
 389 $HAVE_GD_Graph_pie = $@ ? 0 : 1;
 390 eval { require GD::Graph::linespoints; };
 391 $HAVE_GD_Graph_linespoints = $@ ? 0 : 1;
 392
 393
 394 ##################################################
 395 #             Static data                        #
 396 ##################################################
 397 # 'use vars' instead of 'our' as perl5.005 is still in use out there!
 398 use vars qw(@tab62 @days_per_month $gig);
 399 use vars qw($VERSION);
 400 use vars qw($COLUMN_WIDTHS);
 401
 402
 403 @tab62 =
 404   (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0,     # 0-9
 405    0,10,11,12,13,14,15,16,17,18,19,20,  # A-K
 406   21,22,23,24,25,26,27,28,29,30,31,32,  # L-W
 407   33,34,35, 0, 0, 0, 0, 0,              # X-Z
 408    0,36,37,38,39,40,41,42,43,44,45,46,  # a-k
 409   47,48,49,50,51,52,53,54,55,56,57,58,  # l-w
 410   59,60,61);                            # x-z
 411
 412 @days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
 413 $gig     = 1024 * 1024 * 1024;
 414 $VERSION = '1.31';
 415
 416 # How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
 417 $COLUMN_WIDTHS = 8;
 418
 419 # Declare global variables.
 420 use vars qw($total_received_data  $total_received_data_gigs  $total_received_count);
 421 use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_count);
 422 use vars qw(%arrival_time %size %from_host %from_address);
 423 use vars qw(%timestamp2time);                   #Hash of timestamp => time.
 424 use vars qw($last_timestamp $last_time);        #The last time convertion done.
 425 use vars qw($last_date $date_seconds);          #The last date convertion done.
 426 use vars qw($last_offset $offset_seconds);      #The last time offset convertion done.
 427 use vars qw($localtime_offset);
 428 use vars qw($i);                                #General loop counter.
 429 use vars qw($debug);                            #Debug mode?
 430 use vars qw($ntopchart);                        #How many entries should make it into the chart?
 431 use vars qw($gddirectory);                      #Where to put files from GD::Graph
 432
 433 $ntopchart = 5;
 434
 435 # The following are parameters whose values are
 436 # set by command line switches:
 437 use vars qw($show_errors $show_relay $show_transport $transport_pattern);
 438 use vars qw($topcount $local_league_table $include_remote_users);
 439 use vars qw($hist_opt $hist_interval $hist_number $volume_rounding);
 440 use vars qw($relay_pattern @queue_times $html @user_patterns @user_descriptions);
 441
 442 use vars qw(%do_sender);                #Do sender by Host, Domain, Email, and/or Edomain tables.
 443 use vars qw($charts $chartrel $chartdir $charts_option_specified);
 444 use vars qw($merge_reports);                    #Merge old reports ?
 445
 446 # The following are modified in the parse() routine, and
 447 # referred to in the print_*() routines.
 448 use vars qw($queue_more_than $delayed_count $relayed_unshown $begin $end);
 449 use vars qw(%received_count       %received_data       %received_data_gigs);
 450 use vars qw(%delivered_count      %delivered_data      %delivered_data_gigs);
 451 use vars qw(%received_count_user  %received_data_user  %received_data_gigs_user);
 452 use vars qw(%delivered_count_user %delivered_data_user %delivered_data_gigs_user);
 453 use vars qw(%transported_count    %transported_data    %transported_data_gigs);
 454 use vars qw(%remote_delivered %relayed %delayed %had_error %errors_count);
 455 use vars qw(@queue_bin @remote_queue_bin @received_interval_count @delivered_interval_count);
 456 use vars qw(@user_pattern_totals);
 457
 458 use vars qw(%report_totals);
 459
 460
 461
 462
 463 ##################################################
 464 #                   Subroutines                  #
 465 ##################################################
 466
 467
 468 =head2 volume_rounded();
 469
 470  $rounded_volume = volume_rounded($bytes,$gigabytes);
 471
 472 Given a data size in bytes, round it to KB, MB, or GB
 473 as appropriate.
 474
 475 Eg 12000 => 12KB, 15000000 => 14GB, etc.
 476
 477 Note: I've experimented with Math::BigInt and it results in a 33%
 478 performance degredation as opposed to storing numbers split into
 479 bytes and gigabytes.
 480
 481 =cut
 482
 483 sub volume_rounded {
 484   my($x,$g) = @_;
 485   $x = 0 unless $x;
 486   $g = 0 unless $g;
 487   my($rounded);
 488
 489   while ($x > $gig) {
 490     $g++;
 491     $x -= $gig;
 492   }
 493
 494   if ($volume_rounding) {
 495     # Values < 1 GB
 496     if ($g <= 0) {
 497       if ($x < 10000) {
 498         $rounded = sprintf("%6d", $x);
 499       }
 500       elsif ($x < 10000000) {
 501         $rounded = sprintf("%4dKB", ($x + 512)/1024);
 502       }
 503       else {
 504         $rounded = sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
 505       }
 506     }
 507     # Values between 1GB and 10GB are printed in MB
 508     elsif ($g < 10) {
 509       $rounded = sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024));
 510     }
 511     else {
 512       # Handle values over 10GB
 513       $rounded = sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
 514     }
 515   }
 516   else {
 517     # We don't want any rounding to be done.
 518     $rounded = sprintf("%4d", ($g * $gig) + $x);
 519   }
 520
 521   return $rounded;
 522 }
 523
 524
 525 =head2 un_round();
 526
 527  un_round($rounded_volume,\$bytes,\$gigabytes);
 528
 529 Given a volume in KB, MB or GB, as generated by volume_rounded(),
 530 do the reverse transformation and convert it back into Bytes and Gigabytes.
 531 These are added to the $bytes and $gigabytes parameters.
 532
 533 Given a data size in bytes, round it to KB, MB, or GB
 534 as appropriate.
 535
 536 EG: 500 => (500,0), 14GB => (0,14), etc.
 537
 538 =cut
 539
 540 sub un_round {
 541   my($rounded,$bytes_sref,$gigabytes_sref) = @_;
 542
 543   if ($rounded =~ /(\d+)GB/) {
 544     $$gigabytes_sref += $1;
 545   }
 546   elsif ($rounded =~ /(\d+)MB/) {
 547     $$gigabytes_sref +=   $1 / 1024;
 548     $$bytes_sref     += (($1 % 1024 ) * 1024 * 1024);
 549   }
 550   elsif ($rounded =~ /(\d+)KB/) {
 551     $$gigabytes_sref +=  $1 / (1024 * 1024);
 552     $$bytes_sref     += ($1 % (1024 * 1024) * 1024);
 553   }
 554   elsif ($rounded =~ /(\d+)/) {
 555     $$gigabytes_sref += $1 / $gig;
 556     $$bytes_sref     += $1 % $gig;
 557   }
 558
 559   #Now reduce the bytes down to less than 1GB.
 560   add_volume($bytes_sref,$gigabytes_sref,0) if ($$bytes_sref > $gig);
 561 }
 562
 563
 564 =head2 add_volume();
 565
 566   add_volume(\$bytes,\$gigs,$size);
 567
 568 Add $size to $bytes/$gigs where this is a number split into
 569 bytes ($bytes) and gigabytes ($gigs). This is significantly
 570 faster than using Math::BigInt.
 571
 572 =cut
 573
 574 sub add_volume {
 575 my($bytes_ref,$gigs_ref,$size) = @_;
 576 $$bytes_ref = 0 if ! defined $$bytes_ref;
 577 $$gigs_ref = 0 if ! defined $$gigs_ref;
 578 $$bytes_ref += $size;
 579 while ($$bytes_ref > $gig)
 580   {
 581   $$gigs_ref++;
 582   $$bytes_ref -= $gig;
 583   }
 584 }
 585
 586
 587 =head2 format_time();
 588
 589  $formatted_time = format_time($seconds);
 590
 591 Given a time in seconds, break it down into
 592 weeks, days, hours, minutes, and seconds.
 593
 594 Eg 12005 => 3h20m5s
 595
 596 =cut
 597
 598 sub format_time {
 599 my($t) = pop @_;
 600 my($s) = $t % 60;
 601 $t /= 60;
 602 my($m) = $t % 60;
 603 $t /= 60;
 604 my($h) = $t % 24;
 605 $t /= 24;
 606 my($d) = $t % 7;
 607 my($w) = $t/7;
 608 my($p) = "";
 609 $p .= "$w"."w" if $w > 0;
 610 $p .= "$d"."d" if $d > 0;
 611 $p .= "$h"."h" if $h > 0;
 612 $p .= "$m"."m" if $m > 0;
 613 $p .= "$s"."s" if $s > 0 || $p eq "";
 614 $p;
 615 }
 616
 617
 618 =head2 unformat_time();
 619
 620  $seconds = unformat_time($formatted_time);
 621
 622 Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
 623
 624 Eg 3h20m5s => 12005
 625
 626 =cut
 627
 628 sub unformat_time {
 629   my($formated_time) = pop @_;
 630   my $time = 0;
 631
 632   while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
 633     $time +=  $1 if ($2 eq '' || $2 eq 's');
 634     $time +=  $1 * 60 if ($2 eq 'm');
 635     $time +=  $1 * 60 * 60 if ($2 eq 'h');
 636     $time +=  $1 * 60 * 60 * 24 if ($2 eq 'd');
 637     $time +=  $1 * 60 * 60 * 24  * 7 if ($2 eq 'w');
 638   }
 639   $time;
 640 }
 641
 642
 643 =head2 seconds();
 644
 645  $time = seconds($timestamp);
 646
 647 Given a time-of-day timestamp, convert it into a time() value using
 648 POSIX::mktime.  We expect the timestamp to be of the form
 649 "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
 650 and the year to be absolute (we do the necessary conversions). The
 651 timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
 652 offset is not present, and we have not been told that the log is in UTC
 653 (with the -utc option), then we adjust the time by the current local
 654 time offset so that it can be compared with the time recorded in message
 655 IDs, which is UTC.
 656
 657 To improve performance, we only use mktime on the date ($year-$mon-$day),
 658 and only calculate it if the date is different to the previous time we
 659 came here. We then add on seconds for the '$hour:$min:$sec'.
 660
 661 We also store the results of the last conversion done, and only
 662 recalculate if the date is different.
 663
 664 We used to have the '-cache' flag which would store the results of the
 665 mktime() call. However, the current way of just using mktime() on the
 666 date obsoletes this.
 667
 668 =cut
 669
 670 sub seconds {
 671   my($timestamp) = @_;
 672
 673   # Is the timestamp the same as the last one?
 674   return $last_time if ($last_timestamp eq $timestamp);
 675
 676   return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o);
 677
 678   unless ($last_date eq $1) {
 679     $last_date = $1;
 680     my(@timestamp) = (0,0,0,$4,$3,$2);
 681     $timestamp[5] -= 1900;
 682     $timestamp[4]--;
 683     $date_seconds = mktime(@timestamp);
 684   }
 685   my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
 686
 687   # SC. Use cacheing. Also note we want seconds not minutes.
 688   #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
 689   if (defined $8 && ($8 ne $last_offset)) {
 690     $last_offset = $8;
 691     $offset_seconds = ($10 * 60 + $11) * 60;
 692     $offset_seconds = -$offset_seconds if ($9 eq '-');
 693   }
 694
 695
 696   if (defined $7) {
 697     #$time -= $this_offset;
 698     $time -= $offset_seconds;
 699   } elsif (defined $localtime_offset) {
 700     $time -= $localtime_offset;
 701   }
 702
 703   # Store the last timestamp received.
 704   $last_timestamp = $timestamp;
 705   $last_time      = $time;
 706
 707   $time;
 708 }
 709
 710
 711 =head2 id_seconds();
 712
 713  $time = id_seconds($message_id);
 714
 715 Given a message ID, convert it into a time() value.
 716
 717 =cut
 718
 719 sub id_seconds {
 720 my($sub_id) = substr((pop @_), 0, 6);
 721 my($s) = 0;
 722 my(@c) = split(//, $sub_id);
 723 while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] }
 724 $s;
 725 }
 726
 727
 728
 729 =head2 calculate_localtime_offset();
 730
 731  $localtime_offset = calculate_localtime_offset();
 732
 733 Calculate the the localtime offset from gmtime in seconds.
 734
 735  $localtime = time() + $localtime_offset.
 736
 737 These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
 738 (West is negative, East is positive.)
 739
 740 =cut
 741
 742 # $localtime = gmtime() + $localtime_offset.  OLD COMMENT
 743 # This subroutine commented out as it's not currently in use.
 744
 745 #sub calculate_localtime_offset {
 746 #  # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
 747 #  my (@sample_date) = (0,0,0,5,5,100);
 748 #  my $localtime = timelocal(@sample_date);
 749 #  my $gmtime    = timegm(@sample_date);
 750 #  my $offset = $localtime - $gmtime;
 751 #  return $offset;
 752 #}
 753
 754 sub calculate_localtime_offset {
 755   # Assume that the offset at the moment is valid across the whole
 756   # period covered by the logs that we're analysing. This may not
 757   # be true around the time the clocks change in spring or autumn.
 758   my $utc = time;
 759   # mktime works on local time and gmtime works in UTC
 760   my $local = mktime(gmtime($utc));
 761   return $local - $utc;
 762 }
 763
 764
 765 =head2 print_queue_times();
 766
 767  $time = print_queue_times($message_type,\@queue_times,$queue_more_than);
 768
 769 Given the type of messages being output, the array of message queue times,
 770 and the number of messages which exceeded the queue times, print out
 771 a table.
 772
 773 =cut
 774
 775 sub print_queue_times {
 776 no integer;
 777 my($string,$array,$queue_more_than) = @_;
 778 my(@chartdatanames);
 779 my(@chartdatavals);
 780
 781 my $printed_one = 0;
 782 my $cumulative_percent = 0;
 783 #$queue_unknown += keys %arrival_time;
 784
 785 my $queue_total = $queue_more_than;
 786 for ($i = 0; $i <= $#queue_times; $i++) { $queue_total += $$array[$i] }
 787
 788 my $temp = "Time spent on the queue: $string";
 789
 790 my($format);
 791 if ($html) {
 792   print "<hr><a name=\"$string time\"></a><h2>$temp</h2>\n";
 793   print "<table border=0 width=\"100%\">\n";
 794   print "<tr><td>\n";
 795   print "<table border=1>\n";
 796   print "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th>\n";
 797   $format = "<tr><td align=\"right\">%s %s</td><td align=\"right\">%d</td><td align=\"right\">%5.1f%%</td><td align=\"right\">%5.1f%%</td>\n";
 798 }
 799 else
 800 {
 801   printf("%s\n%s\n\n", $temp, "-" x length($temp));
 802   $format = "%5s %4s   %6d %5.1f%%  %5.1f%%\n";
 803 }
 804
 805 for ($i = 0; $i <= $#queue_times; $i++) {
 806   if ($$array[$i] > 0)
 807     {
 808     my $percent = ($$array[$i] * 100)/$queue_total;
 809     $cumulative_percent += $percent;
 810     printf($format,
 811       $printed_one? "     " : "Under",
 812       format_time($queue_times[$i]),
 813       $$array[$i], $percent, $cumulative_percent);
 814     if (!defined($queue_times[$i])) {
 815       print "Not defined";
 816     }
 817     push(@chartdatanames,
 818       ($printed_one? "" : "Under") . format_time($queue_times[$i]));
 819     push(@chartdatavals, $$array[$i]);
 820     $printed_one = 1;
 821   }
 822 }
 823
 824 if ($queue_more_than > 0) {
 825   my $percent = ($queue_more_than * 100)/$queue_total;
 826   $cumulative_percent += $percent;
 827   printf($format,
 828     "Over ",
 829     format_time($queue_times[$#queue_times]),
 830     $queue_more_than, $percent, $cumulative_percent);
 831 }
 832 push(@chartdatanames, "Over " . format_time($queue_times[$#queue_times]));
 833 push(@chartdatavals, $queue_more_than);
 834
 835 #printf("Unknown   %6d\n", $queue_unknown) if $queue_unknown > 0;
 836 if ($html) {
 837   print "</table>\n";
 838   print "</td><td>\n";
 839
 840   if ($HAVE_GD_Graph_pie && $charts) {
 841     my @data = (
 842        \@chartdatanames,
 843        \@chartdatavals
 844     );
 845     my $graph = GD::Graph::pie->new(200, 200);
 846     my $pngname;
 847     my $title;
 848     if ($string =~ /all/) { $pngname = "queue_all.png"; $title = "Queue (all)"; }
 849     if ($string =~ /remote/) { $pngname = "queue_rem.png"; $title = "Queue (remote)"; }
 850     $graph->set(
 851         title             => $title,
 852     );
 853     my $gd = $graph->plot(\@data) or warn($graph->error);
 854     if ($gd) {
 855       open(IMG, ">$chartdir/$pngname") or die $!;
 856       binmode IMG;
 857       print IMG $gd->png;
 858       close IMG;
 859       print "<img src=\"$chartrel/$pngname\">";
 860     }
 861   }
 862   print "</td></tr></table>\n";
 863 }
 864 print "\n";
 865 }
 866
 867
 868
 869 =head2 print_histogram();
 870
 871  print_histogram('Deliverieds|Messages received',@interval_count);
 872
 873 Print a histogram of the messages delivered/received per time slot
 874 (hour by default).
 875
 876 =cut
 877
 878 sub print_histogram {
 879 my($text) = shift;
 880 my(@interval_count) = @_;
 881 my(@chartdatanames);
 882 my(@chartdatavals);
 883 my($maxd) = 0;
 884 for ($i = 0; $i < $hist_number; $i++)
 885   { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; }
 886
 887 my $scale = int(($maxd + 25)/50);
 888 $scale = 1 if $scale == 0;
 889
 890 my($type);
 891 if ($text eq "Deliveries")
 892   {
 893   $type = ($scale == 1)? "delivery" : "deliveries";
 894   }
 895 else
 896   {
 897   $type = ($scale == 1)? "message" : "messages";
 898   }
 899
 900 my($title) = sprintf("$text per %s (each dot is $scale $type)",
 901   ($hist_interval == 60)? "hour" :
 902   ($hist_interval == 1)?  "minute" : "$hist_interval minutes");
 903
 904 if ($html) {
 905   print "<hr><a name=\"$text\"></a><h2>$title</h2>\n";
 906   print "<table border=0 width=\"100%\">\n";
 907   print "<tr><td><pre>\n";
 908 }
 909 else {
 910   printf("%s\n%s\n\n", $title, "-" x length($title));
 911 }
 912
 913 my $hour = 0;
 914 my $minutes = 0;
 915 for ($i = 0; $i < $hist_number; $i++)
 916   {
 917   my $c = $interval_count[$i];
 918
 919   # If the interval is an hour (the maximum) print the starting and
 920   # ending hours as a label. Otherwise print the starting hour and
 921   # minutes, which take up the same space.
 922
 923   my $temp;
 924   if ($hist_opt == 1)
 925     {
 926     $temp = sprintf("%02d-%02d", $hour, $hour + 1);
 927     print $temp;
 928     push(@chartdatanames, $temp);
 929     $hour++;
 930     }
 931   else
 932     {
 933     if ($minutes == 0)
 934       { $temp = sprintf("%02d:%02d", $hour, $minutes) }
 935     else
 936       { $temp = sprintf("  :%02d", $minutes) }
 937     print $temp;
 938     push(@chartdatanames, $temp);
 939     $minutes += $hist_interval;
 940     if ($minutes >= 60)
 941       {
 942       $minutes = 0;
 943       $hour++;
 944       }
 945     }
 946   push(@chartdatavals, $c);
 947   printf(" %6d %s\n", $c, "." x ($c/$scale));
 948   }
 949 print "\n";
 950 if ($html)
 951   {
 952   print "</pre>\n";
 953   print "</td><td>\n";
 954   if ($HAVE_GD_Graph_linespoints && $charts) {
 955     # calculate the graph
 956     my @data = (
 957        \@chartdatanames,
 958        \@chartdatavals
 959     );
 960     my $graph = GD::Graph::linespoints->new(300, 300);
 961     $graph->set(
 962         x_label           => 'Time',
 963         y_label           => 'Amount',
 964         title             => $text,
 965         x_labels_vertical => 1
 966     );
 967     my($pngname);
 968     if ($text =~ /Deliveries/) { $pngname = "histogram_del.png"; }
 969     if ($text =~ /Messages/)   { $pngname = "histogram_mes.png"; }
 970     my $gd = $graph->plot(\@data) or warn($graph->error);
 971     if ($gd) {
 972       open(IMG, ">$chartdir/$pngname") or die $!;
 973       binmode IMG;
 974       print IMG $gd->png;
 975       close IMG;
 976       print "<img src=\"$chartrel/$pngname\">";
 977     }
 978   }
 979   print "</td></tr></table>\n";
 980 }
 981 }
 982
 983
 984
 985 =head2 print_league_table();
 986
 987  print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs);
 988
 989 Given hashes of message count and message data, which are keyed by
 990 the table type (eg by the sending host), print a league table
 991 showing the top $topcount (defaults to 50).
 992
 993 =cut
 994
 995 sub print_league_table {
 996 my($text,$m_count,$m_data,$m_data_gigs) = @_;
 997 my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";
 998 my($temp) = "Top $name by message count";
 999 my(@chartdatanames) = ();
1000 my(@chartdatavals) = ();
1001 my $chartotherval = 0;
1002
1003 my($format);
1004 if ($html) {
1005   print "<hr><a name=\"$text count\"></a><h2>$temp</h2>\n";
1006   print "<table border=0 width=\"100%\">\n";
1007   print "<tr><td>\n";
1008   print "<table border=1>\n";
1009   print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1010
1011   # Align non-local addresses to the right (so all the .com's line up).
1012   # Local addresses are aligned on the left as they are userids.
1013   my $align = ($text !~ /local/i) ? 'right' : 'left';
1014   $format = "<tr><td align=\"right\">%d</td><td align=\"right\">%s</td><td align=\"$align\" nowrap>%s</td>\n";
1015 }
1016 else {
1017   printf("%s\n%s\n\n", $temp, "-" x length($temp));
1018   $format = "%7d %10s   %s\n";
1019 }
1020
1021 my($key,$htmlkey);
1022 foreach $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {
1023   if ($html) {
1024     $htmlkey = $key;
1025     $htmlkey =~ s/>/\&gt\;/g;
1026     $htmlkey =~ s/</\&lt\;/g;
1027     printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1028   }
1029   else {
1030     printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1031   }
1032   if (scalar @chartdatanames < $ntopchart)
1033     {
1034     push(@chartdatanames, $key);
1035     push(@chartdatavals, $$m_count{$key});
1036     }
1037   else
1038     {
1039     $chartotherval += $$m_count{$key};
1040     }
1041   }
1042 push(@chartdatanames, "Other");
1043 push(@chartdatavals, $chartotherval);
1044
1045 if ($html)
1046   {
1047   print "</table>\n";
1048   print "</td><td>\n";
1049   if ($HAVE_GD_Graph_pie && $charts)
1050     {
1051     # calculate the graph
1052     my @data = (
1053        \@chartdatanames,
1054        \@chartdatavals
1055     );
1056     my $graph = GD::Graph::pie->new(300, 300);
1057     $graph->set(
1058         x_label           => 'Name',
1059         y_label           => 'Amount',
1060         title             => 'By count',
1061     );
1062     my $gd = $graph->plot(\@data) or warn($graph->error);
1063     if ($gd) {
1064       my $temp = $text;
1065       $temp =~ s/ /_/g;
1066       open(IMG, ">$chartdir/${temp}_count.png") or die $!;
1067       binmode IMG;
1068       print IMG $gd->png;
1069       close IMG;
1070       print "<img src=\"$chartrel/${temp}_count.png\">";
1071     }
1072   }
1073   print "</td><td>\n";
1074   print "</td></tr></table>\n";
1075 }
1076 print "\n";
1077
1078 $temp = "Top $name by volume";
1079 if ($html) {
1080   print "<hr><a name=\"$text volume\"></a><h2>$temp</h2>\n";
1081   print "<table border=0 width=\"100%\">\n";
1082   print "<tr><td>\n";
1083   print "<table border=1>\n";
1084   print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1085 }
1086 else {
1087   printf("%s\n%s\n\n", $temp, "-" x length($temp));
1088 }
1089
1090 @chartdatanames = ();
1091 @chartdatavals = ();
1092 $chartotherval = 0;
1093 foreach $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
1094   if ($html) {
1095     $htmlkey = $key;
1096     $htmlkey =~ s/>/\&gt\;/g;
1097     $htmlkey =~ s/</\&lt\;/g;
1098     printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1099   }
1100   else {
1101     printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1102   }
1103
1104   if (scalar @chartdatanames < $ntopchart)
1105     {
1106     push(@chartdatanames, $key);
1107     push(@chartdatavals, $$m_count{$key});
1108     }
1109   else
1110     {
1111     $chartotherval += $$m_count{$key};
1112     }
1113   }
1114 push(@chartdatanames, "Other");
1115 push(@chartdatavals, $chartotherval);
1116
1117 if ($html) {
1118   print "</table>\n";
1119   print "</td><td>\n";
1120   if ($HAVE_GD_Graph_pie && $charts) {
1121     # calculate the graph
1122     my @data = (
1123        \@chartdatanames,
1124        \@chartdatavals
1125     );
1126     my $graph = GD::Graph::pie->new(300, 300);
1127     $graph->set(
1128         x_label           => 'Name',
1129         y_label           => 'Volume',
1130         title             => 'By Volume',
1131     );
1132     my $gd = $graph->plot(\@data) or warn($graph->error);
1133     if ($gd) {
1134       my $temp = $text;
1135       $temp =~ s/ /_/g;
1136       open(IMG, ">$chartdir/${temp}_volume.png") or die $!;
1137       binmode IMG;
1138       print IMG $gd->png;
1139       close IMG;
1140       print "<img src=\"$chartrel/${temp}_volume.png\">";
1141     }
1142   }
1143   print "</td><td>\n";
1144   print "</td></tr></table>\n";
1145 }
1146
1147 print "\n";
1148 }
1149
1150
1151 =head2 top_n_sort();
1152
1153   @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
1154
1155 Given a hash which has numerical values, return the sorted $n keys which
1156 point to the top values. The second and third hashes are used as
1157 tiebreakers. They all must have the same keys.
1158
1159 The idea behind this routine is that when you only want to see the
1160 top n members of a set, rather than sorting the entire set and then
1161 plucking off the top n, sort through the stack as you go, discarding
1162 any member which is lower than your current n'th highest member.
1163
1164 This proves to be an order of magnitude faster for large hashes.
1165 On 200,000 lines of mainlog it benchmarked 9 times faster.
1166 On 700,000 lines of mainlog it benchmarked 13.8 times faster.
1167
1168 We assume the values are > 0.
1169
1170 =cut
1171
1172 sub top_n_sort {
1173   my($n,$href1,$href2,$href3) = @_;
1174
1175   # PH's original sort was:
1176   #
1177   # foreach $key (sort
1178   #               {
1179   #               $$m_count{$b}     <=> $$m_count{$a} ||
1180   #               $$m_data_gigs{$b} <=> $$m_data_gigs{$a}  ||
1181   #               $$m_data{$b}      <=> $$m_data{$a}  ||
1182   #               $a cmp $b
1183   #               }
1184   #             keys %{$m_count})
1185   #
1186
1187   #We use a key of '_' to represent non-existant values, as null keys are valid.
1188   #'_' is not a valid domain, edomain, host, or email.
1189   my(@top_n_keys) = ('_') x $n;
1190   my($minimum_value1,$minimum_value2,$minimum_value3) = (0,0,0);
1191   my $top_n_key = '';
1192   my $n_minus_1 = $n - 1;
1193   my $n_minus_2 = $n - 2;
1194
1195   # Pick out the top $n keys.
1196   my($key,$value1,$value2,$value3,$i,$comparison,$insert_position);
1197   while (($key,$value1) = each %$href1) {
1198
1199     #print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n";
1200
1201     # Check to see that the new value is bigger than the lowest of the
1202     # top n keys that we're keeping.
1203     $comparison = $value1        <=> $minimum_value1 ||
1204                   $href2->{$key} <=> $minimum_value2 ||
1205                   $href3->{$key} <=> $minimum_value3 ||
1206                   $top_n_key cmp $key;
1207     next unless ($comparison == 1);
1208
1209     # As we will be using these values a few times, extract them into scalars.
1210     $value2 = $href2->{$key};
1211     $value3 = $href3->{$key};
1212
1213     # This key is bigger than the bottom n key, so the lowest position we
1214     # will insert it into is $n minus 1 (the bottom of the list).
1215     $insert_position = $n_minus_1;
1216
1217     # Now go through the list, stopping when we find a key that we're
1218     # bigger than, or we come to the penultimate position - we've
1219     # already tested bigger than the last.
1220     #
1221     # Note: we go top down as the list starts off empty.
1222     # Note: stepping through the list in this way benchmarks nearly
1223     # three times faster than doing a sort() on the reduced list.
1224     # I assume this is because the list is already in order, and
1225     # we get a performance boost from not having to do hash lookups
1226     # on the new key.
1227     for ($i = 0; $i < $n_minus_1; $i++) {
1228       $top_n_key = $top_n_keys[$i];
1229       if ( ($top_n_key eq '_') ||
1230            ( ($value1 <=> $href1->{$top_n_key} ||
1231               $value2 <=> $href2->{$top_n_key} ||
1232               $value3 <=> $href3->{$top_n_key} ||
1233               $top_n_key cmp $key) == 1
1234            )
1235          ) {
1236         $insert_position = $i;
1237         last;
1238       }
1239     }
1240
1241     # Remove the last element, then insert the new one.
1242     $#top_n_keys = $n_minus_2;
1243     splice(@top_n_keys,$insert_position,0,$key);
1244
1245     # Extract our new minimum values.
1246     $top_n_key = $top_n_keys[$n_minus_1];
1247     if ($top_n_key ne '_') {
1248       $minimum_value1 = $href1->{$top_n_key};
1249       $minimum_value2 = $href2->{$top_n_key};
1250       $minimum_value3 = $href3->{$top_n_key};
1251     }
1252   }
1253
1254   # Return the top n list, grepping out non-existant values, just in case
1255   # we didn't have that many values.
1256   return(grep(!/^_$/,@top_n_keys));
1257 }
1258
1259
1260 =head2 html_header();
1261
1262  $header = html_header($title);
1263
1264 Print our HTML header and start the <body> block.
1265
1266 =cut
1267
1268 sub html_header {
1269   my($title) = @_;
1270   my $text = << "EoText";
1271 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
1272 <html>
1273 <head>
1274 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15">
1275 <title>$title</title>
1276 </head>
1277 <body bgcolor="white">
1278 <h1>$title</h1>
1279 EoText
1280   return $text;
1281 }
1282
1283
1284
1285 =head2 help();
1286
1287  help();
1288
1289 Display usage instructions and exit.
1290
1291 =cut
1292
1293 sub help {
1294   print << "EoText";
1295
1296 eximstats Version $VERSION
1297
1298 Usage: eximstats [Options] mainlog1 mainlog2 ... > report.txt
1299        eximstats -html  [Options] mainlog1 mainlog2 ... > report.html
1300        eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_rep.txt
1301        eximstats -merge -html [Options] report.1.html ... > weekly_rep.html
1302
1303 Parses exim mainlog files and generates a statistical analysis of
1304 the messages processed. Valid options are:
1305
1306 -h<number>      histogram divisions per hour. The default is 1, and
1307                 0 suppresses histograms. Other valid values are:
1308                 2, 3, 5, 10, 15, 20, 30 or 60.
1309 -ne             don't display error information
1310 -nr             don't display relaying information
1311 -nr/pattern/    don't display relaying information that matches
1312 -nt             don't display transport information
1313 -nt/pattern/    don't display transport information that matches
1314 -nvr            don't do volume rounding. Display in bytes, not KB/MB/GB.
1315 -q<list>        list of times for queuing information
1316                 single 0 item suppresses
1317 -t<number>      display top <number> sources/destinations
1318                 default is 50, 0 suppresses top listing
1319 -tnl            omit local sources/destinations in top listing
1320 -t_remote_users show top user sources/destinations from non-local domains
1321
1322 -byhost         show results by sending host (default unless bydomain or
1323                 byemail is specified)
1324 -bydomain       show results by sending domain.
1325 -byemail        show results by sender's email address
1326 -byedomain      show results by sender's email domain
1327
1328 -pattern "Description" /pattern/
1329                 Count lines matching specified patterns and show them in
1330                 the results. It can be specified multiple times. Eg:
1331                 -pattern 'Refused connections' '/refused connection/'
1332
1333 -merge          merge previously generated reports into a new report
1334
1335 -html           output the results in HTML
1336 -charts         Create charts (this requires the GD::Graph modules)
1337 -chartdir <dir> Create the charts' png files in the directory <dir>
1338 -chartrel <dir> Specify the relative directory for the "img src=" tags
1339                 from where to include the charts in the html file
1340                 -chartdir and -chartrel default to '.'
1341
1342 -d              Debug mode - dump the eval'ed parser onto STDERR.
1343
1344 EoText
1345
1346   exit 1;
1347 }
1348
1349
1350
1351 =head2 generate_parser();
1352
1353  $parser = generate_parser();
1354
1355 This subroutine generates the parsing routine which will be
1356 used to parse the mainlog. We take the base operation, and remove bits not in use.
1357 This improves performance depending on what bits you take out or add.
1358
1359 I've tested using study(), but this does not improve performance.
1360
1361 We store our parsing routing in a variable, and process it looking for #IFDEF (Expression)
1362 or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
1363 the expression evaluates to true, then it is included/excluded accordingly.
1364
1365 =cut
1366
1367 sub generate_parser {
1368   my $parser = '
1369   my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
1370   my($tod,$m_hour,$m_min,$id,$flag);
1371   while (<$fh>) {
1372     next if length($_) < 38;
1373
1374     # PH/FANF
1375     # next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d)/;
1376     next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o;
1377
1378     ($tod,$m_hour,$m_min) = ($1,$2,$3);
1379
1380     # PH
1381     my($extra) = defined($4)? 6 : 0;
1382     $id   = substr($_, 20 + $extra, 16);
1383     $flag = substr($_, 37 + $extra, 2);
1384 ';
1385
1386   # Watch for user specified patterns.
1387   my $user_pattern_index = 0;
1388   foreach (@user_patterns) {
1389     $user_pattern_totals[$user_pattern_index] = 0;
1390     $parser .= "  \$user_pattern_totals[$user_pattern_index]++ if $_;\n";
1391     $user_pattern_index++;
1392   }
1393
1394   $parser .= '
1395     next unless ($flag =~ /<=|=>|->|==|\\*\\*|Co/);
1396
1397     #Strip away the timestamp, ID and flag (which could be "Com" for completed)
1398     #This speeds up the later pattern matches.
1399     # $_ = substr($_, 40);
1400
1401     $_ = substr($_, 40 + $extra);  # PH
1402
1403     # JN - Skip over certain transports as specified via the "-nt/.../" command
1404     # line switch (where ... is a perl style regular expression).  This is
1405     # required so that transports that skew stats such as SpamAssassin can be
1406     # ignored.
1407     #IFDEF ($transport_pattern)
1408     if (/\\sT=(\\S+)/) {
1409        next if ($1 =~ /$transport_pattern/o) ;
1410     }
1411     #ENDIF ($transport_pattern)
1412
1413
1414     $host = "local";            #Host is local unless otherwise specified.
1415     $domain = "localdomain";    #Domain is localdomain unless otherwise specified.
1416
1417
1418     # Do some pattern matches to get the host and IP address.
1419     # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or
1420     # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
1421     # We do 2 separate matches to keep the matches simple and fast.
1422     if (/\\sH=(\\S+)/) {
1423       $host = $1;
1424
1425       ($ip) = /\\sH=.*?(\\s\\[[^]]+\\])/;
1426       # If there is only an IP address, it will be in $host and $ip will be
1427       # unset. That is OK, because we only use $ip in conjunction with $host
1428       # below. But make it empty to avoid warning messages.
1429       $ip = "" if !defined $ip;
1430
1431       #IFDEF ($do_sender{Domain})
1432       if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
1433         # Remove the host portion from the DNS name. We ensure that we end up with
1434         # at least xxx.yyy. $host can be "(x.y.z)" or  "x.y.z".
1435         $domain = lc("$1.$2");
1436         $domain =~ s/^\\.//;            #Remove preceding dot.
1437       }
1438       #ENDIF ($do_sender{Domain})
1439
1440     }
1441
1442     #IFDEF ($do_sender{Email})
1443     $email = (/^(\S+)/) ? $1 : "";
1444     #ENDIF ($do_sender{Email})
1445
1446     #IFDEF ($do_sender{Edomain})
1447     $edomain = (/^\S*?\\@(\S+)/) ? lc($1) : "";
1448     #ENDIF ($do_sender{Edomain})
1449
1450     if ($tod lt $begin) {
1451       $begin = $tod;
1452     }
1453     elsif ($tod gt $end) {
1454       $end   = $tod;
1455     }
1456
1457
1458     if ($flag eq "<=") {
1459       $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0;
1460       $size{$id} = $thissize;
1461
1462       #IFDEF ($show_relay)
1463       if ($host ne "local") {
1464         # Save incoming information in case it becomes interesting
1465         # later, when delivery lines are read.
1466         my($from) = /^(\\S+)/;
1467         $from_host{$id} = "$host$ip";
1468         $from_address{$id} = $from;
1469       }
1470       #ENDIF ($show_relay)
1471
1472       #IFDEF ($local_league_table || $include_remote_users)
1473         if (/\sU=(\\S+)/) {
1474           my $user = $1;
1475
1476           #IFDEF ($local_league_table && $include_remote_users)
1477           {                             #Store both local and remote users.
1478           #ENDIF ($local_league_table && $include_remote_users)
1479
1480           #IFDEF ($local_league_table && ! $include_remote_users)
1481           if ($host eq "local") {               #Store local users only.
1482           #ENDIF ($local_league_table && ! $include_remote_users)
1483
1484           #IFDEF ($include_remote_users && ! $local_league_table)
1485           if ($host ne "local") {               #Store remote users only.
1486           #ENDIF ($include_remote_users && ! $local_league_table)
1487
1488             $received_count_user{$user}++;
1489             add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize);
1490           }
1491         }
1492       #ENDIF ($local_league_table || $include_remote_users)
1493
1494       #IFDEF ($do_sender{Host})
1495         $received_count{Host}{$host}++;
1496         add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize);
1497       #ENDIF ($do_sender{Host})
1498
1499       #IFDEF ($do_sender{Domain})
1500         if ($domain) {
1501           $received_count{Domain}{$domain}++;
1502           add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize);
1503         }
1504       #ENDIF ($do_sender{Domain})
1505
1506       #IFDEF ($do_sender{Email})
1507         $received_count{Email}{$email}++;
1508         add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize);
1509       #ENDIF ($do_sender{Email})
1510
1511       #IFDEF ($do_sender{Edomain})
1512         $received_count{Edomain}{$edomain}++;
1513         add_volume(\\$received_data{Edomain}{$edomain},\\$received_data_gigs{Edomain}{$edomain},$thissize);
1514       #ENDIF ($do_sender{Edomain})
1515
1516       $total_received_count++;
1517       add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize);
1518
1519       #IFDEF ($#queue_times >= 0)
1520         $arrival_time{$id} = $tod;
1521       #ENDIF ($#queue_times >= 0)
1522
1523       #IFDEF ($hist_opt > 0)
1524         $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1525       #ENDIF ($hist_opt > 0)
1526     }
1527
1528     elsif ($flag eq "=>") {
1529       $size = $size{$id} || 0;
1530       if ($host ne "local") {
1531         $remote_delivered{$id} = 1;
1532
1533
1534         #IFDEF ($show_relay)
1535         # Determine relaying address if either only one address listed,
1536         # or two the same. If they are different, it implies a forwarding
1537         # or aliasing, which is not relaying. Note that for multi-aliased
1538         # addresses, there may be a further address between the first
1539         # and last.
1540
1541         if (defined $from_host{$id}) {
1542           if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) {
1543             ($old,$new) = ($1,$2);
1544           }
1545           else {
1546             $old = $new = "";
1547           }
1548
1549           if ("\\L$new" eq "\\L$old") {
1550             ($old) = /^(\\S+)/ if $old eq "";
1551             my $key = "H=\\L$from_host{$id}\\E A=\\L$from_address{$id}\\E => " .
1552               "H=\\L$host\\E$ip A=\\L$old\\E";
1553             if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
1554               $relayed{$key} = 0 if !defined $relayed{$key};
1555               $relayed{$key}++;
1556             }
1557             else {
1558               $relayed_unshown++
1559             }
1560           }
1561         }
1562         #ENDIF ($show_relay)
1563
1564       }
1565
1566       #IFDEF ($local_league_table || $include_remote_users)
1567         #IFDEF ($local_league_table && $include_remote_users)
1568         {                               #Store both local and remote users.
1569         #ENDIF ($local_league_table && $include_remote_users)
1570
1571         #IFDEF ($local_league_table && ! $include_remote_users)
1572         if ($host eq "local") {         #Store local users only.
1573         #ENDIF ($local_league_table && ! $include_remote_users)
1574
1575         #IFDEF ($include_remote_users && ! $local_league_table)
1576         if ($host ne "local") {         #Store remote users only.
1577         #ENDIF ($include_remote_users && ! $local_league_table)
1578
1579           if (my($user) = split((/\\s</)? " <" : " ", $_)) {
1580             if ($user =~ /^[\\/|]/) {
1581               my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
1582               $user = "$user $parent" if defined $parent;
1583             }
1584             $delivered_count_user{$user}++;
1585             add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size);
1586           }
1587         }
1588       #ENDIF ($local_league_table || $include_remote_users)
1589
1590       #IFDEF ($do_sender{Host})
1591         $delivered_count{Host}{$host}++;
1592         add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size);
1593       #ENDIF ($do_sender{Host})
1594       #IFDEF ($do_sender{Domain})
1595         if ($domain) {
1596           $delivered_count{Domain}{$domain}++;
1597           add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size);
1598         }
1599       #ENDIF ($do_sender{Domain})
1600       #IFDEF ($do_sender{Email})
1601         $delivered_count{Email}{$email}++;
1602         add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size);
1603       #ENDIF ($do_sender{Email})
1604       #IFDEF ($do_sender{Edomain})
1605         $delivered_count{Edomain}{$edomain}++;
1606         add_volume(\\$delivered_data{Edomain}{$edomain},\\$delivered_data_gigs{Edomain}{$edomain},$size);
1607       #ENDIF ($do_sender{Edomain})
1608
1609       $total_delivered_count++;
1610       add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size);
1611
1612       #IFDEF ($show_transport)
1613         my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:";
1614         $transported_count{$transport}++;
1615         add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size);
1616       #ENDIF ($show_transport)
1617
1618       #IFDEF ($hist_opt > 0)
1619         $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1620       #ENDIF ($hist_opt > 0)
1621
1622     }
1623
1624     elsif ($flag eq "==" && defined($size{$id}) && !defined($delayed{$id})) {
1625       $delayed_count++;
1626       $delayed{$id} = 1;
1627     }
1628
1629     elsif ($flag eq "**") {
1630       $had_error{$id} = 1 if defined ($size{$id});
1631
1632       #IFDEF ($show_errors)
1633         $errors_count{$_}++;
1634       #ENDIF ($show_errors)
1635
1636     }
1637
1638     elsif ($flag eq "Co") {
1639       #Completed?
1640       #IFDEF ($#queue_times >= 0)
1641         #Note: id_seconds() benchmarks as 42% slower than seconds() and computing
1642         #the time accounts for a significant portion of the run time.
1643         my($queued);
1644         if (defined $arrival_time{$id}) {
1645           $queued = seconds($tod) - seconds($arrival_time{$id});
1646           delete($arrival_time{$id});
1647         }
1648         else {
1649           $queued = seconds($tod) - id_seconds($id);
1650         }
1651
1652         for ($i = 0; $i <= $#queue_times; $i++) {
1653           if ($queued < $queue_times[$i]) {
1654             $queue_bin[$i]++;
1655             $remote_queue_bin[$i]++ if $remote_delivered{$id};
1656             last;
1657           }
1658         }
1659         $queue_more_than++ if $i > $#queue_times;
1660       #ENDIF ($#queue_times >= 0)
1661
1662       #IFDEF ($show_relay)
1663         delete($from_host{$id});
1664         delete($from_address{$id});
1665       #ENDIF ($show_relay)
1666
1667     }
1668   }';
1669
1670   # We now do a 'C preprocessor style operation on our parser
1671   # to remove bits not in use.
1672   my(%defines_in_operation,$removing_lines,$processed_parser);
1673   foreach (split (/\n/,$parser)) {
1674     if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i  && ! eval $1) ||
1675         (/^\s*#\s*IFNDEF\s*\((.*?)\)/i &&   eval $1)    ) {
1676       $defines_in_operation{$1} = 1;
1677       $removing_lines = 1;
1678     }
1679
1680     $processed_parser .= $_."\n" unless $removing_lines;
1681
1682     if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) {
1683       delete $defines_in_operation{$1};
1684       unless (keys %defines_in_operation) {
1685         $removing_lines = 0;
1686       }
1687     }
1688   }
1689   print STDERR "# START OF PARSER:\n$processed_parser\n# END OF PARSER\n\n" if $debug;
1690
1691   return $processed_parser;
1692 }
1693
1694
1695
1696 =head2 parse();
1697
1698  parse($parser,\*FILEHANDLE);
1699
1700 This subroutine accepts a parser and a filehandle from main and parses each
1701 line. We store the results into global variables.
1702
1703 =cut
1704
1705 sub parse {
1706   my($parser,$fh) = @_;
1707
1708   if ($merge_reports) {
1709     parse_old_eximstat_reports($fh);
1710   }
1711   else {
1712     eval $parser;
1713     die ($@) if $@;
1714   }
1715
1716 }
1717
1718
1719
1720 =head2 print_header();
1721
1722  print_header();
1723
1724 Print our headers and contents.
1725
1726 =cut
1727
1728 sub print_header {
1729
1730   my $title = "Exim statistics from $begin to $end";
1731
1732   if ($html) {
1733     print html_header($title);
1734     print "<ul>\n";
1735     print "<li><a href=\"#grandtotal\">Grand total summary</a>\n";
1736     print "<li><a href=\"#patterns\">User Specified Patterns</a>\n" if @user_patterns;
1737     print "<li><a href=\"#transport\">Deliveries by Transport</a>\n" if $show_transport;
1738     if ($hist_opt) {
1739       print "<li><a href=\"#Messages received\">Messages received per hour</a>\n";
1740       print "<li><a href=\"#Deliveries\">Deliveries per hour</a>\n";
1741     }
1742     if ($#queue_times >= 0) {
1743       print "<li><a href=\"#all messages time\">Time spent on the queue: all messages</a>\n";
1744       print "<li><a href=\"#messages with at least one remote delivery time\">Time spent on the queue: messages with at least one remote delivery</a>\n";
1745     }
1746     print "<li><a href=\"#Relayed messages\">Relayed messages</a>\n" if $show_relay;
1747     if ($topcount) {
1748       foreach ('Host','Domain','Email','Edomain') {
1749         next unless $do_sender{$_};
1750         print "<li><a href=\"#sending \l$_ count\">Top $topcount sending \l${_}s by message count</a>\n";
1751         print "<li><a href=\"#sending \l$_ volume\">Top $topcount sending \l${_}s by volume</a>\n";
1752       }
1753       if ($local_league_table || $include_remote_users) {
1754         print "<li><a href=\"#local sender count\">Top $topcount local senders by message count</a>\n";
1755         print "<li><a href=\"#local sender volume\">Top $topcount local senders by volume</a>\n";
1756       }
1757       foreach ('Host','Domain','Email','Edomain') {
1758         next unless $do_sender{$_};
1759         print "<li><a href=\"#\l$_ destination count\">Top $topcount \l$_ destinations by message count</a>\n";
1760         print "<li><a href=\"#\l$_ destination volume\">Top $topcount \l$_ destinations by volume</a>\n";
1761       }
1762       if ($local_league_table || $include_remote_users) {
1763         print "<li><a href=\"#local destination count\">Top $topcount local destinations by message count</a>\n";
1764         print "<li><a href=\"#local destination volume\">Top $topcount local destinations by volume</a>\n";
1765       }
1766     }
1767     print "<li><a href=\"#errors\">List of errors</a>\n" if %errors_count;
1768     print "</ul>\n<hr>\n";
1769
1770   }
1771   else {
1772     print "\n$title\n";
1773   }
1774 }
1775
1776
1777 =head2 print_grandtotals();
1778
1779  print_grandtotals();
1780
1781 Print the grand totals.
1782
1783 =cut
1784
1785 sub print_grandtotals {
1786
1787   # Get the sender by headings and results. This is complicated as we can have
1788   # different numbers of columns.
1789   my($sender_txt_header,$sender_html_header,$sender_txt_format,$sender_html_format);
1790   my(@received_totals,@delivered_totals);
1791   foreach ('Host','Domain','Email','Edomain') {
1792     next unless $do_sender{$_};
1793     if ($merge_reports) {
1794       push(@received_totals, get_report_total($report_totals{Received},"${_}s"));
1795       push(@delivered_totals,get_report_total($report_totals{Delivered},"${_}s"));
1796     }
1797     else {
1798       push(@received_totals,scalar(keys %{$received_data{$_}}));
1799       push(@delivered_totals,scalar(keys %{$delivered_data{$_}}));
1800     }
1801     $sender_html_header .= "<th>${_}s</th>";
1802     $sender_txt_header  .= " " x ($COLUMN_WIDTHS - length($_)) . $_ . 's';
1803     $sender_html_format .= "<td align=\"right\">%d</td>";
1804     $sender_txt_format  .= " " x ($COLUMN_WIDTHS - 5) . "%6d";
1805   }
1806
1807   my($format1,$format2);
1808   if ($html) {
1809     print << "EoText";
1810 <a name="grandtotal"></a>
1811 <h2>Grand total summary</h2>
1812 <table border=1>
1813 <tr><th>TOTAL</th><th>Volume</th><th>Messages</th>$sender_html_header<th colspan=2>At least one addr<br>Delayed</th><th colspan=2>At least one addr<br>Failed</th>
1814 EoText
1815
1816     $format1 = "<tr><td>%s</td><td align=\"right\">%s</td>$sender_html_format<td align=\"right\">%d</td>";
1817     $format2 = "<td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td><td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td>";
1818   }
1819   else {
1820     my $sender_spaces = " " x length($sender_txt_header);
1821     print << "EoText";
1822
1823 Grand total summary
1824 -------------------
1825                                     $sender_spaces           At least one address
1826   TOTAL               Volume    Messages $sender_txt_header      Delayed       Failed
1827 EoText
1828     $format1 = "  %-16s %9s      %6d $sender_txt_format";
1829     $format2 = "  %6d %4.1f%% %6d %4.1f%%",
1830   }
1831
1832   my($volume,$failed_count);
1833   if ($merge_reports) {
1834     $volume = volume_rounded($report_totals{Received}{Volume}, $report_totals{Received}{'Volume-gigs'});
1835     $total_received_count = get_report_total($report_totals{Received},'Messages');
1836     $failed_count  = get_report_total($report_totals{Received},'Failed');
1837     $delayed_count = get_report_total($report_totals{Received},'Delayed');
1838   }
1839   else {
1840     $volume = volume_rounded($total_received_data, $total_received_data_gigs);
1841     $failed_count = keys %had_error;
1842   }
1843
1844   {
1845     no integer;
1846     printf("$format1$format2\n",'Received',$volume,$total_received_count,
1847       @received_totals,$delayed_count,
1848       ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0,
1849       $failed_count,
1850       ($total_received_count) ? ($failed_count*100/$total_received_count) : 0);
1851   }
1852
1853   if ($merge_reports) {
1854     $volume = volume_rounded($report_totals{Delivered}{Volume}, $report_totals{Delivered}{'Volume-gigs'});
1855     $total_delivered_count = get_report_total($report_totals{Delivered},'Messages');
1856   }
1857   else {
1858     $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs);
1859   }
1860   printf("$format1\n\n",'Delivered',$volume,$total_delivered_count,@delivered_totals);
1861   print "</table>\n" if $html;
1862 }
1863
1864
1865 =head2 print_user_patterns()
1866
1867  print_user_patterns();
1868
1869 Print the counts of user specified patterns.
1870
1871 =cut
1872
1873 sub print_user_patterns {
1874   my($format1);
1875
1876   if ($html) {
1877     print "<hr><a name=\"patterns\"></a><h2>User Specified Patterns</h2>\n";
1878     print "<table border=0 width=\"100%\">\n";
1879     print "<tr><td>\n";
1880     print "<table border=1>\n";
1881     print "<tr><th>&nbsp;</th><th>Total</th>\n";
1882     $format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";
1883   }
1884   else {
1885     print "User Specified Patterns\n";
1886     print "-----------------------";
1887     print "\n                       Total\n";
1888     $format1 = "  %-18s  %6d";
1889   }
1890
1891   my($key);
1892   if ($merge_reports) {
1893     # We are getting our data from previous reports.
1894     foreach $key (@user_descriptions) {
1895       my $count = get_report_total($report_totals{patterns}{$key},'Total');
1896       printf("$format1\n",$key,$count);
1897     }
1898   }
1899   else {
1900     # We are getting our data from mainlog files.
1901     my $user_pattern_index = 0;
1902     foreach $key (@user_descriptions) {
1903       printf("$format1\n",$key,$user_pattern_totals[$user_pattern_index]);
1904       $user_pattern_index++;
1905     }
1906   }
1907   if ($html) {
1908     print "</table>\n";
1909   }
1910   print "\n";
1911 }
1912
1913
1914 =head2 print_transport();
1915
1916  print_transport();
1917
1918 Print totals by transport.
1919
1920 =cut
1921
1922 sub print_transport {
1923   my($format1);
1924   my(@chartdatanames);
1925   my(@chartdatavals_count);
1926   my(@chartdatavals_vol);
1927   no integer;           #Lose this for charting the data.
1928
1929   if ($html) {
1930     print "<hr><a name=\"transport\"></a><h2>Deliveries by Transport</h2>\n";
1931     print "<table border=0 width=\"100%\">\n";
1932     print "<tr><td>\n";
1933     print "<table border=1>\n";
1934     print "<tr><th>&nbsp;</th><th>Volume</th><th>Messages</th>\n";
1935     $format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%d</td>";
1936   }
1937   else {
1938     print "Deliveries by transport\n";
1939     print "-----------------------";
1940     print "\n                      Volume    Messages\n";
1941     $format1 = "  %-18s  %6s      %6d";
1942   }
1943
1944   my($key);
1945   if ($merge_reports) {
1946     # We are getting our data from previous reports.
1947     foreach $key (sort keys %{$report_totals{transport}}) {
1948       my $count = get_report_total($report_totals{transport}{$key},'Messages');
1949       printf("$format1\n",$key,
1950         volume_rounded($report_totals{transport}{$key}{Volume},$report_totals{transport}{$key}{'Volume-gigs'}),
1951         $count);
1952       push(@chartdatanames, $key);
1953       push(@chartdatavals_count, $count);
1954       push(@chartdatavals_vol, $report_totals{transport}{$key}{'Volume-gigs'}*$gig + $report_totals{transport}{$key}{Volume} );
1955     }
1956   }
1957   else {
1958     # We are getting our data from mainlog files.
1959     foreach $key (sort keys %transported_data) {
1960       printf("$format1\n",$key,
1961         volume_rounded($transported_data{$key},$transported_data_gigs{$key}),
1962         $transported_count{$key});
1963       push(@chartdatanames, $key);
1964       push(@chartdatavals_count, $transported_count{$key});
1965       push(@chartdatavals_vol, $transported_data_gigs{$key}*$gig + $transported_data{$key});
1966     }
1967   }
1968   if ($html) {
1969     print "</table>\n";
1970     print "</td><td>\n";
1971     if ($HAVE_GD_Graph_pie && $charts)
1972       {
1973       # calculate the graph
1974       my @data = (
1975          \@chartdatanames,
1976          \@chartdatavals_count
1977       );
1978       my $graph = GD::Graph::pie->new(200, 200);
1979       $graph->set(
1980           x_label           => 'Transport',
1981           y_label           => 'Messages',
1982           title             => 'By count',
1983       );
1984       my $gd = $graph->plot(\@data) or warn($graph->error);
1985       if ($gd) {
1986         open(IMG, ">$chartdir/transports_count.png") or die $!;
1987         binmode IMG;
1988         print IMG $gd->png;
1989         close IMG;
1990         print "<img src=\"$chartrel/transports_count.png\">";
1991       }
1992     }
1993     print "</td><td>\n";
1994
1995     if ($HAVE_GD_Graph_pie && $charts) {
1996       my @data = (
1997          \@chartdatanames,
1998          \@chartdatavals_vol
1999       );
2000       my $graph = GD::Graph::pie->new(200, 200);
2001       $graph->set(
2002           title             => 'By volume',
2003       );
2004       my $gd = $graph->plot(\@data) or warn($graph->error);
2005       if ($gd) {
2006         open(IMG, ">$chartdir/transports_vol.png") or die $!;
2007         binmode IMG;
2008         print IMG $gd->png;
2009         close IMG;
2010         print "<img src=\"$chartrel/transports_vol.png\">";
2011       }
2012     }
2013     print "</td></tr></table>\n";
2014   }
2015   print "\n";
2016 }
2017
2018
2019
2020 =head2 print_relay();
2021
2022  print_relay();
2023
2024 Print our totals by relay.
2025
2026 =cut
2027
2028 sub print_relay {
2029   my $temp = "Relayed messages";
2030   print "<hr><a name=\"$temp\"></a><h2>$temp</h2>\n" if $html;
2031   if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) {
2032     my $shown = 0;
2033     my $spacing = "";
2034     my($format);
2035
2036     if ($html) {
2037       print "<table border=1>\n";
2038       print "<tr><th>Count</th><th>From</th><th>To</th>\n";
2039       $format = "<tr><td align=\"right\">%d</td><td>%s</td><td>%s</td>\n";
2040     }
2041     else {
2042       printf("%s\n%s\n\n", $temp, "-" x length($temp));
2043       $format = "%7d %s\n      => %s\n";
2044     }
2045
2046     my($key);
2047     foreach $key (sort keys %relayed) {
2048       my $count = $relayed{$key};
2049       $shown += $count;
2050       $key =~ s/[HA]=//g;
2051       my($one,$two) = split(/=> /, $key);
2052       printf($format, $count, $one, $two);
2053       $spacing = "\n";
2054     }
2055     print "</table>\n<p>\n" if $html;
2056     print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n";
2057   }
2058   else {
2059     print "No relayed messages\n";
2060     print "-------------------\n" unless $html;
2061   }
2062   print "\n";
2063 }
2064
2065
2066
2067 =head2 print_errors();
2068
2069  print_errors();
2070
2071 Print our errors. In HTML, we display them as a list rather than a table -
2072 Netscape doesn't like large tables!
2073
2074 =cut
2075
2076 sub print_errors {
2077   my $total_errors = 0;
2078
2079   if (scalar(keys %errors_count) != 0) {
2080     my $temp = "List of errors";
2081     my($format);
2082     if ($html) {
2083       print "<hr><a name=\"errors\"></a><h2>$temp</h2>\n";
2084       print "<ul><li><b>Count - Error</b>\n";
2085       $format = "<li>%d - %s\n";
2086     }
2087     else {
2088       printf("%s\n%s\n\n", $temp, "-" x length($temp));
2089     }
2090
2091     my($key);
2092     foreach $key (sort keys %errors_count) {
2093       my $text = $key;
2094       chomp($text);
2095       $text =~ s/\s\s+/ /g;     #Convert multiple spaces to a single space.
2096       $total_errors += $errors_count{$key};
2097       if ($html) {
2098
2099         #Translate HTML tag characters. Sergey Sholokh.
2100         $text =~ s/\</\&lt\;/g;
2101         $text =~ s/\>/\&gt\;/g;
2102
2103         printf($format,$errors_count{$key},$text);
2104       }
2105       else {
2106         printf("%5d ", $errors_count{$key});
2107         while (length($text) > 65) {
2108           my($first,$rest) = $text =~ /(.{50}\S*)\s+(.+)/;
2109           last if !$first;
2110           printf("%s\n      ", $first);
2111           $text = $rest;
2112         }
2113         printf("%s\n\n", $text);
2114       }
2115     }
2116     print "</ul>\n<p>\n" if $html;
2117
2118     $temp = "Errors encountered: $total_errors";
2119     print $temp,"\n";
2120     print "-" x length($temp),"\n" unless $html;
2121   }
2122
2123 }
2124
2125
2126 =head2 parse_old_eximstat_reports();
2127
2128  parse_old_eximstat_reports($fh);
2129
2130 Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
2131
2132 To test that the merging still works after changes, do something like the following.
2133 All the diffs should produce no output.
2134
2135  options='-bydomain -byemail -byhost -byedomain'
2136  options="$options -pattern 'Completed Messages' /Completed/"
2137  options="$options -pattern 'Received Messages' /<=/"
2138
2139  ./eximstats $options mainlog > mainlog.txt
2140  ./eximstats $options -merge mainlog.txt > mainlog.2.txt
2141  diff mainlog.txt mainlog.2.txt
2142
2143  ./eximstats $options -html mainlog > mainlog.html
2144  ./eximstats $options -merge -html mainlog.txt  > mainlog.2.html
2145  diff mainlog.html mainlog.2.html
2146
2147  ./eximstats $options -merge mainlog.html > mainlog.3.txt
2148  diff mainlog.txt mainlog.3.txt
2149
2150  ./eximstats $options -merge -html mainlog.html > mainlog.3.html
2151  diff mainlog.html mainlog.3.html
2152
2153  ./eximstats $options -nvr   mainlog > mainlog.nvr.txt
2154  ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
2155  diff mainlog.txt mainlog.4.txt
2156
2157  # double_mainlog.txt should have twice the values that mainlog.txt has.
2158  ./eximstats $options mainlog mainlog > double_mainlog.txt
2159
2160 =cut
2161
2162 sub parse_old_eximstat_reports {
2163   my($fh) = @_;
2164
2165   my(%league_table_value_entered, %league_table_value_was_zero, %table_order);
2166
2167   while (<$fh>) {
2168     if (/Exim statistics from ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?) to ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?)/) {
2169       $begin = $1 if ($1 lt $begin);
2170       $end   = $3 if ($3 gt $end);
2171     }
2172     elsif (/Grand total summary/) {
2173       # Fill in $report_totals{Received|Delivered}{Volume|Messages|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
2174       my(@fields);
2175       while (<$fh>) {
2176         $_ = html2txt($_);              #Convert general HTML markup to text.
2177         s/At least one addr//g;         #Another part of the HTML output we don't want.
2178
2179 #  TOTAL               Volume    Messages    Hosts Domains      Delayed       Failed
2180 #  Received              26MB         237      177      23       8  3.4%     28 11.8%
2181 #  Delivered             13MB         233       99      88
2182         if (/TOTAL\s+(.*?)\s*$/) {
2183           @fields = split(/\s+/,$1);
2184           #Delayed and Failed have two columns each, so add the extra field names in.
2185           splice(@fields,-1,1,'DelayedPercent','Failed','FailedPercent');
2186         }
2187         elsif (/(Received|Delivered)\s+(.*?)\s*$/) {
2188           print STDERR "Parsing $_" if $debug;
2189           add_to_totals($report_totals{$1},\@fields,$2);
2190         }
2191         last if (/Delivered/);          #Last line of this section.
2192       }
2193     }
2194
2195     elsif (/User Specified Patterns/i) {
2196 #User Specified Patterns
2197 #-----------------------
2198 #                       Total
2199 #  Description             85
2200
2201       while (<$fh>) { last if (/Total/); }      #Wait until we get the table headers.
2202       while (<$fh>) {
2203         print STDERR "Parsing $_" if $debug;
2204         $_ = html2txt($_);              #Convert general HTML markup to text.
2205         if (/^\s*(.*?)\s+(\d+)\s*$/) {
2206           $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
2207           add_to_totals($report_totals{patterns}{$1},['Total'],$2);
2208         }
2209         last if (/^\s*$/);                      #Finished if we have a blank line.
2210       }
2211     }
2212
2213     elsif (/Deliveries by transport/i) {
2214 #Deliveries by transport
2215 #-----------------------
2216 #                      Volume    Messages
2217 #  :blackhole:           70KB          51
2218 #  address_pipe         655KB           1
2219 #  smtp                  11MB         151
2220
2221       while (<$fh>) { last if (/Volume/); }     #Wait until we get the table headers.
2222       while (<$fh>) {
2223         print STDERR "Parsing $_" if $debug;
2224         $_ = html2txt($_);              #Convert general HTML markup to text.
2225         if (/(\S+)\s+(\d+\S*\s+\d+)/) {
2226           $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
2227           add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
2228         }
2229         last if (/^\s*$/);                      #Finished if we have a blank line.
2230       }
2231     }
2232     elsif (/(Messages received|Deliveries) per/) {
2233 #      Messages received per hour (each dot is 2 messages)
2234 #---------------------------------------------------
2235 #
2236 #00-01    106 .....................................................
2237 #01-02    103 ...................................................
2238
2239       # Set a pointer to the interval array so we can use the same code
2240       # block for both messages received and delivered.
2241       my $interval_aref = ($1 eq 'Deliveries') ? \@delivered_interval_count : \@received_interval_count;
2242       my $reached_table = 0;
2243       while (<$fh>) {
2244         $reached_table = 1 if (/^00/);
2245         next unless $reached_table;
2246         print STDERR "Parsing $_" if $debug;
2247         if (/^(\d+):(\d+)\s+(\d+)/) {           #hh:mm start time format ?
2248           $$interval_aref[($1*60 + $2)/$hist_interval] += $3;
2249         }
2250         elsif (/^(\d+)-(\d+)\s+(\d+)/) {        #hh-hh start-end time format ?
2251           $$interval_aref[($1*60)/$hist_interval] += $3;
2252         }
2253         else {                                  #Finished the table ?
2254           last;
2255         }
2256       }
2257     }
2258
2259     elsif (/Time spent on the queue: (all messages|messages with at least one remote delivery)/) {
2260 #Time spent on the queue: all messages
2261 #-------------------------------------
2262 #
2263 #Under   1m      217  91.9%   91.9%
2264 #        5m        2   0.8%   92.8%
2265 #        3h        8   3.4%   96.2%
2266 #        6h        7   3.0%   99.2%
2267 #       12h        2   0.8%  100.0%
2268
2269       # Set a pointer to the queue bin so we can use the same code
2270       # block for both all messages and remote deliveries.
2271       my $bin_aref = ($1 eq 'all messages') ? \@queue_bin : \@remote_queue_bin;
2272       my $reached_table = 0;
2273       while (<$fh>) {
2274         $_ = html2txt($_);              #Convert general HTML markup to text.
2275         $reached_table = 1 if (/^\s*Under/);
2276         next unless $reached_table;
2277         my $previous_seconds_on_queue = 0;
2278         if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
2279           print STDERR "Parsing $_" if $debug;
2280           my($modifier,$formated_time,$count) = ($1,$2,$3);
2281           my $seconds = unformat_time($formated_time);
2282           my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
2283           $previous_seconds_on_queue = $seconds;
2284           $time_on_queue = $seconds * 2 if ($modifier eq 'Over');
2285           my($i);
2286           for ($i = 0; $i <= $#queue_times; $i++) {
2287             if ($time_on_queue < $queue_times[$i]) {
2288               $$bin_aref[$i] += $count;
2289               last;
2290             }
2291           }
2292           # There's only one counter for messages going over the queue
2293           # times so make sure we only count it once.
2294           $queue_more_than += $count if (($bin_aref == \@queue_bin) && ($i > $#queue_times));
2295         }
2296         else {
2297           last;                                 #Finished the table ?
2298         }
2299       }
2300     }
2301
2302     elsif (/Relayed messages/) {
2303 #Relayed messages
2304 #----------------
2305 #
2306 #      1 addr.domain.com [1.2.3.4] a.user@domain.com
2307 #      => addr2.domain2.com [5.6.7.8] a2.user2@domain2.com
2308 #
2309 #<tr><td align="right">1</td><td>addr.domain.com [1.2.3.4] a.user@domain.com </td><td>addr2.domain2.com [5.6.7.8] a2.user2@domain2.com</td>
2310
2311       my $reached_table = 0;
2312       my($count,$sender);
2313       while (<$fh>) {
2314         unless ($reached_table) {
2315           last if (/No relayed messages/);
2316           $reached_table = 1 if (/^\s*\d/ || />\d+</);
2317           next unless $reached_table;
2318         }
2319         if (/>(\d+)<.td><td>(.*?) ?<.td><td>(.*?)</) {
2320           update_relayed($1,$2,$3);
2321         }
2322         elsif (/^\s*(\d+)\s+(.*?)\s*$/) {
2323           ($count,$sender) = ($1,$2);
2324         }
2325         elsif (/=>\s+(.*?)\s*$/) {
2326           update_relayed($count,$sender,$1);
2327         }
2328         else {
2329           last;                                 #Finished the table ?
2330         }
2331       }
2332     }
2333
2334     elsif (/Top (.*?) by (message count|volume)/) {
2335 #Top 50 sending hosts by message count
2336 #-------------------------------------
2337 #
2338 #     48     1468KB   local
2339       my($category,$by_count_or_volume) = ($1,$2);
2340
2341       #As we show 2 views of each table (by count and by volume),
2342       #most (but not all) entries will appear in both tables.
2343       #Set up a hash to record which entries we have already seen
2344       #and one to record which ones we are seeing for the first time.
2345       if ($by_count_or_volume =~ /count/) {
2346         undef %league_table_value_entered;
2347         undef %league_table_value_was_zero;
2348         undef %table_order;
2349       }
2350
2351       #As this section processes multiple different table categories,
2352       #set up pointers to the hashes to be updated.
2353       my($count_href,$data_href,$data_gigs_href);
2354       if ($category =~ /local sender/) {
2355         $count_href      = \%received_count_user;
2356         $data_href       = \%received_data_user;
2357         $data_gigs_href  = \%received_data_gigs_user;
2358       }
2359       elsif ($category =~ /sending (\S+?)s?\b/) {
2360         #Top 50 sending (host|domain|email|edomain)s
2361         #Top sending (host|domain|email|edomain)
2362         $count_href      = \%{$received_count{"\u$1"}};
2363         $data_href       = \%{$received_data{"\u$1"}};
2364         $data_gigs_href  = \%{$received_data_gigs{"\u$1"}};
2365       }
2366       elsif ($category =~ /local destination/) {
2367         $count_href      = \%delivered_count_user;
2368         $data_href       = \%delivered_data_user;
2369         $data_gigs_href  = \%delivered_data_gigs_user;
2370       }
2371       elsif ($category =~ /(\S+) destination/) {
2372         #Top 50 (host|domain|email|edomain) destinations
2373         #Top (host|domain|email|edomain) destination
2374         $count_href      = \%{$delivered_count{"\u$1"}};
2375         $data_href       = \%{$delivered_data{"\u$1"}};
2376         $data_gigs_href  = \%{$delivered_data_gigs{"\u$1"}};
2377       }
2378
2379       my $reached_table = 0;
2380       while (<$fh>) {
2381         $_ = html2txt($_);              #Convert general HTML markup to text.
2382         $reached_table = 1 if (/^\s*\d/);
2383         next unless $reached_table;
2384         if (/^\s*(\d+)\s+(\S+)\s*(.*?)\s*$/) {
2385           my($count,$rounded_volume,$entry) = ($1,$2,$3);
2386           #Note: $entry fields can be both null and can contain spaces.
2387
2388           #Add the entry into the %table_order hash if it has a rounded volume (KB/MB/GB).
2389           push(@{$table_order{$rounded_volume}{$by_count_or_volume}},$entry) if ($rounded_volume =~ /\D/);
2390
2391           unless ($league_table_value_entered{$entry}) {
2392             $league_table_value_entered{$entry} = 1;
2393             unless ($$count_href{$entry}) {
2394               $$count_href{$entry}     = 0;
2395               $$data_href{$entry}      = 0;
2396               $$data_gigs_href{$entry} = 0;
2397               $league_table_value_was_zero{$entry} = 1;
2398             }
2399
2400             $$count_href{$entry} += $count;
2401             #Add the rounded value to the data and data_gigs hashes.
2402             un_round($rounded_volume,\$$data_href{$entry},\$$data_gigs_href{$entry});
2403             print STDERR "$category by $by_count_or_volume: added $count,$rounded_volume to $entry\n" if $debug;
2404           }
2405         }
2406         else {          #Finished the table ?
2407           if ($by_count_or_volume =~ /volume/) {
2408             #Add a few bytes to appropriate entries to preserve the order.
2409
2410             my($rounded_volume);
2411             foreach $rounded_volume (keys %table_order) {
2412               #For each rounded volume, we want to create a list which has things
2413               #ordered from the volume table at the front, and additional things
2414               #from the count table ordered at the back.
2415               @{$table_order{$rounded_volume}{volume}} = () unless defined $table_order{$rounded_volume}{volume};
2416               @{$table_order{$rounded_volume}{'message count'}} = () unless defined $table_order{$rounded_volume}{'message count'};
2417               my(@order,%mark);
2418               map {$mark{$_} = 1} @{$table_order{$rounded_volume}{volume}};
2419               @order = @{$table_order{$rounded_volume}{volume}};
2420               map {push(@order,$_)} grep(!$mark{$_},@{$table_order{$rounded_volume}{'message count'}});
2421
2422               my $bonus_bytes = $#order;
2423               $bonus_bytes = 511 if ($bonus_bytes > 511);       #Don't go over the half-K boundary!
2424               while (@order and ($bonus_bytes > 0)) {
2425                 my $entry = shift(@order);
2426                 if ($league_table_value_was_zero{$entry}) {
2427                   $$data_href{$entry} += $bonus_bytes;
2428                   print STDERR "$category by $by_count_or_volume: added $bonus_bytes bonus bytes to $entry\n" if $debug;
2429                 }
2430                 $bonus_bytes--;
2431               }
2432             }
2433           }
2434
2435           last;
2436         }
2437       }
2438     }
2439     elsif (/List of errors/) {
2440 #List of errors
2441 #--------------
2442 #
2443 #    1 07904931641@one2one.net R=external T=smtp: SMTP error
2444 #            from remote mailer after RCPT TO:<07904931641@one2one.net>:
2445 #            host mail.one2one.net [193.133.192.24]: 550 User unknown
2446 #
2447 #<li>1 - ally.dufc@dunbar.org.uk R=external T=smtp: SMTP error from remote mailer after RCPT TO:<ally.dufc@dunbar.org.uk>: host mail.dunbar.org.uk [216.167.89.88]: 550 Unknown local part ally.dufc in <ally.dufc@dunbar.org.uk>
2448
2449
2450       my $reached_table = 0;
2451       my($count,$error,$blanks);
2452       while (<$fh>) {
2453         $reached_table = 1 if (/^( *|<li>)(\d+)/);
2454         next unless $reached_table;
2455
2456         s/^<li>(\d+) -/$1/;     #Convert an HTML line to a text line.
2457         $_ = html2txt($_);      #Convert general HTML markup to text.
2458
2459         if (/\t\s*(.*)/) {
2460           $error .= ' ' . $1;   #Join a multiline error.
2461         }
2462         elsif (/^\s*(\d+)\s+(.*)/) {
2463           if ($error) {
2464             #Finished with a previous multiline error so save it.
2465             $errors_count{$error} = 0 unless $errors_count{$error};
2466             $errors_count{$error} += $count;
2467           }
2468           ($count,$error) = ($1,$2);
2469         }
2470         elsif (/Errors encountered/) {
2471           if ($error) {
2472             #Finished the section, so save our stored last error.
2473             $errors_count{$error} = 0 unless $errors_count{$error};
2474             $errors_count{$error} += $count;
2475           }
2476           last;
2477         }
2478       }
2479     }
2480
2481   }
2482 }
2483
2484
2485
2486 =head2 update_relayed();
2487
2488  update_relayed($count,$sender,$recipient);
2489
2490 Adds an entry into the %relayed hash. Currently only used when
2491 merging reports.
2492
2493 =cut
2494
2495 sub update_relayed {
2496   my($count,$sender,$recipient) = @_;
2497
2498   #When generating the key, put in the 'H=' and 'A=' which can be used
2499   #in searches.
2500   my $key = "H=$sender => H=$recipient";
2501   $key =~ s/ ([^=\s]+\@\S+|<>)/ A=$1/g;
2502   if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
2503     $relayed{$key} = 0 if !defined $relayed{$key};
2504     $relayed{$key} += $count;
2505   }
2506   else {
2507     $relayed_unshown += $count;
2508   }
2509 }
2510
2511
2512 =head2 add_to_totals();
2513
2514  add_to_totals(\%totals,\@keys,$values);
2515
2516 Given a line of space seperated values, add them into the provided hash using @keys
2517 as the hash keys.
2518
2519 If the value contains a '%', then the value is set rather than added. Otherwise, we
2520 convert the value to bytes and gigs. The gigs get added to I<Key>-gigs.
2521
2522 =cut
2523
2524 sub add_to_totals {
2525   my($totals_href,$keys_aref,$values) = @_;
2526   my(@values) = split(/\s+/,$values);
2527   my(@keys) = @$keys_aref;              #Make a copy as we destroy the one we use.
2528   my($value);
2529   foreach $value (@values) {
2530     my $key = shift(@keys) or next;
2531     if ($value =~ /%/) {
2532       $$totals_href{$key} = $value;
2533     }
2534     else {
2535       $$totals_href{$key} = 0 unless ($$totals_href{$key});
2536       $$totals_href{"$key-gigs"} = 0 unless ($$totals_href{"$key-gigs"});
2537       un_round($value, \$$totals_href{$key}, \$$totals_href{"$key-gigs"});
2538       print STDERR "Added $value to $key - $$totals_href{$key} , " . $$totals_href{"$key-gigs"} . "GB.\n" if $debug;
2539     }
2540   }
2541 }
2542
2543 =head2 get_report_total();
2544
2545  $total = get_report_total(\%hash,$key);
2546
2547 If %hash contains values split into Units and Gigs, we calculate and return
2548
2549   $hash{$key} + 1024*1024*1024 * $hash{"${key}-gigs"}
2550
2551 =cut
2552
2553 sub get_report_total {
2554   no integer;
2555   my($hash_ref,$key) = @_;
2556   if ($$hash_ref{"${key}-gigs"}) {
2557     return $$hash_ref{$key} + $gig * $$hash_ref{"${key}-gigs"};
2558   }
2559   return $$hash_ref{$key} || 0;
2560 }
2561
2562 =head2 html2txt();
2563
2564  $text_line = html2txt($html_line);
2565
2566 Convert a line from html to text. Currently we just convert HTML tags to spaces
2567 and convert &gt;, &lt;, and &nbsp; tags back.
2568
2569 =cut
2570
2571 sub html2txt {
2572   ($_) = @_;
2573
2574   # Convert HTML tags to spacing. Note that the reports may contain <Userid> and
2575   # <Userid@Domain> words, so explicitly specify the HTML tags we will remove
2576   # (the ones used by this program). If someone is careless enough to have their
2577   # Userid the same as an HTML tag, there's not much we can do about it.
2578   s/<\/?(html|head|title|body|h\d|ul|li|a\s+|table|tr|td|th|pre|hr|p|br)\b.*?>/ /og;
2579
2580   s/\&lt\;/\</og;             #Convert '&lt;' to '<'.
2581   s/\&gt\;/\>/og;             #Convert '&gt;' to '>'.
2582   s/\&nbsp\;/ /og;            #Convert '&nbsp;' to ' '.
2583   return($_);
2584 }
2585
2586 =head2 get_next_arg();
2587
2588  $arg = get_next_arg();
2589
2590 Because eximstats arguments are often passed as variables,
2591 we can't rely on shell parsing to deal with quotes. This
2592 subroutine returns $ARGV[1] and does a shift. If $ARGV[1]
2593 starts with a quote (' or "), and doesn't end in one, then
2594 we append the next argument to it and shift again. We repeat
2595 until we've got all of the argument.
2596
2597 This isn't perfect as all white space gets reduced to one space,
2598 but it's as good as we can get! If it's esential that spacing
2599 be preserved precisely, then you get that by not using shell
2600 variables.
2601
2602 =cut
2603
2604 sub get_next_arg {
2605   my $arg = '';
2606   my $matched_pattern = 0;
2607   while ($ARGV[1]) {
2608     $arg .= ' ' if $arg;
2609     $arg .= $ARGV[1]; shift(@ARGV);
2610     if ($arg !~ /^['"]/) {
2611       $matched_pattern = 1;
2612       last;
2613     }
2614     if ($arg =~ s/^(['"])(.*)\1$/$2/) {
2615       $matched_pattern = 1;
2616       last;
2617     }
2618   }
2619   die "Mismatched argument quotes - <$arg>.\n" unless $matched_pattern;
2620   return $arg;
2621 }
2622
2623
2624
2625 ##################################################
2626 #                 Main Program                   #
2627 ##################################################
2628
2629
2630 $last_timestamp = '';
2631 $last_date = '';
2632 $show_errors = 1;
2633 $show_relay = 1;
2634 $show_transport = 1;
2635 $topcount = 50;
2636 $local_league_table = 1;
2637 $include_remote_users = 0;
2638 $hist_opt = 1;
2639 $volume_rounding = 1;
2640 $localtime_offset = calculate_localtime_offset();    # PH/FANF
2641
2642 $charts = 0;
2643 $charts_option_specified = 0;
2644 $chartrel = ".";
2645 $chartdir = ".";
2646
2647 @queue_times = (60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60,
2648                 12*60*60, 24*60*60);
2649
2650 $last_offset = '';
2651 $offset_seconds = 0;
2652
2653 # Decode options
2654
2655 while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-')
2656   {
2657   if    ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 }
2658   elsif ($ARGV[0] =~ /^\-ne$/)     { $show_errors = 0 }
2659   elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/)
2660     {
2661     if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
2662     }
2663   elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/)
2664     {
2665     @queue_times = split(/,/, $1);
2666     my($q);
2667     foreach $q (@queue_times) { $q = eval($q) + 0 }
2668     @queue_times = sort { $a <=> $b } @queue_times;
2669     @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0);
2670     }
2671   elsif ($ARGV[0] =~ /^-nt$/)       { $show_transport = 0 }
2672   elsif ($ARGV[0] =~ /^\-nt(.?)(.*)\1$/)
2673     {
2674     if ($1 eq "") { $show_transport = 0 } else { $transport_pattern = $2 }
2675     }
2676   elsif ($ARGV[0] =~ /^-t(\d+)$/)   { $topcount = $1 }
2677   elsif ($ARGV[0] =~ /^-tnl$/)      { $local_league_table = 0 }
2678   elsif ($ARGV[0] =~ /^-html$/)     { $html = 1 }
2679   elsif ($ARGV[0] =~ /^-merge$/)    { $merge_reports = 1 }
2680   elsif ($ARGV[0] =~ /^-charts$/)   {
2681     $charts = 1;
2682     warn "WARNING: CPAN Module GD::Graph::pie not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_pie;
2683     warn "WARNING: CPAN Module GD::Graph::linespoints not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_linespoints;
2684   }
2685   elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; }
2686   elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; }
2687   elsif ($ARGV[0] =~ /^-cache$/)    { } #Not currently used.
2688   elsif ($ARGV[0] =~ /^-byhost$/)   { $do_sender{Host} = 1 }
2689   elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 }
2690   elsif ($ARGV[0] =~ /^-byemail$/)  { $do_sender{Email} = 1 }
2691   elsif ($ARGV[0] =~ /^-byemaildomain$/)  { $do_sender{Edomain} = 1 }
2692   elsif ($ARGV[0] =~ /^-byedomain$/)  { $do_sender{Edomain} = 1 }
2693   elsif ($ARGV[0] =~ /^-nvr$/)      { $volume_rounding = 0 }
2694   elsif ($ARGV[0] =~ /^-d$/)        { $debug = 1 }
2695   elsif ($ARGV[0] =~ /^--?h(elp)?$/){ help() }
2696   elsif ($ARGV[0] =~ /^-t_remote_users$/) { $include_remote_users = 1 }
2697   elsif ($ARGV[0] =~ /^-pattern$/)
2698     {
2699     push(@user_descriptions,get_next_arg());
2700     push(@user_patterns,get_next_arg());
2701     }
2702   elsif ($ARGV[0] =~ /^-utc$/)
2703     {
2704     # We don't need this value if the log is in UTC.
2705     $localtime_offset = undef;
2706     }
2707   else
2708     {
2709     print STDERR "Eximstats: Unknown or malformed option $ARGV[0]\n";
2710     help();
2711     }
2712   shift;
2713   }
2714
2715   # Check that all the charts options are specified.
2716   warn "-charts option not specified. Use -help for help.\n" if ($charts_option_specified && ! $charts);
2717
2718   # Default to display tables by sending Host.
2719   $do_sender{Host} = 1 unless ($do_sender{Domain} || $do_sender{Email} || $do_sender{Edomain});
2720
2721
2722 for (my $i = 0; $i <= $#queue_times; $i++) {
2723   $queue_bin[$i] = 0;
2724   $remote_queue_bin[$i] = 0;
2725 }
2726
2727 # Compute the number of slots for the histogram
2728
2729 if ($hist_opt > 0)
2730   {
2731   if ($hist_opt > 60 || 60 % $hist_opt != 0)
2732     {
2733     print "Eximstats: -h must specify a factor of 60\n";
2734     exit 1;
2735     }
2736   $hist_interval = 60/$hist_opt;                #Interval in minutes.
2737   $hist_number = (24*60)/$hist_interval;        #Number of intervals per day.
2738   @received_interval_count = (0) x $hist_number;
2739   @delivered_interval_count = (0) x $hist_number;
2740   }
2741
2742 #$queue_unknown = 0;
2743
2744 $total_received_data = 0;
2745 $total_received_data_gigs = 0;
2746 $total_received_count = 0;
2747
2748 $total_delivered_data = 0;
2749 $total_delivered_data_gigs = 0;
2750 $total_delivered_count = 0;
2751
2752 $queue_more_than = 0;
2753 $delayed_count = 0;
2754 $relayed_unshown = 0;
2755 $begin = "9999-99-99 99:99:99";
2756 $end = "0000-00-00 00:00:00";
2757 my($section,$type);
2758 foreach $section ('Received','Delivered') {
2759   foreach $type ('Volume','Messages','Delayed','Failed','Hosts','Domains','Emails','Edomains') {
2760     $report_totals{$section}{$type} = 0;
2761   }
2762 }
2763
2764 # Generate our parser.
2765 my $parser = generate_parser();
2766
2767
2768
2769 if (@ARGV) {
2770   # Scan the input files and collect the data
2771   foreach my $file (@ARGV) {
2772     if ($file =~ /\.gz/) {
2773       unless (open(FILE,"gunzip -c $file |")) {
2774         print STDERR "Failed to gunzip -c $file: $!";
2775         next;
2776       }
2777     }
2778     elsif ($file =~ /\.Z/) {
2779       unless (open(FILE,"uncompress -c $file |")) {
2780         print STDERR "Failed to uncompress -c $file: $!";
2781         next;
2782       }
2783     }
2784     else {
2785       unless (open(FILE,$file)) {
2786         print STDERR "Failed to read $file: $!";
2787         next;
2788       }
2789     }
2790     #Now parse the filehandle, updating the global variables.
2791     parse($parser,\*FILE);
2792     close FILE;
2793   }
2794 }
2795 else {
2796   #No files provided. Parse STDIN, updating the global variables.
2797   parse($parser,\*STDIN);
2798 }
2799
2800
2801 if ($begin eq "9999-99-99 99:99:99") {
2802   print "**** No valid log lines read\n";
2803   exit 1;
2804 }
2805
2806 # Output our results.
2807 print_header();
2808 print_grandtotals();
2809
2810 # Print counts of user specified patterns if required.
2811 print_user_patterns() if @user_patterns;
2812
2813 # Print totals by transport if required.
2814 print_transport() if $show_transport;
2815
2816 # Print the deliveries per interval as a histogram, unless configured not to.
2817 # First find the maximum in one interval and scale accordingly.
2818 if ($hist_opt > 0) {
2819   print_histogram("Messages received", @received_interval_count);
2820   print_histogram("Deliveries", @delivered_interval_count);
2821 }
2822
2823 # Print times on queue if required.
2824 if ($#queue_times >= 0) {
2825   print_queue_times("all messages", \@queue_bin,$queue_more_than);
2826   print_queue_times("messages with at least one remote delivery",\@remote_queue_bin,$queue_more_than);
2827 }
2828
2829 # Print relay information if required.
2830 print_relay() if $show_relay;
2831
2832 # Print the league tables, if topcount isn't zero.
2833 if ($topcount > 0) {
2834   foreach ('Host','Domain','Email','Edomain') {
2835     next unless $do_sender{$_};
2836     print_league_table("sending \l$_", $received_count{$_}, $received_data{$_},$received_data_gigs{$_});
2837   }
2838
2839   print_league_table("local sender", \%received_count_user,
2840     \%received_data_user,\%received_data_gigs_user) if ($local_league_table || $include_remote_users);
2841   foreach ('Host','Domain','Email','Edomain') {
2842     next unless $do_sender{$_};
2843     print_league_table("\l$_ destination", $delivered_count{$_}, $delivered_data{$_},$delivered_data_gigs{$_});
2844   }
2845   print_league_table("local destination", \%delivered_count_user,
2846     \%delivered_data_user,\%delivered_data_gigs_user) if ($local_league_table || $include_remote_users);
2847 }
2848
2849 # Print the error statistics if required.
2850 print_errors() if $show_errors;
2851
2852 if ($html) {
2853   print "</body>\n</html>\n"
2854 }
2855
2856 # End of eximstats