| 1 | #!/usr/bin/perl -wT |
| 2 | # |
| 3 | # $Cambridge: exim/src/util/ratelimit.pl,v 1.2 2005/09/13 18:06:31 fanf2 Exp $ |
| 4 | |
| 5 | use strict; |
| 6 | |
| 7 | sub usage () { |
| 8 | print <<END; |
| 9 | usage: ratelimit.pl <period> <regex> logfile |
| 10 | |
| 11 | The aim of this script is to compute clients' peak sending rates |
| 12 | from an Exim log file, using the same formula as Exim's ratelimit |
| 13 | ACL condition. This is so that you can get an idea of a reasonable |
| 14 | limit setting before you deploy the restrictions. |
| 15 | |
| 16 | This script isn't perfectly accurate, because the time stamps in |
| 17 | Exim's log files are only accurate to a seond whereas internally Exim |
| 18 | computes sender rates to the accuracy of your computer's clock |
| 19 | (typically 10ms). |
| 20 | |
| 21 | The log files to be processed can be specified on the command line |
| 22 | after the other arguments; if no filenames are specified the script |
| 23 | will read from stdin. |
| 24 | |
| 25 | The first command line argument is the smoothing period, as defined by |
| 26 | the documentation for the ratelimit ACL condition. The second argumetn |
| 27 | is a regular expression. |
| 28 | |
| 29 | Each line is matched against the regular expression. Lines that do not |
| 30 | match are ignored. The regex may contain 0, 1, or 2 () capturing |
| 31 | sub-expressions. |
| 32 | |
| 33 | If there are no () sub-expressions, then every line that matches is |
| 34 | used to compute a single rate. Its maximum value is reported when the |
| 35 | script finishes. |
| 36 | |
| 37 | If there is one () sub-expression, then the text matched by the |
| 38 | sub-expression is used to identify a rate lookup key, similar to the |
| 39 | lookup key used by the ratelimit ACL condition. For example, you might |
| 40 | write a regex to match the client IP address, or the authenticated |
| 41 | username. Separate rates are computed for each different client and |
| 42 | the maximum rate for each client is reported when the script finishes. |
| 43 | |
| 44 | If there are two () sub-expressions, then the text matched by the |
| 45 | first sub-expression is used to identify a rate lookup key as above, |
| 46 | and the second is used to match the message size recorded in the log |
| 47 | line, e.g. " S=(\\d+) ". In this case the byte rate is computed instead |
| 48 | of the message rate, similar to the per_byte option of the ratelimit |
| 49 | ACL condition. |
| 50 | END |
| 51 | exit 1; |
| 52 | } |
| 53 | |
| 54 | sub iso2unix (@) { |
| 55 | my ($y,$m,$d,$H,$M,$S,$zs,$zh,$zm) = @_; |
| 56 | use integer; |
| 57 | $y -= $m < 3; |
| 58 | $m += $m < 3 ? 10 : -2; |
| 59 | my $z = defined $zs ? "${zs}1" * ($zh * 60 + $zm) : 0; |
| 60 | my $t = $y/400 - $y/100 + $y/4 + $y*365 |
| 61 | + $m*367/12 + $d - 719499; |
| 62 | return $t * 86400 |
| 63 | + $H * 3600 |
| 64 | + $M * 60 |
| 65 | + $S |
| 66 | - $z; |
| 67 | } |
| 68 | |
| 69 | my $debug = 0; |
| 70 | my $progress = 0; |
| 71 | while (@ARGV && $ARGV[0] =~ /^-\w+$/) { |
| 72 | $debug = 1 if $ARGV[0] =~ s/(-\w*)d(\w*)/$1$2/; |
| 73 | $progress = 1 if $ARGV[0] =~ s/(-\w*)p(\w*)/$1$2/; |
| 74 | shift if $ARGV[0] eq "-"; |
| 75 | } |
| 76 | |
| 77 | usage if @ARGV < 2; |
| 78 | |
| 79 | my $progtime = ""; |
| 80 | |
| 81 | my $period = shift; |
| 82 | |
| 83 | my $re_txt = shift; |
| 84 | my $re = qr{$re_txt}o; |
| 85 | |
| 86 | my %time; |
| 87 | my %rate; |
| 88 | my %max; |
| 89 | |
| 90 | sub debug ($) { |
| 91 | my $key = shift; |
| 92 | printf "%s\t%12d %8s %5.2f %5.2f\n", |
| 93 | $_, $time{$key}, $key, $max{$key}, $rate{$key}; |
| 94 | } |
| 95 | |
| 96 | while (<>) { |
| 97 | next unless $_ =~ $re; |
| 98 | my $key = $1 || ""; |
| 99 | my $size = $2 || 1.0; |
| 100 | my $time = iso2unix |
| 101 | ($_ =~ m{^(\d{4})-(\d\d)-(\d\d)[ ] |
| 102 | (\d\d):(\d\d):(\d\d)[ ] |
| 103 | (?:([+-])(\d\d)(\d\d)[ ])? |
| 104 | }x); |
| 105 | if ($progress) { |
| 106 | my $prog_now = substr $_, 0, 14; |
| 107 | if ($progtime ne $prog_now) { |
| 108 | $progtime = $prog_now; |
| 109 | print "$progtime\n"; |
| 110 | } |
| 111 | } |
| 112 | if (not defined $time{$key}) { |
| 113 | $time{$key} = $time; |
| 114 | $rate{$key} = 0.0; |
| 115 | $max{$key} = 0.0; |
| 116 | debug $key if $debug; |
| 117 | next; |
| 118 | } |
| 119 | # see acl_ratelimit() for details of the following |
| 120 | my $interval = $time - $time{$key}; |
| 121 | $interval = 1e-9 if $interval <= 0.0; |
| 122 | my $i_over_p = $interval / $period; |
| 123 | my $a = exp(-$i_over_p); |
| 124 | $time{$key} = $time; |
| 125 | $rate{$key} = $size * (1.0 - $a) / $i_over_p + $a * $rate{$key}; |
| 126 | $max{$key} = $rate{$key} if $rate{$key} > $max{$key}; |
| 127 | debug $key if $debug; |
| 128 | } |
| 129 | |
| 130 | print map { |
| 131 | " " x (20 - length) . |
| 132 | "$_ : $max{$_}\n" |
| 133 | } sort { |
| 134 | $max{$a} <=> $max{$b} |
| 135 | } keys %max; |
| 136 | |
| 137 | # eof |