| 1 | #!/usr/bin/perl -wT |
| 2 | |
| 3 | use strict; |
| 4 | |
| 5 | BEGIN { pop @INC if $INC[-1] eq '.' }; |
| 6 | |
| 7 | sub usage () { |
| 8 | print <<END; |
| 9 | usage: ratelimit.pl [options] <period> <regex> <logfile> |
| 10 | |
| 11 | The aim of this script is to compute clients' peak sending rates |
| 12 | from an Exim log file, using the same formula as Exim's ratelimit |
| 13 | ACL condition. This is so that you can get an idea of a reasonable |
| 14 | limit setting before you deploy the restrictions. |
| 15 | |
| 16 | options: |
| 17 | |
| 18 | -d Show debugging information to stderr |
| 19 | -p Show progress of parse the log to stderr |
| 20 | |
| 21 | <period> The smoothing period in seconds, as defined by the |
| 22 | documentation for the ratelimit ACL condition. |
| 23 | |
| 24 | This script isn't perfectly accurate, because the time |
| 25 | stamps in Exim's log files are only accurate to a second |
| 26 | whereas internally Exim computes sender rates to the |
| 27 | accuracy of your computer's clock (typically 10ms). |
| 28 | |
| 29 | <regex> The second argument is a regular expression. |
| 30 | |
| 31 | Each line is matched against the regular expression. |
| 32 | Lines that do not match are ignored. The regex may |
| 33 | contain 0, 1, or 2 () capturing sub-expressions. |
| 34 | |
| 35 | If there are no () sub-expressions, then every line that |
| 36 | matches is used to compute a single rate. Its maximum |
| 37 | value is reported when the script finishes. |
| 38 | |
| 39 | If there is one () sub-expression, then the text matched |
| 40 | by the sub-expression is used to identify a rate lookup |
| 41 | key, similar to the lookup key used by the ratelimit |
| 42 | ACL condition. For example, you might write a regex |
| 43 | to match the client IP address, or the authenticated |
| 44 | username. Separate rates are computed for each different |
| 45 | client and the maximum rate for each client is reported |
| 46 | when the script finishes. |
| 47 | |
| 48 | If there are two () sub-expressions, then the text matched |
| 49 | by the first sub-expression is used to identify a rate |
| 50 | lookup key as above, and the second is used to match the |
| 51 | message size recorded in the log line, e.g. "S=(\\d+)". |
| 52 | In this case the byte rate is computed instead of the |
| 53 | message rate, similar to the per_byte option of the |
| 54 | ratelimit ACL condition. |
| 55 | |
| 56 | <logfile> The log files to be processed can be specified on the |
| 57 | command line after the other arguments; if no filenames |
| 58 | are specified the script will read from stdin. |
| 59 | |
| 60 | examples: |
| 61 | |
| 62 | ./ratelimit.pl 1 ' <= .*? \[(.*?)\]' <logfile> |
| 63 | |
| 64 | Compute burst sending rate like ACL condition |
| 65 | ratelimit = 0 / 1s / strict / \$sender_host_address |
| 66 | |
| 67 | ./ratelimit.pl 3600 '<= (.*?) ' <logfile> |
| 68 | |
| 69 | Compute sending rate like ACL condition |
| 70 | ratelimit = 0 / 1h / strict / \$sender_address |
| 71 | |
| 72 | END |
| 73 | exit 1; |
| 74 | } |
| 75 | |
| 76 | sub iso2unix (@) { |
| 77 | my ($y,$m,$d,$H,$M,$S,$zs,$zh,$zm) = @_; |
| 78 | use integer; |
| 79 | $y -= $m < 3; |
| 80 | $m += $m < 3 ? 10 : -2; |
| 81 | my $z = defined $zs ? "${zs}1" * ($zh * 60 + $zm) : 0; |
| 82 | my $t = $y/400 - $y/100 + $y/4 + $y*365 |
| 83 | + $m*367/12 + $d - 719499; |
| 84 | return $t * 86400 |
| 85 | + $H * 3600 |
| 86 | + $M * 60 |
| 87 | + $S |
| 88 | - $z; |
| 89 | } |
| 90 | |
| 91 | my $debug = 0; |
| 92 | my $progress = 0; |
| 93 | while (@ARGV && $ARGV[0] =~ /^-\w+$/) { |
| 94 | $debug = 1 if $ARGV[0] =~ s/(-\w*)d(\w*)/$1$2/; |
| 95 | $progress = 1 if $ARGV[0] =~ s/(-\w*)p(\w*)/$1$2/; |
| 96 | shift if $ARGV[0] eq "-"; |
| 97 | } |
| 98 | |
| 99 | usage if @ARGV < 2; |
| 100 | |
| 101 | my $progtime = ""; |
| 102 | |
| 103 | my $period = shift; |
| 104 | |
| 105 | my $re_txt = shift; |
| 106 | my $re = qr{$re_txt}o; |
| 107 | |
| 108 | my %time; |
| 109 | my %rate; |
| 110 | my %max; |
| 111 | |
| 112 | sub debug ($) { |
| 113 | my $key = shift; |
| 114 | printf STDERR "%s\t%12d %8s %5.2f %5.2f\n", |
| 115 | $_, $time{$key}, $key, $max{$key}, $rate{$key}; |
| 116 | } |
| 117 | |
| 118 | while (<>) { |
| 119 | next unless $_ =~ $re; |
| 120 | my $key = $1 || ""; |
| 121 | my $size = $2 || 1.0; |
| 122 | my $time = iso2unix |
| 123 | ($_ =~ m{^(\d{4})-(\d\d)-(\d\d)[ ] |
| 124 | (\d\d):(\d\d):(\d\d)[ ] |
| 125 | (?:([+-])(\d\d)(\d\d)[ ])? |
| 126 | }x); |
| 127 | if ($progress) { |
| 128 | my $prog_now = substr $_, 0, 14; |
| 129 | if ($progtime ne $prog_now) { |
| 130 | $progtime = $prog_now; |
| 131 | print STDERR "$progtime\n"; |
| 132 | } |
| 133 | } |
| 134 | if (not defined $time{$key}) { |
| 135 | $time{$key} = $time; |
| 136 | $rate{$key} = 0.0; |
| 137 | $max{$key} = 0.0; |
| 138 | debug $key if $debug; |
| 139 | next; |
| 140 | } |
| 141 | # see acl_ratelimit() for details of the following |
| 142 | my $interval = $time - $time{$key}; |
| 143 | $interval = 1e-9 if $interval <= 0.0; |
| 144 | my $i_over_p = $interval / $period; |
| 145 | my $a = exp(-$i_over_p); |
| 146 | $time{$key} = $time; |
| 147 | $rate{$key} = $size * (1.0 - $a) / $i_over_p + $a * $rate{$key}; |
| 148 | $max{$key} = $rate{$key} if $rate{$key} > $max{$key}; |
| 149 | debug $key if $debug; |
| 150 | } |
| 151 | |
| 152 | print map { |
| 153 | " " x (20 - length) . |
| 154 | "$_ : $max{$_}\n" |
| 155 | } sort { |
| 156 | $max{$a} <=> $max{$b} |
| 157 | } keys %max; |
| 158 | |
| 159 | # eof |