| 1 | #!/usr/bin/perl -wT |
| 2 | |
| 3 | use strict; |
| 4 | |
| 5 | sub usage () { |
| 6 | print <<END; |
| 7 | usage: ratelimit.pl [options] <period> <regex> <logfile> |
| 8 | |
| 9 | The aim of this script is to compute clients' peak sending rates |
| 10 | from an Exim log file, using the same formula as Exim's ratelimit |
| 11 | ACL condition. This is so that you can get an idea of a reasonable |
| 12 | limit setting before you deploy the restrictions. |
| 13 | |
| 14 | options: |
| 15 | |
| 16 | -d Show debugging information to stderr |
| 17 | -p Show progress of parse the log to stderr |
| 18 | |
| 19 | <period> The smoothing period in seconds, as defined by the |
| 20 | documentation for the ratelimit ACL condition. |
| 21 | |
| 22 | This script isn't perfectly accurate, because the time |
| 23 | stamps in Exim's log files are only accurate to a second |
| 24 | whereas internally Exim computes sender rates to the |
| 25 | accuracy of your computer's clock (typically 10ms). |
| 26 | |
| 27 | <regex> The second argumetnis is a regular expression. |
| 28 | |
| 29 | Each line is matched against the regular expression. |
| 30 | Lines that do not match are ignored. The regex may |
| 31 | contain 0, 1, or 2 () capturing sub-expressions. |
| 32 | |
| 33 | If there are no () sub-expressions, then every line that |
| 34 | matches is used to compute a single rate. Its maximum |
| 35 | value is reported when the script finishes. |
| 36 | |
| 37 | If there is one () sub-expression, then the text matched |
| 38 | by the sub-expression is used to identify a rate lookup |
| 39 | key, similar to the lookup key used by the ratelimit |
| 40 | ACL condition. For example, you might write a regex |
| 41 | to match the client IP address, or the authenticated |
| 42 | username. Separate rates are computed for each different |
| 43 | client and the maximum rate for each client is reported |
| 44 | when the script finishes. |
| 45 | |
| 46 | If there are two () sub-expressions, then the text matched |
| 47 | by the first sub-expression is used to identify a rate |
| 48 | lookup key as above, and the second is used to match the |
| 49 | message size recorded in the log line, e.g. "S=(\\d+)". |
| 50 | In this case the byte rate is computed instead of the |
| 51 | message rate, similar to the per_byte option of the |
| 52 | ratelimit ACL condition. |
| 53 | |
| 54 | <logfile> The log files to be processed can be specified on the |
| 55 | command line after the other arguments; if no filenames |
| 56 | are specified the script will read from stdin. |
| 57 | |
| 58 | examples: |
| 59 | |
| 60 | ./ratelimit.pl 1 ' <= .*? \[(.*?)\]' <logfile> |
| 61 | |
| 62 | Compute burst sending rate like ACL condition |
| 63 | ratelimit = 0 / 1s / strict / \$sender_host_address |
| 64 | |
| 65 | ./ratelimit.pl 3600 '<= (.*?) ' <logfile> |
| 66 | |
| 67 | Compute sending rate like ACL condition |
| 68 | ratelimit = 0 / 1h / strict / \$sender_address |
| 69 | |
| 70 | END |
| 71 | exit 1; |
| 72 | } |
| 73 | |
| 74 | sub iso2unix (@) { |
| 75 | my ($y,$m,$d,$H,$M,$S,$zs,$zh,$zm) = @_; |
| 76 | use integer; |
| 77 | $y -= $m < 3; |
| 78 | $m += $m < 3 ? 10 : -2; |
| 79 | my $z = defined $zs ? "${zs}1" * ($zh * 60 + $zm) : 0; |
| 80 | my $t = $y/400 - $y/100 + $y/4 + $y*365 |
| 81 | + $m*367/12 + $d - 719499; |
| 82 | return $t * 86400 |
| 83 | + $H * 3600 |
| 84 | + $M * 60 |
| 85 | + $S |
| 86 | - $z; |
| 87 | } |
| 88 | |
| 89 | my $debug = 0; |
| 90 | my $progress = 0; |
| 91 | while (@ARGV && $ARGV[0] =~ /^-\w+$/) { |
| 92 | $debug = 1 if $ARGV[0] =~ s/(-\w*)d(\w*)/$1$2/; |
| 93 | $progress = 1 if $ARGV[0] =~ s/(-\w*)p(\w*)/$1$2/; |
| 94 | shift if $ARGV[0] eq "-"; |
| 95 | } |
| 96 | |
| 97 | usage if @ARGV < 2; |
| 98 | |
| 99 | my $progtime = ""; |
| 100 | |
| 101 | my $period = shift; |
| 102 | |
| 103 | my $re_txt = shift; |
| 104 | my $re = qr{$re_txt}o; |
| 105 | |
| 106 | my %time; |
| 107 | my %rate; |
| 108 | my %max; |
| 109 | |
| 110 | sub debug ($) { |
| 111 | my $key = shift; |
| 112 | printf STDERR "%s\t%12d %8s %5.2f %5.2f\n", |
| 113 | $_, $time{$key}, $key, $max{$key}, $rate{$key}; |
| 114 | } |
| 115 | |
| 116 | while (<>) { |
| 117 | next unless $_ =~ $re; |
| 118 | my $key = $1 || ""; |
| 119 | my $size = $2 || 1.0; |
| 120 | my $time = iso2unix |
| 121 | ($_ =~ m{^(\d{4})-(\d\d)-(\d\d)[ ] |
| 122 | (\d\d):(\d\d):(\d\d)[ ] |
| 123 | (?:([+-])(\d\d)(\d\d)[ ])? |
| 124 | }x); |
| 125 | if ($progress) { |
| 126 | my $prog_now = substr $_, 0, 14; |
| 127 | if ($progtime ne $prog_now) { |
| 128 | $progtime = $prog_now; |
| 129 | print STDERR "$progtime\n"; |
| 130 | } |
| 131 | } |
| 132 | if (not defined $time{$key}) { |
| 133 | $time{$key} = $time; |
| 134 | $rate{$key} = 0.0; |
| 135 | $max{$key} = 0.0; |
| 136 | debug $key if $debug; |
| 137 | next; |
| 138 | } |
| 139 | # see acl_ratelimit() for details of the following |
| 140 | my $interval = $time - $time{$key}; |
| 141 | $interval = 1e-9 if $interval <= 0.0; |
| 142 | my $i_over_p = $interval / $period; |
| 143 | my $a = exp(-$i_over_p); |
| 144 | $time{$key} = $time; |
| 145 | $rate{$key} = $size * (1.0 - $a) / $i_over_p + $a * $rate{$key}; |
| 146 | $max{$key} = $rate{$key} if $rate{$key} > $max{$key}; |
| 147 | debug $key if $debug; |
| 148 | } |
| 149 | |
| 150 | print map { |
| 151 | " " x (20 - length) . |
| 152 | "$_ : $max{$_}\n" |
| 153 | } sort { |
| 154 | $max{$a} <=> $max{$b} |
| 155 | } keys %max; |
| 156 | |
| 157 | # eof |