Build: fix parallelism problem. Bug 2566
[exim.git] / src / util / ratelimit.pl
CommitLineData
7546de58 1#!/usr/bin/perl -wT
7546de58
TF
2
3use strict;
4
4d3d955f
PP
5BEGIN { pop @INC if $INC[-1] eq '.' };
6
7546de58
TF
7sub usage () {
8 print <<END;
e83a8cf2 9usage: ratelimit.pl [options] <period> <regex> <logfile>
7546de58
TF
10
11The aim of this script is to compute clients' peak sending rates
12from an Exim log file, using the same formula as Exim's ratelimit
13ACL condition. This is so that you can get an idea of a reasonable
14limit setting before you deploy the restrictions.
15
e83a8cf2 16options:
17
18-d Show debugging information to stderr
19-p Show progress of parse the log to stderr
20
21<period> The smoothing period in seconds, as defined by the
22 documentation for the ratelimit ACL condition.
23
24 This script isn't perfectly accurate, because the time
25 stamps in Exim's log files are only accurate to a second
26 whereas internally Exim computes sender rates to the
27 accuracy of your computer's clock (typically 10ms).
28
4d66637f 29<regex> The second argument is a regular expression.
e83a8cf2 30
31 Each line is matched against the regular expression.
32 Lines that do not match are ignored. The regex may
33 contain 0, 1, or 2 () capturing sub-expressions.
34
35 If there are no () sub-expressions, then every line that
36 matches is used to compute a single rate. Its maximum
37 value is reported when the script finishes.
38
39 If there is one () sub-expression, then the text matched
40 by the sub-expression is used to identify a rate lookup
41 key, similar to the lookup key used by the ratelimit
42 ACL condition. For example, you might write a regex
43 to match the client IP address, or the authenticated
44 username. Separate rates are computed for each different
45 client and the maximum rate for each client is reported
46 when the script finishes.
47
48 If there are two () sub-expressions, then the text matched
49 by the first sub-expression is used to identify a rate
50 lookup key as above, and the second is used to match the
51 message size recorded in the log line, e.g. "S=(\\d+)".
52 In this case the byte rate is computed instead of the
53 message rate, similar to the per_byte option of the
54 ratelimit ACL condition.
55
56<logfile> The log files to be processed can be specified on the
57 command line after the other arguments; if no filenames
58 are specified the script will read from stdin.
59
60examples:
61
62./ratelimit.pl 1 ' <= .*? \[(.*?)\]' <logfile>
63
64 Compute burst sending rate like ACL condition
65 ratelimit = 0 / 1s / strict / \$sender_host_address
66
67./ratelimit.pl 3600 '<= (.*?) ' <logfile>
68
69 Compute sending rate like ACL condition
70 ratelimit = 0 / 1h / strict / \$sender_address
71
7546de58
TF
72END
73 exit 1;
74}
75
76sub iso2unix (@) {
77 my ($y,$m,$d,$H,$M,$S,$zs,$zh,$zm) = @_;
78 use integer;
79 $y -= $m < 3;
80 $m += $m < 3 ? 10 : -2;
81 my $z = defined $zs ? "${zs}1" * ($zh * 60 + $zm) : 0;
82 my $t = $y/400 - $y/100 + $y/4 + $y*365
83 + $m*367/12 + $d - 719499;
84 return $t * 86400
85 + $H * 3600
86 + $M * 60
87 + $S
88 - $z;
89}
90
91my $debug = 0;
92my $progress = 0;
93while (@ARGV && $ARGV[0] =~ /^-\w+$/) {
94 $debug = 1 if $ARGV[0] =~ s/(-\w*)d(\w*)/$1$2/;
95 $progress = 1 if $ARGV[0] =~ s/(-\w*)p(\w*)/$1$2/;
96 shift if $ARGV[0] eq "-";
97}
98
99usage if @ARGV < 2;
100
101my $progtime = "";
102
103my $period = shift;
104
105my $re_txt = shift;
106my $re = qr{$re_txt}o;
107
108my %time;
109my %rate;
110my %max;
111
112sub debug ($) {
113 my $key = shift;
e83a8cf2 114 printf STDERR "%s\t%12d %8s %5.2f %5.2f\n",
7546de58
TF
115 $_, $time{$key}, $key, $max{$key}, $rate{$key};
116}
117
118while (<>) {
119 next unless $_ =~ $re;
120 my $key = $1 || "";
121 my $size = $2 || 1.0;
122 my $time = iso2unix
123 ($_ =~ m{^(\d{4})-(\d\d)-(\d\d)[ ]
124 (\d\d):(\d\d):(\d\d)[ ]
125 (?:([+-])(\d\d)(\d\d)[ ])?
126 }x);
127 if ($progress) {
128 my $prog_now = substr $_, 0, 14;
129 if ($progtime ne $prog_now) {
130 $progtime = $prog_now;
e83a8cf2 131 print STDERR "$progtime\n";
7546de58
TF
132 }
133 }
134 if (not defined $time{$key}) {
135 $time{$key} = $time;
136 $rate{$key} = 0.0;
137 $max{$key} = 0.0;
138 debug $key if $debug;
139 next;
140 }
141 # see acl_ratelimit() for details of the following
142 my $interval = $time - $time{$key};
e5d5a95f 143 $interval = 1e-9 if $interval <= 0.0;
7546de58
TF
144 my $i_over_p = $interval / $period;
145 my $a = exp(-$i_over_p);
7546de58
TF
146 $time{$key} = $time;
147 $rate{$key} = $size * (1.0 - $a) / $i_over_p + $a * $rate{$key};
148 $max{$key} = $rate{$key} if $rate{$key} > $max{$key};
149 debug $key if $debug;
150}
151
152print map {
153 " " x (20 - length) .
154 "$_ : $max{$_}\n"
155} sort {
156 $max{$a} <=> $max{$b}
157} keys %max;
158
159# eof