Update version number and copyright year.
[exim.git] / src / src / retry.c
CommitLineData
/* $Cambridge: exim/src/src/retry.c,v 1.12 2007/01/08 10:50:18 ph10 Exp $ */

/*************************************************
*     Exim - an Internet mail transport agent    *
*************************************************/

/* Copyright (c) University of Cambridge 1995 - 2007 */
/* See the file NOTICE for conditions of use and distribution. */

/* Functions concerned with retrying unsuccessful deliveries. */
11
12
13#include "exim.h"
14
15
16
17/*************************************************
18* Check the ultimate address timeout *
19*************************************************/
20
21/* This function tests whether a message has been on the queue longer than
22the maximum retry time for a particular host.
23
24Arguments:
25 host_key the key to look up a host retry rule
26 domain the domain to look up a domain retry rule
27 basic_errno a specific error number, or zero if none
28 more_errno additional data for the error
29 now the time
30
31Returns: TRUE if the ultimate timeout has been reached
32*/
33
34static BOOL
35ultimate_address_timeout(uschar *host_key, uschar *domain, int basic_errno,
36 int more_errno, time_t now)
37{
38BOOL address_timeout = TRUE; /* no rule => timed out */
39
40retry_config *retry =
41 retry_find_config(host_key+2, domain, basic_errno, more_errno);
42
43if (retry != NULL && retry->rules != NULL)
44 {
45 retry_rule *last_rule;
46 for (last_rule = retry->rules;
47 last_rule->next != NULL;
48 last_rule = last_rule->next);
ea49d0e1 49 DEBUG(D_transport|D_retry)
c816d124
PH
50 debug_printf(" received_time=%d diff=%d timeout=%d\n",
51 received_time, (int)(now - received_time), last_rule->timeout);
059ec3d9
PH
52 address_timeout = (now - received_time > last_rule->timeout);
53 }
ea49d0e1
PH
54else
55 {
56 DEBUG(D_transport|D_retry)
57 debug_printf("no retry rule found: assume timed out\n");
58 }
059ec3d9
PH
59
60return address_timeout;
61}
62
63
64
65/*************************************************
66* Set status of a host+address item *
67*************************************************/
68
69/* This function is passed a host_item which contains a host name and an
70IP address string. Its job is to set the status of the address if it is not
71already set (indicated by hstatus_unknown). The possible values are:
72
73 hstatus_usable the address is not listed in the unusable tree, and does
74 not have a retry record, OR the time is past the next
75 try time, OR the message has been on the queue for more
76 than the maximum retry time for a failing host
77
78 hstatus_unusable the address is listed in the unusable tree, or does have
79 a retry record, and the time is not yet at the next retry
80 time.
81
82 hstatus_unusable_expired as above, but also the retry time has expired
83 for this address.
84
85The reason a delivery is permitted when a message has been around for a very
86long time is to allow the ultimate address timeout to operate after a delivery
87failure. Otherwise some messages may stick around without being tried for too
88long.
89
90If a host retry record is retrieved from the hints database, the time of last
91trying is filled into the last_try field of the host block. If a host is
92generally usable, a check is made to see if there is a retry delay on this
93specific message at this host.
94
95If a non-standard port is being used, it is added to the retry key.
96
97Arguments:
98 domain the address domain
99 host pointer to a host item
100 portstring "" for standard port, ":xxxx" for a non-standard port
101 include_ip_address TRUE to include the address in the key - this is
102 usual, but sometimes is not wanted
103 retry_host_key where to put a pointer to the key for the host-specific
104 retry record, if one is read and the host is usable
105 retry_message_key where to put a pointer to the key for the message+host
106 retry record, if one is read and the host is usable
107
108Returns: TRUE if the host has expired but is usable because
109 its retry time has come
110*/
111
112BOOL
113retry_check_address(uschar *domain, host_item *host, uschar *portstring,
114 BOOL include_ip_address, uschar **retry_host_key, uschar **retry_message_key)
115{
116BOOL yield = FALSE;
117time_t now = time(NULL);
118uschar *host_key, *message_key;
119open_db dbblock;
120open_db *dbm_file;
121tree_node *node;
122dbdata_retry *host_retry_record, *message_retry_record;
123
124*retry_host_key = *retry_message_key = NULL;
125
126DEBUG(D_transport|D_retry) debug_printf("checking status of %s\n", host->name);
127
128/* Do nothing if status already set; otherwise initialize status as usable. */
129
130if (host->status != hstatus_unknown) return FALSE;
131host->status = hstatus_usable;
132
133/* Generate the host key for the unusable tree and the retry database. Ensure
134host names are lower cased (that's what %S does). */
135
136host_key = include_ip_address?
137 string_sprintf("T:%S:%s%s", host->name, host->address, portstring) :
138 string_sprintf("T:%S%s", host->name, portstring);
139
140/* Generate the message-specific key */
141
142message_key = string_sprintf("%s:%s", host_key, message_id);
143
144/* Search the tree of unusable IP addresses. This is filled in when deliveries
145fail, because the retry database itself is not updated until the end of all
146deliveries (so as to do it all in one go). The tree records addresses that have
147become unusable during this delivery process (i.e. those that will get put into
148the retry database when it is updated). */
149
150node = tree_search(tree_unusable, host_key);
151if (node != NULL)
152 {
153 DEBUG(D_transport|D_retry) debug_printf("found in tree of unusables\n");
154 host->status = (node->data.val > 255)?
155 hstatus_unusable_expired : hstatus_unusable;
156 host->why = node->data.val & 255;
157 return FALSE;
158 }
159
160/* Open the retry database, giving up if there isn't one. Otherwise, search for
161the retry records, and then close the database again. */
162
163if ((dbm_file = dbfn_open(US"retry", O_RDONLY, &dbblock, FALSE)) == NULL)
164 {
165 DEBUG(D_deliver|D_retry|D_hints_lookup)
166 debug_printf("no retry data available\n");
167 return FALSE;
168 }
169host_retry_record = dbfn_read(dbm_file, host_key);
170message_retry_record = dbfn_read(dbm_file, message_key);
171dbfn_close(dbm_file);
172
173/* Ignore the data if it is too old - too long since it was written */
174
175if (host_retry_record == NULL)
176 {
177 DEBUG(D_transport|D_retry) debug_printf("no host retry record\n");
178 }
179else if (now - host_retry_record->time_stamp > retry_data_expire)
180 {
181 host_retry_record = NULL;
182 DEBUG(D_transport|D_retry) debug_printf("host retry record too old\n");
183 }
184
185if (message_retry_record == NULL)
186 {
187 DEBUG(D_transport|D_retry) debug_printf("no message retry record\n");
188 }
189else if (now - message_retry_record->time_stamp > retry_data_expire)
190 {
191 message_retry_record = NULL;
192 DEBUG(D_transport|D_retry) debug_printf("message retry record too old\n");
193 }
194
195/* If there's a host-specific retry record, check for reaching the retry
196time (or forcing). If not, and the host is not expired, check for the message
197having been around for longer than the maximum retry time for this host or
198address. Allow the delivery if it has. Otherwise set the appropriate unusable
199flag and return FALSE. Otherwise arrange to return TRUE if this is an expired
200host. */
201
202if (host_retry_record != NULL)
203 {
204 *retry_host_key = host_key;
205
206 /* We have not reached the next try time. Check for the ultimate address
207 timeout if the host has not expired. */
208
209 if (now < host_retry_record->next_try && !deliver_force)
210 {
211 DEBUG(D_transport|D_retry)
c816d124 212 {
059ec3d9
PH
213 debug_printf("host retry time not reached: checking ultimate address "
214 "timeout\n");
c816d124
PH
215 debug_printf(" now=%d first_failed=%d next_try=%d expired=%d\n",
216 (int)now, (int)host_retry_record->first_failed,
217 (int)host_retry_record->next_try,
218 host_retry_record->expired);
219 }
059ec3d9
PH
220
221 if (!host_retry_record->expired &&
222 ultimate_address_timeout(host_key, domain,
223 host_retry_record->basic_errno, host_retry_record->more_errno, now))
224 {
225 DEBUG(D_transport|D_retry)
226 debug_printf("on queue longer than maximum retry for "
227 "address - allowing delivery\n");
228 return FALSE;
229 }
230
231 /* We have not hit the ultimate address timeout; host is unusable. */
232
233 host->status = (host_retry_record->expired)?
234 hstatus_unusable_expired : hstatus_unusable;
235 host->why = hwhy_retry;
236 host->last_try = host_retry_record->last_try;
237 return FALSE;
238 }
239
240 /* Host is usable; set return TRUE if expired. */
241
242 yield = host_retry_record->expired;
243 }
244
245/* It's OK to try the host. If there's a message-specific retry record, check
246for reaching its retry time (or forcing). If not, mark the host unusable,
247unless the ultimate address timeout has been reached. */
248
249if (message_retry_record != NULL)
250 {
251 *retry_message_key = message_key;
252 if (now < message_retry_record->next_try && !deliver_force)
253 {
254 DEBUG(D_transport|D_retry)
c816d124 255 {
059ec3d9
PH
256 debug_printf("host+message retry time not reached: checking ultimate "
257 "address timeout\n");
c816d124
PH
258 debug_printf(" now=%d first_failed=%d next_try=%d expired=%d\n",
259 (int)now, (int)message_retry_record->first_failed,
260 (int)message_retry_record->next_try, message_retry_record->expired);
261 }
059ec3d9
PH
262 if (!ultimate_address_timeout(host_key, domain, 0, 0, now))
263 {
264 host->status = hstatus_unusable;
265 host->why = hwhy_retry;
266 }
267 else
268 {
269 DEBUG(D_transport|D_retry)
270 debug_printf("on queue longer than maximum retry for "
271 "address - allowing delivery\n");
272 }
273 return FALSE;
274 }
275 }
276
277return yield;
278}
279
280
281
282
283/*************************************************
284* Add a retry item to an address *
285*************************************************/
286
287/* Retry items are chained onto an address when it is deferred either by router
288or by a transport, or if it succeeds or fails and there was a previous retry
289item that now needs to be deleted. Sometimes there can be both kinds of item:
290for example, if routing was deferred but then succeeded, and delivery then
291deferred. In that case there is a delete item for the routing retry, and an
292updating item for the delivery.
293
294(But note that that is only visible at the outer level, because in remote
295delivery subprocesses, the address starts "clean", with no retry items carried
296in.)
297
298These items are used at the end of a delivery attempt to update the retry
299database. The keys start R: for routing delays and T: for transport delays.
300
301Arguments:
302 addr the address block onto which to hang the item
303 key the retry key
304 flags delete, host, and message flags, copied into the block
305
306Returns: nothing
307*/
308
309void
310retry_add_item(address_item *addr, uschar *key, int flags)
311{
312retry_item *rti = store_get(sizeof(retry_item));
313rti->next = addr->retries;
314addr->retries = rti;
315rti->key = key;
316rti->basic_errno = addr->basic_errno;
317rti->more_errno = addr->more_errno;
318rti->message = addr->message;
319rti->flags = flags;
320
321DEBUG(D_transport|D_retry)
322 {
323 int letter = rti->more_errno & 255;
324 debug_printf("added retry item for %s: errno=%d more_errno=", rti->key,
325 rti->basic_errno);
326 if (letter == 'A' || letter == 'M')
327 debug_printf("%d,%c", (rti->more_errno >> 8) & 255, letter);
328 else
329 debug_printf("%d", rti->more_errno);
330 debug_printf(" flags=%d\n", flags);
331 }
332}
333
334
335
336/*************************************************
337* Find retry configuration data *
338*************************************************/
339
340/* Search the in-store retry information for the first retry item that applies
341to a given destination. If the key contains an @ we are probably handling a
342local delivery and have a complete address to search for; this happens when
343retry_use_local_part is set on a router. Otherwise, the key is likely to be a
344host name for a remote delivery, or a domain name for a local delivery. We
345prepend *@ on the front of it so that it will match a retry item whose address
346item pattern is independent of the local part. The alternate key, if set, is
347always just a domain, so we treat it likewise.
348
349Arguments:
350 key key for which retry info is wanted
351 alternate alternative key, always just a domain
352 basic_errno specific error predicate on the retry rule, or zero
353 more_errno additional data for errno predicate
354
355Returns: pointer to retry rule, or NULL
356*/
357
358retry_config *
359retry_find_config(uschar *key, uschar *alternate, int basic_errno,
360 int more_errno)
361{
ea49d0e1 362int replace = 0;
059ec3d9
PH
363uschar *use_key, *use_alternate;
364uschar *colon = Ustrchr(key, ':');
365retry_config *yield;
366
ea49d0e1
PH
367/* If there's a colon in the key, there are two possibilities:
368
369(1) This is a key for a host, ip address, and possibly port, in the format
370
371 hostname:ip+port
372
373 In this case, we temporarily replace the colon with a zero, to terminate
374 the string after the host name.
375
376(2) This is a key for a pipe, file, or autoreply delivery, in the format
377
378 pipe-or-file-or-auto:x@y
379
380 where x@y is the original address that provoked the delivery. The pipe or
381 file or auto will start with | or / or >, whereas a host name will start
382 with a letter or a digit. In this case we want to use the original address
383 to search for a retry rule. */
059ec3d9
PH
384
385if (colon != NULL)
386 {
ea49d0e1
PH
387 if (isalnum(*key))
388 replace = ':';
389 else
390 key = Ustrrchr(key, ':') + 1; /* Take from the last colon */
059ec3d9 391 }
ea49d0e1
PH
392
393if (replace == 0) colon = key + Ustrlen(key);
059ec3d9
PH
394*colon = 0;
395
396/* Sort out the keys */
397
398use_key = (Ustrchr(key, '@') != NULL)? key : string_sprintf("*@%s", key);
399use_alternate = (alternate == NULL)? NULL : string_sprintf("*@%s", alternate);
400
401/* Scan the configured retry items. */
402
403for (yield = retries; yield != NULL; yield = yield->next)
404 {
405 uschar *plist = yield->pattern;
406 uschar *slist = yield->senders;
407
408 /* If a specific error is set for this item, check that we are handling that
409 specific error, and if so, check any additional error information if
410 required. */
411
412 if (yield->basic_errno != 0)
413 {
414 /* Special code is required for quota errors, as these can either be system
415 quota errors, or Exim's own quota imposition, which has a different error
416 number. Full partitions are also treated in the same way as quota errors.
417 */
418
419 if (yield->basic_errno == ERRNO_EXIMQUOTA)
420 {
421 if ((basic_errno != ERRNO_EXIMQUOTA && basic_errno != errno_quota &&
422 basic_errno != ENOSPC) ||
423 (yield->more_errno != 0 && yield->more_errno > more_errno))
424 continue;
425 }
426
e97957bc
PH
427 /* The TLSREQUIRED error also covers TLSFAILURE. These are subtly different
428 errors, but not worth separating at this level. */
429
430 else if (yield->basic_errno == ERRNO_TLSREQUIRED)
431 {
432 if (basic_errno != ERRNO_TLSREQUIRED && basic_errno != ERRNO_TLSFAILURE)
433 continue;
434 }
435
436 /* Handle 4xx responses to MAIL, RCPT, or DATA. The code that was received
437 is in the 2nd least significant byte of more_errno (with 400 subtracted).
438 The required value is coded in the 2nd least significant byte of the
439 yield->more_errno field as follows:
059ec3d9
PH
440
441 255 => any 4xx code
442 >= 100 => the decade must match the value less 100
443 < 100 => the exact value must match
444 */
445
e97957bc
PH
446 else if (yield->basic_errno == ERRNO_MAIL4XX ||
447 yield->basic_errno == ERRNO_RCPT4XX ||
448 yield->basic_errno == ERRNO_DATA4XX)
059ec3d9
PH
449 {
450 int wanted;
e97957bc 451 if (basic_errno != yield->basic_errno) continue;
059ec3d9
PH
452 wanted = (yield->more_errno >> 8) & 255;
453 if (wanted != 255)
454 {
455 int evalue = (more_errno >> 8) & 255;
456 if (wanted >= 100)
457 {
458 if ((evalue/10)*10 != wanted - 100) continue;
459 }
460 else if (evalue != wanted) continue;
461 }
462 }
463
464 /* There are some special cases for timeouts */
465
466 else if (yield->basic_errno == ETIMEDOUT)
467 {
468 if (basic_errno != ETIMEDOUT) continue;
469
470 /* Just RTEF_CTOUT in the rule => don't care about 'A'/'M' addresses */
471 if (yield->more_errno == RTEF_CTOUT)
472 {
473 if ((more_errno & RTEF_CTOUT) == 0) continue;
474 }
475
476 else if (yield->more_errno != 0)
477 {
478 int cf_errno = more_errno;
479 if ((yield->more_errno & RTEF_CTOUT) == 0) cf_errno &= ~RTEF_CTOUT;
480 if (yield->more_errno != cf_errno) continue;
481 }
482 }
483
484 /* Default checks for exact match */
485
486 else
487 {
488 if (yield->basic_errno != basic_errno ||
489 (yield->more_errno != 0 && yield->more_errno != more_errno))
490 continue;
491 }
492 }
493
494 /* If the "senders" condition is set, check it. Note that sender_address may
495 be null during -brt checking, in which case we do not use this rule. */
496
497 if (slist != NULL && (sender_address == NULL ||
498 match_address_list(sender_address, TRUE, TRUE, &slist, NULL, -1, 0,
499 NULL) != OK))
500 continue;
501
502 /* Check for a match between the address list item at the start of this retry
503 rule and either the main or alternate keys. */
504
505 if (match_address_list(use_key, TRUE, TRUE, &plist, NULL, -1, UCHAR_MAX+1,
506 NULL) == OK ||
507 (use_alternate != NULL &&
508 match_address_list(use_alternate, TRUE, TRUE, &plist, NULL, -1,
509 UCHAR_MAX+1, NULL) == OK))
510 break;
511 }
512
513*colon = replace;
514return yield;
515}
516
517
518
519
520/*************************************************
521* Update retry database *
522*************************************************/
523
524/* Update the retry data for any directing/routing/transporting that was
525deferred, or delete it for those that succeeded after a previous defer. This is
526done all in one go to minimize opening/closing/locking of the database file.
527
528Note that, because SMTP delivery involves a list of destinations to try, there
529may be defer-type retry information for some of them even when the message was
530successfully delivered. Likewise if it eventually failed.
531
532This function may move addresses from the defer to the failed queue if the
533ultimate retry time has expired.
534
535Arguments:
536 addr_defer queue of deferred addresses
537 addr_failed queue of failed addresses
538 addr_succeed queue of successful addresses
539
540Returns: nothing
541*/
542
543void
544retry_update(address_item **addr_defer, address_item **addr_failed,
545 address_item **addr_succeed)
546{
547open_db dbblock;
548open_db *dbm_file = NULL;
549time_t now = time(NULL);
550int i;
551
552DEBUG(D_retry) debug_printf("Processing retry items\n");
553
554/* Three-times loop to handle succeeded, failed, and deferred addresses.
555Deferred addresses must be handled after failed ones, because some may be moved
556to the failed chain if they have timed out. */
557
558for (i = 0; i < 3; i++)
559 {
560 address_item *endaddr, *addr;
561 address_item *last_first = NULL;
562 address_item **paddr = (i==0)? addr_succeed :
563 (i==1)? addr_failed : addr_defer;
564 address_item **saved_paddr = NULL;
565
566 DEBUG(D_retry) debug_printf("%s addresses:\n", (i == 0)? "Succeeded" :
567 (i == 1)? "Failed" : "Deferred");
568
569 /* Loop for each address on the chain. For deferred addresses, the whole
570 address times out unless one of its retry addresses has a retry rule that
571 hasn't yet timed out. Deferred addresses should not be requesting deletion
572 of retry items, but just in case they do by accident, treat that case
573 as "not timed out".
574
575 As well as handling the addresses themselves, we must also process any
576 retry items for any parent addresses - these are typically "delete" items,
577 because the parent must have succeeded in order to generate the child. */
578
579 while ((endaddr = *paddr) != NULL)
580 {
581 BOOL timed_out = FALSE;
582 retry_item *rti;
583
584 for (addr = endaddr; addr != NULL; addr = addr->parent)
585 {
586 int update_count = 0;
587 int timedout_count = 0;
588
589 DEBUG(D_retry) debug_printf("%s%s\n", addr->address, (addr->retries == NULL)?
590 ": no retry items" : "");
591
592 /* Loop for each retry item. */
593
594 for (rti = addr->retries; rti != NULL; rti = rti->next)
595 {
596 uschar *message;
597 int message_length, message_space, failing_interval, next_try;
598 retry_rule *rule, *final_rule;
599 retry_config *retry;
600 dbdata_retry *retry_record;
601
602 /* Open the retry database if it is not already open; failure to open
603 the file is logged, but otherwise ignored - deferred addresses will
604 get retried at the next opportunity. Not opening earlier than this saves
605 opening if no addresses have retry items - common when none have yet
606 reached their retry next try time. */
607
608 if (dbm_file == NULL)
609 dbm_file = dbfn_open(US"retry", O_RDWR, &dbblock, TRUE);
610
611 if (dbm_file == NULL)
612 {
613 DEBUG(D_deliver|D_retry|D_hints_lookup)
614 debug_printf("retry database not available for updating\n");
615 return;
616 }
617
618 /* If there are no deferred addresses, that is, if this message is
619 completing, and the retry item is for a message-specific SMTP error,
620 force it to be deleted, because there's no point in keeping data for
621 no-longer-existing messages. This situation can occur when a domain has
622 two hosts and a message-specific error occurs for the first of them,
623 but the address gets delivered to the second one. This optimization
624 doesn't succeed in cleaning out all the dead entries, but it helps. */
625
626 if (*addr_defer == NULL && (rti->flags & rf_message) != 0)
627 rti->flags |= rf_delete;
628
629 /* Handle the case of a request to delete the retry info for this
630 destination. */
631
632 if ((rti->flags & rf_delete) != 0)
633 {
634 (void)dbfn_delete(dbm_file, rti->key);
635 DEBUG(D_retry)
636 debug_printf("deleted retry information for %s\n", rti->key);
637 continue;
638 }
639
640 /* Count the number of non-delete retry items. This is so that we
641 can compare it to the count of timed_out ones, to check whether
642 all are timed out. */
643
644 update_count++;
645
646 /* Get the retry information for this destination and error code, if
647 any. If this item is for a remote host with ip address, then pass
648 the domain name as an alternative to search for. If no retry
649 information is found, we can't generate a retry time, so there is
650 no point updating the database. This retry item is timed out. */
651
652 if ((retry = retry_find_config(rti->key + 2,
653 ((rti->flags & rf_host) != 0)? addr->domain : NULL,
654 rti->basic_errno, rti->more_errno)) == NULL)
655 {
656 DEBUG(D_retry) debug_printf("No configured retry item for %s%s%s\n",
657 rti->key,
658 ((rti->flags & rf_host) != 0)? US" or " : US"",
659 ((rti->flags & rf_host) != 0)? addr->domain : US"");
660 if (addr == endaddr) timedout_count++;
661 continue;
662 }
663
664 DEBUG(D_retry)
665 {
666 if ((rti->flags & rf_host) != 0)
ea49d0e1
PH
667 debug_printf("retry for %s (%s) = %s %d %d\n", rti->key,
668 addr->domain, retry->pattern, retry->basic_errno,
669 retry->more_errno);
059ec3d9 670 else
ea49d0e1
PH
671 debug_printf("retry for %s = %s %d %d\n", rti->key, retry->pattern,
672 retry->basic_errno, retry->more_errno);
059ec3d9
PH
673 }
674
675 /* Set up the message for the database retry record. Because DBM
676 records have a maximum data length, we enforce a limit. There isn't
677 much point in keeping a huge message here, anyway. */
678
679 message = (rti->basic_errno > 0)? US strerror(rti->basic_errno) :
680 (rti->message == NULL)?
681 US"unknown error" : string_printing(rti->message);
682 message_length = Ustrlen(message);
683 if (message_length > 150) message_length = 150;
684
685 /* Read a retry record from the database or construct a new one.
686 Ignore an old one if it is too old since it was last updated. */
687
688 retry_record = dbfn_read(dbm_file, rti->key);
689 if (retry_record != NULL &&
690 now - retry_record->time_stamp > retry_data_expire)
691 retry_record = NULL;
692
693 if (retry_record == NULL)
694 {
695 retry_record = store_get(sizeof(dbdata_retry) + message_length);
696 message_space = message_length;
697 retry_record->first_failed = now;
698 retry_record->last_try = now;
699 retry_record->next_try = now;
700 retry_record->expired = FALSE;
701 retry_record->text[0] = 0; /* just in case */
702 }
703 else message_space = Ustrlen(retry_record->text);
704
705 /* Compute how long this destination has been failing */
706
707 failing_interval = now - retry_record->first_failed;
727071f8
PH
708 DEBUG(D_retry) debug_printf("failing_interval=%d message_age=%d\n",
709 failing_interval, message_age);
710
dd16e114
PH
711 /* For a non-host error, if the message has been on the queue longer
712 than the recorded time of failure, use the message's age instead. This
713 can happen when some messages can be delivered and others cannot; a
714 successful delivery will reset the first_failed time, and this can lead
715 to a failing message being retried too often. */
727071f8 716
dd16e114
PH
717 if ((rti->flags & rf_host) == 0 && message_age > failing_interval)
718 failing_interval = message_age;
059ec3d9
PH
719
720 /* Search for the current retry rule. The cutoff time of the
721 last rule is handled differently to the others. The rule continues
722 to operate for ever (the global maximum interval will eventually
723 limit the gaps) but its cutoff time determines when an individual
724 destination times out. If there are no retry rules, the destination
725 always times out, but we can't compute a retry time. */
726
727 final_rule = NULL;
728 for (rule = retry->rules; rule != NULL; rule = rule->next)
729 {
730 if (failing_interval <= rule->timeout) break;
731 final_rule = rule;
732 }
733
734 /* If there's an un-timed out rule, the destination has not
735 yet timed out, so the address as a whole has not timed out (but we are
736 interested in this only for the end address). Make sure the expired
737 flag is false (can be forced via fixdb from outside, but ensure it is
738 consistent with the rules whenever we go through here). */
739
740 if (rule != NULL)
741 {
742 retry_record->expired = FALSE;
743 }
744
745 /* Otherwise, set the retry timeout expired, and set the final rule
746 as the one from which to compute the next retry time. Subsequent
747 messages will fail immediately until the retry time is reached (unless
748 there are other, still active, retries). */
749
750 else
751 {
752 rule = final_rule;
753 retry_record->expired = TRUE;
754 if (addr == endaddr) timedout_count++;
755 }
756
757 /* There is a special case to consider when some messages get through
758 to a destination and others don't. This can happen locally when a
759 large message pushes a user over quota, and it can happen remotely
760 when a machine is on a dodgy Internet connection. The messages that
761 get through wipe the retry information, causing those that don't to
762 stay on the queue longer than the final retry time. In order to
763 avoid this, we check, using the time of arrival of the message, to
764 see if it has been on the queue for more than the final cutoff time,
765 and if so, cause this retry item to time out, and the retry time to
766 be set to "now" so that any subsequent messages in the same condition
767 also get tried. We search for the last rule onwards from the one that
768 is in use. If there are no retry rules for the item, rule will be null
769 and timedout_count will already have been updated.
770
771 This implements "timeout this rule if EITHER the host (or routing or
772 directing) has been failing for more than the maximum time, OR if the
727071f8
PH
773 message has been on the queue for more than the maximum time."
774
775 February 2006: It is possible that this code is no longer needed
776 following the change to the retry calculation to use the message age if
777 it is larger than the time since first failure. It may be that the
778 expired flag is always set when the other conditions are met. However,
779 this is a small bit of code, and it does no harm to leave it in place,
780 just in case. */
059ec3d9
PH
781
782 if (received_time <= retry_record->first_failed &&
783 addr == endaddr && !retry_record->expired && rule != NULL)
784 {
785 retry_rule *last_rule;
786 for (last_rule = rule;
787 last_rule->next != NULL;
788 last_rule = last_rule->next);
789 if (now - received_time > last_rule->timeout)
790 {
791 DEBUG(D_retry) debug_printf("on queue longer than maximum retry\n");
792 timedout_count++;
793 rule = NULL;
794 }
795 }
796
797 /* Compute the next try time from the rule, subject to the global
798 maximum, and update the retry database. If rule == NULL it means
799 there were no rules at all (and the timeout will be set expired),
800 or we have a message that is older than the final timeout. In this
801 case set the next retry time to now, so that one delivery attempt
802 happens for subsequent messages. */
803
804 if (rule == NULL) next_try = now; else
805 {
806 if (rule->rule == 'F') next_try = now + rule->p1;
6af56900 807 else /* rule = 'G' or 'H' */
059ec3d9
PH
808 {
809 int last_predicted_gap =
810 retry_record->next_try - retry_record->last_try;
811 int last_actual_gap = now - retry_record->last_try;
812 int lastgap = (last_predicted_gap < last_actual_gap)?
813 last_predicted_gap : last_actual_gap;
6af56900
PH
814 int next_gap = (lastgap * rule->p2)/1000;
815 if (rule->rule == 'G')
816 {
817 next_try = now + ((lastgap < rule->p1)? rule->p1 : next_gap);
818 }
819 else /* The 'H' rule */
820 {
821 next_try = now + rule->p1;
822 if (next_gap > rule->p1)
3cd34f13
PH
823 next_try += random_number(next_gap - rule->p1)/2 +
824 (next_gap - rule->p1)/2;
6af56900 825 }
059ec3d9
PH
826 }
827 }
828
829 /* Impose a global retry max */
830
831 if (next_try - now > retry_interval_max)
832 next_try = now + retry_interval_max;
833
834 /* If the new message length is greater than the previous one, we
835 have to copy the record first. */
836
837 if (message_length > message_space)
838 {
839 dbdata_retry *newr = store_get(sizeof(dbdata_retry) + message_length);
840 memcpy(newr, retry_record, sizeof(dbdata_retry));
841 retry_record = newr;
842 }
843
844 /* Set up the retry record; message_length may be less than the string
845 length for very long error strings. */
846
847 retry_record->last_try = now;
848 retry_record->next_try = next_try;
849 retry_record->basic_errno = rti->basic_errno;
850 retry_record->more_errno = rti->more_errno;
851 Ustrncpy(retry_record->text, message, message_length);
852 retry_record->text[message_length] = 0;
853
854 DEBUG(D_retry)
855 {
856 int letter = retry_record->more_errno & 255;
857 debug_printf("Writing retry data for %s\n", rti->key);
858 debug_printf(" first failed=%d last try=%d next try=%d expired=%d\n",
859 (int)retry_record->first_failed, (int)retry_record->last_try,
860 (int)retry_record->next_try, retry_record->expired);
861 debug_printf(" errno=%d more_errno=", retry_record->basic_errno);
862 if (letter == 'A' || letter == 'M')
863 debug_printf("%d,%c", (retry_record->more_errno >> 8) & 255,
864 letter);
865 else
866 debug_printf("%d", retry_record->more_errno);
867 debug_printf(" %s\n", retry_record->text);
868 }
869
870 (void)dbfn_write(dbm_file, rti->key, retry_record,
871 sizeof(dbdata_retry) + message_length);
872 } /* Loop for each retry item */
873
874 /* If all the non-delete retry items are timed out, the address is
875 timed out, provided that we didn't skip any hosts because their retry
876 time was not reached (or because of hosts_max_try). */
877
878 if (update_count > 0 && update_count == timedout_count)
879 {
880 if (!testflag(endaddr, af_retry_skipped))
881 {
882 DEBUG(D_retry) debug_printf("timed out: all retries expired\n");
883 timed_out = TRUE;
884 }
885 else
886 {
887 DEBUG(D_retry)
888 debug_printf("timed out but some hosts were skipped\n");
889 }
890 }
891 } /* Loop for an address and its parents */
892
893 /* If this is a deferred address, and retry processing was requested by
894 means of one or more retry items, and they all timed out, move the address
895 to the failed queue, and restart this loop without updating paddr.
896
897 If there were several addresses batched in the same remote delivery, only
898 the original top one will have host retry items attached to it, but we want
899 to handle all the same. Each will have a pointer back to its "top" address,
900 and they will now precede the item with the retries because addresses are
901 inverted when added to these final queues. We have saved information about
902 them in passing (below) so they can all be cut out at once. */
903
904 if (i == 2) /* Handling defers */
905 {
906 if (endaddr->retries != NULL && timed_out)
907 {
908 if (last_first == endaddr) paddr = saved_paddr;
909 addr = *paddr;
910 *paddr = endaddr->next;
911
912 endaddr->next = *addr_failed;
913 *addr_failed = addr;
914
915 for (;; addr = addr->next)
916 {
917 setflag(addr, af_retry_timedout);
918 addr->message = (addr->message == NULL)? US"retry timeout exceeded" :
919 string_sprintf("%s: retry timeout exceeded", addr->message);
fffffe4c
PH
920 addr->user_message = (addr->user_message == NULL)?
921 US"retry timeout exceeded" :
922 string_sprintf("%s: retry timeout exceeded", addr->user_message);
059ec3d9
PH
923 log_write(0, LOG_MAIN, "** %s%s%s%s: retry timeout exceeded",
924 addr->address,
925 (addr->parent == NULL)? US"" : US" <",
926 (addr->parent == NULL)? US"" : addr->parent->address,
927 (addr->parent == NULL)? US"" : US">");
928
929 if (addr == endaddr) break;
930 }
931
932 continue; /* Restart from changed *paddr */
933 }
934
935 /* This address is to remain on the defer chain. If it has a "first"
936 pointer, save the pointer to it in case we want to fail the set of
937 addresses when we get to the first one. */
938
939 if (endaddr->first != last_first)
940 {
941 last_first = endaddr->first;
942 saved_paddr = paddr;
943 }
944 }
945
946 /* All cases (succeed, fail, defer left on queue) */
947
948 paddr = &(endaddr->next); /* Advance to next address */
949 } /* Loop for all addresses */
950 } /* Loop for succeed, fail, defer */
951
952/* Close and unlock the database */
953
954if (dbm_file != NULL) dbfn_close(dbm_file);
955
956DEBUG(D_retry) debug_printf("end of retry processing\n");
957}
958
/* End of retry.c */