Commit | Line | Data |
---|---|---|
059ec3d9 PH |
1 | /************************************************* |
2 | * Exim - an Internet mail transport agent * | |
3 | *************************************************/ | |
4 | ||
f9ba5e22 | 5 | /* Copyright (c) University of Cambridge 1995 - 2018 */ |
059ec3d9 PH |
6 | /* See the file NOTICE for conditions of use and distribution. */ |
7 | ||
8 | /* Functions concerned with retrying unsuccessful deliveries. */ | |
9 | ||
10 | ||
11 | #include "exim.h" | |
12 | ||
13 | ||
14 | ||
15 | /************************************************* | |
16 | * Check the ultimate address timeout * | |
17 | *************************************************/ | |
18 | ||
19 | /* This function tests whether a message has been on the queue longer than | |
ba9af0af | 20 | the maximum retry time for a particular host or address. |
059ec3d9 PH |
21 | |
22 | Arguments: | |
ba9af0af | 23 | retry_key the key to look up a retry rule |
059ec3d9 | 24 | domain the domain to look up a domain retry rule |
ba9af0af | 25 | retry_record contains error information for finding rule |
059ec3d9 PH |
26 | now the time |
27 | ||
28 | Returns: TRUE if the ultimate timeout has been reached | |
29 | */ | |
30 | ||
ba9af0af | 31 | BOOL |
55414b25 | 32 | retry_ultimate_address_timeout(uschar *retry_key, const uschar *domain, |
ba9af0af | 33 | dbdata_retry *retry_record, time_t now) |
059ec3d9 | 34 | { |
ba9af0af | 35 | BOOL address_timeout; |
de6f74f2 | 36 | retry_config * retry; |
ba9af0af TF |
37 | |
38 | DEBUG(D_retry) | |
39 | { | |
40 | debug_printf("retry time not reached: checking ultimate address timeout\n"); | |
de6f74f2 JH |
41 | debug_printf(" now=" TIME_T_FMT " first_failed=" TIME_T_FMT |
42 | " next_try=" TIME_T_FMT " expired=%c\n", | |
43 | now, retry_record->first_failed, | |
44 | retry_record->next_try, retry_record->expired ? 'T' : 'F'); | |
ba9af0af | 45 | } |
059ec3d9 | 46 | |
de6f74f2 | 47 | retry = retry_find_config(retry_key+2, domain, |
ba9af0af | 48 | retry_record->basic_errno, retry_record->more_errno); |
059ec3d9 | 49 | |
de6f74f2 | 50 | if (retry && retry->rules) |
059ec3d9 PH |
51 | { |
52 | retry_rule *last_rule; | |
de6f74f2 | 53 | for (last_rule = retry->rules; last_rule->next; last_rule = last_rule->next) ; |
ba9af0af | 54 | DEBUG(D_retry) |
de6f74f2 JH |
55 | debug_printf(" received_time=" TIME_T_FMT " diff=%d timeout=%d\n", |
56 | received_time.tv_sec, (int)(now - received_time.tv_sec), last_rule->timeout); | |
32dfdf8b | 57 | address_timeout = (now - received_time.tv_sec > last_rule->timeout); |
059ec3d9 | 58 | } |
ea49d0e1 PH |
59 | else |
60 | { | |
ba9af0af | 61 | DEBUG(D_retry) |
ea49d0e1 | 62 | debug_printf("no retry rule found: assume timed out\n"); |
ba9af0af | 63 | address_timeout = TRUE; |
ea49d0e1 | 64 | } |
059ec3d9 | 65 | |
ba9af0af TF |
66 | DEBUG(D_retry) |
67 | if (address_timeout) | |
68 | debug_printf("on queue longer than maximum retry for address - " | |
69 | "allowing delivery\n"); | |
70 | ||
059ec3d9 PH |
71 | return address_timeout; |
72 | } | |
73 | ||
74 | ||
75 | ||
76 | /************************************************* | |
77 | * Set status of a host+address item * | |
78 | *************************************************/ | |
79 | ||
80 | /* This function is passed a host_item which contains a host name and an | |
81 | IP address string. Its job is to set the status of the address if it is not | |
82 | already set (indicated by hstatus_unknown). The possible values are: | |
83 | ||
84 | hstatus_usable the address is not listed in the unusable tree, and does | |
85 | not have a retry record, OR the time is past the next | |
86 | try time, OR the message has been on the queue for more | |
87 | than the maximum retry time for a failing host | |
88 | ||
89 | hstatus_unusable the address is listed in the unusable tree, or does have | |
90 | a retry record, and the time is not yet at the next retry | |
91 | time. | |
92 | ||
93 | hstatus_unusable_expired as above, but also the retry time has expired | |
94 | for this address. | |
95 | ||
96 | The reason a delivery is permitted when a message has been around for a very | |
97 | long time is to allow the ultimate address timeout to operate after a delivery | |
98 | failure. Otherwise some messages may stick around without being tried for too | |
99 | long. | |
100 | ||
101 | If a host retry record is retrieved from the hints database, the time of last | |
102 | trying is filled into the last_try field of the host block. If a host is | |
103 | generally usable, a check is made to see if there is a retry delay on this | |
104 | specific message at this host. | |
105 | ||
106 | If a non-standard port is being used, it is added to the retry key. | |
107 | ||
108 | Arguments: | |
109 | domain the address domain | |
110 | host pointer to a host item | |
111 | portstring "" for standard port, ":xxxx" for a non-standard port | |
112 | include_ip_address TRUE to include the address in the key - this is | |
113 | usual, but sometimes is not wanted | |
114 | retry_host_key where to put a pointer to the key for the host-specific | |
115 | retry record, if one is read and the host is usable | |
116 | retry_message_key where to put a pointer to the key for the message+host | |
117 | retry record, if one is read and the host is usable | |
118 | ||
119 | Returns: TRUE if the host has expired but is usable because | |
120 | its retry time has come | |
121 | */ | |
122 | ||
123 | BOOL | |
55414b25 | 124 | retry_check_address(const uschar *domain, host_item *host, uschar *portstring, |
059ec3d9 PH |
125 | BOOL include_ip_address, uschar **retry_host_key, uschar **retry_message_key) |
126 | { | |
127 | BOOL yield = FALSE; | |
128 | time_t now = time(NULL); | |
129 | uschar *host_key, *message_key; | |
130 | open_db dbblock; | |
131 | open_db *dbm_file; | |
132 | tree_node *node; | |
133 | dbdata_retry *host_retry_record, *message_retry_record; | |
134 | ||
135 | *retry_host_key = *retry_message_key = NULL; | |
136 | ||
137 | DEBUG(D_transport|D_retry) debug_printf("checking status of %s\n", host->name); | |
138 | ||
139 | /* Do nothing if status already set; otherwise initialize status as usable. */ | |
140 | ||
141 | if (host->status != hstatus_unknown) return FALSE; | |
142 | host->status = hstatus_usable; | |
143 | ||
144 | /* Generate the host key for the unusable tree and the retry database. Ensure | |
145 | host names are lower cased (that's what %S does). */ | |
146 | ||
147 | host_key = include_ip_address? | |
148 | string_sprintf("T:%S:%s%s", host->name, host->address, portstring) : | |
149 | string_sprintf("T:%S%s", host->name, portstring); | |
150 | ||
151 | /* Generate the message-specific key */ | |
152 | ||
153 | message_key = string_sprintf("%s:%s", host_key, message_id); | |
154 | ||
155 | /* Search the tree of unusable IP addresses. This is filled in when deliveries | |
156 | fail, because the retry database itself is not updated until the end of all | |
157 | deliveries (so as to do it all in one go). The tree records addresses that have | |
158 | become unusable during this delivery process (i.e. those that will get put into | |
159 | the retry database when it is updated). */ | |
160 | ||
36d295f1 | 161 | if ((node = tree_search(tree_unusable, host_key))) |
059ec3d9 PH |
162 | { |
163 | DEBUG(D_transport|D_retry) debug_printf("found in tree of unusables\n"); | |
164 | host->status = (node->data.val > 255)? | |
165 | hstatus_unusable_expired : hstatus_unusable; | |
166 | host->why = node->data.val & 255; | |
167 | return FALSE; | |
168 | } | |
169 | ||
170 | /* Open the retry database, giving up if there isn't one. Otherwise, search for | |
171 | the retry records, and then close the database again. */ | |
172 | ||
36d295f1 | 173 | if (!(dbm_file = dbfn_open(US"retry", O_RDONLY, &dbblock, FALSE))) |
059ec3d9 PH |
174 | { |
175 | DEBUG(D_deliver|D_retry|D_hints_lookup) | |
176 | debug_printf("no retry data available\n"); | |
177 | return FALSE; | |
178 | } | |
179 | host_retry_record = dbfn_read(dbm_file, host_key); | |
180 | message_retry_record = dbfn_read(dbm_file, message_key); | |
181 | dbfn_close(dbm_file); | |
182 | ||
183 | /* Ignore the data if it is too old - too long since it was written */ | |
184 | ||
36d295f1 | 185 | if (!host_retry_record) |
059ec3d9 PH |
186 | { |
187 | DEBUG(D_transport|D_retry) debug_printf("no host retry record\n"); | |
188 | } | |
189 | else if (now - host_retry_record->time_stamp > retry_data_expire) | |
190 | { | |
191 | host_retry_record = NULL; | |
192 | DEBUG(D_transport|D_retry) debug_printf("host retry record too old\n"); | |
193 | } | |
194 | ||
36d295f1 | 195 | if (!message_retry_record) |
059ec3d9 PH |
196 | { |
197 | DEBUG(D_transport|D_retry) debug_printf("no message retry record\n"); | |
198 | } | |
199 | else if (now - message_retry_record->time_stamp > retry_data_expire) | |
200 | { | |
201 | message_retry_record = NULL; | |
202 | DEBUG(D_transport|D_retry) debug_printf("message retry record too old\n"); | |
203 | } | |
204 | ||
205 | /* If there's a host-specific retry record, check for reaching the retry | |
206 | time (or forcing). If not, and the host is not expired, check for the message | |
207 | having been around for longer than the maximum retry time for this host or | |
208 | address. Allow the delivery if it has. Otherwise set the appropriate unusable | |
209 | flag and return FALSE. Otherwise arrange to return TRUE if this is an expired | |
210 | host. */ | |
211 | ||
36d295f1 | 212 | if (host_retry_record) |
059ec3d9 PH |
213 | { |
214 | *retry_host_key = host_key; | |
215 | ||
216 | /* We have not reached the next try time. Check for the ultimate address | |
217 | timeout if the host has not expired. */ | |
218 | ||
8768d548 | 219 | if (now < host_retry_record->next_try && !f.deliver_force) |
059ec3d9 | 220 | { |
059ec3d9 | 221 | if (!host_retry_record->expired && |
ba9af0af TF |
222 | retry_ultimate_address_timeout(host_key, domain, |
223 | host_retry_record, now)) | |
059ec3d9 | 224 | return FALSE; |
059ec3d9 PH |
225 | |
226 | /* We have not hit the ultimate address timeout; host is unusable. */ | |
227 | ||
228 | host->status = (host_retry_record->expired)? | |
229 | hstatus_unusable_expired : hstatus_unusable; | |
230 | host->why = hwhy_retry; | |
231 | host->last_try = host_retry_record->last_try; | |
232 | return FALSE; | |
233 | } | |
234 | ||
235 | /* Host is usable; set return TRUE if expired. */ | |
236 | ||
237 | yield = host_retry_record->expired; | |
238 | } | |
239 | ||
240 | /* It's OK to try the host. If there's a message-specific retry record, check | |
241 | for reaching its retry time (or forcing). If not, mark the host unusable, | |
242 | unless the ultimate address timeout has been reached. */ | |
243 | ||
36d295f1 | 244 | if (message_retry_record) |
059ec3d9 PH |
245 | { |
246 | *retry_message_key = message_key; | |
8768d548 | 247 | if (now < message_retry_record->next_try && !f.deliver_force) |
059ec3d9 | 248 | { |
ba9af0af TF |
249 | if (!retry_ultimate_address_timeout(host_key, domain, |
250 | message_retry_record, now)) | |
059ec3d9 PH |
251 | { |
252 | host->status = hstatus_unusable; | |
253 | host->why = hwhy_retry; | |
254 | } | |
059ec3d9 PH |
255 | return FALSE; |
256 | } | |
257 | } | |
258 | ||
259 | return yield; | |
260 | } | |
261 | ||
262 | ||
263 | ||
264 | ||
265 | /************************************************* | |
266 | * Add a retry item to an address * | |
267 | *************************************************/ | |
268 | ||
269 | /* Retry items are chained onto an address when it is deferred either by router | |
270 | or by a transport, or if it succeeds or fails and there was a previous retry | |
271 | item that now needs to be deleted. Sometimes there can be both kinds of item: | |
272 | for example, if routing was deferred but then succeeded, and delivery then | |
273 | deferred. In that case there is a delete item for the routing retry, and an | |
274 | updating item for the delivery. | |
275 | ||
276 | (But note that that is only visible at the outer level, because in remote | |
277 | delivery subprocesses, the address starts "clean", with no retry items carried | |
278 | in.) | |
279 | ||
280 | These items are used at the end of a delivery attempt to update the retry | |
281 | database. The keys start R: for routing delays and T: for transport delays. | |
282 | ||
283 | Arguments: | |
284 | addr the address block onto which to hang the item | |
285 | key the retry key | |
286 | flags delete, host, and message flags, copied into the block | |
287 | ||
288 | Returns: nothing | |
289 | */ | |
290 | ||
291 | void | |
292 | retry_add_item(address_item *addr, uschar *key, int flags) | |
293 | { | |
294 | retry_item *rti = store_get(sizeof(retry_item)); | |
c562fd30 | 295 | host_item * host = addr->host_used; |
36d295f1 | 296 | |
059ec3d9 PH |
297 | rti->next = addr->retries; |
298 | addr->retries = rti; | |
299 | rti->key = key; | |
300 | rti->basic_errno = addr->basic_errno; | |
301 | rti->more_errno = addr->more_errno; | |
c562fd30 JH |
302 | rti->message = host |
303 | ? string_sprintf("H=%s [%s]: %s", host->name, host->address, addr->message) | |
304 | : addr->message; | |
059ec3d9 PH |
305 | rti->flags = flags; |
306 | ||
307 | DEBUG(D_transport|D_retry) | |
308 | { | |
309 | int letter = rti->more_errno & 255; | |
310 | debug_printf("added retry item for %s: errno=%d more_errno=", rti->key, | |
311 | rti->basic_errno); | |
312 | if (letter == 'A' || letter == 'M') | |
313 | debug_printf("%d,%c", (rti->more_errno >> 8) & 255, letter); | |
314 | else | |
315 | debug_printf("%d", rti->more_errno); | |
316 | debug_printf(" flags=%d\n", flags); | |
317 | } | |
318 | } | |
319 | ||
320 | ||
321 | ||
322 | /************************************************* | |
323 | * Find retry configuration data * | |
324 | *************************************************/ | |
325 | ||
326 | /* Search the in-store retry information for the first retry item that applies | |
327 | to a given destination. If the key contains an @ we are probably handling a | |
328 | local delivery and have a complete address to search for; this happens when | |
329 | retry_use_local_part is set on a router. Otherwise, the key is likely to be a | |
330 | host name for a remote delivery, or a domain name for a local delivery. We | |
331 | prepend *@ on the front of it so that it will match a retry item whose address | |
332 | item pattern is independent of the local part. The alternate key, if set, is | |
333 | always just a domain, so we treat it likewise. | |
334 | ||
335 | Arguments: | |
336 | key key for which retry info is wanted | |
337 | alternate alternative key, always just a domain | |
338 | basic_errno specific error predicate on the retry rule, or zero | |
339 | more_errno additional data for errno predicate | |
340 | ||
341 | Returns: pointer to retry rule, or NULL | |
342 | */ | |
343 | ||
344 | retry_config * | |
55414b25 | 345 | retry_find_config(const uschar *key, const uschar *alternate, int basic_errno, |
059ec3d9 PH |
346 | int more_errno) |
347 | { | |
55414b25 | 348 | const uschar *colon = Ustrchr(key, ':'); |
059ec3d9 PH |
349 | retry_config *yield; |
350 | ||
ea49d0e1 PH |
351 | /* If there's a colon in the key, there are two possibilities: |
352 | ||
353 | (1) This is a key for a host, ip address, and possibly port, in the format | |
354 | ||
355 | hostname:ip+port | |
356 | ||
55414b25 | 357 | In this case, we copy the host name. |
ea49d0e1 PH |
358 | |
359 | (2) This is a key for a pipe, file, or autoreply delivery, in the format | |
360 | ||
361 | pipe-or-file-or-auto:x@y | |
362 | ||
363 | where x@y is the original address that provoked the delivery. The pipe or | |
364 | file or auto will start with | or / or >, whereas a host name will start | |
365 | with a letter or a digit. In this case we want to use the original address | |
366 | to search for a retry rule. */ | |
059ec3d9 | 367 | |
55414b25 JH |
368 | if (colon) |
369 | key = isalnum(*key) | |
370 | ? string_copyn(key, colon-key) /* the hostname */ | |
371 | : Ustrrchr(key, ':') + 1; /* Take from the last colon */ | |
059ec3d9 PH |
372 | |
373 | /* Sort out the keys */ | |
374 | ||
55414b25 JH |
375 | if (!Ustrchr(key, '@')) key = string_sprintf("*@%s", key); |
376 | if (alternate) alternate = string_sprintf("*@%s", alternate); | |
059ec3d9 PH |
377 | |
378 | /* Scan the configured retry items. */ | |
379 | ||
36d295f1 | 380 | for (yield = retries; yield; yield = yield->next) |
059ec3d9 | 381 | { |
55414b25 JH |
382 | const uschar *plist = yield->pattern; |
383 | const uschar *slist = yield->senders; | |
059ec3d9 PH |
384 | |
385 | /* If a specific error is set for this item, check that we are handling that | |
386 | specific error, and if so, check any additional error information if | |
387 | required. */ | |
388 | ||
389 | if (yield->basic_errno != 0) | |
390 | { | |
391 | /* Special code is required for quota errors, as these can either be system | |
392 | quota errors, or Exim's own quota imposition, which has a different error | |
393 | number. Full partitions are also treated in the same way as quota errors. | |
394 | */ | |
395 | ||
396 | if (yield->basic_errno == ERRNO_EXIMQUOTA) | |
397 | { | |
398 | if ((basic_errno != ERRNO_EXIMQUOTA && basic_errno != errno_quota && | |
399 | basic_errno != ENOSPC) || | |
400 | (yield->more_errno != 0 && yield->more_errno > more_errno)) | |
401 | continue; | |
402 | } | |
403 | ||
e97957bc PH |
404 | /* The TLSREQUIRED error also covers TLSFAILURE. These are subtly different |
405 | errors, but not worth separating at this level. */ | |
406 | ||
407 | else if (yield->basic_errno == ERRNO_TLSREQUIRED) | |
408 | { | |
409 | if (basic_errno != ERRNO_TLSREQUIRED && basic_errno != ERRNO_TLSFAILURE) | |
410 | continue; | |
411 | } | |
412 | ||
413 | /* Handle 4xx responses to MAIL, RCPT, or DATA. The code that was received | |
414 | is in the 2nd least significant byte of more_errno (with 400 subtracted). | |
415 | The required value is coded in the 2nd least significant byte of the | |
416 | yield->more_errno field as follows: | |
059ec3d9 PH |
417 | |
418 | 255 => any 4xx code | |
419 | >= 100 => the decade must match the value less 100 | |
420 | < 100 => the exact value must match | |
421 | */ | |
422 | ||
e97957bc PH |
423 | else if (yield->basic_errno == ERRNO_MAIL4XX || |
424 | yield->basic_errno == ERRNO_RCPT4XX || | |
425 | yield->basic_errno == ERRNO_DATA4XX) | |
059ec3d9 PH |
426 | { |
427 | int wanted; | |
e97957bc | 428 | if (basic_errno != yield->basic_errno) continue; |
059ec3d9 PH |
429 | wanted = (yield->more_errno >> 8) & 255; |
430 | if (wanted != 255) | |
431 | { | |
432 | int evalue = (more_errno >> 8) & 255; | |
433 | if (wanted >= 100) | |
434 | { | |
435 | if ((evalue/10)*10 != wanted - 100) continue; | |
436 | } | |
437 | else if (evalue != wanted) continue; | |
438 | } | |
439 | } | |
440 | ||
441 | /* There are some special cases for timeouts */ | |
442 | ||
443 | else if (yield->basic_errno == ETIMEDOUT) | |
444 | { | |
445 | if (basic_errno != ETIMEDOUT) continue; | |
446 | ||
447 | /* Just RTEF_CTOUT in the rule => don't care about 'A'/'M' addresses */ | |
448 | if (yield->more_errno == RTEF_CTOUT) | |
449 | { | |
450 | if ((more_errno & RTEF_CTOUT) == 0) continue; | |
451 | } | |
452 | ||
453 | else if (yield->more_errno != 0) | |
454 | { | |
455 | int cf_errno = more_errno; | |
456 | if ((yield->more_errno & RTEF_CTOUT) == 0) cf_errno &= ~RTEF_CTOUT; | |
457 | if (yield->more_errno != cf_errno) continue; | |
458 | } | |
459 | } | |
460 | ||
461 | /* Default checks for exact match */ | |
462 | ||
463 | else | |
464 | { | |
465 | if (yield->basic_errno != basic_errno || | |
466 | (yield->more_errno != 0 && yield->more_errno != more_errno)) | |
467 | continue; | |
468 | } | |
469 | } | |
470 | ||
471 | /* If the "senders" condition is set, check it. Note that sender_address may | |
472 | be null during -brt checking, in which case we do not use this rule. */ | |
473 | ||
36d295f1 JH |
474 | if ( slist |
475 | && ( !sender_address | |
476 | || match_address_list_basic(sender_address, &slist, 0) != OK | |
477 | ) ) | |
059ec3d9 PH |
478 | continue; |
479 | ||
480 | /* Check for a match between the address list item at the start of this retry | |
481 | rule and either the main or alternate keys. */ | |
482 | ||
36d295f1 JH |
483 | if ( match_address_list_basic(key, &plist, UCHAR_MAX+1) == OK |
484 | || ( alternate | |
485 | && match_address_list_basic(alternate, &plist, UCHAR_MAX+1) == OK | |
486 | ) ) | |
059ec3d9 PH |
487 | break; |
488 | } | |
489 | ||
059ec3d9 PH |
490 | return yield; |
491 | } | |
492 | ||
493 | ||
494 | ||
495 | ||
496 | /************************************************* | |
497 | * Update retry database * | |
498 | *************************************************/ | |
499 | ||
500 | /* Update the retry data for any directing/routing/transporting that was | |
501 | deferred, or delete it for those that succeeded after a previous defer. This is | |
502 | done all in one go to minimize opening/closing/locking of the database file. | |
503 | ||
504 | Note that, because SMTP delivery involves a list of destinations to try, there | |
505 | may be defer-type retry information for some of them even when the message was | |
506 | successfully delivered. Likewise if it eventually failed. | |
507 | ||
508 | This function may move addresses from the defer to the failed queue if the | |
509 | ultimate retry time has expired. | |
510 | ||
511 | Arguments: | |
512 | addr_defer queue of deferred addresses | |
513 | addr_failed queue of failed addresses | |
514 | addr_succeed queue of successful addresses | |
515 | ||
516 | Returns: nothing | |
517 | */ | |
518 | ||
519 | void | |
520 | retry_update(address_item **addr_defer, address_item **addr_failed, | |
521 | address_item **addr_succeed) | |
522 | { | |
523 | open_db dbblock; | |
524 | open_db *dbm_file = NULL; | |
525 | time_t now = time(NULL); | |
526 | int i; | |
527 | ||
528 | DEBUG(D_retry) debug_printf("Processing retry items\n"); | |
529 | ||
530 | /* Three-times loop to handle succeeded, failed, and deferred addresses. | |
531 | Deferred addresses must be handled after failed ones, because some may be moved | |
532 | to the failed chain if they have timed out. */ | |
533 | ||
534 | for (i = 0; i < 3; i++) | |
535 | { | |
536 | address_item *endaddr, *addr; | |
537 | address_item *last_first = NULL; | |
7b4c8c1f JH |
538 | address_item **paddr = i==0 ? addr_succeed : |
539 | i==1 ? addr_failed : addr_defer; | |
059ec3d9 PH |
540 | address_item **saved_paddr = NULL; |
541 | ||
7b4c8c1f JH |
542 | DEBUG(D_retry) debug_printf("%s addresses:\n", |
543 | i == 0 ? "Succeeded" : i == 1 ? "Failed" : "Deferred"); | |
059ec3d9 PH |
544 | |
545 | /* Loop for each address on the chain. For deferred addresses, the whole | |
546 | address times out unless one of its retry addresses has a retry rule that | |
547 | hasn't yet timed out. Deferred addresses should not be requesting deletion | |
548 | of retry items, but just in case they do by accident, treat that case | |
549 | as "not timed out". | |
550 | ||
551 | As well as handling the addresses themselves, we must also process any | |
552 | retry items for any parent addresses - these are typically "delete" items, | |
553 | because the parent must have succeeded in order to generate the child. */ | |
554 | ||
7b4c8c1f | 555 | while ((endaddr = *paddr)) |
059ec3d9 PH |
556 | { |
557 | BOOL timed_out = FALSE; | |
558 | retry_item *rti; | |
559 | ||
7b4c8c1f | 560 | for (addr = endaddr; addr; addr = addr->parent) |
059ec3d9 PH |
561 | { |
562 | int update_count = 0; | |
563 | int timedout_count = 0; | |
564 | ||
7b4c8c1f JH |
565 | DEBUG(D_retry) debug_printf(" %s%s\n", addr->address, |
566 | addr->retries ? "" : ": no retry items"); | |
059ec3d9 PH |
567 | |
568 | /* Loop for each retry item. */ | |
569 | ||
7b4c8c1f | 570 | for (rti = addr->retries; rti; rti = rti->next) |
059ec3d9 PH |
571 | { |
572 | uschar *message; | |
573 | int message_length, message_space, failing_interval, next_try; | |
574 | retry_rule *rule, *final_rule; | |
575 | retry_config *retry; | |
576 | dbdata_retry *retry_record; | |
577 | ||
578 | /* Open the retry database if it is not already open; failure to open | |
579 | the file is logged, but otherwise ignored - deferred addresses will | |
580 | get retried at the next opportunity. Not opening earlier than this saves | |
581 | opening if no addresses have retry items - common when none have yet | |
582 | reached their retry next try time. */ | |
583 | ||
7b4c8c1f | 584 | if (!dbm_file) |
059ec3d9 PH |
585 | dbm_file = dbfn_open(US"retry", O_RDWR, &dbblock, TRUE); |
586 | ||
7b4c8c1f | 587 | if (!dbm_file) |
059ec3d9 PH |
588 | { |
589 | DEBUG(D_deliver|D_retry|D_hints_lookup) | |
590 | debug_printf("retry database not available for updating\n"); | |
591 | return; | |
592 | } | |
593 | ||
594 | /* If there are no deferred addresses, that is, if this message is | |
595 | completing, and the retry item is for a message-specific SMTP error, | |
596 | force it to be deleted, because there's no point in keeping data for | |
597 | no-longer-existing messages. This situation can occur when a domain has | |
598 | two hosts and a message-specific error occurs for the first of them, | |
599 | but the address gets delivered to the second one. This optimization | |
600 | doesn't succeed in cleaning out all the dead entries, but it helps. */ | |
601 | ||
7b4c8c1f | 602 | if (!*addr_defer && rti->flags & rf_message) |
059ec3d9 PH |
603 | rti->flags |= rf_delete; |
604 | ||
605 | /* Handle the case of a request to delete the retry info for this | |
606 | destination. */ | |
607 | ||
7b4c8c1f | 608 | if (rti->flags & rf_delete) |
059ec3d9 PH |
609 | { |
610 | (void)dbfn_delete(dbm_file, rti->key); | |
611 | DEBUG(D_retry) | |
612 | debug_printf("deleted retry information for %s\n", rti->key); | |
613 | continue; | |
614 | } | |
615 | ||
616 | /* Count the number of non-delete retry items. This is so that we | |
617 | can compare it to the count of timed_out ones, to check whether | |
618 | all are timed out. */ | |
619 | ||
620 | update_count++; | |
621 | ||
622 | /* Get the retry information for this destination and error code, if | |
623 | any. If this item is for a remote host with ip address, then pass | |
624 | the domain name as an alternative to search for. If no retry | |
625 | information is found, we can't generate a retry time, so there is | |
626 | no point updating the database. This retry item is timed out. */ | |
627 | ||
7b4c8c1f JH |
628 | if (!(retry = retry_find_config(rti->key + 2, |
629 | rti->flags & rf_host ? addr->domain : NULL, | |
630 | rti->basic_errno, rti->more_errno))) | |
059ec3d9 PH |
631 | { |
632 | DEBUG(D_retry) debug_printf("No configured retry item for %s%s%s\n", | |
633 | rti->key, | |
7b4c8c1f JH |
634 | rti->flags & rf_host ? US" or " : US"", |
635 | rti->flags & rf_host ? addr->domain : US""); | |
059ec3d9 PH |
636 | if (addr == endaddr) timedout_count++; |
637 | continue; | |
638 | } | |
639 | ||
640 | DEBUG(D_retry) | |
7b4c8c1f | 641 | if (rti->flags & rf_host) |
ea49d0e1 PH |
642 | debug_printf("retry for %s (%s) = %s %d %d\n", rti->key, |
643 | addr->domain, retry->pattern, retry->basic_errno, | |
644 | retry->more_errno); | |
059ec3d9 | 645 | else |
ea49d0e1 PH |
646 | debug_printf("retry for %s = %s %d %d\n", rti->key, retry->pattern, |
647 | retry->basic_errno, retry->more_errno); | |
059ec3d9 PH |
648 | |
649 | /* Set up the message for the database retry record. Because DBM | |
650 | records have a maximum data length, we enforce a limit. There isn't | |
651 | much point in keeping a huge message here, anyway. */ | |
652 | ||
7b4c8c1f JH |
653 | message = rti->basic_errno > 0 |
654 | ? US strerror(rti->basic_errno) | |
655 | : rti->message | |
656 | ? US string_printing(rti->message) | |
657 | : US"unknown error"; | |
059ec3d9 PH |
658 | message_length = Ustrlen(message); |
659 | if (message_length > 150) message_length = 150; | |
660 | ||
661 | /* Read a retry record from the database or construct a new one. | |
662 | Ignore an old one if it is too old since it was last updated. */ | |
663 | ||
664 | retry_record = dbfn_read(dbm_file, rti->key); | |
7b4c8c1f JH |
665 | if ( retry_record |
666 | && now - retry_record->time_stamp > retry_data_expire) | |
059ec3d9 PH |
667 | retry_record = NULL; |
668 | ||
7b4c8c1f | 669 | if (!retry_record) |
059ec3d9 PH |
670 | { |
671 | retry_record = store_get(sizeof(dbdata_retry) + message_length); | |
672 | message_space = message_length; | |
673 | retry_record->first_failed = now; | |
674 | retry_record->last_try = now; | |
675 | retry_record->next_try = now; | |
676 | retry_record->expired = FALSE; | |
677 | retry_record->text[0] = 0; /* just in case */ | |
678 | } | |
679 | else message_space = Ustrlen(retry_record->text); | |
680 | ||
681 | /* Compute how long this destination has been failing */ | |
682 | ||
683 | failing_interval = now - retry_record->first_failed; | |
727071f8 PH |
684 | DEBUG(D_retry) debug_printf("failing_interval=%d message_age=%d\n", |
685 | failing_interval, message_age); | |
686 | ||
dd16e114 PH |
687 | /* For a non-host error, if the message has been on the queue longer |
688 | than the recorded time of failure, use the message's age instead. This | |
689 | can happen when some messages can be delivered and others cannot; a | |
690 | successful delivery will reset the first_failed time, and this can lead | |
691 | to a failing message being retried too often. */ | |
727071f8 | 692 | |
7b4c8c1f | 693 | if (!(rti->flags & rf_host) && message_age > failing_interval) |
dd16e114 | 694 | failing_interval = message_age; |
059ec3d9 PH |
695 | |
696 | /* Search for the current retry rule. The cutoff time of the | |
697 | last rule is handled differently to the others. The rule continues | |
698 | to operate for ever (the global maximum interval will eventually | |
699 | limit the gaps) but its cutoff time determines when an individual | |
700 | destination times out. If there are no retry rules, the destination | |
701 | always times out, but we can't compute a retry time. */ | |
702 | ||
703 | final_rule = NULL; | |
7b4c8c1f | 704 | for (rule = retry->rules; rule; rule = rule->next) |
059ec3d9 PH |
705 | { |
706 | if (failing_interval <= rule->timeout) break; | |
707 | final_rule = rule; | |
708 | } | |
709 | ||
710 | /* If there's an un-timed out rule, the destination has not | |
711 | yet timed out, so the address as a whole has not timed out (but we are | |
712 | interested in this only for the end address). Make sure the expired | |
713 | flag is false (can be forced via fixdb from outside, but ensure it is | |
714 | consistent with the rules whenever we go through here). */ | |
715 | ||
7b4c8c1f | 716 | if (rule) |
059ec3d9 | 717 | retry_record->expired = FALSE; |
059ec3d9 PH |
718 | |
719 | /* Otherwise, set the retry timeout expired, and set the final rule | |
720 | as the one from which to compute the next retry time. Subsequent | |
721 | messages will fail immediately until the retry time is reached (unless | |
722 | there are other, still active, retries). */ | |
723 | ||
724 | else | |
725 | { | |
726 | rule = final_rule; | |
727 | retry_record->expired = TRUE; | |
728 | if (addr == endaddr) timedout_count++; | |
729 | } | |
730 | ||
731 | /* There is a special case to consider when some messages get through | |
732 | to a destination and others don't. This can happen locally when a | |
733 | large message pushes a user over quota, and it can happen remotely | |
734 | when a machine is on a dodgy Internet connection. The messages that | |
735 | get through wipe the retry information, causing those that don't to | |
736 | stay on the queue longer than the final retry time. In order to | |
737 | avoid this, we check, using the time of arrival of the message, to | |
738 | see if it has been on the queue for more than the final cutoff time, | |
739 | and if so, cause this retry item to time out, and the retry time to | |
740 | be set to "now" so that any subsequent messages in the same condition | |
741 | also get tried. We search for the last rule onwards from the one that | |
742 | is in use. If there are no retry rules for the item, rule will be null | |
743 | and timedout_count will already have been updated. | |
744 | ||
745 | This implements "timeout this rule if EITHER the host (or routing or | |
746 | directing) has been failing for more than the maximum time, OR if the | |
727071f8 PH |
747 | message has been on the queue for more than the maximum time." |
748 | ||
749 | February 2006: It is possible that this code is no longer needed | |
750 | following the change to the retry calculation to use the message age if | |
751 | it is larger than the time since first failure. It may be that the | |
752 | expired flag is always set when the other conditions are met. However, | |
753 | this is a small bit of code, and it does no harm to leave it in place, | |
754 | just in case. */ | |
059ec3d9 | 755 | |
32dfdf8b | 756 | if ( received_time.tv_sec <= retry_record->first_failed |
7b4c8c1f JH |
757 | && addr == endaddr |
758 | && !retry_record->expired | |
759 | && rule) | |
059ec3d9 PH |
760 | { |
761 | retry_rule *last_rule; | |
7b4c8c1f JH |
762 | for (last_rule = rule; last_rule->next; last_rule = last_rule->next) |
763 | ; | |
32dfdf8b | 764 | if (now - received_time.tv_sec > last_rule->timeout) |
059ec3d9 PH |
765 | { |
766 | DEBUG(D_retry) debug_printf("on queue longer than maximum retry\n"); | |
767 | timedout_count++; | |
768 | rule = NULL; | |
769 | } | |
770 | } | |
771 | ||
772 | /* Compute the next try time from the rule, subject to the global | |
773 | maximum, and update the retry database. If rule == NULL it means | |
774 | there were no rules at all (and the timeout will be set expired), | |
775 | or we have a message that is older than the final timeout. In this | |
776 | case set the next retry time to now, so that one delivery attempt | |
777 | happens for subsequent messages. */ | |
778 | ||
7b4c8c1f JH |
779 | if (!rule) |
780 | next_try = now; | |
781 | else | |
059ec3d9 | 782 | { |
7b4c8c1f JH |
783 | if (rule->rule == 'F') |
784 | next_try = now + rule->p1; | |
6af56900 | 785 | else /* rule = 'G' or 'H' */ |
059ec3d9 PH |
786 | { |
787 | int last_predicted_gap = | |
788 | retry_record->next_try - retry_record->last_try; | |
789 | int last_actual_gap = now - retry_record->last_try; | |
790 | int lastgap = (last_predicted_gap < last_actual_gap)? | |
791 | last_predicted_gap : last_actual_gap; | |
6af56900 PH |
792 | int next_gap = (lastgap * rule->p2)/1000; |
793 | if (rule->rule == 'G') | |
6af56900 | 794 | next_try = now + ((lastgap < rule->p1)? rule->p1 : next_gap); |
6af56900 PH |
795 | else /* The 'H' rule */ |
796 | { | |
797 | next_try = now + rule->p1; | |
798 | if (next_gap > rule->p1) | |
3cd34f13 PH |
799 | next_try += random_number(next_gap - rule->p1)/2 + |
800 | (next_gap - rule->p1)/2; | |
6af56900 | 801 | } |
059ec3d9 PH |
802 | } |
803 | } | |
804 | ||
805 | /* Impose a global retry max */ | |
806 | ||
807 | if (next_try - now > retry_interval_max) | |
808 | next_try = now + retry_interval_max; | |
809 | ||
810 | /* If the new message length is greater than the previous one, we | |
811 | have to copy the record first. */ | |
812 | ||
813 | if (message_length > message_space) | |
814 | { | |
815 | dbdata_retry *newr = store_get(sizeof(dbdata_retry) + message_length); | |
816 | memcpy(newr, retry_record, sizeof(dbdata_retry)); | |
817 | retry_record = newr; | |
818 | } | |
819 | ||
820 | /* Set up the retry record; message_length may be less than the string | |
821 | length for very long error strings. */ | |
822 | ||
823 | retry_record->last_try = now; | |
824 | retry_record->next_try = next_try; | |
825 | retry_record->basic_errno = rti->basic_errno; | |
826 | retry_record->more_errno = rti->more_errno; | |
827 | Ustrncpy(retry_record->text, message, message_length); | |
828 | retry_record->text[message_length] = 0; | |
829 | ||
830 | DEBUG(D_retry) | |
831 | { | |
832 | int letter = retry_record->more_errno & 255; | |
833 | debug_printf("Writing retry data for %s\n", rti->key); | |
834 | debug_printf(" first failed=%d last try=%d next try=%d expired=%d\n", | |
835 | (int)retry_record->first_failed, (int)retry_record->last_try, | |
836 | (int)retry_record->next_try, retry_record->expired); | |
837 | debug_printf(" errno=%d more_errno=", retry_record->basic_errno); | |
838 | if (letter == 'A' || letter == 'M') | |
839 | debug_printf("%d,%c", (retry_record->more_errno >> 8) & 255, | |
840 | letter); | |
841 | else | |
842 | debug_printf("%d", retry_record->more_errno); | |
843 | debug_printf(" %s\n", retry_record->text); | |
844 | } | |
845 | ||
846 | (void)dbfn_write(dbm_file, rti->key, retry_record, | |
847 | sizeof(dbdata_retry) + message_length); | |
848 | } /* Loop for each retry item */ | |
849 | ||
850 | /* If all the non-delete retry items are timed out, the address is | |
851 | timed out, provided that we didn't skip any hosts because their retry | |
852 | time was not reached (or because of hosts_max_try). */ | |
853 | ||
854 | if (update_count > 0 && update_count == timedout_count) | |
059ec3d9 PH |
855 | if (!testflag(endaddr, af_retry_skipped)) |
856 | { | |
857 | DEBUG(D_retry) debug_printf("timed out: all retries expired\n"); | |
858 | timed_out = TRUE; | |
859 | } | |
860 | else | |
059ec3d9 PH |
861 | DEBUG(D_retry) |
862 | debug_printf("timed out but some hosts were skipped\n"); | |
059ec3d9 PH |
863 | } /* Loop for an address and its parents */ |
864 | ||
865 | /* If this is a deferred address, and retry processing was requested by | |
866 | means of one or more retry items, and they all timed out, move the address | |
867 | to the failed queue, and restart this loop without updating paddr. | |
868 | ||
869 | If there were several addresses batched in the same remote delivery, only | |
870 | the original top one will have host retry items attached to it, but we want | |
871 | to handle them all the same. Each will have a pointer back to its "top" address, | |
872 | and they will now precede the item with the retries because addresses are | |
873 | inverted when added to these final queues. We have saved information about | |
874 | them in passing (below) so they can all be cut out at once. */ | |
875 | ||
876 | if (i == 2) /* Handling defers */ | |
877 | { | |
7b4c8c1f | 878 | if (endaddr->retries && timed_out) |
059ec3d9 PH |
879 | { |
880 | if (last_first == endaddr) paddr = saved_paddr; | |
881 | addr = *paddr; | |
882 | *paddr = endaddr->next; | |
883 | ||
884 | endaddr->next = *addr_failed; | |
885 | *addr_failed = addr; | |
886 | ||
887 | for (;; addr = addr->next) | |
888 | { | |
889 | setflag(addr, af_retry_timedout); | |
13c7874e JH |
890 | addr->message = addr->message |
891 | ? string_sprintf("%s: retry timeout exceeded", addr->message) | |
892 | : US"retry timeout exceeded"; | |
893 | addr->user_message = addr->user_message | |
894 | ? string_sprintf("%s: retry timeout exceeded", addr->user_message) | |
895 | : US"retry timeout exceeded"; | |
059ec3d9 PH |
896 | log_write(0, LOG_MAIN, "** %s%s%s%s: retry timeout exceeded", |
897 | addr->address, | |
13c7874e JH |
898 | addr->parent ? US" <" : US"", |
899 | addr->parent ? addr->parent->address : US"", | |
900 | addr->parent ? US">" : US""); | |
059ec3d9 PH |
901 | |
902 | if (addr == endaddr) break; | |
903 | } | |
904 | ||
905 | continue; /* Restart from changed *paddr */ | |
906 | } | |
907 | ||
908 | /* This address is to remain on the defer chain. If it has a "first" | |
909 | pointer, save the pointer to it in case we want to fail the set of | |
910 | addresses when we get to the first one. */ | |
911 | ||
912 | if (endaddr->first != last_first) | |
913 | { | |
914 | last_first = endaddr->first; | |
915 | saved_paddr = paddr; | |
916 | } | |
917 | } | |
918 | ||
919 | /* All cases (succeed, fail, defer left on queue) */ | |
920 | ||
921 | paddr = &(endaddr->next); /* Advance to next address */ | |
922 | } /* Loop for all addresses */ | |
923 | } /* Loop for succeed, fail, defer */ | |
924 | ||
925 | /* Close and unlock the database */ | |
926 | ||
7b4c8c1f | 927 | if (dbm_file) dbfn_close(dbm_file); |
059ec3d9 PH |
928 | |
929 | DEBUG(D_retry) debug_printf("end of retry processing\n"); | |
930 | } | |
931 | ||
932 | /* End of retry.c */ |