| 1 | /* $Cambridge: exim/src/src/retry.c,v 1.13 2009/11/16 19:50:37 nm4 Exp $ */ |
| 2 | |
| 3 | /************************************************* |
| 4 | * Exim - an Internet mail transport agent * |
| 5 | *************************************************/ |
| 6 | |
| 7 | /* Copyright (c) University of Cambridge 1995 - 2009 */ |
| 8 | /* See the file NOTICE for conditions of use and distribution. */ |
| 9 | |
| 10 | /* Functions concerned with retrying unsuccessful deliveries. */ |
| 11 | |
| 12 | |
| 13 | #include "exim.h" |
| 14 | |
| 15 | |
| 16 | |
| 17 | /************************************************* |
| 18 | * Check the ultimate address timeout * |
| 19 | *************************************************/ |
| 20 | |
| 21 | /* This function tests whether a message has been on the queue longer than |
| 22 | the maximum retry time for a particular host. |
| 23 | |
| 24 | Arguments: |
| 25 | host_key the key to look up a host retry rule |
| 26 | domain the domain to look up a domain retry rule |
| 27 | basic_errno a specific error number, or zero if none |
| 28 | more_errno additional data for the error |
| 29 | now the time |
| 30 | |
| 31 | Returns: TRUE if the ultimate timeout has been reached |
| 32 | */ |
| 33 | |
| 34 | static BOOL |
| 35 | ultimate_address_timeout(uschar *host_key, uschar *domain, int basic_errno, |
| 36 | int more_errno, time_t now) |
| 37 | { |
| 38 | BOOL address_timeout = TRUE; /* no rule => timed out */ |
| 39 | |
| 40 | retry_config *retry = |
| 41 | retry_find_config(host_key+2, domain, basic_errno, more_errno); |
| 42 | |
| 43 | if (retry != NULL && retry->rules != NULL) |
| 44 | { |
| 45 | retry_rule *last_rule; |
| 46 | for (last_rule = retry->rules; |
| 47 | last_rule->next != NULL; |
| 48 | last_rule = last_rule->next); |
| 49 | DEBUG(D_transport|D_retry) |
| 50 | debug_printf(" received_time=%d diff=%d timeout=%d\n", |
| 51 | received_time, (int)(now - received_time), last_rule->timeout); |
| 52 | address_timeout = (now - received_time > last_rule->timeout); |
| 53 | } |
| 54 | else |
| 55 | { |
| 56 | DEBUG(D_transport|D_retry) |
| 57 | debug_printf("no retry rule found: assume timed out\n"); |
| 58 | } |
| 59 | |
| 60 | return address_timeout; |
| 61 | } |
| 62 | |
| 63 | |
| 64 | |
| 65 | /************************************************* |
| 66 | * Set status of a host+address item * |
| 67 | *************************************************/ |
| 68 | |
| 69 | /* This function is passed a host_item which contains a host name and an |
| 70 | IP address string. Its job is to set the status of the address if it is not |
| 71 | already set (indicated by hstatus_unknown). The possible values are: |
| 72 | |
| 73 | hstatus_usable the address is not listed in the unusable tree, and does |
| 74 | not have a retry record, OR the time is past the next |
| 75 | try time, OR the message has been on the queue for more |
| 76 | than the maximum retry time for a failing host |
| 77 | |
| 78 | hstatus_unusable the address is listed in the unusable tree, or does have |
| 79 | a retry record, and the time is not yet at the next retry |
| 80 | time. |
| 81 | |
| 82 | hstatus_unusable_expired as above, but also the retry time has expired |
| 83 | for this address. |
| 84 | |
| 85 | The reason a delivery is permitted when a message has been around for a very |
| 86 | long time is to allow the ultimate address timeout to operate after a delivery |
| 87 | failure. Otherwise some messages may stick around without being tried for too |
| 88 | long. |
| 89 | |
| 90 | If a host retry record is retrieved from the hints database, the time of last |
| 91 | trying is filled into the last_try field of the host block. If a host is |
| 92 | generally usable, a check is made to see if there is a retry delay on this |
| 93 | specific message at this host. |
| 94 | |
| 95 | If a non-standard port is being used, it is added to the retry key. |
| 96 | |
| 97 | Arguments: |
| 98 | domain the address domain |
| 99 | host pointer to a host item |
| 100 | portstring "" for standard port, ":xxxx" for a non-standard port |
| 101 | include_ip_address TRUE to include the address in the key - this is |
| 102 | usual, but sometimes is not wanted |
| 103 | retry_host_key where to put a pointer to the key for the host-specific |
| 104 | retry record, if one is read and the host is usable |
| 105 | retry_message_key where to put a pointer to the key for the message+host |
| 106 | retry record, if one is read and the host is usable |
| 107 | |
| 108 | Returns: TRUE if the host has expired but is usable because |
| 109 | its retry time has come |
| 110 | */ |
| 111 | |
| 112 | BOOL |
| 113 | retry_check_address(uschar *domain, host_item *host, uschar *portstring, |
| 114 | BOOL include_ip_address, uschar **retry_host_key, uschar **retry_message_key) |
| 115 | { |
| 116 | BOOL yield = FALSE; |
| 117 | time_t now = time(NULL); |
| 118 | uschar *host_key, *message_key; |
| 119 | open_db dbblock; |
| 120 | open_db *dbm_file; |
| 121 | tree_node *node; |
| 122 | dbdata_retry *host_retry_record, *message_retry_record; |
| 123 | |
| 124 | *retry_host_key = *retry_message_key = NULL; |
| 125 | |
| 126 | DEBUG(D_transport|D_retry) debug_printf("checking status of %s\n", host->name); |
| 127 | |
| 128 | /* Do nothing if status already set; otherwise initialize status as usable. */ |
| 129 | |
| 130 | if (host->status != hstatus_unknown) return FALSE; |
| 131 | host->status = hstatus_usable; |
| 132 | |
| 133 | /* Generate the host key for the unusable tree and the retry database. Ensure |
| 134 | host names are lower cased (that's what %S does). */ |
| 135 | |
| 136 | host_key = include_ip_address? |
| 137 | string_sprintf("T:%S:%s%s", host->name, host->address, portstring) : |
| 138 | string_sprintf("T:%S%s", host->name, portstring); |
| 139 | |
| 140 | /* Generate the message-specific key */ |
| 141 | |
| 142 | message_key = string_sprintf("%s:%s", host_key, message_id); |
| 143 | |
| 144 | /* Search the tree of unusable IP addresses. This is filled in when deliveries |
| 145 | fail, because the retry database itself is not updated until the end of all |
| 146 | deliveries (so as to do it all in one go). The tree records addresses that have |
| 147 | become unusable during this delivery process (i.e. those that will get put into |
| 148 | the retry database when it is updated). */ |
| 149 | |
| 150 | node = tree_search(tree_unusable, host_key); |
| 151 | if (node != NULL) |
| 152 | { |
| 153 | DEBUG(D_transport|D_retry) debug_printf("found in tree of unusables\n"); |
| 154 | host->status = (node->data.val > 255)? |
| 155 | hstatus_unusable_expired : hstatus_unusable; |
| 156 | host->why = node->data.val & 255; |
| 157 | return FALSE; |
| 158 | } |
| 159 | |
| 160 | /* Open the retry database, giving up if there isn't one. Otherwise, search for |
| 161 | the retry records, and then close the database again. */ |
| 162 | |
| 163 | if ((dbm_file = dbfn_open(US"retry", O_RDONLY, &dbblock, FALSE)) == NULL) |
| 164 | { |
| 165 | DEBUG(D_deliver|D_retry|D_hints_lookup) |
| 166 | debug_printf("no retry data available\n"); |
| 167 | return FALSE; |
| 168 | } |
| 169 | host_retry_record = dbfn_read(dbm_file, host_key); |
| 170 | message_retry_record = dbfn_read(dbm_file, message_key); |
| 171 | dbfn_close(dbm_file); |
| 172 | |
| 173 | /* Ignore the data if it is too old - too long since it was written */ |
| 174 | |
| 175 | if (host_retry_record == NULL) |
| 176 | { |
| 177 | DEBUG(D_transport|D_retry) debug_printf("no host retry record\n"); |
| 178 | } |
| 179 | else if (now - host_retry_record->time_stamp > retry_data_expire) |
| 180 | { |
| 181 | host_retry_record = NULL; |
| 182 | DEBUG(D_transport|D_retry) debug_printf("host retry record too old\n"); |
| 183 | } |
| 184 | |
| 185 | if (message_retry_record == NULL) |
| 186 | { |
| 187 | DEBUG(D_transport|D_retry) debug_printf("no message retry record\n"); |
| 188 | } |
| 189 | else if (now - message_retry_record->time_stamp > retry_data_expire) |
| 190 | { |
| 191 | message_retry_record = NULL; |
| 192 | DEBUG(D_transport|D_retry) debug_printf("message retry record too old\n"); |
| 193 | } |
| 194 | |
| 195 | /* If there's a host-specific retry record, check for reaching the retry |
| 196 | time (or forcing). If not, and the host is not expired, check for the message |
| 197 | having been around for longer than the maximum retry time for this host or |
| 198 | address. Allow the delivery if it has. Otherwise set the appropriate unusable |
| 199 | flag and return FALSE. Otherwise arrange to return TRUE if this is an expired |
| 200 | host. */ |
| 201 | |
| 202 | if (host_retry_record != NULL) |
| 203 | { |
| 204 | *retry_host_key = host_key; |
| 205 | |
| 206 | /* We have not reached the next try time. Check for the ultimate address |
| 207 | timeout if the host has not expired. */ |
| 208 | |
| 209 | if (now < host_retry_record->next_try && !deliver_force) |
| 210 | { |
| 211 | DEBUG(D_transport|D_retry) |
| 212 | { |
| 213 | debug_printf("host retry time not reached: checking ultimate address " |
| 214 | "timeout\n"); |
| 215 | debug_printf(" now=%d first_failed=%d next_try=%d expired=%d\n", |
| 216 | (int)now, (int)host_retry_record->first_failed, |
| 217 | (int)host_retry_record->next_try, |
| 218 | host_retry_record->expired); |
| 219 | } |
| 220 | |
| 221 | if (!host_retry_record->expired && |
| 222 | ultimate_address_timeout(host_key, domain, |
| 223 | host_retry_record->basic_errno, host_retry_record->more_errno, now)) |
| 224 | { |
| 225 | DEBUG(D_transport|D_retry) |
| 226 | debug_printf("on queue longer than maximum retry for " |
| 227 | "address - allowing delivery\n"); |
| 228 | return FALSE; |
| 229 | } |
| 230 | |
| 231 | /* We have not hit the ultimate address timeout; host is unusable. */ |
| 232 | |
| 233 | host->status = (host_retry_record->expired)? |
| 234 | hstatus_unusable_expired : hstatus_unusable; |
| 235 | host->why = hwhy_retry; |
| 236 | host->last_try = host_retry_record->last_try; |
| 237 | return FALSE; |
| 238 | } |
| 239 | |
| 240 | /* Host is usable; set return TRUE if expired. */ |
| 241 | |
| 242 | yield = host_retry_record->expired; |
| 243 | } |
| 244 | |
| 245 | /* It's OK to try the host. If there's a message-specific retry record, check |
| 246 | for reaching its retry time (or forcing). If not, mark the host unusable, |
| 247 | unless the ultimate address timeout has been reached. */ |
| 248 | |
| 249 | if (message_retry_record != NULL) |
| 250 | { |
| 251 | *retry_message_key = message_key; |
| 252 | if (now < message_retry_record->next_try && !deliver_force) |
| 253 | { |
| 254 | DEBUG(D_transport|D_retry) |
| 255 | { |
| 256 | debug_printf("host+message retry time not reached: checking ultimate " |
| 257 | "address timeout\n"); |
| 258 | debug_printf(" now=%d first_failed=%d next_try=%d expired=%d\n", |
| 259 | (int)now, (int)message_retry_record->first_failed, |
| 260 | (int)message_retry_record->next_try, message_retry_record->expired); |
| 261 | } |
| 262 | if (!ultimate_address_timeout(host_key, domain, 0, 0, now)) |
| 263 | { |
| 264 | host->status = hstatus_unusable; |
| 265 | host->why = hwhy_retry; |
| 266 | } |
| 267 | else |
| 268 | { |
| 269 | DEBUG(D_transport|D_retry) |
| 270 | debug_printf("on queue longer than maximum retry for " |
| 271 | "address - allowing delivery\n"); |
| 272 | } |
| 273 | return FALSE; |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | return yield; |
| 278 | } |
| 279 | |
| 280 | |
| 281 | |
| 282 | |
| 283 | /************************************************* |
| 284 | * Add a retry item to an address * |
| 285 | *************************************************/ |
| 286 | |
| 287 | /* Retry items are chained onto an address when it is deferred either by router |
| 288 | or by a transport, or if it succeeds or fails and there was a previous retry |
| 289 | item that now needs to be deleted. Sometimes there can be both kinds of item: |
| 290 | for example, if routing was deferred but then succeeded, and delivery then |
| 291 | deferred. In that case there is a delete item for the routing retry, and an |
| 292 | updating item for the delivery. |
| 293 | |
| 294 | (But note that that is only visible at the outer level, because in remote |
| 295 | delivery subprocesses, the address starts "clean", with no retry items carried |
| 296 | in.) |
| 297 | |
| 298 | These items are used at the end of a delivery attempt to update the retry |
| 299 | database. The keys start R: for routing delays and T: for transport delays. |
| 300 | |
| 301 | Arguments: |
| 302 | addr the address block onto which to hang the item |
| 303 | key the retry key |
| 304 | flags delete, host, and message flags, copied into the block |
| 305 | |
| 306 | Returns: nothing |
| 307 | */ |
| 308 | |
| 309 | void |
| 310 | retry_add_item(address_item *addr, uschar *key, int flags) |
| 311 | { |
| 312 | retry_item *rti = store_get(sizeof(retry_item)); |
| 313 | rti->next = addr->retries; |
| 314 | addr->retries = rti; |
| 315 | rti->key = key; |
| 316 | rti->basic_errno = addr->basic_errno; |
| 317 | rti->more_errno = addr->more_errno; |
| 318 | rti->message = addr->message; |
| 319 | rti->flags = flags; |
| 320 | |
| 321 | DEBUG(D_transport|D_retry) |
| 322 | { |
| 323 | int letter = rti->more_errno & 255; |
| 324 | debug_printf("added retry item for %s: errno=%d more_errno=", rti->key, |
| 325 | rti->basic_errno); |
| 326 | if (letter == 'A' || letter == 'M') |
| 327 | debug_printf("%d,%c", (rti->more_errno >> 8) & 255, letter); |
| 328 | else |
| 329 | debug_printf("%d", rti->more_errno); |
| 330 | debug_printf(" flags=%d\n", flags); |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | |
| 335 | |
| 336 | /************************************************* |
| 337 | * Find retry configuration data * |
| 338 | *************************************************/ |
| 339 | |
| 340 | /* Search the in-store retry information for the first retry item that applies |
| 341 | to a given destination. If the key contains an @ we are probably handling a |
| 342 | local delivery and have a complete address to search for; this happens when |
| 343 | retry_use_local_part is set on a router. Otherwise, the key is likely to be a |
| 344 | host name for a remote delivery, or a domain name for a local delivery. We |
| 345 | prepend *@ on the front of it so that it will match a retry item whose address |
| 346 | item pattern is independent of the local part. The alternate key, if set, is |
| 347 | always just a domain, so we treat it likewise. |
| 348 | |
| 349 | Arguments: |
| 350 | key key for which retry info is wanted |
| 351 | alternate alternative key, always just a domain |
| 352 | basic_errno specific error predicate on the retry rule, or zero |
| 353 | more_errno additional data for errno predicate |
| 354 | |
| 355 | Returns: pointer to retry rule, or NULL |
| 356 | */ |
| 357 | |
| 358 | retry_config * |
| 359 | retry_find_config(uschar *key, uschar *alternate, int basic_errno, |
| 360 | int more_errno) |
| 361 | { |
| 362 | int replace = 0; |
| 363 | uschar *use_key, *use_alternate; |
| 364 | uschar *colon = Ustrchr(key, ':'); |
| 365 | retry_config *yield; |
| 366 | |
| 367 | /* If there's a colon in the key, there are two possibilities: |
| 368 | |
| 369 | (1) This is a key for a host, ip address, and possibly port, in the format |
| 370 | |
| 371 | hostname:ip+port |
| 372 | |
| 373 | In this case, we temporarily replace the colon with a zero, to terminate |
| 374 | the string after the host name. |
| 375 | |
| 376 | (2) This is a key for a pipe, file, or autoreply delivery, in the format |
| 377 | |
| 378 | pipe-or-file-or-auto:x@y |
| 379 | |
| 380 | where x@y is the original address that provoked the delivery. The pipe or |
| 381 | file or auto will start with | or / or >, whereas a host name will start |
| 382 | with a letter or a digit. In this case we want to use the original address |
| 383 | to search for a retry rule. */ |
| 384 | |
| 385 | if (colon != NULL) |
| 386 | { |
| 387 | if (isalnum(*key)) |
| 388 | replace = ':'; |
| 389 | else |
| 390 | key = Ustrrchr(key, ':') + 1; /* Take from the last colon */ |
| 391 | } |
| 392 | |
| 393 | if (replace == 0) colon = key + Ustrlen(key); |
| 394 | *colon = 0; |
| 395 | |
| 396 | /* Sort out the keys */ |
| 397 | |
| 398 | use_key = (Ustrchr(key, '@') != NULL)? key : string_sprintf("*@%s", key); |
| 399 | use_alternate = (alternate == NULL)? NULL : string_sprintf("*@%s", alternate); |
| 400 | |
| 401 | /* Scan the configured retry items. */ |
| 402 | |
| 403 | for (yield = retries; yield != NULL; yield = yield->next) |
| 404 | { |
| 405 | uschar *plist = yield->pattern; |
| 406 | uschar *slist = yield->senders; |
| 407 | |
| 408 | /* If a specific error is set for this item, check that we are handling that |
| 409 | specific error, and if so, check any additional error information if |
| 410 | required. */ |
| 411 | |
| 412 | if (yield->basic_errno != 0) |
| 413 | { |
| 414 | /* Special code is required for quota errors, as these can either be system |
| 415 | quota errors, or Exim's own quota imposition, which has a different error |
| 416 | number. Full partitions are also treated in the same way as quota errors. |
| 417 | */ |
| 418 | |
| 419 | if (yield->basic_errno == ERRNO_EXIMQUOTA) |
| 420 | { |
| 421 | if ((basic_errno != ERRNO_EXIMQUOTA && basic_errno != errno_quota && |
| 422 | basic_errno != ENOSPC) || |
| 423 | (yield->more_errno != 0 && yield->more_errno > more_errno)) |
| 424 | continue; |
| 425 | } |
| 426 | |
| 427 | /* The TLSREQUIRED error also covers TLSFAILURE. These are subtly different |
| 428 | errors, but not worth separating at this level. */ |
| 429 | |
| 430 | else if (yield->basic_errno == ERRNO_TLSREQUIRED) |
| 431 | { |
| 432 | if (basic_errno != ERRNO_TLSREQUIRED && basic_errno != ERRNO_TLSFAILURE) |
| 433 | continue; |
| 434 | } |
| 435 | |
| 436 | /* Handle 4xx responses to MAIL, RCPT, or DATA. The code that was received |
| 437 | is in the 2nd least significant byte of more_errno (with 400 subtracted). |
| 438 | The required value is coded in the 2nd least significant byte of the |
| 439 | yield->more_errno field as follows: |
| 440 | |
| 441 | 255 => any 4xx code |
| 442 | >= 100 => the decade must match the value less 100 |
| 443 | < 100 => the exact value must match |
| 444 | */ |
| 445 | |
| 446 | else if (yield->basic_errno == ERRNO_MAIL4XX || |
| 447 | yield->basic_errno == ERRNO_RCPT4XX || |
| 448 | yield->basic_errno == ERRNO_DATA4XX) |
| 449 | { |
| 450 | int wanted; |
| 451 | if (basic_errno != yield->basic_errno) continue; |
| 452 | wanted = (yield->more_errno >> 8) & 255; |
| 453 | if (wanted != 255) |
| 454 | { |
| 455 | int evalue = (more_errno >> 8) & 255; |
| 456 | if (wanted >= 100) |
| 457 | { |
| 458 | if ((evalue/10)*10 != wanted - 100) continue; |
| 459 | } |
| 460 | else if (evalue != wanted) continue; |
| 461 | } |
| 462 | } |
| 463 | |
| 464 | /* There are some special cases for timeouts */ |
| 465 | |
| 466 | else if (yield->basic_errno == ETIMEDOUT) |
| 467 | { |
| 468 | if (basic_errno != ETIMEDOUT) continue; |
| 469 | |
| 470 | /* Just RTEF_CTOUT in the rule => don't care about 'A'/'M' addresses */ |
| 471 | if (yield->more_errno == RTEF_CTOUT) |
| 472 | { |
| 473 | if ((more_errno & RTEF_CTOUT) == 0) continue; |
| 474 | } |
| 475 | |
| 476 | else if (yield->more_errno != 0) |
| 477 | { |
| 478 | int cf_errno = more_errno; |
| 479 | if ((yield->more_errno & RTEF_CTOUT) == 0) cf_errno &= ~RTEF_CTOUT; |
| 480 | if (yield->more_errno != cf_errno) continue; |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | /* Default checks for exact match */ |
| 485 | |
| 486 | else |
| 487 | { |
| 488 | if (yield->basic_errno != basic_errno || |
| 489 | (yield->more_errno != 0 && yield->more_errno != more_errno)) |
| 490 | continue; |
| 491 | } |
| 492 | } |
| 493 | |
| 494 | /* If the "senders" condition is set, check it. Note that sender_address may |
| 495 | be null during -brt checking, in which case we do not use this rule. */ |
| 496 | |
| 497 | if (slist != NULL && (sender_address == NULL || |
| 498 | match_address_list(sender_address, TRUE, TRUE, &slist, NULL, -1, 0, |
| 499 | NULL) != OK)) |
| 500 | continue; |
| 501 | |
| 502 | /* Check for a match between the address list item at the start of this retry |
| 503 | rule and either the main or alternate keys. */ |
| 504 | |
| 505 | if (match_address_list(use_key, TRUE, TRUE, &plist, NULL, -1, UCHAR_MAX+1, |
| 506 | NULL) == OK || |
| 507 | (use_alternate != NULL && |
| 508 | match_address_list(use_alternate, TRUE, TRUE, &plist, NULL, -1, |
| 509 | UCHAR_MAX+1, NULL) == OK)) |
| 510 | break; |
| 511 | } |
| 512 | |
| 513 | *colon = replace; |
| 514 | return yield; |
| 515 | } |
| 516 | |
| 517 | |
| 518 | |
| 519 | |
| 520 | /************************************************* |
| 521 | * Update retry database * |
| 522 | *************************************************/ |
| 523 | |
| 524 | /* Update the retry data for any directing/routing/transporting that was |
| 525 | deferred, or delete it for those that succeeded after a previous defer. This is |
| 526 | done all in one go to minimize opening/closing/locking of the database file. |
| 527 | |
| 528 | Note that, because SMTP delivery involves a list of destinations to try, there |
| 529 | may be defer-type retry information for some of them even when the message was |
| 530 | successfully delivered. Likewise if it eventually failed. |
| 531 | |
| 532 | This function may move addresses from the defer to the failed queue if the |
| 533 | ultimate retry time has expired. |
| 534 | |
| 535 | Arguments: |
| 536 | addr_defer queue of deferred addresses |
| 537 | addr_failed queue of failed addresses |
| 538 | addr_succeed queue of successful addresses |
| 539 | |
| 540 | Returns: nothing |
| 541 | */ |
| 542 | |
| 543 | void |
| 544 | retry_update(address_item **addr_defer, address_item **addr_failed, |
| 545 | address_item **addr_succeed) |
| 546 | { |
| 547 | open_db dbblock; |
| 548 | open_db *dbm_file = NULL; |
| 549 | time_t now = time(NULL); |
| 550 | int i; |
| 551 | |
| 552 | DEBUG(D_retry) debug_printf("Processing retry items\n"); |
| 553 | |
| 554 | /* Three-times loop to handle succeeded, failed, and deferred addresses. |
| 555 | Deferred addresses must be handled after failed ones, because some may be moved |
| 556 | to the failed chain if they have timed out. */ |
| 557 | |
| 558 | for (i = 0; i < 3; i++) |
| 559 | { |
| 560 | address_item *endaddr, *addr; |
| 561 | address_item *last_first = NULL; |
| 562 | address_item **paddr = (i==0)? addr_succeed : |
| 563 | (i==1)? addr_failed : addr_defer; |
| 564 | address_item **saved_paddr = NULL; |
| 565 | |
| 566 | DEBUG(D_retry) debug_printf("%s addresses:\n", (i == 0)? "Succeeded" : |
| 567 | (i == 1)? "Failed" : "Deferred"); |
| 568 | |
| 569 | /* Loop for each address on the chain. For deferred addresses, the whole |
| 570 | address times out unless one of its retry addresses has a retry rule that |
| 571 | hasn't yet timed out. Deferred addresses should not be requesting deletion |
| 572 | of retry items, but just in case they do by accident, treat that case |
| 573 | as "not timed out". |
| 574 | |
| 575 | As well as handling the addresses themselves, we must also process any |
| 576 | retry items for any parent addresses - these are typically "delete" items, |
| 577 | because the parent must have succeeded in order to generate the child. */ |
| 578 | |
| 579 | while ((endaddr = *paddr) != NULL) |
| 580 | { |
| 581 | BOOL timed_out = FALSE; |
| 582 | retry_item *rti; |
| 583 | |
| 584 | for (addr = endaddr; addr != NULL; addr = addr->parent) |
| 585 | { |
| 586 | int update_count = 0; |
| 587 | int timedout_count = 0; |
| 588 | |
| 589 | DEBUG(D_retry) debug_printf("%s%s\n", addr->address, (addr->retries == NULL)? |
| 590 | ": no retry items" : ""); |
| 591 | |
| 592 | /* Loop for each retry item. */ |
| 593 | |
| 594 | for (rti = addr->retries; rti != NULL; rti = rti->next) |
| 595 | { |
| 596 | uschar *message; |
| 597 | int message_length, message_space, failing_interval, next_try; |
| 598 | retry_rule *rule, *final_rule; |
| 599 | retry_config *retry; |
| 600 | dbdata_retry *retry_record; |
| 601 | |
| 602 | /* Open the retry database if it is not already open; failure to open |
| 603 | the file is logged, but otherwise ignored - deferred addresses will |
| 604 | get retried at the next opportunity. Not opening earlier than this saves |
| 605 | opening if no addresses have retry items - common when none have yet |
| 606 | reached their retry next try time. */ |
| 607 | |
| 608 | if (dbm_file == NULL) |
| 609 | dbm_file = dbfn_open(US"retry", O_RDWR, &dbblock, TRUE); |
| 610 | |
| 611 | if (dbm_file == NULL) |
| 612 | { |
| 613 | DEBUG(D_deliver|D_retry|D_hints_lookup) |
| 614 | debug_printf("retry database not available for updating\n"); |
| 615 | return; |
| 616 | } |
| 617 | |
| 618 | /* If there are no deferred addresses, that is, if this message is |
| 619 | completing, and the retry item is for a message-specific SMTP error, |
| 620 | force it to be deleted, because there's no point in keeping data for |
| 621 | no-longer-existing messages. This situation can occur when a domain has |
| 622 | two hosts and a message-specific error occurs for the first of them, |
| 623 | but the address gets delivered to the second one. This optimization |
| 624 | doesn't succeed in cleaning out all the dead entries, but it helps. */ |
| 625 | |
| 626 | if (*addr_defer == NULL && (rti->flags & rf_message) != 0) |
| 627 | rti->flags |= rf_delete; |
| 628 | |
| 629 | /* Handle the case of a request to delete the retry info for this |
| 630 | destination. */ |
| 631 | |
| 632 | if ((rti->flags & rf_delete) != 0) |
| 633 | { |
| 634 | (void)dbfn_delete(dbm_file, rti->key); |
| 635 | DEBUG(D_retry) |
| 636 | debug_printf("deleted retry information for %s\n", rti->key); |
| 637 | continue; |
| 638 | } |
| 639 | |
| 640 | /* Count the number of non-delete retry items. This is so that we |
| 641 | can compare it to the count of timed_out ones, to check whether |
| 642 | all are timed out. */ |
| 643 | |
| 644 | update_count++; |
| 645 | |
| 646 | /* Get the retry information for this destination and error code, if |
| 647 | any. If this item is for a remote host with ip address, then pass |
| 648 | the domain name as an alternative to search for. If no retry |
| 649 | information is found, we can't generate a retry time, so there is |
| 650 | no point updating the database. This retry item is timed out. */ |
| 651 | |
| 652 | if ((retry = retry_find_config(rti->key + 2, |
| 653 | ((rti->flags & rf_host) != 0)? addr->domain : NULL, |
| 654 | rti->basic_errno, rti->more_errno)) == NULL) |
| 655 | { |
| 656 | DEBUG(D_retry) debug_printf("No configured retry item for %s%s%s\n", |
| 657 | rti->key, |
| 658 | ((rti->flags & rf_host) != 0)? US" or " : US"", |
| 659 | ((rti->flags & rf_host) != 0)? addr->domain : US""); |
| 660 | if (addr == endaddr) timedout_count++; |
| 661 | continue; |
| 662 | } |
| 663 | |
| 664 | DEBUG(D_retry) |
| 665 | { |
| 666 | if ((rti->flags & rf_host) != 0) |
| 667 | debug_printf("retry for %s (%s) = %s %d %d\n", rti->key, |
| 668 | addr->domain, retry->pattern, retry->basic_errno, |
| 669 | retry->more_errno); |
| 670 | else |
| 671 | debug_printf("retry for %s = %s %d %d\n", rti->key, retry->pattern, |
| 672 | retry->basic_errno, retry->more_errno); |
| 673 | } |
| 674 | |
| 675 | /* Set up the message for the database retry record. Because DBM |
| 676 | records have a maximum data length, we enforce a limit. There isn't |
| 677 | much point in keeping a huge message here, anyway. */ |
| 678 | |
| 679 | message = (rti->basic_errno > 0)? US strerror(rti->basic_errno) : |
| 680 | (rti->message == NULL)? |
| 681 | US"unknown error" : string_printing(rti->message); |
| 682 | message_length = Ustrlen(message); |
| 683 | if (message_length > 150) message_length = 150; |
| 684 | |
| 685 | /* Read a retry record from the database or construct a new one. |
| 686 | Ignore an old one if it is too old since it was last updated. */ |
| 687 | |
| 688 | retry_record = dbfn_read(dbm_file, rti->key); |
| 689 | if (retry_record != NULL && |
| 690 | now - retry_record->time_stamp > retry_data_expire) |
| 691 | retry_record = NULL; |
| 692 | |
| 693 | if (retry_record == NULL) |
| 694 | { |
| 695 | retry_record = store_get(sizeof(dbdata_retry) + message_length); |
| 696 | message_space = message_length; |
| 697 | retry_record->first_failed = now; |
| 698 | retry_record->last_try = now; |
| 699 | retry_record->next_try = now; |
| 700 | retry_record->expired = FALSE; |
| 701 | retry_record->text[0] = 0; /* just in case */ |
| 702 | } |
| 703 | else message_space = Ustrlen(retry_record->text); |
| 704 | |
| 705 | /* Compute how long this destination has been failing */ |
| 706 | |
| 707 | failing_interval = now - retry_record->first_failed; |
| 708 | DEBUG(D_retry) debug_printf("failing_interval=%d message_age=%d\n", |
| 709 | failing_interval, message_age); |
| 710 | |
| 711 | /* For a non-host error, if the message has been on the queue longer |
| 712 | than the recorded time of failure, use the message's age instead. This |
| 713 | can happen when some messages can be delivered and others cannot; a |
| 714 | successful delivery will reset the first_failed time, and this can lead |
| 715 | to a failing message being retried too often. */ |
| 716 | |
| 717 | if ((rti->flags & rf_host) == 0 && message_age > failing_interval) |
| 718 | failing_interval = message_age; |
| 719 | |
| 720 | /* Search for the current retry rule. The cutoff time of the |
| 721 | last rule is handled differently to the others. The rule continues |
| 722 | to operate for ever (the global maximum interval will eventually |
| 723 | limit the gaps) but its cutoff time determines when an individual |
| 724 | destination times out. If there are no retry rules, the destination |
| 725 | always times out, but we can't compute a retry time. */ |
| 726 | |
| 727 | final_rule = NULL; |
| 728 | for (rule = retry->rules; rule != NULL; rule = rule->next) |
| 729 | { |
| 730 | if (failing_interval <= rule->timeout) break; |
| 731 | final_rule = rule; |
| 732 | } |
| 733 | |
| 734 | /* If there's an un-timed out rule, the destination has not |
| 735 | yet timed out, so the address as a whole has not timed out (but we are |
| 736 | interested in this only for the end address). Make sure the expired |
| 737 | flag is false (can be forced via fixdb from outside, but ensure it is |
| 738 | consistent with the rules whenever we go through here). */ |
| 739 | |
| 740 | if (rule != NULL) |
| 741 | { |
| 742 | retry_record->expired = FALSE; |
| 743 | } |
| 744 | |
| 745 | /* Otherwise, set the retry timeout expired, and set the final rule |
| 746 | as the one from which to compute the next retry time. Subsequent |
| 747 | messages will fail immediately until the retry time is reached (unless |
| 748 | there are other, still active, retries). */ |
| 749 | |
| 750 | else |
| 751 | { |
| 752 | rule = final_rule; |
| 753 | retry_record->expired = TRUE; |
| 754 | if (addr == endaddr) timedout_count++; |
| 755 | } |
| 756 | |
| 757 | /* There is a special case to consider when some messages get through |
| 758 | to a destination and others don't. This can happen locally when a |
| 759 | large message pushes a user over quota, and it can happen remotely |
| 760 | when a machine is on a dodgy Internet connection. The messages that |
| 761 | get through wipe the retry information, causing those that don't to |
| 762 | stay on the queue longer than the final retry time. In order to |
| 763 | avoid this, we check, using the time of arrival of the message, to |
| 764 | see if it has been on the queue for more than the final cutoff time, |
| 765 | and if so, cause this retry item to time out, and the retry time to |
| 766 | be set to "now" so that any subsequent messages in the same condition |
| 767 | also get tried. We search for the last rule onwards from the one that |
| 768 | is in use. If there are no retry rules for the item, rule will be null |
| 769 | and timedout_count will already have been updated. |
| 770 | |
| 771 | This implements "timeout this rule if EITHER the host (or routing or |
| 772 | directing) has been failing for more than the maximum time, OR if the |
| 773 | message has been on the queue for more than the maximum time." |
| 774 | |
| 775 | February 2006: It is possible that this code is no longer needed |
| 776 | following the change to the retry calculation to use the message age if |
| 777 | it is larger than the time since first failure. It may be that the |
| 778 | expired flag is always set when the other conditions are met. However, |
| 779 | this is a small bit of code, and it does no harm to leave it in place, |
| 780 | just in case. */ |
| 781 | |
| 782 | if (received_time <= retry_record->first_failed && |
| 783 | addr == endaddr && !retry_record->expired && rule != NULL) |
| 784 | { |
| 785 | retry_rule *last_rule; |
| 786 | for (last_rule = rule; |
| 787 | last_rule->next != NULL; |
| 788 | last_rule = last_rule->next); |
| 789 | if (now - received_time > last_rule->timeout) |
| 790 | { |
| 791 | DEBUG(D_retry) debug_printf("on queue longer than maximum retry\n"); |
| 792 | timedout_count++; |
| 793 | rule = NULL; |
| 794 | } |
| 795 | } |
| 796 | |
| 797 | /* Compute the next try time from the rule, subject to the global |
| 798 | maximum, and update the retry database. If rule == NULL it means |
| 799 | there were no rules at all (and the timeout will be set expired), |
| 800 | or we have a message that is older than the final timeout. In this |
| 801 | case set the next retry time to now, so that one delivery attempt |
| 802 | happens for subsequent messages. */ |
| 803 | |
| 804 | if (rule == NULL) next_try = now; else |
| 805 | { |
| 806 | if (rule->rule == 'F') next_try = now + rule->p1; |
| 807 | else /* rule = 'G' or 'H' */ |
| 808 | { |
| 809 | int last_predicted_gap = |
| 810 | retry_record->next_try - retry_record->last_try; |
| 811 | int last_actual_gap = now - retry_record->last_try; |
| 812 | int lastgap = (last_predicted_gap < last_actual_gap)? |
| 813 | last_predicted_gap : last_actual_gap; |
| 814 | int next_gap = (lastgap * rule->p2)/1000; |
| 815 | if (rule->rule == 'G') |
| 816 | { |
| 817 | next_try = now + ((lastgap < rule->p1)? rule->p1 : next_gap); |
| 818 | } |
| 819 | else /* The 'H' rule */ |
| 820 | { |
| 821 | next_try = now + rule->p1; |
| 822 | if (next_gap > rule->p1) |
| 823 | next_try += random_number(next_gap - rule->p1)/2 + |
| 824 | (next_gap - rule->p1)/2; |
| 825 | } |
| 826 | } |
| 827 | } |
| 828 | |
| 829 | /* Impose a global retry max */ |
| 830 | |
| 831 | if (next_try - now > retry_interval_max) |
| 832 | next_try = now + retry_interval_max; |
| 833 | |
| 834 | /* If the new message length is greater than the previous one, we |
| 835 | have to copy the record first. */ |
| 836 | |
| 837 | if (message_length > message_space) |
| 838 | { |
| 839 | dbdata_retry *newr = store_get(sizeof(dbdata_retry) + message_length); |
| 840 | memcpy(newr, retry_record, sizeof(dbdata_retry)); |
| 841 | retry_record = newr; |
| 842 | } |
| 843 | |
| 844 | /* Set up the retry record; message_length may be less than the string |
| 845 | length for very long error strings. */ |
| 846 | |
| 847 | retry_record->last_try = now; |
| 848 | retry_record->next_try = next_try; |
| 849 | retry_record->basic_errno = rti->basic_errno; |
| 850 | retry_record->more_errno = rti->more_errno; |
| 851 | Ustrncpy(retry_record->text, message, message_length); |
| 852 | retry_record->text[message_length] = 0; |
| 853 | |
| 854 | DEBUG(D_retry) |
| 855 | { |
| 856 | int letter = retry_record->more_errno & 255; |
| 857 | debug_printf("Writing retry data for %s\n", rti->key); |
| 858 | debug_printf(" first failed=%d last try=%d next try=%d expired=%d\n", |
| 859 | (int)retry_record->first_failed, (int)retry_record->last_try, |
| 860 | (int)retry_record->next_try, retry_record->expired); |
| 861 | debug_printf(" errno=%d more_errno=", retry_record->basic_errno); |
| 862 | if (letter == 'A' || letter == 'M') |
| 863 | debug_printf("%d,%c", (retry_record->more_errno >> 8) & 255, |
| 864 | letter); |
| 865 | else |
| 866 | debug_printf("%d", retry_record->more_errno); |
| 867 | debug_printf(" %s\n", retry_record->text); |
| 868 | } |
| 869 | |
| 870 | (void)dbfn_write(dbm_file, rti->key, retry_record, |
| 871 | sizeof(dbdata_retry) + message_length); |
| 872 | } /* Loop for each retry item */ |
| 873 | |
| 874 | /* If all the non-delete retry items are timed out, the address is |
| 875 | timed out, provided that we didn't skip any hosts because their retry |
| 876 | time was not reached (or because of hosts_max_try). */ |
| 877 | |
| 878 | if (update_count > 0 && update_count == timedout_count) |
| 879 | { |
| 880 | if (!testflag(endaddr, af_retry_skipped)) |
| 881 | { |
| 882 | DEBUG(D_retry) debug_printf("timed out: all retries expired\n"); |
| 883 | timed_out = TRUE; |
| 884 | } |
| 885 | else |
| 886 | { |
| 887 | DEBUG(D_retry) |
| 888 | debug_printf("timed out but some hosts were skipped\n"); |
| 889 | } |
| 890 | } |
| 891 | } /* Loop for an address and its parents */ |
| 892 | |
| 893 | /* If this is a deferred address, and retry processing was requested by |
| 894 | means of one or more retry items, and they all timed out, move the address |
| 895 | to the failed queue, and restart this loop without updating paddr. |
| 896 | |
| 897 | If there were several addresses batched in the same remote delivery, only |
| 898 | the original top one will have host retry items attached to it, but we want |
| 899 | to handle all the same. Each will have a pointer back to its "top" address, |
| 900 | and they will now precede the item with the retries because addresses are |
| 901 | inverted when added to these final queues. We have saved information about |
| 902 | them in passing (below) so they can all be cut out at once. */ |
| 903 | |
| 904 | if (i == 2) /* Handling defers */ |
| 905 | { |
| 906 | if (endaddr->retries != NULL && timed_out) |
| 907 | { |
| 908 | if (last_first == endaddr) paddr = saved_paddr; |
| 909 | addr = *paddr; |
| 910 | *paddr = endaddr->next; |
| 911 | |
| 912 | endaddr->next = *addr_failed; |
| 913 | *addr_failed = addr; |
| 914 | |
| 915 | for (;; addr = addr->next) |
| 916 | { |
| 917 | setflag(addr, af_retry_timedout); |
| 918 | addr->message = (addr->message == NULL)? US"retry timeout exceeded" : |
| 919 | string_sprintf("%s: retry timeout exceeded", addr->message); |
| 920 | addr->user_message = (addr->user_message == NULL)? |
| 921 | US"retry timeout exceeded" : |
| 922 | string_sprintf("%s: retry timeout exceeded", addr->user_message); |
| 923 | log_write(0, LOG_MAIN, "** %s%s%s%s: retry timeout exceeded", |
| 924 | addr->address, |
| 925 | (addr->parent == NULL)? US"" : US" <", |
| 926 | (addr->parent == NULL)? US"" : addr->parent->address, |
| 927 | (addr->parent == NULL)? US"" : US">"); |
| 928 | |
| 929 | if (addr == endaddr) break; |
| 930 | } |
| 931 | |
| 932 | continue; /* Restart from changed *paddr */ |
| 933 | } |
| 934 | |
| 935 | /* This address is to remain on the defer chain. If it has a "first" |
| 936 | pointer, save the pointer to it in case we want to fail the set of |
| 937 | addresses when we get to the first one. */ |
| 938 | |
| 939 | if (endaddr->first != last_first) |
| 940 | { |
| 941 | last_first = endaddr->first; |
| 942 | saved_paddr = paddr; |
| 943 | } |
| 944 | } |
| 945 | |
| 946 | /* All cases (succeed, fail, defer left on queue) */ |
| 947 | |
| 948 | paddr = &(endaddr->next); /* Advance to next address */ |
| 949 | } /* Loop for all addresses */ |
| 950 | } /* Loop for succeed, fail, defer */ |
| 951 | |
| 952 | /* Close and unlock the database */ |
| 953 | |
| 954 | if (dbm_file != NULL) dbfn_close(dbm_file); |
| 955 | |
| 956 | DEBUG(D_retry) debug_printf("end of retry processing\n"); |
| 957 | } |
| 958 | |
| 959 | /* End of retry.c */ |