| 1 | /************************************************* |
| 2 | * Exim - an Internet mail transport agent * |
| 3 | *************************************************/ |
| 4 | |
| 5 | /* Copyright (c) University of Cambridge 1995 - 2009 */ |
| 6 | /* See the file NOTICE for conditions of use and distribution. */ |
| 7 | |
| 8 | /* Functions for doing things with sockets. With the advent of IPv6 this has |
| 9 | got messier, so that it's worth pulling out the code into separate functions |
| 10 | that other parts of Exim can call, especially as there are now several |
| 11 | different places in the code where sockets are used. */ |
| 12 | |
| 13 | |
| 14 | #include "exim.h" |
| 15 | |
| 16 | |
| 17 | /************************************************* |
| 18 | * Create a socket * |
| 19 | *************************************************/ |
| 20 | |
| 21 | /* Socket creation happens in a number of places so it's packaged here for |
| 22 | convenience. |
| 23 | |
| 24 | Arguments: |
| 25 | type SOCK_DGRAM or SOCK_STREAM |
| 26 | af AF_INET or AF_INET6 |
| 27 | |
| 28 | Returns: socket number or -1 on failure |
| 29 | */ |
| 30 | |
| 31 | int |
| 32 | ip_socket(int type, int af) |
| 33 | { |
| 34 | int sock = socket(af, type, 0); |
| 35 | if (sock < 0) |
| 36 | log_write(0, LOG_MAIN, "IPv%c socket creation failed: %s", |
| 37 | (af == AF_INET6)? '6':'4', strerror(errno)); |
| 38 | return sock; |
| 39 | } |
| 40 | |
| 41 | |
| 42 | |
| 43 | |
| 44 | #if HAVE_IPV6 |
| 45 | /************************************************* |
| 46 | * Convert printing address to numeric * |
| 47 | *************************************************/ |
| 48 | |
| 49 | /* This function converts the textual form of an IP address into a numeric form |
| 50 | in an appropriate structure in an IPv6 environment. The getaddrinfo() function |
| 51 | can (apparently) handle more complicated addresses (e.g. those containing |
| 52 | scopes) than inet_pton() in some environments. We use hints to tell it that the |
| 53 | input must be a numeric address. |
| 54 | |
| 55 | However, apparently some operating systems (or libraries) don't support |
| 56 | getaddrinfo(), so there is a build-time option to revert to inet_pton() (which |
| 57 | does not support scopes). |
| 58 | |
| 59 | Arguments: |
| 60 | address textual form of the address |
| 61 | saddr where to copy back the answer |
| 62 | |
| 63 | Returns: nothing - failure provokes a panic-die |
| 64 | */ |
| 65 | |
| 66 | static void |
| 67 | ip_addrinfo(uschar *address, struct sockaddr_in6 *saddr) |
| 68 | { |
| 69 | #ifdef IPV6_USE_INET_PTON |
| 70 | |
| 71 | if (inet_pton(AF_INET6, CS address, &saddr->sin6_addr) != 1) |
| 72 | log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an " |
| 73 | "IP address", address); |
| 74 | saddr->sin6_family = AF_INET6; |
| 75 | |
| 76 | #else |
| 77 | |
| 78 | int rc; |
| 79 | struct addrinfo hints, *res; |
| 80 | memset(&hints, 0, sizeof(hints)); |
| 81 | hints.ai_family = AF_INET6; |
| 82 | hints.ai_socktype = SOCK_STREAM; |
| 83 | hints.ai_flags = AI_NUMERICHOST; |
| 84 | if ((rc = getaddrinfo(CS address, NULL, &hints, &res)) != 0 || res == NULL) |
| 85 | log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an " |
| 86 | "IP address: %s", address, |
| 87 | (rc == 0)? "NULL result returned" : gai_strerror(rc)); |
| 88 | memcpy(saddr, res->ai_addr, res->ai_addrlen); |
| 89 | freeaddrinfo(res); |
| 90 | |
| 91 | #endif |
| 92 | } |
| 93 | #endif /* HAVE_IPV6 */ |
| 94 | |
| 95 | |
| 96 | /************************************************* |
| 97 | * Bind socket to interface and port * |
| 98 | *************************************************/ |
| 99 | |
| 100 | /* This function binds a socket to a local interface address and port. For a |
| 101 | wildcard IPv6 bind, the address is ":". |
| 102 | |
| 103 | Arguments: |
| 104 | sock the socket |
| 105 | af AF_INET or AF_INET6 - the socket type |
| 106 | address the IP address, in text form |
| 107 | port the IP port (host order) |
| 108 | |
| 109 | Returns: the result of bind() |
| 110 | */ |
| 111 | |
| 112 | int |
| 113 | ip_bind(int sock, int af, uschar *address, int port) |
| 114 | { |
| 115 | int s_len; |
| 116 | union sockaddr_46 sin; |
| 117 | memset(&sin, 0, sizeof(sin)); |
| 118 | |
| 119 | /* Setup code when using an IPv6 socket. The wildcard address is ":", to |
| 120 | ensure an IPv6 socket is used. */ |
| 121 | |
| 122 | #if HAVE_IPV6 |
| 123 | if (af == AF_INET6) |
| 124 | { |
| 125 | if (address[0] == ':' && address[1] == 0) |
| 126 | { |
| 127 | sin.v6.sin6_family = AF_INET6; |
| 128 | sin.v6.sin6_addr = in6addr_any; |
| 129 | } |
| 130 | else |
| 131 | { |
| 132 | ip_addrinfo(address, &sin.v6); /* Panic-dies on error */ |
| 133 | } |
| 134 | sin.v6.sin6_port = htons(port); |
| 135 | s_len = sizeof(sin.v6); |
| 136 | } |
| 137 | else |
| 138 | #else /* HAVE_IPv6 */ |
| 139 | af = af; /* Avoid compiler warning */ |
| 140 | #endif /* HAVE_IPV6 */ |
| 141 | |
| 142 | /* Setup code when using IPv4 socket. The wildcard address is "". */ |
| 143 | |
| 144 | { |
| 145 | sin.v4.sin_family = AF_INET; |
| 146 | sin.v4.sin_port = htons(port); |
| 147 | s_len = sizeof(sin.v4); |
| 148 | if (address[0] == 0) |
| 149 | sin.v4.sin_addr.s_addr = (S_ADDR_TYPE)INADDR_ANY; |
| 150 | else |
| 151 | sin.v4.sin_addr.s_addr = (S_ADDR_TYPE)inet_addr(CS address); |
| 152 | } |
| 153 | |
| 154 | /* Now we can call the bind() function */ |
| 155 | |
| 156 | return bind(sock, (struct sockaddr *)&sin, s_len); |
| 157 | } |
| 158 | |
| 159 | |
| 160 | |
| 161 | /************************************************* |
| 162 | * Connect socket to remote host * |
| 163 | *************************************************/ |
| 164 | |
| 165 | /* This function connects a socket to a remote address and port. The socket may |
| 166 | or may not have previously been bound to a local interface. The socket is not |
| 167 | closed, even in cases of error. It is expected that the calling function, which |
| 168 | created the socket, will be the one that closes it. |
| 169 | |
| 170 | Arguments: |
| 171 | sock the socket |
| 172 | af AF_INET6 or AF_INET for the socket type |
| 173 | address the remote address, in text form |
| 174 | port the remote port |
| 175 | timeout a timeout |
| 176 | |
| 177 | Returns: 0 on success; -1 on failure, with errno set |
| 178 | */ |
| 179 | |
| 180 | int |
| 181 | ip_connect(int sock, int af, uschar *address, int port, int timeout) |
| 182 | { |
| 183 | struct sockaddr_in s_in4; |
| 184 | struct sockaddr *s_ptr; |
| 185 | int s_len, rc, save_errno; |
| 186 | |
| 187 | /* For an IPv6 address, use an IPv6 sockaddr structure. */ |
| 188 | |
| 189 | #if HAVE_IPV6 |
| 190 | struct sockaddr_in6 s_in6; |
| 191 | if (af == AF_INET6) |
| 192 | { |
| 193 | memset(&s_in6, 0, sizeof(s_in6)); |
| 194 | ip_addrinfo(address, &s_in6); /* Panic-dies on error */ |
| 195 | s_in6.sin6_port = htons(port); |
| 196 | s_ptr = (struct sockaddr *)&s_in6; |
| 197 | s_len = sizeof(s_in6); |
| 198 | } |
| 199 | else |
| 200 | #else /* HAVE_IPV6 */ |
| 201 | af = af; /* Avoid compiler warning */ |
| 202 | #endif /* HAVE_IPV6 */ |
| 203 | |
| 204 | /* For an IPv4 address, use an IPv4 sockaddr structure, even on a system with |
| 205 | IPv6 support. */ |
| 206 | |
| 207 | { |
| 208 | memset(&s_in4, 0, sizeof(s_in4)); |
| 209 | s_in4.sin_family = AF_INET; |
| 210 | s_in4.sin_port = htons(port); |
| 211 | s_in4.sin_addr.s_addr = (S_ADDR_TYPE)inet_addr(CS address); |
| 212 | s_ptr = (struct sockaddr *)&s_in4; |
| 213 | s_len = sizeof(s_in4); |
| 214 | } |
| 215 | |
| 216 | /* If no connection timeout is set, just call connect() without setting a |
| 217 | timer, thereby allowing the inbuilt OS timeout to operate. */ |
| 218 | |
| 219 | sigalrm_seen = FALSE; |
| 220 | if (timeout > 0) alarm(timeout); |
| 221 | rc = connect(sock, s_ptr, s_len); |
| 222 | save_errno = errno; |
| 223 | alarm(0); |
| 224 | |
| 225 | /* There is a testing facility for simulating a connection timeout, as I |
| 226 | can't think of any other way of doing this. It converts a connection refused |
| 227 | into a timeout if the timeout is set to 999999. */ |
| 228 | |
| 229 | if (running_in_test_harness) |
| 230 | { |
| 231 | if (save_errno == ECONNREFUSED && timeout == 999999) |
| 232 | { |
| 233 | rc = -1; |
| 234 | save_errno = EINTR; |
| 235 | sigalrm_seen = TRUE; |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | /* Success */ |
| 240 | |
| 241 | if (rc >= 0) return 0; |
| 242 | |
| 243 | /* A failure whose error code is "Interrupted system call" is in fact |
| 244 | an externally applied timeout if the signal handler has been run. */ |
| 245 | |
| 246 | errno = (save_errno == EINTR && sigalrm_seen)? ETIMEDOUT : save_errno; |
| 247 | return -1; |
| 248 | } |
| 249 | |
| 250 | |
| 251 | |
| 252 | /************************************************* |
| 253 | * Set keepalive on a socket * |
| 254 | *************************************************/ |
| 255 | |
| 256 | /* Can be called for both incoming and outgoing sockets. |
| 257 | |
| 258 | Arguments: |
| 259 | sock the socket |
| 260 | address the remote host address, for failure logging |
| 261 | torf true for outgoing connection, false for incoming |
| 262 | |
| 263 | Returns: nothing |
| 264 | */ |
| 265 | |
| 266 | void |
| 267 | ip_keepalive(int sock, uschar *address, BOOL torf) |
| 268 | { |
| 269 | int fodder = 1; |
| 270 | if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, |
| 271 | (uschar *)(&fodder), sizeof(fodder)) != 0) |
| 272 | log_write(0, LOG_MAIN, "setsockopt(SO_KEEPALIVE) on connection %s %s " |
| 273 | "failed: %s", torf? "to":"from", address, strerror(errno)); |
| 274 | } |
| 275 | |
| 276 | |
| 277 | |
| 278 | /************************************************* |
| 279 | * Receive from a socket with timeout * |
| 280 | *************************************************/ |
| 281 | |
| 282 | /* The timeout is implemented using select(), and we loop to cover select() |
| 283 | getting interrupted, and the possibility of select() returning with a positive |
| 284 | result but no ready descriptor. Is this in fact possible? |
| 285 | |
| 286 | Arguments: |
| 287 | sock the socket |
| 288 | buffer to read into |
| 289 | buffsize the buffer size |
| 290 | timeout the timeout |
| 291 | |
| 292 | Returns: > 0 => that much data read |
| 293 | <= 0 on error or EOF; errno set - zero for EOF |
| 294 | */ |
| 295 | |
| 296 | int |
| 297 | ip_recv(int sock, uschar *buffer, int buffsize, int timeout) |
| 298 | { |
| 299 | fd_set select_inset; |
| 300 | struct timeval tv; |
| 301 | int start_recv = time(NULL); |
| 302 | int rc; |
| 303 | |
| 304 | /* Wait until the socket is ready */ |
| 305 | |
| 306 | for (;;) |
| 307 | { |
| 308 | FD_ZERO (&select_inset); |
| 309 | FD_SET (sock, &select_inset); |
| 310 | tv.tv_sec = timeout; |
| 311 | tv.tv_usec = 0; |
| 312 | |
| 313 | DEBUG(D_transport) debug_printf("waiting for data on socket\n"); |
| 314 | rc = select(sock + 1, (SELECT_ARG2_TYPE *)&select_inset, NULL, NULL, &tv); |
| 315 | |
| 316 | /* If some interrupt arrived, just retry. We presume this to be rare, |
| 317 | but it can happen (e.g. the SIGUSR1 signal sent by exiwhat causes |
| 318 | select() to exit). |
| 319 | |
| 320 | Aug 2004: Somebody set up a cron job that ran exiwhat every 2 minutes, making |
| 321 | the interrupt not at all rare. Since the timeout is typically more than 2 |
| 322 | minutes, the effect was to block the timeout completely. To prevent this |
| 323 | happening again, we do an explicit time test. */ |
| 324 | |
| 325 | if (rc < 0 && errno == EINTR) |
| 326 | { |
| 327 | DEBUG(D_transport) debug_printf("EINTR while waiting for socket data\n"); |
| 328 | if (time(NULL) - start_recv < timeout) continue; |
| 329 | DEBUG(D_transport) debug_printf("total wait time exceeds timeout\n"); |
| 330 | } |
| 331 | |
| 332 | /* Handle a timeout, and treat any other select error as a timeout, including |
| 333 | an EINTR when we have been in this loop for longer than timeout. */ |
| 334 | |
| 335 | if (rc <= 0) |
| 336 | { |
| 337 | errno = ETIMEDOUT; |
| 338 | return -1; |
| 339 | } |
| 340 | |
| 341 | /* If the socket is ready, break out of the loop. */ |
| 342 | |
| 343 | if (FD_ISSET(sock, &select_inset)) break; |
| 344 | } |
| 345 | |
| 346 | /* The socket is ready, read from it (via TLS if it's active). On EOF (i.e. |
| 347 | close down of the connection), set errno to zero; otherwise leave it alone. */ |
| 348 | |
| 349 | #ifdef SUPPORT_TLS |
| 350 | if (tls_active == sock) |
| 351 | rc = tls_read(buffer, buffsize); |
| 352 | else |
| 353 | #endif |
| 354 | rc = recv(sock, buffer, buffsize, 0); |
| 355 | |
| 356 | if (rc > 0) return rc; |
| 357 | if (rc == 0) errno = 0; |
| 358 | return -1; |
| 359 | } |
| 360 | |
| 361 | |
| 362 | |
| 363 | |
| 364 | /************************************************* |
| 365 | * Lookup DSCP settings for a socket * |
| 366 | *************************************************/ |
| 367 | |
| 368 | struct dscp_name_tableentry { |
| 369 | const uschar *name; |
| 370 | int value; |
| 371 | }; |
| 372 | /* Keep both of these tables sorted! */ |
| 373 | static struct dscp_name_tableentry dscp_table[] = { |
| 374 | #ifdef IPTOS_DSCP_AF11 |
| 375 | { "af11", IPTOS_DSCP_AF11 }, |
| 376 | { "af12", IPTOS_DSCP_AF12 }, |
| 377 | { "af13", IPTOS_DSCP_AF13 }, |
| 378 | { "af21", IPTOS_DSCP_AF21 }, |
| 379 | { "af22", IPTOS_DSCP_AF22 }, |
| 380 | { "af23", IPTOS_DSCP_AF23 }, |
| 381 | { "af31", IPTOS_DSCP_AF31 }, |
| 382 | { "af32", IPTOS_DSCP_AF32 }, |
| 383 | { "af33", IPTOS_DSCP_AF33 }, |
| 384 | { "af41", IPTOS_DSCP_AF41 }, |
| 385 | { "af42", IPTOS_DSCP_AF42 }, |
| 386 | { "af43", IPTOS_DSCP_AF43 }, |
| 387 | { "ef", IPTOS_DSCP_EF }, |
| 388 | #endif |
| 389 | #ifdef IPTOS_LOWCOST |
| 390 | { "lowcost", IPTOS_LOWCOST }, |
| 391 | #endif |
| 392 | { "lowdelay", IPTOS_LOWDELAY }, |
| 393 | #ifdef IPTOS_MINCOST |
| 394 | { "mincost", IPTOS_MINCOST }, |
| 395 | #endif |
| 396 | { "reliability", IPTOS_RELIABILITY }, |
| 397 | { "throughput", IPTOS_THROUGHPUT } |
| 398 | }; |
| 399 | static int dscp_table_size = |
| 400 | sizeof(dscp_table) / sizeof(struct dscp_name_tableentry); |
| 401 | |
| 402 | /* DSCP values change by protocol family, and so do the options used for |
| 403 | setsockopt(); this utility does all the lookups. |
| 404 | |
| 405 | Arguments: |
| 406 | dscp_name a string, so far unvalidated |
| 407 | af address_family in use |
| 408 | level setsockopt level to use |
| 409 | optname setsockopt name to use |
| 410 | dscp_value value for dscp_name |
| 411 | |
| 412 | Returns: TRUE if okay to setsockopt(), else FALSE |
| 413 | */ |
| 414 | |
| 415 | BOOL |
| 416 | dscp_lookup(const uschar *dscp_name, int af, |
| 417 | int *level, int *optname, int *dscp_value) |
| 418 | { |
| 419 | uschar *dscp_lookup; |
| 420 | int first, last; |
| 421 | |
| 422 | if (af == AF_INET) |
| 423 | { |
| 424 | *level = IPPROTO_IP; |
| 425 | *optname = IP_TOS; |
| 426 | } |
| 427 | else if (af == AF_INET6) |
| 428 | { |
| 429 | *level = IPPROTO_IPV6; |
| 430 | *optname = IPV6_TCLASS; |
| 431 | } |
| 432 | else |
| 433 | { |
| 434 | DEBUG(D_transport) |
| 435 | debug_printf("Unhandled address family %d in dscp_lookup()\n", af); |
| 436 | return FALSE; |
| 437 | } |
| 438 | if (!dscp_name) |
| 439 | { |
| 440 | DEBUG(D_transport) |
| 441 | debug_printf("[empty DSCP]\n"); |
| 442 | return FALSE; |
| 443 | } |
| 444 | dscp_lookup = expand_string(US dscp_name); |
| 445 | if (dscp_lookup == NULL || *dscp_lookup == '\0') |
| 446 | return FALSE; |
| 447 | |
| 448 | first = 0; |
| 449 | last = dscp_table_size; |
| 450 | while (last > first) |
| 451 | { |
| 452 | int middle = (first + last)/2; |
| 453 | int c = Ustrcmp(dscp_lookup, dscp_table[middle].name); |
| 454 | if (c == 0) |
| 455 | { |
| 456 | *dscp_value = dscp_table[middle].value; |
| 457 | return TRUE; |
| 458 | } |
| 459 | else if (c > 0) |
| 460 | { |
| 461 | first = middle + 1; |
| 462 | } |
| 463 | else |
| 464 | { |
| 465 | last = middle; |
| 466 | } |
| 467 | } |
| 468 | return FALSE; |
| 469 | } |
| 470 | |
| 471 | |
| 472 | /* End of ip.c */ |