Adjust the timeout after interrupted select()
[exim.git] / src / src / ip.c
... / ...
CommitLineData
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
5/* Copyright (c) University of Cambridge 1995 - 2015 */
6/* See the file NOTICE for conditions of use and distribution. */
7
/* Functions for doing things with sockets. With the advent of IPv6 this has
got messier, so that it's worth pulling out the code into separate functions
that other parts of Exim can call, especially as there are now several
different places in the code where sockets are used. */
12
13
14#include "exim.h"
15
16
17/*************************************************
18* Create a socket *
19*************************************************/
20
21/* Socket creation happens in a number of places so it's packaged here for
22convenience.
23
24Arguments:
25 type SOCK_DGRAM or SOCK_STREAM
26 af AF_INET or AF_INET6
27
28Returns: socket number or -1 on failure
29*/
30
31int
32ip_socket(int type, int af)
33{
34int sock = socket(af, type, 0);
35if (sock < 0)
36 log_write(0, LOG_MAIN, "IPv%c socket creation failed: %s",
37 (af == AF_INET6)? '6':'4', strerror(errno));
38return sock;
39}
40
41
42
43
44#if HAVE_IPV6
45/*************************************************
46* Convert printing address to numeric *
47*************************************************/
48
/* Turn the textual form of an IPv6 address into its numeric form, stored in
the caller's sockaddr_in6. Normally getaddrinfo() does the work, with hints
telling it that the input must be a numeric address; it can (apparently)
handle more complicated addresses (e.g. those containing scopes) than
inet_pton() in some environments.

Apparently some operating systems (or libraries) don't support getaddrinfo(),
so building with IPV6_USE_INET_PTON selects inet_pton() instead (which does
not support scopes). In that variant only the address and family fields of
the structure are written.

Arguments:
  address    textual form of the address
  saddr      where to copy back the answer

Returns:     nothing - failure provokes a panic-die
*/

static void
ip_addrinfo(const uschar *address, struct sockaddr_in6 *saddr)
{
#ifdef IPV6_USE_INET_PTON

  int parsed = inet_pton(AF_INET6, CCS address, &saddr->sin6_addr);

  if (parsed != 1)
    log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an "
      "IP address", address);
  saddr->sin6_family = AF_INET6;

#else

  struct addrinfo want;
  struct addrinfo *found;
  int gai_rc;

  /* Numeric IPv6 input only; no name resolution is wanted here */
  memset(&want, 0, sizeof(want));
  want.ai_flags = AI_NUMERICHOST;
  want.ai_family = AF_INET6;
  want.ai_socktype = SOCK_STREAM;

  gai_rc = getaddrinfo(CCS address, NULL, &want, &found);
  if (gai_rc != 0 || found == NULL)
    log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an "
      "IP address: %s", address,
      (gai_rc == 0)? "NULL result returned" : gai_strerror(gai_rc));

  memcpy(saddr, found->ai_addr, found->ai_addrlen);
  freeaddrinfo(found);

#endif
}
93#endif /* HAVE_IPV6 */
94
95
96/*************************************************
97* Bind socket to interface and port *
98*************************************************/
99
100int
101ip_addr(void * sin_, int af, const uschar * address, int port)
102{
103union sockaddr_46 * sin = sin_;
104memset(sin, 0, sizeof(*sin));
105
106/* Setup code when using an IPv6 socket. The wildcard address is ":", to
107ensure an IPv6 socket is used. */
108
109#if HAVE_IPV6
110if (af == AF_INET6)
111 {
112 if (address[0] == ':' && address[1] == 0)
113 {
114 sin->v6.sin6_family = AF_INET6;
115 sin->v6.sin6_addr = in6addr_any;
116 }
117 else
118 ip_addrinfo(address, &sin->v6); /* Panic-dies on error */
119 sin->v6.sin6_port = htons(port);
120 return sizeof(sin->v6);
121 }
122else
123#else /* HAVE_IPv6 */
124af = af; /* Avoid compiler warning */
125#endif /* HAVE_IPV6 */
126
127/* Setup code when using IPv4 socket. The wildcard address is "". */
128
129 {
130 sin->v4.sin_family = AF_INET;
131 sin->v4.sin_port = htons(port);
132 sin->v4.sin_addr.s_addr = address[0] == 0
133 ? (S_ADDR_TYPE)INADDR_ANY
134 : (S_ADDR_TYPE)inet_addr(CS address);
135 return sizeof(sin->v4);
136 }
137}
138
139
140
141/* This function binds a socket to a local interface address and port. For a
142wildcard IPv6 bind, the address is ":".
143
144Arguments:
145 sock the socket
146 af AF_INET or AF_INET6 - the socket type
147 address the IP address, in text form
148 port the IP port (host order)
149
150Returns: the result of bind()
151*/
152
153int
154ip_bind(int sock, int af, uschar *address, int port)
155{
156union sockaddr_46 sin;
157int s_len = ip_addr(&sin, af, address, port);
158return bind(sock, (struct sockaddr *)&sin, s_len);
159}
160
161
162
163/*************************************************
164* Connect socket to remote host *
165*************************************************/
166
167/* This function connects a socket to a remote address and port. The socket may
168or may not have previously been bound to a local interface. The socket is not
169closed, even in cases of error. It is expected that the calling function, which
170created the socket, will be the one that closes it.
171
172Arguments:
173 sock the socket
174 af AF_INET6 or AF_INET for the socket type
175 address the remote address, in text form
176 port the remote port
177 timeout a timeout (zero for indefinite timeout)
178
179Returns: 0 on success; -1 on failure, with errno set
180*/
181
int
ip_connect(int sock, int af, const uschar *address, int port, int timeout)
{
struct sockaddr_in s_in4;
struct sockaddr *s_ptr;
int s_len, rc, save_errno;

/* For an IPv6 address, use an IPv6 sockaddr structure. */

#if HAVE_IPV6
struct sockaddr_in6 s_in6;
if (af == AF_INET6)
  {
  memset(&s_in6, 0, sizeof(s_in6));
  ip_addrinfo(address, &s_in6);   /* Panic-dies on error */
  s_in6.sin6_port = htons(port);
  s_ptr = (struct sockaddr *)&s_in6;
  s_len = sizeof(s_in6);
  }
else
#else     /* HAVE_IPV6 */
af = af;  /* Avoid compiler warning */
#endif    /* HAVE_IPV6 */

/* For an IPv4 address, use an IPv4 sockaddr structure, even on a system with
IPv6 support. */

  {
  memset(&s_in4, 0, sizeof(s_in4));
  s_in4.sin_family = AF_INET;
  s_in4.sin_port = htons(port);
  s_in4.sin_addr.s_addr = (S_ADDR_TYPE)inet_addr(CCS address);
  s_ptr = (struct sockaddr *)&s_in4;
  s_len = sizeof(s_in4);
  }

/* If no connection timeout is set, just call connect() without setting a
timer, thereby allowing the inbuilt OS timeout to operate. The errno value is
captured immediately, before alarm(0) gets a chance to disturb it. */

sigalrm_seen = FALSE;
if (timeout > 0) alarm(timeout);
rc = connect(sock, s_ptr, s_len);
save_errno = errno;
alarm(0);

/* There is a testing facility for simulating a connection timeout. It
converts a connection refused into a timeout if the timeout is set to 999999.
The saved errno is only meaningful when connect() actually failed, so the
return code is checked too; otherwise a stale ECONNREFUSED left over from some
earlier call could turn a successful connection into a simulated timeout. */

if (running_in_test_harness && rc < 0 && save_errno == ECONNREFUSED &&
    timeout == 999999)
  {
  rc = -1;
  save_errno = EINTR;
  sigalrm_seen = TRUE;
  }

/* Success */

if (rc >= 0) return 0;

/* A failure whose error code is "Interrupted system call" is in fact
an externally applied timeout if the signal handler has been run. */

errno = save_errno == EINTR && sigalrm_seen ? ETIMEDOUT : save_errno;
return -1;
}
248
249
250
251/*************************************************
252* Create connected socket to remote host *
253*************************************************/
254
255/* Create a socket and connect to host (name or number, ipv6 ok)
256 at one of port-range.
257
258Arguments:
259 type SOCK_DGRAM or SOCK_STREAM
260 af AF_INET6 or AF_INET for the socket type
261 address the remote address, in text form
262 portlo,porthi the remote port range
263 timeout a timeout
264 connhost if not NULL, host_item filled in with connection details
265 errstr pointer for allocated string on error
266
267Return:
268 socket fd, or -1 on failure (having allocated an error string)
269*/
270int
271ip_connectedsocket(int type, const uschar * hostname, int portlo, int porthi,
272 int timeout, host_item * connhost, uschar ** errstr)
273{
274int namelen, port;
275host_item shost;
276host_item *h;
277int af = 0, fd, fd4 = -1, fd6 = -1;
278
279shost.next = NULL;
280shost.address = NULL;
281shost.port = portlo;
282shost.mx = -1;
283
284namelen = Ustrlen(hostname);
285
286/* Anything enclosed in [] must be an IP address. */
287
288if (hostname[0] == '[' &&
289 hostname[namelen - 1] == ']')
290 {
291 uschar * host = string_copy(hostname);
292 host[namelen - 1] = 0;
293 host++;
294 if (string_is_ip_address(host, NULL) == 0)
295 {
296 *errstr = string_sprintf("malformed IP address \"%s\"", hostname);
297 return -1;
298 }
299 shost.name = shost.address = host;
300 }
301
302/* Otherwise check for an unadorned IP address */
303
304else if (string_is_ip_address(hostname, NULL) != 0)
305 shost.name = shost.address = string_copy(hostname);
306
307/* Otherwise lookup IP address(es) from the name */
308
309else
310 {
311 shost.name = string_copy(hostname);
312 if (host_find_byname(&shost, NULL, HOST_FIND_QUALIFY_SINGLE,
313 NULL, FALSE) != HOST_FOUND)
314 {
315 *errstr = string_sprintf("no IP address found for host %s", shost.name);
316 return -1;
317 }
318 }
319
320/* Try to connect to the server - test each IP till one works */
321
322for (h = &shost; h != NULL; h = h->next)
323 {
324 fd = (Ustrchr(h->address, ':') != 0)
325 ? (fd6 < 0) ? (fd6 = ip_socket(type, af = AF_INET6)) : fd6
326 : (fd4 < 0) ? (fd4 = ip_socket(type, af = AF_INET )) : fd4;
327
328 if (fd < 0)
329 {
330 *errstr = string_sprintf("failed to create socket: %s", strerror(errno));
331 goto bad;
332 }
333
334 for(port = portlo; port <= porthi; port++)
335 if (ip_connect(fd, af, h->address, port, timeout) == 0)
336 {
337 if (fd != fd6) close(fd6);
338 if (fd != fd4) close(fd4);
339 if (connhost)
340 {
341 h->port = port;
342 *connhost = *h;
343 connhost->next = NULL;
344 }
345 return fd;
346 }
347 }
348
349*errstr = string_sprintf("failed to connect to any address for %s: %s",
350 hostname, strerror(errno));
351
352bad:
353 close(fd4); close(fd6); return -1;
354}
355
356
357int
358ip_tcpsocket(const uschar * hostport, uschar ** errstr, int tmo)
359{
360int scan;
361uschar hostname[256];
362unsigned int portlow, porthigh;
363
364/* extract host and port part */
365scan = sscanf(CS hostport, "%255s %u-%u", hostname, &portlow, &porthigh);
366if (scan != 3)
367 {
368 if (scan != 2)
369 {
370 *errstr = string_sprintf("invalid socket '%s'", hostport);
371 return -1;
372 }
373 porthigh = portlow;
374 }
375
376return ip_connectedsocket(SOCK_STREAM, hostname, portlow, porthigh,
377 tmo, NULL, errstr);
378}
379
380int
381ip_unixsocket(const uschar * path, uschar ** errstr)
382{
383int sock;
384struct sockaddr_un server;
385
386if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
387 {
388 *errstr = US"can't open UNIX socket.";
389 return -1;
390 }
391
392server.sun_family = AF_UNIX;
393Ustrncpy(server.sun_path, path, sizeof(server.sun_path)-1);
394server.sun_path[sizeof(server.sun_path)-1] = '\0';
395if (connect(sock, (struct sockaddr *) &server, sizeof(server)) < 0)
396 {
397 int err = errno;
398 (void)close(sock);
399 *errstr = string_sprintf("unable to connect to UNIX socket (%s): %s",
400 path, strerror(err));
401 return -1;
402 }
403return sock;
404}
405
406int
407ip_streamsocket(const uschar * spec, uschar ** errstr, int tmo)
408{
409return *spec == '/'
410 ? ip_unixsocket(spec, errstr) : ip_tcpsocket(spec, errstr, tmo);
411}
412
413/*************************************************
414* Set keepalive on a socket *
415*************************************************/
416
417/* Can be called for both incoming and outgoing sockets.
418
419Arguments:
420 sock the socket
421 address the remote host address, for failure logging
422 torf true for outgoing connection, false for incoming
423
424Returns: nothing
425*/
426
427void
428ip_keepalive(int sock, const uschar *address, BOOL torf)
429{
430int fodder = 1;
431if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
432 (uschar *)(&fodder), sizeof(fodder)) != 0)
433 log_write(0, LOG_MAIN, "setsockopt(SO_KEEPALIVE) on connection %s %s "
434 "failed: %s", torf? "to":"from", address, strerror(errno));
435}
436
437
438
439/*************************************************
440* Receive from a socket with timeout *
441*************************************************/
442
443/*
444Arguments:
445 fd the file descriptor
446 timeout the timeout, seconds
447Returns: TRUE => ready for i/o
448 FALSE => timed out, or other error
449*/
450BOOL
451fd_ready(int fd, int timeout)
452{
453fd_set select_inset;
454time_t start_recv = time(NULL);
455int time_left = timeout;
456int rc;
457
458if (timeout <= 0)
459 {
460 errno = ETIMEDOUT;
461 return FALSE;
462 }
463/* Wait until the socket is ready */
464
465do
466 {
467 struct timeval tv = { time_left, 0 };
468 FD_ZERO (&select_inset);
469 FD_SET (fd, &select_inset);
470
471 /*DEBUG(D_transport) debug_printf("waiting for data on fd\n");*/
472 rc = select(fd + 1, (SELECT_ARG2_TYPE *)&select_inset, NULL, NULL, &tv);
473
474 /* If some interrupt arrived, just retry. We presume this to be rare,
475 but it can happen (e.g. the SIGUSR1 signal sent by exiwhat causes
476 select() to exit).
477
478 Aug 2004: Somebody set up a cron job that ran exiwhat every 2 minutes, making
479 the interrupt not at all rare. Since the timeout is typically more than 2
480 minutes, the effect was to block the timeout completely. To prevent this
481 happening again, we do an explicit time test and adjust the timeout
482 accordingly */
483
484 if (rc < 0 && errno == EINTR)
485 {
486 DEBUG(D_transport) debug_printf("EINTR while waiting for socket data\n");
487 /* Watch out, 'continue' jumps to the condition, not to the loops top */
488 if (time_left = timeout - (time(NULL) - start_recv)) continue;
489 }
490
491 if (rc <= 0)
492 {
493 errno = ETIMEDOUT;
494 return FALSE;
495 }
496
497 /* Checking the FD_ISSET is not enough, if we're interrupted, the
498 select_inset may still contain the 'input'. */
499 }
500while (rc < 0 || !FD_ISSET(fd, &select_inset));
501return TRUE;
502}
503
504/* The timeout is implemented using select(), and we loop to cover select()
505getting interrupted, and the possibility of select() returning with a positive
506result but no ready descriptor. Is this in fact possible?
507
508Arguments:
509 sock the socket
510 buffer to read into
511 bufsize the buffer size
512 timeout the timeout
513
514Returns: > 0 => that much data read
515 <= 0 on error or EOF; errno set - zero for EOF
516*/
517
518int
519ip_recv(int sock, uschar *buffer, int buffsize, int timeout)
520{
521int rc;
522
523if (!fd_ready(sock, timeout))
524 return -1;
525
526/* The socket is ready, read from it (via TLS if it's active). On EOF (i.e.
527close down of the connection), set errno to zero; otherwise leave it alone. */
528
529#ifdef SUPPORT_TLS
530if (tls_out.active == sock)
531 rc = tls_read(FALSE, buffer, buffsize);
532else if (tls_in.active == sock)
533 rc = tls_read(TRUE, buffer, buffsize);
534else
535#endif
536 rc = recv(sock, buffer, buffsize, 0);
537
538if (rc > 0) return rc;
539if (rc == 0) errno = 0;
540return -1;
541}
542
543
544
545
546/*************************************************
547* Lookup address family of potential socket *
548*************************************************/
549
550/* Given a file-descriptor, check to see if it's a socket and, if so,
551return the address family; detects IPv4 vs IPv6. If not a socket then
552return -1.
553
554The value 0 is typically AF_UNSPEC, which should not be seen on a connected
555fd. If the return is -1, the errno will be from getsockname(); probably
556ENOTSOCK or ECONNRESET.
557
558Arguments: socket-or-not fd
559Returns: address family or -1
560*/
561
int
ip_get_address_family(int fd)
{
struct sockaddr_storage local;
socklen_t local_len = sizeof(local);
int rc = getsockname(fd, (struct sockaddr *) &local, &local_len);

/* On failure errno is left as getsockname() set it */
return rc < 0 ? -1 : (int) local.ss_family;
}
573
574
575
576
577/*************************************************
578* Lookup DSCP settings for a socket *
579*************************************************/
580
581struct dscp_name_tableentry {
582 const uschar *name;
583 int value;
584};
585/* Keep both of these tables sorted! */
586static struct dscp_name_tableentry dscp_table[] = {
587#ifdef IPTOS_DSCP_AF11
588 { CUS"af11", IPTOS_DSCP_AF11 },
589 { CUS"af12", IPTOS_DSCP_AF12 },
590 { CUS"af13", IPTOS_DSCP_AF13 },
591 { CUS"af21", IPTOS_DSCP_AF21 },
592 { CUS"af22", IPTOS_DSCP_AF22 },
593 { CUS"af23", IPTOS_DSCP_AF23 },
594 { CUS"af31", IPTOS_DSCP_AF31 },
595 { CUS"af32", IPTOS_DSCP_AF32 },
596 { CUS"af33", IPTOS_DSCP_AF33 },
597 { CUS"af41", IPTOS_DSCP_AF41 },
598 { CUS"af42", IPTOS_DSCP_AF42 },
599 { CUS"af43", IPTOS_DSCP_AF43 },
600 { CUS"ef", IPTOS_DSCP_EF },
601#endif
602#ifdef IPTOS_LOWCOST
603 { CUS"lowcost", IPTOS_LOWCOST },
604#endif
605 { CUS"lowdelay", IPTOS_LOWDELAY },
606#ifdef IPTOS_MINCOST
607 { CUS"mincost", IPTOS_MINCOST },
608#endif
609 { CUS"reliability", IPTOS_RELIABILITY },
610 { CUS"throughput", IPTOS_THROUGHPUT }
611};
612static int dscp_table_size =
613 sizeof(dscp_table) / sizeof(struct dscp_name_tableentry);
614
615/* DSCP values change by protocol family, and so do the options used for
616setsockopt(); this utility does all the lookups. It takes an unexpanded
617option string, expands it, strips off affix whitespace, then checks if it's
618a number. If all of what's left is a number, then that's how the option will
619be parsed and success/failure is a range check. If it's not all a number,
620then it must be a supported keyword.
621
622Arguments:
623 dscp_name a string, so far unvalidated
624 af address_family in use
625 level setsockopt level to use
626 optname setsockopt name to use
627 dscp_value value for dscp_name
628
629Returns: TRUE if okay to setsockopt(), else FALSE
630
631*level and *optname may be set even if FALSE is returned
632*/
633
634BOOL
635dscp_lookup(const uschar *dscp_name, int af,
636 int *level, int *optname, int *dscp_value)
637{
638uschar *dscp_lookup, *p;
639int first, last;
640long rawlong;
641
642if (af == AF_INET)
643 {
644 *level = IPPROTO_IP;
645 *optname = IP_TOS;
646 }
647#if HAVE_IPV6 && defined(IPV6_TCLASS)
648else if (af == AF_INET6)
649 {
650 *level = IPPROTO_IPV6;
651 *optname = IPV6_TCLASS;
652 }
653#endif
654else
655 {
656 DEBUG(D_transport)
657 debug_printf("Unhandled address family %d in dscp_lookup()\n", af);
658 return FALSE;
659 }
660if (!dscp_name)
661 {
662 DEBUG(D_transport)
663 debug_printf("[empty DSCP]\n");
664 return FALSE;
665 }
666dscp_lookup = expand_string(US dscp_name);
667if (dscp_lookup == NULL || *dscp_lookup == '\0')
668 return FALSE;
669
670p = dscp_lookup + Ustrlen(dscp_lookup) - 1;
671while (isspace(*p)) *p-- = '\0';
672while (isspace(*dscp_lookup) && dscp_lookup < p) dscp_lookup++;
673if (*dscp_lookup == '\0')
674 return FALSE;
675
676rawlong = Ustrtol(dscp_lookup, &p, 0);
677if (p != dscp_lookup && *p == '\0')
678 {
679 /* We have six bits available, which will end up shifted to fit in 0xFC mask.
680 RFC 2597 defines the values unshifted. */
681 if (rawlong < 0 || rawlong > 0x3F)
682 {
683 DEBUG(D_transport)
684 debug_printf("DSCP value %ld out of range, ignored.\n", rawlong);
685 return FALSE;
686 }
687 *dscp_value = rawlong << 2;
688 return TRUE;
689 }
690
691first = 0;
692last = dscp_table_size;
693while (last > first)
694 {
695 int middle = (first + last)/2;
696 int c = Ustrcmp(dscp_lookup, dscp_table[middle].name);
697 if (c == 0)
698 {
699 *dscp_value = dscp_table[middle].value;
700 return TRUE;
701 }
702 else if (c > 0)
703 first = middle + 1;
704 else
705 last = middle;
706 }
707return FALSE;
708}
709
710void
711dscp_list_to_stream(FILE *stream)
712{
713int i;
714for (i=0; i < dscp_table_size; ++i)
715 fprintf(stream, "%s\n", dscp_table[i].name);
716}
717
718
719/* End of ip.c */
720/* vi: aw ai sw=2
721*/