TFO: even in binary built for modern Linux, handle error returned by old Linux kernel...
[exim.git] / src / src / ip.c
CommitLineData
059ec3d9
PH
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
f9ba5e22 5/* Copyright (c) University of Cambridge 1995 - 2018 */
059ec3d9
PH
6/* See the file NOTICE for conditions of use and distribution. */
7
8/* Functions for doing things with sockets. With the advent of IPv6 this has
9got messier, so that it's worth pulling out the code into separate functions
4c04137d 10that other parts of Exim can call, especially as there are now several
059ec3d9
PH
11different places in the code where sockets are used. */
12
13
14#include "exim.h"
15
16
73a10da9
JH
17#if defined(TCP_FASTOPEN)
18# if defined(MSG_FASTOPEN) || defined(EXIM_TFO_CONNECTX) || defined(EXIM_TFO_FREEBSD)
19# define EXIM_SUPPORT_TFO
20# endif
21#endif
22
059ec3d9
PH
23/*************************************************
24* Create a socket *
25*************************************************/
26
27/* Socket creation happens in a number of places so it's packaged here for
28convenience.
29
30Arguments:
31 type SOCK_DGRAM or SOCK_STREAM
32 af AF_INET or AF_INET6
33
34Returns: socket number or -1 on failure
35*/
36
37int
38ip_socket(int type, int af)
39{
40int sock = socket(af, type, 0);
41if (sock < 0)
42 log_write(0, LOG_MAIN, "IPv%c socket creation failed: %s",
43 (af == AF_INET6)? '6':'4', strerror(errno));
44return sock;
45}
46
47
48
49
50#if HAVE_IPV6
51/*************************************************
52* Convert printing address to numeric *
53*************************************************/
54
/* Turn the textual form of an IPv6 address into a filled-in sockaddr_in6.
By default getaddrinfo() does the work, with hints that restrict it to numeric
IPv6 input; it can (apparently) handle more complicated addresses (e.g. those
containing scopes) than inet_pton() in some environments.

Apparently some operating systems (or libraries) don't support getaddrinfo(),
so building with IPV6_USE_INET_PTON selects inet_pton() instead (which does not
support scopes).

Arguments:
  address    textual form of the address
  saddr      where to copy back the answer

Returns:     nothing - failure provokes a panic-die
*/

static void
ip_addrinfo(const uschar *address, struct sockaddr_in6 *saddr)
{
#ifndef IPV6_USE_INET_PTON

  struct addrinfo numeric_v6, *found;
  int gai_rc;

  /* Numeric-only lookup: no name resolution is wanted here. */

  memset(&numeric_v6, 0, sizeof(numeric_v6));
  numeric_v6.ai_flags = AI_NUMERICHOST;
  numeric_v6.ai_socktype = SOCK_STREAM;
  numeric_v6.ai_family = AF_INET6;

  gai_rc = getaddrinfo(CCS address, NULL, &numeric_v6, &found);
  if (gai_rc != 0 || found == NULL)
    log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an "
      "IP address: %s", address,
      gai_rc != 0 ? gai_strerror(gai_rc) : "NULL result returned");

  memcpy(saddr, found->ai_addr, found->ai_addrlen);
  freeaddrinfo(found);

#else

  /* inet_pton() fills in only the address bytes, so the family is set here */

  if (inet_pton(AF_INET6, CCS address, &saddr->sin6_addr) != 1)
    log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an "
      "IP address", address);
  saddr->sin6_family = AF_INET6;

#endif
}
99#endif /* HAVE_IPV6 */
100
101
102/*************************************************
103* Bind socket to interface and port *
104*************************************************/
105
059ec3d9 106int
7eb6c37c 107ip_addr(void * sin_, int af, const uschar * address, int port)
059ec3d9 108{
7eb6c37c 109union sockaddr_46 * sin = sin_;
69cbeaec 110memset(sin, 0, sizeof(*sin));
059ec3d9
PH
111
112/* Setup code when using an IPv6 socket. The wildcard address is ":", to
113ensure an IPv6 socket is used. */
114
115#if HAVE_IPV6
116if (af == AF_INET6)
117 {
118 if (address[0] == ':' && address[1] == 0)
119 {
7eb6c37c
JH
120 sin->v6.sin6_family = AF_INET6;
121 sin->v6.sin6_addr = in6addr_any;
059ec3d9
PH
122 }
123 else
7eb6c37c
JH
124 ip_addrinfo(address, &sin->v6); /* Panic-dies on error */
125 sin->v6.sin6_port = htons(port);
126 return sizeof(sin->v6);
059ec3d9
PH
127 }
128else
129#else /* HAVE_IPv6 */
130af = af; /* Avoid compiler warning */
131#endif /* HAVE_IPV6 */
132
133/* Setup code when using IPv4 socket. The wildcard address is "". */
134
135 {
7eb6c37c
JH
136 sin->v4.sin_family = AF_INET;
137 sin->v4.sin_port = htons(port);
138 sin->v4.sin_addr.s_addr = address[0] == 0
139 ? (S_ADDR_TYPE)INADDR_ANY
140 : (S_ADDR_TYPE)inet_addr(CS address);
141 return sizeof(sin->v4);
059ec3d9 142 }
7eb6c37c 143}
059ec3d9 144
059ec3d9 145
7eb6c37c
JH
146
147/* This function binds a socket to a local interface address and port. For a
148wildcard IPv6 bind, the address is ":".
149
150Arguments:
151 sock the socket
152 af AF_INET or AF_INET6 - the socket type
153 address the IP address, in text form
154 port the IP port (host order)
155
156Returns: the result of bind()
157*/
158
159int
160ip_bind(int sock, int af, uschar *address, int port)
161{
162union sockaddr_46 sin;
163int s_len = ip_addr(&sin, af, address, port);
059ec3d9
PH
164return bind(sock, (struct sockaddr *)&sin, s_len);
165}
166
167
168
169/*************************************************
170* Connect socket to remote host *
171*************************************************/
172
/* This function connects a socket to a remote address and port. The socket may
or may not have previously been bound to a local interface. The socket is not
closed, even in cases of error. It is expected that the calling function, which
created the socket, will be the one that closes it.

When TCP Fast Open support is compiled in (EXIM_SUPPORT_TFO), a blob is
supplied and f.tcp_fastopen_ok is set, the connection is attempted with the
platform's TFO call (sendto() with MSG_FASTOPEN, sendto() after setting
TCP_FASTOPEN, or connectx()). Otherwise a plain connect() is used, followed by
a send() of any blob data.

Arguments:
  sock        the socket
  af          AF_INET6 or AF_INET for the socket type
  address     the remote address, in text form
  port        the remote port
  timeout     a timeout (zero for indefinite timeout)
  fastopen_blob  non-null iff TCP_FASTOPEN can be used; may indicate early-data to
		be sent in SYN segment.  Any such data must be idempotent.

Returns:      0 on success; -1 on failure, with errno set. An EINTR failure
              with sigalrm_seen set is reported as ETIMEDOUT.
*/

int
ip_connect(int sock, int af, const uschar *address, int port, int timeout,
  const blob * fastopen_blob)
{
struct sockaddr_in s_in4;
struct sockaddr *s_ptr;
int s_len, rc, save_errno;

/* For an IPv6 address, use an IPv6 sockaddr structure. */

#if HAVE_IPV6
struct sockaddr_in6 s_in6;
if (af == AF_INET6)
  {
  memset(&s_in6, 0, sizeof(s_in6));
  ip_addrinfo(address, &s_in6);   /* Panic-dies on error */
  s_in6.sin6_port = htons(port);
  s_ptr = (struct sockaddr *)&s_in6;
  s_len = sizeof(s_in6);
  }
else
#else     /* HAVE_IPV6 */
af = af;  /* Avoid compiler warning */
#endif    /* HAVE_IPV6 */

/* For an IPv4 address, use an IPv4 sockaddr structure, even on a system with
IPv6 support. Note that with HAVE_IPV6 this block is the else-branch of the
test above. */

  {
  memset(&s_in4, 0, sizeof(s_in4));
  s_in4.sin_family = AF_INET;
  s_in4.sin_port = htons(port);
  s_in4.sin_addr.s_addr = (S_ADDR_TYPE)inet_addr(CCS address);
  s_ptr = (struct sockaddr *)&s_in4;
  s_len = sizeof(s_in4);
  }

/* If no connection timeout is set, just call connect() without setting a
timer, thereby allowing the inbuilt OS timeout to operate. */

callout_address = string_sprintf("[%s]:%d", address, port);
sigalrm_seen = FALSE;
if (timeout > 0) ALARM(timeout);

#ifdef EXIM_SUPPORT_TFO
/* TCP Fast Open, if the system has a cookie from a previous call to
this peer, can send data in the SYN packet.  The peer can send data
before it gets our ACK of its SYN,ACK - the latter is useful for
the SMTP banner.  Other (than SMTP) cases of TCP connections can
possibly use the data-on-syn, so support that too.  */

if (fastopen_blob && f.tcp_fastopen_ok)
  {
# ifdef MSG_FASTOPEN
  /* This is a Linux implementation. */

  if ((rc = sendto(sock, fastopen_blob->data, fastopen_blob->len,
		    MSG_FASTOPEN | MSG_DONTWAIT, s_ptr, s_len)) >= 0)
	/* seen for with-data, experimental TFO option, with-cookie case */
	/* seen for with-data, proper TFO opt, with-cookie case */
    {
    DEBUG(D_transport|D_v)
      debug_printf(" TFO mode connection attempt to %s, %lu data\n",
	address, (unsigned long)fastopen_blob->len);
    /*XXX also seen on successful TFO, sigh */
    tcp_out_fastopen = fastopen_blob->len > 0 ? TFO_ATTEMPTED_DATA : TFO_ATTEMPTED_NODATA;
    }

  /* The switch has no default case: for any errno not listed, rc keeps the
  negative value from sendto() and the failure is handled at the end of the
  function. */

  else switch (errno)
    {
    case EINPROGRESS:	/* expected if we had no cookie for peer */
	/* seen for no-data, proper TFO option, both cookie-request and with-cookie cases */
	/*  apparently no visibility of the difference at this point */
	/* seen for with-data, proper TFO opt, cookie-req */
	/*   with netwk delay, post-conn tcp_info sees unacked 1 for R, 2 for C; code in smtp_out.c */
	/* ? older Experimental TFO option behaviour ? */
      DEBUG(D_transport|D_v) debug_printf(" TFO mode sendto, %s data: EINPROGRESS\n",
	fastopen_blob->len > 0 ? "with"  : "no");
      if (!fastopen_blob->data)
	{
	tcp_out_fastopen = TFO_ATTEMPTED_NODATA;		/* we tried; unknown if useful yet */
	rc = 0;
	}
      else					/* queue unsent data */
	rc = send(sock, fastopen_blob->data, fastopen_blob->len, 0);
      break;

    /* The next two cases abandon TFO and jump into the plain connect() block
    below. */

    case EOPNOTSUPP:
      DEBUG(D_transport)
	debug_printf("Tried TCP Fast Open but apparently not enabled by sysctl\n");
      goto legacy_connect;

    case EPIPE:
      DEBUG(D_transport)
	debug_printf("Tried TCP Fast Open but kernel too old to support it\n");
      goto legacy_connect;
    }

# elif defined(EXIM_TFO_FREEBSD)
  /* Re: https://people.freebsd.org/~pkelsey/tfo-tools/tfo-client.c */

  /* "on" is not declared in this function; presumably a global int holding
  1 - TODO confirm. */

  if (setsockopt(sock, IPPROTO_TCP, TCP_FASTOPEN, &on, sizeof(on)) < 0)
    {
    DEBUG(D_transport)
      debug_printf("Tried TCP Fast Open but apparently not enabled by sysctl\n");
    goto legacy_connect;
    }
  if ((rc = sendto(sock, fastopen_blob->data, fastopen_blob->len, 0,
		    s_ptr, s_len)) >= 0)
    {
    DEBUG(D_transport|D_v)
      debug_printf(" TFO mode connection attempt to %s, %lu data\n",
	address, (unsigned long)fastopen_blob->len);
    tcp_out_fastopen = fastopen_blob->len > 0 ? TFO_ATTEMPTED_DATA : TFO_ATTEMPTED_NODATA;
    }

# elif defined(EXIM_TFO_CONNECTX)
  /* MacOS */
  sa_endpoints_t ends = {
    .sae_srcif = 0, .sae_srcaddr = NULL, .sae_srcaddrlen = 0,
    .sae_dstaddr = s_ptr, .sae_dstaddrlen = s_len };
  struct iovec iov = {
    .iov_base = fastopen_blob->data, .iov_len = fastopen_blob->len };
  size_t len;

  if ((rc = connectx(sock, &ends, SAE_ASSOCID_ANY,
	    CONNECT_DATA_IDEMPOTENT, &iov, 1, &len, NULL)) == 0)
    {
    DEBUG(D_transport|D_v)
      debug_printf(" TFO mode connection attempt to %s, %lu data\n",
	address, (unsigned long)fastopen_blob->len);
    tcp_out_fastopen = fastopen_blob->len > 0 ? TFO_ATTEMPTED_DATA : TFO_ATTEMPTED_NODATA;

    /* A short count is only reported in debug output; nothing is resent. */

    if (len != fastopen_blob->len)
      DEBUG(D_transport|D_v)
	debug_printf(" only queued %lu data!\n", (unsigned long)len);
    }
  else if (errno == EINPROGRESS)
    {
    DEBUG(D_transport|D_v) debug_printf(" TFO mode connectx, %s data: EINPROGRESS\n",
      fastopen_blob->len > 0 ? "with"  : "no");
    if (!fastopen_blob->data)
      {
      tcp_out_fastopen = TFO_ATTEMPTED_NODATA;	/* we tried; unknown if useful yet */
      rc = 0;
      }
    else					/* assume that no data was queued; block in send */
      rc = send(sock, fastopen_blob->data, fastopen_blob->len, 0);
    }
# endif
  }
else
#endif	/*EXIM_SUPPORT_TFO*/
  {
  /* The label exists only in builds where one of the TFO variants above can
  jump to it (the connectx variant never does). */

#if defined(EXIM_SUPPORT_TFO) && !defined(EXIM_TFO_CONNECTX)
legacy_connect:
#endif

  DEBUG(D_transport|D_v) if (fastopen_blob)
    debug_printf(" non-TFO mode connection attempt to %s, %lu data\n",
      address, (unsigned long)fastopen_blob->len);

  /* Plain connect; any early-data is sent afterwards, and a failure of that
  send counts as a failure of the connection. */

  if ((rc = connect(sock, s_ptr, s_len)) >= 0)
    if (  fastopen_blob && fastopen_blob->data && fastopen_blob->len
       && send(sock, fastopen_blob->data, fastopen_blob->len, 0) < 0)
      rc = -1;
  }

/* Capture errno before ALARM_CLR() gets a chance to change it. */

save_errno = errno;
ALARM_CLR(0);

/* There is a testing facility for simulating a connection timeout, as I
can't think of any other way of doing this. It converts a connection refused
into a timeout if the timeout is set to 999999. */

/* NOTE(review): this test does not look at rc, so a stale ECONNREFUSED left
in errno by an earlier call would also convert a successful connection when
running in the test harness - confirm whether that matters. */

if (f.running_in_test_harness && save_errno == ECONNREFUSED && timeout == 999999)
  {
  rc = -1;
  save_errno = EINTR;
  sigalrm_seen = TRUE;
  }

/* Success */

if (rc >= 0)
  return 0;

/* A failure whose error code is "Interrupted system call" is in fact
an externally applied timeout if the signal handler has been run. */

errno = save_errno == EINTR && sigalrm_seen ? ETIMEDOUT : save_errno;
return -1;
}
381
382
a6d4c44e
TF
383
384/*************************************************
385* Create connected socket to remote host *
386*************************************************/
387
b1f8e4f8
JH
388/* Create a socket and connect to host (name or number, ipv6 ok)
389 at one of port-range.
a6d4c44e 390
b1f8e4f8
JH
391Arguments:
392 type SOCK_DGRAM or SOCK_STREAM
393 af AF_INET6 or AF_INET for the socket type
afdb5e9c 394 hostname host name, or ip address (as text)
b1f8e4f8
JH
395 portlo,porthi the remote port range
396 timeout a timeout
4a5cbaff 397 connhost if not NULL, host_item to be filled in with connection details
b1f8e4f8 398 errstr pointer for allocated string on error
10ac8d7f 399 fastopen_blob with SOCK_STREAM, if non-null, request TCP Fast Open.
b536a578 400 Additionally, optional idempotent early-data to send
b1f8e4f8
JH
401
402Return:
403 socket fd, or -1 on failure (having allocated an error string)
404*/
405int
406ip_connectedsocket(int type, const uschar * hostname, int portlo, int porthi,
10ac8d7f 407 int timeout, host_item * connhost, uschar ** errstr, const blob * fastopen_blob)
b1f8e4f8 408{
d7978c0f 409int namelen;
b1f8e4f8 410host_item shost;
b1f8e4f8
JH
411int af = 0, fd, fd4 = -1, fd6 = -1;
412
413shost.next = NULL;
414shost.address = NULL;
415shost.port = portlo;
416shost.mx = -1;
417
418namelen = Ustrlen(hostname);
419
420/* Anything enclosed in [] must be an IP address. */
421
422if (hostname[0] == '[' &&
423 hostname[namelen - 1] == ']')
424 {
af483912 425 uschar * host = string_copyn(hostname+1, namelen-2);
b1f8e4f8
JH
426 if (string_is_ip_address(host, NULL) == 0)
427 {
428 *errstr = string_sprintf("malformed IP address \"%s\"", hostname);
429 return -1;
430 }
431 shost.name = shost.address = host;
432 }
433
434/* Otherwise check for an unadorned IP address */
435
436else if (string_is_ip_address(hostname, NULL) != 0)
af483912 437 shost.name = shost.address = string_copyn(hostname, namelen);
b1f8e4f8
JH
438
439/* Otherwise lookup IP address(es) from the name */
440
441else
442 {
af483912 443 shost.name = string_copyn(hostname, namelen);
1f155f8e
JH
444 if (host_find_byname(&shost, NULL, HOST_FIND_QUALIFY_SINGLE,
445 NULL, FALSE) != HOST_FOUND)
b1f8e4f8
JH
446 {
447 *errstr = string_sprintf("no IP address found for host %s", shost.name);
448 return -1;
449 }
450 }
451
452/* Try to connect to the server - test each IP till one works */
453
d7978c0f 454for (host_item * h = &shost; h; h = h->next)
b1f8e4f8 455 {
af483912
JH
456 fd = Ustrchr(h->address, ':') != 0
457 ? fd6 < 0 ? (fd6 = ip_socket(type, af = AF_INET6)) : fd6
458 : fd4 < 0 ? (fd4 = ip_socket(type, af = AF_INET )) : fd4;
b1f8e4f8
JH
459
460 if (fd < 0)
461 {
462 *errstr = string_sprintf("failed to create socket: %s", strerror(errno));
463 goto bad;
464 }
465
d7978c0f 466 for (int port = portlo; port <= porthi; port++)
10ac8d7f 467 if (ip_connect(fd, af, h->address, port, timeout, fastopen_blob) == 0)
b1f8e4f8
JH
468 {
469 if (fd != fd6) close(fd6);
470 if (fd != fd4) close(fd4);
8a512ed5
JH
471 if (connhost)
472 {
b1f8e4f8
JH
473 h->port = port;
474 *connhost = *h;
475 connhost->next = NULL;
476 }
477 return fd;
478 }
479 }
480
a9764ac5
JH
481*errstr = string_sprintf("failed to connect to any address for %s: %s",
482 hostname, strerror(errno));
b1f8e4f8
JH
483
484bad:
485 close(fd4); close(fd6); return -1;
486}
487
059ec3d9 488
4a5cbaff 489/*XXX TFO? */
3e60dd41 490int
7d2f2d36
JH
491ip_tcpsocket(const uschar * hostport, uschar ** errstr, int tmo,
492 host_item * connhost)
3e60dd41 493{
a39bd74d
JB
494int scan;
495uschar hostname[256];
496unsigned int portlow, porthigh;
497
498/* extract host and port part */
499scan = sscanf(CS hostport, "%255s %u-%u", hostname, &portlow, &porthigh);
500if (scan != 3)
501 {
502 if (scan != 2)
503 {
504 *errstr = string_sprintf("invalid socket '%s'", hostport);
505 return -1;
3e60dd41 506 }
a39bd74d 507 porthigh = portlow;
3e60dd41
JH
508 }
509
a39bd74d 510return ip_connectedsocket(SOCK_STREAM, hostname, portlow, porthigh,
7d2f2d36 511 tmo, connhost, errstr, NULL);
3e60dd41
JH
512}
513
514int
515ip_unixsocket(const uschar * path, uschar ** errstr)
516{
a39bd74d
JB
517int sock;
518struct sockaddr_un server;
3e60dd41 519
a39bd74d
JB
520if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
521 {
522 *errstr = US"can't open UNIX socket.";
523 return -1;
3e60dd41
JH
524 }
525
af483912 526callout_address = string_copy(path);
a39bd74d 527server.sun_family = AF_UNIX;
f3ebb786 528Ustrncpy(US server.sun_path, path, sizeof(server.sun_path)-1);
a39bd74d
JB
529server.sun_path[sizeof(server.sun_path)-1] = '\0';
530if (connect(sock, (struct sockaddr *) &server, sizeof(server)) < 0)
531 {
532 int err = errno;
533 (void)close(sock);
534 *errstr = string_sprintf("unable to connect to UNIX socket (%s): %s",
535 path, strerror(err));
536 return -1;
537 }
538return sock;
3e60dd41
JH
539}
540
c71c454d
JH
541/* spec is either an absolute path (with a leading /), or
542a host (name or IP) and port (whitespace-separated).
543The port can be a range, dash-separated, or a single number.
7d2f2d36
JH
544
545For a TCP socket, optionally fill in a host_item.
c71c454d 546*/
3e60dd41 547int
7d2f2d36
JH
548ip_streamsocket(const uschar * spec, uschar ** errstr, int tmo,
549 host_item * connhost)
3e60dd41 550{
a39bd74d 551return *spec == '/'
7d2f2d36 552 ? ip_unixsocket(spec, errstr) : ip_tcpsocket(spec, errstr, tmo, connhost);
3e60dd41
JH
553}
554
059ec3d9
PH
555/*************************************************
556* Set keepalive on a socket *
557*************************************************/
558
559/* Can be called for both incoming and outgoing sockets.
560
561Arguments:
562 sock the socket
563 address the remote host address, for failure logging
564 torf true for outgoing connection, false for incoming
565
566Returns: nothing
567*/
568
569void
55414b25 570ip_keepalive(int sock, const uschar *address, BOOL torf)
059ec3d9
PH
571{
572int fodder = 1;
573if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
5903c6ff 574 US (&fodder), sizeof(fodder)) != 0)
059ec3d9
PH
575 log_write(0, LOG_MAIN, "setsockopt(SO_KEEPALIVE) on connection %s %s "
576 "failed: %s", torf? "to":"from", address, strerror(errno));
577}
578
579
580
581/*************************************************
582* Receive from a socket with timeout *
583*************************************************/
584
4e71661f 585/*
059ec3d9 586Arguments:
4e71661f 587 fd the file descriptor
0a5441fc 588 timelimit the timeout endpoint, seconds-since-epoch
4e71661f
JH
589Returns: TRUE => ready for i/o
590 FALSE => timed out, or other error
059ec3d9 591*/
4e71661f 592BOOL
0a5441fc 593fd_ready(int fd, time_t timelimit)
059ec3d9
PH
594{
595fd_set select_inset;
0a5441fc 596int time_left = timelimit - time(NULL);
059ec3d9
PH
597int rc;
598
85ff3cf9 599if (time_left <= 0)
4e71661f
JH
600 {
601 errno = ETIMEDOUT;
602 return FALSE;
603 }
059ec3d9
PH
604/* Wait until the socket is ready */
605
a39bd74d 606do
059ec3d9 607 {
f2ed27cf 608 struct timeval tv = { .tv_sec = time_left, .tv_usec = 0 };
059ec3d9 609 FD_ZERO (&select_inset);
4e71661f 610 FD_SET (fd, &select_inset);
059ec3d9 611
0f0c8159 612 /*DEBUG(D_transport) debug_printf("waiting for data on fd\n");*/
4e71661f 613 rc = select(fd + 1, (SELECT_ARG2_TYPE *)&select_inset, NULL, NULL, &tv);
059ec3d9
PH
614
615 /* If some interrupt arrived, just retry. We presume this to be rare,
616 but it can happen (e.g. the SIGUSR1 signal sent by exiwhat causes
617 select() to exit).
618
619 Aug 2004: Somebody set up a cron job that ran exiwhat every 2 minutes, making
620 the interrupt not at all rare. Since the timeout is typically more than 2
621 minutes, the effect was to block the timeout completely. To prevent this
c528cec4
HSHR
622 happening again, we do an explicit time test and adjust the timeout
623 accordingly */
059ec3d9
PH
624
625 if (rc < 0 && errno == EINTR)
626 {
627 DEBUG(D_transport) debug_printf("EINTR while waiting for socket data\n");
85ff3cf9 628
c528cec4 629 /* Watch out, 'continue' jumps to the condition, not to the loops top */
0a5441fc 630 if ((time_left = timelimit - time(NULL)) > 0) continue;
059ec3d9
PH
631 }
632
059ec3d9
PH
633 if (rc <= 0)
634 {
635 errno = ETIMEDOUT;
4e71661f 636 return FALSE;
059ec3d9
PH
637 }
638
c528cec4
HSHR
639 /* Checking the FD_ISSET is not enough, if we're interrupted, the
640 select_inset may still contain the 'input'. */
059ec3d9 641 }
4bd6107d 642while (rc < 0 || !FD_ISSET(fd, &select_inset));
4e71661f
JH
643return TRUE;
644}
645
646/* The timeout is implemented using select(), and we loop to cover select()
647getting interrupted, and the possibility of select() returning with a positive
648result but no ready descriptor. Is this in fact possible?
649
650Arguments:
74f1a423 651 cctx the connection context (socket fd, possibly TLS context)
4e71661f
JH
652 buffer to read into
653 bufsize the buffer size
0a5441fc 654 timelimit the timeout endpoint, seconds-since-epoch
4e71661f
JH
655
656Returns: > 0 => that much data read
657 <= 0 on error or EOF; errno set - zero for EOF
658*/
659
660int
0a5441fc 661ip_recv(client_conn_ctx * cctx, uschar * buffer, int buffsize, time_t timelimit)
4e71661f
JH
662{
663int rc;
664
0a5441fc 665if (!fd_ready(cctx->sock, timelimit))
4e71661f 666 return -1;
059ec3d9
PH
667
668/* The socket is ready, read from it (via TLS if it's active). On EOF (i.e.
669close down of the connection), set errno to zero; otherwise leave it alone. */
670
01603eec 671#ifndef DISABLE_TLS
74f1a423
JH
672if (cctx->tls_ctx) /* client TLS */
673 rc = tls_read(cctx->tls_ctx, buffer, buffsize);
674else if (tls_in.active.sock == cctx->sock) /* server TLS */
675 rc = tls_read(NULL, buffer, buffsize);
059ec3d9
PH
676else
677#endif
74f1a423 678 rc = recv(cctx->sock, buffer, buffsize, 0);
059ec3d9
PH
679
680if (rc > 0) return rc;
681if (rc == 0) errno = 0;
682return -1;
683}
684
685
9e4f5962
PP
686
687
688/*************************************************
13363eba
PP
689* Lookup address family of potential socket *
690*************************************************/
691
/* Report the address family of a file descriptor, as seen by getsockname();
this tells IPv4 from IPv6. If the descriptor is not a socket then -1 is
returned.

The value 0 is typically AF_UNSPEC, which should not be seen on a connected
fd.  If the return is -1, the errno will be from getsockname(); probably
ENOTSOCK or ECONNRESET.

Arguments:     socket-or-not fd
Returns:       address family or -1
*/

int
ip_get_address_family(int fd)
{
struct sockaddr_storage local;
socklen_t local_len = sizeof(local);

return getsockname(fd, (struct sockaddr *) &local, &local_len) < 0
  ? -1
  : (int) local.ss_family;
}
715
716
717
718
719/*************************************************
9e4f5962
PP
720* Lookup DSCP settings for a socket *
721*************************************************/
722
723struct dscp_name_tableentry {
724 const uschar *name;
725 int value;
726};
727/* Keep both of these tables sorted! */
728static struct dscp_name_tableentry dscp_table[] = {
729#ifdef IPTOS_DSCP_AF11
36a3ae5f
PP
730 { CUS"af11", IPTOS_DSCP_AF11 },
731 { CUS"af12", IPTOS_DSCP_AF12 },
732 { CUS"af13", IPTOS_DSCP_AF13 },
733 { CUS"af21", IPTOS_DSCP_AF21 },
734 { CUS"af22", IPTOS_DSCP_AF22 },
735 { CUS"af23", IPTOS_DSCP_AF23 },
736 { CUS"af31", IPTOS_DSCP_AF31 },
737 { CUS"af32", IPTOS_DSCP_AF32 },
738 { CUS"af33", IPTOS_DSCP_AF33 },
739 { CUS"af41", IPTOS_DSCP_AF41 },
740 { CUS"af42", IPTOS_DSCP_AF42 },
741 { CUS"af43", IPTOS_DSCP_AF43 },
742 { CUS"ef", IPTOS_DSCP_EF },
9e4f5962
PP
743#endif
744#ifdef IPTOS_LOWCOST
36a3ae5f 745 { CUS"lowcost", IPTOS_LOWCOST },
9e4f5962 746#endif
36a3ae5f 747 { CUS"lowdelay", IPTOS_LOWDELAY },
9e4f5962 748#ifdef IPTOS_MINCOST
36a3ae5f 749 { CUS"mincost", IPTOS_MINCOST },
9e4f5962 750#endif
36a3ae5f
PP
751 { CUS"reliability", IPTOS_RELIABILITY },
752 { CUS"throughput", IPTOS_THROUGHPUT }
9e4f5962
PP
753};
754static int dscp_table_size =
755 sizeof(dscp_table) / sizeof(struct dscp_name_tableentry);
756
757/* DSCP values change by protocol family, and so do the options used for
2a1b36b3
PP
758setsockopt(); this utility does all the lookups. It takes an unexpanded
759option string, expands it, strips off affix whitespace, then checks if it's
760a number. If all of what's left is a number, then that's how the option will
761be parsed and success/failure is a range check. If it's not all a number,
762then it must be a supported keyword.
9e4f5962
PP
763
764Arguments:
765 dscp_name a string, so far unvalidated
766 af address_family in use
767 level setsockopt level to use
768 optname setsockopt name to use
769 dscp_value value for dscp_name
770
771Returns: TRUE if okay to setsockopt(), else FALSE
2a1b36b3
PP
772
773*level and *optname may be set even if FALSE is returned
9e4f5962
PP
774*/
775
776BOOL
777dscp_lookup(const uschar *dscp_name, int af,
778 int *level, int *optname, int *dscp_value)
779{
2a1b36b3 780uschar *dscp_lookup, *p;
9e4f5962 781int first, last;
2a1b36b3 782long rawlong;
9e4f5962
PP
783
784if (af == AF_INET)
785 {
786 *level = IPPROTO_IP;
787 *optname = IP_TOS;
788 }
bb7b9411 789#if HAVE_IPV6 && defined(IPV6_TCLASS)
9e4f5962
PP
790else if (af == AF_INET6)
791 {
792 *level = IPPROTO_IPV6;
793 *optname = IPV6_TCLASS;
794 }
b301a50b 795#endif
9e4f5962
PP
796else
797 {
798 DEBUG(D_transport)
799 debug_printf("Unhandled address family %d in dscp_lookup()\n", af);
800 return FALSE;
801 }
802if (!dscp_name)
803 {
804 DEBUG(D_transport)
805 debug_printf("[empty DSCP]\n");
806 return FALSE;
807 }
808dscp_lookup = expand_string(US dscp_name);
809if (dscp_lookup == NULL || *dscp_lookup == '\0')
810 return FALSE;
811
2a1b36b3
PP
812p = dscp_lookup + Ustrlen(dscp_lookup) - 1;
813while (isspace(*p)) *p-- = '\0';
814while (isspace(*dscp_lookup) && dscp_lookup < p) dscp_lookup++;
815if (*dscp_lookup == '\0')
816 return FALSE;
817
818rawlong = Ustrtol(dscp_lookup, &p, 0);
819if (p != dscp_lookup && *p == '\0')
820 {
821 /* We have six bits available, which will end up shifted to fit in 0xFC mask.
822 RFC 2597 defines the values unshifted. */
823 if (rawlong < 0 || rawlong > 0x3F)
824 {
825 DEBUG(D_transport)
826 debug_printf("DSCP value %ld out of range, ignored.\n", rawlong);
827 return FALSE;
828 }
829 *dscp_value = rawlong << 2;
830 return TRUE;
831 }
832
9e4f5962
PP
833first = 0;
834last = dscp_table_size;
835while (last > first)
836 {
837 int middle = (first + last)/2;
838 int c = Ustrcmp(dscp_lookup, dscp_table[middle].name);
839 if (c == 0)
840 {
841 *dscp_value = dscp_table[middle].value;
842 return TRUE;
843 }
844 else if (c > 0)
9e4f5962 845 first = middle + 1;
9e4f5962 846 else
9e4f5962 847 last = middle;
9e4f5962
PP
848 }
849return FALSE;
850}
851
36a3ae5f
PP
852void
853dscp_list_to_stream(FILE *stream)
854{
d7978c0f 855for (int i = 0; i < dscp_table_size; ++i)
36a3ae5f
PP
856 fprintf(stream, "%s\n", dscp_table[i].name);
857}
858
9e4f5962 859
059ec3d9 860/* End of ip.c */
8a512ed5
JH
861/* vi: aw ai sw=2
862*/