TFO: better detection of client fast-open connections (again)
[exim.git] / src / src / ip.c
CommitLineData
059ec3d9
PH
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
d4e5e70b 5/* Copyright (c) University of Cambridge 1995 - 2017 */
059ec3d9
PH
6/* See the file NOTICE for conditions of use and distribution. */
7
8/* Functions for doing things with sockets. With the advent of IPv6 this has
9got messier, so that it's worth pulling out the code into separate functions
4c04137d 10that other parts of Exim can call, especially as there are now several
059ec3d9
PH
11different places in the code where sockets are used. */
12
13
14#include "exim.h"
15
16
17/*************************************************
18* Create a socket *
19*************************************************/
20
21/* Socket creation happens in a number of places so it's packaged here for
22convenience.
23
24Arguments:
25 type SOCK_DGRAM or SOCK_STREAM
26 af AF_INET or AF_INET6
27
28Returns: socket number or -1 on failure
29*/
30
31int
32ip_socket(int type, int af)
33{
34int sock = socket(af, type, 0);
35if (sock < 0)
36 log_write(0, LOG_MAIN, "IPv%c socket creation failed: %s",
37 (af == AF_INET6)? '6':'4', strerror(errno));
38return sock;
39}
40
41
42
43
44#if HAVE_IPV6
45/*************************************************
46* Convert printing address to numeric *
47*************************************************/
48
49/* This function converts the textual form of an IP address into a numeric form
50in an appropriate structure in an IPv6 environment. The getaddrinfo() function
51can (apparently) handle more complicated addresses (e.g. those containing
52scopes) than inet_pton() in some environments. We use hints to tell it that the
53input must be a numeric address.
54
55However, apparently some operating systems (or libraries) don't support
56getaddrinfo(), so there is a build-time option to revert to inet_pton() (which
57does not support scopes).
58
59Arguments:
60 address textual form of the address
61 saddr where to copy back the answer
62
63Returns: nothing - failure provokes a panic-die
64*/
65
static void
ip_addrinfo(const uschar *address, struct sockaddr_in6 *saddr)
{
#ifdef IPV6_USE_INET_PTON

/* Build-time fallback: inet_pton() fills in only the address member, so the
family has to be set by hand afterwards. */
if (inet_pton(AF_INET6, CCS address, &saddr->sin6_addr) != 1)
  log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an "
    "IP address", address);
saddr->sin6_family = AF_INET6;

#else

/* Normal case: ask getaddrinfo() for a purely numeric IPv6 parse and copy
the whole resulting sockaddr over the caller's structure. */
struct addrinfo *found;
struct addrinfo want;
int gai_rc;

memset(&want, 0, sizeof(want));
want.ai_flags = AI_NUMERICHOST;
want.ai_socktype = SOCK_STREAM;
want.ai_family = AF_INET6;

gai_rc = getaddrinfo(CCS address, NULL, &want, &found);
if (gai_rc != 0 || found == NULL)
  log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an "
    "IP address: %s", address,
    gai_rc == 0 ? "NULL result returned" : gai_strerror(gai_rc));

memcpy(saddr, found->ai_addr, found->ai_addrlen);
freeaddrinfo(found);

#endif
}
93#endif /* HAVE_IPV6 */
94
95
96/*************************************************
97* Bind socket to interface and port *
98*************************************************/
99
059ec3d9 100int
7eb6c37c 101ip_addr(void * sin_, int af, const uschar * address, int port)
059ec3d9 102{
7eb6c37c 103union sockaddr_46 * sin = sin_;
69cbeaec 104memset(sin, 0, sizeof(*sin));
059ec3d9
PH
105
106/* Setup code when using an IPv6 socket. The wildcard address is ":", to
107ensure an IPv6 socket is used. */
108
109#if HAVE_IPV6
110if (af == AF_INET6)
111 {
112 if (address[0] == ':' && address[1] == 0)
113 {
7eb6c37c
JH
114 sin->v6.sin6_family = AF_INET6;
115 sin->v6.sin6_addr = in6addr_any;
059ec3d9
PH
116 }
117 else
7eb6c37c
JH
118 ip_addrinfo(address, &sin->v6); /* Panic-dies on error */
119 sin->v6.sin6_port = htons(port);
120 return sizeof(sin->v6);
059ec3d9
PH
121 }
122else
123#else /* HAVE_IPv6 */
124af = af; /* Avoid compiler warning */
125#endif /* HAVE_IPV6 */
126
127/* Setup code when using IPv4 socket. The wildcard address is "". */
128
129 {
7eb6c37c
JH
130 sin->v4.sin_family = AF_INET;
131 sin->v4.sin_port = htons(port);
132 sin->v4.sin_addr.s_addr = address[0] == 0
133 ? (S_ADDR_TYPE)INADDR_ANY
134 : (S_ADDR_TYPE)inet_addr(CS address);
135 return sizeof(sin->v4);
059ec3d9 136 }
7eb6c37c 137}
059ec3d9 138
059ec3d9 139
7eb6c37c
JH
140
141/* This function binds a socket to a local interface address and port. For a
142wildcard IPv6 bind, the address is ":".
143
144Arguments:
145 sock the socket
146 af AF_INET or AF_INET6 - the socket type
147 address the IP address, in text form
148 port the IP port (host order)
149
150Returns: the result of bind()
151*/
152
153int
154ip_bind(int sock, int af, uschar *address, int port)
155{
156union sockaddr_46 sin;
157int s_len = ip_addr(&sin, af, address, port);
059ec3d9
PH
158return bind(sock, (struct sockaddr *)&sin, s_len);
159}
160
161
162
163/*************************************************
164* Connect socket to remote host *
165*************************************************/
166
167/* This function connects a socket to a remote address and port. The socket may
d515a917
PH
168or may not have previously been bound to a local interface. The socket is not
169closed, even in cases of error. It is expected that the calling function, which
170created the socket, will be the one that closes it.
059ec3d9
PH
171
172Arguments:
173 sock the socket
174 af AF_INET6 or AF_INET for the socket type
175 address the remote address, in text form
176 port the remote port
b1f8e4f8 177 timeout a timeout (zero for indefinite timeout)
0ab63f3d
JH
178 fastopen non-null iff TCP_FASTOPEN can be used; may indicate early-data to
179 be sent in SYN segment
059ec3d9
PH
180
181Returns: 0 on success; -1 on failure, with errno set
182*/
183
int
ip_connect(int sock, int af, const uschar *address, int port, int timeout,
  const blob * fastopen)
{
struct sockaddr_in s_in4;
struct sockaddr *s_ptr;
int s_len, rc, save_errno;
int plain_connect = 1;		/* cleared while a Fast Open attempt stands */

/* Build the destination sockaddr: an IPv6 structure for an IPv6 address,
otherwise an IPv4 one (even on a system that has IPv6 support). */

#if HAVE_IPV6
struct sockaddr_in6 s_in6;
if (af == AF_INET6)
  {
  memset(&s_in6, 0, sizeof(s_in6));
  ip_addrinfo(address, &s_in6);		/* Panic-dies on error */
  s_in6.sin6_port = htons(port);
  s_len = sizeof(s_in6);
  s_ptr = (struct sockaddr *)&s_in6;
  }
else
#else	/* HAVE_IPV6 */
(void)af;				/* Avoid compiler warning */
#endif	/* HAVE_IPV6 */
  {
  memset(&s_in4, 0, sizeof(s_in4));
  s_in4.sin_family = AF_INET;
  s_in4.sin_port = htons(port);
  s_in4.sin_addr.s_addr = (S_ADDR_TYPE)inet_addr(CCS address);
  s_len = sizeof(s_in4);
  s_ptr = (struct sockaddr *)&s_in4;
  }

/* Arm the alarm only when a timeout was asked for; with none, the OS's own
connection timeout applies. */

callout_address = string_sprintf("[%s]:%d", address, port);
sigalrm_seen = FALSE;
if (timeout > 0) alarm(timeout);

#if defined(TCP_FASTOPEN) && defined(MSG_FASTOPEN)
/* TCP Fast Open: when the system holds a cookie from an earlier connection to
this peer, data can travel in the SYN, and the peer can send before it has our
ACK of its SYN,ACK (useful for the SMTP banner). Other, non-SMTP, uses can
supply early data for the SYN as well. */

if (fastopen)
  {
  plain_connect = 0;
  rc = sendto(sock, fastopen->data, fastopen->len,
	      MSG_FASTOPEN | MSG_DONTWAIT, s_ptr, s_len);

  if (rc >= 0)
    {
    /* Seen for the with-data, experimental TFO option, with-cookie case */
    DEBUG(D_transport|D_v) debug_printf("TFO mode connection attempt, %s data\n",
      fastopen->len > 0 ? "with" : "no");
    tcp_out_fastopen = fastopen->len > 0 ? 2 : 1;
    }
  else if (errno == EINPROGRESS)	/* expected for nonready peer */
    {
    /* Seen for the no-data, proper TFO option, in both the cookie-request and
    with-cookie cases; apparently the difference is not visible at this point.
    With network delay, a post-connect tcp_info sees unacked 1 for R, 2 for C
    (code in smtp_out.c). Possibly also older experimental TFO behaviour. */

    if (!fastopen->data)
      {
      tcp_out_fastopen = 1;		/* we tried; unknown if useful yet */
      rc = 0;
      }
    else
      {
      /* Queue the data that did not go out with the SYN */
      rc = send(sock, fastopen->data, fastopen->len, 0);
      if (rc < 0 && errno == EINPROGRESS)	/* expected for nonready peer */
	rc = 0;
      }
    }
  else if (errno == EOPNOTSUPP)
    {
    DEBUG(D_transport)
      debug_printf("Tried TCP Fast Open but apparently not enabled by sysctl\n");
    plain_connect = 1;			/* fall back to an ordinary connect */
    }
  }
#endif

/* Ordinary connect(), used when Fast Open was not requested, is not compiled
in, or was refused above. Any early data is sent once the connection is up. */

if (plain_connect)
  {
  rc = connect(sock, s_ptr, s_len);
  if (  rc >= 0
     && fastopen && fastopen->data && fastopen->len
     && send(sock, fastopen->data, fastopen->len, 0) < 0)
    rc = -1;
  }

save_errno = errno;
alarm(0);

/* Testing facility for simulating a connection timeout: a connection refused
is converted into a timeout if the timeout is set to 999999. */

if (running_in_test_harness && save_errno == ECONNREFUSED && timeout == 999999)
  {
  sigalrm_seen = TRUE;
  save_errno = EINTR;
  rc = -1;
  }

/* Success */

if (rc >= 0)
  return 0;

/* A failure whose error code is "Interrupted system call" is in fact an
externally applied timeout if the signal handler has been run. */

if (save_errno == EINTR && sigalrm_seen)
  save_errno = ETIMEDOUT;
errno = save_errno;
return -1;
}
303
304
a6d4c44e
TF
305
306/*************************************************
307* Create connected socket to remote host *
308*************************************************/
309
b1f8e4f8
JH
310/* Create a socket and connect to host (name or number, ipv6 ok)
311 at one of port-range.
a6d4c44e 312
b1f8e4f8
JH
313Arguments:
314 type SOCK_DGRAM or SOCK_STREAM
315 af AF_INET6 or AF_INET for the socket type
316 address the remote address, in text form
317 portlo,porthi the remote port range
318 timeout a timeout
4a5cbaff 319 connhost if not NULL, host_item to be filled in with connection details
b1f8e4f8 320 errstr pointer for allocated string on error
4a5cbaff
JH
321 fastopen with SOCK_STREAM, if non-null, request TCP Fast Open.
322 Additionally, optional early-data to send
b1f8e4f8
JH
323
324Return:
325 socket fd, or -1 on failure (having allocated an error string)
326*/
327int
328ip_connectedsocket(int type, const uschar * hostname, int portlo, int porthi,
4a5cbaff 329 int timeout, host_item * connhost, uschar ** errstr, const blob * fastopen)
b1f8e4f8
JH
330{
331int namelen, port;
332host_item shost;
333host_item *h;
334int af = 0, fd, fd4 = -1, fd6 = -1;
335
336shost.next = NULL;
337shost.address = NULL;
338shost.port = portlo;
339shost.mx = -1;
340
341namelen = Ustrlen(hostname);
342
343/* Anything enclosed in [] must be an IP address. */
344
345if (hostname[0] == '[' &&
346 hostname[namelen - 1] == ']')
347 {
af483912 348 uschar * host = string_copyn(hostname+1, namelen-2);
b1f8e4f8
JH
349 if (string_is_ip_address(host, NULL) == 0)
350 {
351 *errstr = string_sprintf("malformed IP address \"%s\"", hostname);
352 return -1;
353 }
354 shost.name = shost.address = host;
355 }
356
357/* Otherwise check for an unadorned IP address */
358
359else if (string_is_ip_address(hostname, NULL) != 0)
af483912 360 shost.name = shost.address = string_copyn(hostname, namelen);
b1f8e4f8
JH
361
362/* Otherwise lookup IP address(es) from the name */
363
364else
365 {
af483912 366 shost.name = string_copyn(hostname, namelen);
1f155f8e
JH
367 if (host_find_byname(&shost, NULL, HOST_FIND_QUALIFY_SINGLE,
368 NULL, FALSE) != HOST_FOUND)
b1f8e4f8
JH
369 {
370 *errstr = string_sprintf("no IP address found for host %s", shost.name);
371 return -1;
372 }
373 }
374
375/* Try to connect to the server - test each IP till one works */
376
fb05276a 377for (h = &shost; h; h = h->next)
b1f8e4f8 378 {
af483912
JH
379 fd = Ustrchr(h->address, ':') != 0
380 ? fd6 < 0 ? (fd6 = ip_socket(type, af = AF_INET6)) : fd6
381 : fd4 < 0 ? (fd4 = ip_socket(type, af = AF_INET )) : fd4;
b1f8e4f8
JH
382
383 if (fd < 0)
384 {
385 *errstr = string_sprintf("failed to create socket: %s", strerror(errno));
386 goto bad;
387 }
388
389 for(port = portlo; port <= porthi; port++)
6af7e591 390 if (ip_connect(fd, af, h->address, port, timeout, fastopen) == 0)
b1f8e4f8
JH
391 {
392 if (fd != fd6) close(fd6);
393 if (fd != fd4) close(fd4);
8a512ed5
JH
394 if (connhost)
395 {
b1f8e4f8
JH
396 h->port = port;
397 *connhost = *h;
398 connhost->next = NULL;
399 }
400 return fd;
401 }
402 }
403
a9764ac5
JH
404*errstr = string_sprintf("failed to connect to any address for %s: %s",
405 hostname, strerror(errno));
b1f8e4f8
JH
406
407bad:
408 close(fd4); close(fd6); return -1;
409}
410
059ec3d9 411
4a5cbaff 412/*XXX TFO? */
3e60dd41
JH
413int
414ip_tcpsocket(const uschar * hostport, uschar ** errstr, int tmo)
415{
a39bd74d
JB
416int scan;
417uschar hostname[256];
418unsigned int portlow, porthigh;
419
420/* extract host and port part */
421scan = sscanf(CS hostport, "%255s %u-%u", hostname, &portlow, &porthigh);
422if (scan != 3)
423 {
424 if (scan != 2)
425 {
426 *errstr = string_sprintf("invalid socket '%s'", hostport);
427 return -1;
3e60dd41 428 }
a39bd74d 429 porthigh = portlow;
3e60dd41
JH
430 }
431
a39bd74d 432return ip_connectedsocket(SOCK_STREAM, hostname, portlow, porthigh,
4a5cbaff 433 tmo, NULL, errstr, NULL);
3e60dd41
JH
434}
435
436int
437ip_unixsocket(const uschar * path, uschar ** errstr)
438{
a39bd74d
JB
439int sock;
440struct sockaddr_un server;
3e60dd41 441
a39bd74d
JB
442if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
443 {
444 *errstr = US"can't open UNIX socket.";
445 return -1;
3e60dd41
JH
446 }
447
af483912 448callout_address = string_copy(path);
a39bd74d
JB
449server.sun_family = AF_UNIX;
450Ustrncpy(server.sun_path, path, sizeof(server.sun_path)-1);
451server.sun_path[sizeof(server.sun_path)-1] = '\0';
452if (connect(sock, (struct sockaddr *) &server, sizeof(server)) < 0)
453 {
454 int err = errno;
455 (void)close(sock);
456 *errstr = string_sprintf("unable to connect to UNIX socket (%s): %s",
457 path, strerror(err));
458 return -1;
459 }
460return sock;
3e60dd41
JH
461}
462
463int
464ip_streamsocket(const uschar * spec, uschar ** errstr, int tmo)
465{
a39bd74d
JB
466return *spec == '/'
467 ? ip_unixsocket(spec, errstr) : ip_tcpsocket(spec, errstr, tmo);
3e60dd41
JH
468}
469
059ec3d9
PH
470/*************************************************
471* Set keepalive on a socket *
472*************************************************/
473
474/* Can be called for both incoming and outgoing sockets.
475
476Arguments:
477 sock the socket
478 address the remote host address, for failure logging
479 torf true for outgoing connection, false for incoming
480
481Returns: nothing
482*/
483
484void
55414b25 485ip_keepalive(int sock, const uschar *address, BOOL torf)
059ec3d9
PH
486{
487int fodder = 1;
488if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
5903c6ff 489 US (&fodder), sizeof(fodder)) != 0)
059ec3d9
PH
490 log_write(0, LOG_MAIN, "setsockopt(SO_KEEPALIVE) on connection %s %s "
491 "failed: %s", torf? "to":"from", address, strerror(errno));
492}
493
494
495
496/*************************************************
497* Receive from a socket with timeout *
498*************************************************/
499
4e71661f 500/*
059ec3d9 501Arguments:
4e71661f
JH
502 fd the file descriptor
503 timeout the timeout, seconds
504Returns: TRUE => ready for i/o
505 FALSE => timed out, or other error
059ec3d9 506*/
4e71661f
JH
507BOOL
508fd_ready(int fd, int timeout)
059ec3d9
PH
509{
510fd_set select_inset;
19050083 511time_t start_recv = time(NULL);
c528cec4 512int time_left = timeout;
059ec3d9
PH
513int rc;
514
85ff3cf9 515if (time_left <= 0)
4e71661f
JH
516 {
517 errno = ETIMEDOUT;
518 return FALSE;
519 }
059ec3d9
PH
520/* Wait until the socket is ready */
521
a39bd74d 522do
059ec3d9 523 {
f2ed27cf 524 struct timeval tv = { .tv_sec = time_left, .tv_usec = 0 };
059ec3d9 525 FD_ZERO (&select_inset);
4e71661f 526 FD_SET (fd, &select_inset);
059ec3d9 527
0f0c8159 528 /*DEBUG(D_transport) debug_printf("waiting for data on fd\n");*/
4e71661f 529 rc = select(fd + 1, (SELECT_ARG2_TYPE *)&select_inset, NULL, NULL, &tv);
059ec3d9
PH
530
531 /* If some interrupt arrived, just retry. We presume this to be rare,
532 but it can happen (e.g. the SIGUSR1 signal sent by exiwhat causes
533 select() to exit).
534
535 Aug 2004: Somebody set up a cron job that ran exiwhat every 2 minutes, making
536 the interrupt not at all rare. Since the timeout is typically more than 2
537 minutes, the effect was to block the timeout completely. To prevent this
c528cec4
HSHR
538 happening again, we do an explicit time test and adjust the timeout
539 accordingly */
059ec3d9
PH
540
541 if (rc < 0 && errno == EINTR)
542 {
543 DEBUG(D_transport) debug_printf("EINTR while waiting for socket data\n");
85ff3cf9 544
c528cec4 545 /* Watch out, 'continue' jumps to the condition, not to the loops top */
85ff3cf9
HSHR
546 time_left = timeout - (time(NULL) - start_recv);
547 if (time_left > 0) continue;
059ec3d9
PH
548 }
549
059ec3d9
PH
550 if (rc <= 0)
551 {
552 errno = ETIMEDOUT;
4e71661f 553 return FALSE;
059ec3d9
PH
554 }
555
c528cec4
HSHR
556 /* Checking the FD_ISSET is not enough, if we're interrupted, the
557 select_inset may still contain the 'input'. */
059ec3d9 558 }
4bd6107d 559while (rc < 0 || !FD_ISSET(fd, &select_inset));
4e71661f
JH
560return TRUE;
561}
562
563/* The timeout is implemented using select(), and we loop to cover select()
564getting interrupted, and the possibility of select() returning with a positive
565result but no ready descriptor. Is this in fact possible?
566
567Arguments:
568 sock the socket
569 buffer to read into
570 buffsize the buffer size
571 timeout the timeout
572
573Returns: > 0 => that much data read
574 -1 on error or EOF; errno set - zero for EOF
575*/
576
577int
578ip_recv(int sock, uschar *buffer, int buffsize, int timeout)
579{
580int rc;
581
582if (!fd_ready(sock, timeout))
583 return -1;
059ec3d9
PH
584
585/* The socket is ready, read from it (via TLS if it's active). On EOF (i.e.
586close down of the connection), set errno to zero; otherwise leave it alone. */
587
588#ifdef SUPPORT_TLS
817d9f57
JH
589if (tls_out.active == sock)
590 rc = tls_read(FALSE, buffer, buffsize);
591else if (tls_in.active == sock)
592 rc = tls_read(TRUE, buffer, buffsize);
059ec3d9
PH
593else
594#endif
595 rc = recv(sock, buffer, buffsize, 0);
596
597if (rc > 0) return rc;
598if (rc == 0) errno = 0;
599return -1;
600}
601
602
9e4f5962
PP
603
604
13363eba
PP
605/*************************************************
606* Lookup address family of potential socket *
607*************************************************/
608
609/* Given a file-descriptor, check to see if it's a socket and, if so,
610return the address family; detects IPv4 vs IPv6. If not a socket then
611return -1.
612
613The value 0 is typically AF_UNSPEC, which should not be seen on a connected
614fd. If the return is -1, the errno will be from getsockname(); probably
615ENOTSOCK or ECONNRESET.
616
617Arguments: socket-or-not fd
618Returns: address family or -1
619*/
620
int
ip_get_address_family(int fd)
{
struct sockaddr_storage local;
socklen_t local_len = sizeof(local);

/* getsockname() fails for anything that is not a usable socket; its errno is
left in place for the caller. */

return getsockname(fd, (struct sockaddr *) &local, &local_len) < 0
  ? -1 : (int) local.ss_family;
}
632
633
634
635
9e4f5962
PP
636/*************************************************
637* Lookup DSCP settings for a socket *
638*************************************************/
639
640struct dscp_name_tableentry {
641 const uschar *name;
642 int value;
643};
644/* Keep both of these tables sorted! */
645static struct dscp_name_tableentry dscp_table[] = {
646#ifdef IPTOS_DSCP_AF11
36a3ae5f
PP
647 { CUS"af11", IPTOS_DSCP_AF11 },
648 { CUS"af12", IPTOS_DSCP_AF12 },
649 { CUS"af13", IPTOS_DSCP_AF13 },
650 { CUS"af21", IPTOS_DSCP_AF21 },
651 { CUS"af22", IPTOS_DSCP_AF22 },
652 { CUS"af23", IPTOS_DSCP_AF23 },
653 { CUS"af31", IPTOS_DSCP_AF31 },
654 { CUS"af32", IPTOS_DSCP_AF32 },
655 { CUS"af33", IPTOS_DSCP_AF33 },
656 { CUS"af41", IPTOS_DSCP_AF41 },
657 { CUS"af42", IPTOS_DSCP_AF42 },
658 { CUS"af43", IPTOS_DSCP_AF43 },
659 { CUS"ef", IPTOS_DSCP_EF },
9e4f5962
PP
660#endif
661#ifdef IPTOS_LOWCOST
36a3ae5f 662 { CUS"lowcost", IPTOS_LOWCOST },
9e4f5962 663#endif
36a3ae5f 664 { CUS"lowdelay", IPTOS_LOWDELAY },
9e4f5962 665#ifdef IPTOS_MINCOST
36a3ae5f 666 { CUS"mincost", IPTOS_MINCOST },
9e4f5962 667#endif
36a3ae5f
PP
668 { CUS"reliability", IPTOS_RELIABILITY },
669 { CUS"throughput", IPTOS_THROUGHPUT }
9e4f5962
PP
670};
671static int dscp_table_size =
672 sizeof(dscp_table) / sizeof(struct dscp_name_tableentry);
673
674/* DSCP values change by protocol family, and so do the options used for
2a1b36b3
PP
675setsockopt(); this utility does all the lookups. It takes an unexpanded
676option string, expands it, strips off affix whitespace, then checks if it's
677a number. If all of what's left is a number, then that's how the option will
678be parsed and success/failure is a range check. If it's not all a number,
679then it must be a supported keyword.
9e4f5962
PP
680
681Arguments:
682 dscp_name a string, so far unvalidated
683 af address_family in use
684 level setsockopt level to use
685 optname setsockopt name to use
686 dscp_value value for dscp_name
687
688Returns: TRUE if okay to setsockopt(), else FALSE
2a1b36b3
PP
689
690*level and *optname may be set even if FALSE is returned
9e4f5962
PP
691*/
692
693BOOL
694dscp_lookup(const uschar *dscp_name, int af,
695 int *level, int *optname, int *dscp_value)
696{
2a1b36b3 697uschar *dscp_lookup, *p;
9e4f5962 698int first, last;
2a1b36b3 699long rawlong;
9e4f5962
PP
700
701if (af == AF_INET)
702 {
703 *level = IPPROTO_IP;
704 *optname = IP_TOS;
705 }
bb7b9411 706#if HAVE_IPV6 && defined(IPV6_TCLASS)
9e4f5962
PP
707else if (af == AF_INET6)
708 {
709 *level = IPPROTO_IPV6;
710 *optname = IPV6_TCLASS;
711 }
b301a50b 712#endif
9e4f5962
PP
713else
714 {
715 DEBUG(D_transport)
716 debug_printf("Unhandled address family %d in dscp_lookup()\n", af);
717 return FALSE;
718 }
719if (!dscp_name)
720 {
721 DEBUG(D_transport)
722 debug_printf("[empty DSCP]\n");
723 return FALSE;
724 }
725dscp_lookup = expand_string(US dscp_name);
726if (dscp_lookup == NULL || *dscp_lookup == '\0')
727 return FALSE;
728
2a1b36b3
PP
729p = dscp_lookup + Ustrlen(dscp_lookup) - 1;
730while (isspace(*p)) *p-- = '\0';
731while (isspace(*dscp_lookup) && dscp_lookup < p) dscp_lookup++;
732if (*dscp_lookup == '\0')
733 return FALSE;
734
735rawlong = Ustrtol(dscp_lookup, &p, 0);
736if (p != dscp_lookup && *p == '\0')
737 {
738 /* We have six bits available, which will end up shifted to fit in 0xFC mask.
739 RFC 2597 defines the values unshifted. */
740 if (rawlong < 0 || rawlong > 0x3F)
741 {
742 DEBUG(D_transport)
743 debug_printf("DSCP value %ld out of range, ignored.\n", rawlong);
744 return FALSE;
745 }
746 *dscp_value = rawlong << 2;
747 return TRUE;
748 }
749
9e4f5962
PP
750first = 0;
751last = dscp_table_size;
752while (last > first)
753 {
754 int middle = (first + last)/2;
755 int c = Ustrcmp(dscp_lookup, dscp_table[middle].name);
756 if (c == 0)
757 {
758 *dscp_value = dscp_table[middle].value;
759 return TRUE;
760 }
761 else if (c > 0)
9e4f5962 762 first = middle + 1;
9e4f5962 763 else
9e4f5962 764 last = middle;
9e4f5962
PP
765 }
766return FALSE;
767}
768
36a3ae5f
PP
769void
770dscp_list_to_stream(FILE *stream)
771{
772int i;
773for (i=0; i < dscp_table_size; ++i)
774 fprintf(stream, "%s\n", dscp_table[i].name);
775}
776
9e4f5962 777
059ec3d9 778/* End of ip.c */
8a512ed5
JH
779/* vi: aw ai sw=2
780*/