Fix identd connections on FreeBSD under TCP Fast Open
[exim.git] / src / src / ip.c
CommitLineData
059ec3d9
PH
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
d4e5e70b 5/* Copyright (c) University of Cambridge 1995 - 2017 */
059ec3d9
PH
6/* See the file NOTICE for conditions of use and distribution. */
7
8/* Functions for doing things with sockets. With the advent of IPv6 this has
9got messier, so that it's worth pulling out the code into separate functions
4c04137d 10that other parts of Exim can call, especially as there are now several
059ec3d9
PH
11different places in the code where sockets are used. */
12
13
14#include "exim.h"
15
16
17/*************************************************
18* Create a socket *
19*************************************************/
20
21/* Socket creation happens in a number of places so it's packaged here for
22convenience.
23
24Arguments:
25 type SOCK_DGRAM or SOCK_STREAM
26 af AF_INET or AF_INET6
27
28Returns: socket number or -1 on failure
29*/
30
31int
32ip_socket(int type, int af)
33{
34int sock = socket(af, type, 0);
35if (sock < 0)
36 log_write(0, LOG_MAIN, "IPv%c socket creation failed: %s",
37 (af == AF_INET6)? '6':'4', strerror(errno));
38return sock;
39}
40
41
42
43
44#if HAVE_IPV6
45/*************************************************
46* Convert printing address to numeric *
47*************************************************/
48
49/* This function converts the textual form of an IP address into a numeric form
50in an appropriate structure in an IPv6 environment. The getaddrinfo() function
51can (apparently) handle more complicated addresses (e.g. those containing
52scopes) than inet_pton() in some environments. We use hints to tell it that the
53input must be a numeric address.
54
55However, apparently some operating systems (or libraries) don't support
56getaddrinfo(), so there is a build-time option to revert to inet_pton() (which
57does not support scopes).
58
59Arguments:
60 address textual form of the address
61 saddr where to copy back the answer
62
63Returns: nothing - failure provokes a panic-die
64*/
65
/* Turn the text form of an IPv6 address into a struct sockaddr_in6.  Any
parse failure is logged with LOG_PANIC_DIE.

Arguments:
  address   textual form of the address
  saddr     structure to receive the result
*/

static void
ip_addrinfo(const uschar *address, struct sockaddr_in6 *saddr)
{
#ifdef IPV6_USE_INET_PTON

  /* Build-time fallback for systems without getaddrinfo(): only the
  address bytes and the family are filled in here. */

  if (inet_pton(AF_INET6, CCS address, &saddr->sin6_addr) != 1)
    log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an "
      "IP address", address);
  saddr->sin6_family = AF_INET6;

#else

  struct addrinfo want;
  struct addrinfo *found;
  int gai_rc;

  /* The hints restrict getaddrinfo() to numeric IPv6 input. */

  memset(&want, 0, sizeof(want));
  want.ai_flags = AI_NUMERICHOST;
  want.ai_socktype = SOCK_STREAM;
  want.ai_family = AF_INET6;

  gai_rc = getaddrinfo(CCS address, NULL, &want, &found);
  if (gai_rc != 0 || found == NULL)
    log_write(0, LOG_MAIN|LOG_PANIC_DIE, "unable to parse \"%s\" as an "
      "IP address: %s", address,
      (gai_rc == 0)? "NULL result returned" : gai_strerror(gai_rc));

  memcpy(saddr, found->ai_addr, found->ai_addrlen);
  freeaddrinfo(found);

#endif
}
93#endif /* HAVE_IPV6 */
94
95
96/*************************************************
97* Bind socket to interface and port *
98*************************************************/
99
059ec3d9 100int
7eb6c37c 101ip_addr(void * sin_, int af, const uschar * address, int port)
059ec3d9 102{
7eb6c37c 103union sockaddr_46 * sin = sin_;
69cbeaec 104memset(sin, 0, sizeof(*sin));
059ec3d9
PH
105
106/* Setup code when using an IPv6 socket. The wildcard address is ":", to
107ensure an IPv6 socket is used. */
108
109#if HAVE_IPV6
110if (af == AF_INET6)
111 {
112 if (address[0] == ':' && address[1] == 0)
113 {
7eb6c37c
JH
114 sin->v6.sin6_family = AF_INET6;
115 sin->v6.sin6_addr = in6addr_any;
059ec3d9
PH
116 }
117 else
7eb6c37c
JH
118 ip_addrinfo(address, &sin->v6); /* Panic-dies on error */
119 sin->v6.sin6_port = htons(port);
120 return sizeof(sin->v6);
059ec3d9
PH
121 }
122else
123#else /* HAVE_IPv6 */
124af = af; /* Avoid compiler warning */
125#endif /* HAVE_IPV6 */
126
127/* Setup code when using IPv4 socket. The wildcard address is "". */
128
129 {
7eb6c37c
JH
130 sin->v4.sin_family = AF_INET;
131 sin->v4.sin_port = htons(port);
132 sin->v4.sin_addr.s_addr = address[0] == 0
133 ? (S_ADDR_TYPE)INADDR_ANY
134 : (S_ADDR_TYPE)inet_addr(CS address);
135 return sizeof(sin->v4);
059ec3d9 136 }
7eb6c37c 137}
059ec3d9 138
059ec3d9 139
7eb6c37c
JH
140
141/* This function binds a socket to a local interface address and port. For a
142wildcard IPv6 bind, the address is ":".
143
144Arguments:
145 sock the socket
146 af AF_INET or AF_INET6 - the socket type
147 address the IP address, in text form
148 port the IP port (host order)
149
150Returns: the result of bind()
151*/
152
153int
154ip_bind(int sock, int af, uschar *address, int port)
155{
156union sockaddr_46 sin;
157int s_len = ip_addr(&sin, af, address, port);
059ec3d9
PH
158return bind(sock, (struct sockaddr *)&sin, s_len);
159}
160
161
162
163/*************************************************
164* Connect socket to remote host *
165*************************************************/
166
167/* This function connects a socket to a remote address and port. The socket may
d515a917
PH
168or may not have previously been bound to a local interface. The socket is not
169closed, even in cases of error. It is expected that the calling function, which
170created the socket, will be the one that closes it.
059ec3d9
PH
171
172Arguments:
173 sock the socket
174 af AF_INET6 or AF_INET for the socket type
175 address the remote address, in text form
176 port the remote port
b1f8e4f8 177 timeout a timeout (zero for indefinite timeout)
0ab63f3d
JH
178 fastopen non-null iff TCP_FASTOPEN can be used; may indicate early-data to
179 be sent in SYN segment
059ec3d9
PH
180
181Returns: 0 on success; -1 on failure, with errno set
182*/
183
/* Connect an already-created socket to address:port.

An IPv6 sockaddr is built when af is AF_INET6 (and HAVE_IPV6 is set),
otherwise an IPv4 one. callout_address is set to "[address]:port",
sigalrm_seen is cleared, and alarm(timeout) is armed when timeout > 0.
When fastopen is non-NULL and both TCP_FASTOPEN and MSG_FASTOPEN are
defined, a sendto() with MSG_FASTOPEN is tried first; EOPNOTSUPP from that
falls back to the plain connect() path. tcp_out_fastopen is set on the
Fast Open paths. The alarm is always cancelled before returning.

Returns 0 on success, or -1 with errno set; an EINTR failure with
sigalrm_seen set is reported as ETIMEDOUT. */
184int
fb05276a 185ip_connect(int sock, int af, const uschar *address, int port, int timeout,
0ab63f3d 186 const blob * fastopen)
059ec3d9
PH
187{
188struct sockaddr_in s_in4;
189struct sockaddr *s_ptr;
190int s_len, rc, save_errno;
191
192/* For an IPv6 address, use an IPv6 sockaddr structure. */
193
194#if HAVE_IPV6
195struct sockaddr_in6 s_in6;
196if (af == AF_INET6)
197 {
198 memset(&s_in6, 0, sizeof(s_in6));
199 ip_addrinfo(address, &s_in6); /* Panic-dies on error */
200 s_in6.sin6_port = htons(port);
201 s_ptr = (struct sockaddr *)&s_in6;
202 s_len = sizeof(s_in6);
203 }
204else
205#else /* HAVE_IPV6 */
206af = af; /* Avoid compiler warning */
207#endif /* HAVE_IPV6 */
208
209/* For an IPv4 address, use an IPv4 sockaddr structure, even on a system with
210IPv6 support. */
211
212 {
213 memset(&s_in4, 0, sizeof(s_in4));
214 s_in4.sin_family = AF_INET;
215 s_in4.sin_port = htons(port);
a56cc2b8 216 s_in4.sin_addr.s_addr = (S_ADDR_TYPE)inet_addr(CCS address);
059ec3d9
PH
217 s_ptr = (struct sockaddr *)&s_in4;
218 s_len = sizeof(s_in4);
219 }
220
221/* If no connection timeout is set, just call connect() without setting a
222timer, thereby allowing the inbuilt OS timeout to operate. */
223
af483912 224callout_address = string_sprintf("[%s]:%d", address, port);
059ec3d9
PH
225sigalrm_seen = FALSE;
226if (timeout > 0) alarm(timeout);
fb05276a 227
18f1b2f3 228#if defined(TCP_FASTOPEN) && defined(MSG_FASTOPEN)
fb05276a
JH
229/* TCP Fast Open, if the system has a cookie from a previous call to
230this peer, can send data in the SYN packet. The peer can send data
231before it gets our ACK of its SYN,ACK - the latter is useful for
ac0dcd3f
JH
232the SMTP banner. Other (than SMTP) cases of TCP connections can
233possibly use the data-on-syn, so support that too. */
fb05276a
JH
234
235if (fastopen)
236 {
0ab63f3d 237 if ((rc = sendto(sock, fastopen->data, fastopen->len,
ac0dcd3f 238 MSG_FASTOPEN | MSG_DONTWAIT, s_ptr, s_len)) >= 0)
1ccd5f67 239 /* seen for with-data, experimental TFO option, with-cookie case */
8255135b 240 /* seen for with-data, proper TFO opt, with-cookie case */
ac0dcd3f 241 {
1ccd5f67
JH
242 DEBUG(D_transport|D_v) debug_printf("TFO mode connection attempt, %s data\n",
243 fastopen->len > 0 ? "with" : "no");
244 tcp_out_fastopen = fastopen->len > 0 ? 2 : 1;
ac0dcd3f 245 }
8255135b 246 else if (errno == EINPROGRESS) /* expected if we had no cookie for peer */
1ccd5f67
JH
247 /* seen for no-data, proper TFO option, both cookie-request and with-cookie cases */
248 /* apparently no visibility of the difference at this point */
8255135b 249 /* seen for with-data, proper TFO opt, cookie-req */
1ccd5f67
JH
250 /* with netwk delay, post-conn tcp_info sees unacked 1 for R, 2 for C; code in smtp_out.c */
251 /* ? older Experimental TFO option behaviour ? */
252 { /* queue unsent data */
8255135b
JH
253 DEBUG(D_transport|D_v) debug_printf("TFO mode sendto, %s data: EINPROGRESS\n",
254 fastopen->len > 0 ? "with" : "no");
ac0dcd3f 255 if (!fastopen->data)
a2673768 256 {
8255135b 257 tcp_out_fastopen = 1; /* we tried; unknown if useful yet */
ac0dcd3f 258 rc = 0;
a2673768 259 }
8255135b
JH
260 else
261 rc = send(sock, fastopen->data, fastopen->len, 0);
ac0dcd3f
JH
262 }
263 else if(errno == EOPNOTSUPP)
264 {
265 DEBUG(D_transport)
266 debug_printf("Tried TCP Fast Open but apparently not enabled by sysctl\n");
267 goto legacy_connect;
268 }
fb05276a
JH
269 }
270else
271#endif
0ab63f3d
JH
272 {
273legacy_connect:
/* Plain connect(); any early data supplied is then sent with an ordinary
send(), and a failure of that send is reported as a connect failure. */
274 if ((rc = connect(sock, s_ptr, s_len)) >= 0)
275 if ( fastopen && fastopen->data && fastopen->len
276 && send(sock, fastopen->data, fastopen->len, 0) < 0)
277 rc = -1;
278 }
fb05276a 279
059ec3d9
PH
/* Take a copy of errno from the attempt above before making further calls. */
280save_errno = errno;
281alarm(0);
282
283/* There is a testing facility for simulating a connection timeout, as I
284can't think of any other way of doing this. It converts a connection refused
75e0e026 285into a timeout if the timeout is set to 999999. */
059ec3d9 286
a39bd74d 287if (running_in_test_harness && save_errno == ECONNREFUSED && timeout == 999999)
059ec3d9 288 {
a39bd74d
JB
289 rc = -1;
290 save_errno = EINTR;
291 sigalrm_seen = TRUE;
059ec3d9
PH
292 }
293
294/* Success */
295
055e2cb4 296if (rc >= 0)
055e2cb4 297 return 0;
059ec3d9
PH
298
299/* A failure whose error code is "Interrupted system call" is in fact
300an externally applied timeout if the signal handler has been run. */
301
a39bd74d 302errno = save_errno == EINTR && sigalrm_seen ? ETIMEDOUT : save_errno;
059ec3d9
PH
303return -1;
304}
305
306
a6d4c44e
TF
307
308/*************************************************
309* Create connected socket to remote host *
310*************************************************/
311
b1f8e4f8
JH
312/* Create a socket and connect to host (name or number, ipv6 ok)
313 at one of port-range.
a6d4c44e 314
b1f8e4f8
JH
315Arguments:
316 type SOCK_DGRAM or SOCK_STREAM
317 af AF_INET6 or AF_INET for the socket type
318 address the remote address, in text form
319 portlo,porthi the remote port range
320 timeout a timeout
4a5cbaff 321 connhost if not NULL, host_item to be filled in with connection details
b1f8e4f8 322 errstr pointer for allocated string on error
4a5cbaff
JH
323 fastopen with SOCK_STREAM, if non-null, request TCP Fast Open.
324 Additionally, optional early-data to send
b1f8e4f8
JH
325
326Return:
327 socket fd, or -1 on failure (having allocated an error string)
328*/
329int
330ip_connectedsocket(int type, const uschar * hostname, int portlo, int porthi,
4a5cbaff 331 int timeout, host_item * connhost, uschar ** errstr, const blob * fastopen)
b1f8e4f8
JH
332{
333int namelen, port;
334host_item shost;
335host_item *h;
336int af = 0, fd, fd4 = -1, fd6 = -1;
337
338shost.next = NULL;
339shost.address = NULL;
340shost.port = portlo;
341shost.mx = -1;
342
343namelen = Ustrlen(hostname);
344
345/* Anything enclosed in [] must be an IP address. */
346
347if (hostname[0] == '[' &&
348 hostname[namelen - 1] == ']')
349 {
af483912 350 uschar * host = string_copyn(hostname+1, namelen-2);
b1f8e4f8
JH
351 if (string_is_ip_address(host, NULL) == 0)
352 {
353 *errstr = string_sprintf("malformed IP address \"%s\"", hostname);
354 return -1;
355 }
356 shost.name = shost.address = host;
357 }
358
359/* Otherwise check for an unadorned IP address */
360
361else if (string_is_ip_address(hostname, NULL) != 0)
af483912 362 shost.name = shost.address = string_copyn(hostname, namelen);
b1f8e4f8
JH
363
364/* Otherwise lookup IP address(es) from the name */
365
366else
367 {
af483912 368 shost.name = string_copyn(hostname, namelen);
1f155f8e
JH
369 if (host_find_byname(&shost, NULL, HOST_FIND_QUALIFY_SINGLE,
370 NULL, FALSE) != HOST_FOUND)
b1f8e4f8
JH
371 {
372 *errstr = string_sprintf("no IP address found for host %s", shost.name);
373 return -1;
374 }
375 }
376
377/* Try to connect to the server - test each IP till one works */
378
fb05276a 379for (h = &shost; h; h = h->next)
b1f8e4f8 380 {
af483912
JH
381 fd = Ustrchr(h->address, ':') != 0
382 ? fd6 < 0 ? (fd6 = ip_socket(type, af = AF_INET6)) : fd6
383 : fd4 < 0 ? (fd4 = ip_socket(type, af = AF_INET )) : fd4;
b1f8e4f8
JH
384
385 if (fd < 0)
386 {
387 *errstr = string_sprintf("failed to create socket: %s", strerror(errno));
388 goto bad;
389 }
390
391 for(port = portlo; port <= porthi; port++)
6af7e591 392 if (ip_connect(fd, af, h->address, port, timeout, fastopen) == 0)
b1f8e4f8
JH
393 {
394 if (fd != fd6) close(fd6);
395 if (fd != fd4) close(fd4);
8a512ed5
JH
396 if (connhost)
397 {
b1f8e4f8
JH
398 h->port = port;
399 *connhost = *h;
400 connhost->next = NULL;
401 }
402 return fd;
403 }
404 }
405
a9764ac5
JH
406*errstr = string_sprintf("failed to connect to any address for %s: %s",
407 hostname, strerror(errno));
b1f8e4f8
JH
408
409bad:
410 close(fd4); close(fd6); return -1;
411}
412
059ec3d9 413
4a5cbaff 414/*XXX TFO? */
3e60dd41
JH
415int
416ip_tcpsocket(const uschar * hostport, uschar ** errstr, int tmo)
417{
a39bd74d
JB
418int scan;
419uschar hostname[256];
420unsigned int portlow, porthigh;
421
422/* extract host and port part */
423scan = sscanf(CS hostport, "%255s %u-%u", hostname, &portlow, &porthigh);
424if (scan != 3)
425 {
426 if (scan != 2)
427 {
428 *errstr = string_sprintf("invalid socket '%s'", hostport);
429 return -1;
3e60dd41 430 }
a39bd74d 431 porthigh = portlow;
3e60dd41
JH
432 }
433
a39bd74d 434return ip_connectedsocket(SOCK_STREAM, hostname, portlow, porthigh,
4a5cbaff 435 tmo, NULL, errstr, NULL);
3e60dd41
JH
436}
437
438int
439ip_unixsocket(const uschar * path, uschar ** errstr)
440{
a39bd74d
JB
441int sock;
442struct sockaddr_un server;
3e60dd41 443
a39bd74d
JB
444if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
445 {
446 *errstr = US"can't open UNIX socket.";
447 return -1;
3e60dd41
JH
448 }
449
af483912 450callout_address = string_copy(path);
a39bd74d
JB
451server.sun_family = AF_UNIX;
452Ustrncpy(server.sun_path, path, sizeof(server.sun_path)-1);
453server.sun_path[sizeof(server.sun_path)-1] = '\0';
454if (connect(sock, (struct sockaddr *) &server, sizeof(server)) < 0)
455 {
456 int err = errno;
457 (void)close(sock);
458 *errstr = string_sprintf("unable to connect to UNIX socket (%s): %s",
459 path, strerror(err));
460 return -1;
461 }
462return sock;
3e60dd41
JH
463}
464
465int
466ip_streamsocket(const uschar * spec, uschar ** errstr, int tmo)
467{
a39bd74d
JB
468return *spec == '/'
469 ? ip_unixsocket(spec, errstr) : ip_tcpsocket(spec, errstr, tmo);
3e60dd41
JH
470}
471
059ec3d9
PH
472/*************************************************
473* Set keepalive on a socket *
474*************************************************/
475
476/* Can be called for both incoming and outgoing sockets.
477
478Arguments:
479 sock the socket
480 address the remote host address, for failure logging
481 torf true for outgoing connection, false for incoming
482
483Returns: nothing
484*/
485
486void
55414b25 487ip_keepalive(int sock, const uschar *address, BOOL torf)
059ec3d9
PH
488{
489int fodder = 1;
490if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
5903c6ff 491 US (&fodder), sizeof(fodder)) != 0)
059ec3d9
PH
492 log_write(0, LOG_MAIN, "setsockopt(SO_KEEPALIVE) on connection %s %s "
493 "failed: %s", torf? "to":"from", address, strerror(errno));
494}
495
496
497
498/*************************************************
499* Receive from a socket with timeout *
500*************************************************/
501
4e71661f 502/*
059ec3d9 503Arguments:
4e71661f
JH
504 fd the file descriptor
505 timeout the timeout, seconds
506Returns: TRUE => ready for i/o
507 FALSE => timed out, or other error
059ec3d9 508*/
4e71661f
JH
509BOOL
510fd_ready(int fd, int timeout)
059ec3d9
PH
511{
512fd_set select_inset;
19050083 513time_t start_recv = time(NULL);
c528cec4 514int time_left = timeout;
059ec3d9
PH
515int rc;
516
85ff3cf9 517if (time_left <= 0)
4e71661f
JH
518 {
519 errno = ETIMEDOUT;
520 return FALSE;
521 }
059ec3d9
PH
522/* Wait until the socket is ready */
523
a39bd74d 524do
059ec3d9 525 {
f2ed27cf 526 struct timeval tv = { .tv_sec = time_left, .tv_usec = 0 };
059ec3d9 527 FD_ZERO (&select_inset);
4e71661f 528 FD_SET (fd, &select_inset);
059ec3d9 529
0f0c8159 530 /*DEBUG(D_transport) debug_printf("waiting for data on fd\n");*/
4e71661f 531 rc = select(fd + 1, (SELECT_ARG2_TYPE *)&select_inset, NULL, NULL, &tv);
059ec3d9
PH
532
533 /* If some interrupt arrived, just retry. We presume this to be rare,
534 but it can happen (e.g. the SIGUSR1 signal sent by exiwhat causes
535 select() to exit).
536
537 Aug 2004: Somebody set up a cron job that ran exiwhat every 2 minutes, making
538 the interrupt not at all rare. Since the timeout is typically more than 2
539 minutes, the effect was to block the timeout completely. To prevent this
c528cec4
HSHR
540 happening again, we do an explicit time test and adjust the timeout
541 accordingly */
059ec3d9
PH
542
543 if (rc < 0 && errno == EINTR)
544 {
545 DEBUG(D_transport) debug_printf("EINTR while waiting for socket data\n");
85ff3cf9 546
c528cec4 547 /* Watch out, 'continue' jumps to the condition, not to the loops top */
85ff3cf9
HSHR
548 time_left = timeout - (time(NULL) - start_recv);
549 if (time_left > 0) continue;
059ec3d9
PH
550 }
551
059ec3d9
PH
552 if (rc <= 0)
553 {
554 errno = ETIMEDOUT;
4e71661f 555 return FALSE;
059ec3d9
PH
556 }
557
c528cec4
HSHR
558 /* Checking the FD_ISSET is not enough, if we're interrupted, the
559 select_inset may still contain the 'input'. */
059ec3d9 560 }
4bd6107d 561while (rc < 0 || !FD_ISSET(fd, &select_inset));
4e71661f
JH
562return TRUE;
563}
564
565/* The timeout is implemented using select(), and we loop to cover select()
566getting interrupted, and the possibility of select() returning with a positive
567result but no ready descriptor. Is this in fact possible?
568
569Arguments:
570 sock the socket
571 buffer to read into
572 buffsize the buffer size
573 timeout the timeout
574
575Returns: > 0 => that much data read
576 -1 on error, timeout or EOF; errno set - zero for EOF
577*/
578
579int
580ip_recv(int sock, uschar *buffer, int buffsize, int timeout)
581{
582int rc;
583
584if (!fd_ready(sock, timeout))
585 return -1;
059ec3d9
PH
586
587/* The socket is ready, read from it (via TLS if it's active). On EOF (i.e.
588close down of the connection), set errno to zero; otherwise leave it alone. */
589
590#ifdef SUPPORT_TLS
817d9f57
JH
591if (tls_out.active == sock)
592 rc = tls_read(FALSE, buffer, buffsize);
593else if (tls_in.active == sock)
594 rc = tls_read(TRUE, buffer, buffsize);
059ec3d9
PH
595else
596#endif
597 rc = recv(sock, buffer, buffsize, 0);
598
599if (rc > 0) return rc;
600if (rc == 0) errno = 0;
601return -1;
602}
603
604
9e4f5962
PP
605
606
13363eba
PP
607/*************************************************
608* Lookup address family of potential socket *
609*************************************************/
610
611/* Given a file-descriptor, check to see if it's a socket and, if so,
612return the address family; detects IPv4 vs IPv6. If not a socket then
613return -1.
614
615The value 0 is typically AF_UNSPEC, which should not be seen on a connected
616fd. If the return is -1, the errno will be from getsockname(); probably
617ENOTSOCK or ECONNRESET.
618
619Arguments: socket-or-not fd
620Returns: address family or -1
621*/
622
int
ip_get_address_family(int fd)
{
struct sockaddr_storage local;
socklen_t local_len = sizeof(local);
int rc = getsockname(fd, (struct sockaddr *) &local, &local_len);

/* On failure errno is whatever getsockname() left. */
return rc < 0 ? -1 : (int) local.ss_family;
}
634
635
636
637
9e4f5962
PP
638/*************************************************
639* Lookup DSCP settings for a socket *
640*************************************************/
641
642struct dscp_name_tableentry {
643 const uschar *name;
644 int value;
645};
646/* Keep both of these tables sorted! */
647static struct dscp_name_tableentry dscp_table[] = {
648#ifdef IPTOS_DSCP_AF11
36a3ae5f
PP
649 { CUS"af11", IPTOS_DSCP_AF11 },
650 { CUS"af12", IPTOS_DSCP_AF12 },
651 { CUS"af13", IPTOS_DSCP_AF13 },
652 { CUS"af21", IPTOS_DSCP_AF21 },
653 { CUS"af22", IPTOS_DSCP_AF22 },
654 { CUS"af23", IPTOS_DSCP_AF23 },
655 { CUS"af31", IPTOS_DSCP_AF31 },
656 { CUS"af32", IPTOS_DSCP_AF32 },
657 { CUS"af33", IPTOS_DSCP_AF33 },
658 { CUS"af41", IPTOS_DSCP_AF41 },
659 { CUS"af42", IPTOS_DSCP_AF42 },
660 { CUS"af43", IPTOS_DSCP_AF43 },
661 { CUS"ef", IPTOS_DSCP_EF },
9e4f5962
PP
662#endif
663#ifdef IPTOS_LOWCOST
36a3ae5f 664 { CUS"lowcost", IPTOS_LOWCOST },
9e4f5962 665#endif
36a3ae5f 666 { CUS"lowdelay", IPTOS_LOWDELAY },
9e4f5962 667#ifdef IPTOS_MINCOST
36a3ae5f 668 { CUS"mincost", IPTOS_MINCOST },
9e4f5962 669#endif
36a3ae5f
PP
670 { CUS"reliability", IPTOS_RELIABILITY },
671 { CUS"throughput", IPTOS_THROUGHPUT }
9e4f5962
PP
672};
673static int dscp_table_size =
674 sizeof(dscp_table) / sizeof(struct dscp_name_tableentry);
675
676/* DSCP values change by protocol family, and so do the options used for
2a1b36b3
PP
677setsockopt(); this utility does all the lookups. It takes an unexpanded
678option string, expands it, strips off affix whitespace, then checks if it's
679a number. If all of what's left is a number, then that's how the option will
680be parsed and success/failure is a range check. If it's not all a number,
681then it must be a supported keyword.
9e4f5962
PP
682
683Arguments:
684 dscp_name a string, so far unvalidated
685 af address_family in use
686 level setsockopt level to use
687 optname setsockopt name to use
688 dscp_value value for dscp_name
689
690Returns: TRUE if okay to setsockopt(), else FALSE
2a1b36b3
PP
691
692*level and *optname may be set even if FALSE is returned
9e4f5962
PP
693*/
694
695BOOL
696dscp_lookup(const uschar *dscp_name, int af,
697 int *level, int *optname, int *dscp_value)
698{
2a1b36b3 699uschar *dscp_lookup, *p;
9e4f5962 700int first, last;
2a1b36b3 701long rawlong;
9e4f5962
PP
702
703if (af == AF_INET)
704 {
705 *level = IPPROTO_IP;
706 *optname = IP_TOS;
707 }
bb7b9411 708#if HAVE_IPV6 && defined(IPV6_TCLASS)
9e4f5962
PP
709else if (af == AF_INET6)
710 {
711 *level = IPPROTO_IPV6;
712 *optname = IPV6_TCLASS;
713 }
b301a50b 714#endif
9e4f5962
PP
715else
716 {
717 DEBUG(D_transport)
718 debug_printf("Unhandled address family %d in dscp_lookup()\n", af);
719 return FALSE;
720 }
721if (!dscp_name)
722 {
723 DEBUG(D_transport)
724 debug_printf("[empty DSCP]\n");
725 return FALSE;
726 }
727dscp_lookup = expand_string(US dscp_name);
728if (dscp_lookup == NULL || *dscp_lookup == '\0')
729 return FALSE;
730
2a1b36b3
PP
731p = dscp_lookup + Ustrlen(dscp_lookup) - 1;
732while (isspace(*p)) *p-- = '\0';
733while (isspace(*dscp_lookup) && dscp_lookup < p) dscp_lookup++;
734if (*dscp_lookup == '\0')
735 return FALSE;
736
737rawlong = Ustrtol(dscp_lookup, &p, 0);
738if (p != dscp_lookup && *p == '\0')
739 {
740 /* We have six bits available, which will end up shifted to fit in 0xFC mask.
741 RFC 2597 defines the values unshifted. */
742 if (rawlong < 0 || rawlong > 0x3F)
743 {
744 DEBUG(D_transport)
745 debug_printf("DSCP value %ld out of range, ignored.\n", rawlong);
746 return FALSE;
747 }
748 *dscp_value = rawlong << 2;
749 return TRUE;
750 }
751
9e4f5962
PP
752first = 0;
753last = dscp_table_size;
754while (last > first)
755 {
756 int middle = (first + last)/2;
757 int c = Ustrcmp(dscp_lookup, dscp_table[middle].name);
758 if (c == 0)
759 {
760 *dscp_value = dscp_table[middle].value;
761 return TRUE;
762 }
763 else if (c > 0)
9e4f5962 764 first = middle + 1;
9e4f5962 765 else
9e4f5962 766 last = middle;
9e4f5962
PP
767 }
768return FALSE;
769}
770
36a3ae5f
PP
771void
772dscp_list_to_stream(FILE *stream)
773{
774int i;
775for (i=0; i < dscp_table_size; ++i)
776 fprintf(stream, "%s\n", dscp_table[i].name);
777}
778
9e4f5962 779
059ec3d9 780/* End of ip.c */
8a512ed5
JH
781/* vi: aw ai sw=2
782*/