Make useful socket functions more generally available
[exim.git] / src / src / spam.c
1 /*************************************************
2 * Exim - an Internet mail transport agent *
3 *************************************************/
4
5 /* Copyright (c) Tom Kistner <tom@duncanthrax.net> 2003-???? */
6 /* License: GPL */
7
8 /* Code for calling spamassassin's spamd. Called from acl.c. */
9
10 #include "exim.h"
11 #ifdef WITH_CONTENT_SCAN
12 #include "spam.h"
13
14 uschar spam_score_buffer[16];
15 uschar spam_score_int_buffer[16];
16 uschar spam_bar_buffer[128];
17 uschar spam_action_buffer[32];
18 uschar spam_report_buffer[32600];
19 uschar prev_user_name[128] = "";
20 int spam_ok = 0;
21 int spam_rc = 0;
22 uschar *prev_spamd_address_work = NULL;
23
24 static const uschar * loglabel = US"spam acl condition:";
25
26 static int
27 spamd_param_init(spamd_address_container *spamd)
28 {
29 /* default spamd server weight, time and backup value */
30 spamd->weight = SPAMD_WEIGHT;
31 spamd->is_failed = FALSE;
32 spamd->is_backup = FALSE;
33 return 0;
34 }
35
36
37 static int
38 spamd_param(const uschar *param, spamd_address_container *spamd)
39 {
40 static int timesinceday = -1;
41 uschar buffer[256];
42
43 /* check backup parameter */
44 if (Ustrcmp(param, "backup") == 0)
45 {
46 spamd->is_backup = TRUE;
47 return 0; /* OK */
48 }
49
50 /*XXX more clever parsing could discard embedded spaces? */
51
52 /* check weight parameter */
53 if (sscanf(param, "weight=%u", &spamd->weight))
54 {
55 if (spamd->weight == 0) /* this server disabled: skip it */
56 return 1;
57 return 0; /* OK */
58 }
59
60 /* check time parameter */
61 if (sscanf(param, "time=%s", buffer))
62 {
63 unsigned int start_h = 0, start_m = 0, start_s = 0;
64 unsigned int end_h = 24, end_m = 0, end_s = 0;
65 unsigned int time_start, time_end;
66 uschar *start_string;
67 uschar *end_string;
68 uschar *delimiter;
69
70 if ((delimiter = US strchr(CS buffer, '-')))
71 {
72 *delimiter = '\0';
73 start_string = buffer;
74 end_string = delimiter + 1;
75 if (sscanf(CS end_string, "%u.%u.%u", &end_h, &end_m, &end_s) == 0 ||
76 sscanf(CS start_string, "%u.%u.%u", &start_h, &start_m, &start_s) == 0)
77 {
78 log_write(0, LOG_MAIN,
79 "%s warning - invalid spamd time value: '%s'", loglabel, buffer);
80 return -1; /* syntax error */
81 }
82 }
83 else
84 {
85 log_write(0, LOG_MAIN,
86 "%s warning - invalid spamd time value: '%s'", loglabel, buffer);
87 return -1; /* syntax error */
88 }
89
90 if (timesinceday < 0)
91 {
92 time_t now = time(NULL);
93 struct tm *tmp = localtime(&now);
94 timesinceday = tmp->tm_hour*3600 + tmp->tm_min*60 + tmp->tm_sec;
95 }
96
97 time_start = start_h*3600 + start_m*60 + start_s;
98 time_end = end_h*3600 + end_m*60 + end_s;
99
100 if (timesinceday < time_start || timesinceday >= time_end)
101 return 1; /* skip spamd server */
102
103 return 0; /* OK */
104 }
105
106 if (Ustrcmp(param, "variant=rspamd") == 0)
107 {
108 spamd->is_rspamd = TRUE;
109 return 0;
110 }
111
112 log_write(0, LOG_MAIN, "%s warning - invalid spamd parameter: '%s'",
113 loglabel, param);
114 return -1; /* syntax error */
115 }
116
117
118 static int
119 spamd_get_server(spamd_address_container **spamds, int num_servers)
120 {
121 unsigned int i;
122 long rnd, weights = 0;
123 static BOOL srandomed = 0;
124 BOOL usebackup = FALSE;
125
126 for (;;)
127 {
128 /* seedup, if we have only 1 server */
129 if (num_servers == 1)
130 return (spamds[0]->is_failed ? -1 : 0);
131
132 /* init ranmod */
133 if (!srandomed)
134 {
135 struct timeval tv;
136 gettimeofday(&tv, NULL);
137 srandom((unsigned int)(tv.tv_usec/1000));
138 srandomed = TRUE;
139 }
140
141 /* get sum of all weights */
142 for (i = 0; i < num_servers; i++)
143 if (!spamds[i]->is_failed && spamds[i]->is_backup == usebackup)
144 weights += spamds[i]->weight;
145
146 if (weights != 0)
147 break;
148 if (usebackup) /* all servers failed (backups too) */
149 return -1;
150 usebackup = TRUE;
151 }
152
153 rnd = random() % weights;
154
155 for (i = 0; i < num_servers; i++)
156 if (!spamds[i]->is_failed && spamds[i]->is_backup == usebackup)
157 if ((rnd -= spamds[i]->weight) < 0)
158 return i;
159
160 log_write(0, LOG_MAIN|LOG_PANIC,
161 "%s unknown error (memory/cpu corruption?)", loglabel);
162 return -1;
163 }
164
165
166 int
167 spam(uschar **listptr)
168 {
169 int sep = 0;
170 uschar *list = *listptr;
171 uschar *user_name;
172 uschar user_name_buffer[128];
173 unsigned long mbox_size;
174 FILE *mbox_file;
175 int spamd_sock = -1;
176 uschar spamd_buffer[32600];
177 int i, j, offset, result;
178 BOOL is_rspamd;
179 uschar spamd_version[8];
180 uschar spamd_short_result[8];
181 uschar spamd_score_char;
182 double spamd_threshold, spamd_score, spamd_reject_score;
183 int spamd_report_offset;
184 uschar *p,*q;
185 int override = 0;
186 time_t start;
187 size_t read, wrote;
188 struct sockaddr_un server;
189 #ifndef NO_POLL_H
190 struct pollfd pollfd;
191 #else /* Patch posted by Erik ? for OS X */
192 struct timeval select_tv; /* and applied by PH */
193 fd_set select_fd;
194 #endif
195 uschar *spamd_address_work;
196
197 /* stop compiler warning */
198 result = 0;
199
200 /* find the username from the option list */
201 if ((user_name = string_nextinlist(&list, &sep,
202 user_name_buffer,
203 sizeof(user_name_buffer))) == NULL)
204 {
205 /* no username given, this means no scanning should be done */
206 return FAIL;
207 }
208
209 /* if username is "0" or "false", do not scan */
210 if ( (Ustrcmp(user_name,"0") == 0) ||
211 (strcmpic(user_name,US"false") == 0) )
212 return FAIL;
213
214 /* if there is an additional option, check if it is "true" */
215 if (strcmpic(list,US"true") == 0)
216 /* in that case, always return true later */
217 override = 1;
218
219 /* expand spamd_address if needed */
220 if (*spamd_address == '$')
221 {
222 spamd_address_work = expand_string(spamd_address);
223 if (spamd_address_work == NULL)
224 {
225 log_write(0, LOG_MAIN|LOG_PANIC,
226 "%s spamd_address starts with $, but expansion failed: %s",
227 loglabel, expand_string_message);
228 return DEFER;
229 }
230 }
231 else
232 spamd_address_work = spamd_address;
233
234 HDEBUG(D_acl) debug_printf("spamd: addrlist '%s'\n", spamd_address_work);
235
236 /* check if previous spamd_address was expanded and has changed. dump cached results if so */
237 if ( spam_ok
238 && prev_spamd_address_work != NULL
239 && Ustrcmp(prev_spamd_address_work, spamd_address_work) != 0
240 )
241 spam_ok = 0;
242
243 /* if we scanned for this username last time, just return */
244 if (spam_ok && Ustrcmp(prev_user_name, user_name) == 0)
245 return override ? OK : spam_rc;
246
247 /* make sure the eml mbox file is spooled up */
248 mbox_file = spool_mbox(&mbox_size, NULL);
249
250 if (mbox_file == NULL)
251 {
252 /* error while spooling */
253 log_write(0, LOG_MAIN|LOG_PANIC,
254 "%s error while creating mbox spool file", loglabel);
255 return DEFER;
256 }
257
258 start = time(NULL);
259
260 {
261 int num_servers = 0;
262 int current_server;
263 uschar *address;
264 uschar *spamd_address_list_ptr = spamd_address_work;
265 spamd_address_container * spamd_address_vector[32];
266 spamd_address_container * this_spamd;
267
268
269 /* Check how many spamd servers we have
270 and register their addresses */
271 while ((address = string_nextinlist(&spamd_address_list_ptr, &sep,
272 NULL, 0)) != NULL)
273 {
274 uschar * sublist;
275 int sublist_sep = -(int)' '; /* default space-sep */
276 unsigned args;
277 uschar * s;
278
279 HDEBUG(D_acl) debug_printf("spamd: addr entry '%s'\n", address);
280 this_spamd =
281 (spamd_address_container *)store_get(sizeof(spamd_address_container));
282
283 for (sublist = address, args = 0, spamd_param_init(this_spamd);
284 s = string_nextinlist(&sublist, &sublist_sep, NULL, 0);
285 args++
286 )
287 {
288 HDEBUG(D_acl) debug_printf("spamd: addr parm '%s'\n", s);
289 switch (args)
290 {
291 case 0: this_spamd->hostname = s;
292 if (*s == '/') args++; /* local; no port */
293 break;
294 case 1: this_spamd->tcp_port = atoi(s); break;
295 default: spamd_param(s, this_spamd); break;
296 }
297 }
298 if (args < 2)
299 {
300 log_write(0, LOG_MAIN,
301 "%s warning - invalid spamd address: '%s'", loglabel, address);
302 continue;
303 }
304
305 spamd_address_vector[num_servers] = this_spamd;
306 if (++num_servers > 31)
307 break;
308 }
309
310 /* check if we have at least one server */
311 if (!num_servers)
312 {
313 log_write(0, LOG_MAIN|LOG_PANIC,
314 "%s no useable spamd server addresses in spamd_address configuration option.",
315 loglabel);
316 goto defer;
317 }
318
319 while (1)
320 {
321 struct hostent *he;
322 int i;
323 BOOL is_local;
324
325 current_server = spamd_get_server(spamd_address_vector, num_servers);
326 this_spamd = spamd_address_vector[current_server];
327
328 is_local = *this_spamd->hostname == '/';
329
330 debug_printf(is_local
331 ? "trying server %s\n" : "trying server %s, port %u\n",
332 this_spamd->hostname, this_spamd->tcp_port);
333
334 /* contact a spamd */
335 if (is_local)
336 {
337 if ((spamd_sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
338 {
339 log_write(0, LOG_MAIN|LOG_PANIC,
340 "%s spamd: unable to acquire socket (%s)",
341 loglabel,
342 strerror(errno));
343 goto defer;
344 }
345
346 server.sun_family = AF_UNIX;
347 Ustrcpy(server.sun_path, this_spamd->hostname);
348
349 if (connect(spamd_sock, (struct sockaddr *) &server, sizeof(struct sockaddr_un)) >= 0)
350 break; /* connection OK */
351
352 log_write(0, LOG_MAIN,
353 "%s spamd: unable to connect to UNIX socket %s (%s)",
354 loglabel, server.sun_path, strerror(errno) );
355 }
356 else
357 {
358 if ((spamd_sock = ip_socket(SOCK_STREAM, AF_INET)) < 0)
359 {
360 log_write(0, LOG_MAIN|LOG_PANIC,
361 "%s error creating IP socket for spamd", loglabel);
362 goto defer;
363 }
364
365 /*XXX should we use getaddrinfo? */
366 if (!(he = gethostbyname(CS this_spamd->hostname)))
367 log_write(0, LOG_MAIN|LOG_PANIC,
368 "%s failed to lookup host '%s'", loglabel, this_spamd->hostname);
369
370 else
371 {
372 struct in_addr in = *(struct in_addr *) he->h_addr_list[0];
373
374 if (ip_connect(spamd_sock, AF_INET, US inet_ntoa(in),
375 this_spamd->tcp_port, 5) > -1)
376 break; /* connection OK */
377
378 log_write(0, LOG_MAIN,
379 "%s warning - spamd connection to '%s', port %u failed: %s",
380 loglabel,
381 this_spamd->hostname, this_spamd->tcp_port, strerror(errno));
382 }
383
384 (void)close(spamd_sock);
385
386 this_spamd->is_failed = TRUE;
387 current_server = spamd_get_server(spamd_address_vector, num_servers);
388 if (current_server < 0)
389 {
390 log_write(0, LOG_MAIN|LOG_PANIC, "%s all spamd servers failed",
391 loglabel);
392 goto defer;
393 }
394 }
395 }
396 is_rspamd = this_spamd->is_rspamd;
397 }
398
399 if (spamd_sock == -1)
400 {
401 log_write(0, LOG_MAIN|LOG_PANIC,
402 "programming fault, spamd_sock unexpectedly unset");
403 goto defer;
404 }
405
406 (void)fcntl(spamd_sock, F_SETFL, O_NONBLOCK);
407 /* now we are connected to spamd on spamd_sock */
408 if (is_rspamd)
409 { /* rspamd variant */
410 uschar *req_str;
411 const char *helo;
412 const char *fcrdns;
413
414 req_str = string_sprintf("CHECK RSPAMC/1.3\r\nContent-length: %lu\r\n"
415 "Queue-Id: %s\r\nFrom: <%s>\r\nRecipient-Number: %d\r\n", mbox_size,
416 message_id, sender_address, recipients_count);
417 for (i = 0; i < recipients_count; i ++)
418 req_str = string_sprintf("%sRcpt: <%s>\r\n", req_str, recipients_list[i].address);
419 if ((helo = expand_string(US"$sender_helo_name")) != NULL && *helo != '\0')
420 req_str = string_sprintf("%sHelo: %s\r\n", req_str, helo);
421 if ((fcrdns = expand_string(US"$sender_host_name")) != NULL && *fcrdns != '\0')
422 req_str = string_sprintf("%sHostname: %s\r\n", req_str, fcrdns);
423 if (sender_host_address != NULL)
424 req_str = string_sprintf("%sIP: %s\r\n", req_str, sender_host_address);
425 req_str = string_sprintf("%s\r\n", req_str);
426 wrote = send(spamd_sock, req_str, Ustrlen(req_str), 0);
427 }
428 else
429 { /* spamassassin variant */
430 (void)string_format(spamd_buffer,
431 sizeof(spamd_buffer),
432 "REPORT SPAMC/1.2\r\nUser: %s\r\nContent-length: %ld\r\n\r\n",
433 user_name,
434 mbox_size);
435 /* send our request */
436 wrote = send(spamd_sock, spamd_buffer, Ustrlen(spamd_buffer), 0);
437 }
438 if (wrote == -1)
439 {
440 (void)close(spamd_sock);
441 log_write(0, LOG_MAIN|LOG_PANIC,
442 "%s spamd send failed: %s", loglabel, strerror(errno));
443 goto defer;
444 }
445
446 /* now send the file */
447 /* spamd sometimes accepts conections but doesn't read data off
448 * the connection. We make the file descriptor non-blocking so
449 * that the write will only write sufficient data without blocking
450 * and we poll the desciptor to make sure that we can write without
451 * blocking. Short writes are gracefully handled and if the whole
452 * trasaction takes too long it is aborted.
453 * Note: poll() is not supported in OSX 10.2 and is reported to be
454 * broken in more recent versions (up to 10.4).
455 */
456 #ifndef NO_POLL_H
457 pollfd.fd = spamd_sock;
458 pollfd.events = POLLOUT;
459 #endif
460 (void)fcntl(spamd_sock, F_SETFL, O_NONBLOCK);
461 do
462 {
463 read = fread(spamd_buffer,1,sizeof(spamd_buffer),mbox_file);
464 if (read > 0)
465 {
466 offset = 0;
467 again:
468 #ifndef NO_POLL_H
469 result = poll(&pollfd, 1, 1000);
470
471 /* Patch posted by Erik ? for OS X and applied by PH */
472 #else
473 select_tv.tv_sec = 1;
474 select_tv.tv_usec = 0;
475 FD_ZERO(&select_fd);
476 FD_SET(spamd_sock, &select_fd);
477 result = select(spamd_sock+1, NULL, &select_fd, NULL, &select_tv);
478 #endif
479 /* End Erik's patch */
480
481 if (result == -1 && errno == EINTR)
482 goto again;
483 else if (result < 1)
484 {
485 if (result == -1)
486 log_write(0, LOG_MAIN|LOG_PANIC,
487 "%s %s on spamd socket", loglabel, strerror(errno));
488 else
489 {
490 if (time(NULL) - start < SPAMD_TIMEOUT)
491 goto again;
492 log_write(0, LOG_MAIN|LOG_PANIC,
493 "%s timed out writing spamd socket", loglabel);
494 }
495 (void)close(spamd_sock);
496 goto defer;
497 }
498
499 wrote = send(spamd_sock,spamd_buffer + offset,read - offset,0);
500 if (wrote == -1)
501 {
502 log_write(0, LOG_MAIN|LOG_PANIC,
503 "%s %s on spamd socket", loglabel, strerror(errno));
504 (void)close(spamd_sock);
505 goto defer;
506 }
507 if (offset + wrote != read)
508 {
509 offset += wrote;
510 goto again;
511 }
512 }
513 }
514 while (!feof(mbox_file) && !ferror(mbox_file));
515
516 if (ferror(mbox_file))
517 {
518 log_write(0, LOG_MAIN|LOG_PANIC,
519 "%s error reading spool file: %s", loglabel, strerror(errno));
520 (void)close(spamd_sock);
521 goto defer;
522 }
523
524 (void)fclose(mbox_file);
525
526 /* we're done sending, close socket for writing */
527 shutdown(spamd_sock,SHUT_WR);
528
529 /* read spamd response using what's left of the timeout. */
530 memset(spamd_buffer, 0, sizeof(spamd_buffer));
531 offset = 0;
532 while ((i = ip_recv(spamd_sock,
533 spamd_buffer + offset,
534 sizeof(spamd_buffer) - offset - 1,
535 SPAMD_TIMEOUT - time(NULL) + start)) > 0 )
536 offset += i;
537
538 /* error handling */
539 if (i <= 0 && errno != 0)
540 {
541 log_write(0, LOG_MAIN|LOG_PANIC,
542 "%s error reading from spamd socket: %s", loglabel, strerror(errno));
543 (void)close(spamd_sock);
544 return DEFER;
545 }
546
547 /* reading done */
548 (void)close(spamd_sock);
549
550 if (is_rspamd)
551 { /* rspamd variant of reply */
552 int r;
553 if ((r = sscanf(CS spamd_buffer,
554 "RSPAMD/%7s 0 EX_OK\r\nMetric: default; %7s %lf / %lf / %lf\r\n%n",
555 spamd_version, spamd_short_result, &spamd_score, &spamd_threshold,
556 &spamd_reject_score, &spamd_report_offset)) != 5)
557 {
558 log_write(0, LOG_MAIN|LOG_PANIC,
559 "%s cannot parse spamd output: %d", loglabel, r);
560 return DEFER;
561 }
562 /* now parse action */
563 p = &spamd_buffer[spamd_report_offset];
564
565 if (Ustrncmp(p, "Action: ", sizeof("Action: ") - 1) == 0)
566 {
567 p += sizeof("Action: ") - 1;
568 q = &spam_action_buffer[0];
569 while (*p && *p != '\r' && (q - spam_action_buffer) < sizeof(spam_action_buffer) - 1)
570 *q++ = *p++;
571 *q = '\0';
572 }
573 }
574 else
575 { /* spamassassin */
576 /* dig in the spamd output and put the report in a multiline header,
577 if requested */
578 if (sscanf(CS spamd_buffer,
579 "SPAMD/%7s 0 EX_OK\r\nContent-length: %*u\r\n\r\n%lf/%lf\r\n%n",
580 spamd_version,&spamd_score,&spamd_threshold,&spamd_report_offset) != 3)
581 {
582 /* try to fall back to pre-2.50 spamd output */
583 if (sscanf(CS spamd_buffer,
584 "SPAMD/%7s 0 EX_OK\r\nSpam: %*s ; %lf / %lf\r\n\r\n%n",
585 spamd_version,&spamd_score,&spamd_threshold,&spamd_report_offset) != 3)
586 {
587 log_write(0, LOG_MAIN|LOG_PANIC,
588 "%s cannot parse spamd output", loglabel);
589 return DEFER;
590 }
591 }
592
593 Ustrcpy(spam_action_buffer,
594 spamd_score >= spamd_threshold ? "reject" : "no action");
595 }
596
597 /* Create report. Since this is a multiline string,
598 we must hack it into shape first */
599 p = &spamd_buffer[spamd_report_offset];
600 q = spam_report_buffer;
601 while (*p != '\0')
602 {
603 /* skip \r */
604 if (*p == '\r')
605 {
606 p++;
607 continue;
608 }
609 *q++ = *p;
610 if (*p++ == '\n')
611 {
612 /* add an extra space after the newline to ensure
613 that it is treated as a header continuation line */
614 *q++ = ' ';
615 }
616 }
617 /* NULL-terminate */
618 *q-- = '\0';
619 /* cut off trailing leftovers */
620 while (*q <= ' ')
621 *q-- = '\0';
622
623 spam_report = spam_report_buffer;
624 spam_action = spam_action_buffer;
625
626 /* create spam bar */
627 spamd_score_char = spamd_score > 0 ? '+' : '-';
628 j = abs((int)(spamd_score));
629 i = 0;
630 if (j != 0)
631 while ((i < j) && (i <= MAX_SPAM_BAR_CHARS))
632 spam_bar_buffer[i++] = spamd_score_char;
633 else
634 {
635 spam_bar_buffer[0] = '/';
636 i = 1;
637 }
638 spam_bar_buffer[i] = '\0';
639 spam_bar = spam_bar_buffer;
640
641 /* create "float" spam score */
642 (void)string_format(spam_score_buffer, sizeof(spam_score_buffer),
643 "%.1f", spamd_score);
644 spam_score = spam_score_buffer;
645
646 /* create "int" spam score */
647 j = (int)((spamd_score + 0.001)*10);
648 (void)string_format(spam_score_int_buffer, sizeof(spam_score_int_buffer),
649 "%d", j);
650 spam_score_int = spam_score_int_buffer;
651
652 /* compare threshold against score */
653 spam_rc = spamd_score >= spamd_threshold
654 ? OK /* spam as determined by user's threshold */
655 : FAIL; /* not spam */
656
657 /* remember expanded spamd_address if needed */
658 if (spamd_address_work != spamd_address)
659 prev_spamd_address_work = string_copy(spamd_address_work);
660
661 /* remember user name and "been here" for it */
662 Ustrcpy(prev_user_name, user_name);
663 spam_ok = 1;
664
665 return override
666 ? OK /* always return OK, no matter what the score */
667 : spam_rc;
668
669 defer:
670 (void)fclose(mbox_file);
671 return DEFER;
672 }
673
674 #endif