fix non-bw case, late slip not re-run by my earlier tests
[exim.git] / src / src / mime.c
1 /*************************************************
2 * Exim - an Internet mail transport agent *
3 *************************************************/
4
5 /* Copyright (c) Tom Kistner <tom@duncanthrax.net> 2004 */
6 /* License: GPL */
7
8 #include "exim.h"
9 #ifdef WITH_CONTENT_SCAN
10 #include "mime.h"
11 #include <sys/stat.h>
12
/* Stream the decoders read the current MIME part from. mime_acl_check()
points it at the message file around each ACL call and resets it to NULL
afterwards; mime_decode() returns FAIL while it is NULL. */
FILE   *mime_stream           = NULL;

/* Boundary string of the multipart entity that encloses the current part,
or NULL when there is none. It is passed to the decoders, which stop at a
line beginning with "--" followed by this string. */
uschar *mime_current_boundary = NULL;
15
16 /*************************************************
17 * set MIME anomaly level + text *
18 *************************************************/
19
20 /* Small wrapper to set the two expandables which
21 give info on detected "problems" in MIME
22 encodings. Those are defined in mime.h. */
23
24 void mime_set_anomaly(int level, const char *text) {
25 mime_anomaly_level = level;
26 mime_anomaly_text = CUS text;
27 }
28
29
/*************************************************
*     decode one quoted-printable escape         *
*************************************************/

/* Called with qp_p pointing at a '=' character.

Arguments:
  qp_p   position of the '=' in the input line
  c      receives the result code:
           -2     decode error (no valid escape here)
           -1     soft line break, no character to write
           0-255  decoded character to write

Returns: the new read position: just past the two hex digits for a
         decoded character, the '\n' itself for a soft line break, or
         the unchanged qp_p on a decode error
*/

uschar *mime_decode_qp_char(uschar *qp_p, int *c) {
  uschar *cursor = qp_p + 1;      /* first character after the '=' */

  /* "=XY" with two hex digits: combine both nibbles */
  if (isxdigit(cursor[0]) && isxdigit(cursor[1])) {
    int value = 0;
    int i;

    for (i = 0; i < 2; i++) {
      int nibble = isdigit(cursor[i]) ? cursor[i] - '0'
                                      : toupper(cursor[i]) - 'A' + 10;
      value = (value << 4) | nibble;
    }
    *c = value;
    return cursor + 2;
  }

  /* Blanks, tabs and CRs between the '=' and the newline are tolerated */
  for (; *cursor == '\t' || *cursor == ' ' || *cursor == '\r'; cursor++)
    ;

  if (*cursor == '\n') {
    /* '=' at end of line is a soft line break */
    *c = -1;
    return cursor;
  }

  /* anything else is not a valid escape */
  *c = -2;
  return qp_p;
}
73
74
75 /* just dump MIME part without any decoding */
76 static ssize_t
77 mime_decode_asis(FILE* in, FILE* out, uschar* boundary)
78 {
79 ssize_t len, size = 0;
80 uschar buffer[MIME_MAX_LINE_LENGTH];
81
82 while(fgets(CS buffer, MIME_MAX_LINE_LENGTH, mime_stream) != NULL) {
83 if (boundary != NULL
84 && Ustrncmp(buffer, "--", 2) == 0
85 && Ustrncmp((buffer+2), boundary, Ustrlen(boundary)) == 0
86 )
87 break;
88
89 len = Ustrlen(buffer);
90 if (fwrite(buffer, 1, (size_t)len, out) < len)
91 return -1;
92 size += len;
93 } /* while */
94 return size;
95 }
96
97
98 /* decode base64 MIME part */
99 static ssize_t
100 mime_decode_base64(FILE* in, FILE* out, uschar* boundary)
101 {
102 uschar ibuf[MIME_MAX_LINE_LENGTH], obuf[MIME_MAX_LINE_LENGTH];
103 uschar *ipos, *opos;
104 ssize_t len, size = 0;
105 int bytestate = 0;
106
107 opos = obuf;
108
109 while (Ufgets(ibuf, MIME_MAX_LINE_LENGTH, in) != NULL)
110 {
111 if (boundary != NULL
112 && Ustrncmp(ibuf, "--", 2) == 0
113 && Ustrncmp((ibuf+2), boundary, Ustrlen(boundary)) == 0
114 )
115 break;
116
117 for (ipos = ibuf ; *ipos != '\r' && *ipos != '\n' && *ipos != 0; ++ipos) {
118 /* skip padding */
119 if (*ipos == '=') {
120 ++bytestate;
121 continue;
122 }
123 /* skip bad characters */
124 if (mime_b64[*ipos] == 128) {
125 mime_set_anomaly(MIME_ANOMALY_BROKEN_BASE64);
126 continue;
127 }
128 /* simple state-machine */
129 switch((bytestate++) & 3) {
130 case 0:
131 *opos = mime_b64[*ipos] << 2;
132 break;
133 case 1:
134 *opos |= mime_b64[*ipos] >> 4;
135 ++opos;
136 *opos = mime_b64[*ipos] << 4;
137 break;
138 case 2:
139 *opos |= mime_b64[*ipos] >> 2;
140 ++opos;
141 *opos = mime_b64[*ipos] << 6;
142 break;
143 case 3:
144 *opos |= mime_b64[*ipos];
145 ++opos;
146 break;
147 } /* switch */
148 } /* for */
149 /* something to write? */
150 len = opos - obuf;
151 if (len > 0) {
152 if (fwrite(obuf, 1, len, out) != len)
153 return -1; /* error */
154 size += len;
155 /* copy incomplete last byte to start of obuf, where we continue */
156 if ((bytestate & 3) != 0)
157 *obuf = *opos;
158 opos = obuf;
159 }
160 } /* while */
161
162 /* write out last byte if it was incomplete */
163 if (bytestate & 3) {
164 if (fwrite(obuf, 1, 1, out) != 1)
165 return -1;
166 ++size;
167 }
168
169 return size;
170 }
171
172
173 /* decode quoted-printable MIME part */
174 static ssize_t
175 mime_decode_qp(FILE* in, FILE* out, uschar* boundary)
176 {
177 uschar ibuf[MIME_MAX_LINE_LENGTH], obuf[MIME_MAX_LINE_LENGTH];
178 uschar *ipos, *opos;
179 ssize_t len, size = 0;
180
181 while (fgets(CS ibuf, MIME_MAX_LINE_LENGTH, in) != NULL)
182 {
183 if (boundary != NULL
184 && Ustrncmp(ibuf, "--", 2) == 0
185 && Ustrncmp((ibuf+2), boundary, Ustrlen(boundary)) == 0
186 )
187 break; /* todo: check for missing boundary */
188
189 ipos = ibuf;
190 opos = obuf;
191
192 while (*ipos != 0) {
193 if (*ipos == '=') {
194 int decode_qp_result;
195
196 ipos = mime_decode_qp_char(ipos, &decode_qp_result);
197
198 if (decode_qp_result == -2) {
199 /* Error from decoder. ipos is unchanged. */
200 mime_set_anomaly(MIME_ANOMALY_BROKEN_QP);
201 *opos = '=';
202 ++opos;
203 ++ipos;
204 }
205 else if (decode_qp_result == -1) {
206 break;
207 }
208 else if (decode_qp_result >= 0) {
209 *opos = decode_qp_result;
210 ++opos;
211 }
212 }
213 else {
214 *opos = *ipos;
215 ++opos;
216 ++ipos;
217 }
218 }
219 /* something to write? */
220 len = opos - obuf;
221 if (len > 0) {
222 if (fwrite(obuf, 1, len, out) != len)
223 return -1; /* error */
224 size += len;
225 }
226 }
227 return size;
228 }
229
230
231 FILE *mime_get_decode_file(uschar *pname, uschar *fname) {
232 FILE *f = NULL;
233 uschar *filename;
234
235 filename = (uschar *)malloc(2048);
236
237 if ((pname != NULL) && (fname != NULL)) {
238 (void)string_format(filename, 2048, "%s/%s", pname, fname);
239 f = modefopen(filename,"wb+",SPOOL_MODE);
240 }
241 else if (pname == NULL) {
242 f = modefopen(fname,"wb+",SPOOL_MODE);
243 }
244 else if (fname == NULL) {
245 int file_nr = 0;
246 int result = 0;
247
248 /* must find first free sequential filename */
249 do {
250 struct stat mystat;
251 (void)string_format(filename,2048,"%s/%s-%05u", pname, message_id, file_nr);
252 file_nr++;
253 /* security break */
254 if (file_nr >= 1024)
255 break;
256 result = stat(CS filename,&mystat);
257 }
258 while(result != -1);
259 f = modefopen(filename,"wb+",SPOOL_MODE);
260 };
261
262 /* set expansion variable */
263 mime_decoded_filename = filename;
264
265 return f;
266 }
267
268
269 int mime_decode(uschar **listptr) {
270 int sep = 0;
271 uschar *list = *listptr;
272 uschar *option;
273 uschar option_buffer[1024];
274 uschar decode_path[1024];
275 FILE *decode_file = NULL;
276 long f_pos = 0;
277 ssize_t size_counter = 0;
278 ssize_t (*decode_function)(FILE*, FILE*, uschar*);
279
280 if (mime_stream == NULL)
281 return FAIL;
282
283 f_pos = ftell(mime_stream);
284
285 /* build default decode path (will exist since MBOX must be spooled up) */
286 (void)string_format(decode_path,1024,"%s/scan/%s",spool_directory,message_id);
287
288 /* try to find 1st option */
289 if ((option = string_nextinlist(&list, &sep,
290 option_buffer,
291 sizeof(option_buffer))) != NULL) {
292
293 /* parse 1st option */
294 if ( (Ustrcmp(option,"false") == 0) || (Ustrcmp(option,"0") == 0) ) {
295 /* explicitly no decoding */
296 return FAIL;
297 };
298
299 if (Ustrcmp(option,"default") == 0) {
300 /* explicit default path + file names */
301 goto DEFAULT_PATH;
302 };
303
304 if (option[0] == '/') {
305 struct stat statbuf;
306
307 memset(&statbuf,0,sizeof(statbuf));
308
309 /* assume either path or path+file name */
310 if ( (stat(CS option, &statbuf) == 0) && S_ISDIR(statbuf.st_mode) )
311 /* is directory, use it as decode_path */
312 decode_file = mime_get_decode_file(option, NULL);
313 else
314 /* does not exist or is a file, use as full file name */
315 decode_file = mime_get_decode_file(NULL, option);
316 }
317 else
318 /* assume file name only, use default path */
319 decode_file = mime_get_decode_file(decode_path, option);
320 }
321 else
322 /* no option? patch default path */
323 DEFAULT_PATH: decode_file = mime_get_decode_file(decode_path, NULL);
324
325 if (decode_file == NULL)
326 return DEFER;
327
328 /* decode according to mime type */
329 if (mime_content_transfer_encoding == NULL)
330 /* no encoding, dump as-is */
331 decode_function = mime_decode_asis;
332 else if (Ustrcmp(mime_content_transfer_encoding, "base64") == 0)
333 decode_function = mime_decode_base64;
334 else if (Ustrcmp(mime_content_transfer_encoding, "quoted-printable") == 0)
335 decode_function = mime_decode_qp;
336 else
337 /* unknown encoding type, just dump as-is */
338 decode_function = mime_decode_asis;
339
340 size_counter = decode_function(mime_stream, decode_file, mime_current_boundary);
341
342 clearerr(mime_stream);
343 fseek(mime_stream, f_pos, SEEK_SET);
344
345 if (fclose(decode_file) != 0 || size_counter < 0)
346 return DEFER;
347
348 /* round up to the next KiB */
349 mime_content_size = (size_counter + 1023) / 1024;
350
351 return OK;
352 }
353
/* Read one (possibly folded) header line from f and store a normalized
copy in header.

Normalization performed while copying:
  - CR characters and other control characters are dropped
  - a newline followed by a blank or tab continues the header
  - outside of a parameter value: blanks and tabs are dropped, text in
    (brackets) is dropped, and a backslash makes the next character be
    copied literally
  - a '=' outside of brackets starts a parameter value; inside a value,
    leading blanks/tabs and all double quotes are dropped; a ';' ends it
  - the result always ends in ';' followed by a terminating zero

Arguments:
  f        stream to read from
  header   receives the result; at most MIME_MAX_HEADER_SIZE characters
           plus the terminating zero are stored

Returns:   0 on EOF or for an empty line (result is just ";"), else 1 */
int mime_get_header(FILE *f, uschar *header) {
  int ch = EOF;
  int finished = 0;
  int value_state = 0;     /* 0: outside a value, 1: skipping leading
                              whitespace of a value, 2: copying a value */
  int comment_depth = 0;   /* nesting level of open brackets */
  int used = 0;            /* characters stored so far */

  while (!finished) {

    ch = fgetc(f);
    if (ch == EOF)
      break;

    /* always skip CRs */
    if (ch == '\r')
      continue;

    if (ch == '\n') {
      if (used > 0) {
        /* a following blank or tab means the header continues */
        ch = fgetc(f);
        if (ch == EOF)
          break;
        if ( (ch == ' ') || (ch == '\t') )
          continue;
        (void)ungetc(ch,f);
      }
      /* end of the header, terminate with ';' */
      ch = ';';
      finished = 1;
    }

    /* skip control characters */
    if (ch < 32)
      continue;

    if (value_state != 0) {
      /* --------- value mode ----------- */
      /* skip leading whitespace */
      if ( (value_state == 1) && ((ch == ' ') || (ch == '\t')) )
        continue;

      /* first non-whitespace char seen, value data is copied from here */
      value_state = 2;

      /* skip quotes */
      if (ch == '"')
        continue;

      /* leave value mode on ';' */
      if (ch == ';')
        value_state = 0;
    }
    else {
      /* -------- non-value mode -------- */
      /* skip whitespace + tabs */
      if ( (ch == ' ') || (ch == '\t') )
        continue;

      if (ch == '\\') {
        /* quote next char. can be used to escape brackets. */
        ch = fgetc(f);
        if (ch == EOF)
          break;
      }
      else if (ch == '(') {
        comment_depth++;
        continue;
      }
      else if ( (ch == ')') && comment_depth ) {
        comment_depth--;
        continue;
      }
      else if ( (ch == '=') && !comment_depth ) {
        /* enter value mode */
        value_state = 1;
      }

      /* skip chars while we are in a comment */
      if (comment_depth > 0)
        continue;
    }

    /* store the character */
    header[used++] = (uschar)ch;

    /* stop if the header buffer is full */
    if (used > MIME_MAX_HEADER_SIZE-1)
      finished = 1;
  }

  /* make sure the result ends in ';' */
  if ( (used > 0) && (header[used-1] != ';') )
    header[used-1] = ';';

  /* 0-terminate */
  header[used] = '\0';

  /* return 0 for EOF or empty line */
  return ((ch == EOF) || (used == 1)) ? 0 : 1;
}
457
458
459 int mime_acl_check(uschar *acl, FILE *f, struct mime_boundary_context *context,
460 uschar **user_msgptr, uschar **log_msgptr) {
461 int rc = OK;
462 uschar *header = NULL;
463 struct mime_boundary_context nested_context;
464
465 /* reserve a line buffer to work in */
466 header = (uschar *)malloc(MIME_MAX_HEADER_SIZE+1);
467 if (header == NULL) {
468 log_write(0, LOG_PANIC,
469 "MIME ACL: can't allocate %d bytes of memory.", MIME_MAX_HEADER_SIZE+1);
470 return DEFER;
471 };
472
473 /* Not actually used at the moment, but will be vital to fixing
474 * some RFC 2046 nonconformance later... */
475 nested_context.parent = context;
476
477 /* loop through parts */
478 while(1) {
479
480 /* reset all per-part mime variables */
481 mime_anomaly_level = 0;
482 mime_anomaly_text = NULL;
483 mime_boundary = NULL;
484 mime_charset = NULL;
485 mime_decoded_filename = NULL;
486 mime_filename = NULL;
487 mime_content_description = NULL;
488 mime_content_disposition = NULL;
489 mime_content_id = NULL;
490 mime_content_transfer_encoding = NULL;
491 mime_content_type = NULL;
492 mime_is_multipart = 0;
493 mime_content_size = 0;
494
495 /*
496 If boundary is null, we assume that *f is positioned on the start of headers (for example,
497 at the very beginning of a message.
498 If a boundary is given, we must first advance to it to reach the start of the next header
499 block.
500 */
501
502 /* NOTE -- there's an error here -- RFC2046 specifically says to
503 * check for outer boundaries. This code doesn't do that, and
504 * I haven't fixed this.
505 *
506 * (I have moved partway towards adding support, however, by adding
507 * a "parent" field to my new boundary-context structure.)
508 */
509 if (context != NULL) {
510 while(fgets(CS header, MIME_MAX_HEADER_SIZE, f) != NULL) {
511 /* boundary line must start with 2 dashes */
512 if (Ustrncmp(header,"--",2) == 0) {
513 if (Ustrncmp((header+2),context->boundary,Ustrlen(context->boundary)) == 0) {
514 /* found boundary */
515 if (Ustrncmp((header+2+Ustrlen(context->boundary)),"--",2) == 0) {
516 /* END boundary found */
517 debug_printf("End boundary found %s\n", context->boundary);
518 return rc;
519 }
520 else {
521 debug_printf("Next part with boundary %s\n", context->boundary);
522 };
523 /* can't use break here */
524 goto DECODE_HEADERS;
525 }
526 };
527 }
528 /* Hit EOF or read error. Ugh. */
529 debug_printf("Hit EOF ...\n");
530 return rc;
531 };
532
533 DECODE_HEADERS:
534 /* parse headers, set up expansion variables */
535 while(mime_get_header(f,header)) {
536 int i;
537 /* loop through header list */
538 for (i = 0; i < mime_header_list_size; i++) {
539 uschar *header_value = NULL;
540 int header_value_len = 0;
541
542 /* found an interesting header? */
543 if (strncmpic(mime_header_list[i].name,header,mime_header_list[i].namelen) == 0) {
544 uschar *p = header + mime_header_list[i].namelen;
545 /* yes, grab the value (normalize to lower case)
546 and copy to its corresponding expansion variable */
547 while(*p != ';') {
548 *p = tolower(*p);
549 p++;
550 };
551 header_value_len = (p - (header + mime_header_list[i].namelen));
552 header_value = (uschar *)malloc(header_value_len+1);
553 memset(header_value,0,header_value_len+1);
554 p = header + mime_header_list[i].namelen;
555 Ustrncpy(header_value, p, header_value_len);
556 debug_printf("Found %s MIME header, value is '%s'\n", mime_header_list[i].name, header_value);
557 *((uschar **)(mime_header_list[i].value)) = header_value;
558
559 /* make p point to the next character after the closing ';' */
560 p += (header_value_len+1);
561
562 /* grab all param=value tags on the remaining line, check if they are interesting */
563 NEXT_PARAM_SEARCH: while (*p != 0) {
564 int j;
565 for (j = 0; j < mime_parameter_list_size; j++) {
566 uschar *param_value = NULL;
567 int param_value_len = 0;
568
569 /* found an interesting parameter? */
570 if (strncmpic(mime_parameter_list[j].name,p,mime_parameter_list[j].namelen) == 0) {
571 uschar *q = p + mime_parameter_list[j].namelen;
572 /* yes, grab the value and copy to its corresponding expansion variable */
573 while(*q != ';') q++;
574 param_value_len = (q - (p + mime_parameter_list[j].namelen));
575 param_value = (uschar *)malloc(param_value_len+1);
576 memset(param_value,0,param_value_len+1);
577 q = p + mime_parameter_list[j].namelen;
578 Ustrncpy(param_value, q, param_value_len);
579 param_value = rfc2047_decode(param_value, check_rfc2047_length, NULL, 32, &param_value_len, &q);
580 debug_printf("Found %s MIME parameter in %s header, value is '%s'\n", mime_parameter_list[j].name, mime_header_list[i].name, param_value);
581 *((uschar **)(mime_parameter_list[j].value)) = param_value;
582 p += (mime_parameter_list[j].namelen + param_value_len + 1);
583 goto NEXT_PARAM_SEARCH;
584 };
585 }
586 /* There is something, but not one of our interesting parameters.
587 Advance to the next semicolon */
588 while(*p != ';') p++;
589 p++;
590 };
591 };
592 };
593 };
594
595 /* set additional flag variables (easier access) */
596 if ( (mime_content_type != NULL) &&
597 (Ustrncmp(mime_content_type,"multipart",9) == 0) )
598 mime_is_multipart = 1;
599
600 /* Make a copy of the boundary pointer.
601 Required since mime_boundary is global
602 and can be overwritten further down in recursion */
603 nested_context.boundary = mime_boundary;
604
605 /* raise global counter */
606 mime_part_count++;
607
608 /* copy current file handle to global variable */
609 mime_stream = f;
610 mime_current_boundary = context ? context->boundary : 0;
611
612 /* Note the context */
613 mime_is_coverletter = !(context && context->context == MBC_ATTACHMENT);
614
615 /* call ACL handling function */
616 rc = acl_check(ACL_WHERE_MIME, NULL, acl, user_msgptr, log_msgptr);
617
618 mime_stream = NULL;
619 mime_current_boundary = NULL;
620
621 if (rc != OK) break;
622
623 /* If we have a multipart entity and a boundary, go recursive */
624 if ( (mime_content_type != NULL) &&
625 (nested_context.boundary != NULL) &&
626 (Ustrncmp(mime_content_type,"multipart",9) == 0) ) {
627 debug_printf("Entering multipart recursion, boundary '%s'\n", nested_context.boundary);
628
629 if (context && context->context == MBC_ATTACHMENT)
630 nested_context.context = MBC_ATTACHMENT;
631 else if (!Ustrcmp(mime_content_type,"multipart/alternative")
632 || !Ustrcmp(mime_content_type,"multipart/related"))
633 nested_context.context = MBC_COVERLETTER_ALL;
634 else
635 nested_context.context = MBC_COVERLETTER_ONESHOT;
636
637 rc = mime_acl_check(acl, f, &nested_context, user_msgptr, log_msgptr);
638 if (rc != OK) break;
639 }
640 else if ( (mime_content_type != NULL) &&
641 (Ustrncmp(mime_content_type,"message/rfc822",14) == 0) ) {
642 uschar *rfc822name = NULL;
643 uschar filename[2048];
644 int file_nr = 0;
645 int result = 0;
646
647 /* must find first free sequential filename */
648 do {
649 struct stat mystat;
650 (void)string_format(filename,2048,"%s/scan/%s/__rfc822_%05u", spool_directory, message_id, file_nr);
651 file_nr++;
652 /* security break */
653 if (file_nr >= 128)
654 goto NO_RFC822;
655 result = stat(CS filename,&mystat);
656 }
657 while(result != -1);
658
659 rfc822name = filename;
660
661 /* decode RFC822 attachment */
662 mime_decoded_filename = NULL;
663 mime_stream = f;
664 mime_current_boundary = context ? context->boundary : NULL;
665 mime_decode(&rfc822name);
666 mime_stream = NULL;
667 mime_current_boundary = NULL;
668 if (mime_decoded_filename == NULL) {
669 /* decoding failed */
670 log_write(0, LOG_MAIN,
671 "mime_regex acl condition warning - could not decode RFC822 MIME part to file.");
672 return DEFER;
673 };
674 mime_decoded_filename = NULL;
675 };
676
677 NO_RFC822:
678 /* If the boundary of this instance is NULL, we are finished here */
679 if (context == NULL) break;
680
681 if (context->context == MBC_COVERLETTER_ONESHOT)
682 context->context = MBC_ATTACHMENT;
683
684 };
685
686 return rc;
687 }
688
689 #endif