| 1 | /* $Cambridge: exim/src/src/regex.c,v 1.4 2005/02/17 11:58:26 ph10 Exp $ */ |
| 2 | |
| 3 | /************************************************* |
| 4 | * Exim - an Internet mail transport agent * |
| 5 | *************************************************/ |
| 6 | |
| 7 | /* Copyright (c) Tom Kistner <tom@duncanthrax.net> 2003-???? */ |
| 8 | /* License: GPL */ |
| 9 | |
| 10 | /* Code for matching regular expressions against headers and body. |
| 11 | Called from acl.c. */ |
| 12 | |
| 13 | #include "exim.h" |
| 14 | #ifdef WITH_CONTENT_SCAN |
| 15 | #include <unistd.h> |
| 16 | #include <sys/mman.h> |
| 17 | |
| 18 | /* Structure to hold a list of Regular expressions */ |
| 19 | typedef struct pcre_list { |
| 20 | pcre *re; |
| 21 | uschar *pcre_text; |
| 22 | struct pcre_list *next; |
| 23 | } pcre_list; |
| 24 | |
| 25 | uschar regex_match_string_buffer[1024]; |
| 26 | |
| 27 | extern FILE *mime_stream; |
| 28 | extern uschar *mime_current_boundary; |
| 29 | |
| 30 | int regex(uschar **listptr) { |
| 31 | int sep = 0; |
| 32 | uschar *list = *listptr; |
| 33 | uschar *regex_string; |
| 34 | uschar regex_string_buffer[1024]; |
| 35 | unsigned long mbox_size; |
| 36 | FILE *mbox_file; |
| 37 | pcre *re; |
| 38 | pcre_list *re_list_head = NULL; |
| 39 | pcre_list *re_list_item; |
| 40 | const char *pcre_error; |
| 41 | int pcre_erroffset; |
| 42 | uschar *linebuffer; |
| 43 | long f_pos = 0; |
| 44 | |
| 45 | /* reset expansion variable */ |
| 46 | regex_match_string = NULL; |
| 47 | |
| 48 | if (mime_stream == NULL) { |
| 49 | /* We are in the DATA ACL */ |
| 50 | mbox_file = spool_mbox(&mbox_size); |
| 51 | if (mbox_file == NULL) { |
| 52 | /* error while spooling */ |
| 53 | log_write(0, LOG_MAIN|LOG_PANIC, |
| 54 | "regex acl condition: error while creating mbox spool file"); |
| 55 | return DEFER; |
| 56 | }; |
| 57 | } |
| 58 | else { |
| 59 | f_pos = ftell(mime_stream); |
| 60 | mbox_file = mime_stream; |
| 61 | }; |
| 62 | |
| 63 | /* precompile our regexes */ |
| 64 | while ((regex_string = string_nextinlist(&list, &sep, |
| 65 | regex_string_buffer, |
| 66 | sizeof(regex_string_buffer))) != NULL) { |
| 67 | |
| 68 | /* parse option */ |
| 69 | if ( (strcmpic(regex_string,US"false") == 0) || |
| 70 | (Ustrcmp(regex_string,"0") == 0) ) { |
| 71 | /* explicitly no matching */ |
| 72 | continue; |
| 73 | }; |
| 74 | |
| 75 | /* compile our regular expression */ |
| 76 | re = pcre_compile( CS regex_string, |
| 77 | 0, |
| 78 | &pcre_error, |
| 79 | &pcre_erroffset, |
| 80 | NULL ); |
| 81 | |
| 82 | if (re == NULL) { |
| 83 | log_write(0, LOG_MAIN, |
| 84 | "regex acl condition warning - error in regex '%s': %s at offset %d, skipped.", regex_string, pcre_error, pcre_erroffset); |
| 85 | continue; |
| 86 | } |
| 87 | else { |
| 88 | re_list_item = store_get(sizeof(pcre_list)); |
| 89 | re_list_item->re = re; |
| 90 | re_list_item->pcre_text = string_copy(regex_string); |
| 91 | re_list_item->next = re_list_head; |
| 92 | re_list_head = re_list_item; |
| 93 | }; |
| 94 | }; |
| 95 | |
| 96 | /* no regexes -> nothing to do */ |
| 97 | if (re_list_head == NULL) { |
| 98 | return FAIL; |
| 99 | }; |
| 100 | |
| 101 | /* match each line against all regexes */ |
| 102 | linebuffer = store_get(32767); |
| 103 | while (fgets(CS linebuffer, 32767, mbox_file) != NULL) { |
| 104 | if ( (mime_stream != NULL) && (mime_current_boundary != NULL) ) { |
| 105 | /* check boundary */ |
| 106 | if (Ustrncmp(linebuffer,"--",2) == 0) { |
| 107 | if (Ustrncmp((linebuffer+2),mime_current_boundary,Ustrlen(mime_current_boundary)) == 0) |
| 108 | /* found boundary */ |
| 109 | break; |
| 110 | }; |
| 111 | }; |
| 112 | re_list_item = re_list_head; |
| 113 | do { |
| 114 | /* try matcher on the line */ |
| 115 | if (pcre_exec(re_list_item->re, NULL, CS linebuffer, |
| 116 | (int)Ustrlen(linebuffer), 0, 0, NULL, 0) >= 0) { |
| 117 | Ustrncpy(regex_match_string_buffer, re_list_item->pcre_text, 1023); |
| 118 | regex_match_string = regex_match_string_buffer; |
| 119 | if (mime_stream == NULL) |
| 120 | fclose(mbox_file); |
| 121 | else { |
| 122 | clearerr(mime_stream); |
| 123 | fseek(mime_stream,f_pos,SEEK_SET); |
| 124 | }; |
| 125 | return OK; |
| 126 | }; |
| 127 | re_list_item = re_list_item->next; |
| 128 | } while (re_list_item != NULL); |
| 129 | }; |
| 130 | |
| 131 | if (mime_stream == NULL) |
| 132 | fclose(mbox_file); |
| 133 | else { |
| 134 | clearerr(mime_stream); |
| 135 | fseek(mime_stream,f_pos,SEEK_SET); |
| 136 | }; |
| 137 | |
| 138 | /* no matches ... */ |
| 139 | return FAIL; |
| 140 | } |
| 141 | |
| 142 | |
| 143 | int mime_regex(uschar **listptr) { |
| 144 | int sep = 0; |
| 145 | uschar *list = *listptr; |
| 146 | uschar *regex_string; |
| 147 | uschar regex_string_buffer[1024]; |
| 148 | pcre *re; |
| 149 | pcre_list *re_list_head = NULL; |
| 150 | pcre_list *re_list_item; |
| 151 | const char *pcre_error; |
| 152 | int pcre_erroffset; |
| 153 | FILE *f; |
| 154 | uschar *mime_subject = NULL; |
| 155 | int mime_subject_len = 0; |
| 156 | |
| 157 | /* reset expansion variable */ |
| 158 | regex_match_string = NULL; |
| 159 | |
| 160 | /* precompile our regexes */ |
| 161 | while ((regex_string = string_nextinlist(&list, &sep, |
| 162 | regex_string_buffer, |
| 163 | sizeof(regex_string_buffer))) != NULL) { |
| 164 | |
| 165 | /* parse option */ |
| 166 | if ( (strcmpic(regex_string,US"false") == 0) || |
| 167 | (Ustrcmp(regex_string,"0") == 0) ) { |
| 168 | /* explicitly no matching */ |
| 169 | continue; |
| 170 | }; |
| 171 | |
| 172 | /* compile our regular expression */ |
| 173 | re = pcre_compile( CS regex_string, |
| 174 | 0, |
| 175 | &pcre_error, |
| 176 | &pcre_erroffset, |
| 177 | NULL ); |
| 178 | |
| 179 | if (re == NULL) { |
| 180 | log_write(0, LOG_MAIN, |
| 181 | "regex acl condition warning - error in regex '%s': %s at offset %d, skipped.", regex_string, pcre_error, pcre_erroffset); |
| 182 | continue; |
| 183 | } |
| 184 | else { |
| 185 | re_list_item = store_get(sizeof(pcre_list)); |
| 186 | re_list_item->re = re; |
| 187 | re_list_item->pcre_text = string_copy(regex_string); |
| 188 | re_list_item->next = re_list_head; |
| 189 | re_list_head = re_list_item; |
| 190 | }; |
| 191 | }; |
| 192 | |
| 193 | /* no regexes -> nothing to do */ |
| 194 | if (re_list_head == NULL) { |
| 195 | return FAIL; |
| 196 | }; |
| 197 | |
| 198 | /* check if the file is already decoded */ |
| 199 | if (mime_decoded_filename == NULL) { |
| 200 | uschar *empty = US""; |
| 201 | /* no, decode it first */ |
| 202 | mime_decode(&empty); |
| 203 | if (mime_decoded_filename == NULL) { |
| 204 | /* decoding failed */ |
| 205 | log_write(0, LOG_MAIN, |
| 206 | "mime_regex acl condition warning - could not decode MIME part to file."); |
| 207 | return DEFER; |
| 208 | }; |
| 209 | }; |
| 210 | |
| 211 | |
| 212 | /* open file */ |
| 213 | f = fopen(CS mime_decoded_filename, "r"); |
| 214 | if (f == NULL) { |
| 215 | /* open failed */ |
| 216 | log_write(0, LOG_MAIN, |
| 217 | "mime_regex acl condition warning - can't open '%s' for reading.", mime_decoded_filename); |
| 218 | return DEFER; |
| 219 | }; |
| 220 | |
| 221 | /* get 32k memory */ |
| 222 | mime_subject = (uschar *)store_get(32767); |
| 223 | |
| 224 | /* read max 32k chars from file */ |
| 225 | mime_subject_len = fread(mime_subject, 1, 32766, f); |
| 226 | |
| 227 | re_list_item = re_list_head; |
| 228 | do { |
| 229 | /* try matcher on the mmapped file */ |
| 230 | debug_printf("Matching '%s'\n", re_list_item->pcre_text); |
| 231 | if (pcre_exec(re_list_item->re, NULL, CS mime_subject, |
| 232 | mime_subject_len, 0, 0, NULL, 0) >= 0) { |
| 233 | Ustrncpy(regex_match_string_buffer, re_list_item->pcre_text, 1023); |
| 234 | regex_match_string = regex_match_string_buffer; |
| 235 | fclose(f); |
| 236 | return OK; |
| 237 | }; |
| 238 | re_list_item = re_list_item->next; |
| 239 | } while (re_list_item != NULL); |
| 240 | |
| 241 | fclose(f); |
| 242 | |
| 243 | /* no matches ... */ |
| 244 | return FAIL; |
| 245 | } |
| 246 | |
| 247 | #endif |