| 1 | /* $Cambridge: exim/src/src/rfc2047.c,v 1.2 2005/01/04 10:00:42 ph10 Exp $ */ |
| 2 | |
| 3 | /************************************************* |
| 4 | * Exim - an Internet mail transport agent * |
| 5 | *************************************************/ |
| 6 | |
| 7 | /* Copyright (c) University of Cambridge 1995 - 2005 */ |
| 8 | /* See the file NOTICE for conditions of use and distribution. */ |
| 9 | |
| 10 | /* This file contains a function for decoding message header lines that may |
| 11 | contain encoded "words" according to the rules described in |
| 12 | |
| 13 | RFC-2047 at http://www.ietf.org/rfc/rfc2047.txt |
| 14 | |
| 15 | The function is a rewritten version of code created by Norihisa Washitake. |
| 16 | The original could be used both inside Exim (as part of a patch) or in a |
| 17 | freestanding form. The original contained some built-in code conversions; I |
| 18 | have chosen only to do code conversions if iconv() is supported by the OS. |
| 19 | Because there were quite a lot of hacks to be done, for a variety of reasons, |
| 20 | I rewrote the code. |
| 21 | |
| 22 | You can find the latest version of the original library at |
| 23 | |
| 24 | http://washitake.com/mail/exim/mime/ |
| 25 | |
| 26 | The code below is almost completely unlike the original. */ |
| 27 | |
| 28 | |
| 29 | #include "exim.h" |
| 30 | |
| 31 | |
| 32 | /************************************************* |
| 33 | * Do a QP conversion * |
| 34 | *************************************************/ |
| 35 | |
| 36 | /* This function decodes "quoted printable" into bytes. |
| 37 | |
| 38 | Arguments: |
| 39 | string the string that includes QP escapes |
| 40 | ptrptr where to return pointer to the decoded string |
| 41 | |
| 42 | Returns: the length of the decoded string, or -1 on failure |
| 43 | */ |
| 44 | |
| 45 | static int |
| 46 | rfc2047_qpdecode(uschar *string, uschar **ptrptr) |
| 47 | { |
| 48 | int len = 0; |
| 49 | uschar *ptr; |
| 50 | |
| 51 | ptr = *ptrptr = store_get(Ustrlen(string) + 1); /* No longer than this */ |
| 52 | |
| 53 | while (*string != 0) |
| 54 | { |
| 55 | register int ch = *string++; |
| 56 | |
| 57 | if (ch == '_') *ptr++ = ' '; |
| 58 | else if (ch == '=') |
| 59 | { |
| 60 | int a = *string; |
| 61 | int b = (a == 0)? 0 : string[1]; |
| 62 | if (!isxdigit(a) || !isxdigit(b)) return -1; /* Bad QP string */ |
| 63 | *ptr++ = ((Ustrchr(hex_digits, tolower(a)) - hex_digits) << 4) + |
| 64 | Ustrchr(hex_digits, tolower(b)) - hex_digits; |
| 65 | string += 2; |
| 66 | } |
| 67 | else if (ch == ' ' || ch == '\t') return -1; /* Whitespace is illegal */ |
| 68 | else *ptr++ = ch; |
| 69 | |
| 70 | len++; |
| 71 | } |
| 72 | |
| 73 | *ptr = 0; |
| 74 | return len; |
| 75 | } |
| 76 | |
| 77 | |
| 78 | |
| 79 | /************************************************* |
| 80 | * Decode next MIME word * |
| 81 | *************************************************/ |
| 82 | |
| 83 | /* Scan a string to see if a MIME word exists; pass back the separator |
| 84 | points in the string. |
| 85 | |
| 86 | Arguments: |
| 87 | string subject string |
| 88 | lencheck TRUE to enforce maximum length check |
| 89 | q1ptr pass back address of first question mark |
| 90 | q2ptr pass back address of second question mark |
| 91 | endptr pass back address of final ?= |
| 92 | dlenptr pass back length of decoded string |
| 93 | dptrptr pass back pointer to decoded string |
| 94 | |
| 95 | Returns: address of =? or NULL if not present |
| 96 | */ |
| 97 | |
| 98 | static uschar * |
| 99 | decode_mimeword(uschar *string, BOOL lencheck, uschar **q1ptr, uschar **q2ptr, |
| 100 | uschar **endptr, size_t *dlenptr, uschar **dptrptr) |
| 101 | { |
| 102 | uschar *mimeword; |
| 103 | for (;; string = mimeword + 2) |
| 104 | { |
| 105 | int encoding; |
| 106 | int dlen = -1; |
| 107 | |
| 108 | if ((mimeword = Ustrstr(string, "=?")) == NULL || |
| 109 | (*q1ptr = Ustrchr(mimeword+2, '?')) == NULL || |
| 110 | (*q2ptr = Ustrchr(*q1ptr+1, '?')) == NULL || |
| 111 | (*endptr = Ustrstr(*q2ptr+1, "?=")) == NULL) return NULL; |
| 112 | |
| 113 | /* We have found =?xxx?xxx?xxx?= in the string. Optionally check the |
| 114 | length, and that the second field is just one character long. If not, |
| 115 | continue the loop to search again. We must start just after the initial =? |
| 116 | because we might have found =?xxx=?xxx?xxx?xxx?=. */ |
| 117 | |
| 118 | if ((lencheck && *endptr - mimeword > 73) || *q2ptr - *q1ptr != 2) continue; |
| 119 | |
| 120 | /* Get the encoding letter, and decode the data string. */ |
| 121 | |
| 122 | encoding = toupper((*q1ptr)[1]); |
| 123 | **endptr = 0; |
| 124 | if (encoding == 'B') |
| 125 | dlen = auth_b64decode(*q2ptr+1, dptrptr); |
| 126 | else if (encoding == 'Q') |
| 127 | dlen = rfc2047_qpdecode(*q2ptr+1, dptrptr); |
| 128 | **endptr = '?'; /* restore */ |
| 129 | |
| 130 | /* If the decoding succeeded, we are done. Set the length of the decoded |
| 131 | string, and pass back the initial pointer. Otherwise, the loop continues. */ |
| 132 | |
| 133 | if (dlen >= 0) |
| 134 | { |
| 135 | *dlenptr = (size_t)dlen; |
| 136 | return mimeword; |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | /* Control should never actually get here */ |
| 141 | } |
| 142 | |
| 143 | |
| 144 | |
| 145 | /************************************************* |
| 146 | * Decode and convert an RFC 2047 string * |
| 147 | *************************************************/ |
| 148 | |
| 149 | /* There are two functions defined here. The original one was rfc2047_decode() |
| 150 | and it was documented in the local_scan() interface. I needed to add an extra |
| 151 | argument for use by expand_string(), so I created rfc2047_decode2() for that |
| 152 | purpose. The original function became a stub that just supplies NULL for the |
| 153 | new argument (sizeptr). |
| 154 | |
| 155 | An RFC 2047-encoded string may contain one or more "words", each of the |
| 156 | form =?...?.?...?= with the first ... specifying the character code, the |
| 157 | second being Q (for quoted printable) or B for Base64 encoding. The third ... |
| 158 | is the actual data. |
| 159 | |
| 160 | This function first decodes each "word" into bytes from the Q or B encoding. |
| 161 | Then, if provided with the name of a charset encoding, and if iconv() is |
| 162 | available, it attempts to translate the result to the named character set. |
| 163 | If this fails, the binary string is returned with an error message. |
| 164 | |
| 165 | If a binary zero is encountered in the decoded string, it is replaced by the |
| 166 | contents of the zeroval argument. For use with Exim headers, the value must not |
| 167 | be 0 because they are handled as zero-terminated strings. When zeroval==0, |
| 168 | lenptr should not be NULL. |
| 169 | |
| 170 | Arguments: |
| 171 | string the subject string |
| 172 | lencheck TRUE to enforce maximum MIME word length |
| 173 | target the name of the target encoding for MIME words, or NULL for |
| 174 | no charset translation |
| 175 | zeroval the value to use for binary zero bytes |
| 176 | lenptr if not NULL, the length of the result is returned via |
| 177 | this variable |
| 178 | sizeptr if not NULL, the length of a new store block in which the |
| 179 | result is built is placed here; if no new store is obtained, |
| 180 | the value is not changed |
| 181 | error for error messages; NULL if no problem; this can be set |
| 182 | when the yield is non-NULL if there was a charset |
| 183 | translation problem |
| 184 | |
| 185 | Returns: the decoded, converted string, or NULL on error; if there are |
| 186 | no MIME words in the string, the original string is returned |
| 187 | */ |
| 188 | |
| 189 | uschar * |
| 190 | rfc2047_decode2(uschar *string, BOOL lencheck, uschar *target, int zeroval, |
| 191 | int *lenptr, int *sizeptr, uschar **error) |
| 192 | { |
| 193 | int ptr = 0; |
| 194 | int size = Ustrlen(string); |
| 195 | size_t dlen; |
| 196 | uschar *dptr, *yield; |
| 197 | uschar *mimeword, *q1, *q2, *endword; |
| 198 | |
| 199 | *error = NULL; |
| 200 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); |
| 201 | |
| 202 | if (mimeword == NULL) |
| 203 | { |
| 204 | if (lenptr != NULL) *lenptr = size; |
| 205 | return string; |
| 206 | } |
| 207 | |
| 208 | /* Scan through the string, decoding MIME words and copying intermediate text, |
| 209 | building the result as we go. The result may be longer than the input if it is |
| 210 | translated into a multibyte code such as UTF-8. That's why we use the dynamic |
| 211 | string building code. */ |
| 212 | |
| 213 | yield = store_get(++size); |
| 214 | |
| 215 | while (mimeword != NULL) |
| 216 | { |
| 217 | |
| 218 | #if HAVE_ICONV |
| 219 | iconv_t icd = (iconv_t)(-1); |
| 220 | #endif |
| 221 | |
| 222 | if (mimeword != string) |
| 223 | yield = string_cat(yield, &size, &ptr, string, mimeword - string); |
| 224 | |
| 225 | /* Do a charset translation if required. This is supported only on hosts |
| 226 | that have the iconv() function. Translation errors set error, but carry on, |
| 227 | using the untranslated data. If there is more than one error, the message |
| 228 | passed back refers to the final one. We use a loop to cater for the case |
| 229 | of long strings - the RFC puts limits on the length, but it's best to be |
| 230 | robust. */ |
| 231 | |
| 232 | #if HAVE_ICONV |
| 233 | *q1 = 0; |
| 234 | if (target != NULL && strcmpic(target, mimeword+2) != 0) |
| 235 | { |
| 236 | icd = iconv_open(CS target, CS(mimeword+2)); |
| 237 | |
| 238 | if (icd == (iconv_t)(-1)) |
| 239 | { |
| 240 | *error = string_sprintf("iconv_open(\"%s\", \"%s\") failed: %s%s", |
| 241 | target, mimeword+2, strerror(errno), |
| 242 | (errno == EINVAL)? " (maybe unsupported conversion)" : ""); |
| 243 | } |
| 244 | } |
| 245 | *q1 = '?'; |
| 246 | #endif |
| 247 | |
| 248 | while (dlen > 0) |
| 249 | { |
| 250 | uschar *tptr = NULL; /* Stops compiler warning */ |
| 251 | int tlen = -1; |
| 252 | |
| 253 | #if HAVE_ICONV |
| 254 | uschar tbuffer[256]; |
| 255 | uschar *outptr = tbuffer; |
| 256 | size_t outleft = sizeof(tbuffer); |
| 257 | |
| 258 | /* If translation is required, go for it. */ |
| 259 | |
| 260 | if (icd != (iconv_t)(-1)) |
| 261 | { |
| 262 | (void)iconv(icd, (ICONV_ARG2_TYPE)(&dptr), &dlen, CSS &outptr, &outleft); |
| 263 | |
| 264 | /* If outptr has been adjusted, there is some output. Set up to add it to |
| 265 | the output buffer. The function will have adjusted dptr and dlen. If |
| 266 | iconv() stopped because of an error, we'll pick it up next time when |
| 267 | there's no output. |
| 268 | |
| 269 | If there is no output, we expect there to have been a translation |
| 270 | error, because we know there was at least one input byte. We leave the |
| 271 | value of tlen as -1, which causes the rest of the input to be copied |
| 272 | verbatim. */ |
| 273 | |
| 274 | if (outptr > tbuffer) |
| 275 | { |
| 276 | tptr = tbuffer; |
| 277 | tlen = outptr - tbuffer; |
| 278 | } |
| 279 | else |
| 280 | { |
| 281 | DEBUG(D_any) debug_printf("iconv error translating \"%.*s\" to %s: " |
| 282 | "%s\n", endword + 2 - mimeword, mimeword, target, strerror(errno)); |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | #endif |
| 287 | |
| 288 | /* No charset translation is happening or there was a translation error; |
| 289 | just set up the original as the string to be added, and mark it all used. |
| 290 | */ |
| 291 | |
| 292 | if (tlen == -1) |
| 293 | { |
| 294 | tptr = dptr; |
| 295 | tlen = dlen; |
| 296 | dlen = 0; |
| 297 | } |
| 298 | |
| 299 | /* Deal with zero values; convert them if requested. */ |
| 300 | |
| 301 | if (zeroval != 0) |
| 302 | { |
| 303 | int i; |
| 304 | for (i = 0; i < tlen; i++) |
| 305 | if (tptr[i] == 0) tptr[i] = zeroval; |
| 306 | } |
| 307 | |
| 308 | /* Add the new string onto the result */ |
| 309 | |
| 310 | yield = string_cat(yield, &size, &ptr, tptr, tlen); |
| 311 | } |
| 312 | |
| 313 | #if HAVE_ICONV |
| 314 | if (icd != (iconv_t)(-1)) iconv_close(icd); |
| 315 | #endif |
| 316 | |
| 317 | /* Update string past the MIME word; skip any white space if the next thing |
| 318 | is another MIME word. */ |
| 319 | |
| 320 | string = endword + 2; |
| 321 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); |
| 322 | if (mimeword != NULL) |
| 323 | { |
| 324 | uschar *s = string; |
| 325 | while (isspace(*s)) s++; |
| 326 | if (s == mimeword) string = s; |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | /* Copy the remaining characters of the string, zero-terminate it, and return |
| 331 | the length as well if requested. */ |
| 332 | |
| 333 | yield = string_cat(yield, &size, &ptr, string, Ustrlen(string)); |
| 334 | yield[ptr] = 0; |
| 335 | if (lenptr != NULL) *lenptr = ptr; |
| 336 | if (sizeptr != NULL) *sizeptr = size; |
| 337 | return yield; |
| 338 | } |
| 339 | |
| 340 | |
| 341 | /* This is the stub that provides the original interface without the sizeptr |
| 342 | argument. */ |
| 343 | |
| 344 | uschar * |
| 345 | rfc2047_decode(uschar *string, BOOL lencheck, uschar *target, int zeroval, |
| 346 | int *lenptr, uschar **error) |
| 347 | { |
| 348 | return rfc2047_decode2(string, lencheck, target, zeroval, lenptr, NULL, error); |
| 349 | } |
| 350 | |
| 351 | /* End of rfc2047.c */ |