Commit | Line | Data |
---|---|---|
059ec3d9 PH |
1 | /************************************************* |
2 | * Exim - an Internet mail transport agent * | |
3 | *************************************************/ | |
4 | ||
0a49a7a4 | 5 | /* Copyright (c) University of Cambridge 1995 - 2009 */ |
059ec3d9 PH |
6 | /* See the file NOTICE for conditions of use and distribution. */ |
7 | ||
8 | /* This file contains a function for decoding message header lines that may | |
9 | contain encoded "words" according to the rules described in | |
10 | ||
11 | RFC-2047 at http://www.ietf.org/rfc/rfc2047.txt | |
12 | ||
13 | The function is a rewritten version of code created by Norihisa Washitake. | |
14 | The original could be used both inside Exim (as part of a patch) or in a | |
15 | freestanding form. The original contained some built-in code conversions; I | |
16 | have chosen only to do code conversions if iconv() is supported by the OS. | |
17 | Because there were quite a lot of hacks to be done, for a variety of reasons, | |
18 | I rewrote the code. | |
19 | ||
20 | You can find the latest version of the original library at | |
21 | ||
22 | http://washitake.com/mail/exim/mime/ | |
23 | ||
24 | The code below is almost completely unlike the original. */ | |
25 | ||
26 | ||
27 | #include "exim.h" | |
28 | ||
29 | ||
30 | /************************************************* | |
31 | * Do a QP conversion * | |
32 | *************************************************/ | |
33 | ||
34 | /* This function decodes "quoted printable" into bytes. | |
35 | ||
36 | Arguments: | |
37 | string the string that includes QP escapes | |
38 | ptrptr where to return pointer to the decoded string | |
39 | ||
40 | Returns: the length of the decoded string, or -1 on failure | |
41 | */ | |
42 | ||
43 | static int | |
44 | rfc2047_qpdecode(uschar *string, uschar **ptrptr) | |
45 | { | |
46 | int len = 0; | |
47 | uschar *ptr; | |
48 | ||
49 | ptr = *ptrptr = store_get(Ustrlen(string) + 1); /* No longer than this */ | |
50 | ||
51 | while (*string != 0) | |
52 | { | |
53 | register int ch = *string++; | |
54 | ||
55 | if (ch == '_') *ptr++ = ' '; | |
56 | else if (ch == '=') | |
57 | { | |
58 | int a = *string; | |
59 | int b = (a == 0)? 0 : string[1]; | |
60 | if (!isxdigit(a) || !isxdigit(b)) return -1; /* Bad QP string */ | |
61 | *ptr++ = ((Ustrchr(hex_digits, tolower(a)) - hex_digits) << 4) + | |
62 | Ustrchr(hex_digits, tolower(b)) - hex_digits; | |
63 | string += 2; | |
64 | } | |
65 | else if (ch == ' ' || ch == '\t') return -1; /* Whitespace is illegal */ | |
66 | else *ptr++ = ch; | |
67 | ||
68 | len++; | |
69 | } | |
70 | ||
71 | *ptr = 0; | |
72 | return len; | |
73 | } | |
74 | ||
75 | ||
76 | ||
77 | /************************************************* | |
78 | * Decode next MIME word * | |
79 | *************************************************/ | |
80 | ||
81 | /* Scan a string to see if a MIME word exists; pass back the separator | |
82 | points in the string. | |
83 | ||
84 | Arguments: | |
85 | string subject string | |
86 | lencheck TRUE to enforce maximum length check | |
87 | q1ptr pass back address of first question mark | |
88 | q2ptr pass back address of second question mark | |
89 | endptr pass back address of final ?= | |
90 | dlenptr pass back length of decoded string | |
91 | dptrptr pass back pointer to decoded string | |
92 | ||
93 | Returns: address of =? or NULL if not present | |
94 | */ | |
95 | ||
96 | static uschar * | |
97 | decode_mimeword(uschar *string, BOOL lencheck, uschar **q1ptr, uschar **q2ptr, | |
98 | uschar **endptr, size_t *dlenptr, uschar **dptrptr) | |
99 | { | |
100 | uschar *mimeword; | |
101 | for (;; string = mimeword + 2) | |
102 | { | |
103 | int encoding; | |
104 | int dlen = -1; | |
105 | ||
106 | if ((mimeword = Ustrstr(string, "=?")) == NULL || | |
107 | (*q1ptr = Ustrchr(mimeword+2, '?')) == NULL || | |
108 | (*q2ptr = Ustrchr(*q1ptr+1, '?')) == NULL || | |
109 | (*endptr = Ustrstr(*q2ptr+1, "?=")) == NULL) return NULL; | |
110 | ||
111 | /* We have found =?xxx?xxx?xxx?= in the string. Optionally check the | |
112 | length, and that the second field is just one character long. If not, | |
113 | continue the loop to search again. We must start just after the initial =? | |
114 | because we might have found =?xxx=?xxx?xxx?xxx?=. */ | |
115 | ||
116 | if ((lencheck && *endptr - mimeword > 73) || *q2ptr - *q1ptr != 2) continue; | |
117 | ||
118 | /* Get the encoding letter, and decode the data string. */ | |
119 | ||
120 | encoding = toupper((*q1ptr)[1]); | |
121 | **endptr = 0; | |
122 | if (encoding == 'B') | |
123 | dlen = auth_b64decode(*q2ptr+1, dptrptr); | |
124 | else if (encoding == 'Q') | |
125 | dlen = rfc2047_qpdecode(*q2ptr+1, dptrptr); | |
126 | **endptr = '?'; /* restore */ | |
127 | ||
128 | /* If the decoding succeeded, we are done. Set the length of the decoded | |
129 | string, and pass back the initial pointer. Otherwise, the loop continues. */ | |
130 | ||
131 | if (dlen >= 0) | |
132 | { | |
133 | *dlenptr = (size_t)dlen; | |
134 | return mimeword; | |
135 | } | |
136 | } | |
137 | ||
138 | /* Control should never actually get here */ | |
139 | } | |
140 | ||
141 | ||
142 | ||
143 | /************************************************* | |
144 | * Decode and convert an RFC 2047 string * | |
145 | *************************************************/ | |
146 | ||
147 | /* There are two functions defined here. The original one was rfc2047_decode() | |
148 | and it was documented in the local_scan() interface. I needed to add an extra | |
149 | argument for use by expand_string(), so I created rfc2047_decode2() for that | |
150 | purpose. The original function became a stub that just supplies NULL for the | |
151 | new argument (sizeptr). | |
152 | ||
153 | An RFC 2047-encoded string may contain one or more "words", each of the | |
154 | form =?...?.?...?= with the first ... specifying the character code, the | |
155 | second being Q (for quoted printable) or B for Base64 encoding. The third ... | |
156 | is the actual data. | |
157 | ||
158 | This function first decodes each "word" into bytes from the Q or B encoding. | |
159 | Then, if provided with the name of a charset encoding, and if iconv() is | |
160 | available, it attempts to translate the result to the named character set. | |
161 | If this fails, the binary string is returned with an error message. | |
162 | ||
163 | If a binary zero is encountered in the decoded string, it is replaced by the | |
164 | contents of the zeroval argument. For use with Exim headers, the value must not | |
165 | be 0 because they are handled as zero-terminated strings. When zeroval==0, | |
166 | lenptr should not be NULL. | |
167 | ||
168 | Arguments: | |
169 | string the subject string | |
170 | lencheck TRUE to enforce maximum MIME word length | |
171 | target the name of the target encoding for MIME words, or NULL for | |
172 | no charset translation | |
173 | zeroval the value to use for binary zero bytes | |
174 | lenptr if not NULL, the length of the result is returned via | |
175 | this variable | |
176 | sizeptr if not NULL, the length of a new store block in which the | |
177 | result is built is placed here; if no new store is obtained, | |
178 | the value is not changed | |
179 | error for error messages; NULL if no problem; this can be set | |
180 | when the yield is non-NULL if there was a charset | |
181 | translation problem | |
182 | ||
183 | Returns: the decoded, converted string, or NULL on error; if there are | |
184 | no MIME words in the string, the original string is returned | |
185 | */ | |
186 | ||
187 | uschar * | |
188 | rfc2047_decode2(uschar *string, BOOL lencheck, uschar *target, int zeroval, | |
189 | int *lenptr, int *sizeptr, uschar **error) | |
190 | { | |
191 | int ptr = 0; | |
192 | int size = Ustrlen(string); | |
193 | size_t dlen; | |
194 | uschar *dptr, *yield; | |
195 | uschar *mimeword, *q1, *q2, *endword; | |
196 | ||
197 | *error = NULL; | |
198 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); | |
199 | ||
200 | if (mimeword == NULL) | |
201 | { | |
202 | if (lenptr != NULL) *lenptr = size; | |
203 | return string; | |
204 | } | |
205 | ||
206 | /* Scan through the string, decoding MIME words and copying intermediate text, | |
207 | building the result as we go. The result may be longer than the input if it is | |
208 | translated into a multibyte code such as UTF-8. That's why we use the dynamic | |
209 | string building code. */ | |
210 | ||
211 | yield = store_get(++size); | |
212 | ||
213 | while (mimeword != NULL) | |
214 | { | |
215 | ||
216 | #if HAVE_ICONV | |
217 | iconv_t icd = (iconv_t)(-1); | |
218 | #endif | |
219 | ||
220 | if (mimeword != string) | |
221 | yield = string_cat(yield, &size, &ptr, string, mimeword - string); | |
222 | ||
223 | /* Do a charset translation if required. This is supported only on hosts | |
224 | that have the iconv() function. Translation errors set error, but carry on, | |
225 | using the untranslated data. If there is more than one error, the message | |
226 | passed back refers to the final one. We use a loop to cater for the case | |
227 | of long strings - the RFC puts limits on the length, but it's best to be | |
228 | robust. */ | |
229 | ||
230 | #if HAVE_ICONV | |
231 | *q1 = 0; | |
232 | if (target != NULL && strcmpic(target, mimeword+2) != 0) | |
233 | { | |
234 | icd = iconv_open(CS target, CS(mimeword+2)); | |
235 | ||
236 | if (icd == (iconv_t)(-1)) | |
237 | { | |
238 | *error = string_sprintf("iconv_open(\"%s\", \"%s\") failed: %s%s", | |
239 | target, mimeword+2, strerror(errno), | |
240 | (errno == EINVAL)? " (maybe unsupported conversion)" : ""); | |
241 | } | |
242 | } | |
243 | *q1 = '?'; | |
244 | #endif | |
245 | ||
246 | while (dlen > 0) | |
247 | { | |
248 | uschar *tptr = NULL; /* Stops compiler warning */ | |
249 | int tlen = -1; | |
250 | ||
251 | #if HAVE_ICONV | |
252 | uschar tbuffer[256]; | |
253 | uschar *outptr = tbuffer; | |
254 | size_t outleft = sizeof(tbuffer); | |
255 | ||
256 | /* If translation is required, go for it. */ | |
257 | ||
258 | if (icd != (iconv_t)(-1)) | |
259 | { | |
260 | (void)iconv(icd, (ICONV_ARG2_TYPE)(&dptr), &dlen, CSS &outptr, &outleft); | |
261 | ||
262 | /* If outptr has been adjusted, there is some output. Set up to add it to | |
263 | the output buffer. The function will have adjusted dptr and dlen. If | |
264 | iconv() stopped because of an error, we'll pick it up next time when | |
265 | there's no output. | |
266 | ||
267 | If there is no output, we expect there to have been a translation | |
268 | error, because we know there was at least one input byte. We leave the | |
269 | value of tlen as -1, which causes the rest of the input to be copied | |
270 | verbatim. */ | |
271 | ||
272 | if (outptr > tbuffer) | |
273 | { | |
274 | tptr = tbuffer; | |
275 | tlen = outptr - tbuffer; | |
276 | } | |
277 | else | |
278 | { | |
279 | DEBUG(D_any) debug_printf("iconv error translating \"%.*s\" to %s: " | |
73a46702 | 280 | "%s\n", (int)(endword + 2 - mimeword), mimeword, target, strerror(errno)); |
059ec3d9 PH |
281 | } |
282 | } | |
283 | ||
284 | #endif | |
285 | ||
286 | /* No charset translation is happening or there was a translation error; | |
287 | just set up the original as the string to be added, and mark it all used. | |
288 | */ | |
289 | ||
290 | if (tlen == -1) | |
291 | { | |
292 | tptr = dptr; | |
293 | tlen = dlen; | |
294 | dlen = 0; | |
295 | } | |
296 | ||
297 | /* Deal with zero values; convert them if requested. */ | |
298 | ||
299 | if (zeroval != 0) | |
300 | { | |
301 | int i; | |
302 | for (i = 0; i < tlen; i++) | |
303 | if (tptr[i] == 0) tptr[i] = zeroval; | |
304 | } | |
305 | ||
306 | /* Add the new string onto the result */ | |
307 | ||
308 | yield = string_cat(yield, &size, &ptr, tptr, tlen); | |
309 | } | |
310 | ||
311 | #if HAVE_ICONV | |
312 | if (icd != (iconv_t)(-1)) iconv_close(icd); | |
313 | #endif | |
314 | ||
315 | /* Update string past the MIME word; skip any white space if the next thing | |
316 | is another MIME word. */ | |
317 | ||
318 | string = endword + 2; | |
319 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); | |
320 | if (mimeword != NULL) | |
321 | { | |
322 | uschar *s = string; | |
323 | while (isspace(*s)) s++; | |
324 | if (s == mimeword) string = s; | |
325 | } | |
326 | } | |
327 | ||
328 | /* Copy the remaining characters of the string, zero-terminate it, and return | |
329 | the length as well if requested. */ | |
330 | ||
331 | yield = string_cat(yield, &size, &ptr, string, Ustrlen(string)); | |
332 | yield[ptr] = 0; | |
333 | if (lenptr != NULL) *lenptr = ptr; | |
334 | if (sizeptr != NULL) *sizeptr = size; | |
335 | return yield; | |
336 | } | |
337 | ||
338 | ||
339 | /* This is the stub that provides the original interface without the sizeptr | |
340 | argument. */ | |
341 | ||
342 | uschar * | |
343 | rfc2047_decode(uschar *string, BOOL lencheck, uschar *target, int zeroval, | |
344 | int *lenptr, uschar **error) | |
345 | { | |
346 | return rfc2047_decode2(string, lencheck, target, zeroval, lenptr, NULL, error); | |
347 | } | |
348 | ||
349 | /* End of rfc2047.c */ |