Commit | Line | Data |
---|---|---|
059ec3d9 PH |
1 | /************************************************* |
2 | * Exim - an Internet mail transport agent * | |
3 | *************************************************/ | |
4 | ||
f9ba5e22 | 5 | /* Copyright (c) University of Cambridge 1995 - 2018 */ |
059ec3d9 PH |
6 | /* See the file NOTICE for conditions of use and distribution. */ |
7 | ||
8 | /* This file contains a function for decoding message header lines that may | |
9 | contain encoded "words" according to the rules described in | |
10 | ||
11 | RFC-2047 at http://www.ietf.org/rfc/rfc2047.txt | |
12 | ||
13 | The function is a rewritten version of code created by Norihisa Washitake. | |
14 | The original could be used either inside Exim (as part of a patch) or in a | |
15 | freestanding form. The original contained some built-in code conversions; I | |
16 | have chosen only to do code conversions if iconv() is supported by the OS. | |
17 | Because there were quite a lot of hacks to be done, for a variety of reasons, | |
18 | I rewrote the code. | |
19 | ||
20 | You can find the latest version of the original library at | |
21 | ||
22 | http://washitake.com/mail/exim/mime/ | |
23 | ||
24 | The code below is almost completely unlike the original. */ | |
25 | ||
26 | ||
27 | #include "exim.h" | |
28 | ||
29 | ||
30 | /************************************************* | |
31 | * Do a QP conversion * | |
32 | *************************************************/ | |
33 | ||
34 | /* This function decodes "quoted printable" into bytes. | |
35 | ||
36 | Arguments: | |
37 | string the string that includes QP escapes | |
38 | ptrptr where to return pointer to the decoded string | |
39 | ||
40 | Returns: the length of the decoded string, or -1 on failure | |
41 | */ | |
42 | ||
43 | static int | |
44 | rfc2047_qpdecode(uschar *string, uschar **ptrptr) | |
45 | { | |
46 | int len = 0; | |
47 | uschar *ptr; | |
48 | ||
49 | ptr = *ptrptr = store_get(Ustrlen(string) + 1); /* No longer than this */ | |
50 | ||
51 | while (*string != 0) | |
52 | { | |
f4bb363f | 53 | int ch = *string++; |
059ec3d9 PH |
54 | |
55 | if (ch == '_') *ptr++ = ' '; | |
56 | else if (ch == '=') | |
57 | { | |
58 | int a = *string; | |
59 | int b = (a == 0)? 0 : string[1]; | |
60 | if (!isxdigit(a) || !isxdigit(b)) return -1; /* Bad QP string */ | |
61 | *ptr++ = ((Ustrchr(hex_digits, tolower(a)) - hex_digits) << 4) + | |
62 | Ustrchr(hex_digits, tolower(b)) - hex_digits; | |
63 | string += 2; | |
64 | } | |
65 | else if (ch == ' ' || ch == '\t') return -1; /* Whitespace is illegal */ | |
66 | else *ptr++ = ch; | |
67 | ||
68 | len++; | |
69 | } | |
70 | ||
71 | *ptr = 0; | |
72 | return len; | |
73 | } | |
74 | ||
75 | ||
76 | ||
77 | /************************************************* | |
78 | * Decode next MIME word * | |
79 | *************************************************/ | |
80 | ||
81 | /* Scan a string to see if a MIME word exists; pass back the separator | |
82 | points in the string. | |
83 | ||
84 | Arguments: | |
85 | string subject string | |
86 | lencheck TRUE to enforce maximum length check | |
87 | q1ptr pass back address of first question mark | |
88 | q2ptr pass back address of second question mark | |
89 | endptr pass back address of final ?= | |
90 | dlenptr pass back length of decoded string | |
91 | dptrptr pass back pointer to decoded string | |
92 | ||
93 | Returns: address of =? or NULL if not present | |
94 | */ | |
95 | ||
96 | static uschar * | |
97 | decode_mimeword(uschar *string, BOOL lencheck, uschar **q1ptr, uschar **q2ptr, | |
98 | uschar **endptr, size_t *dlenptr, uschar **dptrptr) | |
99 | { | |
100 | uschar *mimeword; | |
101 | for (;; string = mimeword + 2) | |
102 | { | |
103 | int encoding; | |
104 | int dlen = -1; | |
105 | ||
106 | if ((mimeword = Ustrstr(string, "=?")) == NULL || | |
107 | (*q1ptr = Ustrchr(mimeword+2, '?')) == NULL || | |
108 | (*q2ptr = Ustrchr(*q1ptr+1, '?')) == NULL || | |
109 | (*endptr = Ustrstr(*q2ptr+1, "?=")) == NULL) return NULL; | |
110 | ||
111 | /* We have found =?xxx?xxx?xxx?= in the string. Optionally check the | |
112 | length, and that the second field is just one character long. If not, | |
113 | continue the loop to search again. We must start just after the initial =? | |
114 | because we might have found =?xxx=?xxx?xxx?xxx?=. */ | |
115 | ||
116 | if ((lencheck && *endptr - mimeword > 73) || *q2ptr - *q1ptr != 2) continue; | |
117 | ||
118 | /* Get the encoding letter, and decode the data string. */ | |
119 | ||
120 | encoding = toupper((*q1ptr)[1]); | |
121 | **endptr = 0; | |
122 | if (encoding == 'B') | |
f4d091fb | 123 | dlen = b64decode(*q2ptr+1, dptrptr); |
059ec3d9 PH |
124 | else if (encoding == 'Q') |
125 | dlen = rfc2047_qpdecode(*q2ptr+1, dptrptr); | |
126 | **endptr = '?'; /* restore */ | |
127 | ||
128 | /* If the decoding succeeded, we are done. Set the length of the decoded | |
129 | string, and pass back the initial pointer. Otherwise, the loop continues. */ | |
130 | ||
131 | if (dlen >= 0) | |
132 | { | |
133 | *dlenptr = (size_t)dlen; | |
134 | return mimeword; | |
135 | } | |
136 | } | |
137 | ||
138 | /* Control should never actually get here */ | |
139 | } | |
140 | ||
141 | ||
142 | ||
143 | /************************************************* | |
144 | * Decode and convert an RFC 2047 string * | |
145 | *************************************************/ | |
146 | ||
147 | /* There are two functions defined here. The original one was rfc2047_decode() | |
148 | and it was documented in the local_scan() interface. I needed to add an extra | |
149 | argument for use by expand_string(), so I created rfc2047_decode2() for that | |
150 | purpose. The original function became a stub that just supplies NULL for the | |
151 | new argument (sizeptr). | |
152 | ||
153 | An RFC 2047-encoded string may contain one or more "words", each of the | |
154 | form =?...?.?...?= with the first ... specifying the character code, the | |
155 | second being Q (for quoted printable) or B for Base64 encoding. The third ... | |
156 | is the actual data. | |
157 | ||
158 | This function first decodes each "word" into bytes from the Q or B encoding. | |
159 | Then, if provided with the name of a charset encoding, and if iconv() is | |
160 | available, it attempts to translate the result to the named character set. | |
161 | If this fails, the binary string is returned with an error message. | |
162 | ||
163 | If a binary zero is encountered in the decoded string, it is replaced by the | |
164 | contents of the zeroval argument. For use with Exim headers, the value must not | |
165 | be 0 because they are handled as zero-terminated strings. When zeroval==0, | |
166 | lenptr should not be NULL. | |
167 | ||
168 | Arguments: | |
169 | string the subject string | |
170 | lencheck TRUE to enforce maximum MIME word length | |
171 | target the name of the target encoding for MIME words, or NULL for | |
172 | no charset translation | |
173 | zeroval the value to use for binary zero bytes | |
174 | lenptr if not NULL, the length of the result is returned via | |
175 | this variable | |
176 | sizeptr if not NULL, the length of a new store block in which the | |
177 | result is built is placed here; if no new store is obtained, | |
178 | the value is not changed | |
179 | error for error messages; NULL if no problem; this can be set | |
180 | when the yield is non-NULL if there was a charset | |
181 | translation problem | |
182 | ||
183 | Returns: the decoded, converted string, or NULL on error; if there are | |
184 | no MIME words in the string, the original string is returned | |
185 | */ | |
186 | ||
187 | uschar * | |
188 | rfc2047_decode2(uschar *string, BOOL lencheck, uschar *target, int zeroval, | |
189 | int *lenptr, int *sizeptr, uschar **error) | |
190 | { | |
059ec3d9 PH |
191 | int size = Ustrlen(string); |
192 | size_t dlen; | |
acec9514 JH |
193 | uschar *dptr; |
194 | gstring *yield; | |
059ec3d9 PH |
195 | uschar *mimeword, *q1, *q2, *endword; |
196 | ||
197 | *error = NULL; | |
198 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); | |
199 | ||
f4bb363f | 200 | if (!mimeword) |
059ec3d9 | 201 | { |
f4bb363f | 202 | if (lenptr) *lenptr = size; |
059ec3d9 PH |
203 | return string; |
204 | } | |
205 | ||
206 | /* Scan through the string, decoding MIME words and copying intermediate text, | |
207 | building the result as we go. The result may be longer than the input if it is | |
208 | translated into a multibyte code such as UTF-8. That's why we use the dynamic | |
209 | string building code. */ | |
210 | ||
acec9514 JH |
211 | yield = store_get(sizeof(gstring) + ++size); |
212 | yield->size = size; | |
213 | yield->ptr = 0; | |
214 | yield->s = US(yield + 1); | |
059ec3d9 | 215 | |
f4bb363f | 216 | while (mimeword) |
059ec3d9 PH |
217 | { |
218 | ||
219 | #if HAVE_ICONV | |
220 | iconv_t icd = (iconv_t)(-1); | |
221 | #endif | |
222 | ||
223 | if (mimeword != string) | |
acec9514 | 224 | yield = string_catn(yield, string, mimeword - string); |
059ec3d9 PH |
225 | |
226 | /* Do a charset translation if required. This is supported only on hosts | |
227 | that have the iconv() function. Translation errors set error, but carry on, | |
228 | using the untranslated data. If there is more than one error, the message | |
229 | passed back refers to the final one. We use a loop to cater for the case | |
230 | of long strings - the RFC puts limits on the length, but it's best to be | |
231 | robust. */ | |
232 | ||
233 | #if HAVE_ICONV | |
234 | *q1 = 0; | |
235 | if (target != NULL && strcmpic(target, mimeword+2) != 0) | |
236 | { | |
237 | icd = iconv_open(CS target, CS(mimeword+2)); | |
238 | ||
239 | if (icd == (iconv_t)(-1)) | |
240 | { | |
241 | *error = string_sprintf("iconv_open(\"%s\", \"%s\") failed: %s%s", | |
242 | target, mimeword+2, strerror(errno), | |
243 | (errno == EINVAL)? " (maybe unsupported conversion)" : ""); | |
244 | } | |
245 | } | |
246 | *q1 = '?'; | |
247 | #endif | |
248 | ||
249 | while (dlen > 0) | |
250 | { | |
251 | uschar *tptr = NULL; /* Stops compiler warning */ | |
252 | int tlen = -1; | |
253 | ||
254 | #if HAVE_ICONV | |
255 | uschar tbuffer[256]; | |
256 | uschar *outptr = tbuffer; | |
257 | size_t outleft = sizeof(tbuffer); | |
258 | ||
259 | /* If translation is required, go for it. */ | |
260 | ||
261 | if (icd != (iconv_t)(-1)) | |
262 | { | |
263 | (void)iconv(icd, (ICONV_ARG2_TYPE)(&dptr), &dlen, CSS &outptr, &outleft); | |
264 | ||
265 | /* If outptr has been adjusted, there is some output. Set up to add it to | |
266 | the output buffer. The function will have adjusted dptr and dlen. If | |
267 | iconv() stopped because of an error, we'll pick it up next time when | |
268 | there's no output. | |
269 | ||
270 | If there is no output, we expect there to have been a translation | |
271 | error, because we know there was at least one input byte. We leave the | |
272 | value of tlen as -1, which causes the rest of the input to be copied | |
273 | verbatim. */ | |
274 | ||
275 | if (outptr > tbuffer) | |
276 | { | |
277 | tptr = tbuffer; | |
278 | tlen = outptr - tbuffer; | |
279 | } | |
280 | else | |
281 | { | |
282 | DEBUG(D_any) debug_printf("iconv error translating \"%.*s\" to %s: " | |
73a46702 | 283 | "%s\n", (int)(endword + 2 - mimeword), mimeword, target, strerror(errno)); |
059ec3d9 PH |
284 | } |
285 | } | |
286 | ||
287 | #endif | |
288 | ||
289 | /* No charset translation is happening or there was a translation error; | |
290 | just set up the original as the string to be added, and mark it all used. | |
291 | */ | |
292 | ||
293 | if (tlen == -1) | |
294 | { | |
295 | tptr = dptr; | |
296 | tlen = dlen; | |
297 | dlen = 0; | |
298 | } | |
299 | ||
300 | /* Deal with zero values; convert them if requested. */ | |
301 | ||
302 | if (zeroval != 0) | |
d7978c0f | 303 | for (int i = 0; i < tlen; i++) |
059ec3d9 | 304 | if (tptr[i] == 0) tptr[i] = zeroval; |
059ec3d9 PH |
305 | |
306 | /* Add the new string onto the result */ | |
307 | ||
acec9514 | 308 | yield = string_catn(yield, tptr, tlen); |
059ec3d9 PH |
309 | } |
310 | ||
311 | #if HAVE_ICONV | |
312 | if (icd != (iconv_t)(-1)) iconv_close(icd); | |
313 | #endif | |
314 | ||
315 | /* Update string past the MIME word; skip any white space if the next thing | |
316 | is another MIME word. */ | |
317 | ||
318 | string = endword + 2; | |
319 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); | |
f4bb363f | 320 | if (mimeword) |
059ec3d9 PH |
321 | { |
322 | uschar *s = string; | |
323 | while (isspace(*s)) s++; | |
324 | if (s == mimeword) string = s; | |
325 | } | |
326 | } | |
327 | ||
328 | /* Copy the remaining characters of the string, zero-terminate it, and return | |
329 | the length as well if requested. */ | |
330 | ||
acec9514 JH |
331 | yield = string_cat(yield, string); |
332 | ||
333 | if (lenptr) *lenptr = yield->ptr; | |
334 | if (sizeptr) *sizeptr = yield->size; | |
335 | return string_from_gstring(yield); | |
059ec3d9 PH |
336 | } |
337 | ||
338 | ||
339 | /* This is the stub that provides the original interface without the sizeptr | |
340 | argument. */ | |
341 | ||
342 | uschar * | |
343 | rfc2047_decode(uschar *string, BOOL lencheck, uschar *target, int zeroval, | |
344 | int *lenptr, uschar **error) | |
345 | { | |
346 | return rfc2047_decode2(string, lencheck, target, zeroval, lenptr, NULL, error); | |
347 | } | |
348 | ||
349 | /* End of rfc2047.c */ |