Commit | Line | Data |
---|---|---|
059ec3d9 PH |
1 | /************************************************* |
2 | * Exim - an Internet mail transport agent * | |
3 | *************************************************/ | |
4 | ||
f9ba5e22 | 5 | /* Copyright (c) University of Cambridge 1995 - 2018 */ |
1e1ddfac | 6 | /* Copyright (c) The Exim Maintainers 2020 */ |
059ec3d9 PH |
7 | /* See the file NOTICE for conditions of use and distribution. */ |
8 | ||
9 | /* This file contains a function for decoding message header lines that may | |
10 | contain encoded "words" according to the rules described in | |
11 | ||
12 | RFC-2047 at http://www.ietf.org/rfc/rfc2047.txt | |
13 | ||
14 | The function is a rewritten version of code created by Norihisa Washitake. | |
15 | The original could be used both inside Exim (as part of a patch) or in a | |
16 | freestanding form. The original contained some built-in code conversions; I | |
17 | have chosen only to do code conversions if iconv() is supported by the OS. | |
18 | Because there were quite a lot of hacks to be done, for a variety of reasons, | |
19 | I rewrote the code. | |
20 | ||
21 | You can find the latest version of the original library at | |
22 | ||
23 | http://washitake.com/mail/exim/mime/ | |
24 | ||
25 | The code below is almost completely unlike the original. */ | |
26 | ||
27 | ||
28 | #include "exim.h" | |
29 | ||
30 | ||
31 | /************************************************* | |
32 | * Do a QP conversion * | |
33 | *************************************************/ | |
34 | ||
35 | /* This function decodes "quoted printable" into bytes. | |
36 | ||
37 | Arguments: | |
38 | string the string that includes QP escapes | |
39 | ptrptr where to return pointer to the decoded string | |
40 | ||
41 | Returns: the length of the decoded string, or -1 on failure | |
42 | */ | |
43 | ||
44 | static int | |
45 | rfc2047_qpdecode(uschar *string, uschar **ptrptr) | |
46 | { | |
47 | int len = 0; | |
48 | uschar *ptr; | |
49 | ||
f3ebb786 | 50 | ptr = *ptrptr = store_get(Ustrlen(string) + 1, is_tainted(string)); /* No longer than this */ |
059ec3d9 PH |
51 | |
52 | while (*string != 0) | |
53 | { | |
f4bb363f | 54 | int ch = *string++; |
059ec3d9 PH |
55 | |
56 | if (ch == '_') *ptr++ = ' '; | |
57 | else if (ch == '=') | |
58 | { | |
59 | int a = *string; | |
60 | int b = (a == 0)? 0 : string[1]; | |
61 | if (!isxdigit(a) || !isxdigit(b)) return -1; /* Bad QP string */ | |
62 | *ptr++ = ((Ustrchr(hex_digits, tolower(a)) - hex_digits) << 4) + | |
63 | Ustrchr(hex_digits, tolower(b)) - hex_digits; | |
64 | string += 2; | |
65 | } | |
66 | else if (ch == ' ' || ch == '\t') return -1; /* Whitespace is illegal */ | |
67 | else *ptr++ = ch; | |
68 | ||
69 | len++; | |
70 | } | |
71 | ||
72 | *ptr = 0; | |
73 | return len; | |
74 | } | |
75 | ||
76 | ||
77 | ||
78 | /************************************************* | |
79 | * Decode next MIME word * | |
80 | *************************************************/ | |
81 | ||
82 | /* Scan a string to see if a MIME word exists; pass back the separator | |
83 | points in the string. | |
84 | ||
85 | Arguments: | |
86 | string subject string | |
87 | lencheck TRUE to enforce maximum length check | |
88 | q1ptr pass back address of first question mark | |
89 | q2ptr pass back address of second question mark | |
90 | endptr pass back address of final ?= | |
91 | dlenptr pass back length of decoded string | |
92 | dptrptr pass back pointer to decoded string | |
93 | ||
94 | Returns: address of =? or NULL if not present | |
95 | */ | |
96 | ||
97 | static uschar * | |
98 | decode_mimeword(uschar *string, BOOL lencheck, uschar **q1ptr, uschar **q2ptr, | |
99 | uschar **endptr, size_t *dlenptr, uschar **dptrptr) | |
100 | { | |
101 | uschar *mimeword; | |
102 | for (;; string = mimeword + 2) | |
103 | { | |
104 | int encoding; | |
105 | int dlen = -1; | |
106 | ||
107 | if ((mimeword = Ustrstr(string, "=?")) == NULL || | |
108 | (*q1ptr = Ustrchr(mimeword+2, '?')) == NULL || | |
109 | (*q2ptr = Ustrchr(*q1ptr+1, '?')) == NULL || | |
110 | (*endptr = Ustrstr(*q2ptr+1, "?=")) == NULL) return NULL; | |
111 | ||
112 | /* We have found =?xxx?xxx?xxx?= in the string. Optionally check the | |
113 | length, and that the second field is just one character long. If not, | |
114 | continue the loop to search again. We must start just after the initial =? | |
115 | because we might have found =?xxx=?xxx?xxx?xxx?=. */ | |
116 | ||
117 | if ((lencheck && *endptr - mimeword > 73) || *q2ptr - *q1ptr != 2) continue; | |
118 | ||
119 | /* Get the encoding letter, and decode the data string. */ | |
120 | ||
121 | encoding = toupper((*q1ptr)[1]); | |
122 | **endptr = 0; | |
123 | if (encoding == 'B') | |
f4d091fb | 124 | dlen = b64decode(*q2ptr+1, dptrptr); |
059ec3d9 PH |
125 | else if (encoding == 'Q') |
126 | dlen = rfc2047_qpdecode(*q2ptr+1, dptrptr); | |
127 | **endptr = '?'; /* restore */ | |
128 | ||
129 | /* If the decoding succeeded, we are done. Set the length of the decoded | |
130 | string, and pass back the initial pointer. Otherwise, the loop continues. */ | |
131 | ||
132 | if (dlen >= 0) | |
133 | { | |
134 | *dlenptr = (size_t)dlen; | |
135 | return mimeword; | |
136 | } | |
137 | } | |
138 | ||
139 | /* Control should never actually get here */ | |
140 | } | |
141 | ||
142 | ||
143 | ||
144 | /************************************************* | |
145 | * Decode and convert an RFC 2047 string * | |
146 | *************************************************/ | |
147 | ||
148 | /* There are two functions defined here. The original one was rfc2047_decode() | |
149 | and it was documented in the local_scan() interface. I needed to add an extra | |
150 | argument for use by expand_string(), so I created rfc2047_decode2() for that | |
151 | purpose. The original function became a stub that just supplies NULL for the | |
152 | new argument (sizeptr). | |
153 | ||
154 | An RFC 2047-encoded string may contain one or more "words", each of the | |
155 | form =?...?.?...?= with the first ... specifying the character code, the | |
156 | second being Q (for quoted printable) or B for Base64 encoding. The third ... | |
157 | is the actual data. | |
158 | ||
159 | This function first decodes each "word" into bytes from the Q or B encoding. | |
160 | Then, if provided with the name of a charset encoding, and if iconv() is | |
161 | available, it attempts to translate the result to the named character set. | |
162 | If this fails, the binary string is returned with an error message. | |
163 | ||
164 | If a binary zero is encountered in the decoded string, it is replaced by the | |
165 | contents of the zeroval argument. For use with Exim headers, the value must not | |
166 | be 0 because they are handled as zero-terminated strings. When zeroval==0, | |
167 | lenptr should not be NULL. | |
168 | ||
169 | Arguments: | |
170 | string the subject string | |
171 | lencheck TRUE to enforce maximum MIME word length | |
172 | target the name of the target encoding for MIME words, or NULL for | |
173 | no charset translation | |
174 | zeroval the value to use for binary zero bytes | |
175 | lenptr if not NULL, the length of the result is returned via | |
176 | this variable | |
177 | sizeptr if not NULL, the length of a new store block in which the | |
178 | result is built is placed here; if no new store is obtained, | |
179 | the value is not changed | |
180 | error for error messages; NULL if no problem; this can be set | |
181 | when the yield is non-NULL if there was a charset | |
182 | translation problem | |
183 | ||
184 | Returns: the decoded, converted string, or NULL on error; if there are | |
185 | no MIME words in the string, the original string is returned | |
186 | */ | |
187 | ||
188 | uschar * | |
189 | rfc2047_decode2(uschar *string, BOOL lencheck, uschar *target, int zeroval, | |
190 | int *lenptr, int *sizeptr, uschar **error) | |
191 | { | |
059ec3d9 PH |
192 | int size = Ustrlen(string); |
193 | size_t dlen; | |
acec9514 JH |
194 | uschar *dptr; |
195 | gstring *yield; | |
059ec3d9 PH |
196 | uschar *mimeword, *q1, *q2, *endword; |
197 | ||
198 | *error = NULL; | |
199 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); | |
200 | ||
f4bb363f | 201 | if (!mimeword) |
059ec3d9 | 202 | { |
f4bb363f | 203 | if (lenptr) *lenptr = size; |
059ec3d9 PH |
204 | return string; |
205 | } | |
206 | ||
207 | /* Scan through the string, decoding MIME words and copying intermediate text, | |
208 | building the result as we go. The result may be longer than the input if it is | |
209 | translated into a multibyte code such as UTF-8. That's why we use the dynamic | |
210 | string building code. */ | |
211 | ||
f3ebb786 | 212 | yield = store_get(sizeof(gstring) + ++size, is_tainted(string)); |
acec9514 JH |
213 | yield->size = size; |
214 | yield->ptr = 0; | |
215 | yield->s = US(yield + 1); | |
059ec3d9 | 216 | |
f4bb363f | 217 | while (mimeword) |
059ec3d9 PH |
218 | { |
219 | ||
220 | #if HAVE_ICONV | |
221 | iconv_t icd = (iconv_t)(-1); | |
222 | #endif | |
223 | ||
224 | if (mimeword != string) | |
acec9514 | 225 | yield = string_catn(yield, string, mimeword - string); |
f3ebb786 | 226 | /*XXX that might have to convert an untainted string to a tainted one */ |
059ec3d9 PH |
227 | |
228 | /* Do a charset translation if required. This is supported only on hosts | |
229 | that have the iconv() function. Translation errors set error, but carry on, | |
230 | using the untranslated data. If there is more than one error, the message | |
231 | passed back refers to the final one. We use a loop to cater for the case | |
232 | of long strings - the RFC puts limits on the length, but it's best to be | |
233 | robust. */ | |
234 | ||
235 | #if HAVE_ICONV | |
236 | *q1 = 0; | |
e851856f JH |
237 | if (target && strcmpic(target, mimeword+2) != 0) |
238 | if ((icd = iconv_open(CS target, CS(mimeword+2))) == (iconv_t)-1) | |
059ec3d9 PH |
239 | *error = string_sprintf("iconv_open(\"%s\", \"%s\") failed: %s%s", |
240 | target, mimeword+2, strerror(errno), | |
241 | (errno == EINVAL)? " (maybe unsupported conversion)" : ""); | |
059ec3d9 PH |
242 | *q1 = '?'; |
243 | #endif | |
244 | ||
245 | while (dlen > 0) | |
246 | { | |
247 | uschar *tptr = NULL; /* Stops compiler warning */ | |
248 | int tlen = -1; | |
249 | ||
250 | #if HAVE_ICONV | |
251 | uschar tbuffer[256]; | |
252 | uschar *outptr = tbuffer; | |
253 | size_t outleft = sizeof(tbuffer); | |
254 | ||
255 | /* If translation is required, go for it. */ | |
256 | ||
257 | if (icd != (iconv_t)(-1)) | |
258 | { | |
259 | (void)iconv(icd, (ICONV_ARG2_TYPE)(&dptr), &dlen, CSS &outptr, &outleft); | |
260 | ||
261 | /* If outptr has been adjusted, there is some output. Set up to add it to | |
262 | the output buffer. The function will have adjusted dptr and dlen. If | |
263 | iconv() stopped because of an error, we'll pick it up next time when | |
264 | there's no output. | |
265 | ||
266 | If there is no output, we expect there to have been a translation | |
267 | error, because we know there was at least one input byte. We leave the | |
268 | value of tlen as -1, which causes the rest of the input to be copied | |
269 | verbatim. */ | |
270 | ||
271 | if (outptr > tbuffer) | |
272 | { | |
273 | tptr = tbuffer; | |
274 | tlen = outptr - tbuffer; | |
275 | } | |
276 | else | |
277 | { | |
278 | DEBUG(D_any) debug_printf("iconv error translating \"%.*s\" to %s: " | |
73a46702 | 279 | "%s\n", (int)(endword + 2 - mimeword), mimeword, target, strerror(errno)); |
059ec3d9 PH |
280 | } |
281 | } | |
282 | ||
283 | #endif | |
284 | ||
285 | /* No charset translation is happening or there was a translation error; | |
286 | just set up the original as the string to be added, and mark it all used. | |
287 | */ | |
288 | ||
289 | if (tlen == -1) | |
290 | { | |
291 | tptr = dptr; | |
292 | tlen = dlen; | |
293 | dlen = 0; | |
294 | } | |
295 | ||
296 | /* Deal with zero values; convert them if requested. */ | |
297 | ||
298 | if (zeroval != 0) | |
d7978c0f | 299 | for (int i = 0; i < tlen; i++) |
059ec3d9 | 300 | if (tptr[i] == 0) tptr[i] = zeroval; |
059ec3d9 PH |
301 | |
302 | /* Add the new string onto the result */ | |
303 | ||
acec9514 | 304 | yield = string_catn(yield, tptr, tlen); |
059ec3d9 PH |
305 | } |
306 | ||
307 | #if HAVE_ICONV | |
308 | if (icd != (iconv_t)(-1)) iconv_close(icd); | |
309 | #endif | |
310 | ||
311 | /* Update string past the MIME word; skip any white space if the next thing | |
312 | is another MIME word. */ | |
313 | ||
314 | string = endword + 2; | |
315 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); | |
f4bb363f | 316 | if (mimeword) |
059ec3d9 PH |
317 | { |
318 | uschar *s = string; | |
319 | while (isspace(*s)) s++; | |
320 | if (s == mimeword) string = s; | |
321 | } | |
322 | } | |
323 | ||
324 | /* Copy the remaining characters of the string, zero-terminate it, and return | |
325 | the length as well if requested. */ | |
326 | ||
acec9514 JH |
327 | yield = string_cat(yield, string); |
328 | ||
329 | if (lenptr) *lenptr = yield->ptr; | |
330 | if (sizeptr) *sizeptr = yield->size; | |
331 | return string_from_gstring(yield); | |
059ec3d9 PH |
332 | } |
333 | ||
334 | ||
335 | /* This is the stub that provides the original interface without the sizeptr | |
336 | argument. */ | |
337 | ||
338 | uschar * | |
339 | rfc2047_decode(uschar *string, BOOL lencheck, uschar *target, int zeroval, | |
340 | int *lenptr, uschar **error) | |
341 | { | |
342 | return rfc2047_decode2(string, lencheck, target, zeroval, lenptr, NULL, error); | |
343 | } | |
344 | ||
345 | /* End of rfc2047.c */ |