Fix taint issue with retry records. Bug 2492
[exim.git] / src / src / rfc2047.c
CommitLineData
059ec3d9
PH
/*************************************************
*     Exim - an Internet mail transport agent    *
*************************************************/

/* Copyright (c) University of Cambridge 1995 - 2018 */
/* See the file NOTICE for conditions of use and distribution. */

/* This file contains a function for decoding message header lines that may
contain encoded "words" according to the rules described in

  RFC-2047 at http://www.ietf.org/rfc/rfc2047.txt

The function is a rewritten version of code created by Norihisa Washitake.
The original could be used both inside Exim (as part of a patch) or in a
freestanding form. The original contained some built-in code conversions; I
have chosen only to do code conversions if iconv() is supported by the OS.
Because there were quite a lot of hacks to be done, for a variety of reasons,
I rewrote the code.

You can find the latest version of the original library at

  http://washitake.com/mail/exim/mime/

The code below is almost completely unlike the original. */
25
26
27#include "exim.h"
28
29
30/*************************************************
31* Do a QP conversion *
32*************************************************/
33
34/* This function decodes "quoted printable" into bytes.
35
36Arguments:
37 string the string that includes QP escapes
38 ptrptr where to return pointer to the decoded string
39
40Returns: the length of the decoded string, or -1 on failure
41*/
42
43static int
44rfc2047_qpdecode(uschar *string, uschar **ptrptr)
45{
46int len = 0;
47uschar *ptr;
48
f3ebb786 49ptr = *ptrptr = store_get(Ustrlen(string) + 1, is_tainted(string)); /* No longer than this */
059ec3d9
PH
50
51while (*string != 0)
52 {
f4bb363f 53 int ch = *string++;
059ec3d9
PH
54
55 if (ch == '_') *ptr++ = ' ';
56 else if (ch == '=')
57 {
58 int a = *string;
59 int b = (a == 0)? 0 : string[1];
60 if (!isxdigit(a) || !isxdigit(b)) return -1; /* Bad QP string */
61 *ptr++ = ((Ustrchr(hex_digits, tolower(a)) - hex_digits) << 4) +
62 Ustrchr(hex_digits, tolower(b)) - hex_digits;
63 string += 2;
64 }
65 else if (ch == ' ' || ch == '\t') return -1; /* Whitespace is illegal */
66 else *ptr++ = ch;
67
68 len++;
69 }
70
71*ptr = 0;
72return len;
73}
74
75
76
77/*************************************************
78* Decode next MIME word *
79*************************************************/
80
81/* Scan a string to see if a MIME word exists; pass back the separator
82points in the string.
83
84Arguments:
85 string subject string
86 lencheck TRUE to enforce maximum length check
87 q1ptr pass back address of first question mark
88 q2ptr pass back address of second question mark
89 endptr pass back address of final ?=
90 dlenptr pass back length of decoded string
91 dptrptr pass back pointer to decoded string
92
93Returns: address of =? or NULL if not present
94*/
95
96static uschar *
97decode_mimeword(uschar *string, BOOL lencheck, uschar **q1ptr, uschar **q2ptr,
98 uschar **endptr, size_t *dlenptr, uschar **dptrptr)
99{
100uschar *mimeword;
101for (;; string = mimeword + 2)
102 {
103 int encoding;
104 int dlen = -1;
105
106 if ((mimeword = Ustrstr(string, "=?")) == NULL ||
107 (*q1ptr = Ustrchr(mimeword+2, '?')) == NULL ||
108 (*q2ptr = Ustrchr(*q1ptr+1, '?')) == NULL ||
109 (*endptr = Ustrstr(*q2ptr+1, "?=")) == NULL) return NULL;
110
111 /* We have found =?xxx?xxx?xxx?= in the string. Optionally check the
112 length, and that the second field is just one character long. If not,
113 continue the loop to search again. We must start just after the initial =?
114 because we might have found =?xxx=?xxx?xxx?xxx?=. */
115
116 if ((lencheck && *endptr - mimeword > 73) || *q2ptr - *q1ptr != 2) continue;
117
118 /* Get the encoding letter, and decode the data string. */
119
120 encoding = toupper((*q1ptr)[1]);
121 **endptr = 0;
122 if (encoding == 'B')
f4d091fb 123 dlen = b64decode(*q2ptr+1, dptrptr);
059ec3d9
PH
124 else if (encoding == 'Q')
125 dlen = rfc2047_qpdecode(*q2ptr+1, dptrptr);
126 **endptr = '?'; /* restore */
127
128 /* If the decoding succeeded, we are done. Set the length of the decoded
129 string, and pass back the initial pointer. Otherwise, the loop continues. */
130
131 if (dlen >= 0)
132 {
133 *dlenptr = (size_t)dlen;
134 return mimeword;
135 }
136 }
137
138/* Control should never actually get here */
139}
140
141
142
143/*************************************************
144* Decode and convert an RFC 2047 string *
145*************************************************/
146
147/* There are two functions defined here. The original one was rfc2047_decode()
148and it was documented in the local_scan() interface. I needed to add an extra
149argument for use by expand_string(), so I created rfc2047_decode2() for that
150purpose. The original function became a stub that just supplies NULL for the
151new argument (sizeptr).
152
153An RFC 2047-encoded string may contain one or more "words", each of the
154form =?...?.?...?= with the first ... specifying the character code, the
155second being Q (for quoted printable) or B for Base64 encoding. The third ...
156is the actual data.
157
158This function first decodes each "word" into bytes from the Q or B encoding.
159Then, if provided with the name of a charset encoding, and if iconv() is
160available, it attempts to translate the result to the named character set.
161If this fails, the binary string is returned with an error message.
162
163If a binary zero is encountered in the decoded string, it is replaced by the
164contents of the zeroval argument. For use with Exim headers, the value must not
165be 0 because they are handled as zero-terminated strings. When zeroval==0,
166lenptr should not be NULL.
167
168Arguments:
169 string the subject string
170 lencheck TRUE to enforce maximum MIME word length
171 target the name of the target encoding for MIME words, or NULL for
172 no charset translation
173 zeroval the value to use for binary zero bytes
174 lenptr if not NULL, the length of the result is returned via
175 this variable
176 sizeptr if not NULL, the length of a new store block in which the
177 result is built is placed here; if no new store is obtained,
178 the value is not changed
179 error for error messages; NULL if no problem; this can be set
180 when the yield is non-NULL if there was a charset
181 translation problem
182
183Returns: the decoded, converted string, or NULL on error; if there are
184 no MIME words in the string, the original string is returned
185*/
186
187uschar *
188rfc2047_decode2(uschar *string, BOOL lencheck, uschar *target, int zeroval,
189 int *lenptr, int *sizeptr, uschar **error)
190{
059ec3d9
PH
191int size = Ustrlen(string);
192size_t dlen;
acec9514
JH
193uschar *dptr;
194gstring *yield;
059ec3d9
PH
195uschar *mimeword, *q1, *q2, *endword;
196
197*error = NULL;
198mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr);
199
f4bb363f 200if (!mimeword)
059ec3d9 201 {
f4bb363f 202 if (lenptr) *lenptr = size;
059ec3d9
PH
203 return string;
204 }
205
206/* Scan through the string, decoding MIME words and copying intermediate text,
207building the result as we go. The result may be longer than the input if it is
208translated into a multibyte code such as UTF-8. That's why we use the dynamic
209string building code. */
210
f3ebb786 211yield = store_get(sizeof(gstring) + ++size, is_tainted(string));
acec9514
JH
212yield->size = size;
213yield->ptr = 0;
214yield->s = US(yield + 1);
059ec3d9 215
f4bb363f 216while (mimeword)
059ec3d9
PH
217 {
218
219 #if HAVE_ICONV
220 iconv_t icd = (iconv_t)(-1);
221 #endif
222
223 if (mimeword != string)
acec9514 224 yield = string_catn(yield, string, mimeword - string);
f3ebb786 225/*XXX that might have to convert an untainted string to a tainted one */
059ec3d9
PH
226
227 /* Do a charset translation if required. This is supported only on hosts
228 that have the iconv() function. Translation errors set error, but carry on,
229 using the untranslated data. If there is more than one error, the message
230 passed back refers to the final one. We use a loop to cater for the case
231 of long strings - the RFC puts limits on the length, but it's best to be
232 robust. */
233
234 #if HAVE_ICONV
235 *q1 = 0;
236 if (target != NULL && strcmpic(target, mimeword+2) != 0)
237 {
238 icd = iconv_open(CS target, CS(mimeword+2));
239
240 if (icd == (iconv_t)(-1))
241 {
242 *error = string_sprintf("iconv_open(\"%s\", \"%s\") failed: %s%s",
243 target, mimeword+2, strerror(errno),
244 (errno == EINVAL)? " (maybe unsupported conversion)" : "");
245 }
246 }
247 *q1 = '?';
248 #endif
249
250 while (dlen > 0)
251 {
252 uschar *tptr = NULL; /* Stops compiler warning */
253 int tlen = -1;
254
255 #if HAVE_ICONV
256 uschar tbuffer[256];
257 uschar *outptr = tbuffer;
258 size_t outleft = sizeof(tbuffer);
259
260 /* If translation is required, go for it. */
261
262 if (icd != (iconv_t)(-1))
263 {
264 (void)iconv(icd, (ICONV_ARG2_TYPE)(&dptr), &dlen, CSS &outptr, &outleft);
265
266 /* If outptr has been adjusted, there is some output. Set up to add it to
267 the output buffer. The function will have adjusted dptr and dlen. If
268 iconv() stopped because of an error, we'll pick it up next time when
269 there's no output.
270
271 If there is no output, we expect there to have been a translation
272 error, because we know there was at least one input byte. We leave the
273 value of tlen as -1, which causes the rest of the input to be copied
274 verbatim. */
275
276 if (outptr > tbuffer)
277 {
278 tptr = tbuffer;
279 tlen = outptr - tbuffer;
280 }
281 else
282 {
283 DEBUG(D_any) debug_printf("iconv error translating \"%.*s\" to %s: "
73a46702 284 "%s\n", (int)(endword + 2 - mimeword), mimeword, target, strerror(errno));
059ec3d9
PH
285 }
286 }
287
288 #endif
289
290 /* No charset translation is happening or there was a translation error;
291 just set up the original as the string to be added, and mark it all used.
292 */
293
294 if (tlen == -1)
295 {
296 tptr = dptr;
297 tlen = dlen;
298 dlen = 0;
299 }
300
301 /* Deal with zero values; convert them if requested. */
302
303 if (zeroval != 0)
d7978c0f 304 for (int i = 0; i < tlen; i++)
059ec3d9 305 if (tptr[i] == 0) tptr[i] = zeroval;
059ec3d9
PH
306
307 /* Add the new string onto the result */
308
acec9514 309 yield = string_catn(yield, tptr, tlen);
059ec3d9
PH
310 }
311
312 #if HAVE_ICONV
313 if (icd != (iconv_t)(-1)) iconv_close(icd);
314 #endif
315
316 /* Update string past the MIME word; skip any white space if the next thing
317 is another MIME word. */
318
319 string = endword + 2;
320 mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr);
f4bb363f 321 if (mimeword)
059ec3d9
PH
322 {
323 uschar *s = string;
324 while (isspace(*s)) s++;
325 if (s == mimeword) string = s;
326 }
327 }
328
329/* Copy the remaining characters of the string, zero-terminate it, and return
330the length as well if requested. */
331
acec9514
JH
332yield = string_cat(yield, string);
333
334if (lenptr) *lenptr = yield->ptr;
335if (sizeptr) *sizeptr = yield->size;
336return string_from_gstring(yield);
059ec3d9
PH
337}
338
339
340/* This is the stub that provides the original interface without the sizeptr
341argument. */
342
343uschar *
344rfc2047_decode(uschar *string, BOOL lencheck, uschar *target, int zeroval,
345 int *lenptr, uschar **error)
346{
347return rfc2047_decode2(string, lencheck, target, zeroval, lenptr, NULL, error);
348}
349
350/* End of rfc2047.c */