Clean up compiler warnings from { gcc -Wall }, many of which I introduced with
[exim.git] / src / src / rfc2047.c
CommitLineData
/* $Cambridge: exim/src/src/rfc2047.c,v 1.6 2010/06/07 00:12:42 pdp Exp $ */

/*************************************************
*     Exim - an Internet mail transport agent    *
*************************************************/

/* Copyright (c) University of Cambridge 1995 - 2009 */
/* See the file NOTICE for conditions of use and distribution. */

/* This file contains a function for decoding message header lines that may
contain encoded "words" according to the rules described in

  RFC-2047 at http://www.ietf.org/rfc/rfc2047.txt

The function is a rewritten version of code created by Norihisa Washitake.
The original could be used either inside Exim (as part of a patch) or in a
freestanding form. The original contained some built-in code conversions; I
have chosen only to do code conversions if iconv() is supported by the OS.
Because there were quite a lot of hacks to be done, for a variety of reasons,
I rewrote the code.

You can find the latest version of the original library at

  http://washitake.com/mail/exim/mime/

The code below is almost completely unlike the original. */
27
28
29#include "exim.h"
30
31
32/*************************************************
33* Do a QP conversion *
34*************************************************/
35
36/* This function decodes "quoted printable" into bytes.
37
38Arguments:
39 string the string that includes QP escapes
40 ptrptr where to return pointer to the decoded string
41
42Returns: the length of the decoded string, or -1 on failure
43*/
44
45static int
46rfc2047_qpdecode(uschar *string, uschar **ptrptr)
47{
48int len = 0;
49uschar *ptr;
50
51ptr = *ptrptr = store_get(Ustrlen(string) + 1); /* No longer than this */
52
53while (*string != 0)
54 {
55 register int ch = *string++;
56
57 if (ch == '_') *ptr++ = ' ';
58 else if (ch == '=')
59 {
60 int a = *string;
61 int b = (a == 0)? 0 : string[1];
62 if (!isxdigit(a) || !isxdigit(b)) return -1; /* Bad QP string */
63 *ptr++ = ((Ustrchr(hex_digits, tolower(a)) - hex_digits) << 4) +
64 Ustrchr(hex_digits, tolower(b)) - hex_digits;
65 string += 2;
66 }
67 else if (ch == ' ' || ch == '\t') return -1; /* Whitespace is illegal */
68 else *ptr++ = ch;
69
70 len++;
71 }
72
73*ptr = 0;
74return len;
75}
76
77
78
79/*************************************************
80* Decode next MIME word *
81*************************************************/
82
83/* Scan a string to see if a MIME word exists; pass back the separator
84points in the string.
85
86Arguments:
87 string subject string
88 lencheck TRUE to enforce maximum length check
89 q1ptr pass back address of first question mark
90 q2ptr pass back address of second question mark
91 endptr pass back address of final ?=
92 dlenptr pass back length of decoded string
93 dptrptr pass back pointer to decoded string
94
95Returns: address of =? or NULL if not present
96*/
97
98static uschar *
99decode_mimeword(uschar *string, BOOL lencheck, uschar **q1ptr, uschar **q2ptr,
100 uschar **endptr, size_t *dlenptr, uschar **dptrptr)
101{
102uschar *mimeword;
103for (;; string = mimeword + 2)
104 {
105 int encoding;
106 int dlen = -1;
107
108 if ((mimeword = Ustrstr(string, "=?")) == NULL ||
109 (*q1ptr = Ustrchr(mimeword+2, '?')) == NULL ||
110 (*q2ptr = Ustrchr(*q1ptr+1, '?')) == NULL ||
111 (*endptr = Ustrstr(*q2ptr+1, "?=")) == NULL) return NULL;
112
113 /* We have found =?xxx?xxx?xxx?= in the string. Optionally check the
114 length, and that the second field is just one character long. If not,
115 continue the loop to search again. We must start just after the initial =?
116 because we might have found =?xxx=?xxx?xxx?xxx?=. */
117
118 if ((lencheck && *endptr - mimeword > 73) || *q2ptr - *q1ptr != 2) continue;
119
120 /* Get the encoding letter, and decode the data string. */
121
122 encoding = toupper((*q1ptr)[1]);
123 **endptr = 0;
124 if (encoding == 'B')
125 dlen = auth_b64decode(*q2ptr+1, dptrptr);
126 else if (encoding == 'Q')
127 dlen = rfc2047_qpdecode(*q2ptr+1, dptrptr);
128 **endptr = '?'; /* restore */
129
130 /* If the decoding succeeded, we are done. Set the length of the decoded
131 string, and pass back the initial pointer. Otherwise, the loop continues. */
132
133 if (dlen >= 0)
134 {
135 *dlenptr = (size_t)dlen;
136 return mimeword;
137 }
138 }
139
140/* Control should never actually get here */
141}
142
143
144
145/*************************************************
146* Decode and convert an RFC 2047 string *
147*************************************************/
148
149/* There are two functions defined here. The original one was rfc2047_decode()
150and it was documented in the local_scan() interface. I needed to add an extra
151argument for use by expand_string(), so I created rfc2047_decode2() for that
152purpose. The original function became a stub that just supplies NULL for the
153new argument (sizeptr).
154
155An RFC 2047-encoded string may contain one or more "words", each of the
156form =?...?.?...?= with the first ... specifying the character code, the
157second being Q (for quoted printable) or B for Base64 encoding. The third ...
158is the actual data.
159
160This function first decodes each "word" into bytes from the Q or B encoding.
161Then, if provided with the name of a charset encoding, and if iconv() is
162available, it attempts to translate the result to the named character set.
163If this fails, the binary string is returned with an error message.
164
165If a binary zero is encountered in the decoded string, it is replaced by the
166contents of the zeroval argument. For use with Exim headers, the value must not
167be 0 because they are handled as zero-terminated strings. When zeroval==0,
168lenptr should not be NULL.
169
170Arguments:
171 string the subject string
172 lencheck TRUE to enforce maximum MIME word length
173 target the name of the target encoding for MIME words, or NULL for
174 no charset translation
175 zeroval the value to use for binary zero bytes
176 lenptr if not NULL, the length of the result is returned via
177 this variable
178 sizeptr if not NULL, the length of a new store block in which the
179 result is built is placed here; if no new store is obtained,
180 the value is not changed
181 error for error messages; NULL if no problem; this can be set
182 when the yield is non-NULL if there was a charset
183 translation problem
184
185Returns: the decoded, converted string, or NULL on error; if there are
186 no MIME words in the string, the original string is returned
187*/
188
189uschar *
190rfc2047_decode2(uschar *string, BOOL lencheck, uschar *target, int zeroval,
191 int *lenptr, int *sizeptr, uschar **error)
192{
193int ptr = 0;
194int size = Ustrlen(string);
195size_t dlen;
196uschar *dptr, *yield;
197uschar *mimeword, *q1, *q2, *endword;
198
199*error = NULL;
200mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr);
201
202if (mimeword == NULL)
203 {
204 if (lenptr != NULL) *lenptr = size;
205 return string;
206 }
207
208/* Scan through the string, decoding MIME words and copying intermediate text,
209building the result as we go. The result may be longer than the input if it is
210translated into a multibyte code such as UTF-8. That's why we use the dynamic
211string building code. */
212
213yield = store_get(++size);
214
215while (mimeword != NULL)
216 {
217
218 #if HAVE_ICONV
219 iconv_t icd = (iconv_t)(-1);
220 #endif
221
222 if (mimeword != string)
223 yield = string_cat(yield, &size, &ptr, string, mimeword - string);
224
225 /* Do a charset translation if required. This is supported only on hosts
226 that have the iconv() function. Translation errors set error, but carry on,
227 using the untranslated data. If there is more than one error, the message
228 passed back refers to the final one. We use a loop to cater for the case
229 of long strings - the RFC puts limits on the length, but it's best to be
230 robust. */
231
232 #if HAVE_ICONV
233 *q1 = 0;
234 if (target != NULL && strcmpic(target, mimeword+2) != 0)
235 {
236 icd = iconv_open(CS target, CS(mimeword+2));
237
238 if (icd == (iconv_t)(-1))
239 {
240 *error = string_sprintf("iconv_open(\"%s\", \"%s\") failed: %s%s",
241 target, mimeword+2, strerror(errno),
242 (errno == EINVAL)? " (maybe unsupported conversion)" : "");
243 }
244 }
245 *q1 = '?';
246 #endif
247
248 while (dlen > 0)
249 {
250 uschar *tptr = NULL; /* Stops compiler warning */
251 int tlen = -1;
252
253 #if HAVE_ICONV
254 uschar tbuffer[256];
255 uschar *outptr = tbuffer;
256 size_t outleft = sizeof(tbuffer);
257
258 /* If translation is required, go for it. */
259
260 if (icd != (iconv_t)(-1))
261 {
262 (void)iconv(icd, (ICONV_ARG2_TYPE)(&dptr), &dlen, CSS &outptr, &outleft);
263
264 /* If outptr has been adjusted, there is some output. Set up to add it to
265 the output buffer. The function will have adjusted dptr and dlen. If
266 iconv() stopped because of an error, we'll pick it up next time when
267 there's no output.
268
269 If there is no output, we expect there to have been a translation
270 error, because we know there was at least one input byte. We leave the
271 value of tlen as -1, which causes the rest of the input to be copied
272 verbatim. */
273
274 if (outptr > tbuffer)
275 {
276 tptr = tbuffer;
277 tlen = outptr - tbuffer;
278 }
279 else
280 {
281 DEBUG(D_any) debug_printf("iconv error translating \"%.*s\" to %s: "
73a46702 282 "%s\n", (int)(endword + 2 - mimeword), mimeword, target, strerror(errno));
059ec3d9
PH
283 }
284 }
285
286 #endif
287
288 /* No charset translation is happening or there was a translation error;
289 just set up the original as the string to be added, and mark it all used.
290 */
291
292 if (tlen == -1)
293 {
294 tptr = dptr;
295 tlen = dlen;
296 dlen = 0;
297 }
298
299 /* Deal with zero values; convert them if requested. */
300
301 if (zeroval != 0)
302 {
303 int i;
304 for (i = 0; i < tlen; i++)
305 if (tptr[i] == 0) tptr[i] = zeroval;
306 }
307
308 /* Add the new string onto the result */
309
310 yield = string_cat(yield, &size, &ptr, tptr, tlen);
311 }
312
313 #if HAVE_ICONV
314 if (icd != (iconv_t)(-1)) iconv_close(icd);
315 #endif
316
317 /* Update string past the MIME word; skip any white space if the next thing
318 is another MIME word. */
319
320 string = endword + 2;
321 mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr);
322 if (mimeword != NULL)
323 {
324 uschar *s = string;
325 while (isspace(*s)) s++;
326 if (s == mimeword) string = s;
327 }
328 }
329
330/* Copy the remaining characters of the string, zero-terminate it, and return
331the length as well if requested. */
332
333yield = string_cat(yield, &size, &ptr, string, Ustrlen(string));
334yield[ptr] = 0;
335if (lenptr != NULL) *lenptr = ptr;
336if (sizeptr != NULL) *sizeptr = size;
337return yield;
338}
339
340
341/* This is the stub that provides the original interface without the sizeptr
342argument. */
343
344uschar *
345rfc2047_decode(uschar *string, BOOL lencheck, uschar *target, int zeroval,
346 int *lenptr, uschar **error)
347{
348return rfc2047_decode2(string, lencheck, target, zeroval, lenptr, NULL, error);
349}
350
/* End of rfc2047.c */