Commit | Line | Data |
---|---|---|
059ec3d9 PH |
1 | /* $Cambridge: exim/src/src/rfc2047.c,v 1.1 2004/10/07 10:39:01 ph10 Exp $ */ |
2 | ||
3 | /************************************************* | |
4 | * Exim - an Internet mail transport agent * | |
5 | *************************************************/ | |
6 | ||
7 | /* Copyright (c) University of Cambridge 1995 - 2004 */ | |
8 | /* See the file NOTICE for conditions of use and distribution. */ | |
9 | ||
10 | /* This file contains a function for decoding message header lines that may | |
11 | contain encoded "words" according to the rules described in | |
12 | ||
13 | RFC-2047 at http://www.ietf.org/rfc/rfc2047.txt | |
14 | ||
15 | The function is a rewritten version of code created by Norihisa Washitake. | |
16 | The original could be used either inside Exim (as part of a patch) or in a | |
17 | freestanding form. The original contained some built-in code conversions; I | |
18 | have chosen only to do code conversions if iconv() is supported by the OS. | |
19 | Because there were quite a lot of hacks to be done, for a variety of reasons, | |
20 | I rewrote the code. | |
21 | ||
22 | You can find the latest version of the original library at | |
23 | ||
24 | http://washitake.com/mail/exim/mime/ | |
25 | ||
26 | The code below is almost completely unlike the original. */ | |
27 | ||
28 | ||
29 | #include "exim.h" | |
30 | ||
31 | ||
32 | /************************************************* | |
33 | * Do a QP conversion * | |
34 | *************************************************/ | |
35 | ||
36 | /* This function decodes "quoted printable" into bytes. | |
37 | ||
38 | Arguments: | |
39 | string the string that includes QP escapes | |
40 | ptrptr where to return pointer to the decoded string | |
41 | ||
42 | Returns: the length of the decoded string, or -1 on failure | |
43 | */ | |
44 | ||
45 | static int | |
46 | rfc2047_qpdecode(uschar *string, uschar **ptrptr) | |
47 | { | |
48 | int len = 0; | |
49 | uschar *ptr; | |
50 | ||
51 | ptr = *ptrptr = store_get(Ustrlen(string) + 1); /* No longer than this */ | |
52 | ||
53 | while (*string != 0) | |
54 | { | |
55 | register int ch = *string++; | |
56 | ||
57 | if (ch == '_') *ptr++ = ' '; | |
58 | else if (ch == '=') | |
59 | { | |
60 | int a = *string; | |
61 | int b = (a == 0)? 0 : string[1]; | |
62 | if (!isxdigit(a) || !isxdigit(b)) return -1; /* Bad QP string */ | |
63 | *ptr++ = ((Ustrchr(hex_digits, tolower(a)) - hex_digits) << 4) + | |
64 | Ustrchr(hex_digits, tolower(b)) - hex_digits; | |
65 | string += 2; | |
66 | } | |
67 | else if (ch == ' ' || ch == '\t') return -1; /* Whitespace is illegal */ | |
68 | else *ptr++ = ch; | |
69 | ||
70 | len++; | |
71 | } | |
72 | ||
73 | *ptr = 0; | |
74 | return len; | |
75 | } | |
76 | ||
77 | ||
78 | ||
79 | /************************************************* | |
80 | * Decode next MIME word * | |
81 | *************************************************/ | |
82 | ||
83 | /* Scan a string to see if a MIME word exists; pass back the separator | |
84 | points in the string. | |
85 | ||
86 | Arguments: | |
87 | string subject string | |
88 | lencheck TRUE to enforce maximum length check | |
89 | q1ptr pass back address of first question mark | |
90 | q2ptr pass back address of second question mark | |
91 | endptr pass back address of final ?= | |
92 | dlenptr pass back length of decoded string | |
93 | dptrptr pass back pointer to decoded string | |
94 | ||
95 | Returns: address of =? or NULL if not present | |
96 | */ | |
97 | ||
98 | static uschar * | |
99 | decode_mimeword(uschar *string, BOOL lencheck, uschar **q1ptr, uschar **q2ptr, | |
100 | uschar **endptr, size_t *dlenptr, uschar **dptrptr) | |
101 | { | |
102 | uschar *mimeword; | |
103 | for (;; string = mimeword + 2) | |
104 | { | |
105 | int encoding; | |
106 | int dlen = -1; | |
107 | ||
108 | if ((mimeword = Ustrstr(string, "=?")) == NULL || | |
109 | (*q1ptr = Ustrchr(mimeword+2, '?')) == NULL || | |
110 | (*q2ptr = Ustrchr(*q1ptr+1, '?')) == NULL || | |
111 | (*endptr = Ustrstr(*q2ptr+1, "?=")) == NULL) return NULL; | |
112 | ||
113 | /* We have found =?xxx?xxx?xxx?= in the string. Optionally check the | |
114 | length, and that the second field is just one character long. If not, | |
115 | continue the loop to search again. We must start just after the initial =? | |
116 | because we might have found =?xxx=?xxx?xxx?xxx?=. */ | |
117 | ||
118 | if ((lencheck && *endptr - mimeword > 73) || *q2ptr - *q1ptr != 2) continue; | |
119 | ||
120 | /* Get the encoding letter, and decode the data string. */ | |
121 | ||
122 | encoding = toupper((*q1ptr)[1]); | |
123 | **endptr = 0; | |
124 | if (encoding == 'B') | |
125 | dlen = auth_b64decode(*q2ptr+1, dptrptr); | |
126 | else if (encoding == 'Q') | |
127 | dlen = rfc2047_qpdecode(*q2ptr+1, dptrptr); | |
128 | **endptr = '?'; /* restore */ | |
129 | ||
130 | /* If the decoding succeeded, we are done. Set the length of the decoded | |
131 | string, and pass back the initial pointer. Otherwise, the loop continues. */ | |
132 | ||
133 | if (dlen >= 0) | |
134 | { | |
135 | *dlenptr = (size_t)dlen; | |
136 | return mimeword; | |
137 | } | |
138 | } | |
139 | ||
140 | /* Control should never actually get here */ | |
141 | } | |
142 | ||
143 | ||
144 | ||
145 | /************************************************* | |
146 | * Decode and convert an RFC 2047 string * | |
147 | *************************************************/ | |
148 | ||
149 | /* There are two functions defined here. The original one was rfc2047_decode() | |
150 | and it was documented in the local_scan() interface. I needed to add an extra | |
151 | argument for use by expand_string(), so I created rfc2047_decode2() for that | |
152 | purpose. The original function became a stub that just supplies NULL for the | |
153 | new argument (sizeptr). | |
154 | ||
155 | An RFC 2047-encoded string may contain one or more "words", each of the | |
156 | form =?...?.?...?= with the first ... specifying the character code, the | |
157 | second being Q (for quoted printable) or B for Base64 encoding. The third ... | |
158 | is the actual data. | |
159 | ||
160 | This function first decodes each "word" into bytes from the Q or B encoding. | |
161 | Then, if provided with the name of a charset encoding, and if iconv() is | |
162 | available, it attempts to translate the result to the named character set. | |
163 | If this fails, the binary string is returned with an error message. | |
164 | ||
165 | If a binary zero is encountered in the decoded string, it is replaced by the | |
166 | contents of the zeroval argument. For use with Exim headers, the value must not | |
167 | be 0 because they are handled as zero-terminated strings. When zeroval==0, | |
168 | lenptr should not be NULL. | |
169 | ||
170 | Arguments: | |
171 | string the subject string | |
172 | lencheck TRUE to enforce maximum MIME word length | |
173 | target the name of the target encoding for MIME words, or NULL for | |
174 | no charset translation | |
175 | zeroval the value to use for binary zero bytes | |
176 | lenptr if not NULL, the length of the result is returned via | |
177 | this variable | |
178 | sizeptr if not NULL, the length of a new store block in which the | |
179 | result is built is placed here; if no new store is obtained, | |
180 | the value is not changed | |
181 | error for error messages; NULL if no problem; this can be set | |
182 | when the yield is non-NULL if there was a charset | |
183 | translation problem | |
184 | ||
185 | Returns: the decoded, converted string, or NULL on error; if there are | |
186 | no MIME words in the string, the original string is returned | |
187 | */ | |
188 | ||
189 | uschar * | |
190 | rfc2047_decode2(uschar *string, BOOL lencheck, uschar *target, int zeroval, | |
191 | int *lenptr, int *sizeptr, uschar **error) | |
192 | { | |
193 | int ptr = 0; | |
194 | int size = Ustrlen(string); | |
195 | size_t dlen; | |
196 | uschar *dptr, *yield; | |
197 | uschar *mimeword, *q1, *q2, *endword; | |
198 | ||
199 | *error = NULL; | |
200 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); | |
201 | ||
202 | if (mimeword == NULL) | |
203 | { | |
204 | if (lenptr != NULL) *lenptr = size; | |
205 | return string; | |
206 | } | |
207 | ||
208 | /* Scan through the string, decoding MIME words and copying intermediate text, | |
209 | building the result as we go. The result may be longer than the input if it is | |
210 | translated into a multibyte code such as UTF-8. That's why we use the dynamic | |
211 | string building code. */ | |
212 | ||
213 | yield = store_get(++size); | |
214 | ||
215 | while (mimeword != NULL) | |
216 | { | |
217 | ||
218 | #if HAVE_ICONV | |
219 | iconv_t icd = (iconv_t)(-1); | |
220 | #endif | |
221 | ||
222 | if (mimeword != string) | |
223 | yield = string_cat(yield, &size, &ptr, string, mimeword - string); | |
224 | ||
225 | /* Do a charset translation if required. This is supported only on hosts | |
226 | that have the iconv() function. Translation errors set error, but carry on, | |
227 | using the untranslated data. If there is more than one error, the message | |
228 | passed back refers to the final one. We use a loop to cater for the case | |
229 | of long strings - the RFC puts limits on the length, but it's best to be | |
230 | robust. */ | |
231 | ||
232 | #if HAVE_ICONV | |
233 | *q1 = 0; | |
234 | if (target != NULL && strcmpic(target, mimeword+2) != 0) | |
235 | { | |
236 | icd = iconv_open(CS target, CS(mimeword+2)); | |
237 | ||
238 | if (icd == (iconv_t)(-1)) | |
239 | { | |
240 | *error = string_sprintf("iconv_open(\"%s\", \"%s\") failed: %s%s", | |
241 | target, mimeword+2, strerror(errno), | |
242 | (errno == EINVAL)? " (maybe unsupported conversion)" : ""); | |
243 | } | |
244 | } | |
245 | *q1 = '?'; | |
246 | #endif | |
247 | ||
248 | while (dlen > 0) | |
249 | { | |
250 | uschar *tptr = NULL; /* Stops compiler warning */ | |
251 | int tlen = -1; | |
252 | ||
253 | #if HAVE_ICONV | |
254 | uschar tbuffer[256]; | |
255 | uschar *outptr = tbuffer; | |
256 | size_t outleft = sizeof(tbuffer); | |
257 | ||
258 | /* If translation is required, go for it. */ | |
259 | ||
260 | if (icd != (iconv_t)(-1)) | |
261 | { | |
262 | (void)iconv(icd, (ICONV_ARG2_TYPE)(&dptr), &dlen, CSS &outptr, &outleft); | |
263 | ||
264 | /* If outptr has been adjusted, there is some output. Set up to add it to | |
265 | the output buffer. The function will have adjusted dptr and dlen. If | |
266 | iconv() stopped because of an error, we'll pick it up next time when | |
267 | there's no output. | |
268 | ||
269 | If there is no output, we expect there to have been a translation | |
270 | error, because we know there was at least one input byte. We leave the | |
271 | value of tlen as -1, which causes the rest of the input to be copied | |
272 | verbatim. */ | |
273 | ||
274 | if (outptr > tbuffer) | |
275 | { | |
276 | tptr = tbuffer; | |
277 | tlen = outptr - tbuffer; | |
278 | } | |
279 | else | |
280 | { | |
281 | DEBUG(D_any) debug_printf("iconv error translating \"%.*s\" to %s: " | |
282 | "%s\n", endword + 2 - mimeword, mimeword, target, strerror(errno)); | |
283 | } | |
284 | } | |
285 | ||
286 | #endif | |
287 | ||
288 | /* No charset translation is happening or there was a translation error; | |
289 | just set up the original as the string to be added, and mark it all used. | |
290 | */ | |
291 | ||
292 | if (tlen == -1) | |
293 | { | |
294 | tptr = dptr; | |
295 | tlen = dlen; | |
296 | dlen = 0; | |
297 | } | |
298 | ||
299 | /* Deal with zero values; convert them if requested. */ | |
300 | ||
301 | if (zeroval != 0) | |
302 | { | |
303 | int i; | |
304 | for (i = 0; i < tlen; i++) | |
305 | if (tptr[i] == 0) tptr[i] = zeroval; | |
306 | } | |
307 | ||
308 | /* Add the new string onto the result */ | |
309 | ||
310 | yield = string_cat(yield, &size, &ptr, tptr, tlen); | |
311 | } | |
312 | ||
313 | #if HAVE_ICONV | |
314 | if (icd != (iconv_t)(-1)) iconv_close(icd); | |
315 | #endif | |
316 | ||
317 | /* Update string past the MIME word; skip any white space if the next thing | |
318 | is another MIME word. */ | |
319 | ||
320 | string = endword + 2; | |
321 | mimeword = decode_mimeword(string, lencheck, &q1, &q2, &endword, &dlen, &dptr); | |
322 | if (mimeword != NULL) | |
323 | { | |
324 | uschar *s = string; | |
325 | while (isspace(*s)) s++; | |
326 | if (s == mimeword) string = s; | |
327 | } | |
328 | } | |
329 | ||
330 | /* Copy the remaining characters of the string, zero-terminate it, and return | |
331 | the length as well if requested. */ | |
332 | ||
333 | yield = string_cat(yield, &size, &ptr, string, Ustrlen(string)); | |
334 | yield[ptr] = 0; | |
335 | if (lenptr != NULL) *lenptr = ptr; | |
336 | if (sizeptr != NULL) *sizeptr = size; | |
337 | return yield; | |
338 | } | |
339 | ||
340 | ||
341 | /* This is the stub that provides the original interface without the sizeptr | |
342 | argument. */ | |
343 | ||
344 | uschar * | |
345 | rfc2047_decode(uschar *string, BOOL lencheck, uschar *target, int zeroval, | |
346 | int *lenptr, uschar **error) | |
347 | { | |
348 | return rfc2047_decode2(string, lencheck, target, zeroval, lenptr, NULL, error); | |
349 | } | |
350 | ||
351 | /* End of rfc2047.c */ |