Expansion item ${listquote }. Bug 1066
[exim.git] / src / src / utf8.c
CommitLineData
0d7911ea
JH
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
f9ba5e22 5/* Copyright (c) Jeremy Harris 2015 - 2018 */
0d7911ea
JH
6/* See the file NOTICE for conditions of use and distribution. */
7
8
9#include "exim.h"
10
8c5d388a 11#ifdef SUPPORT_I18N
0d7911ea 12
9427e879
JH
13#ifdef SUPPORT_I18N_2008
14# include <idn2.h>
15#else
16# include <idna.h>
17#endif
18
0d7911ea
JH
19#include <punycode.h>
20#include <stringprep.h>
21
9427e879
JH
22static uschar *
23string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err);
24
25/**************************************************/
26
0d7911ea
JH
27BOOL
28string_is_utf8(const uschar * s)
29{
30uschar c;
1435d4b2 31if (s) while ((c = *s++)) if (c & 0x80) return TRUE;
0d7911ea
JH
32return FALSE;
33}
34
9427e879
JH
35static BOOL
36string_is_alabel(const uschar * s)
37{
38return s[0] == 'x' && s[1] == 'n' && s[2] == '-' && s[3] == '-';
39}
40
0d7911ea 41/**************************************************/
9427e879
JH
/* Domain conversions.
The *err string pointer should be NULL before the call.

Return NULL on error, with the (optional) err string pointer filled in.
*/
0d7911ea
JH
47
48uschar *
49string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
50{
9427e879 51uschar * s1, * s;
0d7911ea
JH
52int rc;
53
9427e879 54#ifdef SUPPORT_I18N_2008
496e0df6
JH
55/* Avoid lowercasing plain-ascii domains */
56if (!string_is_utf8(utf8))
57 return string_copy(utf8);
58
9427e879
JH
59/* Only lowercase is accepted by the library call. A pity since we lose
60any mixed-case annotation. This does not really matter for a domain. */
61 {
62 uschar c;
63 for (s1 = s = US utf8; (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
64 {
65 s = string_copy(utf8);
66 for (s1 = s + (s1 - utf8); (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
67 *s1 = tolower(c);
68 break;
69 }
70 }
7845dbb3 71if ((rc = idn2_lookup_u8((const uint8_t *) s, &s1, IDN2_NFC_INPUT)) != IDN2_OK)
9427e879
JH
72 {
73 if (err) *err = US idn2_strerror(rc);
74 return NULL;
75 }
76#else
0d7911ea 77s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
37bf366e 78if ( (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
0d7911ea
JH
79 != IDNA_SUCCESS)
80 {
81 free(s);
82 if (err) *err = US idna_strerror(rc);
83 return NULL;
84 }
85free(s);
9427e879 86#endif
0d7911ea
JH
87s = string_copy(s1);
88free(s1);
89return s;
90}
91
92
93
94uschar *
95string_domain_alabel_to_utf8(const uschar * alabel, uschar ** err)
96{
9427e879
JH
97#ifdef SUPPORT_I18N_2008
98const uschar * label;
99int sep = '.';
acec9514 100gstring * g = NULL;
9427e879
JH
101
102while (label = string_nextinlist(&alabel, &sep, NULL, 0))
103 if ( string_is_alabel(label)
104 && !(label = string_localpart_alabel_to_utf8_(label, err))
105 )
106 return NULL;
107 else
acec9514
JH
108 g = string_append_listele(g, '.', label);
109return string_from_gstring(g);
9427e879
JH
110
111#else
112
113uschar * s1, * s;
0d7911ea 114int rc;
810d16ad 115
0d7911ea
JH
116if ( (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES))
117 != IDNA_SUCCESS)
118 {
119 if (err) *err = US idna_strerror(rc);
120 return NULL;
121 }
122s = string_copy(s1);
123free(s1);
124return s;
9427e879 125#endif
0d7911ea
JH
126}
127
128/**************************************************/
/* Localpart conversions. */
/* The *err string pointer should be NULL before the call. */
0d7911ea
JH
131
132
133uschar *
134string_localpart_utf8_to_alabel(const uschar * utf8, uschar ** err)
135{
136size_t ucs4_len;
921dfc11
JH
137punycode_uint * p;
138size_t p_len;
139uschar * res;
0d7911ea
JH
140int rc;
141
921dfc11
JH
142if (!string_is_utf8(utf8)) return string_copy(utf8);
143
144p = (punycode_uint *) stringprep_utf8_to_ucs4(CCS utf8, -1, &ucs4_len);
145p_len = ucs4_len*4; /* this multiplier is pure guesswork */
f3ebb786 146res = store_get(p_len+5, is_tainted(utf8));
921dfc11 147
0d7911ea
JH
148res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';
149
fc362fc5 150if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, CS res+4)) != PUNYCODE_SUCCESS)
0d7911ea 151 {
4e08fd50 152 DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
0d7911ea
JH
153 free(p);
154 if (err) *err = US punycode_strerror(rc);
155 return NULL;
156 }
4e08fd50 157p_len += 4;
0d7911ea
JH
158free(p);
159res[p_len] = '\0';
160return res;
161}
162
163
9427e879
JH
164static uschar *
165string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err)
0d7911ea 166{
9427e879 167size_t p_len;
0d7911ea
JH
168punycode_uint * p;
169int rc;
9427e879 170uschar * s, * res;
0d7911ea 171
9427e879
JH
172DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel);
173alabel += 4;
174p_len = Ustrlen(alabel);
f3ebb786 175p = store_get((p_len+1) * sizeof(*p), is_tainted(alabel));
0d7911ea 176
9427e879 177if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
0d7911ea
JH
178 {
179 if (err) *err = US punycode_strerror(rc);
180 return NULL;
181 }
4e08fd50 182
aa7751be 183s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
4e08fd50
JH
184res = string_copyn(s, p_len);
185free(s);
186return res;
0d7911ea
JH
187}
188
189
9427e879
JH
190uschar *
191string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
192{
193if (string_is_alabel(alabel))
194 return string_localpart_alabel_to_utf8_(alabel, err);
195
196if (err) *err = US"bad alabel prefix";
197return NULL;
198}
199
200
3c8b3577 201/**************************************************/
9427e879
JH
/* Whole address conversion.
The *err string pointer should be NULL before the call.

Return NULL on error, with the (optional) err string pointer filled in.
*/
3c8b3577
JH
207
208uschar *
209string_address_utf8_to_alabel(const uschar * utf8, uschar ** err)
210{
d7978c0f 211uschar * l, * d;
3c8b3577 212
921dfc11
JH
213if (!*utf8) return string_copy(utf8);
214
215DEBUG(D_expand) debug_printf("addr from utf8 <%s>", utf8);
216
d7978c0f 217for (const uschar * s = utf8; *s; s++)
3c8b3577
JH
218 if (*s == '@')
219 {
220 l = string_copyn(utf8, s - utf8);
9427e879
JH
221 if ( !(l = string_localpart_utf8_to_alabel(l, err))
222 || !(d = string_domain_utf8_to_alabel(++s, err))
921dfc11
JH
223 )
224 return NULL;
225 l = string_sprintf("%s@%s", l, d);
226 DEBUG(D_expand) debug_printf(" -> <%s>\n", l);
227 return l;
3c8b3577 228 }
921dfc11
JH
229
230l = string_localpart_utf8_to_alabel(utf8, err);
231DEBUG(D_expand) debug_printf(" -> <%s>\n", l);
232return l;
3c8b3577
JH
233}
234
235
236
b04be5e7
JH
237/*************************************************
238* Report the library versions. *
239*************************************************/
240
241/* See a description in tls-openssl.c for an explanation of why this exists.
242
243Arguments: a FILE* to print the results to
244Returns: nothing
245*/
246
247void
248utf8_version_report(FILE *f)
249{
9427e879
JH
250#ifdef SUPPORT_I18N_2008
251fprintf(f, "Library version: IDN2: Compile: %s\n"
252 " Runtime: %s\n",
253 IDN2_VERSION,
254 idn2_check_version(NULL));
255fprintf(f, "Library version: Stringprep: Compile: %s\n"
256 " Runtime: %s\n",
257 STRINGPREP_VERSION,
258 stringprep_check_version(NULL));
259#else
b04be5e7
JH
260fprintf(f, "Library version: IDN: Compile: %s\n"
261 " Runtime: %s\n",
262 STRINGPREP_VERSION,
263 stringprep_check_version(NULL));
9427e879 264#endif
b04be5e7
JH
265}
266
0d7911ea
JH
267#endif /* whole file */
268
269/* vi: aw ai sw=2
270*/
271/* End of utf8.c */