DKIM: rename internal signing api
[exim.git] / src / src / utf8.c
... / ...
CommitLineData
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
5/* Copyright (c) Jeremy Harris 2015, 2016 */
6/* See the file NOTICE for conditions of use and distribution. */
7
8
9#include "exim.h"
10
11#ifdef SUPPORT_I18N
12
13#ifdef SUPPORT_I18N_2008
14# include <idn2.h>
15#else
16# include <idna.h>
17#endif
18
19#include <punycode.h>
20#include <stringprep.h>
21
22static uschar *
23string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err);
24
25/**************************************************/
26
27BOOL
28string_is_utf8(const uschar * s)
29{
30uschar c;
31if (s) while ((c = *s++)) if (c & 0x80) return TRUE;
32return FALSE;
33}
34
35static BOOL
36string_is_alabel(const uschar * s)
37{
38return s[0] == 'x' && s[1] == 'n' && s[2] == '-' && s[3] == '-';
39}
40
41/**************************************************/
42/* Domain conversions.
43The *err string pointer should be null before the call
44
45Return NULL for error, with optional errstr pointer filled in
46*/
47
48uschar *
49string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
50{
51uschar * s1, * s;
52int rc;
53
54#ifdef SUPPORT_I18N_2008
55/* Avoid lowercasing plain-ascii domains */
56if (!string_is_utf8(utf8))
57 return string_copy(utf8);
58
59/* Only lowercase is accepted by the library call. A pity since we lose
60any mixed-case annotation. This does not really matter for a domain. */
61 {
62 uschar c;
63 for (s1 = s = US utf8; (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
64 {
65 s = string_copy(utf8);
66 for (s1 = s + (s1 - utf8); (c = *s1); s1++) if (!(c & 0x80) && isupper(c))
67 *s1 = tolower(c);
68 break;
69 }
70 }
71if ((rc = idn2_lookup_u8(CCS s, &s1, IDN2_NFC_INPUT)) != IDN2_OK)
72 {
73 if (err) *err = US idn2_strerror(rc);
74 return NULL;
75 }
76#else
77s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
78if ( (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
79 != IDNA_SUCCESS)
80 {
81 free(s);
82 if (err) *err = US idna_strerror(rc);
83 return NULL;
84 }
85free(s);
86#endif
87s = string_copy(s1);
88free(s1);
89return s;
90}
91
92
93
94uschar *
95string_domain_alabel_to_utf8(const uschar * alabel, uschar ** err)
96{
97#ifdef SUPPORT_I18N_2008
98const uschar * label;
99int sep = '.';
100uschar * s = NULL;
101int size = 0, len = 0;
102
103while (label = string_nextinlist(&alabel, &sep, NULL, 0))
104 if ( string_is_alabel(label)
105 && !(label = string_localpart_alabel_to_utf8_(label, err))
106 )
107 return NULL;
108 else
109 s = string_append_listele(s, &size, &len, '.', label);
110return s;
111
112#else
113
114uschar * s1, * s;
115int rc;
116
117if ( (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES))
118 != IDNA_SUCCESS)
119 {
120 if (err) *err = US idna_strerror(rc);
121 return NULL;
122 }
123s = string_copy(s1);
124free(s1);
125return s;
126#endif
127}
128
129/**************************************************/
130/* localpart conversions */
131/* the *err string pointer should be null before the call */
132
133
134uschar *
135string_localpart_utf8_to_alabel(const uschar * utf8, uschar ** err)
136{
137size_t ucs4_len;
138punycode_uint * p;
139size_t p_len;
140uschar * res;
141int rc;
142
143if (!string_is_utf8(utf8)) return string_copy(utf8);
144
145p = (punycode_uint *) stringprep_utf8_to_ucs4(CCS utf8, -1, &ucs4_len);
146p_len = ucs4_len*4; /* this multiplier is pure guesswork */
147res = store_get(p_len+5);
148
149res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';
150
151if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, CS res+4)) != PUNYCODE_SUCCESS)
152 {
153 DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
154 free(p);
155 if (err) *err = US punycode_strerror(rc);
156 return NULL;
157 }
158p_len += 4;
159free(p);
160res[p_len] = '\0';
161return res;
162}
163
164
165static uschar *
166string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err)
167{
168size_t p_len;
169punycode_uint * p;
170int rc;
171uschar * s, * res;
172
173DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel);
174alabel += 4;
175p_len = Ustrlen(alabel);
176p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));
177
178if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
179 {
180 if (err) *err = US punycode_strerror(rc);
181 return NULL;
182 }
183
184s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
185res = string_copyn(s, p_len);
186free(s);
187return res;
188}
189
190
191uschar *
192string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
193{
194if (string_is_alabel(alabel))
195 return string_localpart_alabel_to_utf8_(alabel, err);
196
197if (err) *err = US"bad alabel prefix";
198return NULL;
199}
200
201
202/**************************************************/
203/* Whole address conversion.
204The *err string pointer should be null before the call.
205
206Return NULL on error, with (optional) errstring pointer filled in
207*/
208
209uschar *
210string_address_utf8_to_alabel(const uschar * utf8, uschar ** err)
211{
212const uschar * s;
213uschar * l;
214uschar * d;
215
216if (!*utf8) return string_copy(utf8);
217
218DEBUG(D_expand) debug_printf("addr from utf8 <%s>", utf8);
219
220for (s = utf8; *s; s++)
221 if (*s == '@')
222 {
223 l = string_copyn(utf8, s - utf8);
224 if ( !(l = string_localpart_utf8_to_alabel(l, err))
225 || !(d = string_domain_utf8_to_alabel(++s, err))
226 )
227 return NULL;
228 l = string_sprintf("%s@%s", l, d);
229 DEBUG(D_expand) debug_printf(" -> <%s>\n", l);
230 return l;
231 }
232
233l = string_localpart_utf8_to_alabel(utf8, err);
234DEBUG(D_expand) debug_printf(" -> <%s>\n", l);
235return l;
236}
237
238
239
240/*************************************************
241* Report the library versions. *
242*************************************************/
243
244/* See a description in tls-openssl.c for an explanation of why this exists.
245
246Arguments: a FILE* to print the results to
247Returns: nothing
248*/
249
250void
251utf8_version_report(FILE *f)
252{
253#ifdef SUPPORT_I18N_2008
254fprintf(f, "Library version: IDN2: Compile: %s\n"
255 " Runtime: %s\n",
256 IDN2_VERSION,
257 idn2_check_version(NULL));
258fprintf(f, "Library version: Stringprep: Compile: %s\n"
259 " Runtime: %s\n",
260 STRINGPREP_VERSION,
261 stringprep_check_version(NULL));
262#else
263fprintf(f, "Library version: IDN: Compile: %s\n"
264 " Runtime: %s\n",
265 STRINGPREP_VERSION,
266 stringprep_check_version(NULL));
267#endif
268}
269
270#endif /* whole file */
271
272/* vi: aw ai sw=2
273*/
274/* End of utf8.c */