UTF8: MSA downconversions
[exim.git] / src / src / utf8.c
1 /*************************************************
2 * Exim - an Internet mail transport agent *
3 *************************************************/
4
5 /* Copyright (c) Jeremy Harris 2015 */
6 /* See the file NOTICE for conditions of use and distribution. */
7
8
9 #include "exim.h"
10
11 #ifdef EXPERIMENTAL_INTERNATIONAL
12
13 #include <idna.h>
14 #include <punycode.h>
15 #include <stringprep.h>
16
17 BOOL
18 string_is_utf8(const uschar * s)
19 {
20 uschar c;
21 while ((c = *s++)) if (c & 0x80) return TRUE;
22 return FALSE;
23 }
24
25 /**************************************************/
26 /* Domain conversions */
27 /* the *err string pointer should be null before the call */
28
29 uschar *
30 string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
31 {
32 uschar * s1;
33 uschar * s;
34 int rc;
35
36 s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
37 if ( (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
38 != IDNA_SUCCESS)
39 {
40 free(s);
41 if (err) *err = US idna_strerror(rc);
42 return NULL;
43 }
44 free(s);
45 s = string_copy(s1);
46 free(s1);
47 return s;
48 }
49
50
51
52 uschar *
53 string_domain_alabel_to_utf8(const uschar * alabel, uschar ** err)
54 {
55 uschar * s1;
56 uschar * s;
57 int rc;
58
59 if ( (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES))
60 != IDNA_SUCCESS)
61 {
62 if (err) *err = US idna_strerror(rc);
63 return NULL;
64 }
65 s = string_copy(s1);
66 free(s1);
67 return s;
68 }
69
70 /**************************************************/
71 /* localpart conversions */
72 /* the *err string pointer should be null before the call */
73
74
75 uschar *
76 string_localpart_utf8_to_alabel(const uschar * utf8, uschar ** err)
77 {
78 size_t ucs4_len;
79 punycode_uint * p = (punycode_uint *) stringprep_utf8_to_ucs4(CCS utf8, -1, &ucs4_len);
80 size_t p_len = ucs4_len*4; /* this multiplier is pure guesswork */
81 uschar * res = store_get(p_len+5);
82 int rc;
83
84 res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';
85
86 if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, res+4)) != PUNYCODE_SUCCESS)
87 {
88 DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
89 free(p);
90 if (err) *err = US punycode_strerror(rc);
91 return NULL;
92 }
93 p_len += 4;
94 free(p);
95 res[p_len] = '\0';
96 return res;
97 }
98
99
100 uschar *
101 string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
102 {
103 size_t p_len = strlen(alabel);
104 punycode_uint * p;
105 uschar * s;
106 uschar * res;
107 int rc;
108
109 if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
110 {
111 if (err) *err = US"bad alabel prefix";
112 return NULL;
113 }
114
115 p_len -= 4;
116 p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));
117
118 if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
119 {
120 if (err) *err = US punycode_strerror(rc);
121 return NULL;
122 }
123
124 s = stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
125 res = string_copyn(s, p_len);
126 free(s);
127 return res;
128 }
129
130
131 /**************************************************/
132 /* whole address conversion */
133 /* the *err string pointer should be null before the call */
134
135 uschar *
136 string_address_utf8_to_alabel(const uschar * utf8, uschar ** err)
137 {
138 const uschar * s;
139 uschar * l;
140 uschar * d;
141
142 for (s = utf8; *s; s++)
143 if (*s == '@')
144 {
145 l = string_copyn(utf8, s - utf8);
146 return (l = string_localpart_utf8_to_alabel(l, err), err && *err)
147 || (d = string_domain_utf8_to_alabel(++s, err), err && *err)
148 ? NULL
149 : string_sprintf("%s@%s", l, d);
150 }
151 return string_localpart_utf8_to_alabel(utf8, err);
152 }
153
154
155
156 /*************************************************
157 * Report the library versions. *
158 *************************************************/
159
160 /* See a description in tls-openssl.c for an explanation of why this exists.
161
162 Arguments: a FILE* to print the results to
163 Returns: nothing
164 */
165
166 void
167 utf8_version_report(FILE *f)
168 {
169 fprintf(f, "Library version: IDN: Compile: %s\n"
170 " Runtime: %s\n",
171 STRINGPREP_VERSION,
172 stringprep_check_version(NULL));
173 }
174
175 #endif /* whole file */
176
177 /* vi: aw ai sw=2
178 */
179 /* End of utf8.c */