Expansions: add ${sha3:<string>} item
[exim.git] / src / src / utf8.c
CommitLineData
0d7911ea
JH
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
5/* Copyright (c) Jeremy Harris 2015 */
6/* See the file NOTICE for conditions of use and distribution. */
7
8
9#include "exim.h"
10
8c5d388a 11#ifdef SUPPORT_I18N
0d7911ea
JH
12
13#include <idna.h>
14#include <punycode.h>
15#include <stringprep.h>
16
17BOOL
18string_is_utf8(const uschar * s)
19{
20uschar c;
21while ((c = *s++)) if (c & 0x80) return TRUE;
22return FALSE;
23}
24
25/**************************************************/
26/* Domain conversions */
3c8b3577 27/* the *err string pointer should be null before the call */
0d7911ea
JH
28
29uschar *
30string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err)
31{
32uschar * s1;
33uschar * s;
34int rc;
35
36s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1);
37bf366e 37if ( (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED))
0d7911ea
JH
38 != IDNA_SUCCESS)
39 {
40 free(s);
41 if (err) *err = US idna_strerror(rc);
42 return NULL;
43 }
44free(s);
45s = string_copy(s1);
46free(s1);
47return s;
48}
49
50
51
52uschar *
53string_domain_alabel_to_utf8(const uschar * alabel, uschar ** err)
54{
55uschar * s1;
56uschar * s;
57int rc;
810d16ad 58
0d7911ea
JH
59if ( (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES))
60 != IDNA_SUCCESS)
61 {
62 if (err) *err = US idna_strerror(rc);
63 return NULL;
64 }
65s = string_copy(s1);
66free(s1);
67return s;
68}
69
70/**************************************************/
71/* localpart conversions */
3c8b3577 72/* the *err string pointer should be null before the call */
0d7911ea
JH
73
74
75uschar *
76string_localpart_utf8_to_alabel(const uschar * utf8, uschar ** err)
77{
78size_t ucs4_len;
921dfc11
JH
79punycode_uint * p;
80size_t p_len;
81uschar * res;
0d7911ea
JH
82int rc;
83
921dfc11
JH
84if (!string_is_utf8(utf8)) return string_copy(utf8);
85
86p = (punycode_uint *) stringprep_utf8_to_ucs4(CCS utf8, -1, &ucs4_len);
87p_len = ucs4_len*4; /* this multiplier is pure guesswork */
88res = store_get(p_len+5);
89
0d7911ea
JH
90res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';
91
fc362fc5 92if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, CS res+4)) != PUNYCODE_SUCCESS)
0d7911ea 93 {
4e08fd50 94 DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
0d7911ea
JH
95 free(p);
96 if (err) *err = US punycode_strerror(rc);
97 return NULL;
98 }
4e08fd50 99p_len += 4;
0d7911ea
JH
100free(p);
101res[p_len] = '\0';
102return res;
103}
104
105
106uschar *
107string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
108{
fc362fc5 109size_t p_len = Ustrlen(alabel);
0d7911ea 110punycode_uint * p;
4e08fd50
JH
111uschar * s;
112uschar * res;
0d7911ea
JH
113int rc;
114
115if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
116 {
117 if (err) *err = US"bad alabel prefix";
118 return NULL;
119 }
0d7911ea 120
9d4319df 121p_len -= 4;
0d7911ea
JH
122p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));
123
124if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUCCESS)
125 {
126 if (err) *err = US punycode_strerror(rc);
127 return NULL;
128 }
4e08fd50 129
aa7751be 130s = US stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
4e08fd50
JH
131res = string_copyn(s, p_len);
132free(s);
133return res;
0d7911ea
JH
134}
135
136
3c8b3577
JH
137/**************************************************/
138/* whole address conversion */
139/* the *err string pointer should be null before the call */
140
141uschar *
142string_address_utf8_to_alabel(const uschar * utf8, uschar ** err)
143{
144const uschar * s;
145uschar * l;
146uschar * d;
147
921dfc11
JH
148if (!*utf8) return string_copy(utf8);
149
150DEBUG(D_expand) debug_printf("addr from utf8 <%s>", utf8);
151
3c8b3577
JH
152for (s = utf8; *s; s++)
153 if (*s == '@')
154 {
155 l = string_copyn(utf8, s - utf8);
921dfc11
JH
156 if ( (l = string_localpart_utf8_to_alabel(l, err), err && *err)
157 || (d = string_domain_utf8_to_alabel(++s, err), err && *err)
158 )
159 return NULL;
160 l = string_sprintf("%s@%s", l, d);
161 DEBUG(D_expand) debug_printf(" -> <%s>\n", l);
162 return l;
3c8b3577 163 }
921dfc11
JH
164
165l = string_localpart_utf8_to_alabel(utf8, err);
166DEBUG(D_expand) debug_printf(" -> <%s>\n", l);
167return l;
3c8b3577
JH
168}
169
170
171
b04be5e7
JH
172/*************************************************
173* Report the library versions. *
174*************************************************/
175
176/* See a description in tls-openssl.c for an explanation of why this exists.
177
178Arguments: a FILE* to print the results to
179Returns: nothing
180*/
181
182void
183utf8_version_report(FILE *f)
184{
185fprintf(f, "Library version: IDN: Compile: %s\n"
186 " Runtime: %s\n",
187 STRINGPREP_VERSION,
188 stringprep_check_version(NULL));
189}
190
0d7911ea
JH
191#endif /* whole file */
192
193/* vi: aw ai sw=2
194*/
195/* End of utf8.c */