Internationalised mail name handling.
RFCs 6530, 6533, 5890
-Compile with libidn.
+Compile with EXPERIMENTAL_INTERNATIONAL and libidn.
+
+Expansion operators:
+ ${utf8_domain_to_alabel:str}
+ ${utf8_domain_from_alabel:str}
+ ${utf8_localpart_to_alabel:str}
+ ${utf8_localpart_from_alabel:str}
--------------------------------------------------------------
US"quote_local_part",
US"reverse_ip",
US"time_eval",
- US"time_interval"};
+ US"time_interval"
+#ifdef EXPERIMENTAL_INTERNATIONAL
+ ,US"utf8_domain_from_alabel",
+ US"utf8_domain_to_alabel",
+ US"utf8_localpart_from_alabel",
+ US"utf8_localpart_to_alabel"
+#endif
+ };
enum {
EOP_FROM_UTF8,
EOP_QUOTE_LOCAL_PART,
EOP_REVERSE_IP,
EOP_TIME_EVAL,
- EOP_TIME_INTERVAL };
+ EOP_TIME_INTERVAL
+#ifdef EXPERIMENTAL_INTERNATIONAL
+ ,EOP_UTF8_DOMAIN_FROM_ALABEL,
+ EOP_UTF8_DOMAIN_TO_ALABEL,
+ EOP_UTF8_LOCALPART_FROM_ALABEL,
+ EOP_UTF8_LOCALPART_TO_ALABEL
+#endif
+ };
static uschar *op_table_main[] = {
US"address",
if (bytes_left)
{
if ((c & 0xc0) != 0x80)
- {
/* wrong continuation byte; invalidate all bytes */
complete = 1; /* error */
- }
else
{
codepoint = (codepoint << 6) | (c & 0x3f);
seq_buff[index++] = c;
if (--bytes_left == 0) /* codepoint complete */
- {
if(codepoint > 0x10FFFF) /* is it too large? */
complete = -1; /* error (RFC3629 limit) */
else
yield = string_cat(yield, &size, &ptr, seq_buff, seq_len);
index = 0;
}
- }
}
}
else /* no bytes left: new sequence */
yield = string_cat(yield, &size, &ptr, UTF8_REPLACEMENT_CHAR, 1);
}
if ((complete == 1) && ((c & 0x80) == 0))
- { /* ASCII character follows incomplete sequence */
+ /* ASCII character follows incomplete sequence */
yield = string_cat(yield, &size, &ptr, &c, 1);
- }
}
continue;
}
+#ifdef EXPERIMENTAL_INTERNATIONAL /* the four operator cases below are compiled only when this macro is defined */
+ case EOP_UTF8_DOMAIN_TO_ALABEL: /* utf8_domain_to_alabel operator: convert sub via string_domain_utf8_to_alabel() */
+ {
+ uschar * error = NULL; /* passed by address to the converter; non-NULL afterwards is treated as failure */
+ uschar * s = string_domain_utf8_to_alabel(sub, &error);
+ if (error) /* on failure: set expand_string_message (naming the input and the error) and jump to EXPAND_FAILED */
+ {
+ expand_string_message = string_sprintf(
+ "error converting utf8 (%s) to alabel: %s",
+ string_printing(sub), error);
+ goto EXPAND_FAILED;
+ }
+ yield = string_cat(yield, &size, &ptr, s, Ustrlen(s)); /* add the converted string s to yield */
+ continue;
+ }
+
+ case EOP_UTF8_DOMAIN_FROM_ALABEL: /* utf8_domain_from_alabel operator: convert sub via string_domain_alabel_to_utf8() */
+ {
+ uschar * error = NULL; /* passed by address to the converter; non-NULL afterwards is treated as failure */
+ uschar * s = string_domain_alabel_to_utf8(sub, &error);
+ if (error) /* on failure: set expand_string_message (naming the input and the error) and jump to EXPAND_FAILED */
+ {
+ expand_string_message = string_sprintf(
+ "error converting alabel (%s) to utf8: %s",
+ string_printing(sub), error);
+ goto EXPAND_FAILED;
+ }
+ yield = string_cat(yield, &size, &ptr, s, Ustrlen(s)); /* add the converted string s to yield */
+ continue;
+ }
+
+ case EOP_UTF8_LOCALPART_TO_ALABEL: /* utf8_localpart_to_alabel operator: convert sub via string_localpart_utf8_to_alabel() */
+ {
+ uschar * error = NULL; /* passed by address to the converter; non-NULL afterwards is treated as failure */
+ uschar * s = string_localpart_utf8_to_alabel(sub, &error);
+ if (error) /* on failure: set expand_string_message (naming the input and the error) and jump to EXPAND_FAILED */
+ {
+ expand_string_message = string_sprintf(
+ "error converting utf8 (%s) to alabel: %s",
+ string_printing(sub), error);
+ goto EXPAND_FAILED;
+ }
+ yield = string_cat(yield, &size, &ptr, s, Ustrlen(s)); /* add the converted string s to yield */
+ DEBUG(D_expand) debug_printf("yield: '%s'\n", yield); /* NOTE(review): only this case traces yield, and %s assumes yield is NUL-terminated at this point - confirm */
+ continue;
+ }
+
+ case EOP_UTF8_LOCALPART_FROM_ALABEL: /* utf8_localpart_from_alabel operator: convert sub via string_localpart_alabel_to_utf8() */
+ {
+ uschar * error = NULL; /* passed by address to the converter; non-NULL afterwards is treated as failure */
+ uschar * s = string_localpart_alabel_to_utf8(sub, &error);
+ if (error) /* on failure: set expand_string_message (naming the input and the error) and jump to EXPAND_FAILED */
+ {
+ expand_string_message = string_sprintf(
+ "error converting alabel (%s) to utf8: %s",
+ string_printing(sub), error);
+ goto EXPAND_FAILED;
+ }
+ yield = string_cat(yield, &size, &ptr, s, Ustrlen(s)); /* add the converted string s to yield */
+ continue;
+ }
+#endif /* EXPERIMENTAL_INTERNATIONAL */
+
/* escape turns all non-printing characters into escape sequences. */
case EOP_ESCAPE:
uschar * res = store_get(p_len+5);
int rc;
+DEBUG(D_expand) debug_printf("l_u2a: ulen %d plen %d\n", ucs4_len, p_len);
+DEBUG(D_expand) for (rc = 0; rc < ucs4_len; rc++) debug_printf("%08x ", p[rc]);
+
res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';
if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, res+4)) != PUNYCODE_SUCCESS)
{
+ DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
free(p);
if (err) *err = US punycode_strerror(rc);
return NULL;
}
+DEBUG(D_expand) debug_printf("l_u2a: plen %d\n", p_len);
+p_len += 4;
+DEBUG(D_expand) for (rc = 0; rc < p_len; rc++) debug_printf("%02x ", res[rc]);
+DEBUG(D_expand) debug_printf("\n");
free(p);
res[p_len] = '\0';
return res;
{
size_t p_len = strlen(alabel);
punycode_uint * p;
+uschar * s;
+uschar * res;
int rc;
if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
return NULL;
}
p_len -= 4;
+DEBUG(D_expand) debug_printf("l_a2u: plen %d\n", p_len);
p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));
if (err) *err = US punycode_strerror(rc);
return NULL;
}
-p[p_len] = 0;
-return US p;
+DEBUG(D_expand) debug_printf("l_a2u: dlen %d\n", p_len);
+
+s = stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
+res = string_copyn(s, p_len);
+free(s);
+return res;
}
--- /dev/null
+# Exim test configuration 4200
+
+exim_path = EXIM_PATH
+spool_directory = DIR/spool
+log_file_path = DIR/spool/log/%slog
+gecos_name = CALLER_NAME
+
+# ----- Main settings -----
+
+# ----- ACL -----
+
+# End
--- /dev/null
+# Internationalised mail: expansions
+#
+# Sample strings taken from RFC 3492 (Punycode), section 7.1
+
+exim -be
+
+utf-8 localpart to a-label:
+
+${utf8_localpart_to_alabel:\xD9\x84}
+xn--ghb
+
+${utf8_localpart_to_alabel:\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\
+\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F}
+xn--egbpdaj6bu4bxfgehfvwxn
+
+a-label localpart to utf-8:
+
+${utf8_localpart_from_alabel:xn--ghb}
+${utf8_localpart_from_alabel:xn--egbpdaj6bu4bxfgehfvwxn}
+
+utf-8 domain to a-label:
+
+${utf8_domain_to_alabel:bogus.\xD9\x84.com}
+bogus.xn--ghb.com
+
+${utf8_domain_to_alabel:arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\
+\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F.com}
+arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
+
+${utf8_domain_to_alabel:simpl.chinese.\xE4\xBB\x96\xE4\xBB\xAC\xE4\xB8\xBA\xE4\xBB\x80\
+\xE4\xB9\x88\xE4\xB8\x8D\xE8\xAF\xB4\xE4\xB8\xAD\xE6\x96\x87.com}
+simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com
+
+${utf8_domain_to_alabel:trad.chinese.\xE4\xBB\x96\xE5\x80\x91\xE7\x88\xB2\xE4\xBB\x80\
+\xE9\xBA\xBD\xE4\xB8\x8D\xE8\xAA\xAA\xE4\xB8\xAD\xE6\x96\x87.com}
+trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com
+
+${utf8_domain_to_alabel:czech.\x50\x72\x6F\xC4\x8D\x70\x72\x6F\x73\x74\xC4\x9B\x6E\x65\
+\x6D\x6C\x75\x76\xC3\xAD\xC4\x8D\x65\x73\x6B\x79.com}
+czech.xn--Proprostnemluvesky-uyb24dma41a.com
+
+${utf8_domain_to_alabel:hebrew.\xD7\x9C\xD7\x9E\xD7\x94\xD7\x94\xD7\x9D\xD7\xA4\xD7\xA9\
+\xD7\x95\xD7\x98\xD7\x9C\xD7\x90\xD7\x9E\xD7\x93\xD7\x91\xD7\xA8\xD7\x99\xD7\x9D\xD7\xA2\
+\xD7\x91\xD7\xA8\xD7\x99\xD7\xAA.com}
+hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com
+
+${utf8_domain_to_alabel:hindi.\xE0\xA4\xAF\xE0\xA4\xB9\xE0\xA4\xB2\xE0\xA5\x8B\xE0\xA4\x97\
+\xE0\xA4\xB9\xE0\xA4\xBF\xE0\xA4\xA8\xE0\xA5\x8D\xE0\xA4\xA6\xE0\xA5\x80\xE0\xA4\x95\xE0\xA5\x8D\
+\xE0\xA4\xAF\xE0\xA5\x8B\xE0\xA4\x82\xE0\xA4\xA8\xE0\xA4\xB9\xE0\xA5\x80\xE0\xA4\x82\xE0\xA4\xAC\
+\xE0\xA5\x8B\xE0\xA4\xB2\xE0\xA4\xB8\xE0\xA4\x95\xE0\xA4\xA4\xE0\xA5\x87\xE0\xA4\xB9\xE0\xA5\x88\
+\xE0\xA4\x82.com}
+hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com
+
+${utf8_domain_to_alabel:japanese.\xE3\x81\xAA\xE3\x81\x9C\xE3\x81\xBF\xE3\x82\x93\xE3\x81\xAA\
+\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE3\x82\x92\xE8\xA9\xB1\xE3\x81\x97\xE3\x81\xA6\xE3\x81\x8F\
+\xE3\x82\x8C\xE3\x81\xAA\xE3\x81\x84\xE3\x81\xAE\xE3\x81\x8B.com}
+japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com
+
+# the a-label for the phrase in korean is too long for a domain label (63 byte limit)
+korean: ${utf8_localpart_to_alabel:\xEC\x84\xB8\xEA\xB3\x84\xEC\x9D\x98\xEB\xAA\xA8\xEB\x93\xA0\
+\xEC\x82\xAC\xEB\x9E\x8C\xEB\x93\xA4\xEC\x9D\xB4\xED\x95\x9C\xEA\xB5\xAD\xEC\x96\xB4\xEB\xA5\xBC\
+\xEC\x9D\xB4\xED\x95\xB4\xED\x95\x9C\xEB\x8B\xA4\xEB\xA9\xB4\xEC\x96\xBC\xEB\xA7\x88\xEB\x82\x98\
+\xEC\xA2\x8B\xEC\x9D\x84\xEA\xB9\x8C}
+korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c
+
+${utf8_domain_to_alabel:russian.\xD0\xBF\xD0\xBE\xD1\x87\xD0\xB5\xD0\xBC\xD1\x83\xD0\xB6\
+\xD0\xB5\xD0\xBE\xD0\xBD\xD0\xB8\xD0\xBD\xD0\xB5\xD0\xB3\xD0\xBE\xD0\xB2\xD0\xBE\xD1\x80\
+\xD1\x8F\xD1\x82\xD0\xBF\xD0\xBE\xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8.com}
+russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com
+
+${utf8_domain_to_alabel:spanish.\x50\x6F\x72\x71\x75\xC3\xA9\x6E\x6F\x70\x75\x65\x64\x65\
+\x6E\x73\x69\x6D\x70\x6C\x65\x6D\x65\x6E\x74\x65\x68\x61\x62\x6C\x61\x72\x65\x6E\x45\x73\
+\x70\x61\xC3\xB1\x6F\x6C.com}
+spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com
+
+${utf8_domain_to_alabel:vietnamese.\x54\xE1\xBA\xA1\x69\x73\x61\x6F\x68\xE1\xBB\x8D\x6B\x68\
+\xC3\xB4\x6E\x67\x74\x68\xE1\xBB\x83\x63\x68\xE1\xBB\x89\x6E\xC3\xB3\x69\x74\x69\xE1\xBA\xBF\
+\x6E\x67\x56\x69\xE1\xBB\x87\x74.com}
+vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com
+
+a-label domain to utf-8:
+
+${utf8_domain_from_alabel:arab.xn--ghb.com}
+${utf8_domain_from_alabel:arab.xn--egbpdaj6bu4bxfgehfvwxn.com}
+${utf8_domain_from_alabel:simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com}
+${utf8_domain_from_alabel:trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com}
+${utf8_domain_from_alabel:czech.xn--Proprostnemluvesky-uyb24dma41a.com}
+${utf8_domain_from_alabel:hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com}
+${utf8_domain_from_alabel:hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com}
+${utf8_domain_from_alabel:japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com}
+korean: ${utf8_localpart_from_alabel:xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c}
+${utf8_domain_from_alabel:russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com}
+${utf8_domain_from_alabel:spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com}
+${utf8_domain_from_alabel:vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com}
+
+****
--- /dev/null
+>
+> utf-8 localpart to a-label:
+>
+> xn--ghb
+> xn--ghb
+>
+> xn--egbpdaj6bu4bxfgehfvwxn
+> xn--egbpdaj6bu4bxfgehfvwxn
+>
+> a-label localpart to utf-8:
+>
+> ل
+> ليهمابتكلموشعربي؟
+>
+> utf-8 domain to a-label:
+>
+> bogus.xn--ghb.com
+> bogus.xn--ghb.com
+>
+> arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
+> arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
+>
+> simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com
+> simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com
+>
+> trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com
+> trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com
+>
+> czech.xn--proprostnemluvesky-uyb24dma41a.com
+> czech.xn--Proprostnemluvesky-uyb24dma41a.com
+>
+> hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com
+> hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com
+>
+> hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com
+> hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com
+>
+> japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com
+> japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com
+>
+> # the a-label for the phrase in korean is too long for a domain label (63 byte limit)
+> korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c
+> korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c
+>
+> russian.xn--b1abfaaepdrnnbgefbadotcwatmq2g4l.com
+> russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com
+>
+> spanish.xn--porqunopuedensimplementehablarenespaol-fmd56a.com
+> spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com
+>
+> vietnamese.xn--tisaohkhngthchnitingvit-kjcr8268qyxafd2f1b9g.com
+> vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com
+>
+> a-label domain to utf-8:
+>
+> arab.ل.com
+> arab.ليهمابتكلموشعربي؟.com
+> simpl.chinese.他们为什么不说中文.com
+> trad.chinese.他們爲什麽不說中文.com
+> czech.Pročprostěnemluvíčesky.com
+> hebrew.למההםפשוטלאמדבריםעברית.com
+> hindi.यहलोगहिन्दीक्योंनहींबोलसकतेहैं.com
+> japanese.なぜみんな日本語を話してくれないのか.com
+> korean: 세계의모든사람들이한국어를이해한다면얼마나좋을까
+> russian.почемужеонинеговорятпорусски.com
+> spanish.PorquénopuedensimplementehablarenEspañol.com
+> vietnamese.TạisaohọkhôngthểchỉnóitiếngViệt.com
+>
+>