From 4e08fd50ebe820edb008a96b892a2749bbe8e72b Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Fri, 3 Apr 2015 19:13:27 +0100 Subject: [PATCH] A-label expansion operators --- doc/doc-txt/experimental-spec.txt | 8 ++- src/src/expand.c | 88 ++++++++++++++++++++++--- src/src/utf8.c | 19 +++++- test/confs/4200 | 12 ++++ test/scripts/4200-International/4200 | 96 ++++++++++++++++++++++++++++ test/stdout/4200 | 69 ++++++++++++++++++++ 6 files changed, 281 insertions(+), 11 deletions(-) create mode 100644 test/confs/4200 create mode 100644 test/scripts/4200-International/4200 create mode 100644 test/stdout/4200 diff --git a/doc/doc-txt/experimental-spec.txt b/doc/doc-txt/experimental-spec.txt index 0eeb939bf..738f02cce 100644 --- a/doc/doc-txt/experimental-spec.txt +++ b/doc/doc-txt/experimental-spec.txt @@ -1276,7 +1276,13 @@ SMTPUTF8 Internationalised mail name handling. RFCs 6530, 6533, 5890 -Compile with libidn. +Compile with EXPERIMENTAL_INTERNATIONAL and libidn. + +Expansion operators: + ${utf8_domain_to_alabel:str} + ${utf8_domain_from_alabel:str} + ${utf8_localpart_to_alabel:str} + ${utf8_localpart_from_alabel:str} -------------------------------------------------------------- diff --git a/src/src/expand.c b/src/src/expand.c index b613ef2b4..ad97f6fef 100644 --- a/src/src/expand.c +++ b/src/src/expand.c @@ -168,7 +168,14 @@ static uschar *op_table_underscore[] = { US"quote_local_part", US"reverse_ip", US"time_eval", - US"time_interval"}; + US"time_interval" +#ifdef EXPERIMENTAL_INTERNATIONAL + ,US"utf8_domain_from_alabel", + US"utf8_domain_to_alabel", + US"utf8_localpart_from_alabel", + US"utf8_localpart_to_alabel" +#endif + }; enum { EOP_FROM_UTF8, @@ -176,7 +183,14 @@ enum { EOP_QUOTE_LOCAL_PART, EOP_REVERSE_IP, EOP_TIME_EVAL, - EOP_TIME_INTERVAL }; + EOP_TIME_INTERVAL +#ifdef EXPERIMENTAL_INTERNATIONAL + ,EOP_UTF8_DOMAIN_FROM_ALABEL, + EOP_UTF8_DOMAIN_TO_ALABEL, + EOP_UTF8_LOCALPART_FROM_ALABEL, + EOP_UTF8_LOCALPART_TO_ALABEL +#endif + }; static uschar 
*op_table_main[] = { US"address", @@ -6555,16 +6569,13 @@ while (*s != 0) if (bytes_left) { if ((c & 0xc0) != 0x80) - { /* wrong continuation byte; invalidate all bytes */ complete = 1; /* error */ - } else { codepoint = (codepoint << 6) | (c & 0x3f); seq_buff[index++] = c; if (--bytes_left == 0) /* codepoint complete */ - { if(codepoint > 0x10FFFF) /* is it too large? */ complete = -1; /* error (RFC3629 limit) */ else @@ -6572,7 +6583,6 @@ while (*s != 0) yield = string_cat(yield, &size, &ptr, seq_buff, seq_len); index = 0; } - } } } else /* no bytes left: new sequence */ @@ -6615,13 +6625,75 @@ while (*s != 0) yield = string_cat(yield, &size, &ptr, UTF8_REPLACEMENT_CHAR, 1); } if ((complete == 1) && ((c & 0x80) == 0)) - { /* ASCII character follows incomplete sequence */ + /* ASCII character follows incomplete sequence */ yield = string_cat(yield, &size, &ptr, &c, 1); - } } continue; } +#ifdef EXPERIMENTAL_INTERNATIONAL + case EOP_UTF8_DOMAIN_TO_ALABEL: + { + uschar * error = NULL; + uschar * s = string_domain_utf8_to_alabel(sub, &error); + if (error) + { + expand_string_message = string_sprintf( + "error converting utf8 (%s) to alabel: %s", + string_printing(sub), error); + goto EXPAND_FAILED; + } + yield = string_cat(yield, &size, &ptr, s, Ustrlen(s)); + continue; + } + + case EOP_UTF8_DOMAIN_FROM_ALABEL: + { + uschar * error = NULL; + uschar * s = string_domain_alabel_to_utf8(sub, &error); + if (error) + { + expand_string_message = string_sprintf( + "error converting alabel (%s) to utf8: %s", + string_printing(sub), error); + goto EXPAND_FAILED; + } + yield = string_cat(yield, &size, &ptr, s, Ustrlen(s)); + continue; + } + + case EOP_UTF8_LOCALPART_TO_ALABEL: + { + uschar * error = NULL; + uschar * s = string_localpart_utf8_to_alabel(sub, &error); + if (error) + { + expand_string_message = string_sprintf( + "error converting utf8 (%s) to alabel: %s", + string_printing(sub), error); + goto EXPAND_FAILED; + } + yield = string_cat(yield, &size, &ptr, s, 
Ustrlen(s)); + DEBUG(D_expand) debug_printf("yield: '%s'\n", yield); + continue; + } + + case EOP_UTF8_LOCALPART_FROM_ALABEL: + { + uschar * error = NULL; + uschar * s = string_localpart_alabel_to_utf8(sub, &error); + if (error) + { + expand_string_message = string_sprintf( + "error converting alabel (%s) to utf8: %s", + string_printing(sub), error); + goto EXPAND_FAILED; + } + yield = string_cat(yield, &size, &ptr, s, Ustrlen(s)); + continue; + } +#endif /* EXPERIMENTAL_INTERNATIONAL */ + /* escape turns all non-printing characters into escape sequences. */ case EOP_ESCAPE: diff --git a/src/src/utf8.c b/src/src/utf8.c index 2f8173dc1..9a2b8656e 100644 --- a/src/src/utf8.c +++ b/src/src/utf8.c @@ -78,14 +78,22 @@ size_t p_len = ucs4_len*4; /* this multiplier is pure guesswork */ uschar * res = store_get(p_len+5); int rc; +DEBUG(D_expand) debug_printf("l_u2a: ulen %d plen %d\n", ucs4_len, p_len); +DEBUG(D_expand) for (rc = 0; rc < ucs4_len; rc++) debug_printf("%08x ", p[rc]); + res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-'; if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, res+4)) != PUNYCODE_SUCCESS) { + DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc)); free(p); if (err) *err = US punycode_strerror(rc); return NULL; } +DEBUG(D_expand) debug_printf("l_u2a: plen %d\n", p_len); +p_len += 4; +DEBUG(D_expand) for (rc = 0; rc < p_len; rc++) debug_printf("%02x ", res[rc]); +DEBUG(D_expand) debug_printf("\n"); free(p); res[p_len] = '\0'; return res; @@ -97,6 +105,8 @@ string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err) { size_t p_len = strlen(alabel); punycode_uint * p; +uschar * s; +uschar * res; int rc; if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-') @@ -105,6 +115,7 @@ if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-' return NULL; } p_len -= 4; +DEBUG(D_expand) debug_printf("l_a2u: plen %d\n", p_len); p = (punycode_uint *) store_get((p_len+1) * sizeof(*p)); 
@@ -113,8 +124,12 @@ if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUC if (err) *err = US punycode_strerror(rc); return NULL; } -p[p_len] = 0; -return US p; +DEBUG(D_expand) debug_printf("l_a2u: dlen %d\n", p_len); + +s = stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len); +res = string_copyn(s, p_len); +free(s); +return res; } diff --git a/test/confs/4200 b/test/confs/4200 new file mode 100644 index 000000000..bce8e551f --- /dev/null +++ b/test/confs/4200 @@ -0,0 +1,12 @@ +# Exim test configuration 4200 + +exim_path = EXIM_PATH +spool_directory = DIR/spool +log_file_path = DIR/spool/log/%slog +gecos_name = CALLER_NAME + +# ----- Main settings ----- + +# ----- ACL ----- + +# End diff --git a/test/scripts/4200-International/4200 b/test/scripts/4200-International/4200 new file mode 100644 index 000000000..48918b702 --- /dev/null +++ b/test/scripts/4200-International/4200 @@ -0,0 +1,96 @@ +# Internationalised mail: expansions +# +# Sample strings taken from RFC3492 + +exim -be + +utf-8 localpart to a-label: + +${utf8_localpart_to_alabel:\xD9\x84} +xn--ghb + +${utf8_localpart_to_alabel:\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\ +\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F} +xn--egbpdaj6bu4bxfgehfvwxn + +a-label localpart to utf-8: + +${utf8_localpart_from_alabel:xn--ghb} +${utf8_localpart_from_alabel:xn--egbpdaj6bu4bxfgehfvwxn} + +utf-8 domain to a-label: + +${utf8_domain_to_alabel:bogus.\xD9\x84.com} +bogus.xn--ghb.com + +${utf8_domain_to_alabel:arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\ +\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F.com} +arabic.xn--egbpdaj6bu4bxfgehfvwxn.com + +${utf8_domain_to_alabel:simpl.chinese.\xE4\xBB\x96\xE4\xBB\xAC\xE4\xB8\xBA\xE4\xBB\x80\ +\xE4\xB9\x88\xE4\xB8\x8D\xE8\xAF\xB4\xE4\xB8\xAD\xE6\x96\x87.com} +simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com +
+${utf8_domain_to_alabel:trad.chinese.\xE4\xBB\x96\xE5\x80\x91\xE7\x88\xB2\xE4\xBB\x80\ +\xE9\xBA\xBD\xE4\xB8\x8D\xE8\xAA\xAA\xE4\xB8\xAD\xE6\x96\x87.com} +trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com + +${utf8_domain_to_alabel:czech.\x50\x72\x6F\xC4\x8D\x70\x72\x6F\x73\x74\xC4\x9B\x6E\x65\ +\x6D\x6C\x75\x76\xC3\xAD\xC4\x8D\x65\x73\x6B\x79.com} +czech.xn--Proprostnemluvesky-uyb24dma41a.com + +${utf8_domain_to_alabel:hebrew.\xD7\x9C\xD7\x9E\xD7\x94\xD7\x94\xD7\x9D\xD7\xA4\xD7\xA9\ +\xD7\x95\xD7\x98\xD7\x9C\xD7\x90\xD7\x9E\xD7\x93\xD7\x91\xD7\xA8\xD7\x99\xD7\x9D\xD7\xA2\ +\xD7\x91\xD7\xA8\xD7\x99\xD7\xAA.com} +hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com + +${utf8_domain_to_alabel:hindi.\xE0\xA4\xAF\xE0\xA4\xB9\xE0\xA4\xB2\xE0\xA5\x8B\xE0\xA4\x97\ +\xE0\xA4\xB9\xE0\xA4\xBF\xE0\xA4\xA8\xE0\xA5\x8D\xE0\xA4\xA6\xE0\xA5\x80\xE0\xA4\x95\xE0\xA5\x8D\ +\xE0\xA4\xAF\xE0\xA5\x8B\xE0\xA4\x82\xE0\xA4\xA8\xE0\xA4\xB9\xE0\xA5\x80\xE0\xA4\x82\xE0\xA4\xAC\ +\xE0\xA5\x8B\xE0\xA4\xB2\xE0\xA4\xB8\xE0\xA4\x95\xE0\xA4\xA4\xE0\xA5\x87\xE0\xA4\xB9\xE0\xA5\x88\ +\xE0\xA4\x82.com} +hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com + +${utf8_domain_to_alabel:japanese.\xE3\x81\xAA\xE3\x81\x9C\xE3\x81\xBF\xE3\x82\x93\xE3\x81\xAA\ +\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE3\x82\x92\xE8\xA9\xB1\xE3\x81\x97\xE3\x81\xA6\xE3\x81\x8F\ +\xE3\x82\x8C\xE3\x81\xAA\xE3\x81\x84\xE3\x81\xAE\xE3\x81\x8B.com} +japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com + +# the a-label for the phrase in korean is too long for a domain label (63 byte limit) +korean: ${utf8_localpart_to_alabel:\xEC\x84\xB8\xEA\xB3\x84\xEC\x9D\x98\xEB\xAA\xA8\xEB\x93\xA0\ +\xEC\x82\xAC\xEB\x9E\x8C\xEB\x93\xA4\xEC\x9D\xB4\xED\x95\x9C\xEA\xB5\xAD\xEC\x96\xB4\xEB\xA5\xBC\ +\xEC\x9D\xB4\xED\x95\xB4\xED\x95\x9C\xEB\x8B\xA4\xEB\xA9\xB4\xEC\x96\xBC\xEB\xA7\x88\xEB\x82\x98\ +\xEC\xA2\x8B\xEC\x9D\x84\xEA\xB9\x8C} +korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c + 
+${utf8_domain_to_alabel:russian.\xD0\xBF\xD0\xBE\xD1\x87\xD0\xB5\xD0\xBC\xD1\x83\xD0\xB6\ +\xD0\xB5\xD0\xBE\xD0\xBD\xD0\xB8\xD0\xBD\xD0\xB5\xD0\xB3\xD0\xBE\xD0\xB2\xD0\xBE\xD1\x80\ +\xD1\x8F\xD1\x82\xD0\xBF\xD0\xBE\xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8.com} +russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com + +${utf8_domain_to_alabel:spanish.\x50\x6F\x72\x71\x75\xC3\xA9\x6E\x6F\x70\x75\x65\x64\x65\ +\x6E\x73\x69\x6D\x70\x6C\x65\x6D\x65\x6E\x74\x65\x68\x61\x62\x6C\x61\x72\x65\x6E\x45\x73\ +\x70\x61\xC3\xB1\x6F\x6C.com} +spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com + +${utf8_domain_to_alabel:vietnamese.\x54\xE1\xBA\xA1\x69\x73\x61\x6F\x68\xE1\xBB\x8D\x6B\x68\ +\xC3\xB4\x6E\x67\x74\x68\xE1\xBB\x83\x63\x68\xE1\xBB\x89\x6E\xC3\xB3\x69\x74\x69\xE1\xBA\xBF\ +\x6E\x67\x56\x69\xE1\xBB\x87\x74.com} +vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com + +a-label domain to utf-8: + +${utf8_domain_from_alabel:arab.xn--ghb.com} +${utf8_domain_from_alabel:arab.xn--egbpdaj6bu4bxfgehfvwxn.com} +${utf8_domain_from_alabel:simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com} +${utf8_domain_from_alabel:trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com} +${utf8_domain_from_alabel:czech.xn--Proprostnemluvesky-uyb24dma41a.com} +${utf8_domain_from_alabel:hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com} +${utf8_domain_from_alabel:hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com} +${utf8_domain_from_alabel:japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com} +korean: ${utf8_localpart_from_alabel:xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c} +${utf8_domain_from_alabel:russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com} +${utf8_domain_from_alabel:spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com} +${utf8_domain_from_alabel:vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com} + +**** diff --git a/test/stdout/4200 b/test/stdout/4200 new file mode 100644 index 000000000..1cfb7a9db --- /dev/null +++ 
b/test/stdout/4200 @@ -0,0 +1,69 @@ +> +> utf-8 localpart to a-label: +> +> xn--ghb +> xn--ghb +> +> xn--egbpdaj6bu4bxfgehfvwxn +> xn--egbpdaj6bu4bxfgehfvwxn +> +> a-label localpart to utf-8: +> +> ل +> ليهمابتكلموشعربي؟ +> +> utf-8 domain to a-label: +> +> bogus.xn--ghb.com +> bogus.xn--ghb.com +> +> arabic.xn--egbpdaj6bu4bxfgehfvwxn.com +> arabic.xn--egbpdaj6bu4bxfgehfvwxn.com +> +> simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com +> simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com +> +> trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com +> trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com +> +> czech.xn--proprostnemluvesky-uyb24dma41a.com +> czech.xn--Proprostnemluvesky-uyb24dma41a.com +> +> hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com +> hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com +> +> hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com +> hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com +> +> japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com +> japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com +> +> # the a-label for the phrase in korean is too long for a domain label (63 byte limit) +> korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c +> korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c +> +> russian.xn--b1abfaaepdrnnbgefbadotcwatmq2g4l.com +> russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com +> +> spanish.xn--porqunopuedensimplementehablarenespaol-fmd56a.com +> spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com +> +> vietnamese.xn--tisaohkhngthchnitingvit-kjcr8268qyxafd2f1b9g.com +> vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com +> +> a-label domain to utf-8: +> +> arab.ل.com +> arab.ليهمابتكلموشعربي؟.com +> simpl.chinese.他们为什么不说中文.com +> trad.chinese.他們爲什麽不說中文.com +> czech.Pročprostěnemluvíčesky.com +> hebrew.למההםפשוטלאמדבריםעברית.com +> hindi.यहलोगहिन्दीक्योंनहींबोलसकतेहैं.com +> japanese.なぜみんな日本語を話してくれないのか.com +> korean: 
세계의모든사람들이한국어를이해한다면얼마나좋을까 +> russian.почемужеонинеговорятпорусски.com +> spanish.PorquénopuedensimplementehablarenEspañol.com +> vietnamese.TạisaohọkhôngthểchỉnóitiếngViệt.com +> +> -- 2.25.1