.cindex "expansion" "case forcing"
.cindex "&%uc%& expansion item"
This forces the letters in the string into upper-case.
+
+.vitem &*${utf8clean:*&<&'utf-8 string'&>&*}*&
+.cindex "correction of invalid utf-8 sequences in strings"
+.cindex "utf-8" "utf-8 sequences"
+.cindex "wrong utf-8"
+.cindex "expansion" "utf-8 forcing"
+.cindex "&%utf8clean%& expansion item"
+This replaces any invalid utf-8 sequence in the string by the character &`?`&.
.endlist
the ACL tests for either of these two results. Patch contributed by
user bes-internal on the mailing list.
+JH/04 Add ${utf8clean:} operator. Contributed by Alex Rau.
+
Exim version 4.82
-----------------
those non-ASCII characters, but downstream apps may not, so Exim can
detect and reject if those characters are present.
+ 3. New expansion operator ${utf8clean:string} to replace each malformed
+ UTF-8 sequence in the string with the character "?".
+
Version 4.82
------------
US"str2b64",
US"strlen",
US"substr",
- US"uc" };
+ US"uc",
+ US"utf8clean" };
enum {
EOP_ADDRESS = sizeof(op_table_underscore)/sizeof(uschar *),
EOP_STR2B64,
EOP_STRLEN,
EOP_SUBSTR,
- EOP_UC };
+ EOP_UC,
+ EOP_UTF8CLEAN };
/* Table of condition names, and corresponding switch numbers. The names must
continue;
}
+    /* utf8clean: copy the operand to the output, replacing every byte
+    sequence that is not well-formed UTF-8 (RFC 3629) by
+    UTF8_REPLACEMENT_CHAR. Rejected are: bytes that cannot start a sequence
+    (stray continuation bytes, and lead bytes announcing more than 4 bytes),
+    sequences cut short by a non-continuation byte or by the end of the
+    string, overlong encodings, UTF-16 surrogates (U+D800..U+DFFF) and code
+    points above U+10FFFF. Each rejected sequence yields exactly one
+    replacement character. */
+
+    #define UTF8_REPLACEMENT_CHAR US"?"
+
+    case EOP_UTF8CLEAN:
+      {
+      int seq_len = 0;          /* total length of the sequence being read */
+      int index = 0;            /* number of bytes stored in seq_buff */
+      int bytes_left = 0;       /* continuation bytes still expected */
+      long codepoint = 0;       /* value decoded so far; this must live outside
+                                the loop because it is built up over several
+                                iterations */
+      long min_codepoint = 0;   /* smallest value that legitimately needs
+                                seq_len bytes; anything below is overlong */
+      uschar seq_buff[4];       /* accumulate utf-8 here */
+
+      while (*sub != 0)
+        {
+        /* 0: nothing to report; 1: the sequence was cut short by this byte;
+        -1: this byte, or the sequence it completes, is invalid */
+        int complete = 0;
+        uschar c = *sub++;
+
+        if (bytes_left)
+          {
+          if ((c & 0xc0) != 0x80)
+            {
+            /* not a continuation byte; the bytes collected so far are bad */
+            complete = 1;
+            }
+          else
+            {
+            codepoint = (codepoint << 6) | (c & 0x3f);
+            seq_buff[index++] = c;
+            if (--bytes_left == 0)      /* codepoint complete */
+              {
+              if (  codepoint > 0x10FFFF          /* beyond Unicode range */
+                 || codepoint < min_codepoint     /* overlong encoding */
+                 || (codepoint >= 0xD800 && codepoint <= 0xDFFF) /* surrogate */
+                 )
+                complete = -1;
+              else
+                {                       /* finished; output utf-8 sequence */
+                yield = string_cat(yield, &size, &ptr, seq_buff, seq_len);
+                index = 0;
+                }
+              }
+            }
+          }
+        else    /* no bytes left: new sequence */
+          {
+          if ((c & 0x80) == 0)  /* 1-byte sequence, US-ASCII, keep it */
+            {
+            yield = string_cat(yield, &size, &ptr, &c, 1);
+            continue;
+            }
+          if ((c & 0xe0) == 0xc0)               /* 2-byte sequence */
+            {
+            bytes_left = 1;
+            codepoint = c & 0x1f;
+            min_codepoint = 0x80;
+            }
+          else if ((c & 0xf0) == 0xe0)          /* 3-byte sequence */
+            {
+            bytes_left = 2;
+            codepoint = c & 0x0f;
+            min_codepoint = 0x800;
+            }
+          else if ((c & 0xf8) == 0xf0)          /* 4-byte sequence */
+            {
+            bytes_left = 3;
+            codepoint = c & 0x07;
+            min_codepoint = 0x10000;
+            }
+          else  /* invalid or too long (RFC3629 allows only 4 bytes) */
+            complete = -1;
+
+          seq_buff[index++] = c;
+          seq_len = bytes_left + 1;
+          }     /* if(bytes_left) */
+
+        if (complete != 0)
+          {
+          bytes_left = index = 0;
+          yield = string_cat(yield, &size, &ptr, UTF8_REPLACEMENT_CHAR, 1);
+          }
+
+        /* The byte that cut a sequence short is not part of that sequence.
+        Step back so that the next iteration reads it again as the start of
+        a new sequence: an ASCII byte is then copied, and a lead byte begins
+        its own character instead of being lost. bytes_left is zero here, so
+        the byte cannot be reported as "cut short" a second time. */
+
+        if (complete == 1)
+          sub--;
+        }
+
+      /* The string ended in the middle of a multi-byte sequence; report the
+      truncated sequence rather than dropping it silently. */
+
+      if (bytes_left != 0)
+        yield = string_cat(yield, &size, &ptr, UTF8_REPLACEMENT_CHAR, 1);
+
+      continue;
+      }
+
/* escape turns all non-printing characters into escape sequences. */
case EOP_ESCAPE:
#endif
+/*
+ vi: aw ai sw=2
+*/
/* End of expand.c */
--- /dev/null
+# Exim test configuration 0005
+
+exim_path = EXIM_PATH
+host_lookup_order = bydns
+rfc1413_query_timeout = 0s
+spool_directory = DIR/spool
+log_file_path = DIR/spool/log/%slog
+gecos_pattern = ""
+gecos_name = CALLER_NAME
+
+# ----- Main settings -----
+
+domainlist local_domains = @
+
+acl_smtp_rcpt = accept
+acl_smtp_data = check_data
+
+trusted_users = CALLER
+
+
+# ----- ACL -----
+
+begin acl
+
+check_data:
+ accept logwrite = \
+ x-test-header-good1: ${utf8clean:$h_x-test-header-good1:}
+ logwrite = \
+ x-test-header-good2: ${utf8clean:$h_x-test-header-good2:}
+ logwrite = \
+ x-test-header-too-short: ${utf8clean:$h_x-test-header-too-short:}
+ logwrite = \
+ x-test-header-too-long: ${utf8clean:$h_x-test-header-too-long:}
+ logwrite = \
+ x-test-header-too-big: ${utf8clean:$h_x-test-header-too-big:}
+
+
+
+# ----- Routers -----
+
+begin routers
+
+fail_remote_domains:
+ driver = redirect
+ domains = ! +local_domains
+ data = :fail: unrouteable mail domain "$domain"
+
+localuser:
+ driver = accept
+ check_local_user
+ transport = local_delivery
+ headers_add = X-local-user: uid=$local_user_uid gid=$local_user_gid
+
+
+# ----- Transports -----
+
+begin transports
+
+local_delivery:
+ driver = appendfile
+ delivery_date_add
+ envelope_to_add
+ file = DIR/test-mail/$local_part
+ headers_add = "X-body-linecount: $body_linecount\n\
+ X-message-linecount: $message_linecount\n\
+ X-received-count: $received_count"
+ return_path_add
+
+# End
--- /dev/null
+1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-good1: 1234567890qwertzuiopasdfghjklyxcvbnm,.-QWERTZUIOP+*ASDFGHJKL#'YXCVBNM,.-;:_
+1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-good2: \303\237\303\274\303\266\303\244\342\202\254\303\234\303\226\303\204\302\264\340\244\221\340\244\225\340\244\234\341\220\201\341\221\214\341\221\225\360\253\235\206\360\253\237\230
+1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-too-short: ?.?.?.\303\244-?.-\303\234.?..?.-?.-?..-?.-?.-?.-?.-?..-?..?.
+1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-too-long: ?????-\303\244-?????--\303\226-\303\204-\302\264-\340\244\221-\340\244\225-\340\244\234-\341\220\201-\341\221\214-\341\221\225-?????\360\253\237\206
+1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-too-big: ?-----\363\200\200\200
+1999-03-02 09:44:33 10HmaX-0005vi-00 <= CALLER@the.local.host.name U=CALLER P=local-smtp S=sss
+1999-03-02 09:44:33 10HmaX-0005vi-00 => CALLER <CALLER@the.local.host.name> R=localuser T=local_delivery
+1999-03-02 09:44:33 10HmaX-0005vi-00 Completed
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-good1:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-good2:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-short:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-long:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-big:
+1999-03-02 09:44:33 10HmaY-0005vi-00 <= CALLER@the.local.host.name U=CALLER P=local-smtp S=sss
+1999-03-02 09:44:33 10HmaY-0005vi-00 => CALLER <CALLER@the.local.host.name> R=localuser T=local_delivery
+1999-03-02 09:44:33 10HmaY-0005vi-00 Completed
+1999-03-02 09:44:33 Start queue run: pid=pppp
+1999-03-02 09:44:33 End queue run: pid=pppp
--- /dev/null
+From CALLER@the.local.host.name Tue Mar 02 09:44:33 1999
+Return-path: <CALLER@the.local.host.name>
+Envelope-to: CALLER@the.local.host.name
+Delivery-date: Tue, 2 Mar 1999 09:44:33 +0000
+Received: from CALLER by the.local.host.name with local-smtp (Exim x.yz)
+ (envelope-from <CALLER@the.local.host.name>)
+ id 10HmaX-0005vi-00
+ for CALLER@the.local.host.name; Tue, 2 Mar 1999 09:44:33 +0000
+x-test-header-good1: 1234567890qwertzuiopasdfghjklyxcvbnm,.-QWERTZUIOP+*ASDFGHJKL#'YXCVBNM,.-;:_
+x-test-header-good2: ßüöä€ÜÖÄ´ऑकजᐁᑌᑕ𫝆𫟘
+x-test-header-too-short: Ã.Ã.Ã.ä-â\82.-Ã\9c.Ã..Ã.-Â.-à..-à¤.-à¤.-á\90.-á\91.-á..-ð«\9d..ð«\9f.
+x-test-header-too-long: ø\88\88\88\88-ä-ø\88\88\88\88--Ã\96-Ã\84-´-à¤\91-à¤\95-à¤\9c-á\90\81-á\91\8c-á\91\95-ø\80\80\80\80ð«\9f\86
+x-test-header-too-big: -----
+Subject: This is a test message.
+Message-Id: <E10HmaX-0005vi-00@the.local.host.name>
+From: CALLER@the.local.host.name
+Date: Tue, 2 Mar 1999 09:44:33 +0000
+X-local-user: uid=CALLER_UID gid=CALLER_GID
+X-body-linecount: 3
+X-message-linecount: 16
+X-received-count: 1
+
+This is a test message.
+It has three lines.
+This is the last line.
+
+From CALLER@the.local.host.name Tue Mar 02 09:44:33 1999
+Return-path: <CALLER@the.local.host.name>
+Envelope-to: CALLER@the.local.host.name
+Delivery-date: Tue, 2 Mar 1999 09:44:33 +0000
+Received: from CALLER by the.local.host.name with local-smtp (Exim x.yz)
+ (envelope-from <CALLER@the.local.host.name>)
+ id 10HmaY-0005vi-00
+ for CALLER@the.local.host.name; Tue, 2 Mar 1999 09:44:33 +0000
+Subject: second
+Message-Id: <E10HmaY-0005vi-00@the.local.host.name>
+From: CALLER@the.local.host.name
+Date: Tue, 2 Mar 1999 09:44:33 +0000
+X-local-user: uid=CALLER_UID gid=CALLER_GID
+X-body-linecount: 1
+X-message-linecount: 9
+X-received-count: 1
+
+This is a second test message.
+
--- /dev/null
+# ${utf8clean:string}
+#
+# -bs to simple local delivery
+exim -bs -odi
+mail from:CALLER@HOSTNAME
+rcpt to:CALLER@HOSTNAME
+data
+x-test-header-good1: 1234567890qwertzuiopasdfghjklyxcvbnm,.-QWERTZUIOP+*ASDFGHJKL#'YXCVBNM,.-;:_
+x-test-header-good2: ßüöä€ÜÖÄ´ऑकजᐁᑌᑕ𫝆𫟘
+x-test-header-too-short: Ã.Ã.Ã.ä-â\82.-Ã\9c.Ã..Ã.-Â.-à..-à¤.-à¤.-á\90.-á\91.-á..-ð«\9d..ð«\9f.
+x-test-header-too-long: ø\88\88\88\88-ä-ø\88\88\88\88--Ã\96-Ã\84-´-à¤\91-à¤\95-à¤\9c-á\90\81-á\91\8c-á\91\95-ø\80\80\80\80ð«\9f\86
+x-test-header-too-big: -----
+Subject: This is a test message.
+
+This is a test message.
+It has three lines.
+This is the last line.
+.
+quit
+****
+exim -bs -odi
+mail from:CALLER@HOSTNAME
+rcpt to:CALLER@HOSTNAME
+data
+Subject: second
+
+This is a second test message.
+.
+quit
+****
+exim -q
+****
--- /dev/null
+220 the.local.host.name ESMTP Exim x.yz Tue, 2 Mar 1999 09:44:33 +0000\r
+250 OK\r
+250 Accepted\r
+354 Enter message, ending with "." on a line by itself\r
+250 OK id=10HmaX-0005vi-00\r
+221 the.local.host.name closing connection\r
+220 the.local.host.name ESMTP Exim x.yz Tue, 2 Mar 1999 09:44:33 +0000\r
+250 OK\r
+250 Accepted\r
+354 Enter message, ending with "." on a line by itself\r
+250 OK id=10HmaY-0005vi-00\r
+221 the.local.host.name closing connection\r