Fix utf8clean not replacing incomplete final character
author Phil Pennock <pdp@exim.org>
Fri, 17 Aug 2018 01:17:32 +0000 (21:17 -0400)
committer Phil Pennock <pdp@exim.org>
Fri, 17 Aug 2018 01:19:09 +0000 (21:19 -0400)
Previously an incomplete final character was silently dropped, but we document that it is replaced by '?'.

Tests updated; a manual test case for the -be prompt is:

    ${utf8clean:${length_1:フィル}}

doc/doc-txt/ChangeLog
src/src/expand.c
test/confs/0600
test/log/0600
test/scripts/0000-Basic/0600

index bf5afe0..bc521a6 100644 (file)
@@ -92,6 +92,10 @@ JH/20 Bug 2296: Fix cutthrough for >1 address redirection.  Previously only
       time.  Either a crash (after delivery) or bogus log data could result.
       Discovery and analysis by Tim Stewart.
 
+PP/03 Make ${utf8clean:} expansion operator detect incomplete final character.
+      Previously if the string ended mid-character, we did not insert the
+      promised '?' replacement.
+
 
 Exim version 4.91
 -----------------
index b6ff96a..1508dcf 100644 (file)
@@ -1683,7 +1683,7 @@ else if (host_lookup_deferred)
   g = string_catn(g, US";\n\tiprev=temperror", 19);
 else if (host_lookup_failed)
   g = string_catn(g, US";\n\tiprev=fail", 13);
-else 
+else
   return g;
 
 if (sender_host_address)
@@ -7143,12 +7143,13 @@ while (*s != 0)
         {
         int seq_len = 0, index = 0;
         int bytes_left = 0;
-       long codepoint = -1;
+        long codepoint = -1;
+        int complete;
         uschar seq_buff[4];                    /* accumulate utf-8 here */
 
         while (*sub != 0)
          {
-         int complete = 0;
+         complete = 0;
          uschar c = *sub++;
 
          if (bytes_left)
@@ -7213,6 +7214,13 @@ while (*s != 0)
                        /* ASCII character follows incomplete sequence */
              yield = string_catn(yield, &c, 1);
          }
+        /* If given a sequence truncated mid-character, we also want to report ?
+        * Eg, ${length_1:フィル} is one byte, not one character, so we expect
+        * ${utf8clean:${length_1:フィル}} to yield '?' */
+        if (bytes_left != 0)
+          {
+          yield = string_catn(yield, UTF8_REPLACEMENT_CHAR, 1);
+          }
         continue;
         }
 
@@ -7954,7 +7962,7 @@ expand_hide_passwords(uschar * s)
 {
 return (  (  Ustrstr(s, "failed to expand") != NULL
          || Ustrstr(s, "expansion of ")    != NULL
-         ) 
+         )
        && (  Ustrstr(s, "mysql")   != NULL
          || Ustrstr(s, "pgsql")   != NULL
          || Ustrstr(s, "redis")   != NULL
@@ -7964,7 +7972,7 @@ return (  (  Ustrstr(s, "failed to expand") != NULL
          || Ustrstr(s, "ldapi:")  != NULL
          || Ustrstr(s, "ldapdn:") != NULL
          || Ustrstr(s, "ldapm:")  != NULL
-       )  ) 
+       )  )
   ? US"Temporary internal error" : s;
 }
 
index 29bc5bf..f640546 100644 (file)
@@ -20,15 +20,17 @@ begin acl
 
 check_data:
   accept logwrite = \
-                       x-test-header-good1: ${utf8clean:$h_x-test-header-good1:}
-                logwrite = \
-                       x-test-header-good2: ${utf8clean:$h_x-test-header-good2:}
-                logwrite = \
-                       x-test-header-too-short: ${utf8clean:$h_x-test-header-too-short:}
-                logwrite = \
-                       x-test-header-too-long: ${utf8clean:$h_x-test-header-too-long:}
-                logwrite = \
-                       x-test-header-too-big: ${utf8clean:$h_x-test-header-too-big:}
+                       x-test-header-good1: ${utf8clean:$h_x-test-header-good1:}
+                logwrite = \
+                       x-test-header-good2: ${utf8clean:$h_x-test-header-good2:}
+                logwrite = \
+                       x-test-header-too-short: ${utf8clean:$h_x-test-header-too-short:}
+                logwrite = \
+                       x-test-header-too-long: ${utf8clean:$h_x-test-header-too-long:}
+                logwrite = \
+                       x-test-header-too-big: ${utf8clean:$h_x-test-header-too-big:}
+                logwrite = \
+                       x-test-header-truncated: ${utf8clean:$h_x-test-header-truncated:}
 
 
 
index 8fc8cfc..a18b159 100644 (file)
@@ -3,14 +3,16 @@
 1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-too-short: ?.?.?.\303\244-?.-\303\234.?..?.-?.-?..-?.-?.-?.-?.-?..-?..?.
 1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-too-long: ?????-\303\244-?????--\303\226-\303\204-\302\264-\340\244\221-\340\244\225-\340\244\234-\341\220\201-\341\221\214-\341\221\225-?????\360\253\237\206
 1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-too-big: ?-----\363\200\200\200
+1999-03-02 09:44:33 10HmaX-0005vi-00 x-test-header-truncated: ?
 1999-03-02 09:44:33 10HmaX-0005vi-00 <= CALLER@the.local.host.name U=CALLER P=local-smtp S=sss
 1999-03-02 09:44:33 10HmaX-0005vi-00 => CALLER <CALLER@the.local.host.name> R=localuser T=local_delivery
 1999-03-02 09:44:33 10HmaX-0005vi-00 Completed
-1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-good1: 
-1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-good2: 
-1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-short: 
-1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-long: 
-1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-big: 
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-good1:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-good2:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-short:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-long:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-too-big:
+1999-03-02 09:44:33 10HmaY-0005vi-00 x-test-header-truncated:
 1999-03-02 09:44:33 10HmaY-0005vi-00 <= CALLER@the.local.host.name U=CALLER P=local-smtp S=sss
 1999-03-02 09:44:33 10HmaY-0005vi-00 => CALLER <CALLER@the.local.host.name> R=localuser T=local_delivery
 1999-03-02 09:44:33 10HmaY-0005vi-00 Completed
index 9d5e67b..43f7288 100644 (file)
@@ -10,6 +10,7 @@ x-test-header-good2: ßüöä€ÜÖÄ´ऑकजᐁᑌᑕ𫝆𫟘
 x-test-header-too-short: Ã.Ã.Ã.ä-â\82.-Ã\9c.Ã..Ã.-Â.-à..-à¤.-à¤.-á\90.-á\91.-á..-ð«\9d..ð«\9f.
 x-test-header-too-long: ø\88\88\88\88-ä-ø\88\88\88\88--Ã\96\84-´-à¤\91-à¤\95-à¤\9c\90\81\91\8c\91\95\80\80\80\80ð«\9f\86
 x-test-header-too-big: -----󀀀
+x-test-header-truncated: ã
 Subject: This is a test message.
 
 This is a test message.