From 29f89cad0cf7be1977f6ed36d27ac9b651aec9e2 Mon Sep 17 00:00:00 2001
From: Philip Hazel <ph10@hermes.cam.ac.uk>
Date: Wed, 14 Feb 2007 14:59:01 +0000
Subject: [PATCH] Added expansion items addresses, filter, map, reduce.

---
 doc/doc-txt/ChangeLog        |   5 +-
 doc/doc-txt/NewStuff         |  75 ++++++++++-
 src/src/expand.c             | 248 ++++++++++++++++++++++++++++++++++-
 test/scripts/0000-Basic/0002 |  28 ++++
 test/stdout/0002             |  26 ++++
 5 files changed, 379 insertions(+), 3 deletions(-)
diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog
index 146c4fbfa..61b6ac275 100644
--- a/doc/doc-txt/ChangeLog
+++ b/doc/doc-txt/ChangeLog
@@ -1,4 +1,4 @@
-$Cambridge: exim/doc/doc-txt/ChangeLog,v 1.478 2007/02/14 12:22:36 steve Exp $
+$Cambridge: exim/doc/doc-txt/ChangeLog,v 1.479 2007/02/14 14:59:01 ph10 Exp $
 
 Change log file for Exim from version 4.21
 -------------------------------------------
@@ -101,6 +101,9 @@ PH/24 When the log selector "outgoing_port" was set, the port was shown as -1
       for deliveries of the second and subsequent messages over the same SMTP
       connection.
 
+PH/25 Applied Magnus Holmgren's patch for ${addresses, ${map, ${filter, and
+      ${reduce, with only minor "tidies".
+
 SC/02 Applied Daniel Tiefnig's patch to improve the '($parent) =' pattern match.
 
 
diff --git a/doc/doc-txt/NewStuff b/doc/doc-txt/NewStuff
index 04177227d..b70fa5e68 100644
--- a/doc/doc-txt/NewStuff
+++ b/doc/doc-txt/NewStuff
@@ -1,4 +1,4 @@
-$Cambridge: exim/doc/doc-txt/NewStuff,v 1.140 2007/02/07 12:23:35 ph10 Exp $
+$Cambridge: exim/doc/doc-txt/NewStuff,v 1.141 2007/02/14 14:59:01 ph10 Exp $
 
 New Features in Exim
 --------------------
@@ -292,6 +292,79 @@ Version 4.67
     option; with -I they don't. In both cases it is possible to change the case
     sensitivity within the pattern using (?i) or (?-i).
 
+14. A number of new features have been added to string expansions to make it
+    easier to process lists of items, typically addresses. These are as
+    follows:
+
+  * ${addresses:<string>}
+
+    The string (after expansion) is interpreted as a list of addresses in RFC
+    2822 format, such as can be found in a To: or Cc: header line. The
+    operative address (local-part@domain) is extracted from each item, and the
+    result of the expansion is a colon-separated list, with appropriate
+    doubling of colons should any happen to be present in the email addresses.
+    Syntactically invalid RFC2822 address items are omitted from the output.
+
+    It is possible to specify a character other than colon for the output
+    separator by starting the string with > followed by the new separator
+    character. For example:
+
+      ${addresses:>& The Boss <ceo@up.stairs>, sec@base.ment (dogsbody)}
+
+    expands to "ceo@up.stairs&sec@base.ment". Compare ${address (singular),
+    which extracts the working address from a single RFC2822 address.
+
+  * ${map{<string1>}{<string2>}}
+
+    After expansion, <string1> is interpreted as a list, colon-separated by
+    default, but the separator can be changed in the usual way. For each item
+    in this list, its value is placed in $item, and then <string2> is expanded
+    and added to the output as an item in a new list. The separator used for
+    the output list is the same as the one used for the input, but a separator
+    setting is not included in the output. For example:
+
+      ${map{a:b:c}{[$item]}} ${map{<- x-y-z}{($item)}}
+
+    expands to "[a]:[b]:[c] (x)-(y)-(z)". At the end of the expansion, the
+    value of $item is restored to what it was before.
+
+  * ${filter{<string1>}{<condition>}}
+
+    After expansion, <string1> is interpreted as a list, colon-separated by
+    default, but the separator can be changed in the usual way. For each item
+    in this list, its value is placed in $item, and then the condition is
+    evaluated. If the condition is true, $item is added to the output as an
+    item in a new list; if the condition is false, the item is discarded. The
+    separator used for the output list is the same as the one used for the
+    input, but a separator setting is not included in the output. For example:
+
+      ${filter{a:b:c}{!eq{$item}{b}}}
+
+    yields "a:c". At the end of the expansion, the value of $item is restored
+    to what it was before.
+
+  * ${reduce{<string1>}{<string2>}{<string3>}}
+
+    The ${reduce expansion operation reduces a list to a single, scalar string.
+    After expansion, <string1> is interpreted as a list, colon-separated by
+    default, but the separator can be changed in the usual way. Then <string2>
+    is expanded and assigned to the $value variable. After this, each item in
+    the <string1> list is assigned to $item in turn, and <string3> is expanded
+    for each of them. The result of that expansion is assigned to $value before
+    the next iteration. When the end of the list is reached, the final value of
+    $value is added to the expansion string. The ${reduce expansion item can be
+    used in a number of ways. For example, to add up a list of numbers:
+
+      ${reduce {<, 1,2,3}{0}{${eval:$value+$item}}}
+
+    The result of that expansion would be "6". The maximum of a list of numbers
+    can be found:
+
+      ${reduce {3:0:9:4:6}{0}{${if >{$item}{$value}{$item}{$value}}}}
+
+    At the end of a ${reduce expansion, the values of $item and $value are
+    restored to what they were before.
+
 
 Version 4.66
 ------------
diff --git a/src/src/expand.c b/src/src/expand.c
index 155f4d836..afa898205 100644
--- a/src/src/expand.c
+++ b/src/src/expand.c
@@ -1,4 +1,4 @@
-/* $Cambridge: exim/src/src/expand.c,v 1.81 2007/02/10 23:51:11 magnus Exp $ */
+/* $Cambridge: exim/src/src/expand.c,v 1.82 2007/02/14 14:59:02 ph10 Exp $ */
 
 /*************************************************
 *     Exim - an Internet mail transport agent    *
@@ -106,17 +106,20 @@ alphabetical order. */
 static uschar *item_table[] = {
   US"dlfunc",
   US"extract",
+  US"filter",
   US"hash",
   US"hmac",
   US"if",
   US"length",
   US"lookup",
+  US"map",
   US"nhash",
   US"perl",
   US"prvs",
   US"prvscheck",
   US"readfile",
   US"readsocket",
+  US"reduce",
   US"run",
   US"sg",
   US"substr",
@@ -125,17 +128,20 @@ static uschar *item_table[] = {
 enum {
   EITEM_DLFUNC,
   EITEM_EXTRACT,
+  EITEM_FILTER,
   EITEM_HASH,
   EITEM_HMAC,
   EITEM_IF,
   EITEM_LENGTH,
   EITEM_LOOKUP,
+  EITEM_MAP,
   EITEM_NHASH,
   EITEM_PERL,
   EITEM_PRVS,
   EITEM_PRVSCHECK,
   EITEM_READFILE,
   EITEM_READSOCK,
+  EITEM_REDUCE,
   EITEM_RUN,
   EITEM_SG,
   EITEM_SUBSTR,
@@ -162,6 +168,7 @@ enum {
 
 static uschar *op_table_main[] = {
   US"address",
+  US"addresses",
   US"base62",
   US"base62d",
   US"domain",
@@ -193,6 +200,7 @@ static uschar *op_table_main[] = {
 
 enum {
   EOP_ADDRESS =  sizeof(op_table_underscore)/sizeof(uschar *),
+  EOP_ADDRESSES,
   EOP_BASE62,
   EOP_BASE62D,
   EOP_DOMAIN,
@@ -4682,6 +4690,181 @@ while (*s != 0)
       }
 
 
+    /* Handle list operations */
+
+    case EITEM_FILTER:
+    case EITEM_MAP:
+    case EITEM_REDUCE:
+      {
+      int sep = 0;
+      int save_ptr = ptr;
+      uschar outsep[2] = { '\0', '\0' };
+      uschar *list, *expr, *temp;
+      uschar *save_iterate_item = iterate_item;   /* For restoring $item */
+      uschar *save_lookup_value = lookup_value;   /* For restoring $value */
+
+      while (isspace(*s)) s++;
+      if (*s++ != '{') goto EXPAND_FAILED_CURLY;
+
+      list = expand_string_internal(s, TRUE, &s, skipping);
+      if (list == NULL) goto EXPAND_FAILED;
+      if (*s++ != '}') goto EXPAND_FAILED_CURLY;
+
+      if (item_type == EITEM_REDUCE)
+        {
+        while (isspace(*s)) s++;
+        if (*s++ != '{') goto EXPAND_FAILED_CURLY;
+        temp = expand_string_internal(s, TRUE, &s, skipping);
+        if (temp == NULL) goto EXPAND_FAILED;
+        lookup_value = temp;                /* Initial $value */
+        if (*s++ != '}') goto EXPAND_FAILED_CURLY;
+        }
+
+      while (isspace(*s)) s++;
+      if (*s++ != '{') goto EXPAND_FAILED_CURLY;
+
+      expr = s;
+
+      /* For EITEM_FILTER, call eval_condition once, with result discarded (as
+      if scanning a "false" part). This allows us to find the end of the
+      condition, because if the list is empty, we won't actually evaluate the
+      condition for real. For EITEM_MAP and EITEM_REDUCE, do the same, using
+      the normal internal expansion function. */
+
+      if (item_type == EITEM_FILTER)
+        {
+        temp = eval_condition(expr, NULL);
+        if (temp != NULL) s = temp;
+        }
+      else
+        {
+        temp = expand_string_internal(s, TRUE, &s, TRUE);
+        }
+
+      if (temp == NULL)
+        {
+        expand_string_message = string_sprintf("%s inside \"%s\" item",
+          expand_string_message, name);
+        goto EXPAND_FAILED;
+        }
+
+      while (isspace(*s)) s++;
+      if (*s++ != '}')
+        {
+        expand_string_message = string_sprintf("missing } at end of condition "
+          "or expression inside \"%s\"", name);
+        goto EXPAND_FAILED;
+        }
+
+      while (isspace(*s)) s++;
+      if (*s++ != '}')
+        {
+        expand_string_message = string_sprintf("missing } at end of \"%s\"",
+          name);
+        goto EXPAND_FAILED;
+        }
+
+      /* If we are skipping, undo any change REDUCE made to $value and move on
+      to the next item. When processing for real, we perform the iteration. */
+
+      if (skipping) { lookup_value = save_lookup_value; continue; }
+      while ((iterate_item = string_nextinlist(&list, &sep, NULL, 0)) != NULL)
+        {
+        *outsep = (uschar)sep;      /* Separator as a string */
+
+        DEBUG(D_expand) debug_printf("%s: $item = \"%s\"\n", name, iterate_item);
+
+        if (item_type == EITEM_FILTER)
+          {
+          BOOL condresult;
+          if (eval_condition(expr, &condresult) == NULL)
+            {
+            expand_string_message = string_sprintf("%s inside \"%s\" condition",
+              expand_string_message, name);
+            goto EXPAND_FAILED;
+            }
+          DEBUG(D_expand) debug_printf("%s: condition is %s\n", name,
+            condresult? "true":"false");
+          if (condresult)
+            temp = iterate_item;    /* TRUE => include this item */
+          else
+            continue;               /* FALSE => skip this item */
+          }
+
+        /* EITEM_MAP and EITEM_REDUCE */
+
+        else
+          {
+          temp = expand_string_internal(expr, TRUE, NULL, skipping);
+          if (temp == NULL)
+            {
+            expand_string_message = string_sprintf("%s inside \"%s\" item",
+              expand_string_message, name);
+            goto EXPAND_FAILED;
+            }
+          if (item_type == EITEM_REDUCE)
+            {
+            lookup_value = temp;      /* Update the value of $value */
+            continue;                 /* and continue the iteration */
+            }
+          }
+
+        /* We reach here for FILTER if the condition is true, always for MAP,
+        and never for REDUCE. The value in "temp" is to be added to the output
+        list that is being created, ensuring that any occurrences of the
+        separator character are doubled. Unless we are dealing with the first
+        item of the output list, add in a space if the new item begins with the
+        separator character, or is an empty string. */
+
+        if (ptr != save_ptr && (temp[0] == *outsep || temp[0] == 0))
+          yield = string_cat(yield, &size, &ptr, US" ", 1);
+
+        /* Add the string in "temp" to the output list that we are building.
+        This is done in chunks by searching for the separator character. */
+
+        for (;;)
+          {
+          size_t seglen = Ustrcspn(temp, outsep);
+          yield = string_cat(yield, &size, &ptr, temp, seglen + 1);
+
+          /* If we got to the end of the string we output one character
+          too many; backup and end the loop. Otherwise arrange to double the
+          separator. */
+
+          if (temp[seglen] == '\0') { ptr--; break; }
+          yield = string_cat(yield, &size, &ptr, outsep, 1);
+          temp += seglen + 1;
+          }
+
+        /* Output a separator after the string: we will remove the redundant
+        final one at the end. */
+
+        yield = string_cat(yield, &size, &ptr, outsep, 1);
+        }   /* End of iteration over the list loop */
+
+      /* REDUCE has generated no output above: output the final value of
+      $value. */
+
+      if (item_type == EITEM_REDUCE)
+        {
+        yield = string_cat(yield, &size, &ptr, lookup_value,
+          Ustrlen(lookup_value));
+        lookup_value = save_lookup_value;  /* Restore $value */
+        }
+
+      /* FILTER and MAP generate lists: if they have generated anything, remove
+      the redundant final separator. Even though an empty item at the end of a
+      list does not count, this is tidier. */
+
+      else if (ptr != save_ptr) ptr--;
+
+      /* Restore preserved $item */
+
+      iterate_item = save_iterate_item;
+      continue;
+      }
+
+
     /* If ${dlfunc support is configured, handle calling dynamically-loaded
     functions, unless locked out at this time. Syntax is ${dlfunc{file}{func}}
     or ${dlfunc{file}{func}{arg}} or ${dlfunc{file}{func}{arg1}{arg2}} or up to
@@ -5042,6 +5225,69 @@ while (*s != 0)
         continue;
         }
 
+      case EOP_ADDRESSES:
+        {
+        uschar outsep[2] = { ':', '\0' };
+        uschar *address, *error;
+        int save_ptr = ptr;
+        int start, end, domain;  /* Not really used */
+
+        /* ">c" selects output separator c; a bare ">" is not a selector */
+
+        while (isspace(*sub)) sub++;
+        if (*sub == '>' && sub[1] != '\0') { *outsep = sub[1]; sub += 2; }
+        parse_allow_group = TRUE;
+
+        for (;;)
+          {
+          uschar *p = parse_find_address_end(sub, FALSE);
+          uschar saveend = *p;
+          *p = '\0';
+          address = parse_extract_address(sub, &error, &start, &end, &domain,
+            FALSE);
+          *p = saveend;
+
+          /* Add the address to the output list that we are building. This is
+          done in chunks by searching for the separator character, so that any
+          separator within the address is doubled. Unless this is the first
+          address output, add a space if it begins with the separator. */
+
+          if (address != NULL)
+            {
+            if (ptr != save_ptr && address[0] == *outsep)
+              yield = string_cat(yield, &size, &ptr, US" ", 1);
+
+            for (;;)
+              {
+              size_t seglen = Ustrcspn(address, outsep);
+              yield = string_cat(yield, &size, &ptr, address, seglen + 1);
+
+              /* At end of string we output one character too many. */
+
+              if (address[seglen] == '\0') { ptr--; break; }
+              yield = string_cat(yield, &size, &ptr, outsep, 1);
+              address += seglen + 1;
+              }
+
+            /* Output a separator after the string: we will remove the
+            redundant final one at the end. */
+
+            yield = string_cat(yield, &size, &ptr, outsep, 1);
+            }
+
+          if (saveend == '\0') break;
+          sub = p + 1;
+          }
+
+        /* If we have generated anything, remove the redundant final
+        separator. */
+
+        if (ptr != save_ptr) ptr--;
+        parse_allow_group = FALSE;
+        continue;
+        }
+
+
       /* quote puts a string in quotes if it is empty or contains anything
       other than alphamerics, underscore, dot, or hyphen.
 
diff --git a/test/scripts/0000-Basic/0002 b/test/scripts/0000-Basic/0002
index 5e0126524..870359eb2 100644
--- a/test/scripts/0000-Basic/0002
+++ b/test/scripts/0000-Basic/0002
@@ -40,12 +40,40 @@ x\
 +$11111111111111111111111111111111111
 +${11111111111111111111111111111111111}
 
+# List operations
+
+filter: "${filter{a:b:c}{eq{1}{1}}}"
+filter: ${filter{a:b:c}{!eq{$item}{b}}}
+filter: ${filter{<' a'b'c}{!eq{$item}{b}}}
+filter: ${filter{<' ''a'b' ''c}{!eq{$item}{b}}}
+filter: "${filter{}{!eq{$item}{b}}}"
+
+map: "${map{}{$item}}"
+map: ${map{a:b:c}{$item}}
+map: ${map{a:b:c}{:$item:}}
+map: ${if eq{1}{0}{${map{a:b:c}{:$item:}}}{fail string}}
+map: ${map{:b:c}{[$item]}}
+
+reduce: "${reduce{}{+}{$value$item}}"
+reduce: ${reduce{a:b:c}{+}{$value$item}}
+reduce: ${reduce {<, 1,2,3}{0}{${eval:$value+$item}}}
+reduce: ${reduce {3:0:9:4:6}{0}{${if >{$item}{$value}{$item}{$value}}}}
+
 # Operators
 
 addrss: ${address:local-part@dom.ain}
 addrss: ${address:Exim Person <local-part@dom.ain> (that's me)}
 domain: ${domain:local-part@dom.ain}
 domain: ${domain:Exim Person <local-part@dom.ain> (that's me)}
+
+addresses: ${addresses:>' 'abc@xyz, 'pqr@xyz}
+addresses: ${addresses:Exim Person <local-part@dom.ain> (that's me)}
+addresses: ${addresses:>+ Exim Person <local-part@dom.ain> (that's me),\
+           xyz@abc}
+addresses: ${addresses:Exim Person <local-part@dom.ain> (that's me), \
+           xyz@abc, nullgroupname:;, group: p@q, r@s; }
+addresses: ${addresses:local-part@dom.ain <local-part@dom.ain>}
+
 escape: ${escape:B7·F2ò}
 eval:   ${eval:1+1}
 eval:   ${eval:1+2*3}
diff --git a/test/stdout/0002 b/test/stdout/0002
index b1f29c447..d985a9c1e 100644
--- a/test/stdout/0002
+++ b/test/stdout/0002
@@ -30,12 +30,38 @@
 > +
 > +
 > 
+> # List operations
+> 
+> filter: "a:b:c"
+> filter: a:c
+> filter: a'c
+> filter: ''a' ''c
+> filter: ""
+> 
+> map: ""
+> map: a:b:c
+> map: ::a::: ::b::: ::c::
+> map: fail string
+> map: []:[b]:[c]
+> 
+> reduce: "+"
+> reduce: +abc
+> reduce: 6
+> reduce: 9
+> 
 > # Operators
 > 
 > addrss: local-part@dom.ain
 > addrss: local-part@dom.ain
 > domain: dom.ain
 > domain: dom.ain
+> 
+> addresses: ''abc@xyz' ''pqr@xyz
+> addresses: local-part@dom.ain
+> addresses: local-part@dom.ain+xyz@abc
+> addresses: local-part@dom.ain:xyz@abc:p@q:r@s
+> addresses: 
+> 
 > escape: B7\267F2\362
 > eval:   2
 > eval:   7
-- 
2.25.1