Bugzilla #502

[exim.git] / src / src / sieve.c
diff --git a/src/src/sieve.c b/src/src/sieve.c

index 34764a4a7c816c3851fc60ea8fd0838d6fd7e216..e243f653c3433792358e00f1b88a3301d67840e3 100644 (file)
--- a/src/src/sieve.c
+++ b/src/src/sieve.c
@@ -1,4 +1,4 @@
-/* $Cambridge: exim/src/src/sieve.c,v 1.24 2007/02/07 14:41:13 ph10 Exp $ */
+/* $Cambridge: exim/src/src/sieve.c,v 1.28 2007/04/19 13:19:06 ph10 Exp $ */
  
  /*************************************************
  *     Exim - an Internet mail transport agent    *
@@ -28,6 +28,9 @@
  /* Undefine it for UNIX-style \n end-of-line terminators (default). */
  #undef RFC_EOL
  
+/* Define this for development of the Sieve extension "encoded-character". */
+#undef ENCODED_CHARACTER
+
  /* Define this for development of the Sieve extension "envelope-auth". */
  #undef ENVELOPE_AUTH
  
@@ -58,6 +61,9 @@ struct Sieve
    int keep;
    int require_envelope;
    int require_fileinto;
+#ifdef ENCODED_CHARACTER
+  int require_encoded_character;
+#endif
  #ifdef ENVELOPE_AUTH
    int require_envelope_auth;
  #endif
@@ -126,6 +132,10 @@ static uschar str_fileinto_c[]="fileinto";
  static const struct String str_fileinto={ str_fileinto_c, 8 };
  static uschar str_envelope_c[]="envelope";
  static const struct String str_envelope={ str_envelope_c, 8 };
+#ifdef ENCODED_CHARACTER
+static uschar str_encoded_character_c[]="encoded-character";
+static const struct String str_encoded_character={ str_encoded_character_c, 17 };
+#endif
  #ifdef ENVELOPE_AUTH
  static uschar str_envelope_auth_c[]="envelope-auth";
  static const struct String str_envelope_auth={ str_envelope_auth_c, 13 };
@@ -1185,8 +1195,212 @@ return 1;
  }
  
  
+#ifdef ENCODED_CHARACTER
+/*************************************************
+*      Decode hex-encoded-character string       *
+*************************************************/
+
+/*
+Encoding definition:
+   blank                = SP / TAB / CRLF
+   hex-pair-seq         = *blank hex-pair *(1*blank hex-pair) *blank
+   hex-pair             = 1*2HEXDIG
+
+Arguments:
+  src         points to a hex-pair-seq
+  end         points to its end
+  dst         points to the destination of the decoded octets,
+              optionally to (uschar*)0 for checking only
+
+Returns:      >=0              number of decoded octets
+              -1               syntax error
+*/
+
+static int hex_decode(uschar *src, uschar *end, uschar *dst)
+{
+int decoded=0;
+
+/* Skip leading blanks. The bound check keeps us from reading at or beyond
+end, even if the octet at end happens to be a blank. */
+while (src<end && (*src==' ' || *src=='\t' || *src=='\n')) ++src;
+do
+  {
+  int x=0,d=0,n;
+
+  /* Collect one hex-pair, i.e. one or two hex digits, into x. */
+  while (d<2 && src<end && isxdigit(n=tolower(*src)))
+    {
+    x=(x<<4)|(n>='0' && n<='9' ? n-'0' : 10+(n-'a'));
+    ++d;
+    ++src;
+    }
+  /* No digit at all: empty sequence or an unexpected character. */
+  if (d==0) return -1;
+  /* dst may be null when the caller only wants to validate and count. */
+  if (dst) *dst++=x;
+  ++decoded;
+  if (src==end) return decoded;
+  /* Anything but a blank after a hex-pair (including a third hex digit)
+  is a syntax error. */
+  if (!(*src==' ' || *src=='\t' || *src=='\n')) return -1;
+  while (src<end && (*src==' ' || *src=='\t' || *src=='\n')) ++src;
+  }
+while (src<end);
+return decoded;
+}
+
+
+/*************************************************
+*    Decode unicode-encoded-character string     *
+*************************************************/
+
+/*
+Encoding definition:
+   blank                = SP / TAB / CRLF
+   unicode-hex-seq      = *blank unicode-hex *(blank unicode-hex) *blank
+   unicode-hex          = 1*HEXDIG
+
+   It is an error for a script to use a hexadecimal value that isn't in
+   either the range 0 to D7FF or the range E000 to 10FFFF.
+
+   At this time, strings are already scanned, thus the CRLF is converted
+   to the internally used \n (should RFC_EOL have been used).
+
+Arguments:
+  src         points to a unicode-hex-seq
+  end         points to its end
+  dst         points to the destination of the decoded octets,
+              optionally to (uschar*)0 for checking only
+
+Returns:      >=0              number of decoded octets
+              -1               syntax error
+              -2               semantic error (character range violation)
+*/
+
+static int unicode_decode(uschar *src, uschar *end, uschar *dst)
+{
+int decoded=0;
+
+/* Skip leading blanks. The bound check keeps us from reading at or beyond
+end, even if the octet at end happens to be a blank. */
+while (src<end && (*src==' ' || *src=='\t' || *src=='\n')) ++src;
+do
+  {
+  uschar *hex_seq=src;
+  int c=0,d=0,n;
+
+  /* Leading zeros carry no value and do not count as significant digits. */
+  while (src<end && *src=='0') ++src;
+  /* The largest valid value, 10FFFF, has six significant digits. A seventh
+  digit is consumed only so that the overflow can be reported below. */
+  while (d<7 && src<end && isxdigit(n=tolower(*src)))
+    {
+    c=(c<<4)|(n>='0' && n<='9' ? n-'0' : 10+(n-'a'));
+    ++d;
+    ++src;
+    }
+  /* Neither a zero nor a hex digit was consumed: syntax error. */
+  if (src==hex_seq) return -1;
+  /* Too many digits, a surrogate (D800-DFFF) or a value above 10FFFF. */
+  if (d==7 || !((c>=0 && c<=0xd7ff) || (c>=0xe000 && c<=0x10ffff))) return -2;
+
+  /* Emit the code point as UTF-8. dst may be null when the caller only
+  wants to validate the sequence and learn the number of octets. */
+  if (c<0x80)
+    {
+    if (dst) *dst++=c;
+    ++decoded;
+    }
+  else if (c<=0x7ff)
+    {
+    if (dst)
+      {
+      *dst++=192+(c>>6);
+      *dst++=128+(c&0x3f);
+      }
+    decoded+=2;
+    }
+  else if (c<=0xffff)
+    {
+    if (dst)
+      {
+      *dst++=224+(c>>12);
+      *dst++=128+((c>>6)&0x3f);
+      *dst++=128+(c&0x3f);
+      }
+    decoded+=3;
+    }
+  else
+    {
+    /* 4-octet form: 3+6+6+6 payload bits, so the second octet carries
+    bits 12..17 of the code point. */
+    if (dst)
+      {
+      *dst++=240+(c>>18);
+      *dst++=128+((c>>12)&0x3f);
+      *dst++=128+((c>>6)&0x3f);
+      *dst++=128+(c&0x3f);
+      }
+    decoded+=4;
+    }
+
+  /* Skip the blanks that separate this value from the next one, or that
+  trail the sequence. If a non-blank follows instead, the next iteration
+  consumes nothing and reports the syntax error. */
+  while (src<end && (*src==' ' || *src=='\t' || *src=='\n')) ++src;
+  }
+while (src<end);
+return decoded;
+}
+
+
+/*************************************************
+*       Decode encoded-character string          *
+*************************************************/
+
+/*
+Encoding definition:
+   encoded-arb-octets   = "${hex:" hex-pair-seq "}"
+   encoded-unicode-char = "${unicode:" unicode-hex-seq "}"
+
+Arguments:
+  data        points to an encoded string, returns decoded string
+  filter      points to the Sieve filter including its state
+
+Returns:      1                success
+              -1               unicode character out of range (errmsg is set)
+*/
+
+static int string_decode(struct Sieve *filter, struct String *data)
+{
+uschar *in=data->character;
+uschar *out=in;
+uschar *limit=in+data->length;
+
+/* The string is rewritten in place: out never overtakes in, because every
+decoded sequence is shorter than its encoded form. */
+while (in<limit)
+  {
+  uschar *rbrace;
+
+  if (strncmpic(in,US "${hex:",6)==0)
+    {
+    rbrace=Ustrchr(in+6,'}');
+    /* First pass with a null destination only validates the sequence. */
+    if (rbrace!=(uschar*)0 && hex_decode(in+6,rbrace,(uschar*)0)>=0)
+      {
+      out+=hex_decode(in+6,rbrace,out);
+      in=rbrace+1;
+      continue;
+      }
+    }
+  else if (strncmpic(in,US "${unicode:",10)==0)
+    {
+    rbrace=Ustrchr(in+10,'}');
+    if (rbrace!=(uschar*)0)
+      {
+      /* First pass with a null destination only validates the sequence. */
+      int checked=unicode_decode(in+10,rbrace,(uschar*)0);
+
+      if (checked==-2)
+        {
+        filter->errmsg=CUS "unicode character out of range";
+        return -1;
+        }
+      if (checked!=-1)
+        {
+        out+=unicode_decode(in+10,rbrace,out);
+        in=rbrace+1;
+        continue;
+        }
+      }
+    }
+
+  /* Not a well-formed encoded sequence: copy this octet unchanged. */
+  *out++=*in++;
+  }
+
+data->length=out-data->character;
+*out='\0';
+return 1;
+}
+#endif
+
+
  /*************************************************
-*          Parse a optional string               *
+*          Parse an optional string              *
  *************************************************/
  
  /*
@@ -1233,6 +1447,11 @@ if (*filter->pc=='"') /* quoted string */
        ++filter->pc;
        /* that way, there will be at least one character allocated */
        data->character=string_cat(data->character,&dataCapacity,&foo,CUS "",1);
+#ifdef ENCODED_CHARACTER
+      if (filter->require_encoded_character
+          && string_decode(filter,data)==-1)
+        return -1;
+#endif
        return 1;
        }
      else if (*filter->pc=='\\' && *(filter->pc+1)) /* quoted character */
@@ -1317,6 +1536,11 @@ else if (Ustrncmp(filter->pc,CUS "text:",5)==0) /* multiline string */
          filter->pc+=2;
  #endif
          ++filter->line;
+#ifdef ENCODED_CHARACTER
+        if (filter->require_encoded_character
+            && string_decode(filter,data)==-1)
+          return -1;
+#endif
          return 1;
          }
        else if (*filter->pc=='.' && *(filter->pc+1)=='.') /* remove dot stuffing */
@@ -1358,7 +1582,7 @@ static int parse_identifier(struct Sieve *filter, const uschar *id)
  {
    size_t idlen=Ustrlen(id);
  
-  if (Ustrncmp(filter->pc,id,idlen)==0)
+  if (strncmpic(US filter->pc,US id,idlen)==0)
    {
      uschar next=filter->pc[idlen];
  
@@ -2717,7 +2941,7 @@ while (*filter->pc)
          debug_printf("Notification to `%s'.\n",method.character);
          }
  #ifndef COMPILE_SYNTAX_CHECKER
-      if (exec)
+      if (exec && filter_test == FTEST_NONE)
          {
          string_item *p;
          header_line *h;
@@ -2726,6 +2950,8 @@ while (*filter->pc)
          if ((pid = child_open_exim2(&fd,envelope_to,envelope_to))>=1)
            {
            FILE *f;
+          uschar *buffer;
+          int buffer_capacity;
  
            f = fdopen(fd, "wb");
            for (h = header_list; h != NULL; h = h->next)
@@ -2733,8 +2959,15 @@ while (*filter->pc)
            fprintf(f,"From: %s\n",from.length==-1 ? envelope_to : from.character);
            for (p=recipient; p; p=p->next) fprintf(f,"To: %s\n",p->text);
            if (header.length>0) fprintf(f,"%s",header.character);
-          fprintf(f,"Subject: %s\n",message.length==-1 ? CUS "notification" : message.character);
-          fprintf(f,"\n");
+          if (message.length==-1)
+            {
+            message.character=US"Notification";
+            message.length=Ustrlen(message.character);
+            }
+          /* Allocation is larger than necessary, but enough even for split MIME words */
+          buffer_capacity=32+4*message.length;
+          buffer=store_get(buffer_capacity);
+          fprintf(f,"Subject: %s\n\n",parse_quote_2047(message.character, message.length, US"utf-8", buffer, buffer_capacity, TRUE));
            if (body.length>0) fprintf(f,"%s\n",body.character);
            fflush(f);
            (void)fclose(f);
@@ -3049,6 +3282,9 @@ filter->line=1;
  filter->keep=1;
  filter->require_envelope=0;
  filter->require_fileinto=0;
+#ifdef ENCODED_CHARACTER
+filter->require_encoded_character=0;
+#endif
  #ifdef ENVELOPE_AUTH
  filter->require_envelope_auth=0;
  #endif
@@ -3121,6 +3357,9 @@ while (parse_identifier(filter,CUS "require"))
      {
      if (eq_octet(check,&str_envelope,0)) filter->require_envelope=1;
      else if (eq_octet(check,&str_fileinto,0)) filter->require_fileinto=1;
+#ifdef ENCODED_CHARACTER
+    else if (eq_octet(check,&str_encoded_character,0)) filter->require_encoded_character=1;
+#endif
  #ifdef ENVELOPE_AUTH
      else if (eq_octet(check,&str_envelope_auth,0)) filter->require_envelope_auth=1;
  #endif