Add better spam header handling; also cache raw headers
[squirrelmail.git] / class / mime / Rfc822Header.class.php
index 717b09de45c364c9cb6a07b5443e0b9cc512e15c..3b072cf50ff0418fe2f92ed7bc4185c278006933 100644 (file)
@@ -5,7 +5,7 @@
  *
  * This file contains functions needed to handle headers in mime messages.
  *
- * @copyright 2003-2014 The SquirrelMail Project Team
+ * @copyright 2003-2018 The SquirrelMail Project Team
  * @license http://opensource.org/licenses/gpl-license.php GNU Public License
  * @version $Id$
  * @package squirrelmail
  * @since 1.3.0
  */
 class Rfc822Header {
+    /**
+     * All headers, unparsed
+     * @var array
+     */
+    var $raw_headers = array();
     /**
      * Date header
      * @var mixed
@@ -166,6 +171,7 @@ class Rfc822Header {
         foreach ($hdr as $line) {
             $pos = strpos($line, ':');
             if ($pos > 0) {
+                $this->raw_headers[] = $line;
                 $field = substr($line, 0, $pos);
                 if (!strstr($field,' ')) { /* valid field */
                         $value = trim(substr($line, $pos+1));
@@ -350,8 +356,12 @@ class Rfc822Header {
                 $this->mlist('id', $value);
                 break;
             case 'x-spam-status':
+            case 'x-spam-score':
                 $this->x_spam_status = $this->parseSpamStatus($value);
                 break;
+            case 'x-sm-flag-reply':
+                $this->x_sm_flag_reply = $value;
+                break;
             default:
                 break;
         }
@@ -809,45 +819,60 @@ class Rfc822Header {
     }
 
     /**
-     * Parses the X-Spam-Status header
+     * Parses the X-Spam-Status or X-Spam-Score header
      * @param string $value
      */
     function parseSpamStatus($value) {
         // Header value looks like this:
         // No, score=1.5 required=5.0 tests=MSGID_FROM_MTA_ID,NO_REAL_NAME,UPPERCASE_25_50 autolearn=disabled version=3.1.0-gr0
+        // Update circa 2018, this header can also be simply:
+        // No, score=1.5
+        // So we make the rest of the line optional (there are likely other permutations, so
+        // only the numeric score itself is mandatory... maybe even that's not flexible enough)
+        //
+        // Also now allow parsing of X-Spam-Score header, whose value is just a float
 
         $spam_status = array();
 
-        if (preg_match ('/^(No|Yes),\s+score=(-?\d+\.\d+)\s+required=(-?\d+\.\d+)\s+tests=(.*?)\s+autolearn=(.*?)\s+version=(.+?)$/', $value, $matches)) {
+        if (preg_match ('/^(?:(No|Yes),\s+score=)?(-?\d+\.\d+)(?:\s+required=(-?\d+\.\d+))?(?:\s+tests=(.*?))?(?:\s+autolearn=(.*?))?(?:\s+version=(.+?))?$/i', $value, $matches)) {
+
             // full header
             $spam_status['bad_format'] = 0;
             $spam_status['value'] = $matches[0];
+
             // is_spam
-            if (isset($matches[1])
-                && strtolower($matches[1]) == 'yes') {
-                $spam_status['is_spam'] = true;
-            } else {
-                $spam_status['is_spam'] = false;
+            if (!empty($matches[1])) {
+                if (strtolower($matches[1]) == 'yes')
+                    $spam_status['is_spam'] = true;
+                else
+                    $spam_status['is_spam'] = false;
             }
 
             // score
-            $spam_status['score'] = $matches[2];
+            if (!empty($matches[2]))
+                $spam_status['score'] = $matches[2];
 
             // required
-            $spam_status['required'] = $matches[3];
+            if (!empty($matches[3]))
+                $spam_status['required'] = $matches[3];
 
             // tests
-            $tests = array();
-            $tests = explode(',', $matches[4]);
-            foreach ($tests as $test) {
-                $spam_status['tests'][] = trim($test);
+            if (isset($matches[4])) {
+                $tests = array();
+                $tests = explode(',', $matches[4]);
+                foreach ($tests as $test) {
+                    $spam_status['tests'][] = trim($test);
+                }
             }
 
             // autolearn
-            $spam_status['autolearn'] = $matches[5];
+            if (isset($matches[5]))
+                $spam_status['autolearn'] = $matches[5];
 
             // version
-            $spam_status['version'] = $matches[6];
+            if (isset($matches[6]))
+                $spam_status['version'] = $matches[6];
+
         } else {
             $spam_status['bad_format'] = 1;
             $spam_status['value'] = $value;
@@ -970,7 +995,7 @@ class Rfc822Header {
      *    Looks through this list of addresses and
      *    returns the array index (an integer even
      *    if the array is given with keys of a
-     *    different type) of the *last* matching
+     *    different type) of the first matching
      *    $address found in this message's
      *    TO or CC headers, unless there is an exact
      *    match (meaning that the "personal
@@ -1002,10 +1027,10 @@ class Rfc822Header {
             $i=0;
             foreach($address as $argument) {
                 $match = $this->findAddress($argument, true);
-                if ($match[1]) {
+                if ($match[1]) { // this indicates when the personal information matched
                     return $i;
                 } else {
-                    if (count($match[0]) && !$result) {
+                    if (count($match[0]) && $result === FALSE) {
                         $result = $i;
                     }
                 }