Happy New Year

[squirrelmail.git] / functions / mime.php
diff --git a/functions/mime.php b/functions/mime.php

index 6ec1c6292e8a7b93b537cdab2738638e000b1958..002e78892f76890a381be629e00c9ed92698d808 100644 (file)
--- a/functions/mime.php
+++ b/functions/mime.php
@@ -6,7 +6,7 @@
   * This contains the functions necessary to detect and decode MIME
   * messages.
   *
- * @copyright 1999-2019 The SquirrelMail Project Team
+ * @copyright 1999-2020 The SquirrelMail Project Team
   * @license http://opensource.org/licenses/gpl-license.php GNU Public License
   * @version $Id$
   * @package squirrelmail
@@ -1397,9 +1397,8 @@ function sq_casenormalize(&$val){
  function sq_skipspace($body, $offset){
      $me = 'sq_skipspace';
      preg_match('/^(\s*)/s', substr($body, $offset), $matches);
-    if (sizeof($matches{1})){
-        $count = strlen($matches{1});
-        $offset += $count;
+    if (!empty($matches[1])){
+        $offset += strlen($matches[1]);
      }
      return $offset;
  }
@@ -2125,7 +2124,17 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){
       * body {background: blah-blah}
       * and change it to .bodyclass so we can just assign it to a <div>
       */
-    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+    // $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+    // Nah, this is even better - try to prefix all CSS selectors with
+    // our <div> class ID "bodyclass" then correct generic "body" selectors
+    // TODO: this works pretty well but breaks stuff like this:
+    //       @media print { body { font-size: 10pt; } }
+    //       but there isn't an easy way to make this regex skip @media
+    //       definitions... though lots of the ones in the wild will be
+    //       correctly handled because they tend to end with a parenthesis, like:
+    //       @media screen and (max-width:480px) { ...
+    $content = preg_replace('/([a-z0-9._-][a-z0-9 >+~|:._-]*\s*(?:,|{.*?}))/si', '.bodyclass $1', $content);
+    $content = str_replace('.bodyclass body', '.bodyclass', $content);
  
      global $use_transparent_security_image;
      if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png';
@@ -2330,6 +2339,7 @@ function sq_body2div($attary, $mailbox, $message, $id){
   * @param $add_attr_to_tag      see description above
   * @param $message              message object
   * @param $id                   message id
+ * @param $recursively_called   boolean flag for recursive calls into this function (optional; default FALSE)
   * @return                      sanitized html safe to show on your pages.
   */
  function sq_sanitize($body,
@@ -2342,21 +2352,26 @@ function sq_sanitize($body,
                       $add_attr_to_tag,
                       $message,
                       $id,
-                     $mailbox
+                     $mailbox,
+                     $recursively_called=FALSE
                       ){
      $me = 'sq_sanitize';
+
+    /**
+     * The first element of tag_list tells whether it lists tags to remove or tags to allow.
+     * false  means remove these tags
+     * true   means allow these tags
+     */
+    $orig_tag_list = $tag_list;
      $rm_tags = array_shift($tag_list);
+
      /**
       * Normalize rm_tags and rm_tags_with_content.
       */
      @array_walk($tag_list, 'sq_casenormalize');
      @array_walk($rm_tags_with_content, 'sq_casenormalize');
      @array_walk($self_closing_tags, 'sq_casenormalize');
-    /**
-     * See if tag_list is of tags to remove or tags to allow.
-     * false  means remove these tags
-     * true   means allow these tags
-     */
+
      $curpos = 0;
      $open_tags = Array();
      $trusted = "\n<!-- begin sanitized html -->\n";
@@ -2369,6 +2384,47 @@ function sq_sanitize($body,
  
      while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
          list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
+
+        /**
+         * RCDATA and RAWTEXT tags are handled differently:
+         * next instance of closing tag is used, whether or not
+         * the HTML is well formed before that
+         */
+        global $rcdata_rawtext_tags;
+        if (!$recursively_called
+         && in_array($tagname, $rcdata_rawtext_tags)
+         && $tagtype === 1){
+            $closing_tag = false;
+            $closing_tag_offset = $curpos;
+            // seek out the closing tag for the current RCDATA/RAWTEXT tag
+            while (1) {
+                // first we need to move forward to next available closing tag
+                // (intentionally leave off the closing > and let sq_getnxtag() validate a proper tag syntax)
+                $next_tag = sq_findnxreg($body, $closing_tag_offset, "</\s*$tagname");
+                if ($next_tag === false) {
+                    $closing_tag = false;
+                    break;
+                }
+                // but then we have to make sure it's a well-formed tag
+                $closing_tag = sq_getnxtag($body, $next_tag[0]);
+                if ($closing_tag === false)
+                    break;
+                else if ($closing_tag[0] !== false
+                 // these should be redundant
+                 && $closing_tag[0] === $tagname && $closing_tag[2] === 2) {
+                    $trusted .= sq_sanitize(substr($body, $curpos, $closing_tag[4] - $curpos + 1),
+                                            $orig_tag_list, $rm_tags_with_content, $self_closing_tags,
+                                            $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag,
+                                            $message, $id, $mailbox, true);
+                    $curpos = $closing_tag[4] + 1;
+                    continue 2;
+                }
+                $closing_tag_offset = $next_tag[0] + 1;
+            }
+            if ($closing_tag === false)
+            { /* no-op... there was no closing tag for this RCDATA/RAWTEXT tag - we could probably set $curpos to the end of $body, but this HTML is malformed anyway and should just fall apart on its own */ }
+        }
+
          $free_content = substr($body, $curpos, $lt-$curpos);
          /**
           * Take care of <style>
@@ -2516,7 +2572,17 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
      // require_once(SM_PATH . 'functions/url_parser.php');  // for $MailTo_PReg_Match
  
      global $attachment_common_show_images, $view_unsafe_images,
-           $has_unsafe_images, $block_svg_display;
+           $has_unsafe_images, $allow_svg_display, $rcdata_rawtext_tags,
+           $remove_rcdata_rawtext_tags_and_content;
+
+    $rcdata_rawtext_tags = array(
+        "noscript",
+        "noframes",
+        "noembed",
+        "textarea",
+        // also "title", "xmp", "script", "iframe", "plaintext" which we already remove below
+    );
+
      /**
       * Don't display attached images in HTML mode.
       *
@@ -2524,7 +2590,7 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
       */
      $attachment_common_show_images = false;
      $tag_list = Array(
-            false,
+            false, // remove these tags
              "meta",
              "html",
              "head",
@@ -2546,8 +2612,17 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
              "xmp",
              "xml",
              );
-    if ($block_svg_display)
+    if (!$allow_svg_display)
          $rm_tags_with_content[] = 'svg';
+    /**
+     * SquirrelMail will parse RCDATA and RAWTEXT tags and handle them as the special
+     * case that they are, but if you prefer to remove them and their contents entirely
+     * (which in most cases should be safe, with minimal impact), you can add the
+     * following to config/config_local.php:
+     *    $remove_rcdata_rawtext_tags_and_content = TRUE;
+     */
+    if ($remove_rcdata_rawtext_tags_and_content)
+        $rm_tags_with_content = array_merge($rm_tags_with_content, $rcdata_rawtext_tags);
  
      $self_closing_tags =  Array(
              "img",