Stop using curly braces

[squirrelmail.git] / functions / mime.php
diff --git a/functions/mime.php b/functions/mime.php

index 8cf0f23d6fc9dc36af6a3c05f7cc8b1633a90e22..9bc6182032a95e40ecff32f7c9b43e9a442102a4 100644 (file)
--- a/functions/mime.php
+++ b/functions/mime.php
@@ -6,7 +6,7 @@
   * This contains the functions necessary to detect and decode MIME
   * messages.
   *
- * @copyright 1999-2019 The SquirrelMail Project Team
+ * @copyright 1999-2021 The SquirrelMail Project Team
   * @license http://opensource.org/licenses/gpl-license.php GNU Public License
   * @version $Id$
   * @package squirrelmail
@@ -71,7 +71,7 @@ function mime_structure ($bodystructure, $flags=array()) {
      if (count($flags)) {
          foreach ($flags as $flag) {
  //FIXME: please document why it is we have to check the first char of the flag but we then go ahead and do a full string comparison anyway.  Is this a speed enhancement?  If not, let's keep it simple and just compare the full string and forget the switch block.
-            $char = strtoupper($flag{1});
+            $char = strtoupper($flag[1]);
              switch ($char) {
                  case 'S':
                      if (strtolower($flag) == '\\seen') {
@@ -151,7 +151,7 @@ function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) {
          /* There is some information in the content info header that could be important
           * in order to parse html messages. Let's get them here.
           */
-//        if ($ret{0} == '<') {
+//        if ($ret[0] == '<') {
  //            $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, TRUE);
  //        }
      } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
@@ -598,7 +598,8 @@ function buildAttachmentArray($message, $exclude_id, $mailbox, $id) {
             returning any changes, changes should simply be made to the original
             arguments themselves. */
          $temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
-                    &$defaultlink, &$display_filename, &$where, &$what);
+                    &$defaultlink, &$display_filename, &$where, &$what,
+                    &$type0, &$type1);
          do_hook("attachment $type0/$type1", $temp);
          /* The API for this hook has changed as of 1.5.2 so that all plugin
             arguments are passed in an array instead of each their own plugin
@@ -606,7 +607,8 @@ function buildAttachmentArray($message, $exclude_id, $mailbox, $id) {
             returning any changes, changes should simply be made to the original
             arguments themselves. */
          $temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
-                      &$defaultlink, &$display_filename, &$where, &$what);
+                      &$defaultlink, &$display_filename, &$where, &$what,
+                      &$type0, &$type1);
          // Do not let a generic plugin change the default link if a more
          // specialized one already did it...
          if ($defaultlink != $defaultlink_orig) {
@@ -620,7 +622,8 @@ function buildAttachmentArray($message, $exclude_id, $mailbox, $id) {
             returning any changes, changes should simply be made to the original
             arguments themselves. */
          $temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent, 
-                      &$defaultlink, &$display_filename, &$where, &$what);
+                      &$defaultlink, &$display_filename, &$where, &$what,
+                      &$type0, &$type1);
          // Do not let a generic plugin change the default link if a more
          // specialized one already did it...
          if ($defaultlink != $defaultlink_orig) {
@@ -635,18 +638,37 @@ function buildAttachmentArray($message, $exclude_id, $mailbox, $id) {
          $this_attachment['DefaultHREF'] = $defaultlink;
          $this_attachment['DownloadHREF'] = $links['download link']['href'];
          $this_attachment['ViewHREF'] = isset($links['attachment_common']) ? $links['attachment_common']['href'] : '';
-        $this_attachment['Size'] = $header->size;
+
+        // base64 encoded file sizes are misleading, so approximate real size
+        if (!empty($header->encoding) && strtolower($header->encoding) == 'base64')
+            $this_attachment['Size'] = $header->size / 4 * 3;
+        else
+            $this_attachment['Size'] = $header->size;
+
          $this_attachment['ContentType'] = sm_encode_html_special_chars($type0 .'/'. $type1);
          $this_attachment['OtherLinks'] = array();
          foreach ($links as $val) {
-            if ($val['text']==_("Download") || $val['text'] == _("View"))
+            if ($val['text']==_("Download")) {
+                $this_attachment['DownloadHREF'] = $val['href'];
                  continue;
-            if (empty($val['text']) && empty($val['extra']))
+            }
+            if ($val['text']==_("View")) {
+                $this_attachment['ViewHREF'] = $val['href'];
+                continue;
+            }
+
+            // This makes no sense - If 'text' and 'extra' are just concatenated,
+            // there is no point in having 'extra'.... I am going to assume this
+            // was a mistake and am changing 'extra' to be what I think it was
+            // meant to be: additional tag attributes.  However, I'm not checking
+            // extensively for plugins that were using this the wrong way (but why would they?)
+            if (empty($val['text']))
                  continue;
  
              $temp = array();
              $temp['HREF'] = $val['href'];
-            $temp['Text'] = (empty($val['text']) ? '' : $val['text']) . (empty($val['extra']) ? '' : $val['extra']);
+            $temp['Text'] = $val['text'];
+            $temp['Extra'] = (empty($val['extra']) ? '' : $val['extra']);
              $this_attachment['OtherLinks'][] = $temp;
          }
          $attachments[] = $this_attachment;
@@ -979,7 +1001,7 @@ function encodeHeader ($string) {
      $iEncStart = $enc_init = false;
      $cur_l = $iOffset = 0;
      for($i = 0; $i < $j; ++$i) {
-        switch($string{$i})
+        switch($string[$i])
          {
              case '"':
              case '=':
@@ -1001,7 +1023,7 @@ function encodeHeader ($string) {
                      $ret = '';
                      $iEncStart = false;
                  } else {
-                    $ret .= sprintf("=%02X",ord($string{$i}));
+                    $ret .= sprintf("=%02X",ord($string[$i]));
                  }
                  break;
              case '(':
@@ -1031,7 +1053,7 @@ function encodeHeader ($string) {
                  }
                  break;
              default:
-                $k = ord($string{$i});
+                $k = ord($string[$i]);
                  if ($k > 126) {
                      if ($iEncStart === false) {
                          // do not start encoding in the middle of a string, also take the rest of the word.
@@ -1065,7 +1087,7 @@ function encodeHeader ($string) {
                              $cur_l = 0;
                              $ret = '';
                          } else {
-                            $ret .= $string{$i};
+                            $ret .= $string[$i];
                          }
                      }
                  }
@@ -1360,7 +1382,7 @@ function sq_tagprint($tagname, $attary, $tagtype){
          $fulltag = '<' . $tagname;
          if (is_array($attary) && sizeof($attary)){
              $atts = Array();
-            while (list($attname, $attvalue) = each($attary)){
+            foreach ($attary as $attname => $attvalue){
                  array_push($atts, "$attname=$attvalue");
              }
              $fulltag .= ' ' . join(" ", $atts);
@@ -1441,12 +1463,12 @@ function sq_findnxreg($body, $offset, $reg){
      $matches = Array();
      $retarr = Array();
      preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches);
-    if (!isset($matches{0}) || !$matches{0}){
+    if (!isset($matches[0]) || !$matches[0]){
          $retarr = false;
      } else {
-        $retarr{0} = $offset + strlen($matches{1});
-        $retarr{1} = $matches{1};
-        $retarr{2} = $matches{2};
+        $retarr[0] = $offset + strlen($matches[1]);
+        $retarr[1] = $matches[1];
+        $retarr[2] = $matches[2];
      }
      return $retarr;
  }
@@ -1604,8 +1626,8 @@ function sq_getnxtag($body, $offset){
              /**
               * Yep. So we did.
               */
-            $pos += strlen($matches{1});
-            if ($matches{2} == "/>"){
+            $pos += strlen($matches[1]);
+            if ($matches[2] == "/>"){
                  $tagtype = 3;
                  $pos++;
              }
@@ -1662,7 +1684,7 @@ function sq_getnxtag($body, $offset){
                      return $retary;
                  }
              case '>':
-                $attary{$attname} = '"yes"';
+                $attary[$attname] = '"yes"';
                  return Array($tagname, $attary, $tagtype, $lt, $pos);
                  break;
              default:
@@ -1696,7 +1718,7 @@ function sq_getnxtag($body, $offset){
                          }
                          list($pos, $attval, $match) = $regary;
                          $pos++;
-                        $attary{$attname} = "'" . $attval . "'";
+                        $attary[$attname] = "'" . $attval . "'";
                      } else if ($quot == '"'){
                          $regary = sq_findnxreg($body, $pos+1, '\"');
                          if ($regary == false){
@@ -1704,7 +1726,7 @@ function sq_getnxtag($body, $offset){
                          }
                          list($pos, $attval, $match) = $regary;
                          $pos++;
-                        $attary{$attname} = '"' . $attval . '"';
+                        $attary[$attname] = '"' . $attval . '"';
                      } else {
                          /**
                           * These are hateful. Look for \s, or >.
@@ -1718,13 +1740,13 @@ function sq_getnxtag($body, $offset){
                           * If it's ">" it will be caught at the top.
                           */
                          $attval = preg_replace("/\"/s", "&quot;", $attval);
-                        $attary{$attname} = '"' . $attval . '"';
+                        $attary[$attname] = '"' . $attval . '"';
                      }
                  } else if (preg_match("|[\w/>]|", $char)) {
                      /**
                       * That was attribute type 4.
                       */
-                    $attary{$attname} = '"yes"';
+                    $attary[$attname] = '"yes"';
                  } else {
                      /**
                       * An illegal character. Find next '>' and return.
@@ -1763,7 +1785,7 @@ function sq_deent(&$attvalue, $regex, $hex=false){
              if ($hex){
                  $numval = hexdec($numval);
              }
-            $repl{$matches[0][$i]} = chr($numval);
+            $repl[$matches[0][$i]] = chr($numval);
          }
          $attvalue = strtr($attvalue, $repl);
          return true;
@@ -1794,7 +1816,7 @@ function sq_fixatts($tagname,
                      $mailbox
                      ){
      $me = 'sq_fixatts';
-    while (list($attname, $attvalue) = each($attary)){
+    foreach ($attary as $attname => $attvalue){
          /**
           * See if this attribute should be removed.
           */
@@ -1802,7 +1824,7 @@ function sq_fixatts($tagname,
              if (preg_match($matchtag, $tagname)){
                  foreach ($matchattrs as $matchattr){
                      if (preg_match($matchattr, $attname)){
-                        unset($attary{$attname});
+                        unset($attary[$attname]);
                          continue;
                      }
                  }
@@ -1823,7 +1845,7 @@ function sq_fixatts($tagname,
              // entities are used in the attribute value. In 99% of the cases it's there as XSS
              // i.e.<div style="{ left:exp&#x0280;essio&#x0274;( alert('XSS') ) }">
              $attvalue = "idiocy";
-            $attary{$attname} = $attvalue;
+            $attary[$attname] = $attvalue;
          }
          sq_unspace($attvalue);
  
@@ -1846,7 +1868,7 @@ function sq_fixatts($tagname,
                          $newvalue =
                              preg_replace($valmatch, $valrepl, $attvalue);
                          if ($newvalue != $attvalue){
-                            $attary{$attname} = $newvalue;
+                            $attary[$attname] = $newvalue;
                              $attvalue = $newvalue;
                          }
                      }
@@ -1856,7 +1878,7 @@ function sq_fixatts($tagname,
          if ($attname == 'style') {
              if (preg_match('/[\0-\37\200-\377]+/',$attvalue)) {
                  // 8bit and control characters in style attribute values can be used for XSS, remove them
-                $attary{$attname} = '"disallowed character"';
+                $attary[$attname] = '"disallowed character"';
              }
              preg_match_all("/url\s*\((.+)\)/si",$attvalue,$aMatch);
              if (count($aMatch)) {
@@ -1864,7 +1886,7 @@ function sq_fixatts($tagname,
                      // url value
                      $urlvalue = $sMatch;
                      sq_fix_url($attname, $urlvalue, $message, $id, $mailbox,"'");
-                    $attary{$attname} = str_replace($sMatch,$urlvalue,$attvalue);
+                    $attary[$attname] = str_replace($sMatch,$urlvalue,$attvalue);
                  }
              }
          }
@@ -1875,7 +1897,7 @@ function sq_fixatts($tagname,
                || $attname == 'poster' || $attname == 'formaction'
                || $attname == 'background' || $attname == 'action') {
              sq_fix_url($attname, $attvalue, $message, $id, $mailbox);
-            $attary{$attname} = $attvalue;
+            $attary[$attname] = $attvalue;
          }
      }
      /**
@@ -2061,7 +2083,7 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){
      $bSucces = false;
      $bEndTag = false;
      for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) {
-        $char = $body{$i};
+        $char = $body[$i];
          switch ($char) {
              case '<':
                  $sToken = $char;
@@ -2092,7 +2114,7 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){
              case '!':
                  if ($sToken == '<') {
                      // possible comment
-                    if (isset($body{$i+2}) && substr($body,$i,3) == '!--') {
+                    if (isset($body[$i+2]) && substr($body,$i,3) == '!--') {
                          $i = strpos($body,'-->',$i+3);
                          if ($i === false) { // no end comment
                              $i = strlen($body);
@@ -2124,7 +2146,17 @@ function sq_fixstyle($body, $pos, $message, $id, $mailbox){
       * body {background: blah-blah}
       * and change it to .bodyclass so we can just assign it to a <div>
       */
-    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+    // $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+    // Nah, this is even better - try to preface all CSS selectors with
+    // our <div> class ID "bodyclass" then correct generic "body" selectors
+    // TODO: this works pretty well but breaks stuff like this:
+    //       @media print { body { font-size: 10pt; } }
+    //       but there isn't an easy way to make this regex skip @media
+    //       definitions... though lots of the ones in the wild will be
+    //       correctly handled because they tend to end with a parenthesis, like:
+    //       @media screen and (max-width:480px) { ...
+    $content = preg_replace('/([a-z0-9._-][a-z0-9 >+~|:._-]*\s*(?:,|{.*?}))/si', '.bodyclass $1', $content);
+    $content = str_replace('.bodyclass body', '.bodyclass', $content);
  
      global $use_transparent_security_image;
      if ($use_transparent_security_image) $secremoveimg = '../images/spacer.png';
@@ -2305,7 +2337,7 @@ function sq_body2div($attary, $mailbox, $message, $id){
              $styledef .= "color: $text; ";
          }
          if (strlen($styledef) > 0){
-            $divattary{"style"} = "\"$styledef\"";
+            $divattary["style"] = "\"$styledef\"";
          }
      }
      return $divattary;
@@ -2329,6 +2361,7 @@ function sq_body2div($attary, $mailbox, $message, $id){
   * @param $add_attr_to_tag      see description above
   * @param $message              message object
   * @param $id                   message id
+ * @param $recursively_called   boolean flag for recursive calls into this function (optional; default FALSE)
   * @return                      sanitized html safe to show on your pages.
   */
  function sq_sanitize($body,
@@ -2341,21 +2374,26 @@ function sq_sanitize($body,
                       $add_attr_to_tag,
                       $message,
                       $id,
-                     $mailbox
+                     $mailbox,
+                     $recursively_called=FALSE
                       ){
      $me = 'sq_sanitize';
+
+    /**
+     * See if tag_list is a list of tags to remove or tags to allow.
+     * false  means remove these tags
+     * true   means allow these tags
+     */
+    $orig_tag_list = $tag_list;
      $rm_tags = array_shift($tag_list);
+
      /**
       * Normalize rm_tags and rm_tags_with_content.
       */
      @array_walk($tag_list, 'sq_casenormalize');
      @array_walk($rm_tags_with_content, 'sq_casenormalize');
      @array_walk($self_closing_tags, 'sq_casenormalize');
-    /**
-     * See if tag_list is of tags to remove or tags to allow.
-     * false  means remove these tags
-     * true   means allow these tags
-     */
+
      $curpos = 0;
      $open_tags = Array();
      $trusted = "\n<!-- begin sanitized html -->\n";
@@ -2368,6 +2406,47 @@ function sq_sanitize($body,
  
      while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
          list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
+
+        /**
+         * RCDATA and RAWTEXT tags are handled differently:
+         * next instance of closing tag is used, whether or not
+         * the HTML is well formed before that
+         */
+        global $rcdata_rawtext_tags;
+        if (!$recursively_called
+         && in_array($tagname, $rcdata_rawtext_tags)
+         && $tagtype === 1){
+            $closing_tag = false;
+            $closing_tag_offset = $curpos;
+            // seek out the closing tag for the current RCDATA/RAWTEXT tag
+            while (1) {
+                // first we need to move forward to next available closing tag
+                // (intentionally leave off the closing > and let sq_getnxtag() validate a proper tag syntax)
+                $next_tag = sq_findnxreg($body, $closing_tag_offset, "</\s*$tagname");
+                if ($next_tag === false) {
+                    $closing_tag = false;
+                    break;
+                }
+                // but then we have to make sure it's a well-formed tag
+                $closing_tag = sq_getnxtag($body, $next_tag[0]);
+                if ($closing_tag === false)
+                    break;
+                else if ($closing_tag[0] !== false
+                 // these should be redundant
+                 && $closing_tag[0] === $tagname && $closing_tag[2] === 2) {
+                    $trusted .= sq_sanitize(substr($body, $curpos, $closing_tag[4] - $curpos + 1),
+                                            $orig_tag_list, $rm_tags_with_content, $self_closing_tags,
+                                            $force_tag_closing, $rm_attnames, $bad_attvals, $add_attr_to_tag,
+                                            $message, $id, $mailbox, true);
+                    $curpos = $closing_tag[4] + 1;
+                    continue 2;
+                }
+                $closing_tag_offset = $next_tag[0] + 1;
+            }
+            if ($closing_tag === false)
+            { /* no-op... there was no closing tag for this RCDATA/RAWTEXT tag - we could probably set $curpos to the end of $body, but this HTML is malformed anyway and should just fall apart on its own */ }
+        }
+
          $free_content = substr($body, $curpos, $lt-$curpos);
          /**
           * Take care of <style>
@@ -2409,9 +2488,9 @@ function sq_sanitize($body,
                          if ($tagname == "body"){
                              $tagname = "div";
                          }
-                        if (isset($open_tags{$tagname}) &&
-                                $open_tags{$tagname} > 0){
-                            $open_tags{$tagname}--;
+                        if (isset($open_tags[$tagname]) &&
+                                $open_tags[$tagname] > 0){
+                            $open_tags[$tagname]--;
                          } else {
                              $tagname = false;
                          }
@@ -2453,10 +2532,10 @@ function sq_sanitize($body,
                                          $message, $id);
                              }
                              if ($tagtype == 1){
-                                if (isset($open_tags{$tagname})){
-                                    $open_tags{$tagname}++;
+                                if (isset($open_tags[$tagname])){
+                                    $open_tags[$tagname]++;
                                  } else {
-                                    $open_tags{$tagname}=1;
+                                    $open_tags[$tagname]=1;
                                  }
                              }
                              /**
@@ -2515,7 +2594,17 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
      // require_once(SM_PATH . 'functions/url_parser.php');  // for $MailTo_PReg_Match
  
      global $attachment_common_show_images, $view_unsafe_images,
-           $has_unsafe_images, $allow_svg_display;
+           $has_unsafe_images, $allow_svg_display, $rcdata_rawtext_tags,
+           $remove_rcdata_rawtext_tags_and_content;
+
+    $rcdata_rawtext_tags = array(
+        "noscript",
+        "noframes",
+        "noembed",
+        "textarea",
+        // also "title", "xmp", "script", "iframe", "plaintext" which we already remove below
+    );
+
      /**
       * Don't display attached images in HTML mode.
       *
@@ -2523,7 +2612,7 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
       */
      $attachment_common_show_images = false;
      $tag_list = Array(
-            false,
+            false, // remove these tags
              "meta",
              "html",
              "head",
@@ -2547,6 +2636,15 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
              );
      if (!$allow_svg_display)
          $rm_tags_with_content[] = 'svg';
+    /**
+     * SquirrelMail will parse RCDATA and RAWTEXT tags and handle them as the special
+     * case that they are, but if you prefer to remove them and their contents entirely
+     * (in most cases, should be a safe thing with minimal impact), you can add the
+     * following to config/config_local.php
+     *    $remove_rcdata_rawtext_tags_and_content = TRUE; 
+     */
+    if ($remove_rcdata_rawtext_tags_and_content)
+        $rm_tags_with_content = array_merge($rm_tags_with_content, $rcdata_rawtext_tags);
  
      $self_closing_tags =  Array(
              "img",
@@ -2659,13 +2757,13 @@ function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links
           * Remove any references to http/https if view_unsafe_images set
           * to false.
           */
-        array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[0],
+        array_push($bad_attvals['/.*/']['/^src|background/i'][0],
                  '/^([\'\"])\s*https*:.*([\'\"])/si');
-        array_push($bad_attvals{'/.*/'}{'/^src|background/i'}[1],
+        array_push($bad_attvals['/.*/']['/^src|background/i'][1],
                  "\\1$secremoveimg\\1");
-        array_push($bad_attvals{'/.*/'}{'/^style/i'}[0],
+        array_push($bad_attvals['/.*/']['/^style/i'][0],
                  '/url\([\'\"]?https?:[^\)]*[\'\"]?\)/si');
-        array_push($bad_attvals{'/.*/'}{'/^style/i'}[1],
+        array_push($bad_attvals['/.*/']['/^style/i'][1],
                  "url(\\1$secremoveimg\\1)");
      }