Added a few cool themes from Jorey Bump
[squirrelmail.git] / functions / mime.php
index 3bdfedf234fe7053e834aa3a38db7f0c01992f08..357bfc4fb93a498444af5f6a96d726898f612d3d 100644 (file)
@@ -1,30 +1,48 @@
 <?php
-   /** mime.php
-    **
-    ** This contains the functions necessary to detect and decode MIME
-    ** messages.
-    **
-    ** $Id$
-    **/
-
-   if (defined('mime_php'))
-      return;
-   define('mime_php', true);
-
-   require_once('../functions/imap.php');
 
-   /** Setting up the objects that have the structure for the message **/
-
-   class msg_header {
-      /** msg_header contains generic variables for values that **/
-      /** could be in a header.                                 **/
-
-      var $type0 = '', $type1 = '', $boundary = '', $charset = '';
-      var $encoding = '', $size = 0, $to = array(), $from = '', $date = '';
-      var $cc = array(), $bcc = array(), $reply_to = '', $subject = '';
-      var $id = 0, $mailbox = '', $description = '', $filename = '';
-      var $entity_id = 0, $message_id = 0, $name = '';
-   }
+/**
+ * mime.php
+ *
+ * Copyright (c) 1999-2001 The SquirrelMail Development Team
+ * Licensed under the GNU GPL. For full terms see the file COPYING.
+ *
+ * This contains the functions necessary to detect and decode MIME
+ * messages.
+ *
+ * $Id$
+ */
+
+/*****************************************************************/
+/*** THIS FILE NEEDS TO HAVE ITS FORMATTING FIXED!!!           ***/
+/*** PLEASE DO SO AND REMOVE THIS COMMENT SECTION.             ***/
+/***    + Base level indent should begin at left margin, as    ***/
+/***      the require_once below.                              ***/
+/***    + All indentation should consist of four space blocks  ***/
+/***    + Tab characters are evil.                             ***/
+/***    + all comments should use "slash-star ... star-slash"  ***/
+/***      style -- no pound characters, no slash-slash style   ***/
+/***    + FLOW CONTROL STATEMENTS (if, while, etc) SHOULD      ***/
+/***      ALWAYS USE { AND } CHARACTERS!!!                     ***/
+/***    + Please use ' instead of ", when possible. Note "     ***/
+/***      should always be used in _( ) function calls.        ***/
+/*** Thank you for your help making the SM code more readable. ***/
+/*****************************************************************/
+
+require_once('../functions/imap.php');
+require_once('../functions/attachment_common.php');
+
+/** Setting up the objects that have the structure for the message **/
+class msg_header {
+    /*
+     * msg_header is a plain container of generic variables for the
+     * values that could be in a header.  It has no methods; every field
+     * starts out as an empty string, an empty array or 0.
+     */
+
+    /* Content type as two halves (e.g. 'multipart' / 'alternative'). */
+    var $type0 = '';
+    var $type1 = '';
+    var $boundary = '';
+    var $charset = '';
+
+    var $encoding = '';
+    var $size = 0;
+    var $to = array();
+    var $from = '';
+    var $date = '';
+
+    var $cc = array();
+    var $bcc = array();
+    var $reply_to = '';
+    var $subject = '';
+
+    var $id = 0;
+    var $mailbox = '';
+    var $description = '';
+    var $filename = '';
+
+    var $entity_id = 0;
+    var $message_id = 0;
+    var $name = '';
+
+    /* A $priority field is not declared (yet): var $priority = ""; */
+}
 
    class message {
       /** message is the object that contains messages.  It is a recursive
       return( $pos );
    }
 
-   function mime_fetch_body ($imap_stream, $id, $ent_id) {
-      // do a bit of error correction.  If we couldn't find the entity id, just guess
-      // that it is the first one.  That is usually the case anyway.
-      if (!$ent_id) $ent_id = 1;
+    /*
+     * Fetches one MIME entity of a message over IMAP and returns its body.
+     *
+     *   $imap_stream  stream the IMAP commands are written to with fputs()
+     *   $id           message id used in the FETCH commands
+     *   $ent_id       entity id of the part; a false-ish value becomes 1
+     *
+     * Returns the body as a string.  Three shapes of FETCH reply are handled:
+     *   - "{n}" on the FETCH line: the first n bytes of the data are the
+     *     body.  If that body starts with '<' the part's MIME header is
+     *     fetched too and a Content-Base value, when present, is prepended
+     *     as a <base href="..."> tag.
+     *   - a quoted string on the FETCH line: that string is the body.
+     *   - anything else: an error notice is echoed and the whole message
+     *     (BODY[]) is returned behind a dashed separator line.
+     */
+    function mime_fetch_body ($imap_stream, $id, $ent_id ) {
+        /* Theme palette for the error notice; assumed to be the usual
+           global colour array -- TODO confirm against the callers. */
+        global $color;
+
+        /* Do a bit of error correction.  If we couldn't find the entity id,
+           just guess that it is the first one.  That is usually the case. */
+        if (!$ent_id) {
+            $ent_id = 1;
+        }
+        $sid = sqimap_session_id();
+        fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id]\r\n");
+        $data = sqimap_read_data ($imap_stream, $sid, true, $response, $message);
+        /* Discard lines until the "* n FETCH" line has been consumed. */
+        $topline = array_shift($data);
+        while (! ereg('\\* [0-9]+ FETCH ', $topline) && $data) {
+            $topline = array_shift($data);
+        }
+        $wholemessage = implode('', $data);
+        if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) {
+            $ret = substr( $wholemessage, 0, $regs[1] );
+            /*
+                There is some information in the content info header that could be important
+                in order to parse html messages. Let's get them here.
+                substr() instead of $ret{0} so an empty body is not indexed.
+            */
+            if( substr( $ret, 0, 1 ) == '<' ) {
+                fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id.MIME]\r\n");
+                $data = sqimap_read_data ($imap_stream, $sid, true, $response, $message);
+                $base = '';
+                /* $k counts the lines since the Content-Base header; 10 is
+                   just "far away" so the continuation test cannot fire early. */
+                $k = 10;
+                foreach( $data as $d ) {
+                    if( substr( $d, 0, 13 ) == 'Content-Base:' ) {
+                        $j = strlen( $d );
+                        $i = 13;
+                        $base = '';
+                        /* Skip the separators in front of the value ... */
+                        while( $i < $j &&
+                               ( !isNoSep( $d{$i} ) || $d{$i} == '"' )  ) {
+                            $i++;
+                        }
+                        /* ... and keep every non-separator after that. */
+                        while( $i < $j ) {
+                            if( isNoSep( $d{$i} ) ) {
+                                $base .= $d{$i};
+                            }
+                            $i++;
+                        }
+                        $k = 0;
+                    } elseif( $k == 1 && !isnosep( $d{0} ) ) {
+                        /* Line right after the header that starts with a
+                           separator: treated as its continuation. */
+                        $base .= substr( $d, 1 );
+                    }
+                    $k++;
+                }
+                if( $base <> '' ) {
+                    /* The value comes from the message, so escape it before
+                       it is placed inside an attribute. */
+                    $ret = '<base href="' . htmlspecialchars( $base ) . '">' . $ret;
+                }
+            }
+        } else if (ereg('"([^"]*)"', $topline, $regs)) {
+            $ret = $regs[1];
+        } else {
+            global $where, $what, $mailbox, $passed_id, $startMessage;
+            $par = "mailbox=".urlencode($mailbox)."&passed_id=$passed_id";
+            if (isset($where) && isset($what)) {
+                $par .= "&where=".urlencode($where)."&what=".urlencode($what);
+            } else {
+                $par .= "&startMessage=$startMessage&show_more=0";
+            }
+            $par .= '&response='.urlencode($response).'&message='.urlencode($message).
+                    '&topline='.urlencode($topline);
+            /* The colour used to sit inside a single-quoted string and was
+               printed literally as "$color[2]". */
+            $errcolor = isset($color[2]) ? $color[2] : '';
 
-      fputs ($imap_stream, sqimap_session_id() . " FETCH $id BODY[$ent_id]\r\n");
-      $data = sqimap_read_data ($imap_stream, sqimap_session_id(), true, $response, $message);
-      $topline = array_shift($data);
-      while (! ereg('\\* [0-9]+ FETCH ', $topline) && $data)
-          $topline = array_shift($data);
-      $wholemessage = implode('', $data);
+            echo   '<b><font color=' . $errcolor . '>Body retrieval error. The reason for this is most probably that<BR> ' .
+                   'the message is malformed. Please help us making future versions<BR> ' .
+                   "better by submitting this message to the developers knowledgebase!<BR>\n" .
+                   "<A HREF=\"../src/retrievalerror.php?$par\">Submit message</A><BR>" .
 
-      if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) {
-         return substr($wholemessage, 0, $regs[1]);
-      }
-      else if (ereg('"([^"]*)"', $topline, $regs)) {
-         return $regs[1];
-      }
+                   '<tt>Response:  ' . htmlspecialchars($response) . '<BR>' .
+                   'Message:  ' . htmlspecialchars($message) . '<BR>' .
+                   'FETCH line:  ' . htmlspecialchars($topline) . '<BR></tt></font></b>';
 
-      $str = "Body retrieval error.  Please report this bug!\n" .
-             "Response:  $response\n" .
-             "Message:  $message\n" .
-             "FETCH line:  $topline" .
-             "---------------\n$wholemessage";
-      foreach ($data as $d) {
-          $str .= htmlspecialchars($d) . "\n";
-      }
-      return $str;
-   }
+            /* NOTE(review): this uses the global $passed_id, not the $id
+               parameter -- presumably the same message; confirm. */
+            fputs ($imap_stream, "$sid FETCH $passed_id BODY[]\r\n");
+            $data = sqimap_read_data ($imap_stream, $sid, true, $response, $message);
+            array_shift($data);
+            $wholemessage = implode('', $data);
+
+            $ret = "---------------\n$wholemessage";
+
+        }
+        return( $ret );
+    }
 
    function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
       // do a bit of error correction.  If we couldn't find the entity id, just guess
       }
    }
 
-   // figures out what entity to display and returns the $message object
-   // for that entity.
-   function findDisplayEntity ($message, $textOnly = 1)
-   {
-      global $show_html_default;
-
-      if (! $message)
-    return 0;
-
-      if ($message->header->type0 == 'multipart' &&
-          $message->header->type1 == 'alternative' &&
-      $show_html_default && ! $textOnly) {
-     $entity = findDisplayEntityHTML($message);
-     if ($entity != 0)
-        return $entity;
-      }
-
-      // Show text/plain or text/html -- the first one we find.
-      if ( $message->header->type0 == 'text' &&
-          ( $message->header->type1 == 'plain' ||
-            $message->header->type1 == 'html' ) &&
-          isset($message->header->entity_id) )
-         return $message->header->entity_id;
-
-      for ($i=0; isset($message->entities[$i]); $i++) {
-         $entity = findDisplayEntity($message->entities[$i], $textOnly);
-         if ($entity != 0)
-            return $entity;
-      }
-
-      return 0;
-   }
+    /*
+     * Figures out which entity of $message should be displayed and returns
+     * its entity id, or 0 when nothing suitable is found.
+     *
+     *   $message   message object (with ->header and optionally ->entities)
+     *   $textOnly  when true the HTML lookup is skipped
+     *
+     * Order of preference: the result of findDisplayEntityHTML() for
+     * multipart/alternative and multipart/related messages (only when
+     * $show_html_default is on and $textOnly is off), then the message
+     * itself if it is text/plain or text/html, then the first sub entity
+     * that yields a non-zero result.
+     */
+    function findDisplayEntity ($message, $textOnly = 1)   {
+        global $show_html_default;
+
+        if (! $message) {
+            return( 0 );
+        }
+
+        $found = 0;
+
+        if ( $show_html_default && ! $textOnly &&
+             $message->header->type0 == 'multipart' ) {
+            if ( $message->header->type1 == 'alternative' ||
+                 $message->header->type1 == 'related' ) {
+                $found = findDisplayEntityHTML($message);
+            }
+        }
+
+        /* Show text/plain or text/html -- the first one we find. */
+        if ( $found == 0 && $message->header->type0 == 'text' ) {
+            if ( ( $message->header->type1 == 'plain' ||
+                   $message->header->type1 == 'html' ) &&
+                 isset($message->header->entity_id) ) {
+                $found = $message->header->entity_id;
+            }
+        }
+
+        /* Still nothing: descend into the sub entities in order. */
+        for ($idx = 0; $found == 0 && isset($message->entities[$idx]); $idx++) {
+            $found = findDisplayEntity($message->entities[$idx], $textOnly);
+        }
+
+        return( $found );
+    }
 
    // Shows the HTML version
    function findDisplayEntityHTML ($message) {
 
          // If there are other types that shouldn't be formatted, add
          // them here
-         if ($body_message->header->type1 != "html" || ! $show_html_default) {
+         if ($body_message->header->type1 == 'html') {
+            if( $show_html_default <> 1 ) {
+                $body = strip_tags( $body );
+                translateText($body, $wrap_at, $body_message->header->charset);
+            } else {
+                $body = MagicHTML( $body, $id );
+            }
+         } else {
             translateText($body, $wrap_at, $body_message->header->charset);
          }
 
          $body = quoted_printable_decode($body);
          
          
-         /*
-            Following code has been comented as I see no reason for it.
-            If there is any please tell me a mingo@rotedic.com
-            
          while (ereg("=\n", $body))
             $body = ereg_replace ("=\n", "", $body);
-        */
+           
       } else if ($encoding == 'base64') {
          $body = base64_decode($body);
       }
      return( $string );
  }
 
+   /*
+    Strips dangerous tags from html messages.
+
+    Walks $body one character at a time.  Text outside of tags is copied
+    through unchanged; at every '<' the first (up to) four characters of
+    the tag name decide what happens:
+
+      APPL, EMBE, FRAM, SCRI, OBJE   everything up to and including the
+                                     closing tag is dropped
+      TITL                           the title text is remembered and
+                                     printed when the BODY tag is reached
+      HTML, HEAD, META, DIV, ...     the tag itself is dropped
+      STYL                           the BODY rule is searched for
+                                     BACKGROUND-COLOR and MARGIN-LEFT, the
+                                     other rules are kept
+      BODY, /BOD                     replaced by a wrapping TABLE
+      BASE                           re-emitted unless it starts with 'file'
+      anything else                  copied through stripEvent()
+
+    $body   the HTML text of the message part
+    $id     message id, handed on to stripEvent()
+
+    Returns the rewritten HTML, wrapped in two marker comments and
+    followed by a <base> tag built from SERVER_NAME and $PHP_SELF.
+   */
+
+   function MagicHTML( $body, $id ) {
+
+        global $message, $PHP_SELF, $HTTP_SERVER_VARS;
+
+        $j = strlen( $body );   /* Length of the HTML */
+        $ret = '';              /* Returned string */
+        $bgcolor = '#ffffff';   /* Background style color (defaults to white) */
+        $textcolor = '#000000'; /* Foreground style color (defaults to black); not read below */
+        $leftmargin = '';       /* Left margin style */
+        $title = '';            /* HTML title if any */
+
+        $i = 0;
+        while( $i < $j ) {
+            if( $body{$i} == '<' ) {
+               /* Read the first four characters of the tag name.  Both loops
+                  are bounded by $j: without the bound a '<' close to the end
+                  of the body made the second loop spin forever. */
+               $pos = $i + 1;
+               $tag = '';
+               while ($pos < $j &&
+                      ($body{$pos} == ' ' || $body{$pos} == "\t" ||
+                       $body{$pos} == "\n"))
+                   $pos ++;
+               while (strlen($tag) < 4 && $pos < $j && $body{$pos} != ' ' &&
+                      $body{$pos} != "\t" && $body{$pos} != "\n") {
+                   $tag .= $body{$pos};
+                   $pos ++;
+               }
+                switch( strtoupper( $tag ) ) {
+                    /* Strips the entire tag and contents.  'EMBE' used to be
+                       spelled 'EMBB', which no tag matches, so <embed> was
+                       passed on to the default branch.  The closing tag is
+                       searched with the same letter case as the opening one;
+                       if it is missing, the rest of the body is dropped. */
+                    case 'APPL':
+                    case 'EMBE':
+                    case 'FRAM':
+                    case 'SCRI':
+                    case 'OBJE':
+                        $etg = '/' . $tag;
+                        while( $body{$i+1}.$body{$i+2}.$body{$i+3}.$body{$i+4}.$body{$i+5} <> $etg  &&
+                               $i < $j  ) $i++;
+                        while( $i < $j && $body{++$i} <> '>' );
+                        break;
+                    /* Substitute Title */
+                    case 'TITL':
+                        $i += 5;
+                        /* End of the opening tag */
+                        while( $body{$i} <> '>' &&
+                               $i < $j )
+                                $i++;
+                        $i++;
+                        $title = '';
+                        /* Everything up to the '<' of </title> */
+                        while( $body{$i} <> '<' &&
+                               $i < $j ) {
+                            $title .= $body{$i};
+                            $i++;
+                        }
+                        /* Lands on the '>' of "</title>" */
+                        $i += 7;
+                        break;
+                    /* Destroy these tags */
+                    case 'HTML':
+                    case 'HEAD':
+                    case '/HTM':
+                    case '/HEA':
+                    case '!DOC':
+                    case 'META':
+                    case 'DIV ':
+                    case '/DIV':
+                    case '!-- ':
+                        $i += 4;
+                        while( $body{$i}  <> '>' &&
+                               $i < $j )
+                            $i++;
+                        break;
+                    case 'STYL':
+                        $i += 5;
+                        /* End of the opening tag */
+                        while( $body{$i} <> '>' &&
+                               $i < $j )
+                                $i++;
+                        $i++;
+                        /* We parse the style to look for interesting stuff */
+                        $styleblk = '';
+                        while( $body{$i} <> '>' &&
+                               $i < $j ) {
+                            /* First we get the name of the style */
+                            $style = '';
+                            while( $body{$i} <> '>' &&
+                                   $body{$i} <> '<' &&
+                                   $body{$i} <> '{' &&
+                                   $i < $j ) {
+                               if( isnoSep( $body{$i} ) )
+                                   $style .= $body{$i};
+                               $i++;
+                            }
+                            stripComments( $i, $j, $body );
+                            $style = strtoupper( trim( $style ) );
+                            if( $style == 'BODY' ) {
+                                /* Next we look into the definitions of the body style.
+                                   The property names are only matched in upper case. */
+                                while( $body{$i} <> '>' &&
+                                       $body{$i} <> '}' &&
+                                       $i < $j ) {
+                                    /* We look for the background color if any. */
+                                    if( substr( $body, $i, 17 ) == 'BACKGROUND-COLOR:' ) {
+                                        $i += 17;
+                                        $bgcolor = getStyleData( $i, $j, $body );
+                                    } elseif ( substr( $body, $i, 12 ) == 'MARGIN-LEFT:' ) {
+                                        $i += 12;
+                                        $leftmargin = getStyleData( $i, $j, $body );
+                                    }
+                                    $i++;
+                                }
+                            } else {
+                                /* Other styles are maintained */
+                                $styleblk .= "$style ";
+                                while( $body{$i} <> '>' &&
+                                       $body{$i} <> '<' &&
+                                       $body{$i} <> '}' &&
+                                       $i < $j ) {
+                                    $styleblk .= $body{$i};
+                                    $i++;
+                                }
+                                $styleblk .= $body{$i};
+                            }
+                            stripComments( $i, $j, $body );
+                            if( $body{$i} <> '>' )
+                                $i++;
+                        }
+                        if( $styleblk <> '' )
+                            $ret .= "<style>$styleblk";
+                        break;
+                    case 'BODY':
+                        /* The body becomes a one-row table that carries the
+                           background colour and the left margin. */
+                        if( $title <> '' )
+                            $ret .= '<b>' . _("Title:") . " </b>$title<br>\n";
+                        $ret .= "<TABLE";
+                        $i += 5;
+                        if (! isset($base))
+                            $base = '';
+                        $ret .= stripEvent( $i, $j, $body, $id, $base );
+                        $ret .= " bgcolor=$bgcolor width=\"100%\"><tr>";
+                        if( $leftmargin <> '' )
+                            $ret .= "<td width=$leftmargin>&nbsp;</td>";
+                        $ret .= '<td>';
+                        if (strtolower($bgcolor) == 'ffffff' ||
+                            strtolower($bgcolor) == '#ffffff')
+                            $ret .= '<font color=#000000>';
+                        break;
+                    case 'BASE':
+                        $i += 5;
+                        $base = '';
+                        while( !isNoSep( $body{$i} ) &&
+                               $i < $j )
+                                $i++;
+                        /* NOTE(review): $base is always '' at this point, so
+                           this test is always true; presumably it was meant
+                           to look at the attribute name in $body.  Left as
+                           it is. */
+                        if( strcasecmp( substr( $base, 0, 4 ), 'href'  ) ) {
+                                $i += 5;
+                                while( !isNoSep( $body{$i} ) &&
+                                       $i < $j )
+                                        $i++;
+                                /* Copy the value without its quotes; $i moves
+                                   on for every character, quote or not. */
+                                while( $body{$i} <> '>' &&
+                                       $i < $j ) {
+                                    if( $body{$i} <> '"' )
+                                        $base .= $body{$i};
+                                    $i++;
+                                }
+                                if( strcasecmp( substr( $base, 0, 4 ), 'file' ) <> 0 )
+                                        $ret .= "\n<BASE HREF=\"$base\">\n";
+                        }
+                        break;
+                    case '/BOD':
+                        $ret .= '</font></td></tr></TABLE>';
+                        $i += 6;
+                        break;
+                    default:
+                        /* Following tags can contain some event handler, lets search it */
+                        stripComments( $i, $j, $body );
+                        if (! isset($base))
+                            $base = '';
+                        $ret .= stripEvent( $i, $j, $body, $id, $base ) . '>';
+                }
+            } else {
+                $ret .= $body{$i};
+            }
+            $i++;
+        }
+
+        /* The trailing <base> drops the last 13 characters of $PHP_SELF,
+           presumably the name of the calling script -- TODO confirm. */
+        return( "\n\n<!-- HTML Output ahead -->\n" .
+                $ret .
+                "\n<!-- END of HTML Output --><base href=\"".
+                $HTTP_SERVER_VARS["SERVER_NAME"] . substr( $PHP_SELF, 0, strlen( $PHP_SELF ) - 13 ) .
+                "\">\n\n" );
+   }
+
+   /*
+      Tells whether $char is an ordinary character rather than a separator.
+      Separators are space, newline, tab, carriage return, '>' and '"'.
+
+      Returns FALSE for a separator and TRUE for anything else.
+   */
+   function isNoSep( $char ) {
+
+        $separators = array( ' ', "\n", "\t", "\r", '>', '"' );
+
+        /* Loose (==) comparison on purpose, the same a switch would do. */
+        if( in_array( $char, $separators ) ) {
+            return( FALSE );
+        }
+
+        return( TRUE );
+
+   }
+
+   /*
+      Skips over HTML comments.  This is useful to remove extra data that
+      can cause html not to display properly, especially with MS stuff.
+
+      If a comment opener starts exactly at offset $i, $i is moved to the
+      first character after its closing marker; this repeats while another
+      comment follows immediately.  If no comment starts at $i nothing
+      changes.  An unterminated comment leaves $i past $j.
+
+      $i     current offset into $body (by reference, updated in place)
+      $j     length of $body
+      $body  the HTML text (by reference, only read)
+
+      The bounds are tested before $body is looked at, and substr() is
+      used for the look-ahead, so no offset beyond the string is read.
+   */
+
+   function stripComments( &$i, $j, &$body ) {
+
+        while( $i < $j && substr( $body, $i, 4 ) == '<!--' ) {
+            /* Step over the opener; $i is the last of the three characters
+               that are compared against the closing marker. */
+            $i += 5;
+            while( $i < $j && substr( $body, $i - 2, 3 ) <> '-->' ) {
+                $i++;
+            }
+            $i++;
+        }
+
+        return;
+
+   }
+
+   /*
+      Gets the style data of a specific style: the value that follows a
+      property name inside a style rule.
+
+      $i     offset right after the "PROPERTY:" text (by reference); on
+             return it is the offset of the character that ended the value
+      $j     length of $body
+      $body  the HTML text (by reference, only read)
+
+      Returns the value without surrounding separators.  The value also
+      ends at ';' and '}', so "#ffffff;" is returned as "#ffffff" and can
+      be used as an attribute value as it is.
+   */
+
+   function getStyleData( &$i, $j, &$body ) {
+
+        /* We skip spaces (and the other separators), but never a '>' */
+        while( $i < $j &&
+               $body{$i} <> '>' && !isNoSep( $body{$i} ) ) {
+            $i++;
+        }
+        /* And get the value, up to a separator or the end of the declaration */
+        $ret = '';
+        while( $i < $j &&
+               isNoSep( $body{$i} ) &&
+               $body{$i} <> ';' && $body{$i} <> '}' ) {
+            $ret .= $body{$i};
+            $i++;
+        }
+
+        return( $ret );
+   }
+
+   /*
+   Copies one tag out of $body and defuses it on the way.  Looks for
+   event based code and "removes" it by changing on into no
+   (onload -> noload).  In addition:
+     - "script:" is rewritten so that script URLs stop working
+     - "cid:name" is replaced by a download.php link to the entity that
+       find_ent_id() returns for that name
+     - "../" is dropped when there is no base, because retrolinks mess
+       with SM security
+
+   $i     offset where copying starts (by reference); on return it is the
+          offset of the closing '>' or lies at/after $j
+   $j     length of $body
+   $body  the HTML text (by reference, only read)
+   $id    message id put into the download link
+   $base  base href in effect, '' if there is none
+
+   Returns the rewritten text; the closing '>' is not part of it.
+   */
+
+   function stripEvent( &$i, $j, &$body, $id, $base ) {
+
+        global $message;
+
+        $ret = '';
+
+        while( $i < $j &&
+               $body{$i} <> '>' ) {
+            /* Three character look-ahead; shorter at the very end */
+            $etg = strtolower( substr( $body, $i, 3 ) );
+            switch( $etg ) {
+                case '../':
+                        /* Retrolinks are not allowed without a base because they mess with SM security */
+                        if( $base == '' ) {
+                                $i += 2;
+                        } else {
+                                $ret .= '.';
+                        }
+                        break;
+                case 'cid':
+                    /* Internal link */
+                    $k = $i-1;
+                    if( $body{$i+3} == ':') {
+                        $i +=4;
+                        $name = '';
+                        while( isNoSep( $body{$i} ) &&
+                               $i < $j  )
+                            $name .= $body{$i++};
+                        if( $name <> '' ) {
+                            $ret .= "../src/download.php?absolute_dl=true&passed_id=$id&mailbox=" .
+                                        urlencode( $message->header->mailbox ) .
+                                        "&passed_ent_id=" . find_ent_id( $name, $message );
+                            /* Close the attribute the same way it was opened */
+                            if( $body{$k} == '"' )
+                                $ret .= '" ';
+                            else
+                                $ret .= ' ';
+                        }
+                        /* Step back so the loop still sees the '>' */
+                        if( $body{$i} == '>' )
+                            $i -= 1;
+                    } else {
+                        /* Plain text that merely contains "cid" (placid,
+                           lucid.html): keep the character, it used to be
+                           dropped here. */
+                        $ret .= $body{$i};
+                    }
+                    break;
+                case ' on':
+                case "\non":
+                case "\ron":
+                case "\ton":
+                    $ret .= ' no';
+                    $i += 2;
+                    break;
+                case 'pt:':
+                    /* Only "script:" is rewritten; its first four letters
+                       have already been copied at this point. */
+                    if( strcasecmp( $body{$i-4}.$body{$i-3}.$body{$i-2}.$body{$i-1}.$body{$i}.$body{$i+1}.$body{$i+2}, 'script:') == 0 ) {
+                        $ret .= '_no/';
+                    } else {
+                        $ret .= $etg;
+                    }
+                    $i += 2;
+                    break;
+                default:
+                    $ret .= $body{$i};
+            }
+            $i++;
+        }
+        return( $ret );
+    }
+
+
+    /*
+     * This function tries to locate the entity_id of a specific mime
+     * element.
+     *
+     *   $id       value that is compared, ignoring case, with the id in
+     *             the header of every entity
+     *   $message  message object whose ->entities are searched in order
+     *
+     * Entities with an empty entity_id are searched recursively instead
+     * of being compared.  Returns the entity_id of the first match, or ''
+     * when there is none.
+     */
+
+    function find_ent_id( $id, $message ) {
+
+        $found = '';
+        $total = count($message->entities);
+        $idx = 0;
+
+        while( $found == '' && $idx < $total ) {
+
+            $part = $message->entities[$idx];
+
+            if( $part->header->entity_id == '' ) {
+                $found = find_ent_id( $id, $part );
+            } elseif( strcasecmp( $part->header->id, $id ) == 0 ) {
+                $found = $part->header->entity_id;
+            }
+
+            $idx++;
+
+        }
+
+        return( $found );
+
+    }
 ?>