functions/mime.php

   1 <?php
   2
   3 /**
   4  * mime.php
   5  *
   6  * Copyright (c) 1999-2002 The SquirrelMail Project Team
   7  * Licensed under the GNU GPL. For full terms see the file COPYING.
   8  *
   9  * This contains the functions necessary to detect and decode MIME
  10  * messages.
  11  *
  12  * $Id$
  13  */
  14
  15 require_once('../functions/imap.php');
  16 require_once('../functions/attachment_common.php');
  17
  18 /** Setting up the objects that have the structure for the message **/
  19 class msg_header {
  20     /** msg_header contains generic variables for values that **/
  21     /** could be in a header.                                 **/
  22
  23     var $type0 = '', $type1 = '', $boundary = '', $charset = '',
  24         $encoding = '', $size = 0, $to = array(), $from = '', $date = '',
  25         $cc = array(), $bcc = array(), $reply_to = '', $subject = '',
  26         $id = 0, $mailbox = '', $description = '', $filename = '',
  27         $entity_id = 0, $message_id = 0, $name = '', $priority = 3, $type = '';
  28 }
  29
  30 class message {
  31     /** message is the object that contains messages.  It is a recursive
  32       object in that through the $entities variable, it can contain
  33       more objects of type message.  See documentation in mime.txt for
  34       a better description of how this works.
  35     **/
  36     var $header = '', $entities = array();
  37
  38     function addEntity ($msg) {
  39         $this->entities[] = $msg;
  40     }
  41 }
  42
  43 /* --------------------------------------------------------------------------------- */
  44 /* MIME DECODING                                                                     */
  45 /* --------------------------------------------------------------------------------- */
  46
  47 /* This function gets the structure of a message and stores it in the "message" class.
  48  * It will return this object for use with all relevant header information and
  49  * fully parsed into the standard "message" object format.
  50  */
  51 function mime_structure ($imap_stream, $header) {
  52
  53     $ssid = sqimap_session_id();
  54     $lsid = strlen( $ssid );
  55     $id = $header->id;
  56     fputs ($imap_stream, "$ssid FETCH $id BODYSTRUCTURE\r\n");
  57     //
  58     // This should use sqimap_read_data instead of reading it itself
  59     //
  60     $read = fgets ($imap_stream, 9216);
  61     $bodystructure = '';
  62     while ( substr($read, 0, $lsid) <> $ssid &&
  63          !feof( $imap_stream ) ) {
  64         $bodystructure .= $read;
  65         $read = fgets ($imap_stream, 9216);
  66     }
  67     $read = $bodystructure;
  68
  69     // isolate the body structure and remove beginning and end parenthesis
  70     $read = trim(substr ($read, strpos(strtolower($read), 'bodystructure') + 13));
  71
  72     $read = trim(substr ($read, 0, -1));
  73     $end = mime_match_parenthesis(0, $read);
  74     while ($end == strlen($read)-1) {
  75         $read = trim(substr ($read, 0, -1));
  76         $read = trim(substr ($read, 1));
  77         $end = mime_match_parenthesis(0, $read);
  78     }
  79
  80     $msg = mime_parse_structure ($read, 0);
  81     $msg->header = $header;
  82
  83     return( $msg );
  84 }
  85
  86 /* this starts the parsing of a particular structure.  It is called recursively,
  87  * so it can be passed different structures.  It returns an object of type
  88  * $message.
  89  * First, it checks to see if it is a multipart message.  If it is, then it
  90  * handles that as it sees is necessary.  If it is just a regular entity,
  91  * then it parses it and adds the necessary header information (by calling out
  92  * to mime_get_elements()
  93  */
  94 function mime_parse_structure ($structure, $ent_id) {
  95   $properties = array();
  96   $msg = new message();
  97   if ($structure{0} == '(') {
  98      $old_ent_id = $ent_id;
  99      $ent_id = mime_new_element_level($ent_id);
 100      $start = $end = -1;
 101      do {
 102         $start = $end+1;
 103         $end = mime_match_parenthesis ($start, $structure);
 104         /* add "forgotten"  parent entities (alternative and relative) */
 105         if (strpos($ent_id, '0')  || strpos($ent_id, '0') == 0) {
 106             $str = substr($structure, $end+1 );
 107             $startprop = strrpos($str,'(');
 108             $endprop   = strrpos($str,')');
 109             $propstr = substr($str, $startprop + 1, ($endprop - $startprop)-1);
 110
 111             $type1 = trim(substr($str,0, $startprop));
 112             $pos = strrpos($type1,' ');
 113             $type1 = strtolower(trim(substr($type1,$pos +1)));
 114             $cnt = strlen($type1);
 115             $type1 = substr($type1,0,$cnt-1);
 116
 117             $properties = mime_get_props($properties, $propstr);
 118             if (count($properties)>0) {
 119                 $msg->header->entity_id = $old_ent_id;
 120                 $msg->header->type0 = 'multipart';
 121                 $msg->header->type1 = $type1;
 122             }
 123             for ($i=0; $i < count($properties); $i++) {
 124                 $msg->header->{$properties[$i]['name']} = $properties[$i]['value'];
 125                 $name = $properties[$i]['name'];
 126                 $value = $properties[$i]['value'];
 127             }
 128         }
 129
 130         $element = substr($structure, $start+1, ($end - $start)-1);
 131
 132         $ent_id = mime_increment_id ($ent_id);
 133         $newmsg = mime_parse_structure ($element, $ent_id);
 134         $msg->addEntity ($newmsg);
 135
 136      } while ($structure{$end+1} == '(');
 137   } else {
 138      // parse the elements
 139     $msg = mime_get_element ($structure, $msg, $ent_id);
 140   }
 141   return $msg;
 142 }
 143
 144 /* Increments the element ID.  An element id can look like any of
 145  * the following:  1, 1.2, 4.3.2.4.1, etc.  This function increments
 146  * the last number of the element id, changing 1.2 to 1.3.
 147  */
 148 function mime_increment_id ($id) {
 149
 150     if (strpos($id, '.')) {
 151         $first = substr($id, 0, strrpos($id, '.'));
 152         $last = substr($id, strrpos($id, '.')+1);
 153         $last++;
 154         $new = $first . '.' .$last;
 155     } else {
 156         $new = $id + 1;
 157     }
 158
 159     return $new;
 160 }
 161
 162 /*
 163  * See comment for mime_increment_id().
 164  * This adds another level on to the entity_id changing 1.3 to 1.3.0
 165  * NOTE:  1.3.0 is not a valid element ID.  It MUST be incremented
 166  *        before it can be used.  I left it this way so as not to have
 167  *        to make a special case if it is the first entity_id.  It
 168  *        always increments it, and that works fine.
 169  */
 170 function mime_new_element_level ($id) {
 171
 172     if (!$id) {
 173         $id = 0;
 174     } else {
 175         $id = $id . '.0';
 176     }
 177
 178     return( $id );
 179 }
 180
 181 function mime_get_element (&$structure, $msg, $ent_id) {
 182
 183   $elem_num = 1;
 184   $msg->header = new msg_header();
 185   $msg->header->entity_id = $ent_id;
 186   $properties = array();
 187   while (strlen($structure) > 0) {
 188      $structure = trim($structure);
 189      $char = $structure{0};
 190
 191      if (strtolower(substr($structure, 0, 3)) == 'nil') {
 192         $text = '';
 193         $structure = substr($structure, 3);
 194      } else if ($char == '"') {
 195         // loop through until we find the matching quote, and return that as a string
 196         $pos = 1;
 197         $text = '';
 198         while ( ($char = $structure{$pos} ) <> '"' && $pos < strlen($structure)) {
 199            $text .= $char;
 200            $pos++;
 201         }
 202         $structure = substr($structure, strlen($text) + 2);
 203      } else if ($char == '(') {
 204         // comment me
 205         $end = mime_match_parenthesis (0, $structure);
 206         $sub = substr($structure, 1, $end-1);
 207         $properties = mime_get_props($properties, $sub);
 208         $structure = substr($structure, strlen($sub) + 2);
 209      } else {
 210         // loop through until we find a space or an end parenthesis
 211         $pos = 0;
 212         $char = $structure{$pos};
 213         $text = '';
 214         while ($char != ' ' && $char != ')' && $pos < strlen($structure)) {
 215            $text .= $char;
 216            $pos++;
 217            $char = $structure{$pos};
 218         }
 219         $structure = substr($structure, strlen($text));
 220      }
 221
 222      // This is where all the text parts get put into the header
 223      switch ($elem_num) {
 224         case 1:
 225            $msg->header->type0 = strtolower($text);
 226            break;
 227         case 2:
 228            $msg->header->type1 = strtolower($text);
 229            break;
 230         case 4: // Id
 231            // Invisimail enclose images with <>
 232            $msg->header->id = str_replace( '<', '', str_replace( '>', '', $text ) );
 233            break;
 234         case 5:
 235            $msg->header->description = $text;
 236            break;
 237         case 6:
 238            $msg->header->encoding = strtolower($text);
 239            break;
 240         case 7:
 241            $msg->header->size = $text;
 242            break;
 243         default:
 244            if ($msg->header->type0 == 'text' && $elem_num == 8) {
 245               // This is a plain text message, so lets get the number of lines
 246               // that it contains.
 247               $msg->header->num_lines = $text;
 248
 249            } else if ($msg->header->type0 == 'message' && $msg->header->type1 == 'rfc822' && $elem_num == 8) {
 250               // This is an encapsulated message, so lets start all over again and
 251               // parse this message adding it on to the existing one.
 252               $structure = trim($structure);
 253               if ( $structure{0} == '(' ) {
 254                  $e = mime_match_parenthesis (0, $structure);
 255                  $structure = substr($structure, 0, $e);
 256                  $structure = substr($structure, 1);
 257                  $m = mime_parse_structure($structure, $msg->header->entity_id);
 258
 259                  // the following conditional is there to correct a bug that wasn't
 260                  // incrementing the entity IDs correctly because of the special case
 261                  // that message/rfc822 is.  This fixes it fine.
 262                  if (substr($structure, 1, 1) != '(')
 263                     $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id));
 264
 265                  // Now we'll go through and reformat the results.
 266                  if ($m->entities) {
 267                     for ($i=0; $i < count($m->entities); $i++) {
 268                        $msg->addEntity($m->entities[$i]);
 269                     }
 270                  } else {
 271                     $msg->addEntity($m);
 272                  }
 273                  $structure = "";
 274               }
 275            }
 276            break;
 277      }
 278      $elem_num++;
 279      $text = "";
 280   }
 281   // loop through the additional properties and put those in the various headers
 282   for ($i=0; $i < count($properties); $i++) {
 283      $msg->header->{$properties[$i]['name']} = $properties[$i]['value'];
 284   }
 285
 286   return $msg;
 287 }
 288
 289 /*
 290  * I did most of the MIME stuff yesterday (June 20, 2000), but I couldn't
 291  * figure out how to do this part, so I decided to go to bed.  I woke up
 292  * in the morning and had a flash of insight.  I went to the white-board
 293  * and scribbled it out, then spent a bit programming it, and this is the
 294  * result.  Nothing complicated, but I think my brain was fried yesterday.
 295  * Funny how that happens some times.
 296  *
 297  * This gets properties in a nested parenthesisized list.  For example,
 298  * this would get passed something like:  ("attachment" ("filename" "luke.tar.gz"))
 299  * This returns an array called $props with all paired up properties.
 300  * It ignores the "attachment" for now, maybe that should change later
 301  * down the road.  In this case, what is returned is:
 302  *    $props[0]["name"] = "filename";
 303  *    $props[0]["value"] = "luke.tar.gz";
 304  */
 305 function mime_get_props ($props, $structure) {
 306
 307   while (strlen($structure) > 0) {
 308      $structure = trim($structure);
 309      $char = $structure{0};
 310      if ($char == '"') {
 311         $pos = 1;
 312         $tmp = '';
 313         while ( ( $char = $structure{$pos} ) != '"' &&
 314                 $pos < strlen($structure)) {
 315            $tmp .= $char;
 316            $pos++;
 317         }
 318         $structure = trim(substr($structure, strlen($tmp) + 2));
 319         $char = $structure{0};
 320
 321         if ($char == '"') {
 322            $pos = 1;
 323            $value = '';
 324            while ( ( $char = $structure{$pos} ) != '"' &&
 325                    $pos < strlen($structure) ) {
 326               $value .= $char;
 327               $pos++;
 328            }
 329            $structure = trim(substr($structure, strlen($value) + 2));
 330            $k = count($props);
 331            $props[$k]['name'] = strtolower($tmp);
 332            $props[$k]['value'] = $value;
 333            if ($structure != '') {
 334                 mime_get_props($props, $structure);
 335            } else {
 336              return $props;
 337            }
 338         } else if ($char == '(') {
 339            $end = mime_match_parenthesis (0, $structure);
 340            $sub = substr($structure, 1, $end-1);
 341            if (! isset($props))
 342               $props = array();
 343               $props = mime_get_props($props, $sub);
 344               $structure = substr($structure, strlen($sub) + 2);
 345            return $props;
 346         }
 347      } else if ($char == '(') {
 348         $end = mime_match_parenthesis (0, $structure);
 349         $sub = substr($structure, 1, $end-1);
 350         $props = mime_get_props($props, $sub);
 351         $structure = substr($structure, strlen($sub) + 2);
 352         return $props;
 353      } else {
 354         return $props;
 355      }
 356   }
 357 }
 358
 359 /*
 360  *  Matches parenthesis.  It will return the position of the matching
 361  *  parenthesis in $structure.  For instance, if $structure was:
 362  *     ("text" "plain" ("val1name", "1") nil ... )
 363  *     x                                         x
 364  *  then this would return 42 to match up those two.
 365  */
 366 function mime_match_parenthesis ($pos, $structure) {
 367
 368     $j = strlen( $structure );
 369
 370     // ignore all extra characters
 371     // If inside of a string, skip string -- Boundary IDs and other
 372     // things can have ) in them.
 373     if ( $structure{$pos} != '(' ) {
 374         return( $j );
 375     }
 376
 377     while ( $pos < $j ) {
 378         $pos++;
 379         if ($structure{$pos} == ')') {
 380             return $pos;
 381         } elseif ($structure{$pos} == '"') {
 382             $pos++;
 383             while ( $structure{$pos} != '"' &&
 384                     $pos < $j ) {
 385                if (substr($structure, $pos, 2) == '\\"') {
 386                   $pos++;
 387                } elseif (substr($structure, $pos, 2) == '\\\\') {
 388                   $pos++;
 389                }
 390                $pos++;
 391             }
 392         } elseif ( $structure{$pos} == '(' ) {
 393             $pos = mime_match_parenthesis ($pos, $structure);
 394         }
 395     }
 396     echo _("Error decoding mime structure.  Report this as a bug!") . '<br>';
 397     return( $pos );
 398 }
 399
 400 function mime_fetch_body($imap_stream, $id, $ent_id ) {
 401
 402     /*
 403      * do a bit of error correction.  If we couldn't find the entity id, just guess
 404      * that it is the first one.  That is usually the case anyway.
 405      */
 406     if (!$ent_id) {
 407         $ent_id = 1;
 408     }
 409
 410     $cmd = "FETCH $id BODY[$ent_id]";
 411     $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message);
 412
 413     do {
 414         $topline = trim(array_shift( $data ));
 415     } while( $topline && $topline[0] == '*' && !preg_match( '/\* [0-9]+ FETCH.*/i', $topline )) ;
 416     $wholemessage = implode('', $data);
 417     if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) {
 418
 419         $ret = substr( $wholemessage, 0, $regs[1] );
 420         /*
 421             There is some information in the content info header that could be important
 422             in order to parse html messages. Let's get them here.
 423         */
 424         if ( $ret{0} == '<' ) {
 425             $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message);
 426             /* BASE within HTML documents is illegal (see w3 spec)
 427 *            $base = '';
 428 *            $k = 10;
 429 *            foreach( $data as $d ) {
 430 *                if ( substr( $d, 0, 13 ) == 'Content-Base:' ) {
 431 *                    $j = strlen( $d );
 432 *                    $i = 13;
 433 *                    $base = '';
 434 *                    while ( $i < $j &&
 435 *                           ( !isNoSep( $d{$i} ) || $d{$i} == '"' )  )
 436 *                        $i++;
 437 *                    while ( $i < $j ) {
 438 *                        if ( isNoSep( $d{$i} ) )
 439 *                            $base .= $d{$i};
 440 *                        $i++;
 441 *                    }
 442 *                    $k = 0;
 443 *                } elseif ( $k == 1 && !isnosep( $d{0} ) ) {
 444 *                    $base .= substr( $d, 1 );
 445 *                }
 446 *                $k++;
 447 *            }
 448 *            if ( $base <> '' ) {
 449 *                $ret = "<base href=\"$base\">" . $ret;
 450 *            }
 451 *           */
 452         }
 453     } else if (ereg('"([^"]*)"', $topline, $regs)) {
 454         $ret = $regs[1];
 455     } else {
 456         global $where, $what, $mailbox, $passed_id, $startMessage;
 457         $par = 'mailbox=' . urlencode($mailbox) . "&amp;passed_id=$passed_id";
 458         if (isset($where) && isset($what)) {
 459             $par .= '&amp;where='. urlencode($where) . "&amp;what=" . urlencode($what);
 460         } else {
 461             $par .= "&amp;startMessage=$startMessage&amp;show_more=0";
 462         }
 463         $par .= '&amp;response=' . urlencode($response) .
 464                 '&amp;message=' . urlencode($message).
 465                 '&amp;topline=' . urlencode($topline);
 466
 467         echo   '<tt><br>' .
 468                '<table width="80%"><tr>' .
 469                '<tr><td colspan=2>' .
 470                _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") .
 471                " <A HREF=\"../src/retrievalerror.php?$par\"><br>" .
 472                _("Submit message") . '</A><BR>&nbsp;' .
 473                '</td></tr>' .
 474                '<td><b>' . _("Command:") . "</td><td>$cmd</td></tr>" .
 475                '<td><b>' . _("Response:") . "</td><td>$response</td></tr>" .
 476                '<td><b>' . _("Message:") . "</td><td>$message</td></tr>" .
 477                '<td><b>' . _("FETCH line:") . "</td><td>$topline</td></tr>" .
 478                "</table><BR></tt></font><hr>";
 479
 480         $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message);
 481         array_shift($data);
 482         $wholemessage = implode('', $data);
 483
 484         $ret = $wholemessage;
 485     }
 486     return( $ret );
 487 }
 488
 489 function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
 490     // do a bit of error correction.  If we couldn't find the entity id, just guess
 491     // that it is the first one.  That is usually the case anyway.
 492     if (!$ent_id) {
 493         $ent_id = 1;
 494     }
 495     $sid = sqimap_session_id();
 496     // Don't kill the connection if the browser is over a dialup
 497     // and it would take over 30 seconds to download it.
 498
 499     // don´t call set_time_limit in safe mode.
 500     if (!ini_get("safe_mode")) {
 501         set_time_limit(0);
 502     }
 503
 504     fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id]\r\n");
 505     $cnt = 0;
 506     $continue = true;
 507     $read = fgets ($imap_stream,4096);
 508     // This could be bad -- if the section has sqimap_session_id() . ' OK'
 509     // or similar, it will kill the download.
 510     while (!ereg("^".$sid." (OK|BAD|NO)(.*)$", $read, $regs)) {
 511       if (trim($read) == ')==') {
 512           $read1 = $read;
 513           $read = fgets ($imap_stream,4096);
 514           if (ereg("^".$sid." (OK|BAD|NO)(.*)$", $read, $regs)) {
 515               return;
 516           } else {
 517               echo decodeBody($read1, $encoding) .
 518                    decodeBody($read, $encoding);
 519           }
 520       } else if ($cnt) {
 521           echo decodeBody($read, $encoding);
 522       }
 523       $read = fgets ($imap_stream,4096);
 524       $cnt++;
 525     }
 526 }
 527
 528 /* -[ END MIME DECODING ]----------------------------------------------------------- */
 529
 530
 531
 532 /* This is the first function called.  It decides if this is a multipart
 533    message or if it should be handled as a single entity
 534  */
 535 function decodeMime ($imap_stream, &$header) {
 536     global $username, $key, $imapServerAddress, $imapPort;
 537     return mime_structure ($imap_stream, $header);
 538 }
 539
 540 // This is here for debugging purposese.  It will print out a list
 541 // of all the entity IDs that are in the $message object.
 542
 543 function listEntities ($message) {
 544 if ($message) {
 545  if ($message->header->entity_id)
 546  echo "<tt>" . $message->header->entity_id . ' : ' . $message->header->type0 . '/' . $message->header->type1 . '<br>';
 547  for ($i = 0; $message->entities[$i]; $i++) {
 548     $msg = listEntities($message->entities[$i], $ent_id);
 549     if ($msg)
 550        return $msg;
 551  }
 552 }
 553 }
 554
 555
 556 /* returns a $message object for a particular entity id */
 557 function getEntity ($message, $ent_id) {
 558     if ($message) {
 559         if ($message->header->entity_id == $ent_id && strlen($ent_id) == strlen($message->header->entity_id))
 560         {
 561             return $message;
 562         } else {
 563             for ($i = 0; isset($message->entities[$i]); $i++) {
 564                 $msg = getEntity ($message->entities[$i], $ent_id);
 565                 if ($msg) {
 566                     return $msg;
 567                 }
 568             }
 569         }
 570     }
 571 }
 572
 573 /*
 574  * figures out what entity to display and returns the $message object
 575  * for that entity.
 576  */
 577 function findDisplayEntity ($msg, $textOnly = 1)   {
 578     global $show_html_default;
 579
 580     $entity = 0;
 581
 582     if ($msg) {
 583         if ( $msg->header->type0 == 'multipart' &&
 584              ( $msg->header->type1 == 'alternative' ||
 585                $msg->header->type1 == 'mixed' ||
 586                $msg->header->type1 == 'related' ) &&
 587              $show_html_default && ! $textOnly ) {
 588             $entity = findDisplayEntityHTML($msg);
 589         }
 590
 591         // Show text/plain or text/html -- the first one we find.
 592         if ( $entity == 0 &&
 593              $msg->header->type0 == 'text' &&
 594              ( $msg->header->type1 == 'plain' ||
 595                $msg->header->type1 == 'html' ) &&
 596              isset($msg->header->entity_id) ) {
 597             $entity = $msg->header->entity_id;
 598         }
 599
 600         $i = 0;
 601         while ($entity == 0 && isset($msg->entities[$i]) ) {
 602             $entity = findDisplayEntity($msg->entities[$i], $textOnly);
 603             $i++;
 604         }
 605     }
 606
 607     return( $entity );
 608 }
 609
 610 /* Shows the HTML version */
 611 function findDisplayEntityHTML ($message) {
 612
 613     if ( $message->header->type0 == 'text' &&
 614          $message->header->type1 == 'html' &&
 615          isset($message->header->entity_id)) {
 616         return $message->header->entity_id;
 617     }
 618     for ($i = 0; isset($message->entities[$i]); $i ++) {
 619         if ( $message->header->type0 == 'message' &&
 620             $message->header->type1 == 'rfc822' &&
 621             isset($message->header->entity_id)) {
 622             return 0;
 623         }
 624         $entity = findDisplayEntityHTML($message->entities[$i]);
 625         if ($entity != 0) {
 626             return $entity;
 627         }
 628     }
 629
 630     return 0;
 631 }
 632
 633 /*
 634  * translateText
 635  * Extracted from strings.php 23/03/2002
 636  */
 637
 638 function translateText(&$body, $wrap_at, $charset) {
 639     global $where, $what; /* from searching */
 640     global $color; /* color theme */
 641
 642     require_once('../functions/url_parser.php');
 643
 644     $body_ary = explode("\n", $body);
 645     $PriorQuotes = 0;
 646     for ($i=0; $i < count($body_ary); $i++) {
 647         $line = $body_ary[$i];
 648         if (strlen($line) - 2 >= $wrap_at) {
 649             sqWordWrap($line, $wrap_at);
 650         }
 651         $line = charset_decode($charset, $line);
 652         $line = str_replace("\t", '        ', $line);
 653
 654         parseUrl ($line);
 655
 656         $Quotes = 0;
 657         $pos = 0;
 658         $j = strlen( $line );
 659
 660         while ( $pos < $j ) {
 661             if ($line[$pos] == ' ') {
 662                 $pos ++;
 663             } else if (strpos($line, '&gt;', $pos) === $pos) {
 664                 $pos += 4;
 665                 $Quotes ++;
 666             } else {
 667                 break;
 668             }
 669         }
 670
 671         if ($Quotes > 1) {
 672             if (! isset($color[14])) {
 673                 $color[14] = '#FF0000';
 674             }
 675             $line = '<FONT COLOR="' . $color[14] . '">' . $line . '</FONT>';
 676         } elseif ($Quotes) {
 677             if (! isset($color[13])) {
 678                 $color[13] = '#800000';
 679             }
 680             $line = '<FONT COLOR="' . $color[13] . '">' . $line . '</FONT>';
 681         }
 682
 683         $body_ary[$i] = $line;
 684     }
 685     $body = '<pre>' . implode("\n", $body_ary) . '</pre>';
 686 }
 687
 688 /* debugfunction for looping through entities and displaying correct entities */
 689 function listMyEntities ($message) {
 690
 691 if ($message) {
 692     if ($message->header->entity_id) {
 693         echo "<tt>" . $message->header->entity_id . ' : ' . $message->header->type0 . '/' . $message->header->type1 . '<br>';
 694     }
 695     if (!($message->header->type0 == 'message' &&  $message->header->type1 == 'rfc822')) {
 696         if (isset($message->header->boundary) ) {
 697             $ent_id = $message->header->entity_id;
 698             $var = $message->header->boundary;
 699             if ($var !='')
 700             echo "<b>$ent_id boundary = $var</b><br>";
 701         }
 702         if (isset($message->header->type) ) {
 703             $var = $message->header->type;
 704             if ($var !='')
 705             echo "<b>$ent_id type = $var</b><br>";
 706         }
 707         for ($i = 0; $message->entities[$i]; $i++) {
 708             $msg = listMyEntities($message->entities[$i]);
 709         }
 710
 711         if ($msg )  return $msg;
 712     }
 713 }
 714
 715 }
 716
 717
 718
 719 /* This returns a parsed string called $body. That string can then
 720 be displayed as the actual message in the HTML. It contains
 721 everything needed, including HTML Tags, Attachments at the
 722 bottom, etc.
 723 */
 724 function formatBody($imap_stream, $message, $color, $wrap_at) {
 725     // this if statement checks for the entity to show as the
 726     // primary message. To add more of them, just put them in the
 727     // order that is their priority.
 728     global $startMessage, $username, $key, $imapServerAddress, $imapPort, $body,
 729            $show_html_default, $has_unsafe_images, $view_unsafe_images, $sort;
 730
 731     $has_unsafe_images = 0;
 732
 733     $id = $message->header->id;
 734
 735     $urlmailbox = urlencode($message->header->mailbox);
 736 //    ListMyEntities($message);
 737     // Get the right entity and redefine message to be this entity
 738     // Pass the 0 to mean that we want the 'best' viewable one
 739     $ent_num = findDisplayEntity ($message, 0);
 740     $body_message = getEntity($message, $ent_num);
 741     if (($body_message->header->type0 == 'text') ||
 742         ($body_message->header->type0 == 'rfc822')) {
 743         $body = mime_fetch_body ($imap_stream, $id, $ent_num);
 744         $body = decodeBody($body, $body_message->header->encoding);
 745         $hookResults = do_hook("message_body", $body);
 746         $body = $hookResults[1];
 747         // If there are other types that shouldn't be formatted, add
 748         // them here
 749         if ($body_message->header->type1 == 'html') {
 750             if ( $show_html_default <> 1 ) {
 751                 $body = strip_tags( $body );
 752                 translateText($body, $wrap_at, $body_message->header->charset);
 753             } else {
 754                 $body = MagicHTML( $body, $id );
 755             }
 756         } else {
 757             translateText($body, $wrap_at, $body_message->header->charset);
 758         }
 759
 760         $body .= "<CENTER><SMALL><A HREF=\"../src/download.php?absolute_dl=true&amp;passed_id=$id&amp;passed_ent_id=$ent_num&amp;mailbox=$urlmailbox&amp;showHeaders=1\">". _("Download this as a file") ."</A></SMALL></CENTER><BR>";
 761         if ($has_unsafe_images) {
 762             if ($view_unsafe_images) {
 763                 $body .= "<CENTER><SMALL><A HREF=\"read_body.php?passed_id=$id&amp;mailbox=$urlmailbox&amp;sort=$sort&amp;startMessage=$startMessage&amp;show_more=0\">". _("Hide Unsafe Images") ."</A></SMALL></CENTER><BR>\n";
 764             } else {
 765                 $body .= "<CENTER><SMALL><A HREF=\"read_body.php?passed_id=$id&amp;mailbox=$urlmailbox&amp;sort=$sort&amp;startMessage=$startMessage&amp;show_more=0&amp;view_unsafe_images=1\">". _("View Unsafe Images") ."</A></SMALL></CENTER><BR>\n";
 766             }
 767         }
 768
 769         /** Display the ATTACHMENTS: message if there's more than one part **/
 770         if (isset($message->entities[0])) {
 771             $body .= formatAttachments ($message, $ent_num, $message->header->mailbox, $id);
 772         }
 773     } else {
 774         $body = formatAttachments ($message, -1, $message->header->mailbox, $id);
 775     }
 776     return ($body);
 777 }
 778
 779 /*
 780  * A recursive function that returns a list of attachments with links
 781  * to where to download these attachments
 782  */
 783 function formatAttachments($message, $ent_id, $mailbox, $id) {
 784     global $where, $what;
 785     global $startMessage, $color;
 786     static $ShownHTML = 0;
 787
 788     $body = '';
 789     if ($ShownHTML == 0) {
 790
 791         $ShownHTML = 1;
 792         $body .= "<TABLE WIDTH=\"100%\" CELLSPACING=0 CELLPADDING=2 BORDER=0 BGCOLOR=\"$color[0]\"><TR>\n" .
 793                 "<TH ALIGN=\"left\" BGCOLOR=\"$color[9]\"><B>\n" .
 794                 _("Attachments") . ':' .
 795                 "</B></TH></TR><TR><TD>\n" .
 796                 "<TABLE CELLSPACING=0 CELLPADDING=1 BORDER=0>\n" .
 797                 formatAttachments($message, $ent_id, $mailbox, $id) .
 798                 "</TABLE></TD></TR></TABLE>";
 799
 800     } else if ($message) {
 801         $header = $message->header;
 802         $type0 = strtolower($header->type0);
 803         $type1 = strtolower($header->type1);
 804         $name = '';
 805         if (isset($header->name)) {
 806             $name = decodeHeader($header->name);
 807         }
 808         if ($type0 =='message' && $type1 == 'rfc822') {
 809
 810             $filename = decodeHeader($message->header->filename);
 811             if (trim($filename) == '') {
 812                 if (trim($name) == '') {
 813                     $display_filename = 'untitled-[' . $message->header->entity_id . ']' ;
 814                 } else {
 815                     $display_filename = $name;
 816                     $filename = $name;
 817                 }
 818             } else {
 819                 $display_filename = $filename;
 820             }
 821
 822             $urlMailbox = urlencode($mailbox);
 823             $ent = urlencode($message->header->entity_id);
 824
 825             $DefaultLink =
 826                 "../src/download.php?startMessage=$startMessage&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;passed_ent_id=$ent";
 827             if ($where && $what) {
 828                 $DefaultLink .= '&amp;where=' . urlencode($where) . '&amp;what=' . urlencode($what);
 829             }
 830             $Links['download link']['text'] = _("download");
 831             $Links['download link']['href'] =
 832                 "../src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;passed_ent_id=$ent";
 833             $ImageURL = '';
 834
 835             /* this executes the attachment hook with a specific MIME-type.
 836                 * if that doens't have results, it tries if there's a rule
 837                 * for a more generic type. */
 838             $HookResults = do_hook("attachment $type0/$type1", $Links,
 839                 $startMessage, $id, $urlMailbox, $ent, $DefaultLink, $display_filename, $where, $what);
 840             if(count($HookResults[1]) <= 1) {
 841                 $HookResults = do_hook("attachment $type0/*", $Links,
 842                 $startMessage, $id, $urlMailbox, $ent, $DefaultLink,
 843                 $display_filename, $where, $what);
 844             }
 845
 846             $Links = $HookResults[1];
 847             $DefaultLink = $HookResults[6];
 848
 849             $body .= '<TR><TD>&nbsp;&nbsp;</TD><TD>' .
 850                         "<A HREF=\"$DefaultLink\">$display_filename</A>&nbsp;</TD>" .
 851                         '<TD><SMALL><b>' . show_readable_size($message->header->size) .
 852                         '</b>&nbsp;&nbsp;</small></TD>' .
 853                         "<TD><SMALL>[ $type0/$type1 ]&nbsp;</SMALL></TD>" .
 854                         '<TD><SMALL>';
 855             if ($message->header->description) {
 856                 $body .= '<b>' . htmlspecialchars(_($message->header->description)) . '</b>';
 857             }
 858             $body .= '</SMALL></TD><TD><SMALL>&nbsp;';
 859
 860
 861             $SkipSpaces = 1;
 862             foreach ($Links as $Val) {
 863                 if ($SkipSpaces) {
 864                     $SkipSpaces = 0;
 865                 } else {
 866                     $body .= '&nbsp;&nbsp;|&nbsp;&nbsp;';
 867                 }
 868                 $body .= '<a href="' . $Val['href'] . '">' .  $Val['text'] . '</a>';
 869             }
 870
 871             unset($Links);
 872
 873             $body .= "</SMALL></TD></TR>\n";
 874
 875             return( $body );
 876
 877         } elseif (!$message->entities) {
 878
 879             $type0 = strtolower($message->header->type0);
 880             $type1 = strtolower($message->header->type1);
 881             $name = decodeHeader($message->header->name);
 882
 883             if ($message->header->entity_id != $ent_id) {
 884             $filename = decodeHeader($message->header->filename);
 885             if (trim($filename) == '') {
 886                 if (trim($name) == '') {
 887                     if ( trim( $message->header->id ) == '' )
 888                         $display_filename = 'untitled-[' . $message->header->entity_id . ']' ;
 889                     else
 890                         $display_filename = 'cid: ' . $message->header->id;
 891                     // $display_filename = 'untitled-[' . $message->header->entity_id . ']' ;
 892                 } else {
 893                     $display_filename = $name;
 894                     $filename = $name;
 895                 }
 896             } else {
 897                 $display_filename = $filename;
 898             }
 899
 900             $urlMailbox = urlencode($mailbox);
 901             $ent = urlencode($message->header->entity_id);
 902
 903             $DefaultLink =
 904                 "../src/download.php?startMessage=$startMessage&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;passed_ent_id=$ent";
 905             if ($where && $what) {
 906                 $DefaultLink .= '&amp;where=' . urlencode($where) . '&amp;what=' . urlencode($what);
 907             }
 908             $Links['download link']['text'] = _("download");
 909             $Links['download link']['href'] =
 910                 "../src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;passed_ent_id=$ent";
 911             $ImageURL = '';
 912
 913             /* this executes the attachment hook with a specific MIME-type.
 914                 * if that doens't have results, it tries if there's a rule
 915                 * for a more generic type. */
 916             $HookResults = do_hook("attachment $type0/$type1", $Links,
 917                 $startMessage, $id, $urlMailbox, $ent, $DefaultLink,
 918                 $display_filename, $where, $what);
 919             if(count($HookResults[1]) <= 1) {
 920                 $HookResults = do_hook("attachment $type0/*", $Links,
 921                 $startMessage, $id, $urlMailbox, $ent, $DefaultLink,
 922                 $display_filename, $where, $what);
 923             }
 924
 925             $Links = $HookResults[1];
 926             $DefaultLink = $HookResults[6];
 927
 928             $body .= '<TR><TD>&nbsp;&nbsp;</TD><TD>' .
 929                         "<A HREF=\"$DefaultLink\">$display_filename</A>&nbsp;</TD>" .
 930                         '<TD><SMALL><b>' . show_readable_size($message->header->size) .
 931                         '</b>&nbsp;&nbsp;</small></TD>' .
 932                         "<TD><SMALL>[ $type0/$type1 ]&nbsp;</SMALL></TD>" .
 933                         '<TD><SMALL>';
 934             if ($message->header->description) {
 935                 $body .= '<b>' . htmlspecialchars(_($message->header->description)) . '</b>';
 936             }
 937             $body .= '</SMALL></TD><TD><SMALL>&nbsp;';
 938
 939
 940             $SkipSpaces = 1;
 941             foreach ($Links as $Val) {
 942                 if ($SkipSpaces) {
 943                     $SkipSpaces = 0;
 944                 } else {
 945                     $body .= '&nbsp;&nbsp;|&nbsp;&nbsp;';
 946                 }
 947                 $body .= '<a href="' . $Val['href'] . '">' .  $Val['text'] . '</a>';
 948             }
 949
 950             unset($Links);
 951
 952             $body .= "</SMALL></TD></TR>\n";
 953             }
 954         } else {
 955             for ($i = 0; $i < count($message->entities); $i++) {
 956                 $body .= formatAttachments($message->entities[$i], $ent_id, $mailbox, $id);
 957             }
 958         }
 959     }
 960     return( $body );
 961 }
 962
 963
 964 /** this function decodes the body depending on the encoding type. **/
 965 function decodeBody($body, $encoding) {
 966   $body = str_replace("\r\n", "\n", $body);
 967   $encoding = strtolower($encoding);
 968
 969   global $show_html_default;
 970
 971   if ($encoding == 'quoted-printable') {
 972      $body = quoted_printable_decode($body);
 973
 974
 975      while (ereg("=\n", $body))
 976         $body = ereg_replace ("=\n", "", $body);
 977
 978   } else if ($encoding == 'base64') {
 979      $body = base64_decode($body);
 980   }
 981
 982   // All other encodings are returned raw.
 983   return $body;
 984 }
 985
 986 /*
 987  * This functions decode strings that is encoded according to
 988  * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
 989  * Patched by Christian Schmidt <christian@ostenfeld.dk>  23/03/2002
 990  */
 991 function decodeHeader ($string, $utfencode=true) {
 992     if (is_array($string)) {
 993         $string = implode("\n", $string);
 994     }
 995     $i = 0;
 996     while (preg_match('/^(.{' . $i . '})(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=/Ui',
 997                       $string, $res)) {
 998         $prefix = $res[1];
 999         // Ignore white-space between consecutive encoded-words
1000         if (strspn($res[2], " \t") != strlen($res[2])) {
1001             $prefix .= $res[2];
1002         }
1003
1004         if (ucfirst($res[4]) == 'B') {
1005             $replace = base64_decode($res[5]);
1006         } else {
1007             $replace = str_replace('_', ' ', $res[5]);
1008             $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))',
1009                                     $replace);
1010             /* Only encode into entities by default. Some places
1011                don't need the encoding, like the compose form. */
1012             if ($utfencode) {
1013                 $replace = charset_decode($res[3], $replace);
1014             }
1015         }
1016         $string = $prefix . $replace . substr($string, strlen($res[0]));
1017         $i = strlen($prefix) + strlen($replace);
1018     }
1019     return( $string );
1020 }
1021
/*
 * Encode a string according to RFC 1522 for use in headers if it
 * contains 8-bit characters or anything that looks like it should
 * be encoded.
 *
 * The string is returned unchanged unless it contains a byte above 126
 * or the sequence "=?".  Otherwise it is returned as a single
 * "=?<default_charset>?Q?...?=" word in which "=", "?" and "_" are
 * hex-escaped, spaces become "_" and bytes above 126 become "=XX".
 *
 * @param  $string  the header text to encode
 * @return          the original or the encoded string
 */
function encodeHeader ($string) {
    global $default_charset;

    // Encode only if the string contains 8-bit characters or =?
    $must_encode = (strstr($string, '=?') !== false);
    $length = strlen($string);
    $ret = '';
    for ($i = 0; $i < $length; ++$i) {
        $char = $string[$i];
        switch ($char) {
        case '=':
            $ret .= '=3D';
            break;
        case '?':
            $ret .= '=3F';
            break;
        case '_':
            $ret .= '=5F';
            break;
        case ' ':
            $ret .= '_';
            break;
        default:
            $code = ord($char);
            if ($code > 126) {
                $ret .= sprintf("=%02X", $code);
                $must_encode = true;
            } else {
                $ret .= $char;
            }
        }
    }

    if ($must_encode) {
        $string = "=?$default_charset?Q?$ret?=";
    }

    return( $string );
}
1064
/*
 * This function tries to locate the entity_id of a specific mime element.
 *
 * The entities of $message are visited in order.  An entity whose header
 * has an empty entity_id, or a non-empty type, is searched recursively;
 * any other entity matches when its header id equals $id, compared
 * case-insensitively.  The search stops at the first non-empty result.
 *
 * @param  $id       the id to look for
 * @param  $message  message object whose entities are searched
 * @return           the entity_id found, or '' when there is none
 */
function find_ent_id( $id, $message ) {
    $found = '';
    $idx = 0;
    while ($found == '' && $idx < count($message->entities)) {
        $entity = $message->entities[$idx];
        $idx++;
        $entity_header = $entity->header;

        if ($entity_header->entity_id == '' || $entity_header->type) {
            $found = find_ent_id($id, $entity);
            continue;
        }
        if (strcasecmp($entity_header->id, $id) == 0) {
            $found = $entity_header->entity_id;
        }
    }

    return $found;
}
1082
1083 /**
1084  ** HTMLFILTER ROUTINES
1085  */
1086
/**
 * This function returns the final tag out of the tag name, an array
 * of attributes, and the type of the tag. This function is called by
 * sq_sanitize internally.
 *
 * Attribute values are written exactly as given ("name=value"), so they
 * must already carry their quotes.
 *
 * @param  $tagname  the name of the tag.
 * @param  $attary   the array of attributes and their values; anything
 *                   that is not a non-empty array prints no attributes.
 * @param  $tagtype  The type of the tag: 2 gives a closing tag
 *                   "</name>", 3 gives "<name ... />", any other value
 *                   gives "<name ...>".
 * @return           a string with the final tag representation.
 */
function sq_tagprint($tagname, $attary, $tagtype){
    if ($tagtype == 2){
        return '</' . $tagname . '>';
    }
    $fulltag = '<' . $tagname;
    if (is_array($attary) && sizeof($attary)){
        $atts = Array();
        /**
         * foreach always starts at the first element, whatever state
         * the array's internal pointer was left in by the caller.
         */
        foreach ($attary as $attname => $attvalue){
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . join(" ", $atts);
    }
    if ($tagtype == 3){
        $fulltag .= " /";
    }
    return $fulltag . ">";
}
1117
/**
 * Helper meant for array_walk: replaces the referenced value with its
 * lowercase form.
 *
 * @param  $val a value passed by-ref; overwritten with strtolower($val).
 * @return      void since it modifies a by-ref value.
 */
function sq_casenormalize(&$val){
    $lowered = strtolower($val);
    $val = $lowered;
}
1128
/**
 * This function skips any whitespace from the current position within
 * a string and to the next non-whitespace value.
 *
 * @param  $body   the string
 * @param  $offset the offset within the string where we should start
 *                 looking for the next non-whitespace character.
 * @return         the location within the $body where the next
 *                 non-whitespace char is located; $offset itself when
 *                 no whitespace starts there, strlen($body) when only
 *                 whitespace follows.
 */
function sq_skipspace($body, $offset){
    /**
     * "\s*" also matches the empty string, so a match is always found
     * and its length is simply the number of characters to skip.
     */
    if (preg_match("/^\s*/s", substr($body, $offset), $matches)){
        $offset += strlen($matches[0]);
    }
    return $offset;
}
1150
/**
 * Finds the next occurrence of a string inside another one.  This is a
 * thin wrapper around "strpos" that never reports failure: a missing
 * needle is reported as the end of the string.
 *
 * @param  $body   The string to look for needle in.
 * @param  $offset Start looking from this position.
 * @param  $needle The character/string to look for.
 * @return         location of the next occurrence of the needle, or
 *                 strlen($body) if needle wasn't found.
 */
function sq_findnxstr($body, $offset, $needle){
    $found = strpos($body, $needle, $offset);
    return ($found === FALSE) ? strlen($body) : $found;
}
1170
/**
 * This function takes a PCRE-style regexp and tries to match it
 * within the string.
 *
 * @param  $body   The string to look for needle in.
 * @param  $offset Start looking from here.
 * @param  $reg    A PCRE-style regex to match, without delimiters.  It
 *                 is placed between "%" delimiters, so it must not
 *                 contain an unescaped "%".
 * @return         Returns a false if no matches found, or an array
 *                 with the following members:
 *                 - integer with the location of the match within $body
 *                 - string with whatever content between offset and the match
 *                 - string with whatever it is we matched
 */
function sq_findnxreg($body, $offset, $reg){
    $matches = Array();
    /**
     * Rely on preg_match's return value rather than on the truthiness
     * of the matched text: a match consisting of "0" is still a match.
     */
    if (!preg_match("%^(.*?)($reg)%s", substr($body, $offset), $matches)){
        return false;
    }
    return Array($offset + strlen($matches[1]), $matches[1], $matches[2]);
}
1198
/**
 * This function looks for the next tag.
 *
 * @param  $body   String where to look for the next tag.
 * @param  $offset Start looking from here.
 * @return         false if no more tags exist in the body, or
 *                 an array with the following members:
 *                 - string with the name of the tag (lowercased)
 *                 - array with attributes and their values (values
 *                   include their quotes), or false when the tag ended
 *                   right after its name
 *                 - integer with tag type (1, 2, or 3)
 *                 - integer where the tag starts (starting "<")
 *                 - integer where the tag ends (ending ">")
 *                 first three members will be false, if the tag is
 *                 invalid; comments and SGML declarations are reported
 *                 this way too.
 */
function sq_getnxtag($body, $offset){
    if ($offset > strlen($body)){
        return false;
    }
    $lt = sq_findnxstr($body, $offset, "<");
    if ($lt == strlen($body)){
        return false;
    }
    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = sq_skipspace($body, $lt+1);
    if ($pos >= strlen($body)){
        return Array(false, false, false, $lt, strlen($body));
    }
    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:
     *    <a href="blah">
     * 2. Closing tag, e.g.:
     *    </a>
     * 3. XHTML-style content-less tag, e.g.:
     *    <img src="blah"/>
     */
    $tagtype = false;
    switch (substr($body, $pos, 1)){
    case "/":
        $tagtype = 2;
        $pos++;
        break;
    case "!":
        /**
         * A comment or an SGML declaration.
         */
        if (substr($body, $pos+1, 2) == "--"){
            /**
             * Test strpos' result before adding to it: a comment that
             * is never closed runs to the end of the body.
             */
            $gt = strpos($body, "-->", $pos);
            if ($gt === false){
                $gt = strlen($body);
            } else {
                $gt += 2;
            }
        } else {
            $gt = sq_findnxstr($body, $pos, ">");
        }
        return Array(false, false, false, $lt, $gt);
    default:
        /**
         * Assume tagtype 1 for now. If it's type 3, we'll switch values
         * later.
         */
        $tagtype = 1;
        break;
    }

    /**
     * Look for next [\W-_], which will indicate the end of the tag name.
     */
    $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
    if ($regary == false){
        return Array(false, false, false, $lt, strlen($body));
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    switch ($match){
    case "/":
        /**
         * This is an xhtml-style tag with a closing / at the
         * end, like so: <img src="blah"/>. Check if it's followed
         * by the closing bracket. If not, then this tag is invalid
         */
        if (substr($body, $pos, 2) == "/>"){
            $pos++;
            $tagtype = 3;
        } else {
            $gt = sq_findnxstr($body, $pos, ">");
            return Array(false, false, false, $lt, $gt);
        }
        /**
         * Deliberate fall-through: $pos now points at the ">".
         */
    case ">":
        return Array($tagname, false, $tagtype, $lt, $pos);
    default:
        /**
         * Anything but whitespace makes this an invalid tag. Look for
         * the next closing ">" from the start of this tag, so that the
         * reported end can never lie before the reported start.
         */
        if (!preg_match("/\s/", $match)){
            $gt = sq_findnxstr($body, $lt, ">");
            return Array(false, false, false, $lt, $gt);
        }
    }

    /**
     * At this point we're here:
     * <tagname  attribute='blah'>
     * \-------^
     *
     * At this point we loop in order to find all attributes.
     */
    $attname = '';
    $attary = Array();

    while ($pos <= strlen($body)){
        $pos = sq_skipspace($body, $pos);
        if ($pos == strlen($body)){
            /**
             * Non-closed tag.
             */
            return Array(false, false, false, $lt, $pos);
        }
        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag.
         */
        $matches = Array();
        if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)){
            /**
             * Yep. So we did.
             */
            $pos += strlen($matches[1]);
            if ($matches[2] == "/>"){
                $tagtype = 3;
                $pos++;
            }
            return Array($tagname, $attary, $tagtype, $lt, $pos);
        }

        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * We leave types 1 and 2 the same, type 3 we check for
         * '"' and convert to "&quot" if needed, then wrap in
         * double quotes. Type 4 we convert into:
         * attrname="yes".
         */
        $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
        if ($regary == false){
            /**
             * Looks like body ended before the end of tag.
             */
            return Array(false, false, false, $lt, strlen($body));
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);
        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        switch($match){
        case "/":
            /**
             * This is an xhtml-style tag with a closing / at the
             * end, like so: <img src="blah"/>. Check if it's followed
             * by the closing bracket. If not, then this tag is invalid
             */
            if (substr($body, $pos, 2) == "/>"){
                $pos++;
                $tagtype = 3;
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                return Array(false, false, false, $lt, $gt);
            }
            /**
             * Deliberate fall-through: record the type-4 attribute
             * and return.
             */
        case ">":
            $attary[$attname] = '"yes"';
            return Array($tagname, $attary, $tagtype, $lt, $pos);
        default:
            /**
             * Skip whitespace and see what we arrive at.
             */
            $pos = sq_skipspace($body, $pos);
            $char = substr($body, $pos, 1);
            /**
             * Two things are valid here:
             * '=' means this is attribute type 1 2 or 3.
             * \w means this was attribute type 4.
             * anything else we ignore and re-loop. End of tag and
             * invalid stuff will be caught by our checks at the beginning
             * of the loop.
             */
            if ($char == "="){
                $pos++;
                $pos = sq_skipspace($body, $pos);
                /**
                 * Here are 3 possibilities:
                 * "'"  attribute type 1
                 * '"'  attribute type 2
                 * everything else is the content of tag type 3
                 */
                $quot = substr($body, $pos, 1);
                if ($quot == "'"){
                    $regary = sq_findnxreg($body, $pos+1, "\'");
                    if ($regary == false){
                        return Array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    $pos++;
                    $attary[$attname] = "'" . $attval . "'";
                } else if ($quot == '"'){
                    $regary = sq_findnxreg($body, $pos+1, '\"');
                    if ($regary == false){
                        return Array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    $pos++;
                    $attary[$attname] = '"' . $attval . '"';
                } else {
                    /**
                     * These are hateful. Look for \s, or >.
                     */
                    $regary = sq_findnxreg($body, $pos, "[\s>]");
                    if ($regary == false){
                        return Array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    /**
                     * If it's ">" it will be caught at the top.
                     */
                    $attval = preg_replace("/\"/s", "&quot;", $attval);
                    $attary[$attname] = '"' . $attval . '"';
                }
            } else if (preg_match("|[\w/>]|", $char)) {
                /**
                 * That was attribute type 4.
                 */
                $attary[$attname] = '"yes"';
            } else {
                /**
                 * An illegal character. Find next '>' and return.
                 */
                $gt = sq_findnxstr($body, $pos, ">");
                return Array(false, false, false, $lt, $gt);
            }
        }
    }
    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return Array(false, false, false, $lt, strlen($body));
}
1487
/**
 * This function checks attribute values for entity-encoded values
 * and returns them translated into plain characters so we can run
 * checks on them.
 *
 * Named entities from PHP's HTML_ENTITIES table (except &quot;) and
 * numbered entities 1-255 in decimal or hexadecimal form (except 34 and
 * 39, the two quote characters) are translated.  An entity is accepted
 * with one or more trailing ";", or without one when the next character
 * cannot be part of the entity.
 *
 * @param  $attvalue A string to run entity check against.
 * @return           Translated value.
 */
function sq_deent($attvalue){
    /**
     * See if we have to run the checks first. All entities must start
     * with "&".
     */
    if (strpos($attvalue, "&") === false){
        return $attvalue;
    }
    /**
     * Check named entities first. ENT_COMPAT is requested explicitly so
     * that single quotes never enter the table.
     */
    $trans = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT));
    /**
     * Leave &quot; in, as it can mess us up.
     */
    unset($trans["&quot;"]);
    foreach ($trans as $ent => $val){
        /**
         * Consume the terminating ";" itself. Accepting it only by way
         * of "some non-word character follows" leaves a stray ";"
         * behind whenever a word character follows the entity.
         */
        $entname = preg_quote(rtrim($ent, ';'), '/');
        $attvalue = preg_replace('/' . $entname . '(?:;+|(?!\w))/i',
                                 str_replace('\\', '\\\\', $val),
                                 $attvalue);
    }
    /**
     * Now translate numbered entities from 1 to 255 if needed.
     */
    if (strpos($attvalue, "#") !== false){
        $omit = Array(34, 39);
        for ($asc=1; $asc<256; $asc++){
            if (in_array($asc, $omit)){
                continue;
            }
            /**
             * A backslash has to be doubled to come out of a preg
             * replacement string as one literal backslash.
             */
            $chr = str_replace('\\', '\\\\', chr($asc));
            $attvalue = preg_replace('/&#0*' . $asc . '(?:;+|(?![0-9]))/',
                                     $chr, $attvalue);
            $attvalue = preg_replace('/&#x0*' . dechex($asc)
                                     . '(?:;+|(?![0-9a-f]))/i',
                                     $chr, $attvalue);
        }
    }
    return $attvalue;
}
1534
/**
 * This function runs various checks against the attributes.
 *
 * For every attribute, in order: it is dropped when $rm_attnames lists
 * it for this tag; otherwise its value is entity-decoded, run through
 * the matching $bad_attvals replacements, and rewritten when it is a
 * cid: url.  Finally the attributes that $add_attr_to_tag lists for
 * this tag are merged in.
 *
 * @param  $tagname         String with the name of the tag.
 * @param  $attary          Array with all tag attributes.
 * @param  $rm_attnames     See description for sq_sanitize
 * @param  $bad_attvals     See description for sq_sanitize
 * @param  $add_attr_to_tag See description for sq_sanitize
 * @param  $message         message object
 * @param  $id              message id
 * @return                  Array with modified attributes.
 */
function sq_fixatts($tagname,
                    $attary,
                    $rm_attnames,
                    $bad_attvals,
                    $add_attr_to_tag,
                    $message,
                    $id
                    ){
    /**
     * foreach walks a copy of the array, so $attary itself can safely be
     * changed inside the loop.
     */
    foreach ($attary as $attname => $attvalue){
        /**
         * See if this attribute should be removed.
         */
        $remove = false;
        foreach ($rm_attnames as $matchtag=>$matchattrs){
            if (!preg_match($matchtag, $tagname)){
                continue;
            }
            foreach ($matchattrs as $matchattr){
                if (preg_match($matchattr, $attname)){
                    $remove = true;
                    break 2;
                }
            }
        }
        if ($remove){
            /**
             * Move straight on to the next attribute: running the value
             * checks below could put the removed attribute back.
             */
            unset($attary[$attname]);
            continue;
        }
        /**
         * Remove any entities.
         */
        $attvalue = sq_deent($attvalue);

        /**
         * Now let's run checks on the attvalues.
         * $bad_attvals maps tag regexes to attribute-name regexes to a
         * pair of arrays (match patterns, replacements). Each matching
         * pair is applied to the entity-decoded value, and the
         * attribute is overwritten only when that changed something.
         */
        foreach ($bad_attvals as $matchtag=>$matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr=>$valary){
                    if (preg_match($matchattr, $attname)){
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue =
                            preg_replace($valmatch, $valrepl, $attvalue);
                        /**
                         * Strict comparison: two different numeric
                         * strings must not be taken as "unchanged".
                         */
                        if ($newvalue !== $attvalue){
                            $attary[$attname] = $newvalue;
                        }
                    }
                }
            }
        }
        /**
         * Turn cid: urls into http-friendly ones.
         */
        if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
            $attary[$attname] = sq_cid2http($message, $id, $attvalue);
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag=>$addattary){
        if (preg_match($matchtag, $tagname)){
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
1617
/**
 * This function edits the style definition to make them friendly and
 * usable in squirrelmail.
 *
 * The following rewrites are applied, in this order:
 *  - "body {...}" rules are renamed to ".bodyclass {...}" so that they can
 *    be attached to a <div>;
 *  - quoted url() values that use a "...script:" scheme are replaced with
 *    the "image removed" placeholder;
 *  - quoted http/https url() values are replaced with the same placeholder
 *    unless the global $view_unsafe_images is set;
 *  - quoted cid: url() values are rewritten with sq_cid2http();
 *  - "expression:" and "expression(" are defused.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $content  a string with whatever is between <style> and </style>
 * @return           a string with edited content.
 */
function sq_fixstyle($message, $id, $content){
    global $view_unsafe_images;
    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
    $secremoveimg = "../images/" . _("sec_remove_eng.png");
    /**
     * Fix url('blah') declarations.
     */
    $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si",
                            "url(\\1$secremoveimg\\2)", $content);
    /**
     * Fix url('https*://.*) declarations but only if $view_unsafe_images
     * is false.
     */
    if (!$view_unsafe_images){
        $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si",
                                "url(\\1$secremoveimg\\2)", $content);
    }

    /**
     * Fix urls that refer to cid:
     *
     * Each match is spliced out by position instead of being fed back
     * into a second regular expression. The matched text comes from the
     * message and may contain regex metacharacters or the pattern
     * delimiter; building a pattern from it could fail to match and leave
     * this loop spinning forever. Searching resumes after the text that
     * was just inserted, so the loop always terminates.
     */
    $offset = 0;
    while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content,
                      $matches, PREG_OFFSET_CAPTURE, $offset)){
        list($whole, $start) = $matches[0];
        $httpurl = sq_cid2http($message, $id, $matches[1][0]);
        $replacement = "url($httpurl)";
        $content = substr_replace($content, $replacement, $start,
                                  strlen($whole));
        $offset = $start + strlen($replacement);
    }

    /**
     * Fix stupid expression declarations which lead to vulnerabilities
     * in IE. The dynamic property syntax IE evaluates is
     * "expression(...)", so the opening parenthesis form is caught as
     * well as the "expression:" form.
     */
    $content = preg_replace("/expression\s*([:(])/si", "idiocy\\1", $content);
    return $content;
}
1670
/**
 * Builds a download.php link for a quoted cid: url so that the referenced
 * message part can be fetched by the browser.
 *
 * The first character of $cidurl is taken to be the quote character; the
 * returned url is wrapped in that same character.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $cidurl   the cid: url.
 * @return           a string with a http-friendly url
 */
function sq_cid2http($message, $id, $cidurl){
    /**
     * Remember the quote character, drop every occurrence of it, and
     * cut off the leading "cid:" (four characters).
     */
    $quote = substr($cidurl, 0, 1);
    $contentid = substr(trim(str_replace($quote, "", $cidurl)), 4);

    $mailbox = urlencode($message->header->mailbox);
    $entid = find_ent_id($contentid, $message);

    return $quote . "../src/download.php?absolute_dl=true&amp;" .
        "passed_id=" . $id . "&amp;mailbox=" . $mailbox .
        "&amp;passed_ent_id=" . $entid . $quote;
}
1692
/**
 * This function changes the <body> tag into a <div> tag since we
 * can't really have a body-within-body.
 *
 * The returned array always carries class='bodyclass'. The "background",
 * "bgcolor" and "text" attributes are translated into a single inline
 * style attribute; every other attribute is dropped.
 *
 * @param  $attary  an array of attributes and values of <body>
 * @return          a modified array of attributes to be set for <div>
 */
function sq_body2div($attary){
    $divattary = array("class"=>"'bodyclass'");
    if (!is_array($attary) || sizeof($attary) == 0){
        return $divattary;
    }
    $styledef = "";
    foreach ($attary as $attname=>$attvalue){
        /**
         * Drop both kinds of quote characters from the value. The style
         * attribute built below is wrapped in double quotes and uses
         * single quotes inside url(), so a stray quote of either kind
         * left in the value would end the attribute early and let the
         * message inject attributes of its own. Removing only quote
         * characters also keeps an unquoted value intact.
         */
        $attvalue = str_replace(array("'", "\""), "", $attvalue);
        switch ($attname){
        case "background":
            $styledef .= "background-image: url('$attvalue'); ";
            break;
        case "bgcolor":
            $styledef .= "background-color: $attvalue; ";
            break;
        case "text":
            $styledef .= "color: $attvalue; ";
            break;
        }
    }
    if (strlen($styledef) > 0){
        $divattary["style"] = "\"$styledef\"";
    }
    return $divattary;
}
1727
/**
 * This is the main function and the one you should actually be calling.
 * It walks $body tag by tag with sq_getnxtag(), copies the text found
 * between tags to the output, and decides for every tag whether it is
 * dropped, dropped together with everything up to its closing tag, or
 * printed with sq_tagprint() after its attributes went through
 * sq_fixatts().
 *
 * Two tags get special treatment: the text in front of a closing
 * </style> is passed through sq_fixstyle(), and <body> / </body> are
 * turned into <div> / </div> (the attributes of an opening <body> are
 * converted with sq_body2div()).
 *
 * The output is wrapped in "begin/end sanitized html" comments.
 *
 * There are several variables you should be aware of an which need
 * special description. Since the description is quite lengthy, see it
 * here:
 * http://www.mricon.com/html/phpfilter.html
 *
 * @param $body                 the string with HTML you wish to filter
 * @param $tag_list             see description above
 * @param $rm_tags_with_content see description above
 * @param $self_closing_tags    see description above
 * @param $force_tag_closing    see description above
 * @param $rm_attnames          see description above
 * @param $bad_attvals          see description above
 * @param $add_attr_to_tag      see description above
 * @param $message              message object
 * @param $id                   message id
 * @return                      sanitized html safe to show on your pages.
 */
function sq_sanitize($body,
                     $tag_list,
                     $rm_tags_with_content,
                     $self_closing_tags,
                     $force_tag_closing,
                     $rm_attnames,
                     $bad_attvals,
                     $add_attr_to_tag,
                     $message,
                     $id
                     ){
    /**
     * See if tag_list is of tags to remove or tags to allow.
     * false  means remove these tags
     * true   means allow these tags
     *
     * The flag has to be taken off the list before the tag names are
     * normalized, otherwise it would be treated as a tag name too.
     */
    $rm_tags = array_shift($tag_list);
    /**
     * Normalize tag_list, rm_tags_with_content and self_closing_tags.
     */
    if (is_array($tag_list)){
        array_walk($tag_list, 'sq_casenormalize');
    }
    if (is_array($rm_tags_with_content)){
        array_walk($rm_tags_with_content, 'sq_casenormalize');
    }
    if (is_array($self_closing_tags)){
        array_walk($self_closing_tags, 'sq_casenormalize');
    }
    $curpos = 0;
    $open_tags = array();
    $trusted = "<!-- begin sanitized html -->\n";
    /**
     * Either false, or the name of the tag whose content is currently
     * being thrown away.
     */
    $skip_content = false;

    while (($curtag=sq_getnxtag($body, $curpos)) != FALSE){
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        /**
         * Text between the end of the previous tag and the start of
         * this one.
         */
        $free_content = substr($body, $curpos, $lt-$curpos);
        /**
         * Take care of <style>
         */
        if ($tagname == "style" && $tagtype == 2){
            /**
             * This is a closing </style>. Edit the
             * content before we apply it.
             */
            $free_content = sq_fixstyle($message, $id, $free_content);
        } else if ($tagname == "body"){
            $tagname = "div";
            if ($tagtype == 1){
                $attary = sq_body2div($attary);
            }
        }
        if ($skip_content == false){
            $trusted .= $free_content;
        }
        if ($tagname != FALSE){
            if ($tagtype == 2){
                /**
                 * Closing tag.
                 */
                if ($skip_content == $tagname){
                    /**
                     * Got to the end of tag we needed to remove.
                     */
                    $tagname = false;
                    $skip_content = false;
                } else if ($skip_content == false){
                    /**
                     * Only print a closing tag if a matching opening
                     * tag was printed before.
                     */
                    if (isset($open_tags[$tagname]) &&
                        $open_tags[$tagname] > 0){
                        $open_tags[$tagname]--;
                    } else {
                        $tagname = false;
                    }
                }
            } else if ($skip_content == false){
                /**
                 * See if this is a self-closing type and change
                 * tagtype appropriately.
                 */
                if ($tagtype == 1
                    && in_array($tagname, $self_closing_tags)){
                    $tagtype=3;
                }
                /**
                 * See if we should skip this tag and any content
                 * inside it.
                 */
                if ($tagtype == 1 &&
                    in_array($tagname, $rm_tags_with_content)){
                    $skip_content = $tagname;
                } else if (($rm_tags == false
                            && in_array($tagname, $tag_list)) ||
                           ($rm_tags == true &&
                            !in_array($tagname, $tag_list))){
                    /**
                     * The tag is on the remove list, or missing from
                     * the allow list: drop the tag but keep its content.
                     */
                    $tagname = false;
                } else {
                    if ($tagtype == 1){
                        if (isset($open_tags[$tagname])){
                            $open_tags[$tagname]++;
                        } else {
                            $open_tags[$tagname]=1;
                        }
                    }
                    /**
                     * This is where we run other checks.
                     */
                    if (is_array($attary) && sizeof($attary) > 0){
                        $attary = sq_fixatts($tagname,
                                             $attary,
                                             $rm_attnames,
                                             $bad_attvals,
                                             $add_attr_to_tag,
                                             $message,
                                             $id
                                             );
                    }
                }
            }
            if ($tagname != false && $skip_content == false){
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
            }
        }
        $curpos = $gt+1;
    }
    /**
     * Whatever follows the last tag is appended as it is.
     */
    $trusted .= substr($body, $curpos, strlen($body)-$curpos);
    if ($force_tag_closing == true){
        foreach ($open_tags as $tagname=>$opentimes){
            while ($opentimes > 0){
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end sanitized html -->\n";
    return $trusted;
}
1890
/**
 * This is a wrapper function to call html sanitizing routines.
 *
 * It builds the filter configuration used for displaying HTML mail and
 * hands it to sq_sanitize() together with the global $message. As side
 * effects it sets the global $attachment_common_show_images to false and
 * sets the global $has_unsafe_images to true when the "image removed"
 * placeholder ended up in the output.
 *
 * @param  $body  the body of the message
 * @param  $id    the id of the message
 * @return        a string with html safe to display in the browser.
 */
function magicHTML($body, $id){
    global $attachment_common_show_images, $view_unsafe_images,
        $has_unsafe_images, $message;
    /**
     * Don't display attached images in HTML mode.
     */
    $attachment_common_show_images = false;
    /**
     * The leading false marks this as a list of tags to remove.
     */
    $tag_list = array(
                      false,
                      "object",
                      "meta",
                      "html",
                      "head",
                      "base"
                      );

    $rm_tags_with_content = array(
                                  "script",
                                  "applet",
                                  "embed",
                                  "title"
                                  );

    $self_closing_tags =  array(
                                "img",
                                "br",
                                "hr",
                                "input"
                                );

    $force_tag_closing = false;

    $rm_attnames = array(
                         "/.*/" =>
                         array(
                               "/target/si",
                               "/^on.*/si"
                               )
                         );

    $secremoveimg = "../images/" . _("sec_remove_eng.png");
    /**
     * Layout: tag pattern => attribute-name pattern =>
     * array(list of value patterns, list of replacements).
     *
     * Note that in "/^src|background|href|action/i" the ^ only binds to
     * the first alternative, so the pattern also matches attribute names
     * that merely contain background, href or action. That errs on the
     * side of filtering more attributes and is left as it is.
     */
    $bad_attvals = array(
        "/.*/" =>
            array(
                "/^src|background|href|action/i" =>
                    array(
                          array(
                                "|^([\'\"])\s*\.\./.*([\'\"])|si",
                                "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si"
                                ),
                          array(
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2"
                                )
                        ),
                "/^style/si" =>
                    array(
                          array(
                                /**
                                 * IE evaluates "expression(...)", so the
                                 * parenthesis form is caught along with
                                 * "expression:".
                                 */
                                "/expression\s*([:(])/si",
                                "|url\(([\'\"])\s*\.\./.*([\'\"])\)|si",
                                "/url\(([\'\"])\s*\S+script:.*([\'\"])\)/si"
                               ),
                          array(
                                "idiocy\\1",
                                "url(\\1$secremoveimg\\2)",
                                "url(\\1$secremoveimg\\2)"
                               )
                          )
                )
        );
    if (!$view_unsafe_images){
        /**
         * Remove any references to http/https if view_unsafe_images set
         * to false.
         *
         * These rules are added to the existing "/.*/" entry one by one.
         * Merging a second array keyed "/.*/" with array_merge() would
         * replace the whole entry, because a later string key overwrites
         * an earlier one, and all the rules defined above would be lost.
         */
        $bad_attvals["/.*/"]["/^src|background/i"] =
            array(
                  array(
                        "/^([\'\"])\s*https*:.*([\'\"])/si"
                        ),
                  array(
                        "\\1$secremoveimg\\2"
                        )
                  );
        $bad_attvals["/.*/"]["/^style/si"][0][] =
            "/url\(([\'\"])\s*https*:.*([\'\"])\)/si";
        $bad_attvals["/.*/"]["/^style/si"][1][] =
            "url(\\1$secremoveimg\\2)";
    }

    $add_attr_to_tag = array(
                             "/^a$/si" => array('target'=>'"_new"')
                             );
    $trusted = sq_sanitize($body,
                           $tag_list,
                           $rm_tags_with_content,
                           $self_closing_tags,
                           $force_tag_closing,
                           $rm_attnames,
                           $bad_attvals,
                           $add_attr_to_tag,
                           $message,
                           $id
                           );
    /**
     * The placeholder image in the output means something was replaced.
     */
    if (preg_match("|$secremoveimg|si", $trusted)){
        $has_unsafe_images = true;
    }
    return $trusted;
}
2018 ?>