functions/mime.php

   1 <?php
   2
   3 /**
   4  * mime.php
   5  *
   6  * Copyright (c) 1999-2002 The SquirrelMail Project Team
   7  * Licensed under the GNU GPL. For full terms see the file COPYING.
   8  *
   9  * This contains the functions necessary to detect and decode MIME
  10  * messages.
  11  *
  12  * $Id$
  13  */
  14
  15 require_once('../functions/imap.php');
  16 require_once('../functions/attachment_common.php');
  17
  /*
   * Objects that hold the structure of a message.
   *
   * msg_header is a plain value holder for the header fields of a message
   * or of a single MIME entity.  Every field carries a default, so a freshly
   * created instance can be used right away.
   */
  class msg_header {
      /* primary / secondary content type; compared elsewhere as "type0/type1" */
      var $type0 = '';
      var $type1 = '';
      var $boundary = '';
      var $charset = '';
      var $encoding = '';
      var $size = 0;
      /* address lists */
      var $to = array();
      var $from = '';
      var $date = '';
      var $cc = array();
      var $bcc = array();
      var $reply_to = '';
      var $subject = '';
      var $id = 0;
      var $mailbox = '';
      var $description = '';
      var $filename = '';
      /* position of the entity inside the message, e.g. 1.2 */
      var $entity_id = 0;
      var $message_id = 0;
      var $name = '';
      var $priority = 3;
      var $type = '';
  }
  29
  class message {
      /*
       * One node of a parsed message.  The structure is recursive: the
       * $entities list holds further message objects, one per sub-part.
       * See mime.txt for a longer description.
       */

      /* header of this node; starts out as an empty string */
      var $header = '';

      /* child nodes, in the order they were added */
      var $entities = array();

      /* Appends $msg to the list of child entities. */
      function addEntity ($msg) {
          $this->entities[] = $msg;
      }
  }
  42
  43 /* --------------------------------------------------------------------------------- */
  44 /* MIME DECODING                                                                     */
  45 /* --------------------------------------------------------------------------------- */
  46
  /*
   * Asks the IMAP server for the BODYSTRUCTURE of the message identified by
   * $header->id, strips the wrapping around the structure list and hands the
   * remainder to mime_parse_structure().
   *
   * Returns the resulting "message" object with $header stored as its header.
   */
  function mime_structure ($imap_stream, $header) {

      $tag = sqimap_session_id();
      $tag_len = strlen($tag);
      $msg_id = $header->id;
      fputs($imap_stream, "$tag FETCH $msg_id BODYSTRUCTURE\r\n");

      //
      // This should use sqimap_read_data instead of reading it itself
      //
      // Gather every line up to (not including) the one that starts with our tag.
      $struct = '';
      for ($line = fgets($imap_stream, 9216);
           substr($line, 0, $tag_len) <> $tag && !feof($imap_stream);
           $line = fgets($imap_stream, 9216)) {
          $struct .= $line;
      }

      // keep only what follows the BODYSTRUCTURE keyword ...
      $keyword_at = strpos(strtolower($struct), 'bodystructure');
      $struct = trim(substr($struct, $keyword_at + 13));

      // ... drop the last character of the response ...
      $struct = trim(substr($struct, 0, -1));

      // ... and peel off pairs of parentheses that enclose the whole string
      $close = mime_match_parenthesis(0, $struct);
      while ($close == strlen($struct) - 1) {
          $struct = trim(substr($struct, 0, -1));
          $struct = trim(substr($struct, 1));
          $close = mime_match_parenthesis(0, $struct);
      }

      $parsed = mime_parse_structure($struct, 0);
      $parsed->header = $header;

      return $parsed;
  }
  85
  /*
   * Parses one structure string into a "message" object.  The function is
   * recursive, so it is handed the sub-structure of every nested part.
   *
   * If $structure starts with '(' it is treated as a multipart: each
   * parenthesised group is parsed by a recursive call and added as an entity.
   * Otherwise the string describes a single entity and is handed to
   * mime_get_element().
   *
   * $structure  structure string without its outermost parentheses
   * $ent_id     entity id of the part being parsed (0 for the top level)
   *
   * Returns the "message" object for this part.
   */
  function mime_parse_structure ($structure, $ent_id) {
      global $mailbox;
      $properties = array();
      $msg = new message();
      if (substr($structure, 0, 1) == '(') {
          $old_ent_id = $ent_id;
          $ent_id = mime_new_element_level($ent_id);
          $start = $end = -1;
          do {
              $start = $end + 1;
              $end = mime_match_parenthesis($start, $structure);

              /* check if we are dealing with a new entity-level */
              $i = strrpos((string) $ent_id, '.');
              if ($i > 0) {
                  $ent = substr($ent_id, $i + 1);
              } else {
                  $ent = '';
              }
              /* add "forgotten" parent entities (alternative and relative) */
              if ($ent == '0') {
                  /* new entity levels have information about the type (type1) and
                   * the properties. This information is situated at the end of the
                   * structure string like for example (example between the brackets)
                   * [ "RELATED" ("BOUNDARY" "myboundary" "TYPE" "plain/html") ]
                   */

                  /* get the involved properties for parsing to mime_get_props */
                  $startprop = strrpos($structure, '(');
                  $properties_str = substr($structure, $startprop);
                  $endprop = mime_match_parenthesis($startprop, $structure);

                  /* cut off the used properties */
                  $structure_end = '';
                  if ($startprop) {
                      $structure_end = substr($structure, $endprop + 2);
                      $structure = trim(substr($structure, 0, $startprop));
                  }

                  /* get type1 */
                  $pos = strrpos($structure, ' ');
                  if (substr($structure, $pos + 1, 1) == '(') {
                      $pos++;
                  }
                  /* skip the blank and the opening quote, drop the closing quote.
                   * (The original passed count($structure)-2 as length; count()
                   * of a string used to be 1, i.e. a length of -1.) */
                  $type1 = strtolower(substr($structure, $pos + 2, -1));

                  /* cut off type1 */
                  if ($pos && $startprop) {
                      $structure = trim(substr($structure, 0, $pos));
                  }

                  /* process the found information */
                  $properties = mime_get_props($properties, $properties_str);
                  if (!is_array($properties)) {
                      $properties = array();
                  }
                  if (count($properties) > 0) {
                      /* the header of a fresh message object is an empty string;
                       * it has to be an object before properties can be stored */
                      if (!is_object($msg->header)) {
                          $msg->header = new stdClass();
                      }
                      $msg->header->entity_id = $old_ent_id;
                      $msg->header->type0 = 'multipart';
                      $msg->header->type1 = $type1;
                      for ($p = 0; $p < count($properties); $p++) {
                          $msg->header->{$properties[$p]['name']} = $properties[$p]['value'];
                      }
                  }
                  $structure = $structure . ' ' . $structure_end;
              }
              $element = substr($structure, $start + 1, ($end - $start) - 1);
              $ent_id = mime_increment_id($ent_id);
              $newmsg = mime_parse_structure($element, $ent_id);
              /* set mailbox in case of message/rfc822 entities */
              if (isset($newmsg->header->type0) && isset($newmsg->header->type1)) {
                  if ($newmsg->header->type0 == 'message' && $newmsg->header->type1 == 'rfc822') {
                      $newmsg->header->mailbox = $mailbox;
                  }
              }
              $msg->addEntity($newmsg);

              /* another sibling follows if the next character opens a group */
          } while (substr($structure, $end + 1, 1) == '(');
      } else {
          // parse the elements
          $msg = mime_get_element($structure, $msg, $ent_id);
      }
      return $msg;
  }
 171
 172
 /*
  * Bumps the last component of an element id.  Element ids look like
  * 1, 1.2 or 4.3.2.4.1; this turns 1.2 into 1.3 and 1 into 2.
  */
 function mime_increment_id ($id) {

     /* no dot after the first character: the id is a single number */
     if (!strpos($id, '.')) {
         return $id + 1;
     }

     $cut = strrpos($id, '.');
     $tail = substr($id, $cut + 1);
     $tail++;

     return substr($id, 0, $cut) . '.' . $tail;
 }
 190
 /*
  * Companion of mime_increment_id(): opens a new level below $id, turning
  * 1.3 into 1.3.0.  An empty/zero id yields 0.
  *
  * NOTE:  1.3.0 is not a valid element id.  It MUST be incremented before
  *        it is used.  Doing it this way avoids a special case for the
  *        first entity of a level: the caller always increments.
  */
 function mime_new_element_level ($id) {

     return $id ? $id . '.0' : 0;
 }
 209
 /*
  * Parses the structure string of a single (non-multipart) entity.
  *
  * The string is consumed token by token.  A token is NIL, a quoted string,
  * a {n} literal, a parenthesised list (handed to mime_get_props()) or a
  * bare atom.  The position of the token decides which header field it
  * fills: 1 type0, 2 type1, 4 id, 5 description, 6 encoding, 7 size and,
  * for text parts, 8 num_lines.  For message/rfc822 the remaining structure
  * at position 8 is parsed as an embedded message.
  *
  * $structure  structure string; passed by reference and consumed here
  * $msg        message object that receives a fresh msg_header
  * $ent_id     entity id to store in that header
  *
  * Returns $msg.
  */
 function mime_get_element (&$structure, $msg, $ent_id) {

     $elem_num = 1;
     $msg->header = new msg_header();
     $msg->header->entity_id = $ent_id;
     $properties = array();
     $text = '';
     while (strlen($structure) > 0) {
         $structure = trim($structure);
         $limit = strlen($structure);
         $char = (string) substr($structure, 0, 1);

         if (strtolower(substr($structure, 0, 3)) == 'nil') {
             $text = '';
             $structure = substr($structure, 3);
         } else if ($char == '"') {
             // collect everything up to the closing quote
             $pos = 1;
             $text = '';
             while ($pos < $limit && ($char = $structure[$pos]) != '"') {
                 $text .= $char;
                 $pos++;
             }
             $structure = substr($structure, strlen($text) + 2);
         } else if ($char == '{') {
             // literal: {<length>} CRLF <length bytes>
             $pos = 1;
             $len = '';
             while ($pos < $limit && ($char = $structure[$pos]) != '}') {
                 $len .= $char;
                 $pos++;
             }
             // skip the braces and the line break that follows them
             $structure = substr($structure, strlen($len) + 4);
             $text = substr($structure, 0, (int) $len);
             $structure = substr($structure, (int) $len + 1);
         } else if ($char == '(') {
             // a parenthesised list holds name/value properties
             $end = mime_match_parenthesis(0, $structure);
             $sub = substr($structure, 1, $end - 1);
             $found = mime_get_props($properties, $sub);
             if (is_array($found)) {
                 $properties = $found;
             }
             $structure = substr($structure, strlen($sub) + 2);
         } else {
             // bare atom: runs up to the next blank or closing parenthesis
             $pos = 0;
             $text = '';
             while ($pos < $limit
                    && ($char = $structure[$pos]) != ' '
                    && $char != ')') {
                 $text .= $char;
                 $pos++;
             }
             if ($text === '') {
                 // stuck on a stray ')': drop it so the loop always advances
                 $structure = substr($structure, 1);
             } else {
                 $structure = substr($structure, strlen($text));
             }
         }

         // This is where all the text parts get put into the header
         switch ($elem_num) {
             case 1:
                 $msg->header->type0 = strtolower($text);
                 break;
             case 2:
                 $msg->header->type1 = strtolower($text);
                 break;
             case 4: // Id
                 // Invisimail enclose images with <>
                 $msg->header->id = str_replace('<', '', str_replace('>', '', $text));
                 break;
             case 5:
                 $msg->header->description = $text;
                 break;
             case 6:
                 $msg->header->encoding = strtolower($text);
                 break;
             case 7:
                 $msg->header->size = $text;
                 break;
             default:
                 if ($msg->header->type0 == 'text' && $elem_num == 8) {
                     // This is a plain text message, so lets get the number of lines
                     // that it contains.
                     $msg->header->num_lines = $text;

                 } else if ($msg->header->type0 == 'message' && $msg->header->type1 == 'rfc822' && $elem_num == 8) {
                     // This is an encapsulated message, so lets start all over again and
                     // parse this message adding it on to the existing one.
                     $structure = trim($structure);
                     if (substr($structure, 0, 1) == '(') {
                         $e = mime_match_parenthesis(0, $structure);
                         $structure = substr($structure, 0, $e);
                         $structure = substr($structure, 1);
                         $m = mime_parse_structure($structure, $msg->header->entity_id);

                         // the following conditional is there to correct a bug that wasn't
                         // incrementing the entity IDs correctly because of the special case
                         // that message/rfc822 is.  This fixes it fine.
                         if (substr($structure, 1, 1) != '(') {
                             // a multipart result may still carry the empty-string header
                             if (!is_object($m->header)) {
                                 $m->header = new stdClass();
                             }
                             $m->header->entity_id = mime_increment_id(mime_new_element_level($ent_id));
                         }

                         // Now we'll go through and reformat the results.
                         if ($m->entities) {
                             for ($i = 0; $i < count($m->entities); $i++) {
                                 $msg->addEntity($m->entities[$i]);
                             }
                         } else {
                             $msg->addEntity($m);
                         }
                         $structure = "";
                     }
                 }
                 break;
         }
         $elem_num++;
         $text = "";
     }
     // loop through the additional properties and put those in the various headers
     for ($i = 0; $i < count($properties); $i++) {
         $msg->header->{$properties[$i]['name']} = $properties[$i]['value'];
     }

     return $msg;
 }
 332
 /*
  * Collects name/value properties from a (possibly nested) parenthesised
  * list.  For example, given
  *     ("attachment" ("filename" "luke.tar.gz"))
  * the "attachment" keyword is ignored for now and the result is
  *     $props[0]['name']  = 'filename';
  *     $props[0]['value'] = 'luke.tar.gz';
  *
  * $props      properties collected so far; new pairs are appended
  * $structure  the list to scan
  *
  * Returns the extended $props array.  Names are lower-cased.  An array is
  * returned on every path, including an empty $structure.
  */
 function mime_get_props ($props, $structure) {

     if (!is_array($props)) {
         $props = array();
     }

     while (strlen($structure) > 0) {
         $structure = trim($structure);
         $limit = strlen($structure);
         $char = (string) substr($structure, 0, 1);

         if ($char == '"') {
             /* first quoted string: the property name (or a keyword) */
             $pos = 1;
             $tmp = '';
             while ($pos < $limit && ($char = $structure[$pos]) != '"') {
                 $tmp .= $char;
                 $pos++;
             }
             $structure = trim(substr($structure, strlen($tmp) + 2));
             $limit = strlen($structure);
             $char = (string) substr($structure, 0, 1);

             if ($char == '"') {
                 /* second quoted string: the value */
                 $pos = 1;
                 $value = '';
                 while ($pos < $limit && ($char = $structure[$pos]) != '"') {
                     $value .= $char;
                     $pos++;
                 }
                 $structure = trim(substr($structure, strlen($value) + 2));
                 $k = count($props);
                 $props[$k]['name'] = strtolower($tmp);
                 $props[$k]['value'] = $value;
                 if ($structure == '') {
                     return $props;
                 }
                 /* more pairs follow: the enclosing loop picks them up.  (The
                  * original also recursed here and threw the result away.) */
             } else if ($char == '(') {
                 /* keyword followed by a nested list: only the list matters */
                 $end = mime_match_parenthesis(0, $structure);
                 $sub = substr($structure, 1, $end - 1);
                 return mime_get_props($props, $sub);
             }
         } else if ($char == '(') {
             $end = mime_match_parenthesis(0, $structure);
             $sub = substr($structure, 1, $end - 1);
             return mime_get_props($props, $sub);
         } else {
             return $props;
         }
     }

     return $props;
 }
 402
 /*
  * Finds the parenthesis that closes the one at offset $pos of $structure.
  * For instance, with $pos = 0 and $structure being
  *     ("text" "plain" ("val1name", "1") nil ... )
  *     x                                         x
  * the result is 42.
  *
  * Quoted strings and {n} literals are skipped, because boundary ids and
  * other values may contain ')'.  Nested groups are matched recursively.
  *
  * Returns the offset of the closing parenthesis.  If $structure has no '('
  * at $pos the length of $structure is returned.  If no closing parenthesis
  * is found an error line is printed and an offset at or past the end of
  * the string is returned.
  */
 function mime_match_parenthesis ($pos, $structure) {

     $j = strlen($structure);

     if ($pos < 0 || $pos >= $j || $structure[$pos] != '(') {
         return $j;
     }

     while ($pos < $j) {
         $pos++;
         if ($pos >= $j) {
             /* ran off the end without finding the match */
             break;
         }
         $char = $structure[$pos];

         if ($char == ')') {
             return $pos;
         } elseif ($char == '"') { /* check for quoted string */
             $pos++;
             while ($pos < $j && $structure[$pos] != '"') {
                 /* an escaped quote or backslash takes two characters */
                 $pair = substr($structure, $pos, 2);
                 if ($pair == '\\"' || $pair == '\\\\') {
                     $pos++;
                 }
                 $pos++;
             }
         } elseif ($char == '{') { /* check for literal */
             $str = substr($structure, $pos);
             $pos++;
             if (preg_match('/^\{(\d+)\}.*/', $str, $reg)) {
                 /* jump over the digits, the brace, the line break and all
                  * but the last byte of the literal */
                 $pos = $pos + strlen($reg[1]) + (int) $reg[1] + 2;
             }
         } elseif ($char == '(') {
             $pos = mime_match_parenthesis($pos, $structure);
         }
     }
     echo _("Error decoding mime structure.  Report this as a bug!") . '<br>';
     return $pos;
 }
 452
 /*
  * Fetches the body of entity $ent_id of message $id and returns it as a
  * string.
  *
  * The first response line that is either not untagged or is the FETCH line
  * is inspected: if it announces a {n} literal, the first n bytes of the
  * remaining data are returned; if it carries a quoted string, that string
  * is returned.  Otherwise an error box is printed and the complete message
  * ($passed_id, BODY[]) is fetched and returned instead.
  */
 function mime_fetch_body($imap_stream, $id, $ent_id) {

     /*
      * do a bit of error correction.  If we couldn't find the entity id, just guess
      * that it is the first one.  That is usually the case anyway.
      */
     if (!$ent_id) {
         $ent_id = 1;
     }
     $cmd = "FETCH $id BODY[$ent_id]";

     $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message);
     do {
         $topline = trim((string) array_shift($data));
     } while ($topline && $topline[0] == '*' && !preg_match('/\* [0-9]+ FETCH.*/i', $topline));
     $wholemessage = implode('', $data);

     if (preg_match('/\{([^}]*)\}/', $topline, $regs)) {

         $ret = substr($wholemessage, 0, (int) $regs[1]);
         /*
          * There is some information in the content info header that could be
          * important in order to parse html messages, so it is fetched here.
          * NOTE(review): the result is not used at the moment.  The code that
          * turned a Content-Base header into a <base> tag was disabled because
          * BASE within HTML documents is illegal (see w3 spec).
          */
         if (substr($ret, 0, 1) == '<') {
             $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message);
         }
     } else if (preg_match('/"([^"]*)"/', $topline, $regs)) {
         $ret = $regs[1];
     } else {
         global $where, $what, $mailbox, $passed_id, $startMessage;
         $par = 'mailbox=' . urlencode($mailbox) . "&amp;passed_id=$passed_id";
         if (isset($where) && isset($what)) {
             $par .= '&amp;where='. urlencode($where) . "&amp;what=" . urlencode($what);
         } else {
             $par .= "&amp;startMessage=$startMessage&amp;show_more=0";
         }
         $par .= '&amp;response=' . urlencode($response) .
                 '&amp;message=' . urlencode($message).
                 '&amp;topline=' . urlencode($topline);

         /* the values below come from the server; escape them for HTML */
         echo   '<tt><br>' .
                '<table width="80%"><tr>' .
                '<tr><td colspan=2>' .
                _("Body retrieval error. The reason for this is most probably that the message is malformed. Please help us making future versions better by submitting this message to the developers knowledgebase!") .
                " <A HREF=\"../src/retrievalerror.php?$par\"><br>" .
                _("Submit message") . '</A><BR>&nbsp;' .
                '</td></tr>' .
                '<td><b>' . _("Command:") . '</td><td>' . htmlspecialchars($cmd) . '</td></tr>' .
                '<td><b>' . _("Response:") . '</td><td>' . htmlspecialchars((string) $response) . '</td></tr>' .
                '<td><b>' . _("Message:") . '</td><td>' . htmlspecialchars((string) $message) . '</td></tr>' .
                '<td><b>' . _("FETCH line:") . '</td><td>' . htmlspecialchars($topline) . '</td></tr>' .
                "</table><BR></tt></font><hr>";

         $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message);
         array_shift($data);
         $wholemessage = implode('', $data);

         $ret = $wholemessage;
     }
     return( $ret );
 }
 540
 /*
  * Fetches entity $ent_id of message $id and echoes it line by line, each
  * line run through decodeBody() with $encoding.  Nothing is returned.
  *
  * The first response line is not echoed.  Reading stops at the tagged
  * OK/BAD/NO line of the command, or when the stream ends.
  */
 function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
     // do a bit of error correction.  If we couldn't find the entity id, just guess
     // that it is the first one.  That is usually the case anyway.
     if (!$ent_id) {
         $ent_id = 1;
     }
     $sid = sqimap_session_id();
     // Don't kill the connection if the browser is over a dialup
     // and it would take over 30 seconds to download it.

     // don't call set_time_limit in safe mode.
     if (!ini_get("safe_mode")) {
         set_time_limit(0);
     }

     // tagged completion line of our command; /s lets (.*) span the line break
     $tagged = '/^' . preg_quote($sid, '/') . ' (OK|BAD|NO)(.*)$/s';

     fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id]\r\n");
     $cnt = 0;
     $read = fgets ($imap_stream, 4096);
     // This could be bad -- if the section has sqimap_session_id() . ' OK'
     // or similar, it will kill the download.
     // fgets() returns false once the stream ends; stop then instead of spinning.
     while ($read !== false && !preg_match($tagged, $read)) {
         if (trim($read) == ')==') {
             // NOTE(review): looks like this line is held back until we know
             // whether the tagged line follows it -- confirm the intent.
             $read1 = $read;
             $read = fgets ($imap_stream, 4096);
             if ($read === false) {
                 echo decodeBody($read1, $encoding);
                 return;
             }
             if (preg_match($tagged, $read)) {
                 return;
             }
             echo decodeBody($read1, $encoding) .
                  decodeBody($read, $encoding);
         } else if ($cnt) {
             echo decodeBody($read, $encoding);
         }
         $read = fgets ($imap_stream, 4096);
         $cnt++;
     }
 }
 579
 580 /* -[ END MIME DECODING ]----------------------------------------------------------- */
 581
 582
 583
 /*
  * This is the first function called.  It returns the parsed structure of
  * the message described by $header, as built by mime_structure().
  */
 function decodeMime ($imap_stream, &$header) {
     return mime_structure ($imap_stream, $header);
 }
 591
 // This is here for debugging purposes.  It prints the entity id and the
 // content type of every entity in the $message object, one per line.
 // Entities with an empty or zero entity id are not printed, but their
 // children still are.  Nothing is returned.

 function listEntities ($message) {
     if (!$message) {
         return;
     }
     if ($message->header->entity_id) {
         echo "<tt>" . $message->header->entity_id . ' : ' . $message->header->type0 . '/' . $message->header->type1 . '<br>';
     }
     for ($i = 0; isset($message->entities[$i]); $i++) {
         listEntities($message->entities[$i]);
     }
 }
 606
 607
 /*
  * Searches $message and, depth first, its entities for the one whose
  * entity id is $ent_id.  Returns that message object, or nothing (null)
  * when there is no such entity.
  */
 function getEntity ($message, $ent_id) {
     if (!$message) {
         return;
     }

     /* equal value and equal length, so that 1.1 does not match 1.10 */
     if ($message->header->entity_id == $ent_id
         && strlen($ent_id) == strlen($message->header->entity_id)) {
         return $message;
     }

     $idx = 0;
     while (isset($message->entities[$idx])) {
         $hit = getEntity($message->entities[$idx], $ent_id);
         if ($hit) {
             return $hit;
         }
         $idx++;
     }
 }
 624
 /*
  * Works out which entity (or entities) should be displayed and returns
  * their entity ids as an array.  The array always has at least one
  * element; an empty string is appended when nothing was selected.
  *
  * multipart/alternative and multipart/related are resolved through
  * findAlternativeEntity() and findRelatedEntity(); a childless text/plain
  * or text/html part is selected directly when nothing was selected before.
  *
  * NOTE(review): because a recursive call that selects nothing returns
  * array(''), the scan over sub-entities below stops after the first
  * child -- confirm that this is intended.
  */
 function findDisplayEntity ($msg, $textOnly = true, $entity = array() )   {
     global $show_html_default;

     $descended = false;
     if ($msg) {
         $kind = $msg->header->type0 . '/' . $msg->header->type1;

         if ($kind == 'multipart/alternative') {
             $msg = findAlternativeEntity($msg, $textOnly);
             if (count($msg->entities) != 0) {
                 $descended = true;
                 $entity = findDisplayEntity($msg, $textOnly, $entity);
             } else {
                 $entity[] = $msg->header->entity_id;
             }
         } elseif ($kind == 'multipart/related') {
             $candidates = findRelatedEntity($msg);
             foreach ($candidates as $msg) {
                 if (count($msg->entities) != 0) {
                     $descended = true;
                     $entity = findDisplayEntity($msg, $textOnly, $entity);
                 } else {
                     $entity[] = $msg->header->entity_id;
                 }
             }
         } elseif (count($entity) == 0
                   && $msg->header->type0 == 'text'
                   && ($msg->header->type1 == 'plain' || $msg->header->type1 == 'html')
                   && isset($msg->header->entity_id)
                   && count($msg->entities) == 0) {
             $entity[] = $msg->header->entity_id;
         }

         /* nothing chosen yet: look into the sub-entities */
         for ($n = 0;
              isset($msg->entities[$n]) && count($entity) == 0 && !$descended;
              $n++) {
             $entity = findDisplayEntity($msg->entities[$n], $textOnly, $entity);
         }
     }

     if (!isset($entity[0])) {
         $entity[] = "";
     }
     return $entity;
 }
 674
 /*
  * Returns the entity id of the first text/html entity found in $message
  * (depth first), or 0 if there is none.  The sub-entities of a
  * message/rfc822 part are not searched.
  */
 function findDisplayEntityHTML ($message) {

     $has_id = isset($message->header->entity_id);

     if ($has_id
         && $message->header->type0 == 'text'
         && $message->header->type1 == 'html') {
         return $message->header->entity_id;
     }

     /* do not look inside an attached message */
     if (isset($message->entities[0])
         && $message->header->type0 == 'message'
         && $message->header->type1 == 'rfc822'
         && $has_id) {
         return 0;
     }

     $n = 0;
     while (isset($message->entities[$n])) {
         $found = findDisplayEntityHTML($message->entities[$n]);
         if ($found != 0) {
             return $found;
         }
         $n++;
     }

     return 0;
 }
 698
 /*
  * Picks, from the parts of a multipart/alternative $message, the best
  * viewable one supported by SM and returns that entity.
  *
  * Acceptable types are text/plain and, when $show_html_default is set and
  * $textOnly is false, text/html; a type later in that list wins over an
  * earlier one.  A multipart/related part is judged by its "type" property.
  * If no part has an acceptable type the first part is returned.
  */
 function findAlternativeEntity ($message, $textOnly) {
     global $show_html_default;

     if ($show_html_default && !$textOnly) {
         $alt_order = array ('text/plain','text/html');
     } else {
         $alt_order = array ('text/plain');
     }

     /* -1 = nothing matched yet.  Starting at 0 made it impossible to ever
      * select text/plain (index 0), so a leading non-text part always won. */
     $best_view = -1;
     $ent_id = 0;
     $k = 0;
     for ($i = 0; $i < count($message->entities); $i ++) {
         $type = $message->entities[$i]->header->type0.'/'.$message->entities[$i]->header->type1;
         if ($type == 'multipart/related') {
            $type = $message->entities[$i]->header->type;
         }
         /* only types at or after the best match so far are of interest */
         for ($j = $k; $j < count($alt_order); $j++) {
             if ($alt_order[$j] == $type && $j > $best_view) {
                 $best_view = $j;
                 $ent_id = $i;
                 $k = $j;
             }
         }
     }
     return $message->entities[$ent_id];
 }
 727
 /*
  * Returns, as an array, those entities of $message whose content type
  * ("type0/type1") equals the "type" property of $message's header.
  */
 function findRelatedEntity ($message) {
     $matches = array();
     foreach ($message->entities as $part) {
         $part_type = $part->header->type0 . '/' . $part->header->type1;
         if ($message->header->type == $part_type) {
             $matches[] = $part;
         }
     }
     return $matches;
 }
 738
 /*
  * translateText
  * Extracted from strings.php 23/03/2002
  *
  * Turns the plain text in $body into HTML, in place.  Each line is wrapped
  * when it is long enough, passed through charset_decode() and parseUrl(),
  * has its tabs expanded, and is coloured according to its quote level.
  * The result is wrapped in <pre>.
  */

 function translateText(&$body, $wrap_at, $charset) {
     global $where, $what; /* from searching */
     global $color; /* color theme */

     require_once('../functions/url_parser.php');

     $lines = explode("\n", $body);
     foreach ($lines as $n => $text) {
         if (strlen($text) - 2 >= $wrap_at) {
             sqWordWrap($text, $wrap_at);
         }
         $text = charset_decode($charset, $text);
         $text = str_replace("\t", '        ', $text);

         parseUrl ($text);

         /* count the '&gt;' markers at the start of the line, blanks between
          * them are allowed */
         $depth = 0;
         $len = strlen($text);
         for ($at = 0; $at < $len; ) {
             if ($text[$at] == ' ') {
                 $at++;
             } else if (substr($text, $at, 4) === '&gt;') {
                 $at += 4;
                 $depth++;
             } else {
                 break;
             }
         }

         /* colour 14 for nested quotes, colour 13 for a single level;
          * missing theme entries get a default */
         if ($depth > 1) {
             if (! isset($color[14])) {
                 $color[14] = '#FF0000';
             }
             $text = '<FONT COLOR="' . $color[14] . '">' . $text . '</FONT>';
         } elseif ($depth) {
             if (! isset($color[13])) {
                 $color[13] = '#800000';
             }
             $text = '<FONT COLOR="' . $color[13] . '">' . $text . '</FONT>';
         }

         $lines[$n] = $text;
     }
     $body = '<pre>' . implode("\n", $lines) . '</pre>';
 }
 793
 /*
  * Debug function: walks through the entities of $message and prints, for
  * each one, its entity id and content type plus its "boundary" and "type"
  * properties when they are set and not empty.  The sub-entities of a
  * message/rfc822 part are not visited.  Nothing is returned.
  */
 function listMyEntities ($message) {

     if (!$message) {
         return;
     }

     $ent_id = $message->header->entity_id;
     if ($ent_id) {
         echo "<tt>" . $message->header->entity_id . ' : ' . $message->header->type0 . '/' . $message->header->type1 . '<br>';
     }

     if ($message->header->type0 == 'message' && $message->header->type1 == 'rfc822') {
         return;
     }

     if (isset($message->header->boundary)) {
         $var = $message->header->boundary;
         if ($var != '') {
             echo "<b>$ent_id boundary = $var</b><br>";
         }
     }
     if (isset($message->header->type)) {
         $var = $message->header->type;
         if ($var != '') {
             echo "<b>$ent_id type = $var</b><br>";
         }
     }
     for ($i = 0; isset($message->entities[$i]); $i++) {
         listMyEntities($message->entities[$i]);
     }
 }
 822
 823
 824
 825 /* This returns a parsed string called $body. That string can then
 826 be displayed as the actual message in the HTML. It contains
 827 everything needed, including HTML Tags, Attachments at the
 828 bottom, etc.
 829 */
 830 function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num) {
 831     // this if statement checks for the entity to show as the
 832     // primary message. To add more of them, just put them in the
 833     // order that is their priority.
 834     global $startMessage, $username, $key, $imapServerAddress, $imapPort,
 835            $show_html_default, $has_unsafe_images, $view_unsafe_images, $sort;
 836
 837     $has_unsafe_images = 0;
 838
 839     $id = $message->header->id;
 840
 841     $urlmailbox = urlencode($message->header->mailbox);
 842
 843     $body_message = getEntity($message, $ent_num);
 844     if (($body_message->header->type0 == 'text') ||
 845         ($body_message->header->type0 == 'rfc822')) {
 846         $body = mime_fetch_body ($imap_stream, $id, $ent_num);
 847
 848         $body = decodeBody($body, $body_message->header->encoding);
 849         $hookResults = do_hook("message_body", $body);
 850         $body = $hookResults[1];
 851         // If there are other types that shouldn't be formatted, add
 852         // them here
 853         if ($body_message->header->type1 == 'html') {
 854             if ( $show_html_default <> 1 ) {
 855                 $body = strip_tags( $body );
 856                 translateText($body, $wrap_at, $body_message->header->charset);
 857             } else {
 858                 $body = magicHTML( $body, $id, $message );
 859             }
 860         } else {
 861             translateText($body, $wrap_at, $body_message->header->charset);
 862         }
 863         $body .= "<CENTER><SMALL><A HREF=\"../src/download.php?absolute_dl=true&amp;passed_id=$id&amp;passed_ent_id=$ent_num&amp;mailbox=$urlmailbox&amp;showHeaders=1\">". _("Download this as a file") ."</A></SMALL></CENTER><BR>";
 864         if ($has_unsafe_images) {
 865             if ($view_unsafe_images) {
 866                 $body .= "<CENTER><SMALL><A HREF=\"read_body.php?passed_id=$id&amp;mailbox=$urlmailbox&amp;sort=$sort&amp;startMessage=$startMessage&amp;show_more=0\">". _("Hide Unsafe Images") ."</A></SMALL></CENTER><BR>\n";
 867             } else {
 868                 $body .= "<CENTER><SMALL><A HREF=\"read_body.php?passed_id=$id&amp;mailbox=$urlmailbox&amp;sort=$sort&amp;startMessage=$startMessage&amp;show_more=0&amp;view_unsafe_images=1\">". _("View Unsafe Images") ."</A></SMALL></CENTER><BR>\n";
 869             }
 870         }
 871
 872         /** Display the ATTACHMENTS: message if there's more than one part **/
 873         if (isset($message->entities[1])) {
 874             /* Header-type alternative means we choose the best one to display
 875                so don't show the alternatives as attachment. Header-type related
 876                means that the attachments are already part of the related message.
 877             */
 878             if ($message->header->type1 !='related' && $message->header->type1 !='alternative') {
 879                 $body .= formatAttachments ($message, $ent_num, $message->header->mailbox, $id);
 880             }
 881         }
 882     } else {
 883         $body = formatAttachments ($message, -1, $message->header->mailbox, $id);
 884     }
 885     return ($body);
 886 }
 887
 888 /*
 889  * A recursive function that returns a list of attachments with links
 890  * to where to download these attachments
 891  */
 892 function formatAttachments($message, $ent_id, $mailbox, $id) {
 893     global $where, $what;
 894     global $startMessage, $color;
 895     static $ShownHTML = 0;
 896
 897     $body = '';
 898     if ($ShownHTML == 0) {
 899
 900         $ShownHTML = 1;
 901         $body .= "<TABLE WIDTH=\"100%\" CELLSPACING=0 CELLPADDING=2 BORDER=0 BGCOLOR=\"$color[0]\"><TR>\n" .
 902                 "<TH ALIGN=\"left\" BGCOLOR=\"$color[9]\"><B>\n" .
 903                 _("Attachments") . ':' .
 904                 "</B></TH></TR><TR><TD>\n" .
 905                 "<TABLE CELLSPACING=0 CELLPADDING=1 BORDER=0>\n" .
 906                 formatAttachments($message, $ent_id, $mailbox, $id) .
 907                 "</TABLE></TD></TR></TABLE>";
 908
 909     } else if ($message) {
 910         $header = $message->header;
 911         $type0 = strtolower($header->type0);
 912         $type1 = strtolower($header->type1);
 913         $name = '';
 914         if (isset($header->name)) {
 915             $name = decodeHeader($header->name);
 916         }
 917         if ($type0 =='message' && $type1 == 'rfc822') {
 918
 919             $filename = decodeHeader($message->header->filename);
 920             if (trim($filename) == '') {
 921                 if (trim($name) == '') {
 922                     $display_filename = 'untitled-[' . $message->header->entity_id . ']' ;
 923                 } else {
 924                     $display_filename = $name;
 925                     $filename = $name;
 926                 }
 927             } else {
 928                 $display_filename = $filename;
 929             }
 930
 931             $urlMailbox = urlencode($mailbox);
 932             $ent = urlencode($message->header->entity_id);
 933
 934             $DefaultLink =
 935                 "../src/download.php?startMessage=$startMessage&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;passed_ent_id=$ent";
 936             if ($where && $what) {
 937                 $DefaultLink .= '&amp;where=' . urlencode($where) . '&amp;what=' . urlencode($what);
 938             }
 939             $Links['download link']['text'] = _("download");
 940             $Links['download link']['href'] =
 941                 "../src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;passed_ent_id=$ent";
 942             $ImageURL = '';
 943
 944             /* this executes the attachment hook with a specific MIME-type.
 945                 * if that doens't have results, it tries if there's a rule
 946                 * for a more generic type. */
 947             $HookResults = do_hook("attachment $type0/$type1", $Links,
 948                 $startMessage, $id, $urlMailbox, $ent, $DefaultLink, $display_filename, $where, $what);
 949             if(count($HookResults[1]) <= 1) {
 950                 $HookResults = do_hook("attachment $type0/*", $Links,
 951                 $startMessage, $id, $urlMailbox, $ent, $DefaultLink,
 952                 $display_filename, $where, $what);
 953             }
 954
 955             $Links = $HookResults[1];
 956             $DefaultLink = $HookResults[6];
 957
 958             $body .= '<TR><TD>&nbsp;&nbsp;</TD><TD>' .
 959                         "<A HREF=\"$DefaultLink\">$display_filename</A>&nbsp;</TD>" .
 960                         '<TD><SMALL><b>' . show_readable_size($message->header->size) .
 961                         '</b>&nbsp;&nbsp;</small></TD>' .
 962                         "<TD><SMALL>[ $type0/$type1 ]&nbsp;</SMALL></TD>" .
 963                         '<TD><SMALL>';
 964             if ($message->header->description) {
 965                 $body .= '<b>' . htmlspecialchars(_($message->header->description)) . '</b>';
 966             }
 967             $body .= '</SMALL></TD><TD><SMALL>&nbsp;';
 968
 969
 970             $SkipSpaces = 1;
 971             foreach ($Links as $Val) {
 972                 if ($SkipSpaces) {
 973                     $SkipSpaces = 0;
 974                 } else {
 975                     $body .= '&nbsp;&nbsp;|&nbsp;&nbsp;';
 976                 }
 977                 $body .= '<a href="' . $Val['href'] . '">' .  $Val['text'] . '</a>';
 978             }
 979
 980             unset($Links);
 981
 982             $body .= "</SMALL></TD></TR>\n";
 983
 984             return( $body );
 985
 986         } elseif (!$message->entities) {
 987
 988             $type0 = strtolower($message->header->type0);
 989             $type1 = strtolower($message->header->type1);
 990             $name = decodeHeader($message->header->name);
 991
 992             if ($message->header->entity_id != $ent_id) {
 993             $filename = decodeHeader($message->header->filename);
 994             if (trim($filename) == '') {
 995                 if (trim($name) == '') {
 996                     if ( trim( $message->header->id ) == '' )
 997                         $display_filename = 'untitled-[' . $message->header->entity_id . ']' ;
 998                     else
 999                         $display_filename = 'cid: ' . $message->header->id;
1000                     // $display_filename = 'untitled-[' . $message->header->entity_id . ']' ;
1001                 } else {
1002                     $display_filename = $name;
1003                     $filename = $name;
1004                 }
1005             } else {
1006                 $display_filename = $filename;
1007             }
1008
1009             $urlMailbox = urlencode($mailbox);
1010             $ent = urlencode($message->header->entity_id);
1011
1012             $DefaultLink =
1013                 "../src/download.php?startMessage=$startMessage&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;passed_ent_id=$ent";
1014             if ($where && $what) {
1015                $DefaultLink = '&amp;where='. urlencode($where).'&amp;what='.urlencode($what);
1016             }
1017             $Links['download link']['text'] = _("download");
1018             $Links['download link']['href'] =
1019                 "../src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;passed_ent_id=$ent";
1020             $ImageURL = '';
1021
1022             /* this executes the attachment hook with a specific MIME-type.
1023                 * if that doens't have results, it tries if there's a rule
1024                 * for a more generic type. */
1025             $HookResults = do_hook("attachment $type0/$type1", $Links,
1026                 $startMessage, $id, $urlMailbox, $ent, $DefaultLink,
1027                 $display_filename, $where, $what);
1028             if(count($HookResults[1]) <= 1) {
1029                 $HookResults = do_hook("attachment $type0/*", $Links,
1030                 $startMessage, $id, $urlMailbox, $ent, $DefaultLink,
1031                 $display_filename, $where, $what);
1032             }
1033
1034             $Links = $HookResults[1];
1035             $DefaultLink = $HookResults[6];
1036
1037             $body .= '<TR><TD>&nbsp;&nbsp;</TD><TD>' .
1038                         "<A HREF=\"$DefaultLink\">$display_filename</A>&nbsp;</TD>" .
1039                         '<TD><SMALL><b>' . show_readable_size($message->header->size) .
1040                         '</b>&nbsp;&nbsp;</small></TD>' .
1041                         "<TD><SMALL>[ $type0/$type1 ]&nbsp;</SMALL></TD>" .
1042                         '<TD><SMALL>';
1043             if ($message->header->description) {
1044                 $body .= '<b>' . htmlspecialchars(_($message->header->description)) . '</b>';
1045             }
1046             $body .= '</SMALL></TD><TD><SMALL>&nbsp;';
1047
1048
1049             $SkipSpaces = 1;
1050             foreach ($Links as $Val) {
1051                 if ($SkipSpaces) {
1052                     $SkipSpaces = 0;
1053                 } else {
1054                     $body .= '&nbsp;&nbsp;|&nbsp;&nbsp;';
1055                 }
1056                 $body .= '<a href="' . $Val['href'] . '">' .  $Val['text'] . '</a>';
1057             }
1058
1059             unset($Links);
1060
1061             $body .= "</SMALL></TD></TR>\n";
1062             }
1063         } else {
1064             for ($i = 0; $i < count($message->entities); $i++) {
1065                 $body .= formatAttachments($message->entities[$i], $ent_id, $mailbox, $id);
1066             }
1067         }
1068     }
1069     return( $body );
1070 }
1071
1072
/**
 * Decodes a message body according to its transfer encoding.
 *
 * CRLF line endings are converted to LF first. 'quoted-printable'
 * (also accepted as 'quoted_printable') and 'base64' are decoded,
 * matched case-insensitively; every other encoding is returned raw.
 *
 * @param  $body     the encoded body text
 * @param  $encoding the Content-Transfer-Encoding name
 * @return           the decoded body
 */
function decodeBody($body, $encoding) {
    $body = str_replace("\r\n", "\n", $body);
    $encoding = strtolower($encoding);

    if ($encoding == 'quoted-printable' ||
        $encoding == 'quoted_printable') {
        /* Remove soft line breaks BEFORE decoding. Doing it afterwards
         * would also delete a literal "=" (encoded as "=3D") that
         * happens to sit at the end of a line. */
        $body = str_replace("=\n", '', $body);
        $body = quoted_printable_decode($body);
    } else if ($encoding == 'base64') {
        $body = base64_decode($body);
    }

    // All other encodings are returned raw.
    return $body;
}
1095
/*
 * preg_replace_callback() helper for decodeHeader(): turns the two hex
 * digits captured from a "=XX" sequence into the corresponding byte.
 */
function decodeHeaderHexByte($match) {
    return chr(hexdec($match[1]));
}

/*
 * This function decodes strings that are encoded according to
 * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
 * Patched by Christian Schmidt <christian@ostenfeld.dk>  23/03/2002
 *
 * $string     header value; an array is joined with "\n" first
 * $utfencode  when true (default), Q-encoded words are additionally
 *             passed through charset_decode() with the word's charset.
 *             Some places, like the compose form, do not want that.
 *
 * Returns the string with every encoded-word replaced by its decoded
 * text. White space that separates two encoded-words is dropped.
 *
 * NOTE(review): B-encoded (base64) words are never passed through
 * charset_decode(), unlike Q-encoded ones. That matches the previous
 * behaviour; confirm whether it is intended.
 */
function decodeHeader ($string, $utfencode=true) {
    if (is_array($string)) {
        $string = implode("\n", $string);
    }
    /* $i is the number of leading characters already decoded; the
     * pattern skips them so decoded output is never decoded again. */
    $i = 0;
    while (preg_match('/^(.{' . $i . '})(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=/Ui',
                      $string, $res)) {
        $prefix = $res[1];
        // Ignore white-space between consecutive encoded-words
        if (strspn($res[2], " \t") != strlen($res[2])) {
            $prefix .= $res[2];
        }

        if (ucfirst($res[4]) == 'B') {
            $replace = base64_decode($res[5]);
        } else {
            $replace = str_replace('_', ' ', $res[5]);
            /* Callback form instead of the /e modifier, which PHP 7
             * no longer supports. */
            $replace = preg_replace_callback('/=([0-9a-f]{2})/i',
                                             'decodeHeaderHexByte',
                                             $replace);
            /* Only encode into entities by default. Some places
               don't need the encoding, like the compose form. */
            if ($utfencode) {
                $replace = charset_decode($res[3], $replace);
            }
        }
        $string = $prefix . $replace . substr($string, strlen($res[0]));
        $i = strlen($prefix) + strlen($replace);
    }
    return( $string );
}
1131
/*
 * Encode a string according to RFC 1522 for use in headers if it
 * contains 8-bit characters or anything that looks like it should
 * be encoded.
 *
 * $string  the raw header text
 *
 * Returns "=?<charset>?Q?<encoded>?=" using the global
 * $default_charset when the input contains "=?" or any byte above
 * 126; otherwise returns the input unchanged.
 *
 * In the encoded form "=", "?" and "_" become =3D, =3F and =5F, a
 * space becomes "_", and bytes above 126 become =XX (upper-case hex).
 */
function encodeHeader ($string) {
    global $default_charset;

    // Encode only if the string contains 8-bit characters or =?
    $must_encode = (strpos($string, '=?') !== false);
    $ret = '';
    $j = strlen( $string );
    for ($i = 0; $i < $j; ++$i) {
        /* Square brackets: the {} string-offset form is gone in PHP 8. */
        $char = $string[$i];
        switch ($char) {
        case '=':
            $ret .= '=3D';
            break;
        case '?':
            $ret .= '=3F';
            break;
        case '_':
            $ret .= '=5F';
            break;
        case ' ':
            $ret .= '_';
            break;
        default:
            $k = ord($char);
            if ($k > 126) {
                $ret .= sprintf("=%02X", $k);
                $must_encode = true;
            } else {
                $ret .= $char;
            }
        }
    }

    if ($must_encode) {
        $string = "=?$default_charset?Q?$ret?=";
    }

    return( $string );
}
1174
/*
 * Tries to locate the entity_id of the MIME element whose header id
 * matches $id (compared case-insensitively).
 *
 * Children whose subtype is 'alternative', 'related' or 'mixed' are
 * searched recursively; every other child is compared directly. The
 * search stops at the first hit. Returns '' when nothing matches.
 */
function find_ent_id( $id, $message ) {
    $container_types = array('alternative', 'related', 'mixed');

    $found = '';
    $total = count($message->entities);
    for ($idx = 0; $idx < $total; $idx++) {
        if (!($found == '')) {
            break;
        }
        $child = $message->entities[$idx];
        if (in_array($child->header->type1, $container_types)) {
            $found = find_ent_id( $id, $child );
        } else if (strcasecmp( $child->header->id, $id ) == 0) {
            $found = $child->header->entity_id;
        }
    }
    return( $found );
}
1192
1193 /**
1194  ** HTMLFILTER ROUTINES
1195  */
1196
/**
 * This function returns the final tag out of the tag name, an array
 * of attributes, and the type of the tag.
 *
 * Attribute values are emitted exactly as given (name=value), so they
 * must already carry their own quotes.
 *
 * @param  $tagname  the name of the tag.
 * @param  $attary   the array of attributes and their values; anything
 *                   that is not a non-empty array is ignored.
 * @param  $tagtype  The type of the tag: 2 prints a closing tag
 *                   (</name>), 3 prints an XHTML-style tag (<name ... />),
 *                   any other value prints an opening tag (<name ...>).
 * @return           a string with the final tag representation.
 */
function sq_tagprint($tagname, $attary, $tagtype){
    if ($tagtype == 2){
        return '</' . $tagname . '>';
    }

    $fulltag = '<' . $tagname;
    if (is_array($attary) && sizeof($attary)){
        $atts = Array();
        /* foreach instead of each(), which PHP 8 no longer provides. */
        foreach ($attary as $attname => $attvalue){
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . join(" ", $atts);
    }
    if ($tagtype == 3){
        $fulltag .= " /";
    }
    $fulltag .= ">";
    return $fulltag;
}
1227
/**
 * Helper meant for use with array_walk: lower-cases the value it
 * receives by reference, in place.
 *
 * @param  $val the value to lower-case (passed by-ref).
 * @return      void; the result is written back into $val.
 */
function sq_casenormalize(&$val){
    $lowered = strtolower($val);
    $val = $lowered;
}
1238
/**
 * This function skips any whitespace from the current position within
 * a string and to the next non-whitespace value.
 *
 * Whitespace is whatever the PCRE class \s matches.
 *
 * @param  $body   the string
 * @param  $offset the offset within the string where we should start
 *                 looking for the next non-whitespace character.
 * @return         the location within the $body where the next
 *                 non-whitespace char is located; $offset itself when
 *                 there is no whitespace at $offset.
 */
function sq_skipspace($body, $offset){
    $matches = Array();
    /* Match one or more whitespace characters and test preg_match()'s
     * result, rather than calling sizeof() on the captured string. */
    if (preg_match("/^\s+/s", substr($body, $offset), $matches)){
        $offset += strlen($matches[0]);
    }
    return $offset;
}
1258
/**
 * This function looks for the next character within a string.  It's
 * really just a glorified "strpos", except it catches failures
 * nicely.
 *
 * @param  $body   The string to look for needle in.
 * @param  $offset Start looking from this position.
 * @param  $needle The character/string to look for.
 * @return         location of the next occurrence of the needle, or
 *                 strlen($body) if needle wasn't found (this includes
 *                 an $offset that lies beyond the end of $body).
 */
function sq_findnxstr($body, $offset, $needle){
    $length = strlen($body);
    /* strpos() rejects an offset past the end of the string (an error
     * in PHP 8); report that case as "not found" instead. */
    if ($offset > $length){
        return $length;
    }
    $pos = strpos($body, $needle, $offset);
    if ($pos === FALSE){
        $pos = $length;
    }
    return $pos;
}
1278
/**
 * This function takes a PCRE-style regexp and tries to match it
 * within the string.
 *
 * The expression is embedded in a pattern delimited by "%", so $reg
 * must not contain an unescaped "%".
 *
 * @param  $body   The string to look for needle in.
 * @param  $offset Start looking from here.
 * @param  $reg    A PCRE-style regex to match.
 * @return         Returns a false if no matches found (an empty match
 *                 counts as no match), or an array with the following
 *                 members:
 *                 - integer with the location of the match within $body
 *                 - string with whatever content between offset and the match
 *                 - string with whatever it is we matched
 */
function sq_findnxreg($body, $offset, $reg){
    $matches = Array();
    /* Decide on preg_match()'s result, not on the truthiness of the
     * matched text: a match consisting of "0" is still a match, and on
     * failure $matches has no element 0 to inspect. */
    $found = preg_match("%^(.*?)($reg)%s", substr($body, $offset), $matches);
    if (!$found || $matches[0] === ''){
        return false;
    }
    return Array($offset + strlen($matches[1]), $matches[1], $matches[2]);
}
1306
/**
 * This function looks for the next tag.
 *
 * Comments and SGML declarations ("<!...") are reported as invalid
 * tags spanning up to their terminator.
 *
 * @param  $body   String where to look for the next tag.
 * @param  $offset Start looking from here.
 * @return         false if no more tags exist in the body, or
 *                 an array with the following members:
 *                 - string with the name of the tag (lower-cased)
 *                 - array with attributes and their values (attribute
 *                   names lower-cased, values including their quotes),
 *                   or false for a tag that ends right after its name
 *                 - integer with tag type (1, 2, or 3)
 *                 - integer where the tag starts (starting "<")
 *                 - integer where the tag ends (ending ">")
 *                 first three members will be false, if the tag is invalid.
 */
function sq_getnxtag($body, $offset){
    if ($offset > strlen($body)){
        return false;
    }
    $lt = sq_findnxstr($body, $offset, "<");
    if ($lt == strlen($body)){
        return false;
    }
    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = sq_skipspace($body, $lt+1);
    if ($pos >= strlen($body)){
        return Array(false, false, false, $lt, strlen($body));
    }
    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:
     *    <a href="blah">
     * 2. Closing tag, e.g.:
     *    </a>
     * 3. XHTML-style content-less tag, e.g.:
     *    <img src="blah"/>
     */
    $tagtype = false;
    switch (substr($body, $pos, 1)){
    case "/":
        $tagtype = 2;
        $pos++;
        break;
    case "!":
        /**
         * A comment or an SGML declaration.
         */
        if (substr($body, $pos+1, 2) == "--"){
            $gt = strpos($body, "-->", $pos);
            if ($gt === false){
                $gt = strlen($body);
            } else {
                /* Point at the ">" of the "-->" terminator. */
                $gt += 2;
            }
            return Array(false, false, false, $lt, $gt);
        } else {
            $gt = sq_findnxstr($body, $pos, ">");
            return Array(false, false, false, $lt, $gt);
        }
        break;
    default:
        /**
         * Assume tagtype 1 for now. If it's type 3, we'll switch values
         * later.
         */
        $tagtype = 1;
        break;
    }

    $tagname = '';
    /**
     * Look for next [\W-_], which will indicate the end of the tag name.
     */
    $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
    if ($regary == false){
        return Array(false, false, false, $lt, strlen($body));
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    switch ($match){
    case "/":
        /**
         * This is an xhtml-style tag with a closing / at the
         * end, like so: <img src="blah"/>. Check if it's followed
         * by the closing bracket. If not, then this tag is invalid
         */
        if (substr($body, $pos, 2) == "/>"){
            $pos++;
            $tagtype = 3;
        } else {
            $gt = sq_findnxstr($body, $pos, ">");
            $retary = Array(false, false, false, $lt, $gt);
            return $retary;
        }
        /* Deliberate fall-through: $pos now points at the ">". */
    case ">":
        return Array($tagname, false, $tagtype, $lt, $pos);
        break;
    default:
        /**
         * Check if it's whitespace
         */
        if (preg_match("/\s/", $match)){
        } else {
            /**
             * This is an invalid tag! Look for the next closing ">".
             * Search from the current position, not from $offset:
             * a ">" located before this tag's "<" must not be
             * reported as the end of the tag.
             */
            $gt = sq_findnxstr($body, $pos, ">");
            return Array(false, false, false, $lt, $gt);
        }
    }

    /**
     * At this point we're here:
     * <tagname  attribute='blah'>
     * \-------^
     *
     * At this point we loop in order to find all attributes.
     */
    $attname = '';
    $attary = Array();

    while ($pos <= strlen($body)){
        $pos = sq_skipspace($body, $pos);
        if ($pos == strlen($body)){
            /**
             * Non-closed tag.
             */
            return Array(false, false, false, $lt, $pos);
        }
        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag.
         */
        $matches = Array();
        if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) {
            /**
             * Yep. So we did.
             */
            $pos += strlen($matches[1]);
            if ($matches[2] == "/>"){
                $tagtype = 3;
                $pos++;
            }
            return Array($tagname, $attary, $tagtype, $lt, $pos);
        }

        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * We leave types 1 and 2 the same, type 3 we check for
         * '"' and convert to "&quot" if needed, then wrap in
         * double quotes. Type 4 we convert into:
         * attrname="yes".
         */
        $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
        if ($regary == false){
            /**
             * Looks like body ended before the end of tag.
             */
            return Array(false, false, false, $lt, strlen($body));
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);
        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        switch($match){
        case "/":
            /**
             * This is an xhtml-style tag with a closing / at the
             * end, like so: <img src="blah"/>. Check if it's followed
             * by the closing bracket. If not, then this tag is invalid
             */
            if (substr($body, $pos, 2) == "/>"){
                $pos++;
                $tagtype = 3;
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                $retary = Array(false, false, false, $lt, $gt);
                return $retary;
            }
            /* Deliberate fall-through: $pos now points at the ">". */
        case ">":
            $attary[$attname] = '"yes"';
            return Array($tagname, $attary, $tagtype, $lt, $pos);
            break;
        default:
            /**
             * Skip whitespace and see what we arrive at.
             */
            $pos = sq_skipspace($body, $pos);
            $char = substr($body, $pos, 1);
            /**
             * Two things are valid here:
             * '=' means this is attribute type 1 2 or 3.
             * \w means this was attribute type 4.
             * anything else we ignore and re-loop. End of tag and
             * invalid stuff will be caught by our checks at the beginning
             * of the loop.
             */
            if ($char == "="){
                $pos++;
                $pos = sq_skipspace($body, $pos);
                /**
                 * Here are 3 possibilities:
                 * "'"  attribute type 1
                 * '"'  attribute type 2
                 * everything else is the content of tag type 3
                 */
                $quot = substr($body, $pos, 1);
                if ($quot == "'"){
                    $regary = sq_findnxreg($body, $pos+1, "\'");
                    if ($regary == false){
                        return Array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    /* Step past the closing quote. */
                    $pos++;
                    $attary[$attname] = "'" . $attval . "'";
                } else if ($quot == '"'){
                    $regary = sq_findnxreg($body, $pos+1, '\"');
                    if ($regary == false){
                        return Array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    /* Step past the closing quote. */
                    $pos++;
                    $attary[$attname] = '"' . $attval . '"';
                } else {
                    /**
                     * These are hateful. Look for \s, or >.
                     */
                    $regary = sq_findnxreg($body, $pos, "[\s>]");
                    if ($regary == false){
                        return Array(false, false, false, $lt, strlen($body));
                    }
                    list($pos, $attval, $match) = $regary;
                    /**
                     * If it's ">" it will be caught at the top.
                     */
                    $attval = preg_replace("/\"/s", "&quot;", $attval);
                    $attary[$attname] = '"' . $attval . '"';
                }
            } else if (preg_match("|[\w/>]|", $char)) {
                /**
                 * That was attribute type 4.
                 */
                $attary[$attname] = '"yes"';
            } else {
                /**
                 * An illegal character. Find next '>' and return.
                 */
                $gt = sq_findnxstr($body, $pos, ">");
                return Array(false, false, false, $lt, $gt);
            }
        }
    }
    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return Array(false, false, false, $lt, strlen($body));
}
1596
/**
 * This function checks attribute values for entity-encoded values
 * and returns them translated into 8-bit strings so we can run
 * checks on them.
 *
 * Named entities are translated first (&quot; is left alone), then
 * numeric entities 1 to 255 in both decimal and hexadecimal notation
 * (34 and 39, the two quote characters, are left alone). Quotes stay
 * encoded so that the quoting of the attribute value is not disturbed.
 *
 * An entity is recognized when it is terminated by one or more ";",
 * or when it is directly followed by a character that can not belong
 * to it. The terminating ";" is removed together with the entity, so
 * that "javascrip&#x74;:" becomes "javascript:" and not "javascript;:".
 *
 * @param  $attvalue A string to run entity check against.
 * @return           Translated value.
 */
function sq_deent($attvalue){
    /**
     * See if we have to run the checks first. All entities must start
     * with "&".
     */
    if (strpos($attvalue, "&") === false){
        return $attvalue;
    }
    /**
     * "\" and "$" are special inside a preg_replace() replacement
     * string, so they are escaped before a decoded character is used
     * as one.
     */
    $repl_escape = array('\\' => '\\\\', '$' => '\\$');
    /**
     * Check named entities first. ENT_COMPAT is passed explicitly so
     * the table never contains an entry for the single quote, no matter
     * what the default flags of the running PHP version are.
     */
    $trans = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT));
    /**
     * Leave &quot; in, as it can mess us up.
     */
    unset($trans["&quot;"]);
    foreach ($trans as $ent => $val){
        $entname = preg_quote(rtrim($ent, ';'), '/');
        $attvalue = preg_replace("/$entname(?:;+|(?=\W))/si",
                                 strtr($val, $repl_escape), $attvalue);
    }
    /**
     * Now translate numbered entities from 1 to 255 if needed.
     */
    if (strpos($attvalue, "#") !== false){
        $omit = array(34, 39);
        for ($asc = 1; $asc < 256; $asc++){
            if (in_array($asc, $omit)){
                continue;
            }
            $chr = strtr(chr($asc), $repl_escape);
            /**
             * Decimal form: ends at ";" or in front of any non-digit.
             */
            $attvalue = preg_replace("/&#0*$asc(?:;+|(?=\D))/si",
                                     $chr, $attvalue);
            /**
             * Hexadecimal form: ends at ";" or in front of anything
             * that is not a hex digit.
             */
            $attvalue = preg_replace("/&#x0*" . dechex($asc) .
                                     "(?:;+|(?=[^0-9a-f]))/si",
                                     $chr, $attvalue);
        }
    }
    return $attvalue;
}
1643
/**
 * This function runs various checks against the attributes.
 *
 * For every attribute of the tag it:
 *  - drops the attribute if its name matches $rm_attnames for this tag
 *    (nothing else is done with a dropped attribute);
 *  - decodes entities in the value and runs the $bad_attvals
 *    match/replace lists against the decoded value, storing the result
 *    only when a replacement actually changed something;
 *  - rewrites values that start with a cid: url via sq_cid2http().
 * Finally the attributes listed in $add_attr_to_tag for this tag are
 * merged in.
 *
 * @param  $tagname         String with the name of the tag.
 * @param  $attary          Array with all tag attributes.
 * @param  $rm_attnames     See description for sq_sanitize
 * @param  $bad_attvals     See description for sq_sanitize
 * @param  $add_attr_to_tag See description for sq_sanitize
 * @param  $message         message object
 * @param  $id              message id
 * @return                  Array with modified attributes.
 */
function sq_fixatts($tagname,
                    $attary,
                    $rm_attnames,
                    $bad_attvals,
                    $add_attr_to_tag,
                    $message,
                    $id
                    ){
    /**
     * foreach walks a copy of the array, so removing or changing
     * entries of $attary inside the loop is safe.
     */
    foreach ($attary as $attname => $attvalue){
        /**
         * See if this attribute should be removed.
         */
        foreach ($rm_attnames as $matchtag => $matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr){
                    if (preg_match($matchattr, $attname)){
                        unset($attary[$attname]);
                        /**
                         * Go straight to the next attribute. Falling
                         * through to the checks below could put the
                         * attribute we just removed back in.
                         */
                        continue 3;
                    }
                }
            }
        }
        /**
         * Remove any entities.
         */
        $attvalue = sq_deent($attvalue);

        /**
         * Now run the checks on the attribute values. $bad_attvals is
         * keyed by a tag name regexp; each entry is keyed by an
         * attribute name regexp and holds the patterns and their
         * replacements.
         */
        foreach ($bad_attvals as $matchtag => $matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr => $valary){
                    if (preg_match($matchattr, $attname)){
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue =
                            preg_replace($valmatch, $valrepl, $attvalue);
                        if ($newvalue !== $attvalue){
                            $attary[$attname] = $newvalue;
                        }
                    }
                }
            }
        }
        /**
         * Turn cid: urls into http-friendly ones.
         */
        if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
            $attary[$attname] = sq_cid2http($message, $id, $attvalue);
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag => $addattary){
        if (preg_match($matchtag, $tagname)){
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
1726
/**
 * This function edits the style definition to make them friendly and
 * usable in squirrelmail.
 *
 * It renames "body {...}" rules to ".bodyclass {...}", replaces quoted
 * url() values pointing at a "...script:" scheme (and at http/https
 * when $view_unsafe_images is false) with the "removed for security"
 * image, turns url('cid:...') references into download urls and
 * defuses the expression/behavior/binding keywords.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $content  a string with whatever is between <style> and </style>
 * @return           a string with edited content.
 */
function sq_fixstyle($message, $id, $content){
    global $view_unsafe_images;
    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
    $secremoveimg = "../images/" . _("sec_remove_eng.png");
    /**
     * Fix url('blah') declarations.
     */
    $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si",
                            "url(\\1$secremoveimg\\2)", $content);
    /**
     * Fix url('https*://.*) declarations but only if $view_unsafe_images
     * is false.
     */
    if (!$view_unsafe_images){
        $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si",
                                "url(\\1$secremoveimg\\2)", $content);
    }

    /**
     * Fix urls that refer to cid:
     *
     * The matched text comes from the message, so it is replaced as a
     * plain string and never used as a pattern. Every pass removes the
     * occurrence that was just matched, which guarantees that the loop
     * terminates.
     */
    while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content,
                      $matches)){
        $httpurl = sq_cid2http($message, $id, $matches[1]);
        $content = str_replace($matches[0], "url($httpurl)", $content);
    }

    /**
     * Fix stupid css declarations which lead to vulnerabilities
     * in IE.
     */
    $match   = array('/expression/si',
                     '/behaviou*r/si',
                     '/binding/si');
    $replace = array('idiocy', 'idiocy', 'idiocy');
    $content = preg_replace($match, $replace, $content);
    return $content;
}
1783
/**
 * Builds the download.php url under which the message part referenced
 * by a cid: url can be fetched by the browser.
 *
 * The result is wrapped in the same character the incoming value
 * started with.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $cidurl   the cid: url.
 * @return           a string with a http-friendly url
 */
function sq_cid2http($message, $id, $cidurl){
    /**
     * The first character is taken to be the quote the value is
     * wrapped in; every occurrence of it is dropped.
     */
    $quote = substr($cidurl, 0, 1);
    $unquoted = trim(str_replace($quote, "", $cidurl));
    /**
     * Skip the first four characters (the "cid:" prefix).
     */
    $contentid = substr($unquoted, 4);

    $query = "absolute_dl=true&amp;passed_id=$id";
    $query .= "&amp;mailbox=" . urlencode($message->header->mailbox);
    $query .= "&amp;passed_ent_id=" . find_ent_id($contentid, $message);

    return $quote . "../src/download.php?" . $query . $quote;
}
1805
/**
 * This function changes the <body> tag into a <div> tag since we
 * can't really have a body-within-body.
 *
 * The background, bgcolor and text attributes are turned into an inline
 * style definition; every other attribute is dropped. The resulting
 * <div> always gets class='bodyclass'.
 *
 * @param  $attary  an array of attributes and values of <body>
 * @return          a modified array of attributes to be set for <div>
 */
function sq_body2div($attary){
    $divattary = array("class" => "'bodyclass'");
    $styledef = "";
    if (is_array($attary) && sizeof($attary) > 0){
        foreach ($attary as $attname => $attvalue){
            /**
             * Get rid of both kinds of quotes. The value ends up inside
             * a double-quoted style attribute (and inside url('...')
             * for the background), so a quote of either kind left in it
             * would let the value break out of that attribute.
             */
            $attvalue = str_replace(array("'", '"'), "", $attvalue);
            switch ($attname){
            case "background":
                $styledef .= "background-image: url('$attvalue'); ";
                break;
            case "bgcolor":
                $styledef .= "background-color: $attvalue; ";
                break;
            case "text":
                $styledef .= "color: $attvalue; ";
                break;
            }
        }
        if (strlen($styledef) > 0){
            $divattary["style"] = "\"$styledef\"";
        }
    }
    return $divattary;
}
1840
/**
 * This is the main function and the one you should actually be calling.
 * There are several variables you should be aware of and which need
 * special description.
 *
 * Since the description is quite lengthy, see it here:
 * http://www.mricon.com/html/phpfilter.html
 *
 * In short: the body is walked tag by tag with sq_getnxtag(). Text
 * between tags is copied to the output, tags are dropped, rewritten
 * or printed with sq_tagprint() according to the lists passed in, and
 * the result is wrapped in "begin/end sanitized html" comments.
 *
 * @param $body                 the string with HTML you wish to filter
 * @param $tag_list             see description above. The first element
 *                              is a flag: false means the rest are tags
 *                              to remove, true means they are the only
 *                              tags to allow.
 * @param $rm_tags_with_content see description above
 * @param $self_closing_tags    see description above
 * @param $force_tag_closing    see description above
 * @param $rm_attnames          see description above
 * @param $bad_attvals          see description above
 * @param $add_attr_to_tag      see description above
 * @param $message              message object
 * @param $id                   message id
 * @return                      sanitized html safe to show on your pages.
 */
function sq_sanitize($body,
                     $tag_list,
                     $rm_tags_with_content,
                     $self_closing_tags,
                     $force_tag_closing,
                     $rm_attnames,
                     $bad_attvals,
                     $add_attr_to_tag,
                     $message,
                     $id
                     ){
    /**
     * See if tag_list is of tags to remove or tags to allow.
     * false  means remove these tags
     * true   means allow these tags
     * The flag has to be taken off before the tag names are normalized.
     */
    $rm_tags = array_shift($tag_list);
    /**
     * Normalize tag_list, rm_tags_with_content and self_closing_tags.
     */
    if (is_array($tag_list)){
        array_walk($tag_list, 'sq_casenormalize');
    }
    if (is_array($rm_tags_with_content)){
        array_walk($rm_tags_with_content, 'sq_casenormalize');
    }
    if (is_array($self_closing_tags)){
        array_walk($self_closing_tags, 'sq_casenormalize');
    }
    $curpos = 0;
    /**
     * Number of currently open tags, by tag name.
     */
    $open_tags = array();
    $trusted = "<!-- begin sanitized html -->\n";
    /**
     * false, or the name of the tag whose content is being dropped.
     */
    $skip_content = false;
    /**
     * Take care of netscape's stupid javascript entities like
     * &{alert('boo')};
     */
    $body = preg_replace("/&(\{.*?\};)/si", "&amp;\\1", $body);

    /**
     * sq_getnxtag hands back the tag name, its attributes, the tag
     * type (1 for opening, 2 for closing, 3 for self-closing) and the
     * positions of "<" and ">". An invalid tag comes back with a false
     * tag name and is simply left out.
     */
    while (($curtag = sq_getnxtag($body, $curpos)) != false){
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        $free_content = substr($body, $curpos, $lt - $curpos);
        /**
         * Take care of <style>
         */
        if ($tagname == "style" && $tagtype == 2){
            /**
             * This is a closing </style>. Edit the
             * content before we apply it.
             */
            $free_content = sq_fixstyle($message, $id, $free_content);
        }
        if ($skip_content == false){
            $trusted .= $free_content;
        }
        if ($tagname != false){
            if ($tagtype == 2){
                if ($skip_content == $tagname){
                    /**
                     * Got to the end of tag we needed to remove.
                     */
                    $tagname = false;
                    $skip_content = false;
                } else if ($skip_content == false){
                    if ($tagname == "body"){
                        $tagname = "div";
                    } else if (isset($open_tags[$tagname]) &&
                               $open_tags[$tagname] > 0){
                        $open_tags[$tagname]--;
                    } else {
                        /**
                         * Closing tag without an opening one.
                         */
                        $tagname = false;
                    }
                }
            } else if ($skip_content == false){
                /**
                 * See if this is a self-closing type and change
                 * tagtype appropriately.
                 */
                if ($tagtype == 1
                    && in_array($tagname, $self_closing_tags)){
                    $tagtype = 3;
                }
                if (in_array($tagname, $rm_tags_with_content)){
                    if ($tagtype == 1){
                        /**
                         * Skip this tag and any content inside it.
                         */
                        $skip_content = $tagname;
                    } else {
                        /**
                         * Not an opening tag (e.g. written in the
                         * self-closing form): there is no content to
                         * skip, but the tag itself must not get
                         * through either.
                         */
                        $tagname = false;
                    }
                } else if (($rm_tags == false
                            && in_array($tagname, $tag_list)) ||
                           ($rm_tags == true &&
                            !in_array($tagname, $tag_list))){
                    $tagname = false;
                } else {
                    if ($tagtype == 1){
                        if (isset($open_tags[$tagname])){
                            $open_tags[$tagname]++;
                        } else {
                            $open_tags[$tagname] = 1;
                        }
                    }
                    /**
                     * This is where we run other checks.
                     */
                    if (is_array($attary) && sizeof($attary) > 0){
                        $attary = sq_fixatts($tagname,
                                             $attary,
                                             $rm_attnames,
                                             $bad_attvals,
                                             $add_attr_to_tag,
                                             $message,
                                             $id
                                             );
                    }
                    /**
                     * Convert body into div.
                     */
                    if ($tagname == "body"){
                        $tagname = "div";
                        $attary = sq_body2div($attary);
                    }
                }
            }
            if ($tagname != false && $skip_content == false){
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
            }
        }
        $curpos = $gt + 1;
    }
    /**
     * Whatever is left after the last tag.
     */
    $trusted .= substr($body, $curpos, strlen($body) - $curpos);
    if ($force_tag_closing == true){
        foreach ($open_tags as $tagname => $opentimes){
            while ($opentimes > 0){
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end sanitized html -->\n";
    return $trusted;
}
2014
/**
 * This is a wrapper function to call html sanitizing routines.
 *
 * It sets up the tag and attribute rules used for displaying mail and
 * passes them to sq_sanitize(). As side effects it switches the global
 * $attachment_common_show_images off and sets the global
 * $has_unsafe_images to true when the "removed for security" image
 * shows up in the sanitized output.
 *
 * @param  $body     the body of the message
 * @param  $id       the id of the message
 * @param  $message  the message object
 * @return           a string with html safe to display in the browser.
 */
function magicHTML($body, $id, $message){
    global $attachment_common_show_images, $view_unsafe_images,
        $has_unsafe_images;
    /**
     * Don't display attached images in HTML mode.
     */
    $attachment_common_show_images = false;
    /**
     * The leading false marks the rest of the list as tags to remove.
     */
    $tag_list = array(
                      false,
                      "object",
                      "meta",
                      "html",
                      "head",
                      "base"
                      );

    $rm_tags_with_content = array(
                                  "script",
                                  "applet",
                                  "embed",
                                  "title"
                                  );

    $self_closing_tags =  array(
                                "img",
                                "br",
                                "hr",
                                "input"
                                );

    $force_tag_closing = false;

    $rm_attnames = array(
                         "/.*/" =>
                         array(
                               "/target/si",
                               "/^on.*/si",
                               "/^dynsrc/si",
                               "/^data.*/si"
                               )
                         );

    $secremoveimg = "../images/" . _("sec_remove_eng.png");
    /**
     * Tag regexp => attribute regexp => (patterns, replacements).
     */
    $bad_attvals = array(
        "/.*/" =>
            array(
                "/^src|background/i" =>
                    array(
                          array(
                                "|^([\'\"])\s*\.\./.*([\'\"])|si",
                                "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                                "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                                "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                                ),
                          array(
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2"
                                )
                        ),
                "/^href|action/i" =>
                    array(
                          array(
                                "|^([\'\"])\s*\.\./.*([\'\"])|si",
                                "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                                "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                                "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                                ),
                          array(
                                "\\1#\\2",
                                "\\1#\\2",
                                "\\1#\\2",
                                "\\1#\\2"
                                )
                        ),
                "/^style/si" =>
                    array(
                          array(
                                "/expression/si",
                                "/binding/si",
                                "/behaviou*r/si",
                                "|url\(([\'\"])\s*\.\./.*([\'\"])\)|si",
                                "/url\(([\'\"])\s*\S+script\s*:.*([\'\"])\)/si",
                                "/url\(([\'\"])\s*mocha\s*:.*([\'\"])\)/si",
                                "/url\(([\'\"])\s*about\s*:.*([\'\"])\)/si"
                               ),
                          array(
                                "idiocy",
                                "idiocy",
                                "idiocy",
                                "url(\\1#\\2)",
                                "url(\\1#\\2)",
                                "url(\\1#\\2)",
                                "url(\\1#\\2)"
                               )
                          )
                )
        );
    if (!$view_unsafe_images){
        /**
         * Remove any references to http/https if view_unsafe_images set
         * to false.
         */
        $bad_attvals['/.*/']['/^src|background/i'][0][] =
            '/^([\'\"])\s*https*:.*([\'\"])/si';
        $bad_attvals['/.*/']['/^src|background/i'][1][] =
            "\\1$secremoveimg\\2";
        $bad_attvals['/.*/']['/^style/si'][0][] =
            '/url\(([\'\"])\s*https*:.*([\'\"])\)/si';
        $bad_attvals['/.*/']['/^style/si'][1][] =
            "url(\\1$secremoveimg\\2)";
    }

    $add_attr_to_tag = array(
                             "/^a$/si" => array('target'=>'"_new"')
                             );
    $trusted = sq_sanitize($body,
                           $tag_list,
                           $rm_tags_with_content,
                           $self_closing_tags,
                           $force_tag_closing,
                           $rm_attnames,
                           $bad_attvals,
                           $add_attr_to_tag,
                           $message,
                           $id
                           );
    /**
     * The image name goes through the translation function, so quote it
     * before looking for it with a regexp.
     */
    if (preg_match('|' . preg_quote($secremoveimg, '|') . '|si', $trusted)){
        $has_unsafe_images = true;
    }
    return $trusted;
}
2155 ?>