functions/mime.php

   1 <?php
   2
   3 /**
   4  * mime.php
   5  *
   6  * Copyright (c) 1999-2002 The SquirrelMail Project Team
   7  * Licensed under the GNU GPL. For full terms see the file COPYING.
   8  *
   9  * This contains the functions necessary to detect and decode MIME
  10  * messages.
  11  *
  12  * $Id$
  13  */
  14
  15 require_once(SM_PATH . 'functions/imap.php');
  16 require_once(SM_PATH . 'functions/attachment_common.php');
  17
  18 /* --------------------------------------------------------------------------------- */
  19 /* MIME DECODING                                                                     */
  20 /* --------------------------------------------------------------------------------- */
  21
  22 /* This function gets the structure of a message and stores it in the "message" class.
  23  * It will return this object for use with all relevant header information and
  24  * fully parsed into the standard "message" object format.
  25  */
  26
  27 function mime_structure ($bodystructure, $flags=array()) {
  28
  29     /* Isolate the body structure and remove beginning and end parenthesis. */
  30     $read = trim(substr ($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13));
  31     $read = trim(substr ($read, 0, -1));
  32     $msg =& new Message();
  33     $res  = $msg->parseStructure($read);
  34     $msg  = $res[0];
  35     if (!is_object($msg)) {
  36         include_once(SM_PATH . 'functions/display_messages.php');
  37         global $color, $mailbox;
  38         displayPageHeader( $color, urldecode($mailbox) );
  39         echo "<BODY TEXT=\"$color[8]\" BGCOLOR=\"$color[4]\" LINK=\"$color[7]\" VLINK=\"$color[7]\" ALINK=\"$color[7]\">\n\n" .
  40          '<CENTER>';
  41         $errormessage  = _("Squirrelmail could not decode the bodystructure of the message");
  42         $errormessage .= '<BR>'._("the provided bodystructure by your imap-server").':<BR><BR>';
  43         $errormessage .= '<table><tr><td>' . htmlspecialchars($read) . '</td></tr></table>';
  44         plain_error_message( $errormessage, $color );
  45         echo '</body></html>';
  46         exit;
  47     }
  48     $msg->setEnt('0');
  49     if (count($flags)) {
  50         foreach ($flags as $flag) {
  51             $char = strtoupper($flag{1});
  52             switch ($char) {
  53                 case 'S':
  54                     if (strtolower($flag) == '\\seen') {
  55                         $msg->is_seen = true;
  56                     }
  57                     break;
  58                 case 'A':
  59                     if (strtolower($flag) == '\\answered') {
  60                         $msg->is_answered = true;
  61                     }
  62                     break;
  63                 case 'D':
  64                     if (strtolower($flag) == '\\deleted') {
  65                         $msg->is_deleted = true;
  66                     }
  67                     break;
  68                 case 'F':
  69                     if (strtolower($flag) == '\\flagged') {
  70                         $msg->is_flagged = true;
  71                     }
  72                     break;
  73                 case 'M':
  74                     if (strtolower($flag) == '$mdnsent') {
  75                         $msg->is_mdnsent = true;
  76                     }
  77                     break;
  78                 default:
  79                     break;
  80             }
  81         }
  82     }
  83     //    listEntities($msg);
  84     return $msg;
  85 }
  86
  87 /* This starts the parsing of a particular structure.  It is called recursively,
  88  * so it can be passed different structures.  It returns an object of type
  89  * $message.
  90  * First, it checks to see if it is a multipart message.  If it is, then it
  91  * handles that as it sees is necessary.  If it is just a regular entity,
  92  * then it parses it and adds the necessary header information (by calling out
   93  * to mime_get_elements()).
  94  */
  95
  96 function mime_fetch_body($imap_stream, $id, $ent_id) {
  97     global $uid_support;
  98     /* Do a bit of error correction.  If we couldn't find the entity id, just guess
  99      * that it is the first one.  That is usually the case anyway.
 100      */
 101     if (!$ent_id) {
 102         $ent_id = 1;
 103     }
 104     $cmd = "FETCH $id BODY[$ent_id]";
 105
 106     $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, $uid_support);
 107     do {
 108         $topline = trim(array_shift($data));
 109     } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ;
 110
 111     $wholemessage = implode('', $data);
 112     if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) {
 113         $ret = substr($wholemessage, 0, $regs[1]);
 114         /* There is some information in the content info header that could be important
 115          * in order to parse html messages. Let's get them here.
 116          */
 117         if ($ret{0} == '<') {
 118             $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, $uid_support);
 119         }
 120     } else if (ereg('"([^"]*)"', $topline, $regs)) {
 121         $ret = $regs[1];
 122     } else {
 123         global $where, $what, $mailbox, $passed_id, $startMessage;
 124         $par = 'mailbox=' . urlencode($mailbox) . '&amp;passed_id=' . $passed_id;
 125         if (isset($where) && isset($what)) {
 126             $par .= '&amp;where=' . urlencode($where) . '&amp;what=' . urlencode($what);
 127         } else {
 128             $par .= '&amp;startMessage=' . $startMessage . '&amp;show_more=0';
 129         }
 130         $par .= '&amp;response=' . urlencode($response) .
 131                 '&amp;message='  . urlencode($message)  .
 132                 '&amp;topline='  . urlencode($topline);
 133
 134         echo   '<tt><br>' .
 135                '<table width="80%"><tr>' .
 136                '<tr><td colspan=2>' .
 137                _("Body retrieval error. The reason for this is most probably that the message is malformed.") .
 138                '</td></tr>' .
 139                '<tr><td><b>' . _("Command:") . "</td><td>$cmd</td></tr>" .
 140                '<tr><td><b>' . _("Response:") . "</td><td>$response</td></tr>" .
 141                '<tr><td><b>' . _("Message:") . "</td><td>$message</td></tr>" .
 142                '<tr><td><b>' . _("FETCH line:") . "</td><td>$topline</td></tr>" .
 143                "</table><BR></tt></font><hr>";
 144
 145         $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, $uid_support);
 146         array_shift($data);
 147         $wholemessage = implode('', $data);
 148
 149         $ret = $wholemessage;
 150     }
 151     return $ret;
 152 }
 153
 154 function mime_print_body_lines ($imap_stream, $id, $ent_id, $encoding) {
 155     global $uid_support;
 156     /* Do a bit of error correction.  If we couldn't find the entity id, just guess
 157      * that it is the first one.  That is usually the case anyway.
 158      */
 159     if (!$ent_id) {
 160         $ent_id = 1;
 161     }
 162     $sid = sqimap_session_id($uid_support);
 163     /* Don't kill the connection if the browser is over a dialup
 164      * and it would take over 30 seconds to download it.
 165      * Don´t call set_time_limit in safe mode.
 166      */
 167
 168     if (!ini_get('safe_mode')) {
 169         set_time_limit(0);
 170     }
 171     if ($uid_support) {
 172        $sid_s = substr($sid,0,strpos($sid, ' '));
 173     } else {
 174        $sid_s = $sid;
 175     }
 176
 177     $body = mime_fetch_body ($imap_stream, $id, $ent_id);
 178     echo decodeBody($body, $encoding);
 179     return;
 180 /*
 181     fputs ($imap_stream, "$sid FETCH $id BODY[$ent_id]\r\n");
 182     $cnt = 0;
 183     $continue = true;
 184     $read = fgets ($imap_stream,8192);
 185
 186
 187     // This could be bad -- if the section has sqimap_session_id() . ' OK'
 188     // or similar, it will kill the download.
 189     while (!ereg("^".$sid_s." (OK|BAD|NO)(.*)$", $read, $regs)) {
 190         if (trim($read) == ')==') {
 191             $read1 = $read;
 192             $read = fgets ($imap_stream,4096);
 193             if (ereg("^".$sid." (OK|BAD|NO)(.*)$", $read, $regs)) {
 194                 return;
 195             } else {
 196                 echo decodeBody($read1, $encoding) .
 197                      decodeBody($read, $encoding);
 198             }
 199         } else if ($cnt) {
 200             echo decodeBody($read, $encoding);
 201         }
 202         $read = fgets ($imap_stream,4096);
 203         $cnt++;
 204 //      break;
 205     }
 206 */
 207 }
 208
 209 /* -[ END MIME DECODING ]----------------------------------------------------------- */
 210
 211 /* This is here for debugging purposes.  It will print out a list
 212  * of all the entity IDs that are in the $message object.
 213  */
 214 function listEntities ($message) {
 215     if ($message) {
 216         echo "<tt>" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $message->parent->entity_id. '<br>';
 217         for ($i = 0; isset($message->entities[$i]); $i++) {
 218             echo "$i : ";
 219             $msg = listEntities($message->entities[$i]);
 220
 221             if ($msg) {
 222                 echo "return: ";
 223                 return $msg;
 224             }
 225         }
 226     }
 227 }
 228
 229 function getPriorityStr($priority) {
 230     $priority_level = substr($priority,0,1);
 231
 232     switch($priority_level) {
 233         /* Check for a higher then normal priority. */
 234         case '1':
 235         case '2':
 236             $priority_string = _("High");
 237             break;
 238
 239         /* Check for a lower then normal priority. */
 240         case '4':
 241         case '5':
 242             $priority_string = _("Low");
 243             break;
 244
 245         /* Check for a normal priority. */
 246         case '3':
 247         default:
 248             $priority_level = '3';
 249             $priority_string = _("Normal");
 250             break;
 251
 252     }
 253     return $priority_string;
 254 }
 255
 256 /* returns a $message object for a particular entity id */
 257 function getEntity ($message, $ent_id) {
 258     return $message->getEntity($ent_id);
 259 }
 260
 261 /* translateText
 262  * Extracted from strings.php 23/03/2002
 263  */
 264
 265 function translateText(&$body, $wrap_at, $charset) {
 266     global $where, $what;   /* from searching */
 267     global $color;          /* color theme */
 268
 269     require_once(SM_PATH . 'functions/url_parser.php');
 270
 271     $body_ary = explode("\n", $body);
 272     for ($i=0; $i < count($body_ary); $i++) {
 273         $line = $body_ary[$i];
 274         if (strlen($line) - 2 >= $wrap_at) {
 275             sqWordWrap($line, $wrap_at);
 276         }
 277         $line = charset_decode($charset, $line);
 278         $line = str_replace("\t", '        ', $line);
 279
 280         parseUrl ($line);
 281
 282         $quotes = 0;
 283         $pos = 0;
 284         $j = strlen($line);
 285
 286         while ($pos < $j) {
 287             if ($line[$pos] == ' ') {
 288                 $pos++;
 289             } else if (strpos($line, '&gt;', $pos) === $pos) {
 290                 $pos += 4;
 291                 $quotes++;
 292             } else {
 293                 break;
 294             }
 295         }
 296
 297         if ($quotes > 1) {
 298             if (!isset($color[14])) {
 299                 $color[14] = '#FF0000';
 300             }
 301             $line = '<FONT COLOR="' . $color[14] . '">' . $line . '</FONT>';
 302         } elseif ($quotes) {
 303             if (!isset($color[13])) {
 304                 $color[13] = '#800000';
 305             }
 306             $line = '<FONT COLOR="' . $color[13] . '">' . $line . '</FONT>';
 307         }
 308
 309         $body_ary[$i] = $line;
 310     }
 311     $body = '<pre>' . implode("\n", $body_ary) . '</pre>';
 312 }
 313
 314
 315 /* This returns a parsed string called $body. That string can then
 316  * be displayed as the actual message in the HTML. It contains
 317  * everything needed, including HTML Tags, Attachments at the
 318  * bottom, etc.
 319  */
 320 function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX') {
 321     /* This if statement checks for the entity to show as the
 322      * primary message. To add more of them, just put them in the
 323      * order that is their priority.
 324      */
 325     global $startMessage, $username, $key, $imapServerAddress, $imapPort,
 326            $show_html_default, $has_unsafe_images, $view_unsafe_images, $sort;
 327
 328     $has_unsafe_images= 0;
 329     $body = '';
 330     $urlmailbox = urlencode($mailbox);
 331     $body_message = getEntity($message, $ent_num);
 332     if (($body_message->header->type0 == 'text') ||
 333         ($body_message->header->type0 == 'rfc822')) {
 334         $body = mime_fetch_body ($imap_stream, $id, $ent_num);
 335         $body = decodeBody($body, $body_message->header->encoding);
 336         $hookResults = do_hook("message_body", $body);
 337         $body = $hookResults[1];
 338
 339         /* If there are other types that shouldn't be formatted, add
 340          * them here.
 341          */
 342
 343         if ($body_message->header->type1 == 'html') {
 344             if ($show_html_default <> 1) {
 345                 $entity_conv = array('&nbsp;' => ' ',
 346                                      '<p>'    => "\n",
 347                                      '<br>'   => "\n",
 348                                      '<P>'    => "\n",
 349                                      '<BR>'   => "\n",
 350                                      '&gt;'   => '>',
 351                                      '&lt;'   => '<');
 352                 $body = strtr($body, $entity_conv);
 353                 $body = strip_tags($body);
 354                 $body = trim($body);
 355                 translateText($body, $wrap_at,
 356                               $body_message->header->getParameter('charset'));
 357             } else {
 358                 $body = magicHTML($body, $id, $message, $mailbox);
 359             }
 360         } else {
 361             translateText($body, $wrap_at,
 362                           $body_message->header->getParameter('charset'));
 363         }
 364
 365         if ($has_unsafe_images) {
 366             if ($view_unsafe_images) {
 367                 $untext = '">' . _("Hide Unsafe Images");
 368             } else {
 369                 $untext = '&amp;view_unsafe_images=1">' . _("View Unsafe Images");
 370             }
 371             $body .= '<center><small><a href="read_body.php?passed_id=' . $id .
 372                      '&amp;passed_ent_id=' . $message->entity_id . '&amp;mailbox=' . $urlmailbox .
 373                      '&amp;sort=' . $sort . '&amp;startMessage=' . $startMessage . '&amp;show_more=0' .
 374                      $untext . '</a></small></center><br>' . "\n";
 375         }
 376     }
 377     return $body;
 378 }
 379
 380
 381 function formatAttachments($message, $exclude_id, $mailbox, $id) {
 382     global $where, $what, $startMessage, $color;
 383     static $ShownHTML = 0;
 384
 385     $att_ar = $message->getAttachments($exclude_id);
 386
 387     if (!count($att_ar)) return '';
 388
 389     $attachments = '';
 390
 391     $urlMailbox = urlencode($mailbox);
 392
 393     foreach ($att_ar as $att) {
 394         $ent = urldecode($att->entity_id);
 395         $header = $att->header;
 396         $type0 = strtolower($header->type0);
 397         $type1 = strtolower($header->type1);
 398         $name = '';
 399         $links['download link']['text'] = _("download");
 400         $links['download link']['href'] =
 401                 "../src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;ent_id=$ent";
 402         $ImageURL = '';
 403         if ($type0 =='message' && $type1 == 'rfc822') {
 404             $default_page = '../src/read_body.php';
 405             $rfc822_header = $att->rfc822_header;
 406             $filename = decodeHeader($rfc822_header->subject);
 407
 408             $from_o = $rfc822_header->from;
 409             if (is_object($from_o)) {
 410                 $from_name = $from_o->getAddress(false);
 411             } else {
 412                 $from_name = _("Unknown sender");
 413             }
 414             $from_name = decodeHeader(htmlspecialchars($from_name));
 415             $description = $from_name;
 416         } else {
 417             $default_page = '../src/download.php';
 418             if (is_object($header->disposition)) {
 419                 $filename = decodeHeader($header->disposition->getProperty('filename'));
 420                 if (trim($filename) == '') {
 421                     $name = decodeHeader($header->disposition->getProperty('name'));
 422                     if (trim($name) == '') {
 423                         if (trim( $header->id ) == '') {
 424                             $filename = 'untitled-[' . $ent . ']' ;
 425                         } else {
 426                             $filename = 'cid: ' . $header->id;
 427                         }
 428                     } else {
 429                         $filename = $name;
 430                     }
 431                 }
 432             } else {
 433                 if (trim( $header->id ) == '') {
 434                     $filename = 'untitled-[' . $ent . ']' ;
 435                 } else {
 436                     $filename = 'cid: ' . $header->id;
 437                 }
 438             }
 439
 440             if ($header->description) {
 441                 $description = htmlspecialchars($header->description);
 442             } else {
 443                 $description = '';
 444             }
 445         }
 446
 447         $display_filename = $filename;
 448         if (isset($passed_ent_id)) {
 449             $passed_ent_id_link = '&amp;passed_ent_id='.$passed_ent_id;
 450         } else {
 451             $passed_ent_id_link = '';
 452         }
 453         $defaultlink = $default_page . "?startMessage=$startMessage"
 454                      . "&amp;passed_id=$id&amp;mailbox=$urlMailbox"
 455                      . '&amp;ent_id='.$ent.$passed_ent_id_link;
 456         if ($where && $what) {
 457            $defaultlink .= '&amp;where='. urlencode($where).'&amp;what='.urlencode($what);
 458         }
 459         /* This executes the attachment hook with a specific MIME-type.
 460          * If that doesn't have results, it tries if there's a rule
 461          * for a more generic type.
 462          */
 463         $hookresults = do_hook("attachment $type0/$type1", $links,
 464                                $startMessage, $id, $urlMailbox, $ent, $defaultlink,
 465                                $display_filename, $where, $what);
 466         if(count($hookresults[1]) <= 1) {
 467             $hookresults = do_hook("attachment $type0/*", $links,
 468                                    $startMessage, $id, $urlMailbox, $ent, $defaultlink,
 469                                    $display_filename, $where, $what);
 470         }
 471
 472         $links = $hookresults[1];
 473         $defaultlink = $hookresults[6];
 474
 475         $attachments .= '<TR><TD>' .
 476                         "<A HREF=\"$defaultlink\">$display_filename</A>&nbsp;</TD>" .
 477                         '<TD><SMALL><b>' . show_readable_size($header->size) .
 478                         '</b>&nbsp;&nbsp;</small></TD>' .
 479                         "<TD><SMALL>[ $type0/$type1 ]&nbsp;</SMALL></TD>" .
 480                         '<TD><SMALL>';
 481         $attachments .= '<b>' . $description . '</b>';
 482         $attachments .= '</SMALL></TD><TD><SMALL>&nbsp;';
 483
 484         $skipspaces = 1;
 485         foreach ($links as $val) {
 486             if ($skipspaces) {
 487                 $skipspaces = 0;
 488             } else {
 489                 $attachments .= '&nbsp;&nbsp;|&nbsp;&nbsp;';
 490             }
 491             $attachments .= '<a href="' . $val['href'] . '">' .  $val['text'] . '</a>';
 492         }
 493         unset($links);
 494         $attachments .= "</TD></TR>\n";
 495     }
 496     return $attachments;
 497 }
 498
 499 /* This function decodes the body depending on the encoding type. */
 500 function decodeBody($body, $encoding) {
 501     global $languages, $squirrelmail_language;
 502     global $show_html_default;
 503
 504     $body = str_replace("\r\n", "\n", $body);
 505     $encoding = strtolower($encoding);
 506
 507     if ($encoding == 'quoted-printable' ||
 508         $encoding == 'quoted_printable') {
 509         $body = quoted_printable_decode($body);
 510
 511         while (ereg("=\n", $body)) {
 512             $body = ereg_replace ("=\n", '', $body);
 513         }
 514
 515     } else if ($encoding == 'base64') {
 516         $body = base64_decode($body);
 517     }
 518
 519     if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
 520         function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
 521         $body = $languages[$squirrelmail_language]['XTRA_CODE']('decode', $body);
 522     }
 523
 524     // All other encodings are returned raw.
 525     return $body;
 526 }
 527
 528 /*
 529  * This functions decode strings that is encoded according to
 530  * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
 531  * Patched by Christian Schmidt <christian@ostenfeld.dk>  23/03/2002
 532  */
 533 function decodeHeader ($string, $utfencode=true) {
 534     global $languages, $squirrelmail_language;
 535     if (is_array($string)) {
 536         $string = implode("\n", $string);
 537     }
 538
 539     if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
 540         function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
 541         $string = $languages[$squirrelmail_language]['XTRA_CODE']('decodeheader', $string);
 542     }
 543
 544     $i = 0;
 545     while (preg_match('/^(.{' . $i . '})(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=/Ui',
 546                       $string, $res)) {
 547         $prefix = $res[1];
 548         /* Ignore white-space between consecutive encoded-words. */
 549         if (strspn($res[2], " \t") != strlen($res[2])) {
 550             $prefix .= $res[2];
 551         }
 552
 553         if (ucfirst($res[4]) == 'B') {
 554             $replace = base64_decode($res[5]);
 555         } else {
 556             $replace = str_replace('_', ' ', $res[5]);
 557             $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))',
 558                                     $replace);
 559             /* Only encode into entities by default. Some places
 560              * don't need the encoding, like the compose form.
 561              */
 562             if ($utfencode) {
 563                 $replace = charset_decode($res[3], $replace);
 564             }
 565         }
 566         $string = $prefix . $replace . substr($string, strlen($res[0]));
 567         $i = strlen($prefix) + strlen($replace);
 568     }
 569     return $string;
 570 }
 571
 572 /*
 573  * Encode a string according to RFC 1522 for use in headers if it
 574  * contains 8-bit characters or anything that looks like it should
 575  * be encoded.
 576  */
 577 function encodeHeader ($string) {
 578     global $default_charset, $languages, $squirrelmail_language;
 579
 580     if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
 581         function_exists($languages[$squirrelmail_language]['XTRA_CODE'])) {
 582         return  $languages[$squirrelmail_language]['XTRA_CODE']('encodeheader', $string);
 583     }
 584
 585     // Encode only if the string contains 8-bit characters or =?
 586     $j = strlen($string);
 587     $l = strstr($string, '=?');         // Must be encoded ?
 588     $ret = '';
 589     for($i = 0; $i < $j; ++$i) {
 590         switch($string{$i}) {
 591             case '=':
 592                 $ret .= '=3D';
 593                 break;
 594             case '?':
 595                 $ret .= '=3F';
 596                 break;
 597             case '_':
 598                 $ret .= '=5F';
 599                 break;
 600             case ' ':
 601                 $ret .= '_';
 602                 break;
 603             default:
 604                 $k = ord($string{$i});
 605                 if ($k > 126) {
 606                     $ret .= sprintf("=%02X", $k);
 607                     $l = TRUE;
 608                 } else {
 609                     $ret .= $string{$i};
 610                 }
 611                 break;
 612         }
 613     }
 614
 615     if ($l) {
 616         $string = "=?$default_charset?Q?$ret?=";
 617     }
 618
 619     return $string;
 620 }
 621
 622 /* This function trys to locate the entity_id of a specific mime element */
 623 function find_ent_id($id, $message) {
 624     for ($i = 0, $ret = ''; $ret == '' && $i < count($message->entities); $i++) {
 625         if ($message->entities[$i]->header->type0 == 'multipart')  {
 626             $ret = find_ent_id($id, $message->entities[$i]);
 627         } else {
 628             if (strcasecmp($message->entities[$i]->header->id, $id) == 0) {
 629                 if (sq_check_save_extension($message->entities[$i])) {
 630                     return $message->entities[$i]->entity_id;
 631                 }
 632             }
 633         }
 634     }
 635     return $ret;
 636 }
 637
 638 function sq_check_save_extension($message) {
 639     $filename = $message->getFilename();
 640     $ext = substr($filename, strrpos($filename,'.')+1);
 641     $save_extensions = array('jpg','jpeg','gif','png','bmp');
 642     return in_array($ext, $save_extensions);
 643 }
 644
 645
 646 /**
 647  ** HTMLFILTER ROUTINES
 648  */
 649
 650 /**
 651  * This function returns the final tag out of the tag name, an array
 652  * of attributes, and the type of the tag. This function is called by
 653  * sq_sanitize internally.
 654  *
 655  * @param  $tagname  the name of the tag.
 656  * @param  $attary   the array of attributes and their values
 657  * @param  $tagtype  The type of the tag (see in comments).
 658  * @return           a string with the final tag representation.
 659  */
 660 function sq_tagprint($tagname, $attary, $tagtype){
 661     $me = 'sq_tagprint';
 662
 663     if ($tagtype == 2){
 664         $fulltag = '</' . $tagname . '>';
 665     } else {
 666         $fulltag = '<' . $tagname;
 667         if (is_array($attary) && sizeof($attary)){
 668             $atts = Array();
 669             while (list($attname, $attvalue) = each($attary)){
 670                 array_push($atts, "$attname=$attvalue");
 671             }
 672             $fulltag .= ' ' . join(" ", $atts);
 673         }
 674         if ($tagtype == 3){
 675             $fulltag .= ' /';
 676         }
 677         $fulltag .= '>';
 678     }
 679     return $fulltag;
 680 }
 681
 682 /**
 683  * A small helper function to use with array_walk. Modifies a by-ref
 684  * value and makes it lowercase.
 685  *
 686  * @param  $val a value passed by-ref.
 687  * @return      void since it modifies a by-ref value.
 688  */
 689 function sq_casenormalize(&$val){
 690     $val = strtolower($val);
 691 }
 692
 693 /**
 694  * This function skips any whitespace from the current position within
 695  * a string and to the next non-whitespace value.
 696  *
 697  * @param  $body   the string
 698  * @param  $offset the offset within the string where we should start
 699  *                 looking for the next non-whitespace character.
 700  * @return         the location within the $body where the next
 701  *                 non-whitespace char is located.
 702  */
 703 function sq_skipspace($body, $offset){
 704     $me = 'sq_skipspace';
 705     preg_match('/^(\s*)/s', substr($body, $offset), $matches);
 706     if (sizeof($matches{1})){
 707         $count = strlen($matches{1});
 708         $offset += $count;
 709     }
 710     return $offset;
 711 }
 712
 713 /**
 714  * This function looks for the next character within a string.  It's
 715  * really just a glorified "strpos", except it catches if failures
 716  * nicely.
 717  *
 718  * @param  $body   The string to look for needle in.
 719  * @param  $offset Start looking from this position.
 720  * @param  $needle The character/string to look for.
 721  * @return         location of the next occurance of the needle, or
 722  *                 strlen($body) if needle wasn't found.
 723  */
 724 function sq_findnxstr($body, $offset, $needle){
 725     $me  = 'sq_findnxstr';
 726     $pos = strpos($body, $needle, $offset);
 727     if ($pos === FALSE){
 728         $pos = strlen($body);
 729     }
 730     return $pos;
 731 }
 732
 733 /**
 734  * This function takes a PCRE-style regexp and tries to match it
 735  * within the string.
 736  *
 737  * @param  $body   The string to look for needle in.
 738  * @param  $offset Start looking from here.
 739  * @param  $reg    A PCRE-style regex to match.
 740  * @return         Returns a false if no matches found, or an array
 741  *                 with the following members:
 742  *                 - integer with the location of the match within $body
 743  *                 - string with whatever content between offset and the match
 744  *                 - string with whatever it is we matched
 745  */
 746 function sq_findnxreg($body, $offset, $reg){
 747     $me = 'sq_findnxreg';
 748     $matches = Array();
 749     $retarr = Array();
 750     preg_match("%^(.*?)($reg)%s", substr($body, $offset), $matches);
 751     if (!$matches{0}){
 752         $retarr = false;
 753     } else {
 754         $retarr{0} = $offset + strlen($matches{1});
 755         $retarr{1} = $matches{1};
 756         $retarr{2} = $matches{2};
 757     }
 758     return $retarr;
 759 }
 760
 761 /**
 762  * This function looks for the next tag.
 763  *
 764  * @param  $body   String where to look for the next tag.
 765  * @param  $offset Start looking from here.
 766  * @return         false if no more tags exist in the body, or
 767  *                 an array with the following members:
 768  *                 - string with the name of the tag
 769  *                 - array with attributes and their values
 770  *                 - integer with tag type (1, 2, or 3)
 771  *                 - integer where the tag starts (starting "<")
 772  *                 - integer where the tag ends (ending ">")
 773  *                 first three members will be false, if the tag is invalid.
 774  */
 775 function sq_getnxtag($body, $offset){
 776     $me = 'sq_getnxtag';
 777     if ($offset > strlen($body)){
 778         return false;
 779     }
 780     $lt = sq_findnxstr($body, $offset, "<");
 781     if ($lt == strlen($body)){
 782         return false;
 783     }
 784     /**
 785      * We are here:
 786      * blah blah <tag attribute="value">
 787      * \---------^
 788      */
 789     $pos = sq_skipspace($body, $lt+1);
 790     if ($pos >= strlen($body)){
 791         return Array(false, false, false, $lt, strlen($body));
 792     }
 793     /**
 794      * There are 3 kinds of tags:
 795      * 1. Opening tag, e.g.:
 796      *    <a href="blah">
 797      * 2. Closing tag, e.g.:
 798      *    </a>
 799      * 3. XHTML-style content-less tag, e.g.:
 800      *    <img src="blah"/>
 801      */
 802     $tagtype = false;
 803     switch (substr($body, $pos, 1)){
 804         case '/':
 805             $tagtype = 2;
 806             $pos++;
 807             break;
 808         case '!':
 809             /**
 810              * A comment or an SGML declaration.
 811              */
 812             if (substr($body, $pos+1, 2) == "--"){
 813                 $gt = strpos($body, "-->", $pos);
 814                 if ($gt === false){
 815                     $gt = strlen($body);
 816                 } else {
 817                     $gt += 2;
 818                 }
 819                 return Array(false, false, false, $lt, $gt);
 820             } else {
 821                 $gt = sq_findnxstr($body, $pos, ">");
 822                 return Array(false, false, false, $lt, $gt);
 823             }
 824             break;
 825         default:
 826             /**
 827              * Assume tagtype 1 for now. If it's type 3, we'll switch values
 828              * later.
 829              */
 830             $tagtype = 1;
 831             break;
 832     }
 833
 834     $tag_start = $pos;
 835     $tagname = '';
 836     /**
 837      * Look for next [\W-_], which will indicate the end of the tag name.
 838      */
 839     $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
 840     if ($regary == false){
 841         return Array(false, false, false, $lt, strlen($body));
 842     }
 843     list($pos, $tagname, $match) = $regary;
 844     $tagname = strtolower($tagname);
 845
 846     /**
 847      * $match can be either of these:
 848      * '>'  indicating the end of the tag entirely.
 849      * '\s' indicating the end of the tag name.
 850      * '/'  indicating that this is type-3 xhtml tag.
 851      *
 852      * Whatever else we find there indicates an invalid tag.
 853      */
 854     switch ($match){
 855         case '/':
 856             /**
 857              * This is an xhtml-style tag with a closing / at the
 858              * end, like so: <img src="blah"/>. Check if it's followed
 859              * by the closing bracket. If not, then this tag is invalid
 860              */
 861             if (substr($body, $pos, 2) == "/>"){
 862                 $pos++;
 863                 $tagtype = 3;
 864             } else {
 865                 $gt = sq_findnxstr($body, $pos, ">");
 866                 $retary = Array(false, false, false, $lt, $gt);
 867                 return $retary;
 868             }
 869         case '>':
 870             return Array($tagname, false, $tagtype, $lt, $pos);
 871             break;
 872         default:
 873             /**
 874              * Check if it's whitespace
 875              */
 876             if (!preg_match('/\s/', $match)){
 877                 /**
 878                  * This is an invalid tag! Look for the next closing ">".
 879                  */
 880                 $gt = sq_findnxstr($body, $offset, ">");
 881                 return Array(false, false, false, $lt, $gt);
 882             }
 883             break;
 884     }
 885
 886     /**
 887      * At this point we're here:
 888      * <tagname  attribute='blah'>
 889      * \-------^
 890      *
 891      * At this point we loop in order to find all attributes.
 892      */
 893     $attname = '';
 894     $atttype = false;
 895     $attary = Array();
 896
 897     while ($pos <= strlen($body)){
 898         $pos = sq_skipspace($body, $pos);
 899         if ($pos == strlen($body)){
 900             /**
 901              * Non-closed tag.
 902              */
 903             return Array(false, false, false, $lt, $pos);
 904         }
 905         /**
 906          * See if we arrived at a ">" or "/>", which means that we reached
 907          * the end of the tag.
 908          */
 909         $matches = Array();
 910         if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) {
 911             /**
 912              * Yep. So we did.
 913              */
 914             $pos += strlen($matches{1});
 915             if ($matches{2} == "/>"){
 916                 $tagtype = 3;
 917                 $pos++;
 918             }
 919             return Array($tagname, $attary, $tagtype, $lt, $pos);
 920         }
 921
 922         /**
 923          * There are several types of attributes, with optional
 924          * [:space:] between members.
 925          * Type 1:
 926          *   attrname[:space:]=[:space:]'CDATA'
 927          * Type 2:
 928          *   attrname[:space:]=[:space:]"CDATA"
 929          * Type 3:
 930          *   attr[:space:]=[:space:]CDATA
 931          * Type 4:
 932          *   attrname
 933          *
 934          * We leave types 1 and 2 the same, type 3 we check for
 935          * '"' and convert to "&quot" if needed, then wrap in
 936          * double quotes. Type 4 we convert into:
 937          * attrname="yes".
 938          */
 939         $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
 940         if ($regary == false){
 941             /**
 942              * Looks like body ended before the end of tag.
 943              */
 944             return Array(false, false, false, $lt, strlen($body));
 945         }
 946         list($pos, $attname, $match) = $regary;
 947         $attname = strtolower($attname);
 948         /**
 949          * We arrived at the end of attribute name. Several things possible
 950          * here:
 951          * '>'  means the end of the tag and this is attribute type 4
 952          * '/'  if followed by '>' means the same thing as above
 953          * '\s' means a lot of things -- look what it's followed by.
 954          *      anything else means the attribute is invalid.
 955          */
 956         switch($match){
 957             case '/':
 958                 /**
 959                  * This is an xhtml-style tag with a closing / at the
 960                  * end, like so: <img src="blah"/>. Check if it's followed
 961                  * by the closing bracket. If not, then this tag is invalid
 962                  */
 963                 if (substr($body, $pos, 2) == "/>"){
 964                     $pos++;
 965                     $tagtype = 3;
 966                 } else {
 967                     $gt = sq_findnxstr($body, $pos, ">");
 968                     $retary = Array(false, false, false, $lt, $gt);
 969                     return $retary;
 970                 }
 971             case '>':
 972                 $attary{$attname} = '"yes"';
 973                 return Array($tagname, $attary, $tagtype, $lt, $pos);
 974                 break;
 975             default:
 976                 /**
 977                  * Skip whitespace and see what we arrive at.
 978                  */
 979                 $pos = sq_skipspace($body, $pos);
 980                 $char = substr($body, $pos, 1);
 981                 /**
 982                  * Two things are valid here:
 983                  * '=' means this is attribute type 1 2 or 3.
 984                  * \w means this was attribute type 4.
 985                  * anything else we ignore and re-loop. End of tag and
 986                  * invalid stuff will be caught by our checks at the beginning
 987                  * of the loop.
 988                  */
 989                 if ($char == "="){
 990                     $pos++;
 991                     $pos = sq_skipspace($body, $pos);
 992                     /**
 993                      * Here are 3 possibilities:
 994                      * "'"  attribute type 1
 995                      * '"'  attribute type 2
 996                      * everything else is the content of tag type 3
 997                      */
 998                     $quot = substr($body, $pos, 1);
 999                     if ($quot == "'"){
1000                         $regary = sq_findnxreg($body, $pos+1, "\'");
1001                         if ($regary == false){
1002                             return Array(false, false, false, $lt, strlen($body));
1003                         }
1004                         list($pos, $attval, $match) = $regary;
1005                         $pos++;
1006                         $attary{$attname} = "'" . $attval . "'";
1007                     } else if ($quot == '"'){
1008                         $regary = sq_findnxreg($body, $pos+1, '\"');
1009                         if ($regary == false){
1010                             return Array(false, false, false, $lt, strlen($body));
1011                         }
1012                         list($pos, $attval, $match) = $regary;
1013                         $pos++;
1014                         $attary{$attname} = '"' . $attval . '"';
1015                     } else {
1016                         /**
1017                          * These are hateful. Look for \s, or >.
1018                          */
1019                         $regary = sq_findnxreg($body, $pos, "[\s>]");
1020                         if ($regary == false){
1021                             return Array(false, false, false, $lt, strlen($body));
1022                         }
1023                         list($pos, $attval, $match) = $regary;
1024                         /**
1025                          * If it's ">" it will be caught at the top.
1026                          */
1027                         $attval = preg_replace("/\"/s", "&quot;", $attval);
1028                         $attary{$attname} = '"' . $attval . '"';
1029                     }
1030                 } else if (preg_match("|[\w/>]|", $char)) {
1031                     /**
1032                      * That was attribute type 4.
1033                      */
1034                     $attary{$attname} = '"yes"';
1035                 } else {
1036                     /**
1037                      * An illegal character. Find next '>' and return.
1038                      */
1039                     $gt = sq_findnxstr($body, $pos, ">");
1040                     return Array(false, false, false, $lt, $gt);
1041                 }
1042                 break;
1043         }
1044     }
1045     /**
1046      * The fact that we got here indicates that the tag end was never
1047      * found. Return invalid tag indication so it gets stripped.
1048      */
1049     return Array(false, false, false, $lt, strlen($body));
1050 }
1051
/**
 * This function checks attribute values for entity-encoded values
 * and returns them translated into 8-bit strings so we can run
 * checks on them.
 *
 * Named entities from PHP's HTML_ENTITIES table and numeric entities
 * 1..255 (decimal and hexadecimal) are decoded. Quote entities
 * (&quot;, &#34;, &#39; and their hex forms) are deliberately left
 * encoded so the quoting around the value stays intact.
 *
 * An entity is decoded when it is terminated by one or more ";", or,
 * without a semicolon, when the next character cannot belong to it
 * (non-word character for names, non-digit / non-hex-digit for numbers).
 *
 * @param  $attvalue A string to run entity check against.
 * @return           Translated value.
 */
function sq_deent($attvalue){
    /**
     * See if we have to run the checks first. All entities must start
     * with "&".
     */
    if (strpos($attvalue, "&") === false){
        return $attvalue;
    }
    /**
     * "\" and "$" have a special meaning inside a preg replacement
     * string, so decoded characters must be escaped before use.
     */
    $replesc = array('\\' => '\\\\', '$' => '\\$');

    /**
     * Check named entities first. ENT_COMPAT is passed explicitly so
     * the single-quote entity is never part of the table, whatever the
     * default flags of the running PHP version are.
     */
    $trans = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT));
    /**
     * Leave &quot; in, as it can mess us up.
     */
    unset($trans['&quot;']);
    foreach ($trans as $ent => $val){
        $name = preg_quote(rtrim($ent, ';'), '/');
        /**
         * The terminator is matched with an alternation/lookahead and
         * not captured, so no semicolon is left behind and the following
         * character is never consumed.
         */
        $attvalue = preg_replace('/' . $name . '(?:;+|(?=\W))/i',
                                 strtr($val, $replesc), $attvalue);
    }
    /**
     * Now translate numbered entities from 1 to 255 if needed.
     */
    if (strpos($attvalue, "#") !== false){
        /* 34 is the double quote, 39 the single quote. */
        $omit = Array(34, 39);
        for ($asc = 1; $asc < 256; $asc++){
            if (in_array($asc, $omit)){
                continue;
            }
            $chr = strtr(chr($asc), $replesc);
            $attvalue = preg_replace('/&#0*' . $asc . '(?:;+|(?=\D))/i',
                                     $chr, $attvalue);
            $attvalue = preg_replace('/&#x0*' . dechex($asc)
                                     . '(?:;+|(?=[^0-9a-f]))/i',
                                     $chr, $attvalue);
        }
    }
    return $attvalue;
}
1098
/**
 * This function runs various checks against the attributes.
 *
 * Attributes whose name matches $rm_attnames are dropped and not
 * processed any further. For the remaining ones the entity-decoded
 * value is run through the $bad_attvals replacements, and values that
 * start with "cid:" are rewritten by sq_cid2http(). Finally the
 * attributes from $add_attr_to_tag are merged in for matching tags.
 *
 * @param  $tagname         String with the name of the tag.
 * @param  $attary          Array with all tag attributes.
 * @param  $rm_attnames     See description for sq_sanitize
 * @param  $bad_attvals     See description for sq_sanitize
 * @param  $add_attr_to_tag See description for sq_sanitize
 * @param  $message         message object
 * @param  $id              message id
 * @param  $mailbox         mailbox name, passed on to sq_cid2http
 * @return                  Array with modified attributes.
 */
function sq_fixatts($tagname,
                    $attary,
                    $rm_attnames,
                    $bad_attvals,
                    $add_attr_to_tag,
                    $message,
                    $id,
                    $mailbox
                    ){
    foreach ($attary as $attname => $attvalue){
        /**
         * See if this attribute should be removed.
         */
        $remove = false;
        foreach ($rm_attnames as $matchtag => $matchattrs){
            if (!preg_match($matchtag, $tagname)){
                continue;
            }
            foreach ($matchattrs as $matchattr){
                if (preg_match($matchattr, $attname)){
                    $remove = true;
                    break 2;
                }
            }
        }
        if ($remove){
            /**
             * Skip the value checks below, otherwise they could put
             * the attribute we have just removed back into the array.
             */
            unset($attary[$attname]);
            continue;
        }
        /**
         * Remove any entities.
         */
        $attvalue = sq_deent($attvalue);

        /**
         * Now run the checks on the attribute values: for every tag
         * pattern matching this tag and every attribute pattern matching
         * this attribute, apply the list of value replacements. The
         * stored attribute is only touched if a replacement changed
         * the decoded value.
         */
        foreach ($bad_attvals as $matchtag => $matchattrs){
            if (!preg_match($matchtag, $tagname)){
                continue;
            }
            foreach ($matchattrs as $matchattr => $valary){
                if (!preg_match($matchattr, $attname)){
                    continue;
                }
                /**
                 * There are two arrays in valary.
                 * First is matches.
                 * Second one is replacements
                 */
                list($valmatch, $valrepl) = $valary;
                $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                if ($newvalue !== $attvalue){
                    $attary[$attname] = $newvalue;
                }
            }
        }
        /**
         * Turn cid: urls into http-friendly ones.
         */
        if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
            $attary[$attname] = sq_cid2http($message, $id, $attvalue, $mailbox);
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag => $addattary){
        if (preg_match($matchtag, $tagname)){
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
1182
/**
 * This function edits the style definition to make them friendly and
 * usable in squirrelmail.
 *
 * Reads the global $view_unsafe_images: unless it is set, quoted
 * http/https urls are replaced with the "removed" placeholder image.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $content  a string with whatever is between <style> and </style>
 * @param  $mailbox  mailbox name used when building links for cid: urls
 *                   (optional, defaults to 'INBOX')
 * @return           a string with edited content.
 */
function sq_fixstyle($message, $id, $content, $mailbox = 'INBOX'){
    global $view_unsafe_images;
    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
    $secremoveimg = '../images/' . _("sec_remove_eng.png");
    /**
     * Fix url('blah') declarations.
     */
    $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si",
                            "url(\\1$secremoveimg\\2)", $content);
    /**
     * Fix url('https*://.*) declarations but only if $view_unsafe_images
     * is false.
     */
    if (!$view_unsafe_images){
        $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si",
                                "url(\\1$secremoveimg\\2)", $content);
    }

    /**
     * Fix urls that refer to cid:
     *
     * The matched text is replaced literally. Feeding it back into a
     * regular expression would fail to match as soon as the cid
     * contains a metacharacter such as "+" or ".", leaving the content
     * unchanged and this loop spinning forever.
     */
    $matches = Array();
    while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content,
                      $matches)){
        $cidurl = $matches[1];
        $httpurl = sq_cid2http($message, $id, $cidurl, $mailbox);
        $content = str_replace($matches[0], "url($httpurl)", $content);
    }

    /**
     * Fix stupid css declarations which lead to vulnerabilities
     * in IE.
     */
    $match   = Array('/expression/si',
                     '/behaviou*r/si',
                     '/binding/si');
    $replace = Array('idiocy', 'idiocy', 'idiocy');
    $content = preg_replace($match, $replace, $content);
    return $content;
}
1239
/**
 * This function converts cid: url's into the ones that can be viewed in
 * the browser.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $cidurl   the cid: url, including its surrounding quotes.
 * @param  $mailbox  mailbox name to put into the link (optional,
 *                   defaults to 'INBOX' so three-argument calls work)
 * @return           a string with a http-friendly url wrapped in the
 *                   same quote character as $cidurl, or an empty string
 *                   if find_ent_id() finds no entity for the cid.
 */
function sq_cid2http($message, $id, $cidurl, $mailbox = 'INBOX'){
    /**
     * Get rid of quotes.
     */
    $quotchar = substr($cidurl, 0, 1);
    $cidurl = str_replace($quotchar, "", $cidurl);
    /* Drop the leading "cid:" (4 characters). */
    $cidurl = substr(trim($cidurl), 4);
    $linkurl = find_ent_id($cidurl, $message);
    /* in case of non-safe cid links $httpurl should be replaced by a sort of
       unsafe link image */
    $httpurl = '';
    if ($linkurl) {
        $httpurl = $quotchar . '../src/download.php?absolute_dl=true&amp;' .
                   "passed_id=$id&amp;mailbox=" . urlencode($mailbox) .
                   '&amp;ent_id=' . $linkurl . $quotchar;
    }
    return $httpurl;
}
1267
/**
 * This function changes the <body> tag into a <div> tag since we
 * can't really have a body-within-body.
 *
 * The div always gets class 'bodyclass'. The background, bgcolor and
 * text attributes of <body> are turned into one style attribute; all
 * other attributes are dropped.
 *
 * @param  $attary  an array of attributes and values of <body>
 * @return          a modified array of attributes to be set for <div>
 */
function sq_body2div($attary){
    $divattary = Array('class' => "'bodyclass'");
    $styledef = '';
    if (is_array($attary) && sizeof($attary) > 0){
        foreach ($attary as $attname => $attvalue){
            /**
             * Strip both kinds of quotes, not only the one the value
             * was wrapped in. The style attribute built below is
             * wrapped in double quotes and url() uses single quotes,
             * so a leftover quote would let the value break out of
             * the attribute.
             */
            $attvalue = str_replace(Array('"', "'"), '', $attvalue);
            /**
             * NOTE(review): apart from the quotes the value goes into
             * the style declaration as is -- confirm whether further
             * filtering of these values is wanted.
             */
            switch ($attname){
                case 'background':
                    $styledef .= "background-image: url('$attvalue'); ";
                    break;
                case 'bgcolor':
                    $styledef .= "background-color: $attvalue; ";
                    break;
                case 'text':
                    $styledef .= "color: $attvalue; ";
                    break;
            }
        }
        if (strlen($styledef) > 0){
            $divattary['style'] = "\"$styledef\"";
        }
    }
    return $divattary;
}
1303
/**
 * This is the main function and the one you should actually be calling.
 * There are several variables you should be aware of an which need
 * special description.
 *
 * Since the description is quite lengthy, see it here:
 * http://www.mricon.com/html/phpfilter.html
 *
 * @param $body                 the string with HTML you wish to filter
 * @param $tag_list             see description above. The first element
 *                              is a flag: false means the listed tags
 *                              are removed, true means only the listed
 *                              tags are allowed.
 * @param $rm_tags_with_content see description above
 * @param $self_closing_tags    see description above
 * @param $force_tag_closing    see description above
 * @param $rm_attnames          see description above
 * @param $bad_attvals          see description above
 * @param $add_attr_to_tag      see description above
 * @param $message              message object
 * @param $id                   message id
 * @param $mailbox              mailbox name, used for cid: links
 * @return                      sanitized html safe to show on your pages.
 */
function sq_sanitize($body,
                     $tag_list,
                     $rm_tags_with_content,
                     $self_closing_tags,
                     $force_tag_closing,
                     $rm_attnames,
                     $bad_attvals,
                     $add_attr_to_tag,
                     $message,
                     $id,
                     $mailbox
                     ){
    /**
     * See if tag_list is of tags to remove or tags to allow.
     * false  means remove these tags
     * true   means allow these tags
     *
     * The flag is taken off first so that only tag names are left
     * in $tag_list when it gets normalized.
     */
    $rm_tags = array_shift($tag_list);
    /**
     * Normalize tag_list, rm_tags_with_content and self_closing_tags.
     */
    if (is_array($tag_list)){
        array_walk($tag_list, 'sq_casenormalize');
    }
    if (is_array($rm_tags_with_content)){
        array_walk($rm_tags_with_content, 'sq_casenormalize');
    }
    if (is_array($self_closing_tags)){
        array_walk($self_closing_tags, 'sq_casenormalize');
    }
    $curpos = 0;
    /* Number of currently open (kept) tags, keyed by tag name. */
    $open_tags = Array();
    $trusted = "<!-- begin sanitized html -->\n";
    /* false, or the name of the tag whose content is being dropped. */
    $skip_content = false;
    /**
     * Take care of netscape's stupid javascript entities like
     * &{alert('boo')};
     */
    $body = preg_replace("/&(\{.*?\};)/si", "&amp;\\1", $body);

    while (($curtag=sq_getnxtag($body, $curpos)) != FALSE){
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        /* Text between the previous tag and this one. */
        $free_content = substr($body, $curpos, $lt-$curpos);
        /**
         * Take care of <style>
         */
        if ($tagname == "style" && $tagtype == 2){
            /**
             * This is a closing </style>. Edit the
             * content before we apply it.
             */
            $free_content = sq_fixstyle($message, $id, $free_content,
                                        $mailbox);
        }
        if ($skip_content == false){
            $trusted .= $free_content;
        }
        if ($tagname != FALSE){
            if ($tagtype == 2){
                if ($skip_content == $tagname){
                    /**
                     * Got to the end of tag we needed to remove.
                     */
                    $tagname = false;
                    $skip_content = false;
                } else {
                    if ($skip_content == false){
                        if ($tagname == "body"){
                            $tagname = "div";
                        } else {
                            /**
                             * Only let a closing tag through if we
                             * have let the matching opening tag through.
                             */
                            if (isset($open_tags[$tagname]) &&
                                $open_tags[$tagname] > 0){
                                $open_tags[$tagname]--;
                            } else {
                                $tagname = false;
                            }
                        }
                    }
                }
            } else {
                /**
                 * $rm_tags_with_content
                 */
                if ($skip_content == false){
                    /**
                     * See if this is a self-closing type and change
                     * tagtype appropriately.
                     */
                    if ($tagtype == 1
                        && in_array($tagname, $self_closing_tags)){
                        $tagtype=3;
                    }
                    /**
                     * See if we should skip this tag and any content
                     * inside it.
                     */
                    if ($tagtype == 1 &&
                        in_array($tagname, $rm_tags_with_content)){
                        $skip_content = $tagname;
                    } else if ($tagtype == 3 &&
                               in_array($tagname, $rm_tags_with_content)){
                        /**
                         * XHTML-style form of a tag we remove together
                         * with its content (e.g. <script src="x"/>).
                         * There is no content to skip, but the tag
                         * itself must not get through.
                         */
                        $tagname = false;
                    } else {
                        if (($rm_tags == false
                             && in_array($tagname, $tag_list)) ||
                            ($rm_tags == true &&
                             !in_array($tagname, $tag_list))){
                            $tagname = false;
                        } else {
                            if ($tagtype == 1){
                                if (isset($open_tags[$tagname])){
                                    $open_tags[$tagname]++;
                                } else {
                                    $open_tags[$tagname]=1;
                                }
                            }
                            /**
                             * This is where we run other checks.
                             */
                            if (is_array($attary) && sizeof($attary) > 0){
                                $attary = sq_fixatts($tagname,
                                                     $attary,
                                                     $rm_attnames,
                                                     $bad_attvals,
                                                     $add_attr_to_tag,
                                                     $message,
                                                     $id,
                                                     $mailbox
                                                     );
                            }
                            /**
                             * Convert body into div.
                             */
                            if ($tagname == "body"){
                                $tagname = "div";
                                $attary = sq_body2div($attary, $message, $id);
                            }
                        }
                    }
                }
            }
            if ($tagname != false && $skip_content == false){
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
            }
        }
        /* Continue right after the closing ">" of this tag. */
        $curpos = $gt+1;
    }
    $trusted .= substr($body, $curpos, strlen($body)-$curpos);
    if ($force_tag_closing == true){
        foreach ($open_tags as $tagname=>$opentimes){
            while ($opentimes > 0){
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end sanitized html -->\n";
    return $trusted;
}
1475
/**
 * This is a wrapper function to call html sanitizing routines.
 *
 * It sets up the tag, attribute and value filter lists and hands them
 * to sq_sanitize(). As side effects it sets the global
 * $attachment_common_show_images to false and sets the global
 * $has_unsafe_images to true when the "removed" placeholder image
 * shows up in the sanitized output.
 *
 * @param  $body     the body of the message
 * @param  $id       the id of the message
 * @param  $message  the message object
 * @param  $mailbox  mailbox name, used for cid: links (defaults to
 *                   'INBOX')
 * @return           a string with html safe to display in the browser.
 */
function magicHTML($body, $id, $message, $mailbox = 'INBOX'){
    global $attachment_common_show_images, $view_unsafe_images,
           $has_unsafe_images;
    /**
     * Don't display attached images in HTML mode.
     */
    $attachment_common_show_images = false;
    /* Leading false: the tags listed here are the ones to remove. */
    $tag_list = Array(
                      false,
                      "object",
                      "meta",
                      "html",
                      "head",
                      "base",
                      "link",
                      "frame",
                      "iframe"
                      );

    $rm_tags_with_content = Array(
                                  "script",
                                  "applet",
                                  "embed",
                                  "title"
                                  );

    $self_closing_tags =  Array(
                                "img",
                                "br",
                                "hr",
                                "input"
                                );

    $force_tag_closing = false;

    $rm_attnames = Array(
                         "/.*/" =>
                         Array(
                               "/target/si",
                               "/^on.*/si",
                               "/^dynsrc/si",
                               "/^data.*/si"
                               )
                         );

    $secremoveimg = "../images/" . _("sec_remove_eng.png");
    /**
     * NOTE(review): in "/^src|background/i" and "/^href|action/i" the
     * "^" only binds to the first alternative, so the second one
     * matches anywhere in the attribute name -- confirm that is wanted.
     */
    $bad_attvals = Array(
        "/.*/" =>
            Array(
                "/^src|background/i" =>
                    Array(
                          Array(
                                "|^([\'\"])\s*\.\./.*([\'\"])|si",
                                "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                                "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                                "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                                ),
                          Array(
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2",
                                "\\1$secremoveimg\\2"
                                )
                        ),
                "/^href|action/i" =>
                    Array(
                          Array(
                                "|^([\'\"])\s*\.\./.*([\'\"])|si",
                                "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                                "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                                "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                                ),
                          Array(
                                "\\1#\\2",
                                "\\1#\\2",
                                "\\1#\\2",
                                "\\1#\\2"
                                )
                        ),
                "/^style/si" =>
                    Array(
                          Array(
                                "/expression/si",
                                "/binding/si",
                                "/behaviou*r/si",
                                "|url\(([\'\"])\s*\.\./.*([\'\"])\)|si",
                                "/url\(([\'\"])\s*\S+script\s*:.*([\'\"])\)/si",
                                "/url\(([\'\"])\s*mocha\s*:.*([\'\"])\)/si",
                                "/url\(([\'\"])\s*about\s*:.*([\'\"])\)/si"
                               ),
                          Array(
                                "idiocy",
                                "idiocy",
                                "idiocy",
                                "url(\\1#\\2)",
                                "url(\\1#\\2)",
                                "url(\\1#\\2)",
                                "url(\\1#\\2)"
                               )
                          )
                )
        );
    if (!$view_unsafe_images){
        /**
         * Remove any references to http/https if view_unsafe_images set
         * to false. Index 0 holds the patterns, index 1 the matching
         * replacements, so both have to be extended together.
         */
         array_push($bad_attvals['/.*/']['/^src|background/i'][0],
                    '/^([\'\"])\s*https*:.*([\'\"])/si');
         array_push($bad_attvals['/.*/']['/^src|background/i'][1],
                    "\\1$secremoveimg\\2");
         array_push($bad_attvals['/.*/']['/^style/si'][0],
                    '/url\(([\'\"])\s*https*:.*([\'\"])\)/si');
         array_push($bad_attvals['/.*/']['/^style/si'][1],
                    "url(\\1$secremoveimg\\2)");
    }

    $add_attr_to_tag = Array(
                             "/^a$/si" => Array('target'=>'"_new"')
                             );
    $trusted = sq_sanitize($body,
                           $tag_list,
                           $rm_tags_with_content,
                           $self_closing_tags,
                           $force_tag_closing,
                           $rm_attnames,
                           $bad_attvals,
                           $add_attr_to_tag,
                           $message,
                           $id,
                           $mailbox
                           );
    /**
     * The image path is text, not a pattern: quote it so that dots or
     * a "|" in a translated file name cannot change or break the match.
     */
    if (preg_match('|' . preg_quote($secremoveimg, '|') . '|si', $trusted)){
        $has_unsafe_images = true;
    }
    return $trusted;
}
1620
1621 ?>