functions/mime.php

   1 <?php
   2
   3 /**
   4  * mime.php
   5  *
   6  * This contains the functions necessary to detect and decode MIME
   7  * messages.
   8  *
   9  * @copyright &copy; 1999-2007 The SquirrelMail Project Team
  10  * @license http://opensource.org/licenses/gpl-license.php GNU Public License
  11  * @version $Id$
  12  * @package squirrelmail
  13  */
  14
  15 /**
  16  * dependency information
  17    functions       dependency
  18    mime_structure
  19         class/mime/Message.class.php
  20             Message::parseStructure
  21         functions/page_header.php
  22             displayPageHeader
  23         functions/display_messages.php
  24             plain_error_message
  25    mime_fetch_body
  26         functions/imap_general.php
  27             sqimap_run_command
  28    mime_print_body_lines
  29
  30
  31
  32 functions/imap.php
  33 functions/attachment_common.php
  34 functions/display_messages.php
  35
  36 magicHtml => url_parser
  37 translateText => url_parser
  38
  39 */
  40
  41
  42 /* -------------------------------------------------------------------------- */
  43 /* MIME DECODING                                                              */
  44 /* -------------------------------------------------------------------------- */
  45
  46 /**
  47  * Get the MIME structure
  48  *
  49  * This function gets the structure of a message and stores it in the "message" class.
  50  * It will return this object for use with all relevant header information and
  51  * fully parsed into the standard "message" object format.
  52  */
  53 function mime_structure ($bodystructure, $flags=array()) {
  54
  55     /* Isolate the body structure and remove beginning and end parenthesis. */
  56     $read = trim(substr ($bodystructure, strpos(strtolower($bodystructure), 'bodystructure') + 13));
  57     $read = trim(substr ($read, 0, -1));
  58     $i = 0;
  59     $msg = Message::parseStructure($read,$i);
  60
  61     if (!is_object($msg)) {
  62         global $color, $mailbox;
  63         displayPageHeader( $color, $mailbox );
  64         $errormessage  = _("SquirrelMail could not decode the bodystructure of the message");
  65         $errormessage .= '<br />'._("The bodystructure provided by your IMAP server:").'<br /><br />';
  66         $errormessage .= '<pre>' . htmlspecialchars($read) . '</pre>';
  67         plain_error_message( $errormessage );
  68         echo '</body></html>';
  69         exit;
  70     }
  71     if (count($flags)) {
  72         foreach ($flags as $flag) {
  73 //FIXME: please document why it is we have to check the first char of the flag but we then go ahead and do a full string comparison anyway.  Is this a speed enhancement?  If not, let's keep it simple and just compare the full string and forget the switch block.
  74             $char = strtoupper($flag{1});
  75             switch ($char) {
  76                 case 'S':
  77                     if (strtolower($flag) == '\\seen') {
  78                         $msg->is_seen = true;
  79                     }
  80                     break;
  81                 case 'A':
  82                     if (strtolower($flag) == '\\answered') {
  83                         $msg->is_answered = true;
  84                     }
  85                     break;
  86                 case 'D':
  87                     if (strtolower($flag) == '\\deleted') {
  88                         $msg->is_deleted = true;
  89                     }
  90                     break;
  91                 case 'F':
  92                     if (strtolower($flag) == '\\flagged') {
  93                         $msg->is_flagged = true;
  94                     }
  95                     break;
  96                 case 'M':
  97                     if (strtolower($flag) == '$mdnsent') {
  98                         $msg->is_mdnsent = true;
  99                     }
 100                     break;
 101                 default:
 102                     break;
 103             }
 104         }
 105     }
 106     //    listEntities($msg);
 107     return $msg;
 108 }
 109
 110
 111
 112 /* This starts the parsing of a particular structure.  It is called recursively,
 113  * so it can be passed different structures.  It returns an object of type
 114  * $message.
 115  * First, it checks to see if it is a multipart message.  If it is, then it
 116  * handles that as it sees is necessary.  If it is just a regular entity,
 117  * then it parses it and adds the necessary header information (by calling out
 118  * to mime_get_elements()
 119  */
 120
 121 function mime_fetch_body($imap_stream, $id, $ent_id=1, $fetch_size=0) {
 122     /* Do a bit of error correction.  If we couldn't find the entity id, just guess
 123      * that it is the first one.  That is usually the case anyway.
 124      */
 125
 126     if (!$ent_id) {
 127         $cmd = "FETCH $id BODY[]";
 128     } else {
 129         $cmd = "FETCH $id BODY[$ent_id]";
 130     }
 131
 132     if ($fetch_size!=0) $cmd .= "<0.$fetch_size>";
 133
 134     $data = sqimap_run_command ($imap_stream, $cmd, true, $response, $message, TRUE);
 135     do {
 136         $topline = trim(array_shift($data));
 137     } while($topline && ($topline[0] == '*') && !preg_match('/\* [0-9]+ FETCH.*/i', $topline)) ;
 138
 139     $wholemessage = implode('', $data);
 140     if (ereg('\\{([^\\}]*)\\}', $topline, $regs)) {
 141         $ret = substr($wholemessage, 0, $regs[1]);
 142         /* There is some information in the content info header that could be important
 143          * in order to parse html messages. Let's get them here.
 144          */
 145 //        if ($ret{0} == '<') {
 146 //            $data = sqimap_run_command ($imap_stream, "FETCH $id BODY[$ent_id.MIME]", true, $response, $message, TRUE);
 147 //        }
 148     } else if (ereg('"([^"]*)"', $topline, $regs)) {
 149         $ret = $regs[1];
 150     } else if ((stristr($topline, 'nil') !== false) && (empty($wholemessage))) {
 151         $ret = $wholemessage;
 152     } else {
 153         global $where, $what, $mailbox, $passed_id, $startMessage;
 154         $par = 'mailbox=' . urlencode($mailbox) . '&amp;passed_id=' . $passed_id;
 155         if (isset($where) && isset($what)) {
 156             $par .= '&amp;where=' . urlencode($where) . '&amp;what=' . urlencode($what);
 157         } else {
 158             $par .= '&amp;startMessage=' . $startMessage . '&amp;show_more=0';
 159         }
 160         $par .= '&amp;response=' . urlencode($response) .
 161             '&amp;message='  . urlencode($message)  .
 162             '&amp;topline='  . urlencode($topline);
 163
 164         echo '<tt><br />' .
 165             '<table width="80%"><tr>' .
 166             '<tr><td colspan="2">' .
 167             _("Body retrieval error. The reason for this is most probably that the message is malformed.") .
 168             '</td></tr>' .
 169             '<tr><td><b>' . _("Command:") . "</td><td>$cmd</td></tr>" .
 170             '<tr><td><b>' . _("Response:") . "</td><td>$response</td></tr>" .
 171             '<tr><td><b>' . _("Message:") . "</td><td>$message</td></tr>" .
 172             '<tr><td><b>' . _("FETCH line:") . "</td><td>$topline</td></tr>" .
 173             "</table><br /></tt></font><hr />";
 174
 175         $data = sqimap_run_command ($imap_stream, "FETCH $passed_id BODY[]", true, $response, $message, TRUE);
 176         array_shift($data);
 177         $wholemessage = implode('', $data);
 178
 179         $ret = $wholemessage;
 180     }
 181     return $ret;
 182 }
 183
 184 function mime_print_body_lines ($imap_stream, $id, $ent_id=1, $encoding, $rStream='php://stdout') {
 185
 186     /* Don't kill the connection if the browser is over a dialup
 187      * and it would take over 30 seconds to download it.
 188      * Don't call set_time_limit in safe mode.
 189      */
 190
 191     if (!ini_get('safe_mode')) {
 192         set_time_limit(0);
 193     }
 194     /* in case of base64 encoded attachments, do not buffer them.
 195        Instead, echo the decoded attachment directly to screen */
 196     if (strtolower($encoding) == 'base64') {
 197         if (!$ent_id) {
 198             $query = "FETCH $id BODY[]";
 199         } else {
 200             $query = "FETCH $id BODY[$ent_id]";
 201         }
 202         sqimap_run_command($imap_stream,$query,true,$response,$message,TRUE,'sqimap_base64_decode',$rStream,true);
 203     } else {
 204         $body = mime_fetch_body ($imap_stream, $id, $ent_id);
 205         if (is_resource($rStream)) {
 206             fputs($rStream,decodeBody($body,$encoding));
 207         } else {
 208             echo decodeBody($body, $encoding);
 209         }
 210     }
 211
 212     /*
 213        TODO, use the same method for quoted printable.
 214        However, I assume that quoted printable attachments aren't that large
 215        so the performancegain / memory usage drop will be minimal.
 216        If we decide to add that then we need to adapt sqimap_fread because
 217        we need to split te result on \n and fread doesn't stop at \n. That
 218        means we also should provide $results from sqimap_fread (by ref) to
 219        te function and set $no_return to false. The $filter function for
 220        quoted printable should handle unsetting of $results.
 221      */
 222     /*
 223        TODO 2: find out how we write to the output stream php://stdout. fwrite
 224        doesn't work because 'php://stdout isn't a stream.
 225      */
 226
 227     return;
 228 }
 229
 230 /* -[ END MIME DECODING ]----------------------------------------------------------- */
 231
 232 /* This is here for debugging purposes.  It will print out a list
 233  * of all the entity IDs that are in the $message object.
 234  */
 235 function listEntities ($message) {
 236     if ($message) {
 237         echo "<tt>" . $message->entity_id . ' : ' . $message->type0 . '/' . $message->type1 . ' parent = '. $message->parent->entity_id. '<br />';
 238         for ($i = 0; isset($message->entities[$i]); $i++) {
 239             echo "$i : ";
 240             $msg = listEntities($message->entities[$i]);
 241
 242             if ($msg) {
 243                 echo "return: ";
 244                 return $msg;
 245             }
 246         }
 247     }
 248 }
 249
 250 function getPriorityStr($priority) {
 251     $priority_level = substr($priority,0,1);
 252
 253     switch($priority_level) {
 254         /* Check for a higher then normal priority. */
 255         case '1':
 256         case '2':
 257             $priority_string = _("High");
 258             break;
 259
 260         /* Check for a lower then normal priority. */
 261         case '4':
 262         case '5':
 263             $priority_string = _("Low");
 264             break;
 265
 266         /* Check for a normal priority. */
 267         case '3':
 268         default:
 269             $priority_level = '3';
 270             $priority_string = _("Normal");
 271             break;
 272
 273     }
 274     return $priority_string;
 275 }
 276
 277 /* returns a $message object for a particular entity id */
 278 function getEntity ($message, $ent_id) {
 279     return $message->getEntity($ent_id);
 280 }
 281
 282 /* translateText
 283  * Extracted from strings.php 23/03/2002
 284  */
 285
 286 function translateText(&$body, $wrap_at, $charset) {
 287     global $where, $what;   /* from searching */
 288     global $color;          /* color theme */
 289
 290     // require_once(SM_PATH . 'functions/url_parser.php');
 291
 292     $body_ary = explode("\n", $body);
 293     for ($i=0; $i < count($body_ary); $i++) {
 294         $line = $body_ary[$i];
 295         if (strlen($line) - 2 >= $wrap_at) {
 296             sqWordWrap($line, $wrap_at, $charset);
 297         }
 298         $line = charset_decode($charset, $line);
 299         $line = str_replace("\t", '        ', $line);
 300
 301         parseUrl ($line);
 302
 303         $quotes = 0;
 304         $pos = 0;
 305         $j = strlen($line);
 306
 307         while ($pos < $j) {
 308             if ($line[$pos] == ' ') {
 309                 $pos++;
 310             } else if (strpos($line, '&gt;', $pos) === $pos) {
 311                 $pos += 4;
 312                 $quotes++;
 313             } else {
 314                 break;
 315             }
 316         }
 317
 318         if ($quotes % 2) {
 319             $line = '<span class="quote1">' . $line . '</span>';
 320         } elseif ($quotes) {
 321             $line = '<span class="quote2">' . $line . '</span>';
 322         }
 323
 324         $body_ary[$i] = $line;
 325     }
 326     $body = '<pre>' . implode("\n", $body_ary) . '</pre>';
 327 }
 328
 329 /**
 330  * This returns a parsed string called $body. That string can then
 331  * be displayed as the actual message in the HTML. It contains
 332  * everything needed, including HTML Tags, Attachments at the
 333  * bottom, etc.
 334  *
 335  * Since 1.2.0 function uses message_body hook.
 336  * Till 1.3.0 function included output of formatAttachments().
 337  *
 338  * @param resource $imap_stream imap connection resource
 339  * @param object $message squirrelmail message object
 340  * @param array $color squirrelmail color theme array
 341  * @param integer $wrap_at number of characters per line
 342  * @param string $ent_num (since 1.3.0) message part id
 343  * @param integer $id (since 1.3.0) message id
 344  * @param string $mailbox (since 1.3.0) imap folder name
 345  * @param boolean $clean (since 1.5.1) Do not output stuff that's irrelevant for the printable version.
 346  * @return string html formated message text
 347  */
 348 function formatBody($imap_stream, $message, $color, $wrap_at, $ent_num, $id, $mailbox='INBOX', $clean=FALSE) {
 349     /* This if statement checks for the entity to show as the
 350      * primary message. To add more of them, just put them in the
 351      * order that is their priority.
 352      */
 353     global $startMessage, $languages, $squirrelmail_language,
 354            $show_html_default, $sort, $has_unsafe_images, $passed_ent_id,
 355            $use_iframe, $iframe_height, $download_and_unsafe_link,
 356            $download_href, $unsafe_image_toggle_href, $unsafe_image_toggle_text,
 357            $oTemplate, $nbsp;
 358
 359     // workaround for not updated config.php
 360     if (! isset($use_iframe)) $use_iframe = false;
 361
 362     // If there's no "view_unsafe_images" setting in the user's preferences,
 363     // turn unsafe images off by default.
 364     if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
 365         $view_unsafe_images = false;
 366     }
 367
 368     $body = '';
 369     $urlmailbox = urlencode($mailbox);
 370     $body_message = getEntity($message, $ent_num);
 371     if (($body_message->header->type0 == 'text') ||
 372             ($body_message->header->type0 == 'rfc822')) {
 373         $body = mime_fetch_body ($imap_stream, $id, $ent_num);
 374         $body = decodeBody($body, $body_message->header->encoding);
 375
 376         if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
 377                 function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode')) {
 378             if (mb_detect_encoding($body) != 'ASCII') {
 379                 $body = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decode',$body);
 380             }
 381         }
 382
 383         /* As of 1.5.2, $body is passed (and modified) by reference */
 384         do_hook('message_body', $body);
 385
 386         /* If there are other types that shouldn't be formatted, add
 387          * them here.
 388          */
 389
 390         if ($body_message->header->type1 == 'html') {
 391             if ($show_html_default <> 1) {
 392                 $entity_conv = array('&nbsp;' => ' ',
 393                                      '<p>'    => "\n",
 394                                      '<P>'    => "\n",
 395                                      '<br>'   => "\n",
 396                                      '<BR>'   => "\n",
 397                                      '<br />' => "\n",
 398                                      '<BR />' => "\n",
 399                                      '&gt;'   => '>',
 400                                      '&lt;'   => '<');
 401                 $body = strtr($body, $entity_conv);
 402                 $body = strip_tags($body);
 403                 $body = trim($body);
 404                 translateText($body, $wrap_at,
 405                         $body_message->header->getParameter('charset'));
 406             } elseif ($use_iframe && ! $clean) {
 407                 // $clean is used to remove iframe in printable view.
 408
 409                 /**
 410                  * If we don't add html message between iframe tags,
 411                  * we must detect unsafe images and modify $has_unsafe_images.
 412                  */
 413                 $html_body = magicHTML($body, $id, $message, $mailbox);
 414                 // Convert character set in order to display html mails in different character set
 415                 $html_body = charset_decode($body_message->header->getParameter('charset'),$html_body,false,true);
 416
 417                 // creating iframe url
 418                 $iframeurl=sqm_baseuri().'src/view_html.php?'
 419                     . 'mailbox=' . $urlmailbox
 420                     . '&amp;passed_id=' . $id
 421                     . '&amp;ent_id=' . $ent_num
 422                     . '&amp;view_unsafe_images=' . (int) $view_unsafe_images;
 423
 424                 global $oTemplate;
 425                 $oTemplate->assign('iframe_url', $iframeurl);
 426                 $oTemplate->assign('iframe_height', $iframe_height);
 427                 $oTemplate->assign('html_body', $html_body);
 428
 429                 $body = $oTemplate->fetch('read_html_iframe.tpl');
 430             } else {
 431                 // old way of html rendering
 432                 /**
 433                  * convert character set. charset_decode does not remove html special chars
 434                  * applied by magicHTML functions and does not sanitize them second time if
 435                  * fourth argument is true.
 436                  */
 437                 $charset = $body_message->header->getParameter('charset');
 438                 if (!empty($charset)) {
 439                     $body = charset_decode($charset,$body,false,true);
 440                 }
 441                 $body = magicHTML($body, $id, $message, $mailbox);
 442             }
 443         } else {
 444             translateText($body, $wrap_at,
 445                     $body_message->header->getParameter('charset'));
 446         }
 447
 448         // if this is the clean display (i.e. printer friendly), stop here.
 449         if ( $clean ) {
 450             return $body;
 451         }
 452
 453         /*
 454          * Previously the links for downloading and unsafe images were printed
 455          * under the mail. By putting the links in a global variable we can
 456          * print it in the toolbar where it belongs. Since the original code was
 457          * in this place it's left here. It might be possible to move it to some
 458          * other place if that makes sense. The possibility to do so has not
 459          * been evaluated yet.
 460          */
 461
 462         // Initialize the global variable to an empty string.
 463         // FIXME: To have $download_and_unsafe_link as a global variable might not be needed since the use of separate variables ($download_href, $unsafe_image_toggle_href, and $unsafe_image_toggle_text) for the templates was introduced.
 464         $download_and_unsafe_link = '';
 465
 466         // Prepare and build a link for downloading the mail.
 467         $link = 'passed_id=' . $id . '&amp;ent_id='.$ent_num.
 468             '&amp;mailbox=' . $urlmailbox .'&amp;sort=' . $sort .
 469             '&amp;startMessage=' . $startMessage . '&amp;show_more=0';
 470         if (isset($passed_ent_id)) {
 471             $link .= '&amp;passed_ent_id='.$passed_ent_id;
 472         }
 473         $download_href = SM_PATH . 'src/download.php?absolute_dl=true&amp;' . $link;
 474
 475         // Always add the link for downloading the mail as a file to the global
 476         // variable.
 477         $download_and_unsafe_link .= "$nbsp|$nbsp"
 478             . create_hyperlink($download_href, _("Download this as a file"));
 479
 480         // Find out the right text to use in the link depending on the
 481         // circumstances. If the unsafe images are displayed the link should
 482         // hide them, if they aren't displayed the link should only appear if
 483         // the mail really contains unsafe images.
 484         if ($view_unsafe_images) {
 485             $text = _("Hide Unsafe Images");
 486         } else {
 487             if (isset($has_unsafe_images) && $has_unsafe_images) {
 488                 $link .= '&amp;view_unsafe_images=1';
 489                 $text = _("View Unsafe Images");
 490             } else {
 491                 $text = '';
 492             }
 493         }
 494
 495         // Only create a link for unsafe images if there's need for one. If so:
 496         // add it to the global variable.
 497         if($text != '') {
 498             $unsafe_image_toggle_href = SM_PATH . 'src/read_body.php?'.$link;
 499             $unsafe_image_toggle_text = $text;
 500             $download_and_unsafe_link .= "$nbsp|$nbsp"
 501                 . create_hyperlink($unsafe_image_toggle_href, $text);
 502         }
 503     }
 504     return $body;
 505 }
 506
 507 /**
 508  * Generate attachments array for passing to templates.  Separated from
 509  * formatAttachments() below so that the same array can be given to the
 510  * print-friendly version.
 511  *
 512  * @since 1.5.2
 513  * @param object $message SquirrelMail message object
 514  * @param array $exclude_id message parts that are not attachments.
 515  * @param string $mailbox mailbox name
 516  * @param integer $id message id
 517  */
 518 function buildAttachmentArray($message, $exclude_id, $mailbox, $id) {
 519     global $where, $what, $startMessage, $color, $passed_ent_id, $base_uri;
 520
 521     $att_ar = $message->getAttachments($exclude_id);
 522     $urlMailbox = urlencode($mailbox);
 523
 524     $attachments = array();
 525     foreach ($att_ar as $att) {
 526         $ent = $att->entity_id;
 527         $header = $att->header;
 528         $type0 = strtolower($header->type0);
 529         $type1 = strtolower($header->type1);
 530         $name = '';
 531         $links = array();
 532         $links['download link']['text'] = _("Download");
 533         $links['download link']['href'] = $base_uri .
 534             "src/download.php?absolute_dl=true&amp;passed_id=$id&amp;mailbox=$urlMailbox&amp;ent_id=$ent";
 535
 536         if ($type0 =='message' && $type1 == 'rfc822') {
 537             $default_page = $base_uri  . 'src/read_body.php';
 538             $rfc822_header = $att->rfc822_header;
 539             $filename = $rfc822_header->subject;
 540             if (trim( $filename ) == '') {
 541                 $filename = 'untitled-[' . $ent . ']' ;
 542             }
 543             $from_o = $rfc822_header->from;
 544             if (is_object($from_o)) {
 545                 $from_name = decodeHeader($from_o->getAddress(false));
 546             } elseif (is_array($from_o) && count($from_o) && is_object($from_o[0])) {
 547                 // something weird happens when a digest message is opened and you return to the digest
 548                 // now the from object is part of an array. Probably the parseHeader call overwrites the info
 549                 // retrieved from the bodystructure in a different way. We need to fix this later.
 550                 // possible starting point, do not fetch header we already have and inspect how
 551                 // the rfc822_header object behaves.
 552                 $from_name = decodeHeader($from_o[0]->getAddress(false));
 553             } else {
 554                 $from_name = _("Unknown sender");
 555             }
 556             $description = _("From").': '.$from_name;
 557         } else {
 558             $default_page = $base_uri  . 'src/download.php';
 559             $filename = $att->getFilename();
 560             if ($header->description) {
 561                 $description = decodeHeader($header->description);
 562             } else {
 563                 $description = '';
 564             }
 565         }
 566
 567         $display_filename = $filename;
 568         if (isset($passed_ent_id)) {
 569             $passed_ent_id_link = '&amp;passed_ent_id='.$passed_ent_id;
 570         } else {
 571             $passed_ent_id_link = '';
 572         }
 573         $defaultlink = $default_page . "?startMessage=$startMessage"
 574             . "&amp;passed_id=$id&amp;mailbox=$urlMailbox"
 575             . '&amp;ent_id='.$ent.$passed_ent_id_link;
 576         if ($where && $what) {
 577             $defaultlink .= '&amp;where='. urlencode($where).'&amp;what='.urlencode($what);
 578         }
 579         // IE does make use of mime content sniffing. Forcing a download
 580         // prohibit execution of XSS inside an application/octet-stream attachment
 581         if ($type0 == 'application' && $type1 == 'octet-stream') {
 582             $defaultlink .= '&amp;absolute_dl=true';
 583         }
 584
 585         /* This executes the attachment hook with a specific MIME-type.
 586          * If that doesn't have results, it tries if there's a rule
 587          * for a more generic type. Finally, a hook for ALL attachment
 588          * types is run as well.
 589          */
 590         // First remember the default link.
 591         $defaultlink_orig = $defaultlink;
 592
 593         /* The API for this hook has changed as of 1.5.2 so that all plugin
 594            arguments are passed in an array instead of each their own plugin
 595            argument, and arguments are passed by reference, so instead of
 596            returning any changes, changes should simply be made to the original
 597            arguments themselves. */
 598         $temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
 599                     &$defaultlink, &$display_filename, &$where, &$what);
 600         do_hook("attachment $type0/$type1", $temp);
 601         if(count($links) <= 1 && $defaultlink == $defaultlink_orig) {
 602             /* The API for this hook has changed as of 1.5.2 so that all plugin
 603                arguments are passed in an array instead of each their own plugin
 604                argument, and arguments are passed by reference, so instead of
 605                returning any changes, changes should simply be made to the original
 606                arguments themselves. */
 607             $temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
 608                           &$defaultlink, &$display_filename, &$where, &$what);
 609             do_hook("attachment $type0/*", $temp);
 610         }
 611         /* The API for this hook has changed as of 1.5.2 so that all plugin
 612            arguments are passed in an array instead of each their own plugin
 613            argument, and arguments are passed by reference, so instead of
 614            returning any changes, changes should simply be made to the original
 615            arguments themselves. */
 616         $temp = array(&$links, &$startMessage, &$id, &$urlMailbox, &$ent,
 617                       &$defaultlink, &$display_filename, &$where, &$what);
 618         // Do not let a generic plugin change the default link if a more
 619         // specialized one already did it...
 620         if ($defaultlink != $defaultlink_orig) {
 621             $dummy = '';
 622             $temp[5] = &$dummy;
 623         }
 624         do_hook("attachment */*", $temp);
 625
 626         $this_attachment = array();
 627         $this_attachment['Name'] = decodeHeader($display_filename);
 628         $this_attachment['Description'] = $description;
 629         $this_attachment['DefaultHREF'] = $defaultlink;
 630         $this_attachment['DownloadHREF'] = $links['download link']['href'];
 631         $this_attachment['ViewHREF'] = isset($links['attachment_common']) ? $links['attachment_common']['href'] : '';
 632         $this_attachment['Size'] = $header->size;
 633         $this_attachment['ContentType'] = htmlspecialchars($type0 .'/'. $type1);
 634         $this_attachment['OtherLinks'] = array();
 635         foreach ($links as $val) {
 636             if ($val['text']==_("Download") || $val['text'] == _("View"))
 637                 continue;
 638             if (empty($val['text']) && empty($val['extra']))
 639                 continue;
 640
 641             $temp = array();
 642             $temp['HREF'] = $val['href'];
 643             $temp['Text'] = (empty($val['text']) ? '' : $val['text']) . (empty($val['extra']) ? '' : $val['extra']);
 644             $this_attachment['OtherLinks'][] = $temp;
 645         }
 646         $attachments[] = $this_attachment;
 647
 648         unset($links);
 649     }
 650
 651     return $attachments;
 652 }
 653
 654 /**
 655  * Displays attachment links and information
 656  *
 657  * Since 1.3.0 function is not included in formatBody() call.
 658  *
 659  * Since 1.0.2 uses attachment $type0/$type1 hook.
 660  * Since 1.2.5 uses attachment $type0/* hook.
 661  * Since 1.5.0 uses attachments_bottom hook.
 662  * Since 1.5.2 uses templates and does *not* return a value.
 663  *
 664  * @param object $message SquirrelMail message object
 665  * @param array $exclude_id message parts that are not attachments.
 666  * @param string $mailbox mailbox name
 667  * @param integer $id message id
 668  */
 669 function formatAttachments($message, $exclude_id, $mailbox, $id) {
 670     global $oTemplate;
 671
 672     $attach = buildAttachmentArray($message, $exclude_id, $mailbox, $id);
 673
 674     $oTemplate->assign('attachments', $attach);
 675     $oTemplate->display('read_attachments.tpl');
 676 }
 677
 678 function sqimap_base64_decode(&$string) {
 679
 680     // Base64 encoded data goes in pairs of 4 bytes. To achieve on the
 681     // fly decoding (to reduce memory usage) you have to check if the
 682     // data has incomplete pairs
 683
 684     // Remove the noise in order to check if the 4 bytes pairs are complete
 685     $string = str_replace(array("\r\n","\n", "\r", " "),array('','','',''),$string);
 686
 687     $sStringRem = '';
 688     $iMod = strlen($string) % 4;
 689     if ($iMod) {
 690         $sStringRem = substr($string,-$iMod);
 691         // Check if $sStringRem contains padding characters
 692         if (substr($sStringRem,-1) != '=') {
 693             $string = substr($string,0,-$iMod);
 694         } else {
 695             $sStringRem = '';
 696         }
 697     }
 698     $string = base64_decode($string);
 699     return $sStringRem;
 700 }
 701
 702 /**
 703  * Decodes encoded message body
 704  *
 705  * This function decodes the body depending on the encoding type.
 706  * Currently quoted-printable and base64 encodings are supported.
 707  * decode_body hook was added to this function in 1.4.2/1.5.0
 708  * @param string $body encoded message body
 709  * @param string $encoding used encoding
 710  * @return string decoded string
 711  * @since 1.0
 712  */
 713 function decodeBody($body, $encoding) {
 714
 715     $body = str_replace("\r\n", "\n", $body);
 716     $encoding = strtolower($encoding);
 717
 718     $encoding_handler = do_hook('decode_body', $encoding);
 719
 720
 721     // plugins get first shot at decoding the body
 722     //
 723     if (!empty($encoding_handler) && function_exists($encoding_handler)) {
 724         $body = $encoding_handler('decode', $body);
 725
 726     } elseif ($encoding == 'quoted-printable' ||
 727             $encoding == 'quoted_printable') {
 728         /**
 729          * quoted_printable_decode() function is broken in older
 730          * php versions. Text with \r\n decoding was fixed only
 731          * in php 4.3.0. Minimal code requirement 4.0.4 +
 732          * str_replace("\r\n", "\n", $body); call.
 733          */
 734         $body = quoted_printable_decode($body);
 735     } elseif ($encoding == 'base64') {
 736         $body = base64_decode($body);
 737     }
 738
 739     // All other encodings are returned raw.
 740     return $body;
 741 }
 742
 743 /**
 744  * Decodes headers
 745  *
 746  * This function decodes strings that are encoded according to
 747  * RFC1522 (MIME Part Two: Message Header Extensions for Non-ASCII Text).
 748  * Patched by Christian Schmidt <christian@ostenfeld.dk>  23/03/2002
 749  *
 750  * @param string $string header string that has to be made readable
 751  * @param boolean $utfencode change message in order to be readable on user's charset. defaults to true
 752  * @param boolean $htmlsafe preserve spaces and sanitize html special characters. defaults to true
 753  * @param boolean $decide decide if string can be utfencoded. defaults to false
 754  * @return string decoded header string
 755  */
 756 function decodeHeader ($string, $utfencode=true,$htmlsafe=true,$decide=false) {
 757     global $languages, $squirrelmail_language,$default_charset;
 758     if (is_array($string)) {
 759         $string = implode("\n", $string);
 760     }
 761
 762     if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
 763             function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader')) {
 764         $string = call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_decodeheader', $string);
 765         // Do we need to return at this point?
 766         // return $string;
 767     }
 768     $i = 0;
 769     $iLastMatch = -2;
 770     $encoded = true;
 771
 772     $aString = explode(' ',$string);
 773     $ret = '';
 774     foreach ($aString as $chunk) {
 775         if ($encoded && $chunk === '') {
 776             continue;
 777         } elseif ($chunk === '') {
 778             $ret .= ' ';
 779             continue;
 780         }
 781         $encoded = false;
 782         /* if encoded words are not separated by a linear-space-white we still catch them */
 783         $j = $i-1;
 784
 785         while ($match = preg_match('/^(.*)=\?([^?]*)\?(Q|B)\?([^?]*)\?=(.*)$/Ui',$chunk,$res)) {
 786             /* if the last chunk isn't an encoded string then put back the space, otherwise don't */
 787             if ($iLastMatch !== $j) {
 788                 if ($htmlsafe) {
 789                     $ret .= '&#32;';
 790                 } else {
 791                     $ret .= ' ';
 792                 }
 793             }
 794             $iLastMatch = $i;
 795             $j = $i;
 796             if ($htmlsafe) {
 797                 $ret .= htmlspecialchars($res[1]);
 798             } else {
 799                 $ret .= $res[1];
 800             }
 801             $encoding = ucfirst($res[3]);
 802
 803             /* decide about valid decoding */
 804             if ($decide && is_conversion_safe($res[2])) {
 805                 $utfencode=true;
 806                 $can_be_encoded=true;
 807             } else {
 808                 $can_be_encoded=false;
 809             }
 810             switch ($encoding)
 811             {
 812                 case 'B':
 813                     $replace = base64_decode($res[4]);
 814                     if ($utfencode) {
 815                         if ($can_be_encoded) {
 816                             /* convert string to different charset,
 817                              * if functions asks for it (usually in compose)
 818                              */
 819                             $ret .= charset_convert($res[2],$replace,$default_charset,$htmlsafe);
 820                         } else {
 821                             // convert string to html codes in order to display it
 822                             $ret .= charset_decode($res[2],$replace);
 823                         }
 824                     } else {
 825                         if ($htmlsafe) {
 826                             $replace = htmlspecialchars($replace);
 827                         }
 828                         $ret.= $replace;
 829                     }
 830                     break;
 831                 case 'Q':
 832                     $replace = str_replace('_', ' ', $res[4]);
 833                     $replace = preg_replace('/=([0-9a-f]{2})/ie', 'chr(hexdec("\1"))',
 834                             $replace);
 835                     if ($utfencode) {
 836                         if ($can_be_encoded) {
 837                             /* convert string to different charset,
 838                              * if functions asks for it (usually in compose)
 839                              */
 840                             $replace = charset_convert($res[2], $replace,$default_charset,$htmlsafe);
 841                         } else {
 842                             // convert string to html codes in order to display it
 843                             $replace = charset_decode($res[2], $replace);
 844                         }
 845                     } else {
 846                         if ($htmlsafe) {
 847                             $replace = htmlspecialchars($replace);
 848                         }
 849                     }
 850                     $ret .= $replace;
 851                     break;
 852                 default:
 853                     break;
 854             }
 855             $chunk = $res[5];
 856             $encoded = true;
 857         }
 858         if (!$encoded) {
 859             if ($htmlsafe) {
 860                 $ret .= '&#32;';
 861             } else {
 862                 $ret .= ' ';
 863             }
 864         }
 865
 866         if (!$encoded && $htmlsafe) {
 867             $ret .= htmlspecialchars($chunk);
 868         } else {
 869             $ret .= $chunk;
 870         }
 871         ++$i;
 872     }
 873     /* remove the first added space */
 874     if ($ret) {
 875         if ($htmlsafe) {
 876             $ret = substr($ret,5);
 877         } else {
 878             $ret = substr($ret,1);
 879         }
 880     }
 881
 882     return $ret;
 883 }
 884
 885 /**
 886  * Encodes header
 887  *
 888  * Function uses XTRA_CODE _encodeheader function, if such function exists.
 889  *
 890  * Function uses Q encoding by default and encodes a string according to RFC
 891  * 1522 for use in headers if it contains 8-bit characters or anything that
 892  * looks like it should be encoded.
 893  *
 894  * Function switches to B encoding and encodeHeaderBase64() function, if
 895  * string is 8bit and multibyte character set supported by mbstring extension
 896  * is used. It can cause E_USER_NOTICE errors, if interface is used with
 897  * multibyte character set unsupported by mbstring extension.
 898  *
 899  * @param string $string header string, that has to be encoded
 900  * @return string quoted-printable encoded string
 901  * @todo make $mb_charsets system wide constant
 902  */
 903 function encodeHeader ($string) {
 904     global $default_charset, $languages, $squirrelmail_language;
 905
 906     if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
 907             function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader')) {
 908         return  call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_encodeheader', $string);
 909     }
 910
 911     // Use B encoding for multibyte charsets
 912     $mb_charsets = array('utf-8','big5','gb2313','euc-kr');
 913     if (in_array($default_charset,$mb_charsets) &&
 914         in_array($default_charset,sq_mb_list_encodings()) &&
 915         sq_is8bit($string)) {
 916         return encodeHeaderBase64($string,$default_charset);
 917     } elseif (in_array($default_charset,$mb_charsets) &&
 918               sq_is8bit($string) &&
 919               ! in_array($default_charset,sq_mb_list_encodings())) {
 920         // Add E_USER_NOTICE error here (can cause 'Cannot add header information' warning in compose.php)
 921         // trigger_error('encodeHeader: Multibyte character set unsupported by mbstring extension.',E_USER_NOTICE);
 922     }
 923
 924     // Encode only if the string contains 8-bit characters or =?
 925     $j = strlen($string);
 926     $max_l = 75 - strlen($default_charset) - 7;
 927     $aRet = array();
 928     $ret = '';
 929     $iEncStart = $enc_init = false;
 930     $cur_l = $iOffset = 0;
 931     for($i = 0; $i < $j; ++$i) {
 932         switch($string{$i})
 933         {
 934             case '=':
 935             case '<':
 936             case '>':
 937             case ',':
 938             case '?':
 939             case '_':
 940                 if ($iEncStart === false) {
 941                     $iEncStart = $i;
 942                 }
 943                 $cur_l+=3;
 944                 if ($cur_l > ($max_l-2)) {
 945                     /* if there is an stringpart that doesn't need encoding, add it */
 946                     $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
 947                     $aRet[] = "=?$default_charset?Q?$ret?=";
 948                     $iOffset = $i;
 949                     $cur_l = 0;
 950                     $ret = '';
 951                     $iEncStart = false;
 952                 } else {
 953                     $ret .= sprintf("=%02X",ord($string{$i}));
 954                 }
 955                 break;
 956             case '(':
 957             case ')':
 958                 if ($iEncStart !== false) {
 959                     $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
 960                     $aRet[] = "=?$default_charset?Q?$ret?=";
 961                     $iOffset = $i;
 962                     $cur_l = 0;
 963                     $ret = '';
 964                     $iEncStart = false;
 965                 }
 966                 break;
 967             case ' ':
 968                 if ($iEncStart !== false) {
 969                     $cur_l++;
 970                     if ($cur_l > $max_l) {
 971                         $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
 972                         $aRet[] = "=?$default_charset?Q?$ret?=";
 973                         $iOffset = $i;
 974                         $cur_l = 0;
 975                         $ret = '';
 976                         $iEncStart = false;
 977                     } else {
 978                         $ret .= '_';
 979                     }
 980                 }
 981                 break;
 982             default:
 983                 $k = ord($string{$i});
 984                 if ($k > 126) {
 985                     if ($iEncStart === false) {
 986                         // do not start encoding in the middle of a string, also take the rest of the word.
 987                         $sLeadString = substr($string,0,$i);
 988                         $aLeadString = explode(' ',$sLeadString);
 989                         $sToBeEncoded = array_pop($aLeadString);
 990                         $iEncStart = $i - strlen($sToBeEncoded);
 991                         $ret .= $sToBeEncoded;
 992                         $cur_l += strlen($sToBeEncoded);
 993                     }
 994                     $cur_l += 3;
 995                     /* first we add the encoded string that reached it's max size */
 996                     if ($cur_l > ($max_l-2)) {
 997                         $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
 998                         $aRet[] = "=?$default_charset?Q?$ret?= "; /* the next part is also encoded => separate by space */
 999                         $cur_l = 3;
1000                         $ret = '';
1001                         $iOffset = $i;
1002                         $iEncStart = $i;
1003                     }
1004                     $enc_init = true;
1005                     $ret .= sprintf("=%02X", $k);
1006                 } else {
1007                     if ($iEncStart !== false) {
1008                         $cur_l++;
1009                         if ($cur_l > $max_l) {
1010                             $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
1011                             $aRet[] = "=?$default_charset?Q?$ret?=";
1012                             $iEncStart = false;
1013                             $iOffset = $i;
1014                             $cur_l = 0;
1015                             $ret = '';
1016                         } else {
1017                             $ret .= $string{$i};
1018                         }
1019                     }
1020                 }
1021                 break;
1022         }
1023     }
1024
1025     if ($enc_init) {
1026         if ($iEncStart !== false) {
1027             $aRet[] = substr($string,$iOffset,$iEncStart-$iOffset);
1028             $aRet[] = "=?$default_charset?Q?$ret?=";
1029         } else {
1030             $aRet[] = substr($string,$iOffset);
1031         }
1032         $string = implode('',$aRet);
1033     }
1034     return $string;
1035 }
1036
/**
 * Encodes string according to rfc2047 B encoding header formatting rules
 *
 * It is recommended way to encode headers with character sets that store
 * symbols in more than one byte.
 *
 * Function requires mbstring support. If required mbstring functions are missing,
 * function returns false and sets E_USER_WARNING level error message.
 *
 * Minimal requirements - php 4.0.6 with mbstring extension. Please note,
 * that mbstring functions will generate E_WARNING errors, if unsupported
 * character set is used. mb_encode_mimeheader function provided by php
 * mbstring extension is not used in order to get better control of header
 * encoding.
 *
 * Used php code functions - function_exists(), trigger_error(), strlen()
 * (is used with charset names and base64 strings). Used php mbstring
 * functions - mb_strlen and mb_substr.
 *
 * Related documents: rfc 2045 (BASE64 encoding), rfc 2047 (mime header
 * encoding), rfc 2822 (header folding)
 *
 * @param string $string header string that must be encoded
 * @param string $charset character set. Must be supported by mbstring extension.
 * Use sq_mb_list_encodings() to detect supported charsets.
 * @return mixed string encoded according to rfc2047 B encoding formatting
 * rules (encoded words separated by a single space), empty string if there
 * is nothing to encode, or boolean false if mbstring functions are missing.
 * @since 1.5.1
 * @todo First header line can be wrapped to $iMaxLength - $HeaderFieldLength - 1
 * @todo Do we want to control max length of header?
 * @todo Do we want to control EOL (end-of-line) marker?
 * @todo Do we want to translate error message?
 */
function encodeHeaderBase64($string,$charset) {
    /**
     * Check mbstring function requirements.
     */
    if (! function_exists('mb_strlen') ||
        ! function_exists('mb_substr')) {
        // set E_USER_WARNING
        trigger_error('encodeHeaderBase64: Required mbstring functions are missing.',E_USER_WARNING);
        // return false
        return false;
    }

    // Count characters once; the string does not change while looping.
    $iTotalChars = mb_strlen($string,$charset);

    // Nothing to encode. Without this guard the loop below never runs and
    // an encoded word with empty text would be produced.
    if ($iTotalChars < 1) {
        return '';
    }

    // initial return array
    $aRet = array();

    /**
     * header length = 75 symbols max (same as in encodeHeader)
     * remove $charset length
     * remove the encoded word delimiters "=?", "?B?" and "?=" (7 chars)
     */
    $iMaxLength = 75 - strlen($charset) - 7;

    // set first character position
    $iStartCharNum = 0;

    // base64 form of the characters collected for the current encoded word
    $encoded_string = '';

    // loop through all characters. count characters and not bytes.
    for ($iCharNum=1; $iCharNum<=$iTotalChars; $iCharNum++) {
        // encode string from starting character to current character.
        $encoded_string = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum,$charset));

        // Check encoded string length
        if(strlen($encoded_string)>$iMaxLength) {
            // if string exceeds max length, reduce number of encoded characters and add encoded string part to array
            $aRet[] = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum-1,$charset));

            // set new starting character
            $iStartCharNum = $iCharNum-1;

            // encode last char (in case it is last character in string)
            $encoded_string = base64_encode(mb_substr($string,$iStartCharNum,$iCharNum-$iStartCharNum,$charset));
        } // if string is shorter than max length - add next character
    }

    // add last encoded string to array
    $aRet[] = $encoded_string;

    // set initial return string
    $sRet = '';

    // loop through encoded strings
    foreach($aRet as $sEncodedPart) {
        // TODO: Do we want to control EOL (end-of-line) marker
        if ($sRet!='') $sRet.= " ";

        // add header tags and encoded string to return string
        $sRet.= '=?'.$charset.'?B?'.$sEncodedPart.'?=';
    }

    return $sRet;
}
1130
/**
 * Tries to locate the entity_id of a specific mime element.
 *
 * The entities of $message are walked in order. Multipart entities are
 * searched recursively. Any other entity matches when its header id, or
 * otherwise its 'name' header parameter, equals $id (case-insensitive).
 *
 * @param string $id content id (or name) to look for
 * @param object $message message object whose entities are searched
 * @return string entity_id of the first match, or an empty string
 */
function find_ent_id($id, $message) {
    $sFound = '';
    $iPos = 0;
    while ($sFound == '' && $iPos < count($message->entities)) {
        $oEntity = $message->entities[$iPos];
        $iPos++;

        if ($oEntity->header->type0 == 'multipart')  {
            // descend; an empty result keeps the outer search going
            $sFound = find_ent_id($id, $oEntity);
            continue;
        }

        if (strcasecmp($oEntity->header->id, $id) == 0) {
//            if (sq_check_save_extension($oEntity)) {
            return $oEntity->entity_id;
//            }
        }

        /**
         * This is part of a fix for Outlook Express 6.x generating
         * cid URLs without creating content-id headers
         * @@JA - 20050207
         */
        if (!empty($oEntity->header->parameters['name'])
            && strcasecmp($oEntity->header->parameters['name'], $id) == 0) {
            return $oEntity->entity_id;
        }
    }
    return $sFound;
}
1155
/**
 * Checks whether the filename of a message part has one of the image
 * extensions jpg, jpeg, gif, png or bmp.
 *
 * The extension is the text after the last dot and is compared
 * case-insensitively. A filename without a dot has no extension and
 * never matches.
 *
 * @param object $message object providing a getFilename() method
 * @return boolean true if the extension is in the list
 */
function sq_check_save_extension($message) {
    $filename = $message->getFilename();

    // Without this check strrpos() === false would be used as offset 0
    // and everything after the first character would count as extension.
    $iDot = strrpos($filename, '.');
    if ($iDot === false) {
        return false;
    }

    $ext = strtolower(substr($filename, $iDot + 1));
    $save_extensions = array('jpg','jpeg','gif','png','bmp');
    return in_array($ext, $save_extensions);
}
1162
1163
1164 /**
1165  ** HTMLFILTER ROUTINES
1166  */
1167
/**
 * Translates entity-encoded and backslash-escaped pieces of an attribute
 * value into plain 8-bit characters, so that later checks can be run on
 * the real value.
 *
 * Decimal entities, hexadecimal entities and backslash followed by digits
 * are handed to sq_deent() until it reports no further replacement;
 * finally stripslashes() is applied.
 *
 * @param  $attvalue A string to run entity check against.
 * @return           Nothing, modifies a reference value.
 */
function sq_defang(&$attvalue){
    /**
     * Skip this if there aren't ampersands or backslashes.
     */
    $bHasAmpersand = (strpos($attvalue, '&') !== false);
    $bHasBackslash = (strpos($attvalue, '\\') !== false);
    if (!$bHasAmpersand && !$bHasBackslash){
        return;
    }

    // One translation per pass: as soon as one pattern reports a
    // replacement the remaining patterns are left for the next pass.
    do {
        $bChanged = sq_deent($attvalue, '/\&#0*(\d+);*/s')
            || sq_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
            || sq_deent($attvalue, '/\\\\(\d+)/s', true);
    } while ($bChanged == true);

    $attvalue = stripslashes($attvalue);
}
1199
/**
 * Removes tabs, newlines, carriage returns, NUL bytes and spaces from an
 * attribute value. Our friends the makers of the browser with 95% market
 * value decided that it'd be funny to make "java[tab]script" be just as
 * good as "javascript".
 *
 * @param  attvalue  The attribute value before extraneous spaces removed.
 * @return attvalue  Nothing, modifies a reference value.
 */
function sq_unspace(&$attvalue){
    // strcspn() stops at the first unwanted character; a result shorter
    // than the whole value means there is something to remove.
    if (strcspn($attvalue, "\t\r\n\0 ") < strlen($attvalue)){
        $aUnwanted = Array("\t", "\r", "\n", "\0", " ");
        $attvalue = str_replace($aUnwanted, '', $attvalue);
    }
}
1215
/**
 * Translate all dangerous Unicode or Shift_JIS characters which are accepted by
 * IE as regular characters.
 *
 * Steps, in this order: NUL bytes are removed, comments are removed, the
 * look-alike characters listed below are replaced by plain ASCII letters,
 * and a backslash in front of a single ASCII letter is dropped.
 *
 * @param  attvalue  The attribute value before dangerous characters are translated.
 * @return attvalue  Nothing, modifies a reference value.
 * @author Marc Groot Koerkamp.
 */
function sq_fixIE_idiocy(&$attvalue) {
    // remove NUL, then remove comments
    $attvalue = preg_replace("/(\/\*.*?\*\/)/","",str_replace("\0", "", $attvalue));

    // IE has the evil habit of accepting every possible value for the attribute expression.
    // The table below contains characters which are parsed by IE if they are used in the "expression"
    // attribute value. Keys are what is searched for, values are the replacements.
    $aLookalikes = array(
        /* UNICODE IPA Extension L, R, N as hex and decimal entities */
        '&#x029F;' => 'l', '&#0671;'  => 'l',
        '&#x0280;' => 'r', '&#0640;'  => 'r',
        '&#x0274;' => 'n', '&#0628;'  => 'n',
        /* Unicode FULLWIDTH LATIN letters as hex and decimal entities */
        '&#xFF25;' => 'E', '&#65317;' => 'E',
        '&#xFF45;' => 'e', '&#65349;' => 'e',
        '&#xFF38;' => 'X', '&#65336;' => 'X',
        '&#xFF58;' => 'x', '&#65368;' => 'x',
        '&#xFF30;' => 'P', '&#65328;' => 'P',
        '&#xFF50;' => 'p', '&#65360;' => 'p',
        '&#xFF32;' => 'R', '&#65330;' => 'R',
        '&#xFF52;' => 'r', '&#65362;' => 'r',
        '&#xFF33;' => 'S', '&#65331;' => 'S',
        '&#xFF53;' => 's', '&#65363;' => 's',
        '&#xFF29;' => 'I', '&#65321;' => 'I',
        '&#xFF49;' => 'i', '&#65353;' => 'i',
        '&#xFF2F;' => 'O', '&#65327;' => 'O',
        '&#xFF4F;' => 'o', '&#65359;' => 'o',
        '&#xFF2E;' => 'N', '&#65326;' => 'N',
        '&#xFF4E;' => 'n', '&#65358;' => 'n',
        '&#xFF2C;' => 'L', '&#65324;' => 'L',
        '&#xFF4C;' => 'l', '&#65356;' => 'l',
        '&#xFF35;' => 'U', '&#65333;' => 'U',
        '&#xFF55;' => 'u', '&#65365;' => 'u',
        /* Unicode SUPERSCRIPT LATIN SMALL LETTER N */
        '&#x207F;' => 'n', '&#8319;'  => 'n',
        /* The same FULLWIDTH LATIN letters as raw bytes (originally labelled Shift JIS) */
        "\xEF\xBC\xA5" => 'E',
        "\xEF\xBD\x85" => 'e',
        "\xEF\xBC\xB8" => 'X',
        "\xEF\xBD\x98" => 'x',
        "\xEF\xBC\xB0" => 'P',
        "\xEF\xBD\x90" => 'p',
        "\xEF\xBC\xB2" => 'R',
        "\xEF\xBD\x92" => 'r',
        "\xEF\xBC\xB3" => 'S',
        "\xEF\xBD\x93" => 's',
        "\xEF\xBC\xA9" => 'I',
        "\xEF\xBD\x89" => 'i',
        "\xEF\xBC\xAF" => 'O',
        "\xEF\xBD\x8F" => 'o',
        "\xEF\xBC\xAE" => 'N',
        "\xEF\xBD\x8E" => 'n',
        "\xEF\xBC\xAC" => 'L',
        "\xEF\xBD\x8C" => 'l',
        "\xEF\xBC\xB5" => 'U',
        "\xEF\xBD\x95" => 'u',
        /* SUPERSCRIPT N as raw bytes */
        "\xE2\x81\xBF" => 'n',
        /* UNICODE IPA Extension L, R, N as raw bytes */
        "\xCA\x9F"     => 'l',
        "\xCA\x80"     => 'r',
        "\xC9\xB4"     => 'n'
    );
    $attvalue = str_replace(array_keys($aLookalikes), array_values($aLookalikes), $attvalue);

    // Escapes are useful for special characters like "{}[]()'&. In other cases they are
    // used for XSS.
    $attvalue = preg_replace("/(\\\\)([a-zA-Z]{1})/",'$2',$attvalue);
}
1292
/**
 * This function returns the final tag out of the tag name, an array
 * of attributes, and the type of the tag. This function is called by
 * sq_sanitize internally.
 *
 * Attribute values are written exactly as given ("name=value"), so any
 * quoting has to be part of the value already.
 *
 * @param  $tagname  the name of the tag.
 * @param  $attary   the array of attributes and their values
 * @param  $tagtype  The type of the tag: 2 gives a closing tag, 3 gives a
 *                   tag closed with " />", any other value an opening tag.
 * @return           a string with the final tag representation.
 */
function sq_tagprint($tagname, $attary, $tagtype){
    // closing tags never carry attributes
    if ($tagtype == 2){
        return '</' . $tagname . '>';
    }

    $fulltag = '<' . $tagname;
    if (is_array($attary) && sizeof($attary)){
        $atts = Array();
        // foreach always starts at the first attribute; each() is not
        // available in PHP 8 and depended on the array's internal pointer.
        foreach ($attary as $attname => $attvalue){
            $atts[] = "$attname=$attvalue";
        }
        $fulltag .= ' ' . join(" ", $atts);
    }
    if ($tagtype == 3){
        $fulltag .= ' /';
    }
    $fulltag .= '>';
    return $fulltag;
}
1324
/**
 * Helper for use with array_walk: lowercases the value it is given
 * by reference.
 *
 * @param  $val a value passed by-ref.
 * @return      void since it modifies a by-ref value.
 */
function sq_casenormalize(&$val){
    $sLowered = strtolower($val);
    $val = $sLowered;
}
1335
/**
 * This function skips any whitespace from the current position within
 * a string and to the next non-whitespace value.
 *
 * @param  $body   the string
 * @param  $offset the offset within the string where we should start
 *                 looking for the next non-whitespace character.
 * @return         the location within the $body where the next
 *                 non-whitespace char is located. This is $offset itself
 *                 if there is no whitespace at $offset, and the length of
 *                 $body if only whitespace follows.
 */
function sq_skipspace($body, $offset){
    $matches = Array();
    // The pattern also matches an empty run of whitespace, so group 1
    // holds exactly the whitespace found at $offset.
    preg_match('/^(\s*)/s', substr($body, $offset), $matches);
    if (isset($matches[1])){
        $offset += strlen($matches[1]);
    }
    return $offset;
}
1355
/**
 * This function looks for the next character within a string.  It's
 * really just a glorified "strpos", except that a failed search is
 * reported as the length of the string instead of false.
 *
 * @param  $body   The string to look for needle in.
 * @param  $offset Start looking from this position.
 * @param  $needle The character/string to look for.
 * @return         location of the next occurrence of the needle, or
 *                 strlen($body) if needle wasn't found.
 */
function sq_findnxstr($body, $offset, $needle){
    $iFound = strpos($body, $needle, $offset);
    return ($iFound === FALSE) ? strlen($body) : $iFound;
}
1375
/**
 * This function takes a PCRE-style regexp and tries to match it
 * within the string.
 *
 * The regexp is embedded in a pattern that uses "%" as delimiter and the
 * "s" and "i" modifiers, so it is matched case-insensitively and must not
 * contain an unescaped "%". A match that is completely empty is treated
 * as "not found".
 *
 * @param  $body   The string to look for needle in.
 * @param  $offset Start looking from here.
 * @param  $reg    A PCRE-style regex to match.
 * @return         Returns a false if no matches found, or an array
 *                 with the following members:
 *                 - integer with the location of the match within $body
 *                 - string with whatever content between offset and the match
 *                 - string with whatever it is we matched
 */
function sq_findnxreg($body, $offset, $reg){
    $matches = Array();
    preg_match("%^(.*?)($reg)%si", substr($body, $offset), $matches);

    // Compare against the empty string explicitly: a plain truth test
    // would also reject a match that consists of the text "0".
    if (!isset($matches[0]) || $matches[0] === ''){
        return false;
    }

    return Array(
        $offset + strlen($matches[1]),
        $matches[1],
        $matches[2]
    );
}
1403
/**
 * This function looks for the next tag.
 *
 * Tag and attribute names are lower-cased. Attribute values are returned
 * together with their quotes: quoted values keep the quote character they
 * were written with, unquoted values are wrapped in double quotes (with
 * any '"' inside turned into "&quot;"), and attributes without a value
 * are stored as '"yes"'.
 *
 * @param  $body   String where to look for the next tag.
 * @param  $offset Start looking from here.
 * @return         false if no more tags exist in the body, or
 *                 an array with the following members:
 *                 - string with the name of the tag
 *                 - array with attributes and their values (false when the
 *                   tag ends directly after its name)
 *                 - integer with tag type (1, 2, or 3)
 *                 - integer where the tag starts (starting "<")
 *                 - integer where the tag ends (ending ">")
 *                 first three members will be false, if the tag is invalid.
 */
function sq_getnxtag($body, $offset){
    if ($offset > strlen($body)){
        return false;
    }
    $lt = sq_findnxstr($body, $offset, "<");
    if ($lt == strlen($body)){
        return false;
    }
    /**
     * We are here:
     * blah blah <tag attribute="value">
     * \---------^
     */
    $pos = sq_skipspace($body, $lt+1);
    if ($pos >= strlen($body)){
        return array(false, false, false, $lt, strlen($body));
    }
    /**
     * There are 3 kinds of tags:
     * 1. Opening tag, e.g.:
     *    <a href="blah">
     * 2. Closing tag, e.g.:
     *    </a>
     * 3. XHTML-style content-less tag, e.g.:
     *    <img src="blah" />
     */
    $tagtype = false;
    switch (substr($body, $pos, 1)){
        case '/':
            $tagtype = 2;
            $pos++;
            break;
        case '!':
            /**
             * A comment or an SGML declaration. Both are reported as an
             * invalid tag spanning up to their terminator.
             */
            if (substr($body, $pos+1, 2) == "--"){
                $gt = strpos($body, "-->", $pos);
                if ($gt === false){
                    $gt = strlen($body);
                } else {
                    // point at the ">" of the closing "-->"
                    $gt += 2;
                }
                return array(false, false, false, $lt, $gt);
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                return array(false, false, false, $lt, $gt);
            }
        default:
            /**
             * Assume tagtype 1 for now. If it's type 3, we'll switch values
             * later.
             */
            $tagtype = 1;
            break;
    }

    $tagname = '';
    /**
     * Look for next [\W-_], which will indicate the end of the tag name.
     */
    $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
    if ($regary === false){
        return array(false, false, false, $lt, strlen($body));
    }
    list($pos, $tagname, $match) = $regary;
    $tagname = strtolower($tagname);

    /**
     * $match can be either of these:
     * '>'  indicating the end of the tag entirely.
     * '\s' indicating the end of the tag name.
     * '/'  indicating that this is type-3 xhtml tag.
     *
     * Whatever else we find there indicates an invalid tag.
     */
    switch ($match){
        case '/':
            /**
             * This is an xhtml-style tag with a closing / at the
             * end, like so: <img src="blah" />. Check if it's followed
             * by the closing bracket. If not, then this tag is invalid
             */
            if (substr($body, $pos, 2) == "/>"){
                $pos++;
                $tagtype = 3;
            } else {
                $gt = sq_findnxstr($body, $pos, ">");
                return array(false, false, false, $lt, $gt);
            }
            // deliberate fall-through: $pos now points at the ">"
        case '>':
            return array($tagname, false, $tagtype, $lt, $pos);
        default:
            /**
             * Check if it's whitespace
             */
            if (!preg_match('/\s/', $match)){
                /**
                 * This is an invalid tag! Look for the next closing ">".
                 */
                $gt = sq_findnxstr($body, $lt, ">");
                return array(false, false, false, $lt, $gt);
            }
            break;
    }

    /**
     * At this point we're here:
     * <tagname  attribute='blah'>
     * \-------^
     *
     * At this point we loop in order to find all attributes.
     */
    $attname = '';
    $attary = array();

    while ($pos <= strlen($body)){
        $pos = sq_skipspace($body, $pos);
        if ($pos == strlen($body)){
            /**
             * Non-closed tag.
             */
            return array(false, false, false, $lt, $pos);
        }
        /**
         * See if we arrived at a ">" or "/>", which means that we reached
         * the end of the tag.
         */
        $matches = array();
        if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) {
            /**
             * Yep. So we did.
             */
            $pos += strlen($matches[1]);
            if ($matches[2] == "/>"){
                $tagtype = 3;
                $pos++;
            }
            return array($tagname, $attary, $tagtype, $lt, $pos);
        }

        /**
         * There are several types of attributes, with optional
         * [:space:] between members.
         * Type 1:
         *   attrname[:space:]=[:space:]'CDATA'
         * Type 2:
         *   attrname[:space:]=[:space:]"CDATA"
         * Type 3:
         *   attr[:space:]=[:space:]CDATA
         * Type 4:
         *   attrname
         *
         * We leave types 1 and 2 the same, type 3 we check for
         * '"' and convert to "&quot" if needed, then wrap in
         * double quotes. Type 4 we convert into:
         * attrname="yes".
         */
        $regary = sq_findnxreg($body, $pos, "[^:\w\-_]");
        if ($regary === false){
            /**
             * Looks like body ended before the end of tag.
             */
            return array(false, false, false, $lt, strlen($body));
        }
        list($pos, $attname, $match) = $regary;
        $attname = strtolower($attname);
        /**
         * We arrived at the end of attribute name. Several things possible
         * here:
         * '>'  means the end of the tag and this is attribute type 4
         * '/'  if followed by '>' means the same thing as above
         * '\s' means a lot of things -- look what it's followed by.
         *      anything else means the attribute is invalid.
         */
        switch($match){
            case '/':
                /**
                 * This is an xhtml-style tag with a closing / at the
                 * end, like so: <img src="blah" />. Check if it's followed
                 * by the closing bracket. If not, then this tag is invalid
                 */
                if (substr($body, $pos, 2) == "/>"){
                    $pos++;
                    $tagtype = 3;
                } else {
                    $gt = sq_findnxstr($body, $pos, ">");
                    return array(false, false, false, $lt, $gt);
                }
                // deliberate fall-through: $pos now points at the ">"
            case '>':
                $attary[$attname] = '"yes"';
                return array($tagname, $attary, $tagtype, $lt, $pos);
            default:
                /**
                 * Skip whitespace and see what we arrive at.
                 */
                $pos = sq_skipspace($body, $pos);
                $char = substr($body, $pos, 1);
                /**
                 * Two things are valid here:
                 * '=' means this is attribute type 1 2 or 3.
                 * \w means this was attribute type 4.
                 * anything else we ignore and re-loop. End of tag and
                 * invalid stuff will be caught by our checks at the beginning
                 * of the loop.
                 */
                if ($char == "="){
                    $pos++;
                    $pos = sq_skipspace($body, $pos);
                    /**
                     * Here are 3 possibilities:
                     * "'"  attribute type 1
                     * '"'  attribute type 2
                     * everything else is the content of tag type 3
                     */
                    $quot = substr($body, $pos, 1);
                    if ($quot == "'"){
                        $regary = sq_findnxreg($body, $pos+1, "\'");
                        if ($regary === false){
                            return array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        // step over the closing quote
                        $pos++;
                        $attary[$attname] = "'" . $attval . "'";
                    } else if ($quot == '"'){
                        $regary = sq_findnxreg($body, $pos+1, '\"');
                        if ($regary === false){
                            return array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        // step over the closing quote
                        $pos++;
                        $attary[$attname] = '"' . $attval . '"';
                    } else {
                        /**
                         * These are hateful. Look for \s, or >.
                         */
                        $regary = sq_findnxreg($body, $pos, "[\s>]");
                        if ($regary === false){
                            return array(false, false, false, $lt, strlen($body));
                        }
                        list($pos, $attval, $match) = $regary;
                        /**
                         * If it's ">" it will be caught at the top.
                         */
                        $attval = preg_replace("/\"/s", "&quot;", $attval);
                        $attary[$attname] = '"' . $attval . '"';
                    }
                } else if (preg_match("|[\w/>]|", $char)) {
                    /**
                     * That was attribute type 4.
                     */
                    $attary[$attname] = '"yes"';
                } else {
                    /**
                     * An illegal character. Find next '>' and return.
                     */
                    $gt = sq_findnxstr($body, $pos, ">");
                    return array(false, false, false, $lt, $gt);
                }
                break;
        }
    }
    /**
     * The fact that we got here indicates that the tag end was never
     * found. Return invalid tag indication so it gets stripped.
     */
    return array(false, false, false, $lt, strlen($body));
}
1694
/**
 * Translates entities into literal values so they can be checked.
 *
 * Every match of $regex is replaced in $attvalue by the single character
 * whose code is given by the first capture group of that match.
 *
 * @param $attvalue the by-ref value to check; rewritten in place when
 *                  there were matches.
 * @param $regex    the regular expression to check against. Its first
 *                  capture group must hold the numeric part of the entity.
 * @param $hex      whether the entites are hexadecimal.
 * @return          True or False depending on whether there were matches.
 */
function sq_deent(&$attvalue, $regex, $hex=false){
    $matches = array();
    preg_match_all($regex, $attvalue, $matches);
    if (!is_array($matches) || empty($matches[0])){
        return false;
    }
    $repl = array();
    foreach ($matches[0] as $i => $entity){
        $numval = $matches[1][$i];
        if ($hex){
            $numval = hexdec($numval);
        }
        // The capture is a numeric string (or hexdec() result); make the
        // conversion to a character code explicit.
        $repl[$entity] = chr((int) $numval);
    }
    $attvalue = strtr($attvalue, $repl);
    return true;
}
1724
/**
 * This function runs various checks against the attributes.
 *
 * For every attribute it: drops it when its name is listed in
 * $rm_attnames for this tag, normalises the value (sq_fixIE_idiocy,
 * sq_defang, sq_unspace), applies the $bad_attvals replacements, and
 * passes style url() values and href/src/background values through
 * sq_fix_url(). Finally the attributes from $add_attr_to_tag that apply
 * to this tag are merged in.
 *
 * @param  $tagname         String with the name of the tag.
 * @param  $attary          Array with all tag attributes.
 * @param  $rm_attnames     See description for sq_sanitize
 * @param  $bad_attvals     See description for sq_sanitize
 * @param  $add_attr_to_tag See description for sq_sanitize
 * @param  $message         message object
 * @param  $id              message id
 * @param  $mailbox         mailbox of the message (passed on to sq_fix_url)
 * @return                  Array with modified attributes.
 */
function sq_fixatts($tagname,
                    $attary,
                    $rm_attnames,
                    $bad_attvals,
                    $add_attr_to_tag,
                    $message,
                    $id,
                    $mailbox
                    ){
    // foreach walks a snapshot of $attary, so entries can be removed or
    // rewritten inside the loop without disturbing the iteration.
    foreach ($attary as $attname => $attvalue){
        /**
         * See if this attribute should be removed.
         */
        foreach ($rm_attnames as $matchtag=>$matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr){
                    if (preg_match($matchattr, $attname)){
                        unset($attary[$attname]);
                        // Go straight to the next attribute. Running the
                        // remaining checks on a removed attribute could
                        // write it back into $attary.
                        continue 3;
                    }
                }
            }
        }
        /**
         * Workaround for IE quirks
         */
        sq_fixIE_idiocy($attvalue);

        /**
         * Remove any backslashes, entities, and extraneous whitespace.
         */

        $oldattvalue = $attvalue;
        sq_defang($attvalue);
        if ($attname == 'style' && $attvalue !== $oldattvalue) {
            // entities are used in the attribute value. In 99% of the cases it's there as XSS
            // i.e.<div style="{ left:exp&#x0280;essio&#x0274;( alert('XSS') ) }">
            $attvalue = "idiocy";
            $attary[$attname] = $attvalue;
        }
        sq_unspace($attvalue);

        /**
         * Now run the checks on the attribute values: $bad_attvals maps
         * tag-name regexes to attribute-name regexes to a pair of
         * (patterns, replacements) that is applied to the value.
         */
        foreach ($bad_attvals as $matchtag=>$matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr=>$valary){
                    if (preg_match($matchattr, $attname)){
                        /**
                         * There are two arrays in valary.
                         * First is matches.
                         * Second one is replacements
                         */
                        list($valmatch, $valrepl) = $valary;
                        $newvalue =
                            preg_replace($valmatch, $valrepl, $attvalue);
                        if ($newvalue != $attvalue){
                            $attary[$attname] = $newvalue;
                            $attvalue = $newvalue;
                        }
                    }
                }
            }
        }
        if ($attname == 'style') {
            if (preg_match('/[\0-\37\200-\377]+/',$attvalue)) {
                // 8bit and control characters in style attribute values can be used for XSS, remove them.
                // The working copy is replaced as well, so the url() rewrite
                // below cannot put the original value back.
                $attvalue = '"disallowed character"';
                $attary[$attname] = $attvalue;
            }
            $aMatch = array();
            preg_match_all("/url\s*\((.+)\)/si",$attvalue,$aMatch);
            if (!empty($aMatch[1])) {
                foreach($aMatch[1] as $sMatch) {
                    // url value
                    $urlvalue = $sMatch;
                    sq_fix_url($attname, $urlvalue, $message, $id, $mailbox,"'");
                    $attvalue = str_replace($sMatch,$urlvalue,$attvalue);
                    $attary[$attname] = $attvalue;
                }
            }
        }
        /**
         * Use white list based filtering on attributes which can contain url's
         */
        else if ($attname == 'href' || $attname == 'src' || $attname == 'background') {
            sq_fix_url($attname, $attvalue, $message, $id, $mailbox);
            $attary[$attname] = $attvalue;
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag=>$addattary){
        if (preg_match($matchtag, $tagname)){
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
1839
/**
 * This function filters url's
 *
 * The value is rewritten in place and always comes back wrapped in a
 * quote character: the one the value arrived with, or $sQuote when it
 * arrived unquoted.
 *
 * @param  $attname         name of the attribute the url belongs to;
 *                          'href' is treated as a link, anything else as
 *                          an image-like resource
 * @param  $attvalue        String with attribute value to filter (by-ref)
 * @param  $message         message object
 * @param  $id              message id
 * @param  $mailbox         mailbox
 * @param  $sQuote          quoting characters around url's
 */
function sq_fix_url($attname, &$attvalue, $message, $id, $mailbox,$sQuote = '"') {
    $attvalue = trim($attvalue);
    if ($attvalue && ($attvalue[0] =='"'|| $attvalue[0] == "'")) {
        // remember the quote style the value came with and strip it
        $sQuote = $attvalue[0];
        $attvalue = trim(substr($attvalue,1,-1));
    }

    // If there's no "view_unsafe_images" setting in the user's preferences,
    // turn unsafe images off by default.
    if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
        $view_unsafe_images = false;
    }
    $secremoveimg = '../images/' . _("sec_remove_eng.png");
    $blankimg = $sQuote . SM_PATH . 'images/blank.png' . $sQuote;

    /**
     * Replace empty src tags with the blank image.  src is only used
     * for frames, images, and image inputs.  Doing a replace should
     * not affect them working as should be, however it will stop
     * IE from being kicked off when src for img tags are not set
     */
    if ($attvalue == '') {
        $attvalue = $blankimg;
        return;
    }

    // first, disallow 8 bit characters and control characters
    if (preg_match('/[\0-\37\200-\377]+/',$attvalue)) {
        if ($attname == 'href') {
            $attvalue = $sQuote . 'http://invalid-stuff-detected.example.com' . $sQuote;
        } else {
            $attvalue = $blankimg;
        }
        return;
    }

    // An accepted url is handed back quoted, like every rejected one, so
    // the caller never prints an unquoted attribute value. Quote characters
    // inside the url are percent-encoded so they cannot end the value early.
    $quotedvalue = $sQuote
                 . str_replace(array('"', "'"), array('%22', '%27'), $attvalue)
                 . $sQuote;

    $aUrl = parse_url($attvalue);
    if (!isset($aUrl['scheme'])) {
        if (isset($aUrl['path']) && $aUrl['path'] == $secremoveimg) {
            // our own "image removed" placeholder is allowed through
            $attvalue = $quotedvalue;
        } else {
            // parse_url did not lead to satisfying result
            $attvalue = $blankimg;
        }
        return;
    }

    switch(strtolower($aUrl['scheme'])) {
        case 'mailto':
        case 'http':
        case 'https':
        case 'ftp':
            if ($attname != 'href') {
                if ($view_unsafe_images == false) {
                    $attvalue = $sQuote . $secremoveimg . $sQuote;
                } else if (isset($aUrl['path'])) {
                    // validate image extension.
                    // NOTE(review): 'xjpeg' has no leading dot, unlike the
                    // other entries -- possibly a typo; left unchanged.
                    $ext = strtolower(substr($aUrl['path'],strrpos($aUrl['path'],'.')));
                    if (!in_array($ext,array('.jpeg','.jpg','xjpeg','.gif','.bmp','.jpe','.png','.xbm'))) {
                        $attvalue = $blankimg;
                    } else {
                        $attvalue = $quotedvalue;
                    }
                } else {
                    $attvalue = $blankimg;
                }
            } else {
                $attvalue = $quotedvalue;
            }
            break;
        case 'outbind':
            /**
             * "Hack" fix for Outlook using propriatary outbind:// protocol in img tags.
             * One day MS might actually make it match something useful, for now, falling
             * back to using cid2http, so we can grab the blank.png.
             */
            $attvalue = $sQuote . sq_cid2http($message, $id, $attvalue, $mailbox) . $sQuote;
            break;
        case 'cid':
            /**
             * Turn cid: urls into http-friendly ones. The value was
             * unquoted above, so sq_cid2http() returns a bare url.
             */
            $attvalue = $sQuote . sq_cid2http($message, $id, $attvalue, $mailbox) . $sQuote;
            break;
        default:
            $attvalue = $blankimg;
            break;
    }
}
1934
/**
 * This function edits the style definition to make them friendly and
 * usable in SquirrelMail.
 *
 * It scans $body from $pos for the closing </style> tag, collecting the
 * style sheet text on the way (HTML comments are skipped entirely), and
 * then filters the collected text.
 *
 * @param  $body     the string being sanitized
 * @param  $pos      offset in $body just after the opening <style> tag
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $mailbox  the message mailbox
 * @return           an array with two members:
 *                   - string with the edited style content, or FALSE when
 *                     no closing </style> tag was found
 *                   - integer offset in $body right after the closing tag
 *                     (strlen($body) when it was not found)
 */
function sq_fixstyle($body, $pos, $message, $id, $mailbox){
    // workaround for </style> in between comments
    $content = '';
    $sToken = '';
    $bSucces = false;
    $bEndTag = false;
    for ($i=$pos,$iCount=strlen($body);$i<$iCount;++$i) {
        $char = $body[$i];
        switch ($char) {
            case '<':
                $sToken = $char;
                break;
            case '/':
                 if ($sToken == '<') {
                    $sToken .= $char;
                    $bEndTag = true;
                 } else {
                    $content .= $char;
                 }
                 break;
            case '>':
                 if ($bEndTag) {
                    $sToken .= $char;
                    if (preg_match('/\<\/\s*style\s*\>/i',$sToken)) {
                        $newpos = $i + 1;
                        $bSucces = true;
                        break 2;
                    } else {
                        $content .= $sToken;
                    }
                    $bEndTag = false;
                 } else {
                    $content .= $char;
                 }
                 break;
            case '!':
                if ($sToken == '<') {
                    // possible comment
                    if (isset($body[$i+2]) && substr($body,$i,3) == '!--') {
                        $i = strpos($body,'-->',$i+3);
                        if ($i === false) { // no end comment
                            $i = strlen($body);
                        } else {
                            // resume after the whole "-->" so that no
                            // stray "->" ends up in the style content
                            $i += 2;
                        }
                        $sToken = '';
                    }
                } else {
                    $content .= $char;
                }
                break;
            default:
                if ($bEndTag) {
                    $sToken .= $char;
                } else {
                    $content .= $char;
                }
                break;
        }
    }
    if ($bSucces == FALSE){
        return array(FALSE, strlen($body));
    }

    /**
     * First look for general BODY style declaration, which would be
     * like so:
     * body {background: blah-blah}
     * and change it to .bodyclass so we can just assign it to a <div>
     */
    $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);

    // first check for 8bit sequences and disallowed control characters
    if (preg_match('/[\16-\37\200-\377]+/',$content)) {
        $content = '<!-- style block removed by html filter due to presence of 8bit characters -->';
        return array($content, $newpos);
    }

    // IE Sucks hard. We have a special function for it.
    sq_fixIE_idiocy($content);

    // remove @import line
    $content = preg_replace("/^\s*(@import.*)$/mi","\n<!-- @import rules forbidden -->\n",$content);

    // translate ur\l and variations (IE parses that)
    // TODO check if the sq_fixIE_idiocy function already handles this.
    $content = preg_replace("/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i", 'url', $content);

    /**
     * Fix url('blah') declarations: every url value is run through
     * sq_fix_url() and swapped for the filtered result.
     */
    $aMatch = array();
    preg_match_all("/url\s*\((.+)\)/si",$content,$aMatch);
    if (!empty($aMatch[1])) {
        $aValue = $aReplace = array();
        foreach($aMatch[1] as $sMatch) {
            // url value
            $urlvalue = $sMatch;
            sq_fix_url('style',$urlvalue, $message, $id, $mailbox,"'");
            $aValue[] = $sMatch;
            $aReplace[] = $urlvalue;
        }
        $content = str_replace($aValue,$aReplace,$content);
    }

    /**
     * Remove any backslashes, entities, and extraneous whitespace.
     * This is done on a copy, which is only used when it turns out to
     * contain something dangerous.
     */
    $contentTemp = $content;
    sq_defang($contentTemp);
    sq_unspace($contentTemp);

    /**
     * Fix stupid css declarations which lead to vulnerabilities
     * in IE.
     */
    $match   = array('/\/\*.*\*\//',
                    '/expression/i',
                    '/behaviou*r/i',
                    '/binding/i',
                    '/include-source/i',
                    '/javascript/i',
                    '/script/i');
    $replace = array('','idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy', 'idiocy');
    $contentNew = preg_replace($match, $replace, $contentTemp);
    if ($contentNew !== $contentTemp) {
        // insecure css declarations are used. From now on we don't care
        // anymore if the css is destroyed by sq_deent, sq_unspace or sq_unbackslash
        $content = $contentNew;
    }
    return array($content, $newpos);
}
2080
2081
/**
 * Turns a cid: url into a link to src/download.php that the browser can
 * actually fetch, or into the blank image when the referenced message
 * part cannot be found.
 *
 * If $cidurl starts with a single or double quote, the result is wrapped
 * in that same quote character; otherwise it is returned bare.
 *
 * @param  $message  the message object
 * @param  $id       the message id
 * @param  $cidurl   the cid: url.
 * @param  $mailbox  the message mailbox
 * @return           a string with a http-friendly url
 */
function sq_cid2http($message, $id, $cidurl, $mailbox){
    /**
     * Remember the quote style (if any) and remove that quote character
     * from the url.
     */
    $wrap = '';
    $first = substr($cidurl, 0, 1);
    if ($first == '"' || $first == "'"){
        $wrap = $first;
        $cidurl = str_replace($wrap, "", $cidurl);
    }

    // Drop the four leading characters (the "cid:" prefix) and any
    // "{...}/" segment before looking the part up.
    $cid = preg_replace('/\{.*?\}\//', '', substr(trim($cidurl), 4));
    $entId = find_ent_id($cid, $message);

    /**
     * This is part of a fix for Outlook Express 6.x generating
     * cid URLs without creating content-id headers. These images are
     * not part of the multipart/related html mail. The html contains
     * <img src="cid:{some_id}/image_filename.ext"> references to
     * attached images with as goal to render them inline although
     * the attachment disposition property is not inline.
     */
    if (empty($entId) && preg_match('/{.*}\//', $cid)) {
        $cid = preg_replace('/{.*}\//','', $cid);
        if (!empty($cid)) {
            $entId = find_ent_id($cid, $message);
        }
    }

    if (empty($entId)) {
        /**
         * If we couldn't generate a proper img url, drop in a blank image
         * instead of sending back empty, otherwise it causes unusual behaviour
         */
        return $wrap . SM_PATH . 'images/blank.png' . $wrap;
    }

    return $wrap . SM_PATH . 'src/download.php?absolute_dl=true&amp;' .
        "passed_id=$id&amp;mailbox=" . urlencode($mailbox) .
        '&amp;ent_id=' . $entId . $wrap;
}
2145
/**
 * This function changes the <body> tag into a <div> tag since we
 * can't really have a body-within-body.
 *
 * The background, bgcolor and text attributes are turned into a style
 * definition; all other attributes are dropped. The resulting <div>
 * always gets class 'bodyclass'.
 *
 * @param  $attary   an array of attributes and values of <body>
 * @param  $mailbox  mailbox we're currently reading (for cid2http)
 * @param  $message  current message (for cid2http)
 * @param  $id       current message id (for cid2http)
 * @return           a modified array of attributes to be set for <div>
 */
function sq_body2div($attary, $mailbox, $message, $id){
    $divattary = array('class' => "'bodyclass'");
    $text = '#000000';
    $has_bgc_stl = $has_txt_stl = false;
    $styledef = '';
    if (is_array($attary) && sizeof($attary) > 0){
        foreach ($attary as $attname=>$attvalue){
            // The values end up inside style="..." (and url('...')), so no
            // quote character of either kind may survive: a leftover '"'
            // would terminate the style attribute early. Stripping exactly
            // the quote characters also leaves unquoted values intact.
            $attvalue = str_replace(array('"', "'"), '', $attvalue);
            switch ($attname){
                case 'background':
                    $attvalue = sq_cid2http($message, $id, $attvalue, $mailbox);
                    $styledef .= "background-image: url('$attvalue'); ";
                    break;
                case 'bgcolor':
                    // NOTE(review): the value is not checked to be a plain
                    // colour, so further CSS can ride along -- confirm
                    // whether it should be restricted.
                    $has_bgc_stl = true;
                    $styledef .= "background-color: $attvalue; ";
                    break;
                case 'text':
                    $has_txt_stl = true;
                    $styledef .= "color: $attvalue; ";
                    break;
            }
        }
        // Outlook defines a white bgcolor and no text color. This can lead to
        // white text on a white bg with certain themes.
        if ($has_bgc_stl && !$has_txt_stl) {
            $styledef .= "color: $text; ";
        }
        if (strlen($styledef) > 0){
            $divattary["style"] = "\"$styledef\"";
        }
    }
    return $divattary;
}
2192
/**
 * This is the main function and the one you should actually be calling.
 * There are several variables you should be aware of and which need
 * special description.
 *
 * Since the description is quite lengthy, see it here:
 * http://linux.duke.edu/projects/mini/htmlfilter/
 *
 * The body is walked tag by tag (via sq_getnxtag). Text between tags is
 * copied to the output unless it lies inside a tag listed in
 * $rm_tags_with_content; each tag is dropped, rewritten or printed
 * according to the lists below. <style> blocks are handed to sq_fixstyle
 * and <body> is turned into <div>.
 *
 * @param $body                 the string with HTML you wish to filter
 * @param $tag_list             array whose first element is a flag and whose
 *                              remaining elements are tag names: flag false
 *                              means remove the listed tags, flag true means
 *                              allow only the listed tags
 * @param $rm_tags_with_content tags that are removed together with
 *                              everything up to their closing tag
 * @param $self_closing_tags    tags whose opening form is treated as
 *                              self-closing (tag type 3)
 * @param $force_tag_closing    when true, closing tags are appended for every
 *                              tag still counted as open at the end
 * @param $rm_attnames          passed through to sq_fixatts
 * @param $bad_attvals          passed through to sq_fixatts
 * @param $add_attr_to_tag      passed through to sq_fixatts
 * @param $message              message object
 * @param $id                   message id
 * @param $mailbox              mailbox name (passed on to sq_fixstyle,
 *                              sq_body2div and sq_fixatts)
 * @return                      sanitized html safe to show on your pages.
 */
function sq_sanitize($body,
                     $tag_list,
                     $rm_tags_with_content,
                     $self_closing_tags,
                     $force_tag_closing,
                     $rm_attnames,
                     $bad_attvals,
                     $add_attr_to_tag,
                     $message,
                     $id,
                     $mailbox
                     ){
    /**
     * See if tag_list is of tags to remove or tags to allow.
     * false  means remove these tags
     * true   means allow these tags
     */
    $rm_tags = array_shift($tag_list);
    /**
     * Normalize tag_list, rm_tags_with_content and self_closing_tags
     * (sq_casenormalize presumably folds the case of each entry so the
     * in_array() lookups below match the tag names - TODO confirm).
     */
    @array_walk($tag_list, 'sq_casenormalize');
    @array_walk($rm_tags_with_content, 'sq_casenormalize');
    @array_walk($self_closing_tags, 'sq_casenormalize');

    $curpos = 0;
    // tag name => number of currently unclosed opening tags printed so far
    $open_tags = Array();
    $trusted = "\n<!-- begin sanitized html -->\n";
    // false, or the name of the tag whose content is currently being dropped
    $skip_content = false;
    /**
     * Take care of netscape's stupid javascript entities like
     * &{alert('boo')};
     */
    $body = preg_replace("/&(\{.*?\};)/si", "&amp;\\1", $body);

    while (($curtag = sq_getnxtag($body, $curpos)) != FALSE){
        // $lt / $gt are the offsets of the tag's opening and closing bracket
        list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
        $free_content = substr($body, $curpos, $lt-$curpos);
        /**
         * Take care of <style>
         */
        if ($tagname == "style" && $tagtype == 1){
            list($free_content, $curpos) =
                sq_fixstyle($body, $gt+1, $message, $id, $mailbox);
            if ($free_content != FALSE){
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
                $trusted .= $free_content;
                $trusted .= sq_tagprint($tagname, false, 2);
            }
            continue;
        }
        if ($skip_content == false){
            $trusted .= $free_content;
        }
        if ($tagname != FALSE){
            if ($tagtype == 2){
                if ($skip_content == $tagname){
                    /**
                     * Got to the end of tag we needed to remove.
                     */
                    $tagname = false;
                    $skip_content = false;
                } else {
                    if ($skip_content == false){
                        if ($tagname == "body"){
                            $tagname = "div";
                        }
                        // Only print a closing tag that matches an opening
                        // tag we have actually printed.
                        if (isset($open_tags[$tagname]) &&
                                $open_tags[$tagname] > 0){
                            $open_tags[$tagname]--;
                        } else {
                            $tagname = false;
                        }
                    }
                }
            } else {
                /**
                 * $rm_tags_with_content
                 */
                if ($skip_content == false){
                    /**
                     * See if this is a self-closing type and change
                     * tagtype appropriately.
                     */
                    if ($tagtype == 1
                            && in_array($tagname, $self_closing_tags)){
                        $tagtype = 3;
                    }
                    /**
                     * See if we should skip this tag and any content
                     * inside it.
                     */
                    if ($tagtype == 1 &&
                            in_array($tagname, $rm_tags_with_content)){
                        $skip_content = $tagname;
                    } else {
                        if (($rm_tags == false
                                    && in_array($tagname, $tag_list)) ||
                                ($rm_tags == true &&
                                 !in_array($tagname, $tag_list))){
                            $tagname = false;
                        } else {
                            /**
                             * Convert body into div.
                             */
                            if ($tagname == "body"){
                                $tagname = "div";
                                $attary = sq_body2div($attary, $mailbox,
                                        $message, $id);
                            }
                            if ($tagtype == 1){
                                if (isset($open_tags[$tagname])){
                                    $open_tags[$tagname]++;
                                } else {
                                    $open_tags[$tagname]=1;
                                }
                            }
                            /**
                             * This is where we run other checks.
                             */
                            if (is_array($attary) && sizeof($attary) > 0){
                                $attary = sq_fixatts($tagname,
                                                     $attary,
                                                     $rm_attnames,
                                                     $bad_attvals,
                                                     $add_attr_to_tag,
                                                     $message,
                                                     $id,
                                                     $mailbox
                                                     );
                            }
                        }
                    }
                }
            }
            if ($tagname != false && $skip_content == false){
                $trusted .= sq_tagprint($tagname, $attary, $tagtype);
            }
        }
        $curpos = $gt+1;
    }
    // Whatever follows the last tag is copied as-is.
    $trusted .= substr($body, $curpos, strlen($body)-$curpos);
    if ($force_tag_closing == true){
        foreach ($open_tags as $tagname=>$opentimes){
            while ($opentimes > 0){
                $trusted .= '</' . $tagname . '>';
                $opentimes--;
            }
        }
        $trusted .= "\n";
    }
    $trusted .= "<!-- end sanitized html -->\n";
    return $trusted;
}
2367
/**
 * This is a wrapper function to call html sanitizing routines.
 *
 * It builds the tag/attribute rule sets, runs sq_sanitize() with them,
 * sets the global $has_unsafe_images when a placeholder image was
 * substituted, and optionally rewrites mailto: links into compose links.
 * As a side effect the global $attachment_common_show_images is set to
 * false.
 *
 * @param  $body  the body of the message
 * @param  $id    the id of the message
 * @param  $message  message object (handed to sq_sanitize)
 * @param  $mailbox  mailbox name (handed to sq_sanitize; default 'INBOX')
 * @param  boolean $take_mailto_links When TRUE, converts mailto: links
 *                                    into internal SM compose links
 *                                    (optional; default = TRUE)
 * @return        a string with html safe to display in the browser.
 */
function magicHTML($body, $id, $message, $mailbox = 'INBOX', $take_mailto_links =true) {

    // require_once(SM_PATH . 'functions/url_parser.php');  // for $MailTo_PReg_Match

    global $attachment_common_show_images, $view_unsafe_images,
           $has_unsafe_images;
    /**
     * Don't display attached images in HTML mode.
     *
     * SB: why?
     */
    $attachment_common_show_images = false;
    // First element false: the tags listed here are removed (see sq_sanitize).
    $tag_list = Array(
            false,
            "object",
            "meta",
            "html",
            "head",
            "base",
            "link",
            "frame",
            "iframe",
            "plaintext",
            "marquee"
            );

    $rm_tags_with_content = Array(
            "script",
            "applet",
            "embed",
            "title",
            "frameset",
            "xmp",
            "xml"
            );

    $self_closing_tags =  Array(
            "img",
            "br",
            "hr",
            "input",
            "outbind"
            );

    $force_tag_closing = true;

    $rm_attnames = Array(
            "/.*/" =>
            Array(
                "/target/i",
                "/^on.*/i",
                "/^dynsrc/i",
                "/^data.*/i",
                "/^lowsrc.*/i"
                )
            );

    // Placeholder image substituted for blocked image sources.
    $secremoveimg = "../images/" . _("sec_remove_eng.png");
    // tag pattern => attribute pattern => (list of regexes, list of replacements)
    $bad_attvals = Array(
            "/.*/" =>
            Array(
                "/^src|background/i" =>
                Array(
                    Array(
                        "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                        "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                        "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                        ),
                    Array(
                        "\\1$secremoveimg\\2",
                        "\\1$secremoveimg\\2",
                        "\\1$secremoveimg\\2",
                        )
                    ),
                "/^href|action/i" =>
                Array(
                    Array(
                        "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                        "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                        "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                        ),
                    Array(
                        "\\1#\\1",
                        "\\1#\\1",
                        "\\1#\\1"
                        )
                    ),
        "/^style/i" =>
            Array(
                Array(
                    "/\/\*.*\*\//",
                    "/expression/i",
                    "/binding/i",
                    "/behaviou*r/i",
                    "/include-source/i",
                    "/position\s*:\s*absolute/i",
                    "/(\\\\)?u(\\\\)?r(\\\\)?l(\\\\)?/i",
                    "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si",
                    "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si",
                    "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si",
                    "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si"
                    ),
                Array(
                    "",
                    "idiocy",
                    "idiocy",
                    "idiocy",
                    "idiocy",
                    "idiocy",
                    "url",
                    "url(\\1#\\1)",
                    "url(\\1#\\1)",
                    "url(\\1#\\1)",
                    "\\1:url(\\2#\\3)"
                    )
                )
            )
        );

    // If the request carries no "view_unsafe_images" value (looked up with
    // SQ_GET), turn unsafe images off by default.
    if( !sqgetGlobalVar('view_unsafe_images', $view_unsafe_images, SQ_GET) ) {
        $view_unsafe_images = false;
    }

    if (!$view_unsafe_images){
        /**
         * Remove any references to http/https if view_unsafe_images set
         * to false.
         *
         * Square brackets are used for the offsets: the {} offset syntax
         * is no longer accepted by current PHP versions.
         */
        array_push($bad_attvals['/.*/']['/^src|background/i'][0],
                '/^([\'\"])\s*https*:.*([\'\"])/si');
        array_push($bad_attvals['/.*/']['/^src|background/i'][1],
                "\\1$secremoveimg\\1");
        array_push($bad_attvals['/.*/']['/^style/i'][0],
                '/url\([\'\"]?https?:[^\)]*[\'\"]?\)/si');
        array_push($bad_attvals['/.*/']['/^style/i'][1],
                "url(\\1$secremoveimg\\1)");
    }

    $add_attr_to_tag = Array(
            "/^a$/i" =>
            Array('target'=>'"_blank"',
                'title'=>'"'._("This external link will open in a new window").'"'
                )
            );
    $trusted = sq_sanitize($body,
                           $tag_list,
                           $rm_tags_with_content,
                           $self_closing_tags,
                           $force_tag_closing,
                           $rm_attnames,
                           $bad_attvals,
                           $add_attr_to_tag,
                           $message,
                           $id,
                           $mailbox
                           );
    // The placeholder only shows up in the output if something was blocked.
    if (strpos($trusted,$secremoveimg) !== false){
        $has_unsafe_images = true;
    }

    // we want to parse mailto's in HTML output, change to SM compose links
    // this is a modified version of code from url_parser.php... but Marc is
    // right: we need a better filtering implementation; adding this randomly
    // here is not a great solution
    //
    if ($take_mailto_links) {
        // parseUrl($trusted);   // this even parses URLs inside of tags... too aggressive
        global $MailTo_PReg_Match, $compose_new_win;

        // Derive the "mailto:"-prefixed pattern in a local variable. Writing
        // it back into the global would prepend "mailto:" again on every
        // further call and break the pattern for everyone else using it.
        // The first character of the global is assumed to be the regex
        // delimiter - TODO confirm against url_parser.php. If the global is
        // not available no rewriting is attempted.
        $mailto_regex = '';
        if (is_string($MailTo_PReg_Match) && strlen($MailTo_PReg_Match) > 1) {
            $mailto_regex = '/mailto:' . substr($MailTo_PReg_Match, 1);
        }

        if ($mailto_regex != ''
                && (preg_match_all($mailto_regex, $trusted, $regs))
                && ($regs[0][0] != '')) {
            foreach ($regs[0] as $i => $mailto_before) {
                // NOTE(review): group 10 is taken to be the "?param=..." part
                // and group 1 the address; both depend on the layout of the
                // shared pattern - confirm against url_parser.php.
                $mailto_params = $regs[10][$i];
                // get rid of any tailing quote since we have to add send_to to the end
                //
                if (substr($mailto_before, strlen($mailto_before) - 1) == '"')
                    $mailto_before = substr($mailto_before, 0, strlen($mailto_before) - 1);
                if (substr($mailto_params, strlen($mailto_params) - 1) == '"')
                    $mailto_params = substr($mailto_params, 0, strlen($mailto_params) - 1);

                if ($regs[1][$i]) {    //if there is an email addr before '?', we need to merge it with the params
                    $to = 'to=' . $regs[1][$i];
                    if (strpos($mailto_params, 'to=') !== false)    //already a 'to='
                        $mailto_params = str_replace('to=', $to . '%2C%20', $mailto_params);
                    else {
                        if ($mailto_params)    //already some params, append to them
                            $mailto_params .= '&amp;' . $to;
                        else
                            $mailto_params .= '?' . $to;
                    }
                }

                $url_str = preg_replace(array('/to=/i', '/(?<!b)cc=/i', '/bcc=/i'), array('send_to=', 'send_to_cc=', 'send_to_bcc='), $mailto_params);

                // we'll already have target=_blank, no need to allow comp_in_new
                // here (which would be a lot more work anyway)
                //
                $temp_comp_in_new = $compose_new_win;
                $compose_new_win = 0;
                $comp_uri = makeComposeLink('src/compose.php' . $url_str, $mailto_before);
                $compose_new_win = $temp_comp_in_new;

                // remove <a href=" and anything after the next quote (we only
                // need the uri, not the link HTML) in compose uri
                //
                $comp_uri = substr($comp_uri, 9);
                $comp_uri = substr($comp_uri, 0, strpos($comp_uri, '"', 1));
                $trusted = str_replace($mailto_before, $comp_uri, $trusted);
            }
        }
    }

    return $trusted;
}
2597
/**
 * function SendDownloadHeaders - send file to the browser
 *
 * Original Source: SM core src/download.php
 * moved here to make it available to other code, and separate
 * front end from back end functionality.
 *
 * Only HTTP headers are emitted here (Content-Type, Content-Disposition,
 * cache control for IE and optionally Content-Length); the file content
 * itself has to be sent by the caller.
 *
 * @param string $type0 first half of mime type
 * @param string $type1 second half of mime type
 * @param string $filename filename to tell the browser for downloaded file
 * @param boolean $force whether to force the download dialog to pop
 * @param optional integer $filesize send the Content-Length header to the
 *                                   browser when greater than zero
 * @return void
 */
function SendDownloadHeaders($type0, $type1, $filename, $force, $filesize=0) {
    global $languages, $squirrelmail_language;
    $isIE = $isIE6plus = false;

    sqgetGlobalVar('HTTP_USER_AGENT', $HTTP_USER_AGENT, SQ_SERVER);
    // The request may carry no User-Agent at all; fall back to an empty
    // string so the string functions below always get a string.
    if (!isset($HTTP_USER_AGENT) || !is_string($HTTP_USER_AGENT)) {
        $HTTP_USER_AGENT = '';
    }

    if (strstr($HTTP_USER_AGENT, 'compatible; MSIE ') !== false &&
            strstr($HTTP_USER_AGENT, 'Opera') === false) {
        $isIE = true;
    }

    if (preg_match('/compatible; MSIE ([0-9]+)/', $HTTP_USER_AGENT, $match) &&
        ((int)$match[1]) >= 6 && strstr($HTTP_USER_AGENT, 'Opera') === false) {
        $isIE6plus = true;
    }

    if (isset($languages[$squirrelmail_language]['XTRA_CODE']) &&
            function_exists($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename')) {
        // Language-specific filename handling takes over completely.
        $filename =
            call_user_func($languages[$squirrelmail_language]['XTRA_CODE'] . '_downloadfilename', $filename, $HTTP_USER_AGENT);
    } else {
        // Replace \ / : * ? " < > | ; with an underscore. preg_replace()
        // is used because ereg_replace() no longer exists in current PHP;
        // the character class covers the same characters as before.
        $filename = preg_replace('/[\\\\\/:*?"<>|;]/', '_', str_replace('&nbsp;', ' ', $filename));
    }

    // A Pox on Microsoft and its Internet Explorer!
    //
    // IE has lots of bugs with file downloads.
    // It also has problems with SSL.  Both of these cause problems
    // for us in this function.
    //
    // See this article on Cache Control headers and SSL
    // http://support.microsoft.com/default.aspx?scid=kb;en-us;323308
    //
    // The best thing you can do for IE is to upgrade to the latest
    // version
    //set all the Cache Control Headers for IE
    if ($isIE) {
        $filename=rawurlencode($filename);
        header ("Pragma: public");
        header ("Cache-Control: no-store, max-age=0, no-cache, must-revalidate"); // HTTP/1.1
        header ("Cache-Control: post-check=0, pre-check=0", false);
        header ("Cache-Control: private");

        //set the inline header for IE, we'll add the attachment header later if we need it
        // (both branches below send their own Content-Disposition, which
        // replaces this one because header() replaces by default)
        header ("Content-Disposition: inline; filename=$filename");
    }

    if (!$force) {
        // Try to show in browser window
        header ("Content-Disposition: inline; filename=\"$filename\"");
        header ("Content-Type: $type0/$type1; name=\"$filename\"");
    } else {
        // Try to pop up the "save as" box

        // IE makes this hard.  It pops up 2 save boxes, or none.
        // http://support.microsoft.com/support/kb/articles/Q238/5/88.ASP
        // http://support.microsoft.com/default.aspx?scid=kb;EN-US;260519
        // But, according to Microsoft, it is "RFC compliant but doesn't
        // take into account some deviations that allowed within the
        // specification."  Doesn't that mean RFC non-compliant?
        // http://support.microsoft.com/support/kb/articles/Q258/4/52.ASP

        // all browsers need the application/octet-stream header for this
        header ("Content-Type: application/octet-stream; name=\"$filename\"");

        // http://support.microsoft.com/support/kb/articles/Q182/3/15.asp
        // Do not have quotes around filename, but that applied to
        // "attachment"... does it apply to inline too?
        header ("Content-Disposition: attachment; filename=\"$filename\"");

        if ($isIE && !$isIE6plus) {
            // This combination seems to work mostly.  IE 5.5 SP 1 has
            // known issues (see the Microsoft Knowledge Base)

            // This works for most types, but doesn't work with Word files
            header ("Content-Type: application/download; name=\"$filename\"");
            header ("Content-Type: application/force-download; name=\"$filename\"");
            // These are spares, just in case.  :-)
            //header("Content-Type: $type0/$type1; name=\"$filename\"");
            //header("Content-Type: application/x-msdownload; name=\"$filename\"");
            //header("Content-Type: application/octet-stream; name=\"$filename\"");
        } else if ($isIE) {
             // This is to prevent IE from MIME sniffing and auto-opening a file in IE
             header ("Content-Type: application/force-download; name=\"$filename\"");
        } else {
            // another application/octet-stream forces download for Netscape
            header ("Content-Type: application/octet-stream; name=\"$filename\"");
        }
    }

    //send the content-length header if the calling function provides it
    if ($filesize > 0) {
        header("Content-Length: $filesize");
    }

}  // end fn SendDownloadHeaders