- return( $string );
- }
-
- /*
- Strips dangerous tags from html messages.
- */
-
- function MagicHTML( $body, $id ) {
-
- global $message, $PHP_SELF, $HTTP_SERVER_VARS;
-
- $j = strlen( $body ); // Legnth of the HTML
- $ret = ''; // Returned string
- $bgcolor = '#ffffff'; // Background style color (defaults to white)
- $leftmargin = ''; // Left margin style
- $title = ''; // HTML title if any
-
- $i = 0;
- while( $i < $j ) {
- if( $body{$i} == '<' ) {
- $tag = $body{$i+1}.$body{$i+2}.$body{$i+3}.$body{$i+4};
- switch( strtoupper( $tag ) ) {
- // Strips the entire tag and contents
- case 'APPL':
- case 'EMBB':
- case 'FRAM':
- case 'SCRI':
- case 'OBJE':
- $etg = '/' . $tag;
- while( $body{$i+1}.$body{$i+2}.$body{$i+3}.$body{$i+4}.$body{$i+5} <> $etg &&
- $i < $j ) $i++;
- while( $i < $j && $body{++$i} <> '>' );
- // $ret .= "<!-- $tag removed -->";
- break;
- // Substitute Title
- case 'TITL':
- $i += 5;
- while( $body{$i} <> '>' && // </title>
- $i < $j )
- $i++;
- $i++;
- $title = '';
- while( $body{$i} <> '<' && // </title>
- $i < $j ) {
- $title .= $body{$i};
- $i++;
- }
- $i += 7;
- break;
- // Destroy these tags
- case 'HTML':
- case 'HEAD':
- case '/HTM':
- case '/HEA':
- case '!DOC':
- case 'META':
- case 'DIV ':
- case '/DIV':
- case '!-- ':
- $i += 4;
- while( $body{$i} <> '>' &&
- $i < $j )
- $i++;
- // $i++;
- break;
- case 'STYL':
- $i += 5;
- while( $body{$i} <> '>' && // </title>
- $i < $j )
- $i++;
- $i++;
- // We parse the style to look for interesting stuff
- $styleblk = '';
- while( $body{$i} <> '>' &&
- $i < $j ) {
- // First we get the name of the style
- $style = '';
- while( $body{$i} <> '>' &&
- $body{$i} <> '<' &&
- $body{$i} <> '{' &&
- $i < $j ) {
- if( isnoSep( $body{$i} ) )
- $style .= $body{$i};
- $i++;
- }
- stripComments( $i, $j, $body );
- $style = strtoupper( trim( $style ) );
- if( $style == 'BODY' ) {
- // Next we look into the definitions of the body style
- while( $body{$i} <> '>' &&
- $body{$i} <> '}' &&
- $i < $j ) {
- // We look for the background color if any.
- if( substr( $body, $i, 17 ) == 'BACKGROUND-COLOR:' ) {
- $i += 17;
- $bgcolor = getStyleData( $i, $j, $body );
- } elseif ( substr( $body, $i, 12 ) == 'MARGIN-LEFT:' ) {
- $i += 12;
- $leftmargin = getStyleData( $i, $j, $body );
- }
- $i++;
- }
- } else {
- // Other style are mantained
- $styleblk .= "$style ";
- while( $body{$i} <> '>' &&
- $body{$i} <> '<' &&
- $body{$i} <> '}' &&
- $i < $j ) {
- $styleblk .= $body{$i};
- $i++;
- }
- $styleblk .= $body{$i};
- }
- stripComments( $i, $j, $body );
- if( $body{$i} <> '>' )
- $i++;
- }
- if( $styleblk <> '' )
- $ret .= "<style>$styleblk";
- break;
- case 'BODY':
- if( $title <> '' )
- $ret .= '<b>' . _("Title:") . " </b>$title<br>\n";
- $ret .= "<TABLE";
- $i += 5;
- if (! isset($base))
- $base = '';
- $ret .= stripEvent( $i, $j, $body, $id, $base );
- //if( $bgcolor <> '' )
- $ret .= " bgcolor=$bgcolor";
- $ret .= ' width=100%><tr>';
- if( $leftmargin <> '' )
- $ret .= "<td width=$leftmargin> </td>";
- $ret .= '<td>';
- break;
- case 'BASE':
- $i += 5;
- $base = '';
- while( !isNoSep( $body{$i} ) &&
- $i < $j )
- $i++;
- if( strcasecmp( substr( $base, 0, 4 ), 'href' ) ) {
- $i += 5;
- while( !isNoSep( $body{$i} ) &&
- $i < $j )
- $i++;
- while( $body{$i} <> '>' &&
- $i < $j ) {
- if( $body{$i} <> '"' )
- $base .= $body{$i};
- $i++;
- }
- // Debuging $ret .= "<!-- base == $base -->";
- if( strcasecmp( substr( $base, 0, 4 ), 'file' ) <> 0 )
- $ret .= "\n<BASE HREF=\"$base\">\n";
- }
- break;
- case '/BOD':
- $ret .= '</td></tr></TABLE>';
- $i += 6;
- break;
- default:
- // Following tags can contain some event handler, lets search it
- stripComments( $i, $j, $body );
- if (! isset($base))
- $base = '';
- $ret .= stripEvent( $i, $j, $body, $id, $base ) . '>';
- // $ret .= "<!-- $tag detected -->";
+ return( $string );
+}
+
+/*
+   Tries to locate the entity_id of a specific MIME element.
+
+   Walks $message->entities in index order. An entity whose header type0
+   is 'multipart' is searched recursively; any other entity is compared
+   against $id by its header id, ignoring case. The walk stops at the
+   first non-empty entity_id obtained; '' is returned when none is found.
+*/
+
+function find_ent_id( $id, $message ) {
+    $entity_id = '';
+    $total = count($message->entities);
+    $idx = 0;
+    while ($entity_id == '' && $idx < $total) {
+        $entity = $message->entities[$idx];
+        $idx++;
+        if ($entity->header->type0 == 'multipart') {
+            /* Containers carry no match themselves; look inside them. */
+            $entity_id = find_ent_id( $id, $entity );
+            continue;
+        }
+        if (strcasecmp( $entity->header->id, $id ) == 0) {
+            $entity_id = $entity->entity_id;
+        }
+    }
+    return $entity_id;
+}
+
+/**
+ ** HTMLFILTER ROUTINES
+ */
+
+/**
+ * This function returns the final tag out of the tag name, an array
+ * of attributes, and the type of the tag. This function is called by
+ * sq_sanitize internally.
+ *
+ * Attributes are written verbatim as name=value, separated by single
+ * spaces, so each value must already carry its own quotes.
+ *
+ * @param  $tagname the name of the tag.
+ * @param  $attary  the array of attributes (name => value). Anything
+ *                  that is not a non-empty array adds no attributes.
+ * @param  $tagtype The type of the tag: 2 produces a closing tag
+ *                  (attributes are ignored), 3 appends " /" before
+ *                  the closing bracket, any other value produces a
+ *                  plain opening tag.
+ * @return a string with the final tag representation.
+ */
+function sq_tagprint($tagname, $attary, $tagtype){
+    if ($tagtype == 2){
+        return '</' . $tagname . '>';
+    }
+    $fulltag = '<' . $tagname;
+    if (is_array($attary) && sizeof($attary)){
+        $atts = array();
+        /**
+         * foreach always starts at the first element. The former
+         * each() loop started wherever the array's internal pointer
+         * happened to be, and each() no longer exists in PHP 8.
+         */
+        foreach ($attary as $attname => $attvalue){
+            $atts[] = "$attname=$attvalue";
+        }
+        $fulltag .= ' ' . join(' ', $atts);
+    }
+    if ($tagtype == 3){
+        $fulltag .= ' /';
+    }
+    return $fulltag . '>';
+}
+
+/**
+ * Callback meant for use with array_walk: lowercases the value it is
+ * handed, in place.
+ *
+ * @param  $val the value to normalize, received by reference.
+ * @return void; the result is written back through the reference.
+ */
+function sq_casenormalize(&$val){
+    $lowered = strtolower($val);
+    $val = $lowered;
+}
+
+/**
+ * This function skips any whitespace from the current position within
+ * a string and to the next non-whitespace value.
+ *
+ * Whitespace is whatever the PCRE class \s matches.
+ *
+ * @param  $body   the string
+ * @param  $offset the offset within the string where we should start
+ *                 looking for the next non-whitespace character.
+ * @return the location within the $body where the next
+ *         non-whitespace char is located. This is $offset itself
+ *         when the character at $offset is not whitespace, and
+ *         strlen($body) when only whitespace remains.
+ */
+function sq_skipspace($body, $offset){
+    $matches = array();
+    /**
+     * Only advance when at least one whitespace character was really
+     * matched. The previous sizeof() test on the captured string did
+     * not measure its length, and counting a string is a TypeError
+     * in PHP 8.
+     */
+    if (preg_match('/^\s+/', substr($body, $offset), $matches)){
+        $offset += strlen($matches[0]);
+    }
+    return $offset;
+}
+
+/**
+ * This function looks for the next character within a string. It's
+ * really just a glorified "strpos", except it handles failures
+ * nicely: instead of false it reports the end of the string.
+ *
+ * @param  $body   The string to look for needle in.
+ * @param  $offset Start looking from this position.
+ * @param  $needle The character/string to look for.
+ * @return location of the next occurrence of the needle, or
+ *         strlen($body) if needle wasn't found.
+ */
+function sq_findnxstr($body, $offset, $needle){
+    $end = strlen($body);
+    if ($offset > $end){
+        /**
+         * Nothing can be found past the end. Answer right away,
+         * because strpos() rejects such an offset (a warning before
+         * PHP 8, a ValueError since).
+         */
+        return $end;
+    }
+    $pos = strpos($body, $needle, $offset);
+    return ($pos === false) ? $end : $pos;
+}
+
+/**
+ * This function takes a PCRE-style regexp and tries to match it
+ * within the string.
+ *
+ * $reg is inserted into a pattern delimited by "%" and compiled with
+ * the "s" modifier, so a literal "%" inside $reg must be escaped and
+ * "." also matches newlines. The first (leftmost) match at or after
+ * $offset is reported.
+ *
+ * @param  $body   The string to look for needle in.
+ * @param  $offset Start looking from here.
+ * @param  $reg    A PCRE-style regex to match.
+ * @return Returns a false if no matches found, or an array
+ *         with the following members:
+ *         - integer with the location of the match within $body
+ *         - string with whatever content between offset and the match
+ *         - string with whatever it is we matched
+ */
+function sq_findnxreg($body, $offset, $reg){
+    $matches = array();
+    $found = preg_match("%^(.*?)($reg)%s", substr($body, $offset), $matches);
+    /**
+     * Decide on preg_match()'s own result instead of the truthiness of
+     * the matched text, so that a match consisting of "0" is not taken
+     * for a failure and no unset index is read when nothing matched.
+     * A completely empty match is still reported as "not found", as
+     * it always was.
+     */
+    if (!$found || $matches[0] === ''){
+        return false;
+    }
+    return array($offset + strlen($matches[1]), $matches[1], $matches[2]);
+}
+
+/**
+ * This function looks for the next tag.
+ *
+ * @param $body String where to look for the next tag.
+ * @param $offset Start looking from here.
+ * @return false if no more tags exist in the body, or
+ * an array with the following members:
+ * - string with the name of the tag
+ * - array with attributes and their values
+ * - integer with tag type (1, 2, or 3)
+ * - integer where the tag starts (starting "<")
+ * - integer where the tag ends (ending ">")
+ * first three members will be false, if the tag is invalid.
+ */
+function sq_getnxtag($body, $offset){
+ $me = "sq_getnxtag";
+ if ($offset > strlen($body)){
+ return false;
+ }
+ $lt = sq_findnxstr($body, $offset, "<");
+ if ($lt == strlen($body)){
+ return false;
+ }
+ /**
+ * We are here:
+ * blah blah <tag attribute="value">
+ * \---------^
+ */
+ $pos = sq_skipspace($body, $lt+1);
+ if ($pos >= strlen($body)){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ /**
+ * There are 3 kinds of tags:
+ * 1. Opening tag, e.g.:
+ * <a href="blah">
+ * 2. Closing tag, e.g.:
+ * </a>
+ * 3. XHTML-style content-less tag, e.g.:
+ * <img src="blah"/>
+ */
+ $tagtype = false;
+ switch (substr($body, $pos, 1)){
+ case "/":
+ $tagtype = 2;
+ $pos++;
+ break;
+ case "!":
+ /**
+ * A comment or an SGML declaration.
+ */
+ if (substr($body, $pos+1, 2) == "--"){
+ $gt = strpos($body, "-->", $pos);
+ if ($gt === false){
+ $gt = strlen($body);
+ } else {
+ $gt += 2;
+ }
+ return Array(false, false, false, $lt, $gt);
+ } else {
+ $gt = sq_findnxstr($body, $pos, ">");
+ return Array(false, false, false, $lt, $gt);
+ }
+ break;
+ default:
+ /**
+ * Assume tagtype 1 for now. If it's type 3, we'll switch values
+ * later.
+ */
+ $tagtype = 1;
+ break;
+ }
+
+ $tag_start = $pos;
+ $tagname = '';
+ /**
+ * Look for next [\W-_], which will indicate the end of the tag name.
+ */
+ $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $tagname, $match) = $regary;
+ $tagname = strtolower($tagname);
+
+ /**
+ * $match can be either of these:
+ * '>' indicating the end of the tag entirely.
+ * '\s' indicating the end of the tag name.
+ * '/' indicating that this is type-3 xhtml tag.
+ *
+ * Whatever else we find there indicates an invalid tag.
+ */
+ switch ($match){
+ case "/":
+ /**
+ * This is an xhtml-style tag with a closing / at the
+ * end, like so: <img src="blah"/>. Check if it's followed
+ * by the closing bracket. If not, then this tag is invalid
+ */
+ if (substr($body, $pos, 2) == "/>"){
+ $pos++;
+ $tagtype = 3;
+ } else {
+ $gt = sq_findnxstr($body, $pos, ">");
+ $retary = Array(false, false, false, $lt, $gt);
+ return $retary;
+ }
+ case ">":
+ return Array($tagname, false, $tagtype, $lt, $pos);
+ break;
+ default:
+ /**
+ * Check if it's whitespace
+ */
+ if (preg_match("/\s/", $match)){
+ } else {
+ /**
+ * This is an invalid tag! Look for the next closing ">".
+ */
+ $gt = sq_findnxstr($body, $offset, ">");
+ return Array(false, false, false, $lt, $gt);
+ }
+ }
+
+ /**
+ * At this point we're here:
+ * <tagname attribute='blah'>
+ * \-------^
+ *
+ * At this point we loop in order to find all attributes.
+ */
+ $attname = '';
+ $atttype = false;
+ $attary = Array();
+
+ while ($pos <= strlen($body)){
+ $pos = sq_skipspace($body, $pos);
+ if ($pos == strlen($body)){
+ /**
+ * Non-closed tag.
+ */
+ return Array(false, false, false, $lt, $pos);
+ }
+ /**
+ * See if we arrived at a ">" or "/>", which means that we reached
+ * the end of the tag.
+ */
+ $matches = Array();
+ if (preg_match("%^(\s*)(>|/>)%s", substr($body, $pos), $matches)) {
+ /**
+ * Yep. So we did.
+ */
+ $pos += strlen($matches{1});
+ if ($matches{2} == "/>"){
+ $tagtype = 3;
+ $pos++;
+ }
+ return Array($tagname, $attary, $tagtype, $lt, $pos);
+ }
+
+ /**
+ * There are several types of attributes, with optional
+ * [:space:] between members.
+ * Type 1:
+ * attrname[:space:]=[:space:]'CDATA'
+ * Type 2:
+ * attrname[:space:]=[:space:]"CDATA"
+ * Type 3:
+ * attr[:space:]=[:space:]CDATA
+ * Type 4:
+ * attrname
+ *
+ * We leave types 1 and 2 the same, type 3 we check for
+ * '"' and convert to "&quot;" if needed, then wrap in
+ * double quotes. Type 4 we convert into:
+ * attrname="yes".
+ */
+ $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
+ if ($regary == false){
+ /**
+ * Looks like body ended before the end of tag.
+ */
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attname, $match) = $regary;
+ $attname = strtolower($attname);
+ /**
+ * We arrived at the end of attribute name. Several things possible
+ * here:
+ * '>' means the end of the tag and this is attribute type 4
+ * '/' if followed by '>' means the same thing as above
+ * '\s' means a lot of things -- look what it's followed by.
+ * anything else means the attribute is invalid.
+ */
+ switch($match){
+ case "/":
+ /**
+ * This is an xhtml-style tag with a closing / at the
+ * end, like so: <img src="blah"/>. Check if it's followed
+ * by the closing bracket. If not, then this tag is invalid
+ */
+ if (substr($body, $pos, 2) == "/>"){
+ $pos++;
+ $tagtype = 3;
+ } else {
+ $gt = sq_findnxstr($body, $pos, ">");
+ $retary = Array(false, false, false, $lt, $gt);
+ return $retary;
+ }
+ case ">":
+ $attary{$attname} = '"yes"';
+ return Array($tagname, $attary, $tagtype, $lt, $pos);
+ break;
+ default:
+ /**
+ * Skip whitespace and see what we arrive at.
+ */
+ $pos = sq_skipspace($body, $pos);
+ $char = substr($body, $pos, 1);
+ /**
+ * Two things are valid here:
+ * '=' means this is attribute type 1 2 or 3.
+ * \w means this was attribute type 4.
+ * anything else we ignore and re-loop. End of tag and
+ * invalid stuff will be caught by our checks at the beginning
+ * of the loop.
+ */
+ if ($char == "="){
+ $pos++;
+ $pos = sq_skipspace($body, $pos);
+ /**
+ * Here are 3 possibilities:
+ * "'" attribute type 1
+ * '"' attribute type 2
+ * everything else is the content of tag type 3
+ */
+ $quot = substr($body, $pos, 1);
+ if ($quot == "'"){
+ $regary = sq_findnxreg($body, $pos+1, "\'");
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ $pos++;
+ $attary{$attname} = "'" . $attval . "'";
+ } else if ($quot == '"'){
+ $regary = sq_findnxreg($body, $pos+1, '\"');
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ $pos++;
+ $attary{$attname} = '"' . $attval . '"';
+ } else {
+ /**
+ * These are hateful. Look for \s, or >.
+ */
+ $regary = sq_findnxreg($body, $pos, "[\s>]");
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ /**
+ * If it's ">" it will be caught at the top.
+ */
+ $attval = preg_replace("/\"/s", """, $attval);
+ $attary{$attname} = '"' . $attval . '"';