+ /**
+ * There are several types of attributes, with optional
+ * [:space:] between members.
+ * Type 1:
+ * attrname[:space:]=[:space:]'CDATA'
+ * Type 2:
+ * attrname[:space:]=[:space:]"CDATA"
+ * Type 3:
+ * attr[:space:]=[:space:]CDATA
+ * Type 4:
+ * attrname
+ *
+ * We leave types 1 and 2 the same, type 3 we check for
+ * '"' and convert to &quot; if needed, then wrap in
+ * double quotes. Type 4 we convert into:
+ * attrname="yes".
+ */
+ $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
+ if ($regary == false){
+ /**
+ * Looks like body ended before the end of tag.
+ */
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attname, $match) = $regary;
+ $attname = strtolower($attname);
+ /**
+ * We arrived at the end of attribute name. Several things possible
+ * here:
+ * '>' means the end of the tag and this is attribute type 4
+ * '/' if followed by '>' means the same thing as above
+ * '\s' means a lot of things -- look what it's followed by.
+ * anything else means the attribute is invalid.
+ */
+ switch($match){
+ case '/':
+ /**
+ * This is an xhtml-style tag with a closing / at the
+ * end, like so: <img src="blah"/>. Check if it's followed
+ * by the closing bracket. If not, then this tag is invalid
+ */
+ if (substr($body, $pos, 2) == "/>"){
+ $pos++;
+ $tagtype = 3;
+ } else {
+ $gt = sq_findnxstr($body, $pos, ">");
+ $retary = Array(false, false, false, $lt, $gt);
+ return $retary;
+ }
+ case '>':
+ $attary{$attname} = '"yes"';
+ return Array($tagname, $attary, $tagtype, $lt, $pos);
+ break;
+ default:
+ /**
+ * Skip whitespace and see what we arrive at.
+ */
+ $pos = sq_skipspace($body, $pos);
+ $char = substr($body, $pos, 1);
+ /**
+ * Two things are valid here:
+ * '=' means this is attribute type 1 2 or 3.
+ * \w means this was attribute type 4.
+ * anything else we ignore and re-loop. End of tag and
+ * invalid stuff will be caught by our checks at the beginning
+ * of the loop.
+ */
+ if ($char == "="){
+ $pos++;
+ $pos = sq_skipspace($body, $pos);
+ /**
+ * Here are 3 possibilities:
+ * "'" attribute type 1
+ * '"' attribute type 2
+ * everything else is the content of attribute type 3
+ */
+ $quot = substr($body, $pos, 1);
+ if ($quot == "'"){
+ $regary = sq_findnxreg($body, $pos+1, "\'");
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ $pos++;
+ $attary{$attname} = "'" . $attval . "'";
+ } else if ($quot == '"'){
+ $regary = sq_findnxreg($body, $pos+1, '\"');
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ $pos++;
+ $attary{$attname} = '"' . $attval . '"';
+ } else {
+ /**
+ * These are hateful. Look for \s, or >.
+ */
+ $regary = sq_findnxreg($body, $pos, "[\s>]");
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $attval, $match) = $regary;
+ /**
+ * If it's ">" it will be caught at the top.
+ */
+ $attval = preg_replace("/\"/s", """, $attval);
+ $attary{$attname} = '"' . $attval . '"';
+ }
+ } else if (preg_match("|[\w/>]|", $char)) {
+ /**
+ * That was attribute type 4.
+ */
+ $attary{$attname} = '"yes"';
+ } else {
+ /**
+ * An illegal character. Find next '>' and return.
+ */
+ $gt = sq_findnxstr($body, $pos, ">");
+ return Array(false, false, false, $lt, $gt);
+ }
+ }
+ }
+ /**
+ * The fact that we got here indicates that the tag end was never
+ * found. Return invalid tag indication so it gets stripped.
+ */
+ return Array(false, false, false, $lt, strlen($body));
+}
+
/**
 * This function checks attribute values for entity-encoded values
 * and returns them translated into plain characters so we can run
 * checks on them.
 *
 * Named entities from PHP's HTML_ENTITIES table are decoded first,
 * then decimal (&#NN;) and hexadecimal (&#xHH;) references for the
 * codes 1 to 255. An entity is recognised with or without its
 * trailing semicolon(s). Quotes are deliberately left encoded:
 * &quot; is removed from the named table and codes 34 and 39 are
 * skipped in both numeric forms.
 *
 * @param  $attvalue A string to run entity check against.
 * @return Translated value.
 */
function sq_deent($attvalue){
    /**
     * See if we have to run the checks first. All entities must start
     * with "&".
     */
    if (strpos($attvalue, "&") === false){
        return $attvalue;
    }
    /**
     * Check named entities first. ENT_COMPAT is passed explicitly so
     * the table never contains an entry for the single quote, whatever
     * the default flags of the running PHP version are.
     */
    $trans = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT));
    /**
     * Leave &quot; in, as it can mess us up.
     */
    unset($trans["&quot;"]);
    foreach ($trans as $ent => $val){
        /**
         * Match the name followed either by its semicolon(s) or by
         * anything that cannot continue the name. The semicolon is
         * consumed with the entity instead of being left behind.
         */
        $name = rtrim($ent, ";");
        $attvalue = preg_replace("/" . $name . "(?:;+|(?!\w))/i",
                                 addcslashes($val, '\\$'), $attvalue);
    }
    /**
     * Now translate numbered entities from 1 to 255 if needed.
     */
    if (strpos($attvalue, "#") !== false){
        for ($asc = 1; $asc < 256; $asc++){
            /**
             * 34 is '"' and 39 is "'"; both stay encoded.
             */
            if ($asc == 34 || $asc == 39){
                continue;
            }
            /**
             * Backslash and dollar are special in a preg replacement
             * string, so escape them to get the literal character.
             */
            $chr = addcslashes(chr($asc), '\\$');
            /**
             * The lookaheads stop a shorter code from matching the
             * front of a longer one (e.g. &#10 inside &#106;).
             */
            $attvalue = preg_replace("/&#0*" . $asc . "(?:;+|(?!\d))/i",
                                     $chr, $attvalue);
            $attvalue = preg_replace("/&#x0*" . dechex($asc)
                                     . "(?:;+|(?![0-9a-f]))/i",
                                     $chr, $attvalue);
        }
    }
    return $attvalue;
}
+
/**
 * This function runs various checks against the attributes of a tag.
 *
 * For every attribute it, in order: drops the attribute when its name
 * matches a removal rule for this tag; decodes entities in the value
 * with sq_deent(); applies every matching bad-value replacement rule;
 * and rewrites values starting with cid: through sq_cid2http().
 * Afterwards the attributes configured for this tag are merged in.
 *
 * @param  $tagname         String with the name of the tag.
 * @param  $attary          Array with all tag attributes
 *                          (name => value).
 * @param  $rm_attnames     Array of tag-name regex => list of
 *                          attribute-name regexes to remove.
 *                          See description for sq_sanitize
 * @param  $bad_attvals     Array of tag-name regex => array of
 *                          attribute-name regex => array(patterns,
 *                          replacements) handed to preg_replace.
 *                          See description for sq_sanitize
 * @param  $add_attr_to_tag Array of tag-name regex => attributes to
 *                          merge into the result.
 *                          See description for sq_sanitize
 * @param  $message         message object, passed to sq_cid2http
 * @param  $id              message id, passed to sq_cid2http
 * @param  $mailbox         mailbox, passed to sq_cid2http
 * @return Array with modified attributes.
 */
function sq_fixatts($tagname,
                    $attary,
                    $rm_attnames,
                    $bad_attvals,
                    $add_attr_to_tag,
                    $message,
                    $id,
                    $mailbox
                    ){
    foreach ($attary as $attname => $attvalue){
        /**
         * See if this attribute should be removed. Once removed, move
         * straight on to the next attribute ("continue 3" leaves both
         * rule loops) so that none of the checks below can put it back.
         */
        foreach ($rm_attnames as $matchtag => $matchattrs){
            if (preg_match($matchtag, $tagname)){
                foreach ($matchattrs as $matchattr){
                    if (preg_match($matchattr, $attname)){
                        unset($attary[$attname]);
                        continue 3;
                    }
                }
            }
        }
        /**
         * Remove any entities.
         */
        $attvalue = sq_deent($attvalue);

        /**
         * Now let's run checks on the attvalues. Each entry in
         * $bad_attvals maps a tag regex to attribute regexes, and each
         * of those to a pair of arrays: matches and replacements.
         */
        foreach ($bad_attvals as $matchtag => $matchattrs){
            if (!preg_match($matchtag, $tagname)){
                continue;
            }
            foreach ($matchattrs as $matchattr => $valary){
                if (!preg_match($matchattr, $attname)){
                    continue;
                }
                list($valmatch, $valrepl) = $valary;
                $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                if ($newvalue !== $attvalue){
                    $attary[$attname] = $newvalue;
                    /**
                     * Carry the cleaned value forward so later rules
                     * and the cid: check work on it rather than on the
                     * value that was just rejected.
                     */
                    $attvalue = $newvalue;
                }
            }
        }
        /**
         * Turn cid: urls into http-friendly ones.
         */
        if (preg_match("/^[\'\"]\s*cid:/si", $attvalue)){
            $attary[$attname] = sq_cid2http($message, $id, $attvalue, $mailbox);
        }
    }
    /**
     * See if we need to append any attributes to this tag.
     */
    foreach ($add_attr_to_tag as $matchtag => $addattary){
        if (preg_match($matchtag, $tagname)){
            $attary = array_merge($attary, $addattary);
        }
    }
    return $attary;
}
+
+/**
+ * This function edits the style definition to make them friendly and
+ * usable in squirrelmail.
+ *
+ * @param $message the message object
+ * @param $id the message id
+ * @param $content a string with whatever is between <style> and </style>
+ * @return a string with edited content.
+ */
+function sq_fixstyle($message, $id, $content){
+ global $view_unsafe_images;
+ $me = 'sq_fixstyle';
+ /**
+ * First look for general BODY style declaration, which would be
+ * like so:
+ * body {background: blah-blah}
+ * and change it to .bodyclass so we can just assign it to a <div>
+ */
+ $content = preg_replace("|body(\s*\{.*?\})|si", ".bodyclass\\1", $content);
+ $secremoveimg = "../images/" . _("sec_remove_eng.png");
+ /**
+ * Fix url('blah') declarations.
+ */
+ $content = preg_replace("|url\(([\'\"])\s*\S+script\s*:.*?([\'\"])\)|si",
+ "url(\\1$secremoveimg\\2)", $content);
+ /**
+ * Fix url('https*://.*) declarations but only if $view_unsafe_images
+ * is false.
+ */
+ if (!$view_unsafe_images){
+ $content = preg_replace("|url\(([\'\"])\s*https*:.*?([\'\"])\)|si",
+ "url(\\1$secremoveimg\\2)", $content);
+ }
+
+ /**
+ * Fix urls that refer to cid:
+ */
+ while (preg_match("|url\(([\'\"]\s*cid:.*?[\'\"])\)|si", $content,
+ $matches)){
+ $cidurl = $matches{1};
+ $httpurl = sq_cid2http($message, $id, $cidurl);
+ $content = preg_replace("|url\($cidurl\)|si",
+ "url($httpurl)", $content);
+ }