+/**
+* Strips every tab, carriage return and newline from an attribute
+* value. Some browsers treat "java[tab]script" exactly like
+* "javascript", so these characters must not survive in a value.
+*
+* @param $attvalue The attribute value to clean up.
+* @return The attribute value with all tabs, CRs and LFs removed.
+*/
+function sq_unspace($attvalue){
+    $clean = (strlen($attvalue) == strcspn($attvalue, "\t\r\n"));
+    if ($clean){
+        return $attvalue;
+    }
+    return str_replace(Array("\t", "\r", "\n"), '', $attvalue);
+}
+
+/**
+* Builds the final tag string from a tag name, an array of attributes
+* and the tag type. This function is called by sq_sanitize internally.
+*
+* Tag types: 2 is a closing tag (</name>, attributes are ignored),
+* 3 is a self-closing tag (<name att=val />), and any other value is
+* rendered as an opening tag (<name att=val>).
+*
+* @param $tagname the name of the tag.
+* @param $attary the array of attributes (name => value).
+* @param $tagtype the type of the tag (see above).
+* @return a string with the final tag representation.
+*/
+function sq_tagprint($tagname, $attary, $tagtype){
+    if ($tagtype == 2){
+        return '</' . $tagname . '>';
+    }
+    $fulltag = '<' . $tagname;
+    if (is_array($attary) && sizeof($attary)){
+        $atts = Array();
+        // foreach ignores the internal array pointer; each() is gone in PHP 8.
+        foreach ($attary as $attname => $attvalue){
+            $atts[] = "$attname=$attvalue";
+        }
+        $fulltag .= ' ' . join(' ', $atts);
+    }
+    if ($tagtype == 3){
+        $fulltag .= ' /';
+    }
+    return $fulltag . '>';
+}
+
+/**
+* array_walk callback that lowercases a value in place.
+*
+* @param $val a value passed by-ref; replaced by its lowercase form.
+* @return void since it modifies a by-ref value.
+*/
+function sq_casenormalize(&$val){
+    $lowered = strtolower($val);
+    $val = $lowered;
+}
+
+/**
+* Skips any whitespace starting at the given position within a
+* string and reports where the next non-whitespace character is.
+*
+* @param $body the string
+* @param $offset the offset within the string where the scan starts.
+* @return the location within $body of the next non-whitespace
+*         char, or $offset unchanged if no whitespace starts there.
+*/
+function sq_skipspace($body, $offset){
+    $matches = Array();
+    // substr() may yield false before PHP 8; treat that as "".
+    $rest = (string) substr($body, $offset);
+    // The capture is a string, so it is measured with strlen().
+    if (preg_match('/^(\s*)/s', $rest, $matches)){
+        $offset += strlen($matches[1]);
+    }
+    return $offset;
+}
+
+/**
+* Locates the next occurrence of a character or string within a
+* string. It is a thin wrapper around strpos() that never reports
+* failure as false.
+*
+* @param $body The string to look for needle in.
+* @param $offset Start looking from this position.
+* @param $needle The character/string to look for.
+* @return location of the next occurrence of the needle, or
+*         strlen($body) if needle wasn't found.
+*/
+function sq_findnxstr($body, $offset, $needle){
+    $found = strpos($body, $needle, $offset);
+    if ($found !== false){
+        return $found;
+    }
+    // Needle is absent: report the end of the string instead.
+    return strlen($body);
+}
+
+/**
+* Takes a PCRE-style regexp and looks for its first match within the
+* string, starting at the given offset. Matching is case-insensitive
+* and "." also matches newlines.
+*
+* @param $body The string to look for needle in.
+* @param $offset Start looking from here.
+* @param $reg A PCRE-style regex to match, without delimiters. It is
+*             wrapped in "%" delimiters, so it must not contain an
+*             unescaped "%".
+* @return Returns a false if no matches found, or an array
+*         with the following members:
+*         - integer with the location of the match within $body
+*         - string with whatever content between offset and the match
+*         - string with whatever it is we matched
+*/
+function sq_findnxreg($body, $offset, $reg){
+    $matches = Array();
+    $rest = (string) substr($body, $offset);
+    $found = preg_match("%^(.*?)($reg)%si", $rest, $matches);
+    // An empty overall match counts as "not found". The length is
+    // tested rather than truthiness so that a match of "0" is kept.
+    if (!$found || strlen($matches[0]) == 0){
+        return false;
+    }
+    return Array($offset + strlen($matches[1]), $matches[1], $matches[2]);
+}
+
+/**
+* This function looks for the next tag.
+*
+* @param $body String where to look for the next tag.
+* @param $offset Start looking from here.
+* @return false if no more tags exist in the body, or
+* an array with the following members:
+* - string with the name of the tag
+* - array with attributes and their values
+* - integer with tag type (1, 2, or 3)
+* - integer where the tag starts (starting "<")
+* - integer where the tag ends (ending ">")
+* first three members will be false, if the tag is invalid.
+*/
+function sq_getnxtag($body, $offset){
+ $me = 'sq_getnxtag';
+ if ($offset > strlen($body)){
+ return false;
+ }
+ $lt = sq_findnxstr($body, $offset, "<");
+ if ($lt == strlen($body)){
+ return false;
+ }
+ /**
+ * We are here:
+ * blah blah <tag attribute="value">
+ * \---------^
+ */
+ $pos = sq_skipspace($body, $lt+1);
+ if ($pos >= strlen($body)){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ /**
+ * There are 3 kinds of tags:
+ * 1. Opening tag, e.g.:
+ * <a href="blah">
+ * 2. Closing tag, e.g.:
+ * </a>
+ * 3. XHTML-style content-less tag, e.g.:
+ * <img src="blah" />
+ */
+ $tagtype = false;
+ switch (substr($body, $pos, 1)){
+ case '/':
+ $tagtype = 2;
+ $pos++;
+ break;
+ case '!':
+ /**
+ * A comment or an SGML declaration.
+ */
+ if (substr($body, $pos+1, 2) == "--"){
+ $gt = strpos($body, "-->", $pos);
+ if ($gt === false){
+ $gt = strlen($body);
+ } else {
+ $gt += 2;
+ }
+ return Array(false, false, false, $lt, $gt);
+ } else {
+ $gt = sq_findnxstr($body, $pos, ">");
+ return Array(false, false, false, $lt, $gt);
+ }
+ break;
+ default:
+ /**
+ * Assume tagtype 1 for now. If it's type 3, we'll switch values
+ * later.
+ */
+ $tagtype = 1;
+ break;
+ }
+
+ $tagname = '';
+ /**
+ * Look for next [\W-_], which will indicate the end of the tag name.
+ */
+ $regary = sq_findnxreg($body, $pos, "[^\w\-_]");
+ if ($regary == false){
+ return Array(false, false, false, $lt, strlen($body));
+ }
+ list($pos, $tagname, $match) = $regary;
+ $tagname = strtolower($tagname);
+
+ /**
+ * $match can be either of these:
+ * '>' indicating the end of the tag entirely.
+ * '\s' indicating the end of the tag name.
+ * '/' indicating that this is type-3 xhtml tag.
+ *
+ * Whatever else we find there indicates an invalid tag.
+ */
+ switch ($match){
+ case '/':
+ /**
+ * This is an xhtml-style tag with a closing / at the
+ * end, like so: <img src="blah" />. Check if it's followed
+ * by the closing bracket. If not, then this tag is invalid
+ */
+ if (substr($body, $pos, 2) == "/>"){
+ $pos++;
+ $tagtype = 3;
+ } else {
+ $gt = sq_findnxstr($body, $pos, ">");
+ $retary = Array(false, false, false, $lt, $gt);
+ return $retary;