Location: PHPKode > scripts > BS XHTML Valid > bs-xhtml-valid/class.php
<?php
# BS_xhtml_valid
# author: BAANANAS (hide@address.com)
# version: 1.00
# This class is meant to strip forbidden tags, attributes and data from user input.
# I hope you will find this class useful, and of course I accept all your suggestions and bug fixes.
# It's always nice to know that someone finds your work useful, so please let me know if you use this class in your scripts.


# Global error log. Entries are strings of the form
# 	error_id:unix_timestamp:argument1:argument2:arg...
$GLOBAL_ERROR = array();

# Message templates used by BS_plain_error_glob(), keyed by the number at the
# end of the error id (BS_xhtml_valid_1 ... BS_xhtml_valid_8). Each %s is
# filled with one argument of the error entry, in order.
$BS_lang_a = array();
$BS_lang_a[1] = "Not allowed element &#039;%s&#039;.";
$BS_lang_a[2] = "Required attribute &#039;%s&#039; in element &#039;%s&#039; uses only values matching the pattern &#039;%s&#039;.";
$BS_lang_a[3] = "Missing required attribute &#039;%s&#039; in element &#039;%s&#039;.";
$BS_lang_a[4] = "Not allowed attribute &#039;%s&#039; in element &#039;%s&#039;.";
$BS_lang_a[5] = "Attribute &#039;%s&#039; in element &#039;%s&#039; uses only values matching the pattern &#039;%s&#039;.";
$BS_lang_a[6] = "In element &#039;%s&#039; can not be nested element &#039;%s&#039;.";
$BS_lang_a[7] = "Element &#039;%s&#039; can be nested only in elements matching the pattern &#039;%s&#039;";
$BS_lang_a[8] = "Element &#039;%s&#039; can contain only other elements, no text.";

# function BS_plain_error_glob converts an array of encoded errors with the syntax
# 	error_id:unix_timestamp:argument1:argument2:arg...
# into a new array of full text messages according to the used language ($BS_lang_a).
#
# $array - list of encoded error strings; when omitted (false) the global
#          $GLOBAL_ERROR array filled by class BS_xhtml_valid is used.
# returns  - list of plain text messages, in the order of the recognised entries;
#            entries with an unknown error_id are skipped.
#
# The LAST argument of an entry may itself contain ':' (e.g. the pattern
# '^http:\/\/...' stored by errors 2, 5 and 7), so the entry is split into exactly
# as many arguments as the error id expects and the last one keeps the remainder.
# Earlier arguments still must not contain ':', because the storage format has
# no escaping.
function BS_plain_error_glob($array = false) {
	global $BS_lang_a, $GLOBAL_ERROR;
	if($array === false) { $array = $GLOBAL_ERROR; } #if you don't specify array which should be used, it uses global array for errors used in class BS_xhtml_valid
	$prefix = 'BS_xhtml_valid_';
	$arg_count = array( #number of message arguments stored for every known error id
		'BS_xhtml_valid_xml' => 1,
		'BS_xhtml_valid_1' => 1,
		'BS_xhtml_valid_2' => 3,
		'BS_xhtml_valid_3' => 2,
		'BS_xhtml_valid_4' => 2,
		'BS_xhtml_valid_5' => 3,
		'BS_xhtml_valid_6' => 2,
		'BS_xhtml_valid_7' => 2,
		'BS_xhtml_valid_8' => 1,
	);
	$result = array();
	if(!is_array($array)) { return $result; } #nothing to translate (e.g. the global error array was never created)
	foreach($array as $value) {
		if(!is_string($value)) { continue; }
		$parts = explode(':',$value,3); #error_id, unix_timestamp, everything else
		$id = $parts[0];
		if(!isset($arg_count[$id])) { continue; } #unknown error id
		$wanted = $arg_count[$id];
		$rest = isset($parts[2]) ? $parts[2] : '';
		$args = array_pad(explode(':',$rest,$wanted),$wanted,''); #the limit keeps colons inside the last argument; missing arguments become ''
		if($id === 'BS_xhtml_valid_xml') {
			$result[] = xml_error_string((int)$args[0]); #the only argument is the XML parser error code
			continue;
		}
		$number = (int)substr($id,strlen($prefix));
		if(isset($BS_lang_a[$number])) {
			$result[] = vsprintf($BS_lang_a[$number],$args);
		}
	}
	return $result;
}

# array $allowed_tags contains the allowed tags and their allowed attributes, and also carries information about nesting of tags etc.
# all tags and attributes must be UPPER CASED
# indexes 'pair', 'single', 'contains', 'contained', 'nocdata', 'attr_strict' are reserved indexes used by the validator class
# attribute values and the 'contains' / 'contained' entries are perl compatible regular expressions (without delimiters);
# they are matched with preg_match(), i.e. as a SUBSTRING search, so anchor them with ^ and $ when the whole value has to match
# and escape regex meta characters such as '.' that are meant literally
# the value 'true' for tags means you can use this tag but it has no attributes or any other information specified, so the user can't use attributes
# the value 'true' for attributes means you can use this attribute with any value
# the value 'true' for 'nocdata' means that the element can't contain text (a missing or 'false' value means that the element can contain text)
$allowed_tags = array(
	'pair' => array( #your array must be divided into two subarrays -> 'pair' (contains elements which have to have a start tag and an end tag, e.g. <a ...>link</a>) and 'single' (contains tags which do not have end tags, e.g. <br />)
		'TABLE' => array(
			'contains' => '^TBODY$', #the element TABLE can contain only TBODY tag/s -> good: <table><tbody>... ; bad: <table><div><tbody>...
			'nocdata' => true, #the element TABLE can't contain any character data -> good: <table><tbody>... ; bad: <table>some text<tbody>...
		),
		'TBODY' => array(
			'contains' => '^TR$',
			'contained' => '^TABLE$', #the element TBODY must be nested in the TABLE element -> good: <table><tbody>... ; bad: <div><tbody>...
			'nocdata' => true,
		),
		'TR' => array(
			'contained' => '^TBODY$',
			'contains' => '^TD$',
			'nocdata' => true,
		),
		'TD' => array(
			'contained' => '^TR$', #anchored, otherwise any parent whose name merely contains "TR" would be accepted
		),
		'A' => array(
			'attr_strict' => array( #element A has some attributes that the user must use, otherwise the A element won't be allowed
				'HREF' => '^http:\/\/www\.mysite\.com\/', #the HREF attribute must be specified and its value must start with http://www.mysite.com/ (dots are escaped so they match only a literal '.') -> good: <a href="http://www.mysite.com/profile/home.html"> ; bad: <a href="http://www.my_brand_new_site.org/index.html">
			),
			'NAME' => true, #user can use the NAME attribute in the element A
			'CLASS' => '^my_class', #user can use the CLASS attribute in the element A, but it has to match the pattern ^my_class. If it doesn't, the tag will be allowed but without this attribute -> good: <a class="my_class_xyz" ... ; bad: <a class="your_class" ...
			'STYLE' => true,
			'TITLE' => true,
		),
		'DIV' => array(
			'NAME' => true,
			'CLASS' => true,
			'STYLE' => true,
			'ID' => '^div_id_[0-9]+$',
		),
		'I' => true, #user can use the I element, but no attributes will be allowed
	),
	'single' => array(
		'BR' => true,
		'IMG' => array(
			'attr_strict' => array(
				'SRC' => true,
				'ALT' => true,
			),
			'ID' => true,
			'NAME' => true,
			'CLASS' => true,
			'STYLE' => true,
			'WIDTH' => true,
			'HEIGHT' => true,
		)
	)
);
# class BS_xhtml_valid
# Runs a fragment of user supplied (X)HTML through PHP's XML parser and rebuilds
# it from the elements and attributes permitted by an $allowed_tags style array.
# Start and end tags of rejected elements are dropped (their text content is
# kept as escaped text), rejected attributes are removed, and every problem is
# appended to the global $GLOBAL_ERROR array as
# 	error_id:unix_timestamp:argument1:argument2:arg...
# Usage: $validator = new BS_xhtml_valid; $safe = $validator->parse($input);
class BS_xhtml_valid {
	var $parser; #xml parser handle
	var $safe_text = ''; #returned string, built up while parsing
	var $allowed_tags; #array of allowed tags and attributes
	var $allow_tag = array(); #stack of the accepted 'pair' elements that are currently open (innermost last)
	var $open_elements = array(); #one flag per open element in the input: true when its end tag has to be written
	var $parser_used = false; #true once the current parser has consumed a document

	# class constructor
	# $tags - optional configuration array; when omitted the global $allowed_tags is used
	function __construct($tags = null) {
		global $allowed_tags;
		$this->allowed_tags = is_array($tags) ? $tags : $allowed_tags; #loads array containing elements, attributes, information, etc.
		$this->init_parser();
	}

	# PHP 4 style constructor name, kept so that code calling it explicitly keeps working
	# (PHP 8 no longer treats a method named like the class as the constructor)
	function BS_xhtml_valid($tags = null) {
		$this->__construct($tags);
	}

	# creates a fresh xml parser and wires its callbacks to this object
	function init_parser() {
		$this->parser = xml_parser_create();
		xml_set_element_handler($this->parser,array($this,'tag_start'),array($this,'tag_end')); #passes start tags and end tags to the parser functions
		xml_set_character_data_handler($this->parser,array($this,'cdata')); #passes basic text to the parser functions
		$this->parser_used = false;
	}

	# function which starts the parsing
	# $data   - the untrusted markup
	# returns - the sanitised markup, or false when the input is not well-formed
	#           (the xml error code is then added to the global error array)
	function parse($data) {
		global $GLOBAL_ERROR;
		if($this->parser_used) { #an xml parser accepts only one document, so every further call needs a new one
			if(is_resource($this->parser)) { xml_parser_free($this->parser); } #only PHP < 8 uses resources that have to be freed
			$this->init_parser();
		}
		$this->parser_used = true;
		$this->safe_text = '';
		$this->allow_tag = array();
		$this->open_elements = array();
		$data = $this->escape_data($data); #calls function for escaping malicious code
		if(!xml_parse($this->parser,'<xhtml>' . $data . '</xhtml>',true)) { #the whole document is passed at once, so it is also the final chunk
			$GLOBAL_ERROR[] = 'BS_xhtml_valid_xml:' . time() .':' . xml_get_error_code($this->parser);
			return false;
		}
		#after parsing the whole string, known entities written by the user look like &amp;quot; and unknown ones like &amp;unknown_entity;
		#known entities are turned back into &quot; while unknown ones stay escaped
		$restore = array();
		foreach(get_html_translation_table(HTML_ENTITIES) as $entity) {
			$restore[htmlentities($entity)] = $entity; #e.g. '&amp;quot;' => '&quot;'
		}
		#strtr() replaces in a single pass, so the result of one replacement is never decoded a second time
		#(a literal &amp;lt; typed by the user stays &amp;lt; instead of becoming &lt;)
		$this->safe_text = strtr($this->safe_text,$restore);
		return $this->safe_text; #outputs safe text
	}

	# prepares the raw input so that the xml parser can read typical html
	# $data   - the untrusted markup
	# returns - the normalised markup
	function escape_data($data) {
		$data = str_replace('&','&amp;',(string)$data); #replaces '&' with '&amp;' (so unknown entities cannot break the xml parser; the parser turns it back into '&')
		$data = str_replace('< ','&lt; ',$data); #replaces '< ' with '&lt; ' (so the parser does not think that it is a start of an element)
		#possessive quantifiers (++, *+) are used so that malformed input cannot cause catastrophic backtracking
		$value = '(?:"[^"]*+"|\'[^\']*+\')'; #a quoted attribute value
		$pair = '\w++=' . $value; #attr="value" or attr='value'
		$closed = preg_replace('/<(br|img|hr|link|meta)((?:\s++|' . $pair . ')*+)[ \/]*+>/','<\\1\\2 />',$data); #closes elements which have to be always closed
		if($closed !== null) { $data = $closed; } #null means a pcre error; keep the previous text instead of losing the input
		#changes <tag attr="value" value attr="value"> into <tag attr="value" value="value" attr="value">
		#and replaces '<' and '>' with '%3C' and '%3E' in values of element's attributes
		$fixed = preg_replace_callback('/<(\w+)((?:\s++(?:\w++(?=\s))?+|' . $pair . ')*+)([ \/]?)>/',array($this,'escape_tag'),$data);
		if($fixed !== null) { $data = $fixed; }
		return $data;
	}

	# callback of escape_data(): rewrites the attribute part of one start tag
	# $match - 1: tag name, 2: attribute part, 3: optional ' ' or '/' in front of '>'
	function escape_tag($match) {
		#quoted attributes are matched as a whole and returned unchanged, so text inside a value is never mistaken for a bare attribute
		$attrs = preg_replace_callback('/\w++=(?:"[^"]*+"|\'[^\']*+\')|\s++(\w++)(?=\s)/',array($this,'expand_bare_attribute'),$match[2]);
		if($attrs === null) { $attrs = $match[2]; }
		return '<' . $match[1] . str_replace(array('<','>'),array('%3C','%3E'),$attrs) . $match[3] . '>';
	}

	# callback of escape_tag(): turns a bare word such as ' checked' into ' checked="checked"'
	function expand_bare_attribute($match) {
		if(isset($match[1]) && $match[1] !== '') {
			return ' ' . $match[1] . '="' . $match[1] . '"';
		}
		return $match[0]; #a complete attr="value" pair
	}

	# returns the configuration array of a tag, or an empty array when the tag is unknown or configured as plain 'true'
	function tag_spec($group,$tag) {
		if(isset($this->allowed_tags[$group][$tag]) && is_array($this->allowed_tags[$group][$tag])) {
			return $this->allowed_tags[$group][$tag];
		}
		return array();
	}

	# returns the innermost accepted 'pair' element that is still open, or null at the top level
	function current_tag() {
		$depth = count($this->allow_tag);
		return ($depth > 0) ? $this->allow_tag[$depth - 1] : null;
	}

	# process start tags (xml parser callback)
	function tag_start($parser,$tag,$attr) {
		if($tag == 'XHTML') { return; } #element xhtml is added by parse() (xml parser needs to have enclosed data in some element)
		#remember for the matching end tag whether this element was written to the output
		$this->open_elements[] = $this->open_element($tag,is_array($attr) ? $attr : array());
	}

	# validates one start tag and writes it to the output when it is allowed
	# returns true only when an accepted 'pair' element was opened, i.e. when an end tag has to follow
	function open_element($tag,$attr) {
		global $GLOBAL_ERROR;
		$group = '';
		$tag_close = '';
		$stop = true;
		if(isset($this->allowed_tags['pair'][$tag])) { $tag_close = ''; $group = 'pair'; $stop = false; }
		if(isset($this->allowed_tags['single'][$tag])) { $tag_close = ' /'; $group = 'single'; $stop = false; }
		if($stop) {
			$GLOBAL_ERROR[] = "BS_xhtml_valid_1:" . time() . ":$tag";
		}
		$spec = $this->tag_spec($group,$tag);
		$parent = $this->current_tag();
		$parent_spec = ($parent === null) ? array() : $this->tag_spec('pair',$parent);
		if(isset($parent_spec['contains']) && !preg_match("/" . $parent_spec['contains'] . "/",$tag)) { #checks if the tag can be nested in the tag before
			$GLOBAL_ERROR[] = "BS_xhtml_valid_6:" . time() . ":" . $parent . ":$tag";
			$stop = true;
		}
		if(isset($spec['contained'])) {
			if(($parent === null) || (!preg_match("/" . $spec['contained'] . "/",$parent))) { #checks if the tag is nested in a permitted parent
				$GLOBAL_ERROR[] = "BS_xhtml_valid_7:" . time() . ":$tag:" . $spec['contained'];
				$stop = true;
			}
		}
		if($stop) { return false; }

		#NOTE: attributes are not de-duplicated here - the xml parser already rejects duplicate attribute names, and the
		#former array_flip() based clean-up dropped attributes that merely shared the same VALUE (e.g. src="x" alt="x")
		$fatal = false;
		$strict = (isset($spec['attr_strict']) && is_array($spec['attr_strict'])) ? $spec['attr_strict'] : array();
		foreach($strict as $key => $value) { #if there are any requested attributes, checks if they are used, otherwise it doesn't allow this element
			if(!array_key_exists($key,$attr)) { #we don't have our needed attribute - forbid the element
				$fatal = true;
				$GLOBAL_ERROR[] = "BS_xhtml_valid_3:" . time() . ":$key:$tag";
				continue;
			}
			if(($value !== true) && (!preg_match("/" . $value . "/",$attr[$key]))) { #'true' accepts any value, otherwise the value has to match the pattern - if not, forbid the element
				$fatal = true;
				unset($attr[$key]);
				$GLOBAL_ERROR[] = "BS_xhtml_valid_2:" . time() . ":$key:$tag:$value";
			}
		}
		foreach($attr as $key => $value) {
			$optional = array_key_exists($key,$spec);
			if((!$optional) && (!array_key_exists($key,$strict))) { #discard all attributes that aren't allowed
				unset($attr[$key]);
				$GLOBAL_ERROR[] = "BS_xhtml_valid_4:" . time() . ":$key:$tag";
				continue;
			}
			if($optional && ($spec[$key] !== true) && (!preg_match("/" . $spec[$key] . "/",$value))) { #compares the value of an attribute that is not requested with allowed possibilities
				unset($attr[$key]);
				$GLOBAL_ERROR[] = "BS_xhtml_valid_5:" . time() . ":$key:$tag:" . $spec[$key];
			}
		}
		if($fatal) { return false; }

		$attributes = '';
		foreach($attr as $key => $value) {
			#ENT_QUOTES: a quote inside the value must not be able to close the attribute and start a new one
			$attributes .= ' ' . strtolower($key) . '="' . htmlspecialchars($value,ENT_QUOTES) . '"';
		}
		$this->safe_text .= "<" . strtolower($tag) . $attributes . "$tag_close>"; #saves the safe text
		if($group == 'pair') {
			$this->allow_tag[count($this->allow_tag)] = $tag;
			return true;
		}
		return false; #'single' elements are already closed with ' />'
	}

	# process end tags (xml parser callback)
	function tag_end($parser,$tag) {
		if($tag == 'XHTML') { return; }
		#the end tag is written only when the matching start tag was accepted; comparing just the tag name with the
		#innermost open element would let the end tag of a rejected nested element close its accepted ancestor too early
		if(array_pop($this->open_elements)) {
			$this->safe_text .= "</" . strtolower($tag) . ">";
			array_pop($this->allow_tag);
		}
	}

	# process basic text (xml parser callback)
	function cdata($parser,$data) {
		global $GLOBAL_ERROR;
		$parent = $this->current_tag();
		$spec = ($parent === null) ? array() : $this->tag_spec('pair',$parent);
		if(empty($spec['nocdata'])) { #also checks if the tag can contain text or only other tags
			$this->safe_text .= htmlentities($data);
		}
		else {
			$GLOBAL_ERROR[] = "BS_xhtml_valid_8:" . time() . ":" . $parent;
		}
	}
}
# Demo run: validates a sample fragment that contains a forbidden attribute
# (align on <div>) and a <td> outside of a <tr>, prints the sanitised markup
# and then the collected errors as plain text.
$test_string = 'some text <div align="center">another <td>bad col</td> text</div> and table <table><tbody><tr><td>col 1</td><td>col 2</td></tr></tbody></table>'; #string you want to check
$validator = new BS_xhtml_valid(); #initiate class
echo $validator->parse($test_string), "\n<br />"; #validate! (parse() returns false, printed as an empty string, when the input is not well-formed)
print_r(BS_plain_error_glob()); #no argument: translates the global error array into plain text

?>
Return current item: BS XHTML Valid