<?php
############### COPYLEFT GPLv3 LICENSE ###############
##
## Copyright 2009 GPLv3 - http://www.opensource.org/licenses/gpl-3.0.html
##
## Anthony Gallon
## hide@address.com
##
## Permission is hereby granted to any person having a copy of this software
## to freely use and modify as required so long as the copyright notices
## and branding remain intact.
##
############### COPYLEFT GPLv3 LICENSE ###############
if (class_exists('phpQuery') === false) { die('Antz_TagFilter requires class phpQuery - see '.__FILE__.', line '.__LINE__); } // hard stop: the filter cannot run without phpQuery
/**
 * Strips unwanted and malicious tags from html content with whitelist and blacklist approach.
 * Supports whitelist tagnames, attributes and explicit tag/attribute combinations.
 *
 * Rules applied by process():
 *  - an element is removed when its name is on the tagname blacklist or missing from the tagname whitelist;
 *  - an attribute is removed when it is on the attribute blacklist, or when it is neither on the
 *    attribute whitelist nor explicitly whitelisted for the element it sits on;
 *  - every explicit blacklist rule (tagname => attribute) strips that attribute from matching elements.
 *
 * DOM parsing and node removal are delegated to the phpQuery library.
 */
class Antz_TagFilter
{
    /** @var string[] attribute names allowed on any element */
    protected $attributeWhitelist = array();
    /** @var string[] attribute names removed from every element */
    protected $attributeBlacklist = array();
    /** @var string[] element names that are kept */
    protected $tagnameWhitelist = array();
    /** @var string[] element names that are always removed */
    protected $tagnameBlacklist = array();
    /** @var array[] list of single-entry arrays (tagname => attribute) allowed in addition to the attribute whitelist */
    protected $explicitWhitelist = array();
    /** @var array[] list of single-entry arrays (tagname => attribute) stripped after the generic pass */
    protected $explicitBlacklist = array();
    /** @var string 'xhtml' or 'html'; selects the phpQuery document factory */
    protected $htmlMode = 'xhtml';
    /** @var string[] human readable problems collected while configuring/processing */
    protected $errors = array();
    /** @var array never read by this class; retained only so existing subclasses keep working */
    protected $removeNodes = array();
    /** @var array DOM nodes collected by processElement() that process() removes at the end */
    protected $removedNodes = array();
    /** @var bool not consulted anywhere in this class */
    protected $allowDoctype = false;

    public function __construct()
    {
    }

    /**
     * @return string[] messages recorded so far (invalid mode, malformed rules, regex failures)
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Set the mode which phpQuery runs (XHTML or HTML).
     * Any other value is ignored and the current mode is kept.
     * @param string $mode
     */
    public function setHtmlMode($mode = 'xhtml')
    {
        $mode = strtolower((string) $mode);
        if ($mode === 'xhtml' || $mode === 'html') {
            $this->htmlMode = $mode;
        }
    }

    /**
     * Overwrite attributes whitelist with new values (non-array input is ignored)
     * @param mixed $atts
     */
    public function setAttributeWhitelist($atts)
    {
        if (!is_array($atts)) return;
        $this->attributeWhitelist = array();
        $this->addAttributeWhitelist($atts);
    }

    /**
     * Overwrite attributes blacklist with new values (non-array input is ignored)
     * @param mixed $atts
     */
    public function setAttributeBlacklist($atts)
    {
        if (!is_array($atts)) return;
        $this->attributeBlacklist = array();
        $this->addAttributeBlacklist($atts);
    }

    /**
     * Overwrite tagname whitelist with new values (non-array input is ignored)
     * @param mixed $tags
     */
    public function setTagnameWhitelist($tags)
    {
        if (!is_array($tags)) return;
        $this->tagnameWhitelist = array();
        $this->addTagnameWhitelist($tags);
    }

    /**
     * Overwrite tagname blacklist with new values (non-array input is ignored)
     * @param mixed $tags
     */
    public function setTagnameBlacklist($tags)
    {
        if (!is_array($tags)) return;
        $this->tagnameBlacklist = array();
        $this->addTagnameBlacklist($tags);
    }

    /**
     * Overwrite explicit blacklist with new values (non-array input is ignored)
     * @param mixed $tags
     */
    public function setExplicitBlacklist($tags)
    {
        if (!is_array($tags)) return;
        $this->explicitBlacklist = array();
        $this->addExplicitBlacklist($tags);
    }

    /**
     * Overwrite explicit whitelist with new values (non-array input is ignored)
     * @param mixed $tags
     */
    public function setExplicitWhitelist($tags)
    {
        if (!is_array($tags)) return;
        $this->explicitWhitelist = array();
        $this->addExplicitWhitelist($tags);
    }

    /**
     * Add explicit blacklist rules (tagname=>attname).
     * Accepts a single rule, a list of rules, or tagname => array of attribute names.
     * @param mixed $tags
     */
    public function addExplicitBlacklist($tags)
    {
        if (!is_array($tags)) return;
        foreach ($this->flattenExplicitRules($tags) as $rule) {
            $this->explicitBlacklist[] = $rule;
        }
    }

    /**
     * Add explicit whitelist rules (tagname=>attname).
     * Accepts a single rule, a list of rules, or tagname => array of attribute names.
     * @param mixed $tags
     */
    public function addExplicitWhitelist($tags)
    {
        if (!is_array($tags)) return;
        foreach ($this->flattenExplicitRules($tags) as $rule) {
            $this->explicitWhitelist[] = $rule;
        }
    }

    /**
     * Add a tagname blacklist rule
     * @param mixed $tagname a name or a (nested) array of names
     */
    public function addTagnameBlacklist($tagname)
    {
        if (is_array($tagname)) {
            foreach ($tagname as $tag) {
                $this->addTagnameBlacklist($tag);
            }
            return;
        }
        $this->appendName($this->tagnameBlacklist, $tagname);
    }

    /**
     * Add a tagname whitelist rule
     * @param mixed $tagname a name or a (nested) array of names
     */
    public function addTagnameWhitelist($tagname)
    {
        if (is_array($tagname)) {
            foreach ($tagname as $tag) {
                $this->addTagnameWhitelist($tag);
            }
            return;
        }
        $this->appendName($this->tagnameWhitelist, $tagname);
    }

    /**
     * Add an attribute blacklist rule
     * @param mixed $att a name or a (nested) array of names
     */
    public function addAttributeBlacklist($att)
    {
        if (is_array($att)) {
            foreach ($att as $at) {
                $this->addAttributeBlacklist($at);
            }
            return;
        }
        $this->appendName($this->attributeBlacklist, $att);
    }

    /**
     * Add an attribute whitelist rule
     * @param mixed $att a name or a (nested) array of names
     */
    public function addAttributeWhitelist($att)
    {
        if (is_array($att)) {
            foreach ($att as $at) {
                $this->addAttributeWhitelist($at);
            }
            return;
        }
        $this->appendName($this->attributeWhitelist, $att);
    }

    /**
     * Remove a tagname blacklist rule
     * @param mixed $tagname a name or a (nested) array of names
     */
    public function removeTagnameBlacklist($tagname)
    {
        if (is_array($tagname)) {
            foreach ($tagname as $tag) {
                $this->removeTagnameBlacklist($tag);
            }
            return;
        }
        $this->dropName($this->tagnameBlacklist, $tagname);
    }

    /**
     * Remove a tagname whitelist rule
     * @param mixed $tagname a name or a (nested) array of names
     */
    public function removeTagnameWhitelist($tagname)
    {
        if (is_array($tagname)) {
            foreach ($tagname as $tag) {
                $this->removeTagnameWhitelist($tag);
            }
            return;
        }
        $this->dropName($this->tagnameWhitelist, $tagname);
    }

    /**
     * Remove an attribute blacklist rule
     * @param mixed $att a name or a (nested) array of names
     */
    public function removeAttributeBlacklist($att)
    {
        if (is_array($att)) {
            foreach ($att as $at) {
                $this->removeAttributeBlacklist($at);
            }
            return;
        }
        $this->dropName($this->attributeBlacklist, $att);
    }

    /**
     * Remove an attribute whitelist rule
     * @param mixed $att a name or a (nested) array of names
     */
    public function removeAttributeWhitelist($att)
    {
        if (is_array($att)) {
            foreach ($att as $at) {
                $this->removeAttributeWhitelist($at);
            }
            return;
        }
        $this->dropName($this->attributeWhitelist, $att);
    }

    /**
     * Sanitizes and returns supplied HTML with all blacklisted and non-whitelisted tags/attributes removed.
     * Returns an empty string (and leaves a message in getErrors()) when no DOM could be created,
     * so unfiltered markup is never handed back.
     * @param string $content
     * @return string $content
     */
    public function process($content)
    {
        $this->removedNodes = array();
        $content = $this->stripBlacklistedTags(trim((string) $content));

        $dom = $this->initDom($content);
        if (!is_object($dom)) {
            // initDom() hands the raw string back for an unknown mode; fail closed.
            return '';
        }

        // Iterate by value: the nodes are objects, and no reference is left dangling afterwards.
        $elements = $dom->elements;
        if (is_array($elements) || $elements instanceof Traversable) {
            foreach ($elements as $el) {
                $this->processElement($el);
            }
        }

        // Explicit rules run after the generic pass and strip one attribute from one kind of element.
        foreach ($this->explicitBlacklist as $rule) {
            foreach ($rule as $tagname => $attribute) {
                foreach (pq($tagname.'['.$attribute.']') as $node) {
                    $node->removeAttribute($attribute);
                }
            }
        }

        // Removal is deferred until here so the child lists walked by processElement() stay stable.
        foreach ($this->removedNodes as $node) {
            pq($node)->remove();
        }

        return (string) $dom;
    }

    /**
     * Creates a new phpQuery document for the configured mode.
     * @param string $content
     * @return mixed whatever phpQuery::newDocumentXhtml()/newDocumentHtml() returns, or the
     *               unchanged $content string (plus an entry in $errors) when the mode is unknown
     */
    protected function initDom($content)
    {
        switch ($this->htmlMode) {
            case 'xhtml':
                $dom = phpQuery::newDocumentXhtml($content);
                break;
            case 'html':
                $dom = phpQuery::newDocumentHtml($content);
                break;
            default:
                $this->errors[] = 'Invalid mode: should be xhtml or html';
                return $content;
        }
        return $dom;
    }

    /**
     * Removes blacklisted and non-whitelisted attributes from the element, recurses into all child
     * nodes and queues the element itself for removal when its tag name is not permitted.
     * Nodes that are neither DOMElement nor DOMDocument are ignored.
     * @param DOMElement $el
     */
    protected function processElement(&$el)
    {
        if (!($el instanceof DOMElement) && !($el instanceof DOMDocument)) {
            return;
        }

        // Collect first, remove afterwards: the attribute map must not change while it is iterated.
        $invalidAtts = array();
        $elAtts = $el->attributes;
        if ($elAtts !== null) {
            foreach ($elAtts as $att) {
                if (in_array($att->name, $this->attributeBlacklist, true)) {
                    $invalidAtts[] = $att->name;
                } else if (!in_array($att->name, $this->attributeWhitelist, true)
                    && !$this->isExplicitlyAllowed($el->nodeName, $att->name)) {
                    $invalidAtts[] = $att->name;
                }
            }
        }
        foreach ($invalidAtts as $name) {
            $el->removeAttribute($name);
        }

        $childNodes = $el->childNodes;
        if (is_object($childNodes) && $childNodes->length > 0) {
            for ($i = 0, $max = $childNodes->length; $i < $max; $i++) {
                // The parameter is by reference, so a real variable is needed here.
                $child = $childNodes->item($i);
                $this->processElement($child);
            }
        }

        if ($el instanceof DOMDocument) return;

        if (in_array($el->nodeName, $this->tagnameBlacklist, true)
            || !in_array($el->nodeName, $this->tagnameWhitelist, true)) {
            $this->removedNodes[] = $el;
        }
    }

    /**
     * Tells whether an explicit whitelist rule permits the attribute on the given element.
     * Names are compared as strings so that numeric rule keys can never match by type juggling.
     * @param string $nodeName
     * @param string $attName
     * @return bool
     */
    protected function isExplicitlyAllowed($nodeName, $attName)
    {
        foreach ($this->explicitWhitelist as $rule) {
            foreach ($rule as $tagname => $attname) {
                if ((string) $tagname === $nodeName && (string) $attname === $attName) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Removes blacklisted elements from the raw markup before it is parsed.
     * For each name, first everything from the first opening tag to the last closing tag of that
     * name is dropped (the match is greedy), then any remaining opening tags. The name must be
     * followed by whitespace, "/" or ">", so blacklisting "b" leaves "<br>" alone.
     * @param string $content
     * @return string
     */
    protected function stripBlacklistedTags($content)
    {
        foreach ($this->tagnameBlacklist as $tagname) {
            $tagname = trim((string) $tagname);
            if ($tagname === '') continue; // an empty name would match every tag

            $tag = preg_quote($tagname, '#');
            $open = '<'.$tag.'(?=[\s/>])[^>]*>';
            $patterns = array(
                '#'.$open.'.*</'.$tag.'(?=[\s/>])[^>]*>#is',
                '#'.$open.'#i',
            );
            foreach ($patterns as $pattern) {
                $stripped = preg_replace($pattern, '', $content);
                if ($stripped === null) {
                    // Keep the markup; processElement() still queues blacklisted elements for removal.
                    $this->errors[] = 'Could not strip tag "'.$tagname.'" by regular expression (PCRE error '.preg_last_error().')';
                    continue;
                }
                $content = $stripped;
            }
        }
        return $content;
    }

    /**
     * Turns the accepted explicit-rule shapes into a flat list of single-entry arrays.
     * A string key names the tag; an array value is descended into and inherits the nearest
     * string key as tag name for its integer-keyed entries. Entries without a usable tag or
     * attribute name are skipped and reported through $errors.
     * @param array $rules
     * @param string|null $parentTag tag name inherited from the enclosing array, if any
     * @return array[] list of array(tagname => attribute)
     */
    protected function flattenExplicitRules(array $rules, $parentTag = null)
    {
        $flat = array();
        foreach ($rules as $key => $value) {
            $tagname = is_string($key) ? trim($key) : $parentTag;
            if (is_array($value)) {
                $flat = array_merge($flat, $this->flattenExplicitRules($value, $tagname));
                continue;
            }
            $attname = is_scalar($value) ? trim((string) $value) : '';
            if (!is_string($tagname) || $tagname === '' || $attname === '') {
                $this->errors[] = 'Invalid explicit rule: expected tagname => attribute';
                continue;
            }
            $flat[] = array($tagname => $attname);
        }
        return $flat;
    }

    /**
     * Appends a trimmed name to a list unless it is empty, not scalar, or already present.
     * @param array $list list to extend (modified in place)
     * @param mixed $name
     */
    protected function appendName(array &$list, $name)
    {
        $name = is_scalar($name) ? trim((string) $name) : '';
        if ($name !== '' && !in_array($name, $list, true)) {
            $list[] = $name;
        }
    }

    /**
     * Removes every occurrence of a trimmed name from a list and reindexes it.
     * @param array $list list to shrink (modified in place)
     * @param mixed $name
     */
    protected function dropName(array &$list, $name)
    {
        $name = is_scalar($name) ? trim((string) $name) : '';
        $list = array_values(array_diff($list, array($name)));
    }
}
<?php
############### COPYLEFT GPLv3 LICENSE ###############
##
## Copyright 2009 GPLv3 - http://www.opensource.org/licenses/gpl-3.0.html
##
## Anthony Gallon
## hide@address.com
##
## Permission is hereby granted to any person having a copy of this software
## to freely use and modify as required so long as the copyright notices
## and branding remain intact.
##
############### COPYLEFT GPLv3 LICENSE ###############
if (class_exists('phpQuery') === false) { die('Antz_TagFilter requires class phpQuery - see '.__FILE__.', line '.__LINE__); } // hard stop: the filter cannot run without phpQuery
/**
 * Strips unwanted and malicious tags from html content with whitelist and blacklist approach.
 * Supports whitelist tagnames, attributes and explicit tag/attribute combinations.
 *
 * Rules applied by process():
 *  - an element is removed when its name is on the tagname blacklist or missing from the tagname whitelist;
 *  - an attribute is removed when it is on the attribute blacklist, or when it is neither on the
 *    attribute whitelist nor explicitly whitelisted for the element it sits on;
 *  - every explicit blacklist rule (tagname => attribute) strips that attribute from matching elements.
 *
 * DOM parsing and node removal are delegated to the phpQuery library.
 */
class Antz_TagFilter
{
    /** @var string[] attribute names allowed on any element */
    protected $attributeWhitelist = array();
    /** @var string[] attribute names removed from every element */
    protected $attributeBlacklist = array();
    /** @var string[] element names that are kept */
    protected $tagnameWhitelist = array();
    /** @var string[] element names that are always removed */
    protected $tagnameBlacklist = array();
    /** @var array[] list of single-entry arrays (tagname => attribute) allowed in addition to the attribute whitelist */
    protected $explicitWhitelist = array();
    /** @var array[] list of single-entry arrays (tagname => attribute) stripped after the generic pass */
    protected $explicitBlacklist = array();
    /** @var string 'xhtml' or 'html'; selects the phpQuery document factory */
    protected $htmlMode = 'xhtml';
    /** @var string[] human readable problems collected while configuring/processing */
    protected $errors = array();
    /** @var array never read by this class; retained only so existing subclasses keep working */
    protected $removeNodes = array();
    /** @var array DOM nodes collected by processElement() that process() removes at the end */
    protected $removedNodes = array();
    /** @var bool not consulted anywhere in this class */
    protected $allowDoctype = false;

    public function __construct()
    {
    }

    /**
     * @return string[] messages recorded so far (invalid mode, malformed rules, regex failures)
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Set the mode which phpQuery runs (XHTML or HTML).
     * Any other value is ignored and the current mode is kept.
     * @param string $mode
     */
    public function setHtmlMode($mode = 'xhtml')
    {
        $mode = strtolower((string) $mode);
        if ($mode === 'xhtml' || $mode === 'html') {
            $this->htmlMode = $mode;
        }
    }

    /**
     * Overwrite attributes whitelist with new values (non-array input is ignored)
     * @param mixed $atts
     */
    public function setAttributeWhitelist($atts)
    {
        if (!is_array($atts)) return;
        $this->attributeWhitelist = array();
        $this->addAttributeWhitelist($atts);
    }

    /**
     * Overwrite attributes blacklist with new values (non-array input is ignored)
     * @param mixed $atts
     */
    public function setAttributeBlacklist($atts)
    {
        if (!is_array($atts)) return;
        $this->attributeBlacklist = array();
        $this->addAttributeBlacklist($atts);
    }

    /**
     * Overwrite tagname whitelist with new values (non-array input is ignored)
     * @param mixed $tags
     */
    public function setTagnameWhitelist($tags)
    {
        if (!is_array($tags)) return;
        $this->tagnameWhitelist = array();
        $this->addTagnameWhitelist($tags);
    }

    /**
     * Overwrite tagname blacklist with new values (non-array input is ignored)
     * @param mixed $tags
     */
    public function setTagnameBlacklist($tags)
    {
        if (!is_array($tags)) return;
        $this->tagnameBlacklist = array();
        $this->addTagnameBlacklist($tags);
    }

    /**
     * Overwrite explicit blacklist with new values (non-array input is ignored)
     * @param mixed $tags
     */
    public function setExplicitBlacklist($tags)
    {
        if (!is_array($tags)) return;
        $this->explicitBlacklist = array();
        $this->addExplicitBlacklist($tags);
    }

    /**
     * Overwrite explicit whitelist with new values (non-array input is ignored)
     * @param mixed $tags
     */
    public function setExplicitWhitelist($tags)
    {
        if (!is_array($tags)) return;
        $this->explicitWhitelist = array();
        $this->addExplicitWhitelist($tags);
    }

    /**
     * Add explicit blacklist rules (tagname=>attname).
     * Accepts a single rule, a list of rules, or tagname => array of attribute names.
     * @param mixed $tags
     */
    public function addExplicitBlacklist($tags)
    {
        if (!is_array($tags)) return;
        foreach ($this->flattenExplicitRules($tags) as $rule) {
            $this->explicitBlacklist[] = $rule;
        }
    }

    /**
     * Add explicit whitelist rules (tagname=>attname).
     * Accepts a single rule, a list of rules, or tagname => array of attribute names.
     * @param mixed $tags
     */
    public function addExplicitWhitelist($tags)
    {
        if (!is_array($tags)) return;
        foreach ($this->flattenExplicitRules($tags) as $rule) {
            $this->explicitWhitelist[] = $rule;
        }
    }

    /**
     * Add a tagname blacklist rule
     * @param mixed $tagname a name or a (nested) array of names
     */
    public function addTagnameBlacklist($tagname)
    {
        if (is_array($tagname)) {
            foreach ($tagname as $tag) {
                $this->addTagnameBlacklist($tag);
            }
            return;
        }
        $this->appendName($this->tagnameBlacklist, $tagname);
    }

    /**
     * Add a tagname whitelist rule
     * @param mixed $tagname a name or a (nested) array of names
     */
    public function addTagnameWhitelist($tagname)
    {
        if (is_array($tagname)) {
            foreach ($tagname as $tag) {
                $this->addTagnameWhitelist($tag);
            }
            return;
        }
        $this->appendName($this->tagnameWhitelist, $tagname);
    }

    /**
     * Add an attribute blacklist rule
     * @param mixed $att a name or a (nested) array of names
     */
    public function addAttributeBlacklist($att)
    {
        if (is_array($att)) {
            foreach ($att as $at) {
                $this->addAttributeBlacklist($at);
            }
            return;
        }
        $this->appendName($this->attributeBlacklist, $att);
    }

    /**
     * Add an attribute whitelist rule
     * @param mixed $att a name or a (nested) array of names
     */
    public function addAttributeWhitelist($att)
    {
        if (is_array($att)) {
            foreach ($att as $at) {
                $this->addAttributeWhitelist($at);
            }
            return;
        }
        $this->appendName($this->attributeWhitelist, $att);
    }

    /**
     * Remove a tagname blacklist rule
     * @param mixed $tagname a name or a (nested) array of names
     */
    public function removeTagnameBlacklist($tagname)
    {
        if (is_array($tagname)) {
            foreach ($tagname as $tag) {
                $this->removeTagnameBlacklist($tag);
            }
            return;
        }
        $this->dropName($this->tagnameBlacklist, $tagname);
    }

    /**
     * Remove a tagname whitelist rule
     * @param mixed $tagname a name or a (nested) array of names
     */
    public function removeTagnameWhitelist($tagname)
    {
        if (is_array($tagname)) {
            foreach ($tagname as $tag) {
                $this->removeTagnameWhitelist($tag);
            }
            return;
        }
        $this->dropName($this->tagnameWhitelist, $tagname);
    }

    /**
     * Remove an attribute blacklist rule
     * @param mixed $att a name or a (nested) array of names
     */
    public function removeAttributeBlacklist($att)
    {
        if (is_array($att)) {
            foreach ($att as $at) {
                $this->removeAttributeBlacklist($at);
            }
            return;
        }
        $this->dropName($this->attributeBlacklist, $att);
    }

    /**
     * Remove an attribute whitelist rule
     * @param mixed $att a name or a (nested) array of names
     */
    public function removeAttributeWhitelist($att)
    {
        if (is_array($att)) {
            foreach ($att as $at) {
                $this->removeAttributeWhitelist($at);
            }
            return;
        }
        $this->dropName($this->attributeWhitelist, $att);
    }

    /**
     * Sanitizes and returns supplied HTML with all blacklisted and non-whitelisted tags/attributes removed.
     * Returns an empty string (and leaves a message in getErrors()) when no DOM could be created,
     * so unfiltered markup is never handed back.
     * @param string $content
     * @return string $content
     */
    public function process($content)
    {
        $this->removedNodes = array();
        $content = $this->stripBlacklistedTags(trim((string) $content));

        $dom = $this->initDom($content);
        if (!is_object($dom)) {
            // initDom() hands the raw string back for an unknown mode; fail closed.
            return '';
        }

        // Iterate by value: the nodes are objects, and no reference is left dangling afterwards.
        $elements = $dom->elements;
        if (is_array($elements) || $elements instanceof Traversable) {
            foreach ($elements as $el) {
                $this->processElement($el);
            }
        }

        // Explicit rules run after the generic pass and strip one attribute from one kind of element.
        foreach ($this->explicitBlacklist as $rule) {
            foreach ($rule as $tagname => $attribute) {
                foreach (pq($tagname.'['.$attribute.']') as $node) {
                    $node->removeAttribute($attribute);
                }
            }
        }

        // Removal is deferred until here so the child lists walked by processElement() stay stable.
        foreach ($this->removedNodes as $node) {
            pq($node)->remove();
        }

        return (string) $dom;
    }

    /**
     * Creates a new phpQuery document for the configured mode.
     * @param string $content
     * @return mixed whatever phpQuery::newDocumentXhtml()/newDocumentHtml() returns, or the
     *               unchanged $content string (plus an entry in $errors) when the mode is unknown
     */
    protected function initDom($content)
    {
        switch ($this->htmlMode) {
            case 'xhtml':
                $dom = phpQuery::newDocumentXhtml($content);
                break;
            case 'html':
                $dom = phpQuery::newDocumentHtml($content);
                break;
            default:
                $this->errors[] = 'Invalid mode: should be xhtml or html';
                return $content;
        }
        return $dom;
    }

    /**
     * Removes blacklisted and non-whitelisted attributes from the element, recurses into all child
     * nodes and queues the element itself for removal when its tag name is not permitted.
     * Nodes that are neither DOMElement nor DOMDocument are ignored.
     * @param DOMElement $el
     */
    protected function processElement(&$el)
    {
        if (!($el instanceof DOMElement) && !($el instanceof DOMDocument)) {
            return;
        }

        // Collect first, remove afterwards: the attribute map must not change while it is iterated.
        $invalidAtts = array();
        $elAtts = $el->attributes;
        if ($elAtts !== null) {
            foreach ($elAtts as $att) {
                if (in_array($att->name, $this->attributeBlacklist, true)) {
                    $invalidAtts[] = $att->name;
                } else if (!in_array($att->name, $this->attributeWhitelist, true)
                    && !$this->isExplicitlyAllowed($el->nodeName, $att->name)) {
                    $invalidAtts[] = $att->name;
                }
            }
        }
        foreach ($invalidAtts as $name) {
            $el->removeAttribute($name);
        }

        $childNodes = $el->childNodes;
        if (is_object($childNodes) && $childNodes->length > 0) {
            for ($i = 0, $max = $childNodes->length; $i < $max; $i++) {
                // The parameter is by reference, so a real variable is needed here.
                $child = $childNodes->item($i);
                $this->processElement($child);
            }
        }

        if ($el instanceof DOMDocument) return;

        if (in_array($el->nodeName, $this->tagnameBlacklist, true)
            || !in_array($el->nodeName, $this->tagnameWhitelist, true)) {
            $this->removedNodes[] = $el;
        }
    }

    /**
     * Tells whether an explicit whitelist rule permits the attribute on the given element.
     * Names are compared as strings so that numeric rule keys can never match by type juggling.
     * @param string $nodeName
     * @param string $attName
     * @return bool
     */
    protected function isExplicitlyAllowed($nodeName, $attName)
    {
        foreach ($this->explicitWhitelist as $rule) {
            foreach ($rule as $tagname => $attname) {
                if ((string) $tagname === $nodeName && (string) $attname === $attName) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Removes blacklisted elements from the raw markup before it is parsed.
     * For each name, first everything from the first opening tag to the last closing tag of that
     * name is dropped (the match is greedy), then any remaining opening tags. The name must be
     * followed by whitespace, "/" or ">", so blacklisting "b" leaves "<br>" alone.
     * @param string $content
     * @return string
     */
    protected function stripBlacklistedTags($content)
    {
        foreach ($this->tagnameBlacklist as $tagname) {
            $tagname = trim((string) $tagname);
            if ($tagname === '') continue; // an empty name would match every tag

            $tag = preg_quote($tagname, '#');
            $open = '<'.$tag.'(?=[\s/>])[^>]*>';
            $patterns = array(
                '#'.$open.'.*</'.$tag.'(?=[\s/>])[^>]*>#is',
                '#'.$open.'#i',
            );
            foreach ($patterns as $pattern) {
                $stripped = preg_replace($pattern, '', $content);
                if ($stripped === null) {
                    // Keep the markup; processElement() still queues blacklisted elements for removal.
                    $this->errors[] = 'Could not strip tag "'.$tagname.'" by regular expression (PCRE error '.preg_last_error().')';
                    continue;
                }
                $content = $stripped;
            }
        }
        return $content;
    }

    /**
     * Turns the accepted explicit-rule shapes into a flat list of single-entry arrays.
     * A string key names the tag; an array value is descended into and inherits the nearest
     * string key as tag name for its integer-keyed entries. Entries without a usable tag or
     * attribute name are skipped and reported through $errors.
     * @param array $rules
     * @param string|null $parentTag tag name inherited from the enclosing array, if any
     * @return array[] list of array(tagname => attribute)
     */
    protected function flattenExplicitRules(array $rules, $parentTag = null)
    {
        $flat = array();
        foreach ($rules as $key => $value) {
            $tagname = is_string($key) ? trim($key) : $parentTag;
            if (is_array($value)) {
                $flat = array_merge($flat, $this->flattenExplicitRules($value, $tagname));
                continue;
            }
            $attname = is_scalar($value) ? trim((string) $value) : '';
            if (!is_string($tagname) || $tagname === '' || $attname === '') {
                $this->errors[] = 'Invalid explicit rule: expected tagname => attribute';
                continue;
            }
            $flat[] = array($tagname => $attname);
        }
        return $flat;
    }

    /**
     * Appends a trimmed name to a list unless it is empty, not scalar, or already present.
     * @param array $list list to extend (modified in place)
     * @param mixed $name
     */
    protected function appendName(array &$list, $name)
    {
        $name = is_scalar($name) ? trim((string) $name) : '';
        if ($name !== '' && !in_array($name, $list, true)) {
            $list[] = $name;
        }
    }

    /**
     * Removes every occurrence of a trimmed name from a list and reindexes it.
     * @param array $list list to shrink (modified in place)
     * @param mixed $name
     */
    protected function dropName(array &$list, $name)
    {
        $name = is_scalar($name) ? trim((string) $name) : '';
        $list = array_values(array_diff($list, array($name)));
    }
}