Location: PHPKode > scripts > Antz_TagFilter > antz_tagfilter/Antz_TagFilter.php
<?php
###############   COPYLEFT GPLv3 LICENSE   ###############
##
## Copyright 2009 GPLv3 - http://www.opensource.org/licenses/gpl-3.0.html
##
## Anthony Gallon
## hide@address.com
##
## Permission is hereby granted to any person having a copy of this software
## to freely use and modify as required so long as the copyright notices
## and branding remain intact.
##
###############   COPYLEFT GPLv3 LICENSE   ###############

class_exists('phpQuery') or die('Antz_TagFilter requires class phpQuery - see '.__FILE__.', line '.__LINE__); // hard dependency: abort before the class is declared when phpQuery is not loaded

/**
 * Strips unwanted and malicious tags from html content with whitelist and blacklist approach.
 * Supports whitelist tagnames, attributes and explicit tag/attribute combinations.
 *
 * Depends on phpQuery: phpQuery::newDocumentXhtml(), phpQuery::newDocumentHtml() and pq().
 */
class Antz_TagFilter
{
    /** @var array attribute names accepted on any tag */
    protected $attributeWhitelist = array();
    /** @var array attribute names stripped from every tag */
    protected $attributeBlacklist = array();
    /** @var array tag names that may stay in the document */
    protected $tagnameWhitelist = array();
    /** @var array tag names that are always removed */
    protected $tagnameBlacklist = array();
    /** @var array list of array(tagname => attribute) pairs accepted in addition to the attribute whitelist */
    protected $explicitWhitelist = array();
    /** @var array list of array(tagname => attribute) pairs whose attribute is always stripped */
    protected $explicitBlacklist = array();
    /** @var string 'xhtml' or 'html'; selects the phpQuery document factory */
    protected $htmlMode = 'xhtml';
    /** @var array human readable error messages collected so far */
    protected $errors = array();
    /** @var array DOM nodes queued for removal by the current process() run */
    protected $removedNodes = array();
    /** @var array never read or written by this class; kept only so subclasses referencing it keep working */
    protected $removeNodes = array();
    /** @var bool not used by this class */
    protected $allowDoctype = false;

    public function __construct()
    {

    }

    /**
     * Returns the error messages recorded so far
     * @return array
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Set the mode which phpQuery runs (XHTML or HTML); any other value is ignored
     * @param string $mode
     */
    public function setHtmlMode($mode = 'xhtml')
    {
        $mode = strtolower((string) $mode);
        if ($mode === 'xhtml' || $mode === 'html') {
            $this->htmlMode = $mode;
        }
    }

    /**
     * Overwrite attributes whitelist with new values (ignored unless an array is given)
     * @param mixed $atts
     */
    public function setAttributeWhitelist($atts)
    {
        if (!is_array($atts)) {
            return;
        }
        $this->attributeWhitelist = array();
        $this->addAttributeWhitelist($atts);
    }

    /**
     * Overwrite attributes blacklist with new values (ignored unless an array is given)
     * @param mixed $atts
     */
    public function setAttributeBlacklist($atts)
    {
        if (!is_array($atts)) {
            return;
        }
        $this->attributeBlacklist = array();
        $this->addAttributeBlacklist($atts);
    }

    /**
     * Overwrite tagname whitelist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setTagnameWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->tagnameWhitelist = array();
        $this->addTagnameWhitelist($tags);
    }

    /**
     * Overwrite tagname blacklist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setTagnameBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->tagnameBlacklist = array();
        $this->addTagnameBlacklist($tags);
    }

    /**
     * Overwrite explicit blacklist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setExplicitBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->explicitBlacklist = array();
        $this->addExplicitBlacklist($tags);
    }

    /**
     * Overwrite explicit whitelist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setExplicitWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->explicitWhitelist = array();
        $this->addExplicitWhitelist($tags);
    }

    /**
     * Add explicit blacklist rules.
     * Accepts array(tagname => attname), array(tagname => array(attname, ...)) or a list of such arrays.
     * @param mixed $tags
     */
    public function addExplicitBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        foreach ($this->normalizeExplicitRules($tags) as $rule) {
            $this->explicitBlacklist[] = $rule;
        }
    }

    /**
     * Add explicit whitelist rules.
     * Accepts array(tagname => attname), array(tagname => array(attname, ...)) or a list of such arrays.
     * @param mixed $tags
     */
    public function addExplicitWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        foreach ($this->normalizeExplicitRules($tags) as $rule) {
            $this->explicitWhitelist[] = $rule;
        }
    }

    /**
     * Add a tagname blacklist rule (a single name or a possibly nested array of names)
     * @param mixed $tagname
     */
    public function addTagnameBlacklist($tagname)
    {
        $this->tagnameBlacklist = $this->addNames($this->tagnameBlacklist, $tagname);
    }

    /**
     * Add a tagname whitelist rule (a single name or a possibly nested array of names)
     * @param mixed $tagname
     */
    public function addTagnameWhitelist($tagname)
    {
        $this->tagnameWhitelist = $this->addNames($this->tagnameWhitelist, $tagname);
    }

    /**
     * Add an attribute blacklist rule (a single name or a possibly nested array of names)
     * @param mixed $att
     */
    public function addAttributeBlacklist($att)
    {
        $this->attributeBlacklist = $this->addNames($this->attributeBlacklist, $att);
    }

    /**
     * Add an attribute whitelist rule (a single name or a possibly nested array of names)
     * @param mixed $att
     */
    public function addAttributeWhitelist($att)
    {
        $this->attributeWhitelist = $this->addNames($this->attributeWhitelist, $att);
    }

    /**
     * Remove a tagname blacklist rule (a single name or a possibly nested array of names)
     * @param mixed $tagname
     */
    public function removeTagnameBlacklist($tagname)
    {
        $this->tagnameBlacklist = $this->removeNames($this->tagnameBlacklist, $tagname);
    }

    /**
     * Remove a tagname whitelist rule (a single name or a possibly nested array of names)
     * @param mixed $tagname
     */
    public function removeTagnameWhitelist($tagname)
    {
        $this->tagnameWhitelist = $this->removeNames($this->tagnameWhitelist, $tagname);
    }

    /**
     * Remove an attribute blacklist rule (a single name or a possibly nested array of names)
     * @param mixed $att
     */
    public function removeAttributeBlacklist($att)
    {
        $this->attributeBlacklist = $this->removeNames($this->attributeBlacklist, $att);
    }

    /**
     * Remove an attribute whitelist rule (a single name or a possibly nested array of names)
     * @param mixed $att
     */
    public function removeAttributeWhitelist($att)
    {
        $this->attributeWhitelist = $this->removeNames($this->attributeWhitelist, $att);
    }

    /**
     * Sanitizes and returns supplied HTML with all blacklisted and non-whitelisted tags/attributes removed.
     * Returns an empty string when no DOM could be created, so unfiltered markup is never handed back.
     * @param string $content
     * @return string $content
     */
    public function process($content)
    {
        $this->removedNodes = array();
        $content = $this->stripBlacklistedMarkup(trim((string) $content));

        $dom = $this->initDom($content);
        if (!is_object($dom)) {
            // initDom() already recorded the reason in $this->errors
            return '';
        }

        foreach ($dom->elements as $el) {
            $this->processElement($el);
        }

        // explicit blacklist rules win over the generic attribute whitelist
        foreach ($this->explicitBlacklist as $rule) {
            foreach ($rule as $tagname => $attribute) {
                $matches = pq($tagname . '[' . $attribute . ']');
                foreach ($matches as $node) {
                    $node->removeAttribute($attribute);
                }
            }
        }

        // nodes are detached only now, after the whole tree has been walked
        foreach ($this->removedNodes as $obj) {
            pq($obj)->remove();
        }

        return (string) $dom;
    }

    /**
     * Creates a new phpQuery dom element.
     * On an unknown mode an error is recorded and the unchanged $content string is returned.
     * @param string $content
     * @return object|string phpQuery document, or $content when the mode is invalid
     */
    protected function initDom($content)
    {
        switch ($this->htmlMode) {
            case 'xhtml':
                $dom = phpQuery::newDocumentXhtml($content);
                break;
            case 'html':
                $dom = phpQuery::newDocumentHtml($content);
                break;
            default:
                $this->errors[] = 'Invalid mode: should be xhtml or html';
                return $content;
        }
        return $dom;
    }

    /**
     * Removes blacklisted and non-whitelisted attributes from the element, recurses into all
     * child nodes and queues the element itself for removal when its tag is not acceptable.
     * @param DOMElement $el
     */
    protected function processElement(&$el)
    {
        if (!($el instanceof DOMElement) && !($el instanceof DOMDocument)) {
            return;
        }

        $attributes = $el->attributes;
        if ($attributes === null) {
            $attributes = array();
        }

        // collect first, remove afterwards: the attribute map is not modified while it is iterated
        $invalidAtts = array();
        foreach ($attributes as $att) {
            $name = $att->name;
            if (in_array($name, $this->attributeBlacklist, true)) {
                $invalidAtts[] = $name;
                continue;
            }
            if (in_array($name, $this->attributeWhitelist, true)) {
                continue;
            }
            if (!$this->isExplicitlyAllowed($el->nodeName, $name)) {
                $invalidAtts[] = $name;
            }
        }
        foreach ($invalidAtts as $name) {
            $el->removeAttribute($name);
        }

        $childNodes = $el->childNodes;
        if (is_object($childNodes)) {
            for ($i = 0, $max = $childNodes->length; $i < $max; $i++) {
                // processElement() takes its argument by reference, so it needs a real variable
                $child = $childNodes->item($i);
                $this->processElement($child);
            }
        }

        if ($el instanceof DOMDocument) {
            return;
        }

        $blacklisted = in_array($el->nodeName, $this->tagnameBlacklist, true);
        $whitelisted = in_array($el->nodeName, $this->tagnameWhitelist, true);
        if ($blacklisted || !$whitelisted) {
            $this->removedNodes[] = $el;
        }
    }

    /**
     * Tells whether an explicit whitelist rule exists for this exact tag/attribute pair
     * @param string $tagname
     * @param string $attribute
     * @return bool
     */
    protected function isExplicitlyAllowed($tagname, $attribute)
    {
        foreach ($this->explicitWhitelist as $rule) {
            foreach ($rule as $ruleTag => $ruleAttribute) {
                // string comparison on purpose: a loose == lets integer keys match any tag name on PHP < 8
                if ((string) $ruleTag === (string) $tagname && (string) $ruleAttribute === (string) $attribute) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Textual pre-filter: drops blacklisted tags (and whatever sits between an opening and a
     * closing tag of that name) before the markup reaches the DOM parser.
     * @param string $content
     * @return string
     */
    protected function stripBlacklistedMarkup($content)
    {
        // the tag name must be followed by whitespace, "/" or ">", so "a" does not match "<abbr>"
        $boundary = '(?=[\s\/>])';

        foreach ($this->tagnameBlacklist as $tagname) {
            $tagname = (string) $tagname;
            if ($tagname === '') {
                continue;
            }
            $quoted = preg_quote($tagname, '/');
            $patterns = array(
                // greedy on purpose (as before): spans from the first opening to the last closing tag
                '/<' . $quoted . $boundary . '[^>]*>.*<\/' . $quoted . $boundary . '[^>]*>/is',
                '/<' . $quoted . $boundary . '[^>]*>/i',
            );
            $stripped = preg_replace($patterns, '', $content);
            if ($stripped === null) {
                // keep the current text; blacklisted elements are still removed by the DOM pass
                $this->errors[] = 'Could not pre-filter tag: ' . $tagname;
                continue;
            }
            $content = $stripped;
        }

        return $content;
    }

    /**
     * Flattens a name or (nested) array of names into a list of trimmed, non-empty strings
     * @param mixed $names
     * @return array
     */
    protected function normalizeNames($names)
    {
        if (is_array($names)) {
            $flat = array();
            foreach ($names as $entry) {
                foreach ($this->normalizeNames($entry) as $name) {
                    $flat[] = $name;
                }
            }
            return $flat;
        }
        if (!is_scalar($names)) {
            return array();
        }
        $name = trim((string) $names);
        return $name === '' ? array() : array($name);
    }

    /**
     * Returns $list extended by every normalized name that is not in it yet
     * @param array $list
     * @param mixed $names
     * @return array
     */
    protected function addNames(array $list, $names)
    {
        foreach ($this->normalizeNames($names) as $name) {
            if (!in_array($name, $list, true)) {
                $list[] = $name;
            }
        }
        return $list;
    }

    /**
     * Returns $list without the given names, re-indexed from zero
     * @param array $list
     * @param mixed $names
     * @return array
     */
    protected function removeNames(array $list, $names)
    {
        return array_values(array_diff($list, $this->normalizeNames($names)));
    }

    /**
     * Turns the accepted explicit-rule shapes into a flat list of array(tagname => attribute).
     * A string key names the tag; entries with integer keys inherit the tag of the enclosing array.
     * Entries for which no tag name or attribute can be determined are skipped and reported via getErrors().
     * @param array $rules
     * @param string|null $tagContext tag name inherited from the enclosing array, if any
     * @return array
     */
    protected function normalizeExplicitRules(array $rules, $tagContext = null)
    {
        $normalized = array();
        foreach ($rules as $key => $value) {
            $tag = is_string($key) ? trim($key) : $tagContext;

            if (is_array($value)) {
                foreach ($this->normalizeExplicitRules($value, $tag) as $rule) {
                    $normalized[] = $rule;
                }
                continue;
            }
            if (!is_scalar($value)) {
                continue;
            }

            $attribute = trim((string) $value);
            if ($tag === null || $tag === '' || $attribute === '') {
                $this->errors[] = 'Invalid explicit rule: expected tagname => attribute';
                continue;
            }
            $normalized[] = array($tag => $attribute);
        }
        return $normalized;
    }
}


<?php
###############   COPYLEFT GPLv3 LICENSE   ###############
##
## Copyright 2009 GPLv3 - http://www.opensource.org/licenses/gpl-3.0.html
##
## Anthony Gallon
## hide@address.com
##
## Permission is hereby granted to any person having a copy of this software
## to freely use and modify as required so long as the copyright notices
## and branding remain intact.
##
###############   COPYLEFT GPLv3 LICENSE   ###############

class_exists('phpQuery') or die('Antz_TagFilter requires class phpQuery - see '.__FILE__.', line '.__LINE__); // hard dependency: abort before the class is declared when phpQuery is not loaded

/**
 * Strips unwanted and malicious tags from html content with whitelist and blacklist approach.
 * Supports whitelist tagnames, attributes and explicit tag/attribute combinations.
 *
 * Depends on phpQuery: phpQuery::newDocumentXhtml(), phpQuery::newDocumentHtml() and pq().
 */
class Antz_TagFilter
{
    /** @var array attribute names accepted on any tag */
    protected $attributeWhitelist = array();
    /** @var array attribute names stripped from every tag */
    protected $attributeBlacklist = array();
    /** @var array tag names that may stay in the document */
    protected $tagnameWhitelist = array();
    /** @var array tag names that are always removed */
    protected $tagnameBlacklist = array();
    /** @var array list of array(tagname => attribute) pairs accepted in addition to the attribute whitelist */
    protected $explicitWhitelist = array();
    /** @var array list of array(tagname => attribute) pairs whose attribute is always stripped */
    protected $explicitBlacklist = array();
    /** @var string 'xhtml' or 'html'; selects the phpQuery document factory */
    protected $htmlMode = 'xhtml';
    /** @var array human readable error messages collected so far */
    protected $errors = array();
    /** @var array DOM nodes queued for removal by the current process() run */
    protected $removedNodes = array();
    /** @var array never read or written by this class; kept only so subclasses referencing it keep working */
    protected $removeNodes = array();
    /** @var bool not used by this class */
    protected $allowDoctype = false;

    public function __construct()
    {

    }

    /**
     * Returns the error messages recorded so far
     * @return array
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Set the mode which phpQuery runs (XHTML or HTML); any other value is ignored
     * @param string $mode
     */
    public function setHtmlMode($mode = 'xhtml')
    {
        $mode = strtolower((string) $mode);
        if ($mode === 'xhtml' || $mode === 'html') {
            $this->htmlMode = $mode;
        }
    }

    /**
     * Overwrite attributes whitelist with new values (ignored unless an array is given)
     * @param mixed $atts
     */
    public function setAttributeWhitelist($atts)
    {
        if (!is_array($atts)) {
            return;
        }
        $this->attributeWhitelist = array();
        $this->addAttributeWhitelist($atts);
    }

    /**
     * Overwrite attributes blacklist with new values (ignored unless an array is given)
     * @param mixed $atts
     */
    public function setAttributeBlacklist($atts)
    {
        if (!is_array($atts)) {
            return;
        }
        $this->attributeBlacklist = array();
        $this->addAttributeBlacklist($atts);
    }

    /**
     * Overwrite tagname whitelist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setTagnameWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->tagnameWhitelist = array();
        $this->addTagnameWhitelist($tags);
    }

    /**
     * Overwrite tagname blacklist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setTagnameBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->tagnameBlacklist = array();
        $this->addTagnameBlacklist($tags);
    }

    /**
     * Overwrite explicit blacklist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setExplicitBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->explicitBlacklist = array();
        $this->addExplicitBlacklist($tags);
    }

    /**
     * Overwrite explicit whitelist with new values (ignored unless an array is given)
     * @param mixed $tags
     */
    public function setExplicitWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->explicitWhitelist = array();
        $this->addExplicitWhitelist($tags);
    }

    /**
     * Add explicit blacklist rules.
     * Accepts array(tagname => attname), array(tagname => array(attname, ...)) or a list of such arrays.
     * @param mixed $tags
     */
    public function addExplicitBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        foreach ($this->normalizeExplicitRules($tags) as $rule) {
            $this->explicitBlacklist[] = $rule;
        }
    }

    /**
     * Add explicit whitelist rules.
     * Accepts array(tagname => attname), array(tagname => array(attname, ...)) or a list of such arrays.
     * @param mixed $tags
     */
    public function addExplicitWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        foreach ($this->normalizeExplicitRules($tags) as $rule) {
            $this->explicitWhitelist[] = $rule;
        }
    }

    /**
     * Add a tagname blacklist rule (a single name or a possibly nested array of names)
     * @param mixed $tagname
     */
    public function addTagnameBlacklist($tagname)
    {
        $this->tagnameBlacklist = $this->addNames($this->tagnameBlacklist, $tagname);
    }

    /**
     * Add a tagname whitelist rule (a single name or a possibly nested array of names)
     * @param mixed $tagname
     */
    public function addTagnameWhitelist($tagname)
    {
        $this->tagnameWhitelist = $this->addNames($this->tagnameWhitelist, $tagname);
    }

    /**
     * Add an attribute blacklist rule (a single name or a possibly nested array of names)
     * @param mixed $att
     */
    public function addAttributeBlacklist($att)
    {
        $this->attributeBlacklist = $this->addNames($this->attributeBlacklist, $att);
    }

    /**
     * Add an attribute whitelist rule (a single name or a possibly nested array of names)
     * @param mixed $att
     */
    public function addAttributeWhitelist($att)
    {
        $this->attributeWhitelist = $this->addNames($this->attributeWhitelist, $att);
    }

    /**
     * Remove a tagname blacklist rule (a single name or a possibly nested array of names)
     * @param mixed $tagname
     */
    public function removeTagnameBlacklist($tagname)
    {
        $this->tagnameBlacklist = $this->removeNames($this->tagnameBlacklist, $tagname);
    }

    /**
     * Remove a tagname whitelist rule (a single name or a possibly nested array of names)
     * @param mixed $tagname
     */
    public function removeTagnameWhitelist($tagname)
    {
        $this->tagnameWhitelist = $this->removeNames($this->tagnameWhitelist, $tagname);
    }

    /**
     * Remove an attribute blacklist rule (a single name or a possibly nested array of names)
     * @param mixed $att
     */
    public function removeAttributeBlacklist($att)
    {
        $this->attributeBlacklist = $this->removeNames($this->attributeBlacklist, $att);
    }

    /**
     * Remove an attribute whitelist rule (a single name or a possibly nested array of names)
     * @param mixed $att
     */
    public function removeAttributeWhitelist($att)
    {
        $this->attributeWhitelist = $this->removeNames($this->attributeWhitelist, $att);
    }

    /**
     * Sanitizes and returns supplied HTML with all blacklisted and non-whitelisted tags/attributes removed.
     * Returns an empty string when no DOM could be created, so unfiltered markup is never handed back.
     * @param string $content
     * @return string $content
     */
    public function process($content)
    {
        $this->removedNodes = array();
        $content = $this->stripBlacklistedMarkup(trim((string) $content));

        $dom = $this->initDom($content);
        if (!is_object($dom)) {
            // initDom() already recorded the reason in $this->errors
            return '';
        }

        foreach ($dom->elements as $el) {
            $this->processElement($el);
        }

        // explicit blacklist rules win over the generic attribute whitelist
        foreach ($this->explicitBlacklist as $rule) {
            foreach ($rule as $tagname => $attribute) {
                $matches = pq($tagname . '[' . $attribute . ']');
                foreach ($matches as $node) {
                    $node->removeAttribute($attribute);
                }
            }
        }

        // nodes are detached only now, after the whole tree has been walked
        foreach ($this->removedNodes as $obj) {
            pq($obj)->remove();
        }

        return (string) $dom;
    }

    /**
     * Creates a new phpQuery dom element.
     * On an unknown mode an error is recorded and the unchanged $content string is returned.
     * @param string $content
     * @return object|string phpQuery document, or $content when the mode is invalid
     */
    protected function initDom($content)
    {
        switch ($this->htmlMode) {
            case 'xhtml':
                $dom = phpQuery::newDocumentXhtml($content);
                break;
            case 'html':
                $dom = phpQuery::newDocumentHtml($content);
                break;
            default:
                $this->errors[] = 'Invalid mode: should be xhtml or html';
                return $content;
        }
        return $dom;
    }

    /**
     * Removes blacklisted and non-whitelisted attributes from the element, recurses into all
     * child nodes and queues the element itself for removal when its tag is not acceptable.
     * @param DOMElement $el
     */
    protected function processElement(&$el)
    {
        if (!($el instanceof DOMElement) && !($el instanceof DOMDocument)) {
            return;
        }

        $attributes = $el->attributes;
        if ($attributes === null) {
            $attributes = array();
        }

        // collect first, remove afterwards: the attribute map is not modified while it is iterated
        $invalidAtts = array();
        foreach ($attributes as $att) {
            $name = $att->name;
            if (in_array($name, $this->attributeBlacklist, true)) {
                $invalidAtts[] = $name;
                continue;
            }
            if (in_array($name, $this->attributeWhitelist, true)) {
                continue;
            }
            if (!$this->isExplicitlyAllowed($el->nodeName, $name)) {
                $invalidAtts[] = $name;
            }
        }
        foreach ($invalidAtts as $name) {
            $el->removeAttribute($name);
        }

        $childNodes = $el->childNodes;
        if (is_object($childNodes)) {
            for ($i = 0, $max = $childNodes->length; $i < $max; $i++) {
                // processElement() takes its argument by reference, so it needs a real variable
                $child = $childNodes->item($i);
                $this->processElement($child);
            }
        }

        if ($el instanceof DOMDocument) {
            return;
        }

        $blacklisted = in_array($el->nodeName, $this->tagnameBlacklist, true);
        $whitelisted = in_array($el->nodeName, $this->tagnameWhitelist, true);
        if ($blacklisted || !$whitelisted) {
            $this->removedNodes[] = $el;
        }
    }

    /**
     * Tells whether an explicit whitelist rule exists for this exact tag/attribute pair
     * @param string $tagname
     * @param string $attribute
     * @return bool
     */
    protected function isExplicitlyAllowed($tagname, $attribute)
    {
        foreach ($this->explicitWhitelist as $rule) {
            foreach ($rule as $ruleTag => $ruleAttribute) {
                // string comparison on purpose: a loose == lets integer keys match any tag name on PHP < 8
                if ((string) $ruleTag === (string) $tagname && (string) $ruleAttribute === (string) $attribute) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Textual pre-filter: drops blacklisted tags (and whatever sits between an opening and a
     * closing tag of that name) before the markup reaches the DOM parser.
     * @param string $content
     * @return string
     */
    protected function stripBlacklistedMarkup($content)
    {
        // the tag name must be followed by whitespace, "/" or ">", so "a" does not match "<abbr>"
        $boundary = '(?=[\s\/>])';

        foreach ($this->tagnameBlacklist as $tagname) {
            $tagname = (string) $tagname;
            if ($tagname === '') {
                continue;
            }
            $quoted = preg_quote($tagname, '/');
            $patterns = array(
                // greedy on purpose (as before): spans from the first opening to the last closing tag
                '/<' . $quoted . $boundary . '[^>]*>.*<\/' . $quoted . $boundary . '[^>]*>/is',
                '/<' . $quoted . $boundary . '[^>]*>/i',
            );
            $stripped = preg_replace($patterns, '', $content);
            if ($stripped === null) {
                // keep the current text; blacklisted elements are still removed by the DOM pass
                $this->errors[] = 'Could not pre-filter tag: ' . $tagname;
                continue;
            }
            $content = $stripped;
        }

        return $content;
    }

    /**
     * Flattens a name or (nested) array of names into a list of trimmed, non-empty strings
     * @param mixed $names
     * @return array
     */
    protected function normalizeNames($names)
    {
        if (is_array($names)) {
            $flat = array();
            foreach ($names as $entry) {
                foreach ($this->normalizeNames($entry) as $name) {
                    $flat[] = $name;
                }
            }
            return $flat;
        }
        if (!is_scalar($names)) {
            return array();
        }
        $name = trim((string) $names);
        return $name === '' ? array() : array($name);
    }

    /**
     * Returns $list extended by every normalized name that is not in it yet
     * @param array $list
     * @param mixed $names
     * @return array
     */
    protected function addNames(array $list, $names)
    {
        foreach ($this->normalizeNames($names) as $name) {
            if (!in_array($name, $list, true)) {
                $list[] = $name;
            }
        }
        return $list;
    }

    /**
     * Returns $list without the given names, re-indexed from zero
     * @param array $list
     * @param mixed $names
     * @return array
     */
    protected function removeNames(array $list, $names)
    {
        return array_values(array_diff($list, $this->normalizeNames($names)));
    }

    /**
     * Turns the accepted explicit-rule shapes into a flat list of array(tagname => attribute).
     * A string key names the tag; entries with integer keys inherit the tag of the enclosing array.
     * Entries for which no tag name or attribute can be determined are skipped and reported via getErrors().
     * @param array $rules
     * @param string|null $tagContext tag name inherited from the enclosing array, if any
     * @return array
     */
    protected function normalizeExplicitRules(array $rules, $tagContext = null)
    {
        $normalized = array();
        foreach ($rules as $key => $value) {
            $tag = is_string($key) ? trim($key) : $tagContext;

            if (is_array($value)) {
                foreach ($this->normalizeExplicitRules($value, $tag) as $rule) {
                    $normalized[] = $rule;
                }
                continue;
            }
            if (!is_scalar($value)) {
                continue;
            }

            $attribute = trim((string) $value);
            if ($tag === null || $tag === '' || $attribute === '') {
                $this->errors[] = 'Invalid explicit rule: expected tagname => attribute';
                continue;
            }
            $normalized[] = array($tag => $attribute);
        }
        return $normalized;
    }
}


















Return current item: Antz_TagFilter