<?php
/**
* PHPTAL templating engine
*
* PHP Version 5
*
* @category HTML
* @package PHPTAL
* @author Kornel Lesiński <hide@address.com>
* @license http://www.gnu.org/licenses/lgpl.html GNU Lesser General Public License
* @version SVN: $Id: $
* @link http://phptal.org/
*/
/**
* Removes all unnecessary whitespace from XHTML documents.
*
* extends Normalize only to re-use helper methods
*/
class PHPTAL_PreFilter_Compress extends PHPTAL_PreFilter_Normalize
{
    /**
     * keeps track whether last element had trailing whitespace (or didn't need it).
     * If had_space==false, next element must keep leading space.
     */
    private $had_space = false;

    /**
     * last text node before closing tag that may need trailing whitespace trimmed.
     * It's often last-child, but comments, multiple end tags make that trickier.
     */
    private $most_recent_text_node = null;

    /**
     * Recursively compresses whitespace in the subtree rooted at $root.
     *
     * Walks child nodes in document order while carrying two pieces of state
     * between siblings and across nesting levels: $had_space (whether the next
     * text node may lose its leading space) and $most_recent_text_node (the text
     * node whose trailing space may still be trimmed when a line-breaking tag
     * follows).
     *
     * @param PHPTAL_Dom_Element $root element to process; modified in place
     *
     * @return void
     */
    public function filterDOM(PHPTAL_Dom_Element $root)
    {
        // let xml:space=preserve preserve everything
        if ($root->getAttributeNS('http://www.w3.org/XML/1998/namespace', 'space') === 'preserve') {
            // Text inside a preserved subtree is never inspected here, so whatever was
            // known about surrounding whitespace is stale. Forget it before descending,
            // so nested elements that get filtered again don't trim against old state.
            $this->most_recent_text_node = null;
            $this->had_space = false;

            // inherited helper, not visible in this file; presumably looks for nested
            // elements that opt back into filtering -- TODO confirm
            $this->findElementToFilter($root);

            // ...and forget it again afterwards: the preserved content may end with
            // non-whitespace, so the following text must keep its leading space, and
            // a text node found by a nested filter must not be right-trimmed later.
            $this->most_recent_text_node = null;
            $this->had_space = false;
            return;
        }

        // tal:replace makes element behave like text
        if ($root->getAttributeNS('http://xml.zope.org/namespaces/tal', 'replace')) {
            $this->most_recent_text_node = null;
            $this->had_space = false;
            return;
        }

        $this->normalizeAttributes($root);
        $this->elementSpecificOptimizations($root);

        // <head>, <tr> don't have any significant whitespace
        $no_spaces = $this->hasNoInterelementSpace($root);

        // mostly block-level elements
        $breaks_line = $no_spaces || $this->breaksLine($root);

        // start tag newline
        if ($breaks_line) {
            $this->trimMostRecentTextNode();
            $this->had_space = true;
        } elseif ($this->isInlineBlock($root)) {
            // spaces around <img> must be kept
            $this->most_recent_text_node = null;
            $this->had_space = false;
        }

        // <pre>, <textarea> are handled separately from xml:space, because they may have attributes normalized
        if ($this->isSpaceSensitiveInXHTML($root)) {
            $this->most_recent_text_node = null;

            // HTML 5 (9.1.2.5) specifies quirk that a first *single* newline in <pre> can be removed
            if (count($root->childNodes) && $root->childNodes[0] instanceof PHPTAL_Dom_Text) {
                $first = $root->childNodes[0];
                if (preg_match('/^\n[^\n]/', $first->getValueEscaped())) {
                    $first->setValueEscaped(substr($first->getValueEscaped(), 1));
                }
            }
            $this->findElementToFilter($root);
            return;
        }

        foreach ($root->childNodes as $node) {
            if ($node instanceof PHPTAL_Dom_Text) {
                // replaces runs of whitespace with ' '
                $norm = $this->normalizeSpace($node->getValueEscaped(), $node->getEncoding());

                if ($no_spaces) {
                    $norm = trim($norm);
                } elseif ($this->had_space) {
                    $norm = ltrim($norm);
                }

                $node->setValueEscaped($norm);

                // collapsed whitespace-only nodes are ignored
                // (otherwise trimming of most_recent_text_node would be useless)
                if ($norm !== '') {
                    $this->most_recent_text_node = $node;
                    $this->had_space = (substr($norm, -1) === ' ');
                }
            } elseif ($node instanceof PHPTAL_Dom_Element) {
                $this->filterDOM($node);
            } elseif ($node instanceof PHPTAL_Dom_DocumentType || $node instanceof PHPTAL_Dom_XMLDeclaration) {
                $this->had_space = true;
            } elseif ($node instanceof PHPTAL_Dom_ProcessingInstruction) {
                // PI may output something requiring spaces
                $this->most_recent_text_node = null;
                $this->had_space = false;
            }
        }

        // repeated element may need trailing space.
        if (!$breaks_line && $root->getAttributeNS('http://xml.zope.org/namespaces/tal', 'repeat')) {
            $this->most_recent_text_node = null;
        }

        // tal:content may replace element with something without space
        if (!$breaks_line && $root->getAttributeNS('http://xml.zope.org/namespaces/tal', 'content')) {
            $this->had_space = false;
            $this->most_recent_text_node = null;
        }

        // line break caused by end tag
        if ($breaks_line) {
            $this->trimMostRecentTextNode();
            $this->had_space = true;
        }
    }

    /**
     * Strips trailing whitespace from the remembered text node (if any) and forgets it.
     *
     * Used when a line-breaking start or end tag follows that text.
     *
     * @return void
     */
    private function trimMostRecentTextNode()
    {
        if ($this->most_recent_text_node) {
            $this->most_recent_text_node->setValueEscaped(
                rtrim($this->most_recent_text_node->getValueEscaped())
            );
            $this->most_recent_text_node = null;
        }
    }

    /**
     * local names of elements whose text children are trimmed on both sides
     */
    private static $no_interelement_space = array(
        'html','head','table','thead','tfoot','select','optgroup','dl','ol','ul','tr','datalist',
    );

    /**
     * Tells whether text directly inside the element can be trimmed on both sides.
     *
     * A tal:block that has a parent node defers to that parent. Otherwise the
     * element must be listed in $no_interelement_space and be in the XHTML
     * namespace or in no namespace.
     *
     * @param PHPTAL_Dom_Element $element element to check
     *
     * @return bool
     */
    private function hasNoInterelementSpace(PHPTAL_Dom_Element $element)
    {
        if ($element->getLocalName() === 'block'
            && $element->parentNode
            && $element->getNamespaceURI() === 'http://xml.zope.org/namespaces/tal') {
            return $this->hasNoInterelementSpace($element->parentNode);
        }

        return in_array($element->getLocalName(), self::$no_interelement_space, true)
            && ($element->getNamespaceURI() === 'http://www.w3.org/1999/xhtml' || $element->getNamespaceURI() === '');
    }

    /**
     * li is deliberately omitted, as it's commonly used with display:inline in menus.
     */
    private static $breaks_line = array(
        'address','article','aside','base','blockquote','body','br','dd','div','dl','dt','fieldset','figure',
        'footer','form','h1','h2','h3','h4','h5','h6','head','header','hgroup','hr','html','legend','link',
        'meta','nav','ol','option','p','param','pre','section','style','table','tbody','td','th','thead',
        'title','tr','ul','details',
    );

    /**
     * Tells whether the element's tags act as line breaks, making adjacent spaces removable.
     *
     * True for elements carrying metal:define-macro, for elements without a parent
     * node, and for XHTML/no-namespace elements listed in $breaks_line.
     *
     * @param PHPTAL_Dom_Element $element element to check
     *
     * @return bool
     */
    private function breaksLine(PHPTAL_Dom_Element $element)
    {
        if ($element->getAttributeNS('http://xml.zope.org/namespaces/metal', 'define-macro')) {
            return true;
        }

        if (!$element->parentNode) {
            return true;
        }

        if ($element->getNamespaceURI() !== 'http://www.w3.org/1999/xhtml'
            && $element->getNamespaceURI() !== '') {
            return false;
        }

        return in_array($element->getLocalName(), self::$breaks_line, true);
    }

    /**
     * replaced elements need to preserve spaces before and after
     */
    private static $inline_blocks = array(
        'select','input','button','img','textarea','output','progress','meter',
    );

    /**
     * Tells whether the element is an XHTML/no-namespace element listed in $inline_blocks.
     *
     * @param PHPTAL_Dom_Element $element element to check
     *
     * @return bool
     */
    private function isInlineBlock(PHPTAL_Dom_Element $element)
    {
        if ($element->getNamespaceURI() !== 'http://www.w3.org/1999/xhtml'
            && $element->getNamespaceURI() !== '') {
            return false;
        }

        return in_array($element->getLocalName(), self::$inline_blocks, true);
    }

    /**
     * Consistent sorting of attributes might give slightly better gzip performance
     *
     * Runs the parent's normalization first, then reorders the attribute nodes
     * with compareQNames() when the element has more than one attribute.
     *
     * @param PHPTAL_Dom_Element $element element whose attributes are reordered in place
     *
     * @return void
     */
    protected function normalizeAttributes(PHPTAL_Dom_Element $element)
    {
        parent::normalizeAttributes($element);

        $attrs_by_qname = array();
        foreach ($element->getAttributeNodes() as $attrnode) {
            // safe, as there can't be two attrs with same qname
            $attrs_by_qname[$attrnode->getQualifiedName()] = $attrnode;
        }

        if (count($attrs_by_qname) > 1) {
            uksort($attrs_by_qname, array($this, 'compareQNames'));
            $element->setAttributeNodes(array_values($attrs_by_qname));
        }
    }

    /**
     * pre-defined order of attributes roughly by popularity
     */
    private static $attributes_order = array(
        'href','src','class','rel','type','title','width','height','alt','content','name','style','lang','id',
    );

    /**
     * compare names according to $attributes_order array.
     * Elements that are not in array, are considered greater than all elements in array,
     * and are sorted alphabetically.
     *
     * @param string $a first qualified attribute name
     * @param string $b second qualified attribute name
     *
     * @return int negative, zero or positive, as expected by uksort()
     */
    private static function compareQNames($a, $b)
    {
        $a_index = array_search($a, self::$attributes_order, true);
        $b_index = array_search($b, self::$attributes_order, true);

        if ($a_index !== false && $b_index !== false) {
            return $a_index - $b_index;
        }

        if ($a_index === false && $b_index === false) {
            return strcmp($a, $b);
        }

        return ($a_index === false) ? 1 : -1;
    }

    /**
     * HTML5 doesn't care about boilerplate
     *
     * Only acts on XHTML/no-namespace elements and only when the output mode is
     * PHPTAL::HTML5. It shortens <meta http-equiv="Content-Type"> to <meta charset>,
     * drops type from stylesheet links and <style>, and drops language (and a
     * default or redundant type) from <script>.
     *
     * @param PHPTAL_Dom_Element $element element to optimize in place
     *
     * @return void
     */
    private function elementSpecificOptimizations(PHPTAL_Dom_Element $element)
    {
        if ($element->getNamespaceURI() !== 'http://www.w3.org/1999/xhtml'
            && $element->getNamespaceURI() !== '') {
            return;
        }

        if ($this->getPHPTAL()->getOutputMode() !== PHPTAL::HTML5) {
            return;
        }

        // <meta charset>
        if ('meta' === $element->getLocalName() &&
            $element->getAttributeNS('', 'http-equiv') === 'Content-Type') {
            $element->removeAttributeNS('', 'http-equiv');
            $element->removeAttributeNS('', 'content');
            $element->setAttributeNS('', 'charset', strtolower($this->getPHPTAL()->getEncoding()));
        } elseif (('link' === $element->getLocalName() && $element->getAttributeNS('', 'rel') === 'stylesheet') ||
            ('style' === $element->getLocalName())) {
            // There's only one type of stylesheets that works.
            $element->removeAttributeNS('', 'type');
        } elseif ('script' === $element->getLocalName()) {
            $element->removeAttributeNS('', 'language');

            // Only remove type that matches default. E4X, vbscript, coffeescript, etc. must be preserved
            // (cast keeps preg_match() quiet should the attribute getter ever yield null)
            $type = (string) $element->getAttributeNS('', 'type');
            $is_std = preg_match('/^(?:text|application)\/(?:ecma|java)script(\s*;\s*charset\s*=\s*[^;]*)?$/', $type);

            // Remote scripts should have type specified in HTTP headers.
            if ($is_std || $element->getAttributeNS('', 'src')) {
                $element->removeAttributeNS('', 'type');
            }
        }
    }
}