<?php
namespace gnomephp\string;
/**
*
* StringParser wraps a single string and lets you register replacement rules that are applied when it is parsed.
* __toString() is implemented, so casting the object to a string yields the parsed output.
*
* Once parsed, the string is not parsed again unless parse(true) is called.
*
* This class also offers several Java-like String methods, such as equals, substring, equalsIgnoreCase and more.
*
* @author peec
*
*/
class StringParser
{
    /**
     * The string being worked on; holds the parsed result once parse() has run.
     * @var string
     */
    protected $string;

    /**
     * Regexp rules applied with preg_replace(); each entry is array(pattern, replacement).
     * @var array
     */
    protected $regexpRulesReplace = array();

    /**
     * Regexp rules applied with preg_replace_callback(); each entry is array(pattern, callable).
     * @var array
     */
    protected $regexpRulesCallback = array();

    /**
     * Named regexp rules registered through addPattern(), keyed by name.
     * Each entry is array(pattern, default replacement or null).
     * @var array
     */
    protected $regexpNameRules = array();

    /**
     * Plain str_replace() rules; each entry is array(search, replacement).
     * @var array
     */
    protected $normalReplace = array();

    /**
     * Table of contents collected by addTocParser().
     * @var array Map of element id => header title
     */
    protected $tocTable = array();

    /**
     * Renderer used for the table of contents, set by addTocParser().
     * @var callable|null
     */
    protected $tocCallback = null;

    /**
     * Set to true once parse() has run.
     * @var boolean
     */
    private $parsed = false;

    /**
     * Constructor.
     * @param string $string The string to wrap.
     */
    public function __construct($string)
    {
        $this->string = $string;
    }

    /*
     * Java like functions.
     */

    /**
     * Returns the length of this string.
     * The length is measured with strlen(), i.e. in bytes, so a multi-byte
     * character counts as more than one.
     * @return int
     */
    public function length()
    {
        return strlen($this->string);
    }

    /**
     * Checks if the string is equal to another string.
     * Both sides are compared as strings, so numeric-looking values such as
     * '1e3' and '1000' are NOT considered equal.
     * @param string $string
     * @return boolean
     */
    public function equals($string)
    {
        return (string) $this->string === (string) $string;
    }

    /**
     * Checks if the string equals another string, ignoring case.
     * @param string $string
     * @return boolean
     */
    public function equalsIgnoreCase($string)
    {
        return strtoupper((string) $this->string) === strtoupper((string) $string);
    }

    /**
     * Checks if the string matches a regexp pattern.
     * @param string $regexp PCRE pattern including delimiters.
     * @return boolean true only when the pattern matched; false on no match or on a pattern error.
     */
    public function matches($regexp)
    {
        return preg_match($regexp, $this->string) === 1;
    }

    /**
     * Checks if the string is empty.
     * @return boolean
     */
    public function isEmpty()
    {
        return (string) $this->string === '';
    }

    /**
     * Returns a part of the string, following the semantics of PHP's substr().
     *
     * @param int $start
     * If start is non-negative, the returned string starts at that position, counting from zero.
     * If start is negative, the returned string starts that many characters from the end of the string.
     * @param int|null $length [optional]
     * If omitted (or null), everything from start to the end of the string is returned.
     * If positive, at most length characters are returned.
     * If negative, that many characters are omitted from the end of the string.
     * If 0, an empty string is returned.
     * @return string|false The extracted part of the string. When start lies beyond the end of
     * the string, substr() returns false on PHP < 8 and an empty string on PHP >= 8.
     */
    public function substring($start, $length = null)
    {
        // PHP < 8 treats a null length as 0, so the "no length" case needs its own call.
        if ($length === null) {
            return substr($this->string, $start);
        }
        return substr($this->string, $start, $length);
    }

    /**
     * Registers a regexp rule. Callable replacements are run through
     * preg_replace_callback(), everything else through preg_replace().
     *
     * Note: the decision is made with is_callable(), so a replacement string
     * that happens to be the name of a function is treated as a callback.
     *
     * @param string $regexp PCRE pattern including delimiters.
     * @param string|callable $replace Replacement string or callback.
     * @return $this
     */
    public function addReplace($regexp, $replace)
    {
        if (is_callable($replace)) {
            $this->regexpRulesCallback[] = array($regexp, $replace);
        } else {
            $this->regexpRulesReplace[] = array($regexp, $replace);
        }
        return $this;
    }

    /**
     * Registers a plain (non-regexp) str_replace() rule.
     * @param string $string Text to search for.
     * @param string $replace Replacement text.
     * @return $this
     */
    public function addNormalReplace($string, $replace)
    {
        $this->normalReplace[] = array($string, $replace);
        return $this;
    }

    /**
     * Collects header tags into a table of contents.
     *
     * Headers without an id attribute get a generated "header-..." id; headers that
     * already have an id keep it. A [toc] replacement is registered, so the tag can be
     * placed anywhere in the string. By default [toc] is rendered as an OL/LI list
     * (changeable with $closure).
     *
     * Notice: the string MUST be parsable by DOMDocument as HTML. The wrapped string is
     * replaced by the re-serialized body of the parsed document, with carriage
     * returns (\r) removed.
     *
     * NOTE(review): DOMDocument::loadHTML() is called without an explicit charset, so
     * non-ASCII input may need a charset hint in the markup - confirm against callers.
     *
     * @param callable|null $closure Renderer receiving the array of element id => title
     *                               and returning the HTML for the table of contents.
     * @param array $headers Tag names that should be added to the table of contents.
     * @return $this|false false when the string could not be loaded as HTML
     *                     (in that case [toc] is replaced with an empty string).
     */
    public function addTocParser($closure = null, $headers = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))
    {
        // Carriage returns are stripped before the markup is handed to libxml.
        $html = str_replace(chr(13), '', (string) $this->string);

        $doc = new \DOMDocument();
        $doc->resolveExternals = true;
        $doc->formatOutput = false;

        // Collect libxml parse warnings internally instead of silencing them with "@".
        // An empty string is rejected up front: loadHTML() refuses it (ValueError on PHP 8).
        $previous = libxml_use_internal_errors(true);
        $loaded = ($html !== '') && $doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if (!$loaded) {
            $this->addNormalReplace('[toc]', '');
            return false;
        }

        foreach ($doc->getElementsByTagName('*') as $node) {
            if (!in_array($node->nodeName, $headers, true)) {
                continue;
            }

            $id = $node->getAttribute('id');
            if ($id === '') {
                // No id yet: derive one from the header text and make it unique
                // among the entries collected so far.
                $slug = \gnomephp\Url::toAscii($node->nodeValue);
                while (isset($this->tocTable['header-' . $slug])) {
                    $slug .= '-1';
                }
                $id = 'header-' . $slug;
                $node->setAttribute('id', $id);
            }

            // The table key must be the element's real id, otherwise the
            // generated "#..." link would point at a non-existent anchor.
            $this->tocTable[$id] = $node->nodeValue;
        }

        // Replace the source with the (possibly id-augmented) body markup.
        $body = $doc->getElementsByTagName('body')->item(0);
        if ($body !== null) {
            $this->string = $this->getInnerHTML($body);
        }

        if (!is_callable($closure)) {
            $closure = function ($tocT) {
                $html = '';
                if (count($tocT) > 0) {
                    $html .= '<ol>';
                    foreach ($tocT as $link => $text) {
                        // Titles come from nodeValue (entities already decoded),
                        // so they have to be escaped again before being output.
                        $html .= '<li><a href="#' . htmlspecialchars((string) $link, ENT_QUOTES, 'UTF-8') . '">'
                            . htmlspecialchars((string) $text, ENT_QUOTES, 'UTF-8')
                            . '</a></li>';
                    }
                    $html .= '</ol>';
                }
                return $html;
            };
        }

        $this->tocCallback = $closure;
        $this->addNormalReplace('[toc]', $closure($this->tocTable));
        return $this;
    }

    /**
     * Renders the table of contents on its own, for cases where it should be
     * shown outside of the string (e.g. somewhere else in a view).
     * Remember to call addTocParser() before calling getTocHTML().
     *
     * @return mixed The renderer's output, or null when no renderer has been set.
     */
    public function getTocHTML()
    {
        if (!is_callable($this->tocCallback)) {
            return null;
        }
        $callback = $this->tocCallback;
        return $callback($this->tocTable);
    }

    /**
     * Generates the inner markup of a DOM node by serializing each child node.
     * @param \DOMNode|null $node
     * @return string Concatenated markup of all children; empty for a null node.
     */
    protected function getInnerHTML($node)
    {
        if ($node === null) {
            return '';
        }
        $innerHTML = '';
        foreach ($node->childNodes as $child) {
            $innerHTML .= $child->ownerDocument->saveXML($child);
        }
        return $innerHTML;
    }

    /**
     * Registers a callback for a BBCode style tag: [tag]content[/tag].
     * The callback receives the preg match array; index 1 holds the content.
     * @param string $tag Literal tag name (regex metacharacters are escaped).
     * @param callable $closure
     * @return $this
     */
    public function addBBCodeCallback($tag, $closure)
    {
        $quoted = preg_quote($tag, '/');
        $this->addReplace('/\[' . $quoted . '\](.*?)\[\/' . $quoted . '\]/s', $closure);
        return $this;
    }

    /**
     * Registers a named pattern without activating it; best practice when
     * preparing rules in a model. Activate it later with parsePattern().
     *
     * @param string $name Unique name of the pattern.
     * @param string $pattern Regexp pattern.
     * @param string|callable|null $defaultReplacePattern Replacement used when
     *        parsePattern() is called without one.
     * @return $this
     */
    public function addPattern($name, $pattern, $defaultReplacePattern = null)
    {
        $this->regexpNameRules[$name] = array($pattern, $defaultReplacePattern);
        return $this;
    }

    /**
     * Activates a pattern registered with addPattern(), e.g. from a view.
     * Unknown names are ignored.
     *
     * @param string $name Unique name of the pattern.
     * @param string|callable|null $replacePattern Replacement to use; when null the
     *        default given to addPattern() is used.
     * @return $this
     * @throws \Exception When neither a replacement nor a default replacement exists.
     */
    public function parsePattern($name, $replacePattern = null)
    {
        if (!isset($this->regexpNameRules[$name])) {
            return $this;
        }

        $rule = $this->regexpNameRules[$name];
        if ($replacePattern === null) {
            if ($rule[1] === null) {
                throw new \Exception("There are no replace pattern defined for $name pattern.");
            }
            $replacePattern = $rule[1];
        }

        $this->addReplace($rule[0], $replacePattern);
        return $this;
    }

    /**
     * Can be overridden to allow custom parsing.
     * Does nothing by default.
     */
    protected function parser()
    {
    }

    /**
     * Applies all registered rules to the string, in this order: the parser()
     * hook, regexp replacements, regexp callbacks, plain replacements.
     *
     * The result is cached: later calls do nothing unless $overrideParseCache is
     * true, in which case the rules are applied again to the current (already
     * parsed) string.
     *
     * @param boolean $overrideParseCache Force the rules to be applied again.
     * @return $this
     */
    public function parse($overrideParseCache = false)
    {
        if (!$overrideParseCache && $this->parsed) {
            return $this;
        }

        $this->parser();

        // Normal regexp.
        foreach ($this->regexpRulesReplace as $set) {
            $result = preg_replace($set[0], $set[1], $this->string);
            // preg_replace() yields null on error; keep the text instead of wiping it.
            if ($result !== null) {
                $this->string = $result;
            }
        }

        // Callback replace.
        foreach ($this->regexpRulesCallback as $set) {
            $result = preg_replace_callback($set[0], $set[1], $this->string);
            if ($result !== null) {
                $this->string = $result;
            }
        }

        // Normal.
        foreach ($this->normalReplace as $set) {
            $this->string = str_replace($set[0], $set[1], $this->string);
        }

        $this->parsed = true;
        return $this;
    }

    /**
     * Returns the parsed string, parsing it first if that has not happened yet.
     * @return string
     */
    public function __toString()
    {
        $this->parse();
        // __toString() must return a string even if a non-string value was wrapped.
        return (string) $this->string;
    }
}