Location: PHPKode > projects > Pieforms > pieforms-php5-0.2.2/doc/geshi/geshi/languages/php/class.geshiphpcodeparser.php
<?php
/**
 * GeSHi - Generic Syntax Highlighter
 * <pre>
 *   File:   geshi/languages/php/class.geshiphpcodeparser.php
 *   Author: Nigel McNie
 *   E-mail: hide@address.com
 * </pre>
 * 
 * For information on how to use GeSHi, please consult the documentation
 * found in the docs/ directory, or online at http://geshi.org/docs/
 * 
 * This program is part of GeSHi.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 * 
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 *
 * @package    geshi
 * @subpackage lang
 * @author     Nigel McNie <hide@address.com>
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL
 * @copyright  (C) 2004 - 2006 Nigel McNie
 * @version    $Id: class.geshiphpcodeparser.php 909 2007-02-18 01:23:18Z oracleshinoda $
 * 
 */

/**
 * The GeSHiPHPCodeParser class.
 * 
 * This class utilises the known information from basic parsing to provide highlighting of:
 * 
 * <ul>
 *  <li>Constants defined using <kbd>define</kbd></li>
 *  <li>User-defined classes, even when only referred to and not defined in the source</li>
 *  <li>Function and method names, including function names where they are called</li>
 *  <li>PHPDoc to a higher degree than the basic parsing</li>
 * </ul>
 * 
 * @package    geshi
 * @subpackage lang
 * @author     Nigel McNie <hide@address.com>
 * @since      1.1.1
 * @version    $Revision: 909 $
 */
class GeSHiPHPCodeParser extends GeSHiCodeParser
{

    // {{{ properties

    /**#@+
     * @access private
     */

    /**
     * A flag that can be used for the "state" of parsing
     *
     * The methods of this class set it to one of '', 'class', 'define',
     * 'function' or 'phpdoc'.
     *
     * @var string
     */
    var $_state = '';

    /**
     * A list of class names detected in the source
     *
     * @var array
     */
    var $_classNames = array();

    /**
     * A list of constant names detected in the source
     *
     * @var array
     */
    var $_constants = array();

    /**
     * A list of phpdoc tags that have content that the parser should highlight
     *
     * @var array
     * {@internal Note that this list does not include valid tags that have _no_
     * content - e.g. static and some others}
     */
    var $_validPHPDocTags = array(
        'access', 'author', 'copyright', 'license', 'link', 'package', 'param',
        'return', 'revision', 'since', 'subpackage', 'var', 'version'
    );

    /**
     * Whether the current token is whitespace or not
     *
     * @var boolean
     */
    var $_tokenIsWhitespace = false;

    /**
     * The phpdoc tag whose content is currently being read ('' when none)
     *
     * Kept per instance so that two parsers never share phpdoc tag state.
     *
     * @var string
     */
    var $_phpdocState = '';

    /**
     * Whether the first content token of the current "param" tag has been seen
     *
     * @var boolean
     */
    var $_phpdocParamFlag = false;

    /**#@-*/

    // }}}
    // {{{ parseToken()

    /**
     * Runs every detection over one token and hands it to the stack handling.
     *
     * The detection methods receive the token, context name and data by
     * reference and may change them in place.
     *
     * @param  string $token        The source token
     * @param  string $context_name The context of the source token
     * @param  array  $data         Additional data
     * @return mixed                Whatever {@link _handleStackParsing()} returns: false
     *                              when the token was held back on the stack, otherwise
     *                              the token data to pass on
     * @todo [blocking 1.2.0] possibly highlight methods differently? may not be worth effort
     * @todo [blocking 1.2.0] What about php5 public/private methods? Do they work?
     * @todo [blocking 1.2.0] Highlight phpdoc internal rule, and check for other rules
     */
    function parseToken ($token, $context_name, $data)
    {
        // Store this result for use by called methods
        $this->_tokenIsWhitespace = geshi_is_whitespace($token);

        // We don't care about whitespace for these methods
        if (!$this->_tokenIsWhitespace) {
            $this->_detectClassNames($token, $context_name, $data);
            $this->_detectConstants($token, $context_name, $data);
            $this->_detectFunctionNames($token, $context_name, $data);
            $this->_fixHeredoc($token, $context_name, $data);
        }

        // This method needs to know about whitespace tokens so it can find the end of
        // each line after a phpdoc tag
        $this->_handlePHPDoc($token, $context_name, $data);

        // Some detections require special handling because they use the stack
        return $this->_handleStackParsing($token, $context_name, $data);
    }

    // }}}

    /**#@+
     * @access private
     */

    // {{{ _detectClassNames()

    /**
     * Detects class names in the source.
     *
     * The only class names not detected are static class names that are not referred to by
     * a class definition beforehand. These are detected using the stack (by looking for
     * tokens before the :: operator).
     *
     * @param string $token        The source token
     * @param string $context_name The context of the source token (may be changed)
     * @param array  $data         Additional data
     */
    function _detectClassNames (&$token, &$context_name, &$data)
    {
        if ('class' == $this->_state && !$this->_tokenIsWhitespace) {
            // We just read "class", "extends" or "new", so this token is a class name,
            // unless it is a variable (PHP allows new $class_name)
            if ('var' != substr($context_name, -3)) {
                $context_name = $this->_language . '/classname';
                if (!in_array($token, $this->_classNames, true)) {
                    $this->_classNames[] = $token;
                }
            }
            $this->_state = '';
        } elseif (('class' == $token || 'extends' == $token || 'new' == $token)
            && $this->_language . '/keyword' == $context_name) {
            // We are about to read a class name
            $this->_state = 'class';
        } elseif ($this->_language == $context_name
            && in_array($token, $this->_classNames, true)) {
            // Detected use of class name we have already detected
            $context_name .= '/classname';
        }
    }

    // }}}
    // {{{ _detectConstants()

    /**
     * Detects user-defined constants in the source.
     *
     * This will be fooled by:
     * <code> define('FO' . 'O', 'bar');
     * // or even...
     * define($foo, 'bar');</code>
     * Not much that can be done about this...
     *
     * @param string $token        The source token
     * @param string $context_name The context of the source token (may be changed)
     * @param array  $data         Additional data
     */
    function _detectConstants (&$token, &$context_name, &$data)
    {
        if ('define' == $this->_state && false !== strpos($context_name, 'string')
            && false === strpos($context_name, 'start')) {
            // We are in the define state and have found the constant name: a token in
            // a string context that is not the string's "start" context
            if (!in_array($token, $this->_constants, true)) {
                $this->_constants[] = $token;
            }
            $this->_state = '';
        } elseif ('define' == $this->_state && '(' != $token
            && $this->_language . '/symbol' == $context_name) {
            // Make sure we reset the state if the constant could not be found
            $this->_state = '';
        } elseif ('define' == $token && $this->_language . '/function' == $context_name) {
            // We found a "define" function call, so be ready to get the constant name
            $this->_state = 'define';
        } elseif ($this->_language == $context_name
            && in_array($token, $this->_constants, true)) {
            // Found a constant that we have already found
            $context_name .= '/definedconstant';
        }
    }

    // }}}
    // {{{ _handlePHPDoc()

    /**
     * Improves the standard phpdoc highlighting to highlight stuff on the line after tags.
     *
     * The tag being read is tracked in {@link $_phpdocState}; it is cleared when the
     * tag's content is complete, when a whitespace token containing a newline is seen
     * and when the phpdoc comment ends.
     *
     * @param string $token        The source token
     * @param string $context_name The context of the source token (may be changed)
     * @param array  $data         Additional data ('url' is set for link/license URLs)
     */
    function _handlePHPDoc (&$token, &$context_name, &$data)
    {
        $phpdoc_context = $this->_language . '/phpdoc_comment';

        if ('phpdoc' != $this->_state) {
            if ($context_name == $phpdoc_context . '/start') {
                // Entering a phpdoc comment: start with a clean slate
                $this->_state = 'phpdoc';
                $this->_phpdocState = '';
                $this->_phpdocParamFlag = false;
            }
            return;
        }

        if ($context_name == $phpdoc_context . '/end') {
            // Leaving the comment: no tag state may survive into the next one
            $this->_state = '';
            $this->_phpdocState = '';
            $this->_phpdocParamFlag = false;
            return;
        }

        if ($context_name == $phpdoc_context . '/tag') {
            // A new tag begins, so anything remembered about the previous one is stale
            $this->_phpdocState = $token;
            $this->_phpdocParamFlag = false;
            return;
        }

        if (!$this->_phpdocState) {
            // Not reading the content of any tag
            return;
        }

        if ($this->_tokenIsWhitespace) {
            // The content of a tag never continues past the end of the line
            if (false !== strpos($token, "\n")) {
                $this->_phpdocState = '';
                $this->_phpdocParamFlag = false;
            }
            return;
        }

        if ($context_name == $phpdoc_context
            && in_array($this->_phpdocState, $this->_validPHPDocTags, true)) {
            $context_name .= '/tagvalue';
        }

        switch ($this->_phpdocState) {
            case 'access': // one of public/private/protected, could check it
            case 'package':
            case 'return': // note: return requires a type after it (can't detect
            // because types could be int|false for example. Just note this
            case 'since':
            case 'subpackage':
            case 'var': // requires a type (can't be detected for same reason)
                // For all of those cases, only one token necessary afterwards
                $this->_phpdocState = '';
                break;
            case 'author':
            case 'copyright':
            case 'revision':
            case 'version':
                // The whole line is all good
                break;
            case 'param': // should be a type
                if (!$this->_phpdocParamFlag) {
                    // First token after the tag: the type
                    $this->_phpdocParamFlag = true;
                } else {
                    // Second token after the tag: expected to be the variable name
                    if ('$' == substr($token, 0, 1)) {
                        // Found a variable name after the type
                        $context_name = $phpdoc_context . '/var';
                    } else {
                        // Reset context as it has been converted incorrectly
                        $context_name = $phpdoc_context;
                    }
                    $this->_phpdocParamFlag = false;
                    $this->_phpdocState = '';
                }
                break;
            case 'license':
            case 'link':
                // Record URLs in the token data; both plain and secure HTTP count
                if ('http://' == substr($token, 0, 7)
                    || 'https://' == substr($token, 0, 8)) {
                    $data['url'] = $token;
                }
                break;
            default:
                // Unknown token
                $this->_phpdocState = '';
        }
    }

    // }}}
    // {{{ _detectFunctionNames()

    /**
     * Detects functions and methods in the source.
     *
     * @param string $token        The source token
     * @param string $context_name The context of the source token (may be changed)
     * @param array  $data         Additional data
     */
    function _detectFunctionNames (&$token, &$context_name, &$data)
    {
        if ('function' == $this->_state) {
            // We just read the keyword "function", so this token is a function
            // name, unless it is a "&" followed by a function name.
            if ($context_name == $this->_language . '/symbol') {
                // Ignore the symbol and keep waiting for the name
                return;
            }
            $context_name = $this->_language . '/functionname';
            $this->_state = '';
        } elseif ('function' == $token
            && $this->_language . '/keyword' == $context_name) {
            // We are about to read a function name
            $this->_state = 'function';
        }
    }

    // }}}
    // {{{ _fixHeredoc()

    /**
     * Makes symbols in heredoc tokens appear as symbols.
     *
     * @param  string $token        The source token (may be changed)
     * @param  string $context_name The context of the source token (may be changed)
     * @param  array  $data         Additional data
     */
    function _fixHeredoc (&$token, &$context_name, &$data)
    {
        if ($this->_language . '/heredoc/start' == $context_name) {
            if ('<<<' == substr($token, 0, 3)) {
                // This works because the '<<<' token will be pushed onto the
                // stack before the return result of this function.
                $token = substr($token, 3);
                $this->push('<<<', $this->_language . '/symbol', $data);
            }
        }
        if ($this->_language . '/heredoc/end' == $context_name) {
            // As per the regex for heredoc enders, the last character will
            // be a newline, which makes the second to last character the
            // optional semi-colon.
            if (substr($token, -2, 1) == ';') {
                // Push the ender without its last two characters, then let the
                // semi-colon and newline continue as a symbol token
                $this->push(substr($token, 0, -2), $this->_language . '/heredoc/end', $data);
                $token = ";\n";
                $context_name = $this->_language . '/symbol';
            }
        }
    }

    // }}}
    // {{{ _handleStackParsing()

    /**
     * Handles any parsing that uses the stack.
     *
     * The stack is used to find function calls (note: different to function definitions)
     * and also to find static classes that haven't otherwise been detected.
     *
     * Each stack entry is read as array(token, context name, ...): index 0 is
     * tested for whitespace and index 1 is compared against context names.
     *
     * @param  string $token        The source token
     * @param  string $context_name The context of the source token
     * @param  array  $data         Additional data
     * @return mixed                As for the return value of {@link parseToken()}
     * @todo [blocking 1.1.5] this method could take a fourth parameter to say whether to
     * detect just function calls, classnames or both, when it comes to configuring what
     * the code parser actually highlights.
     */
    function _handleStackParsing (&$token, &$context_name, &$data)
    {
        if (!$this->_stack) {
            if ($this->_language == $context_name && !$this->_tokenIsWhitespace) {
                // A bareword: hold it back until we know what follows it
                $this->push($token, $context_name, $data);
                return false;
            }

            // Some other random token that we don't care about
            return array($token, $context_name, $data);
        }

        if ($this->_language . '/symbol' != $context_name) {
            // Store this token (either whitespace or a & symbol) on our mini-stack
            $this->push($token, $context_name, $data);
            return false;
        }

        // Worth pointing out: the object splitter is :: and is forced to be
        // in the symbol context so that's why we check for :: instead of :.
        // Incidentally, that's actually quite a nice side effect of OO support.
        if ('(' == $token || '::' == $token) {
            // Change the last non-whitespace token to be a user function or static
            // class as depending on $token
            for ($i = count($this->_stack) - 1; $i >= 0; $i--) {
                if (geshi_is_whitespace($this->_stack[$i][0])) {
                    continue;
                }

                // The token is not whitespace: we have found the one place where
                // a function call could be. Only a bareword gets converted.
                if ($this->_language == $this->_stack[$i][1]) {
                    $this->_stack[$i][1] .= ('(' == $token) ? '/functioncall' : '/classname';

                    // Remember the class name itself (the token text at index 0,
                    // not its context name) so later uses are recognised too
                    if ('::' == $token
                        && !in_array($this->_stack[$i][0], $this->_classNames, true)) {
                        $this->_classNames[] = $this->_stack[$i][0];
                    }
                }
                break;
            }
        }

        // Add the symbol onto the stack and return
        return $this->flush($token, $context_name, $data);
    }

    // }}}

    /**#@-*/

}

?>
Return current item: Pieforms