<?php
/**
* php-sql-parser.php
*
* A pure PHP SQL (non validating) parser w/ focus on MySQL dialect of SQL
*
* Copyright (c) 2010-2012, Justin Swanhart
* with contributions by André Rothe <hide@address.com, hide@address.com>
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
if (!defined('HAVE_PHP_SQL_PARSER')) {
require_once(dirname(__FILE__) . '/classes/parser-utils.php');
require_once(dirname(__FILE__) . '/classes/lexer.php');
require_once(dirname(__FILE__) . '/classes/position-calculator.php');
/**
* This class implements the parser functionality.
* @author hide@address.com
* @author hide@address.com
*/
class PHPSQLParser extends PHPSQLParserUtils {
private $lexer;
public function __construct($sql = false, $calcPositions = false) {
$this->lexer = new PHPSQLLexer();
if ($sql) {
$this->parse($sql, $calcPositions);
}
}
public function parse($sql, $calcPositions = false) {
#lex the SQL statement
$inputArray = $this->split_sql($sql);
#This is the highest level lexical analysis. This is the part of the
#code which finds UNION and UNION ALL query parts
$queries = $this->processUnion($inputArray);
# If there was no UNION or UNION ALL in the query, then the query is
# stored at $queries[0].
if (!$this->isUnion($queries)) {
$queries = $this->processSQL($queries[0]);
}
# calc the positions of some important tokens
if ($calcPositions) {
$calculator = new PositionCalculator();
$queries = $calculator->setPositionsWithinSQL($sql, $queries);
}
# store the parsed queries
$this->parsed = $queries;
return $this->parsed;
}
private function processUnion($inputArray) {
$outputArray = array();
#sometimes the parser needs to skip ahead until a particular
#token is found
$skipUntilToken = false;
#This is the last type of union used (UNION or UNION ALL)
#indicates a) presence of at least one union in this query
# b) the type of union if this is the first or last query
$unionType = false;
#Sometimes a "query" consists of more than one query (like a UNION query)
#this array holds all the queries
$queries = array();
foreach ($inputArray as $key => $token) {
$trim = trim($token);
# overread all tokens till that given token
if ($skipUntilToken) {
if ($trim === "") {
continue; # read the next token
}
if (strtoupper($trim) === $skipUntilToken) {
$skipUntilToken = false;
continue; # read the next token
}
}
if (strtoupper($trim) !== "UNION") {
$outputArray[] = $token; # here we get empty tokens, if we remove these, we get problems in parse_sql()
continue;
}
$unionType = "UNION";
# we are looking for an ALL token right after UNION
for ($i = $key + 1; $i < count($inputArray); ++$i) {
if (trim($inputArray[$i]) === "") {
continue;
}
if (strtoupper($inputArray[$i]) !== "ALL") {
break;
}
# the other for-loop should overread till "ALL"
$skipUntilToken = "ALL";
$unionType = "UNION ALL";
}
# store the tokens related to the unionType
$queries[$unionType][] = $outputArray;
$outputArray = array();
}
# the query tokens after the last UNION or UNION ALL
# or we don't have an UNION/UNION ALL
if (!empty($outputArray)) {
if ($unionType) {
$queries[$unionType][] = $outputArray;
} else {
$queries[] = $outputArray;
}
}
return $this->processMySQLUnion($queries);
}
/** MySQL supports a special form of UNION:
* (select ...)
* union
* (select ...)
*
* This function handles this query syntax. Only one such subquery
* is supported in each UNION block. (select)(select)union(select) is not legal.
* The extra queries will be silently ignored.
*/
private function processMySQLUnion($queries) {
$unionTypes = array('UNION', 'UNION ALL');
foreach ($unionTypes as $unionType) {
if (empty($queries[$unionType])) {
continue;
}
foreach ($queries[$unionType] as $key => $tokenList) {
foreach ($tokenList as $z => $token) {
$token = trim($token);
if ($token === "") {
continue;
}
# starts with "(select"
if (preg_match("/^\\(\\s*select\\s*/i", $token)) {
$queries[$unionType][$key] = $this->parse($this->removeParenthesisFromStart($token));
break;
}
$queries[$unionType][$key] = $this->processSQL($queries[$unionType][$key]);
break;
}
}
}
# it can be parsed or not
return $queries;
}
private function isUnion($queries) {
$unionTypes = array('UNION', 'UNION ALL');
foreach ($unionTypes as $unionType) {
if (!empty($queries[$unionType])) {
return true;
}
}
return false;
}
#this function splits up a SQL statement into easy to "parse"
#tokens for the SQL processor
private function split_sql($sql) {
return $this->lexer->split($sql);
}
/* This function breaks up the SQL statement into logical sections.
Some sections are then further handled by specialized functions.
*/
private function processSQL(&$tokens) {
$prev_category = "";
$token_category = "";
$skip_next = false;
$out = false;
$tokenCount = count($tokens);
for ($tokenNumber = 0; $tokenNumber < $tokenCount; ++$tokenNumber) {
$token = $tokens[$tokenNumber];
$trim = trim($token); # this removes also \n and \t!
# if it starts with an "(", it should follow a SELECT
if ($trim !== "" && $trim[0] == "(" && $token_category == "") {
$token_category = 'SELECT';
}
/* If it isn't obvious, when $skip_next is set, then we ignore the next real
token, that is we ignore whitespace.
*/
if ($skip_next) {
if ($trim === "") {
if ($token_category !== "") { # is this correct??
$out[$token_category][] = $token;
}
continue;
}
#to skip the token we replace it with whitespace
$trim = "";
$token = "";
$skip_next = false;
}
$upper = strtoupper($trim);
switch ($upper) {
/* Tokens that get their own sections. These keywords have subclauses. */
case 'SELECT':
case 'ORDER':
case 'LIMIT':
case 'SET':
case 'DUPLICATE':
case 'VALUES':
case 'GROUP':
case 'ORDER':
case 'HAVING':
case 'WHERE':
case 'RENAME':
case 'CALL':
case 'PROCEDURE':
case 'FUNCTION':
case 'DATABASE':
case 'SERVER':
case 'LOGFILE':
case 'DEFINER':
case 'RETURNS':
case 'TABLESPACE':
case 'TRIGGER':
case 'DO':
case 'PLUGIN':
case 'FROM':
case 'FLUSH':
case 'KILL':
case 'RESET':
case 'START':
case 'STOP':
case 'PURGE':
case 'EXECUTE':
case 'PREPARE':
case 'DEALLOCATE':
if ($trim === 'DEALLOCATE') {
$skip_next = true;
}
/* this FROM is different from FROM in other DML (not join related) */
if ($token_category === 'PREPARE' && $upper === 'FROM') {
continue 2;
}
$token_category = $upper;
break;
case 'EVENT':
# issue 71
if ($prev_category === 'DROP' || $prev_category === 'ALTER' || $prev_category === 'CREATE') {
$token_category = $upper;
}
break;
case 'DATA':
# prevent wrong handling of DATA as keyword
if ($prev_category === 'LOAD') {
$token_category = $upper;
}
break;
case 'PASSWORD':
# prevent wrong handling of PASSWORD as keyword
if ($prev_category === 'SET') {
$token_category = $upper;
}
break;
case 'INTO':
# prevent wrong handling of CACHE within LOAD INDEX INTO CACHE...
if ($prev_category === 'LOAD') {
$out[$prev_category][] = $upper;
continue 2;
}
$token_category = $upper;
break;
case 'USER':
# prevent wrong processing as keyword
if ($prev_category === 'CREATE' || $prev_category === 'RENAME' || $prev_category === 'DROP') {
$token_category = $upper;
}
break;
case 'VIEW':
# prevent wrong processing as keyword
if ($prev_category === 'CREATE' || $prev_category === 'ALTER' || $prev_category === 'DROP') {
$token_category = $upper;
}
break;
/* These tokens get their own section, but have no subclauses.
These tokens identify the statement but have no specific subclauses of their own. */
case 'DELETE':
case 'ALTER':
case 'INSERT':
case 'REPLACE':
case 'TRUNCATE':
case 'CREATE':
case 'TRUNCATE':
case 'OPTIMIZE':
case 'GRANT':
case 'REVOKE':
case 'SHOW':
case 'HANDLER':
case 'LOAD':
case 'ROLLBACK':
case 'SAVEPOINT':
case 'UNLOCK':
case 'INSTALL':
case 'UNINSTALL':
case 'ANALZYE':
case 'BACKUP':
case 'CHECK':
case 'CHECKSUM':
case 'REPAIR':
case 'RESTORE':
case 'DESCRIBE':
case 'EXPLAIN':
case 'USE':
case 'HELP':
$token_category = $upper; /* set the category in case these get subclauses
in a future version of MySQL */
$out[$upper][0] = $upper;
continue 2;
break;
case 'CACHE':
if ($prev_category === "" || $prev_category === 'RESET' || $prev_category === 'FLUSH'
|| $prev_category === 'LOAD') {
$token_category = $upper;
continue 2;
}
break;
/* This is either LOCK TABLES or SELECT ... LOCK IN SHARE MODE*/
case 'LOCK':
if ($token_category === "") {
$token_category = $upper;
$out[$upper][0] = $upper;
} else {
$trim = 'LOCK IN SHARE MODE';
$skip_next = true;
$out['OPTIONS'][] = $trim;
}
continue 2;
break;
case 'USING': /* USING in FROM clause is different from USING w/ prepared statement*/
if ($token_category === 'EXECUTE') {
$token_category = $upper;
continue 2;
}
if ($token_category === 'FROM' && !empty($out['DELETE'])) {
$token_category = $upper;
continue 2;
}
break;
/* DROP TABLE is different from ALTER TABLE DROP ... */
case 'DROP':
if ($token_category !== 'ALTER') {
$token_category = $upper;
$out[$upper][0] = $upper;
continue 2;
}
break;
case 'FOR':
$skip_next = true;
$out['OPTIONS'][] = 'FOR UPDATE';
continue 2;
break;
case 'UPDATE':
if ($token_category === "") {
$token_category = $upper;
continue 2;
}
if ($token_category === 'DUPLICATE') {
continue 2;
}
break;
case 'START':
$trim = "BEGIN";
$out[$upper][0] = $upper;
$skip_next = true;
break;
/* These tokens are ignored. */
case 'BY':
case 'ALL':
case 'SHARE':
case 'MODE':
case 'TO':
case ';':
continue 2;
break;
case 'KEY':
if ($token_category === 'DUPLICATE') {
continue 2;
}
break;
/* These tokens set particular options for the statement. They never stand alone.*/
case 'DISTINCTROW':
$trim = 'DISTINCT';
case 'DISTINCT':
case 'HIGH_PRIORITY':
case 'LOW_PRIORITY':
case 'DELAYED':
case 'IGNORE':
case 'FORCE':
case 'STRAIGHT_JOIN':
case 'SQL_SMALL_RESULT':
case 'SQL_BIG_RESULT':
case 'QUICK':
case 'SQL_BUFFER_RESULT':
case 'SQL_CACHE':
case 'SQL_NO_CACHE':
case 'SQL_CALC_FOUND_ROWS':
$out['OPTIONS'][] = $upper;
continue 2;
break;
case 'WITH':
if ($token_category === 'GROUP') {
$skip_next = true;
$out['OPTIONS'][] = 'WITH ROLLUP';
continue 2;
}
break;
case 'AS':
break;
case '':
case ',':
case ';':
break;
default:
break;
}
# remove obsolete category after union (empty category because of
# empty token before select)
if ($token_category !== "" && ($prev_category === $token_category)) {
$out[$token_category][] = $token;
}
$prev_category = $token_category;
}
return $this->processSQLParts($out);
}
private function processSQLParts($out) {
if (!$out) {
return false;
}
if (!empty($out['SELECT'])) {
$out['SELECT'] = $this->process_select($out['SELECT']);
}
if (!empty($out['FROM'])) {
$out['FROM'] = $this->process_from($out['FROM']);
}
if (!empty($out['USING'])) {
$out['USING'] = $this->process_from($out['USING']);
}
if (!empty($out['UPDATE'])) {
$out['UPDATE'] = $this->process_from($out['UPDATE']);
}
if (!empty($out['GROUP'])) {
# set empty array if we have partial SQL statement
$out['GROUP'] = $this->process_group($out['GROUP'], isset($out['SELECT']) ? $out['SELECT'] : array());
}
if (!empty($out['ORDER'])) {
# set empty array if we have partial SQL statement
$out['ORDER'] = $this->process_order($out['ORDER'], isset($out['SELECT']) ? $out['SELECT'] : array());
}
if (!empty($out['LIMIT'])) {
$out['LIMIT'] = $this->process_limit($out['LIMIT']);
}
if (!empty($out['WHERE'])) {
$out['WHERE'] = $this->process_expr_list($out['WHERE']);
}
if (!empty($out['HAVING'])) {
$out['HAVING'] = $this->process_expr_list($out['HAVING']);
}
if (!empty($out['SET'])) {
$out['SET'] = $this->process_set_list($out['SET'], isset($out['UPDATE']));
}
if (!empty($out['DUPLICATE'])) {
$out['ON DUPLICATE KEY UPDATE'] = $this->process_set_list($out['DUPLICATE']);
unset($out['DUPLICATE']);
}
if (!empty($out['INSERT'])) {
$out = $this->process_insert($out);
}
if (!empty($out['REPLACE'])) {
$out = $this->process_insert($out, 'REPLACE');
}
if (!empty($out['DELETE'])) {
$out = $this->process_delete($out);
}
if (!empty($out['VALUES'])) {
$out = $this->process_values($out);
}
if (!empty($out['INTO'])) {
$out = $this->process_into($out);
}
return $out;
}
/* A SET list is simply a list of key = value expressions separated by comma (,).
This function produces a list of the key/value expressions.
*/
private function getAssignment($base_expr) {
$assignment = $this->process_expr_list($this->split_sql($base_expr));
return array('expr_type' => 'expression', 'base_expr' => trim($base_expr), 'sub_tree' => $assignment);
}
private function getVariableType($expression) {
// $expression must contain only upper-case characters
if ($expression[1] !== "@") {
return 'user_variable';
}
$type = substr($expression, 2, strpos($expression, ".", 2));
switch ($type) {
case 'GLOBAL':
case 'LOCAL':
case 'SESSION':
$type = strtolower($type) . '_variable';
break;
default:
$type = 'session_variable';
break;
}
return $type;
}
private function process_set_list($tokens, $isUpdate) {
$result = array();
$baseExpr = "";
$assignment = false;
$varType = false;
foreach ($tokens as $token) {
$upper = strtoupper(trim($token));
switch ($upper) {
case 'LOCAL':
case 'SESSION':
case 'GLOBAL':
if (!$isUpdate) {
$varType = strtolower($upper) . '_variable';
$baseExpr = "";
continue 2;
}
break;
case ',':
$assignment = $this->getAssignment($baseExpr);
if (!$isUpdate) {
if ($varType !== false) {
$assignment['sub_tree'][0]['expr_type'] = $varType;
}
}
$result[] = $assignment;
$baseExpr = "";
$varType = false;
continue 2;
default:
}
$baseExpr .= $token;
}
if (trim($baseExpr) !== "") {
$assignment = $this->getAssignment($baseExpr);
if (!$isUpdate) {
if ($varType !== false) {
$assignment['sub_tree'][0]['expr_type'] = $varType;
}
}
$result[] = $assignment;
}
return $result;
}
/* This function processes the LIMIT section.
start,end are set. If only end is provided in the query
then start is set to 0.
*/
private function process_limit($tokens) {
$rowcount = "";
$offset = "";
$comma = -1;
$exchange = false;
for ($i = 0; $i < count($tokens); ++$i) {
$trim = trim($tokens[$i]);
if ($trim === ",") {
$comma = $i;
break;
}
if ($trim === "OFFSET") {
$comma = $i;
$exchange = true;
break;
}
}
for ($i = 0; $i < $comma; ++$i) {
if ($exchange) {
$rowcount .= $tokens[$i];
} else {
$offset .= $tokens[$i];
}
}
for ($i = $comma + 1; $i < count($tokens); ++$i) {
if ($exchange) {
$offset .= $tokens[$i];
} else {
$rowcount .= $tokens[$i];
}
}
return array('offset' => trim($offset), 'rowcount' => trim($rowcount));
}
/* This function processes the SELECT section. It splits the clauses at the commas.
Each clause is then processed by process_select_expr() and the results are added to
the expression list.
Finally, at the end, the epxression list is returned.
*/
private function process_select(&$tokens) {
$expression = "";
$expr = array();
foreach ($tokens as $token) {
if (trim($token) === ',') {
$expr[] = $this->process_select_expr(trim($expression));
$expression = "";
} else {
$expression .= $token;
}
}
if ($expression) {
$expr[] = $this->process_select_expr(trim($expression));
}
return $expr;
}
private function revokeEscaping($sql) {
$sql = trim($sql);
if (($sql[0] === '`') && ($sql[strlen($sql) - 1] === '`')) {
$sql = substr($sql, 1, -1);
}
return str_replace('``', '`', $sql);
}
private function isWhitespaceToken($token) {
return (trim($token) === "");
}
private function isCommentToken($token) {
return isset($token[0]) && isset($token[1])
&& (($token[0] === '-' && $token[1] === '-') || ($token[0] === '/' && $token[1] === '*'));
}
private function isColumnReference($out) {
return (isset($out['expr_type']) && $out['expr_type'] === 'colref');
}
private function isReserved($out) {
return (isset($out['expr_type']) && $out['expr_type'] === 'reserved');
}
private function isConstant($out) {
return (isset($out['expr_type']) && $out['expr_type'] === 'const');
}
private function isAggregateFunction($out) {
return (isset($out['expr_type']) && $out['expr_type'] === 'aggregate_function');
}
private function isFunction($out) {
return (isset($out['expr_type']) && $out['expr_type'] === 'function');
}
private function isExpression($out) {
return (isset($out['expr_type']) && $out['expr_type'] === 'expression');
}
private function isBrackedExpression($out) {
return (isset($out['expr_type']) && $out['expr_type'] === 'bracked_expression');
}
private function isSubQuery($out) {
return (isset($out['expr_type']) && $out['expr_type'] === 'subquery');
}
/* This fuction processes each SELECT clause. We determine what (if any) alias
is provided, and we set the type of expression.
*/
private function process_select_expr($expression) {
$tokens = $this->split_sql($expression);
$token_count = count($tokens);
/* Determine if there is an explicit alias after the AS clause.
If AS is found, then the next non-whitespace token is captured as the alias.
The tokens after (and including) the AS are removed.
*/
$base_expr = "";
$stripped = array();
$capture = false;
$alias = false;
$processed = false;
for ($i = 0; $i < $token_count; ++$i) {
$token = $tokens[$i];
$upper = strtoupper($token);
if ($upper === 'AS') {
$alias = array('as' => true, "name" => "", "base_expr" => $token);
$tokens[$i] = "";
$capture = true;
continue;
}
if (!$this->isWhitespaceToken($upper)) {
$stripped[] = $token;
}
// we have an explicit AS, next one can be the alias
// but also a comment!
if ($capture) {
if (!$this->isWhitespaceToken($upper) && !$this->isCommentToken($upper)) {
$alias['name'] .= $token;
array_pop($stripped);
}
$alias['base_expr'] .= $token;
$tokens[$i] = "";
continue;
}
$base_expr .= $token;
}
$stripped = $this->process_expr_list($stripped);
# TODO: the last part can also be a comment, don't use array_pop
# we remove the last token, if it is a colref,
# it can be an alias without an AS
$last = array_pop($stripped);
if (!$alias && $this->isColumnReference($last)) {
# TODO: it can be a comment, don't use array_pop
# check the token before the colref
$prev = array_pop($stripped);
if ($this->isReserved($prev) || $this->isConstant($prev) || $this->isAggregateFunction($prev)
|| $this->isFunction($prev) || $this->isExpression($prev) || $this->isSubQuery($prev)
|| $this->isColumnReference($prev) || $this->isBrackedExpression($prev)) {
$alias = array('as' => false, 'name' => trim($last['base_expr']),
'base_expr' => trim($last['base_expr']));
#remove the last token
array_pop($tokens);
$base_expr = join("", $tokens);
}
}
if (!$alias) {
$base_expr = join("", $tokens);
} else {
/* remove escape from the alias */
$alias['name'] = $this->revokeEscaping(trim($alias['name']));
$alias['base_expr'] = trim($alias['base_expr']);
}
# this is always done with $stripped, how we do it twice?
$processed = $this->process_expr_list($tokens);
# if there is only one part, we copy the expr_type
# in all other cases we use "expression" as global type
$type = 'expression';
if (count($processed) == 1) {
if (!$this->isSubQuery($processed[0])) {
$type = $processed[0]['expr_type'];
$base_expr = $processed[0]['base_expr'];
$processed = $processed[0]['sub_tree']; // it can be FALSE
}
}
return array('expr_type' => $type, 'alias' => $alias, 'base_expr' => trim($base_expr),
'sub_tree' => $processed);
}
private function process_from(&$tokens) {
$parseInfo = $this->initParseInfoForFrom();
$expr = array();
$skip_next = false;
$i = 0;
foreach ($tokens as $token) {
$upper = strtoupper(trim($token));
if ($skip_next && $token !== "") {
$parseInfo['token_count']++;
$skip_next = false;
continue;
} else {
if ($skip_next) {
continue;
}
}
switch ($upper) {
case 'OUTER':
case 'LEFT':
case 'RIGHT':
case 'NATURAL':
case 'CROSS':
case ',':
case 'JOIN':
case 'INNER':
break;
default:
$parseInfo['expression'] .= $token;
if ($parseInfo['ref_type'] !== false) { # all after ON / USING
$parseInfo['ref_expr'] .= $token;
}
break;
}
switch ($upper) {
case 'AS':
$parseInfo['alias'] = array('as' => true, 'name' => "", 'base_expr' => $token);
$parseInfo['token_count']++;
$n = 1;
$str = "";
while ($str == "") {
$parseInfo['alias']['base_expr'] .= ($tokens[$i + $n] === "" ? " " : $tokens[$i + $n]);
$str = trim($tokens[$i + $n]);
++$n;
}
$parseInfo['alias']['name'] = $str;
$parseInfo['alias']['base_expr'] = trim($parseInfo['alias']['base_expr']);
continue;
case 'INDEX':
if ($token_category == 'CREATE') {
$token_category = $upper;
continue 2;
}
break;
case 'USING':
case 'ON':
$parseInfo['ref_type'] = $upper;
$parseInfo['ref_expr'] = "";
case 'CROSS':
case 'USE':
case 'FORCE':
case 'IGNORE':
case 'INNER':
case 'OUTER':
$parseInfo['token_count']++;
continue;
break;
case 'FOR':
$parseInfo['token_count']++;
$skip_next = true;
continue;
break;
case 'LEFT':
case 'RIGHT':
case 'STRAIGHT_JOIN':
$parseInfo['next_join_type'] = $upper;
break;
case ',':
$parseInfo['next_join_type'] = 'CROSS';
case 'JOIN':
if ($parseInfo['subquery']) {
$parseInfo['sub_tree'] = $this->parse($this->removeParenthesisFromStart($parseInfo['subquery']));
$parseInfo['expression'] = $parseInfo['subquery'];
}
$expr[] = $this->processFromExpression($parseInfo);
$parseInfo = $this->initParseInfoForFrom($parseInfo);
break;
default:
if ($upper === "") {
continue; # ends the switch statement!
}
if ($parseInfo['token_count'] === 0) {
if ($parseInfo['table'] === "") {
$parseInfo['table'] = $token;
}
} else if ($parseInfo['token_count'] === 1) {
$parseInfo['alias'] = array('as' => false, 'name' => trim($token), 'base_expr' => trim($token));
}
$parseInfo['token_count']++;
break;
}
++$i;
}
$expr[] = $this->processFromExpression($parseInfo);
return $expr;
}
private function initParseInfoForFrom($parseInfo = false) {
# first init
if ($parseInfo === false) {
$parseInfo = array('join_type' => "", 'saved_join_type' => "JOIN");
}
# loop init
return array('expression' => "", 'token_count' => 0, 'table' => "", 'alias' => false, 'join_type' => "",
'next_join_type' => "", 'saved_join_type' => $parseInfo['saved_join_type'],
'ref_type' => false, 'ref_expr' => false, 'base_expr' => false, 'sub_tree' => false,
'subquery' => "");
}
private function processFromExpression(&$parseInfo) {
$res = array();
# exchange the join types (join_type is save now, saved_join_type holds the next one)
$parseInfo['join_type'] = $parseInfo['saved_join_type']; # initialized with JOIN
$parseInfo['saved_join_type'] = ($parseInfo['next_join_type'] ? $parseInfo['next_join_type'] : 'JOIN');
# we have a reg_expr, so we have to parse it
if ($parseInfo['ref_expr'] !== false) {
$unparsed = $this->split_sql($this->removeParenthesisFromStart($parseInfo['ref_expr']));
// here we can get a comma separated list
foreach ($unparsed as $k => $v) {
if (trim($v) === ',') {
$unparsed[$k] = "";
}
}
$parseInfo['ref_expr'] = $this->process_expr_list($unparsed);
}
# there is an expression, we have to parse it
if (substr(trim($parseInfo['table']), 0, 1) == '(') {
$parseInfo['expression'] = $this->removeParenthesisFromStart($parseInfo['table']);
if (preg_match("/^\\s*select/i", $parseInfo['expression'])) {
$parseInfo['sub_tree'] = $this->parse($parseInfo['expression']);
$res['expr_type'] = 'subquery';
} else {
$tmp = $this->split_sql($parseInfo['expression']);
$parseInfo['sub_tree'] = $this->process_from($tmp);
$res['expr_type'] = 'table_expression';
}
} else {
$res['expr_type'] = 'table';
$res['table'] = $parseInfo['table'];
}
$res['alias'] = $parseInfo['alias'];
$res['join_type'] = $parseInfo['join_type'];
$res['ref_type'] = $parseInfo['ref_type'];
$res['ref_clause'] = $parseInfo['ref_expr'];
$res['base_expr'] = trim($parseInfo['expression']);
$res['sub_tree'] = $parseInfo['sub_tree'];
return $res;
}
private function processOrderExpression(&$parseInfo, $select) {
$parseInfo['expr'] = trim($parseInfo['expr']);
if ($parseInfo['expr'] === "") {
return false;
}
$parseInfo['expr'] = trim($this->revokeEscaping($parseInfo['expr']));
if (is_numeric($parseInfo['expr'])) {
$parseInfo['type'] = 'pos';
} else {
#search to see if the expression matches an alias
foreach ($select as $clause) {
if (!$clause['alias']) {
continue;
}
if ($clause['alias']['name'] === $parseInfo['expr']) {
$parseInfo['type'] = 'alias';
}
}
}
if ($parseInfo['type'] === "expression") {
$expr = $this->process_select_expr($parseInfo['expr']);
$expr['direction'] = $parseInfo['dir'];
unset($expr['alias']);
return $expr;
}
return array('expr_type' => $parseInfo['type'], 'base_expr' => $parseInfo['expr'],
'direction' => $parseInfo['dir']);
}
private function initParseInfoForOrder() {
return array('expr' => "", 'dir' => "ASC", 'type' => 'expression');
}
private function process_order($tokens, $select) {
$out = array();
$parseInfo = $this->initParseInfoForOrder();
if (!$tokens) {
return false;
}
foreach ($tokens as $token) {
$upper = strtoupper(trim($token));
switch ($upper) {
case ',':
$out[] = $this->processOrderExpression($parseInfo, $select);
$parseInfo = $this->initParseInfoForOrder();
break;
case 'DESC':
$parseInfo['dir'] = "DESC";
break;
case 'ASC':
$parseInfo['dir'] = "ASC";
break;
default:
$parseInfo['expr'] .= $token;
}
}
$out[] = $this->processOrderExpression($parseInfo, $select);
return $out;
}
private function process_group($tokens, $select) {
$out = array();
$parseInfo = $this->initParseInfoForOrder();
if (!$tokens) {
return false;
}
foreach ($tokens as $token) {
$trim = strtoupper(trim($token));
switch ($trim) {
case ',':
$parsed = $this->processOrderExpression($parseInfo, $select);
unset($parsed['direction']);
$out[] = $parsed;
$parseInfo = $this->initParseInfoForOrder();
break;
default:
$parseInfo['expr'] .= $token;
}
}
$parsed = $this->processOrderExpression($parseInfo, $select);
unset($parsed['direction']);
$out[] = $parsed;
return $out;
}
private function removeParenthesisFromStart($token) {
$parenthesisRemoved = 0;
$trim = trim($token);
if ($trim !== "" && $trim[0] === "(") { // remove only one parenthesis pair now!
$parenthesisRemoved++;
$trim[0] = " ";
$trim = trim($trim);
}
$parenthesis = $parenthesisRemoved;
$i = 0;
$string = 0;
while ($i < strlen($trim)) {
if ($trim[$i] === "\\") {
$i += 2; # an escape character, the next character is irrelevant
continue;
}
if ($trim[$i] === "'" || $trim[$i] === '"') {
$string++;
}
if (($string % 2 === 0) && ($trim[$i] === "(")) {
$parenthesis++;
}
if (($string % 2 === 0) && ($trim[$i] === ")")) {
if ($parenthesis == $parenthesisRemoved) {
$trim[$i] = " ";
$parenthesisRemoved--;
}
$parenthesis--;
}
$i++;
}
return trim($trim);
}
private function initParseInfoExprList($parseInfo = false) {
if ($parseInfo === false) {
return array('processed' => false, 'expr' => "", 'key' => false, 'token' => false, 'tokenType' => "",
'prevToken' => "", 'prevTokenType' => "", 'trim' => false, 'upper' => false);
}
$expr = $parseInfo['expr'];
$expr[] = array('expr_type' => $parseInfo['tokenType'], 'base_expr' => $parseInfo['token'],
'sub_tree' => $parseInfo['processed']);
return array('processed' => false, 'expr' => $expr, 'key' => false, 'token' => false, 'tokenType' => "",
'prevToken' => $parseInfo['upper'], 'prevTokenType' => $parseInfo['tokenType'],
'trim' => false, 'upper' => false);
}
/* Some sections are just lists of expressions, like the WHERE and HAVING clauses. This function
processes these sections. Recursive.
*/
private function process_expr_list($tokens) {
$parseInfo = $this->initParseInfoExprList();
$skip_next = false;
foreach ($tokens as $parseInfo['key'] => $parseInfo['token']) {
$parseInfo['trim'] = trim($parseInfo['token']);
if ($parseInfo['trim'] === "") {
continue;
}
if ($skip_next) {
# skip the next non-whitespace token
$skip_next = false;
continue;
}
$parseInfo['upper'] = strtoupper($parseInfo['trim']);
/* is it a subquery?*/
if (preg_match("/^\\(\\s*SELECT/i", $parseInfo['trim'])) {
#tokenize and parse the subquery.
#we remove the enclosing parenthesis for the tokenizer
$parseInfo['processed'] = $this->parse($this->removeParenthesisFromStart($parseInfo['trim']));
$parseInfo['tokenType'] = 'subquery';
} elseif ($parseInfo['upper'][0] === '(' && substr($parseInfo['upper'], -1) === ')') {
/* is it an inlist (upper is derived from trim!) */
# if we have a colref followed by a parenthesis pair,
# it isn't a colref, it is a user-function
if ($parseInfo['prevTokenType'] === 'colref' || $parseInfo['prevTokenType'] === 'function'
|| $parseInfo['prevTokenType'] === 'aggregate_function') {
$tmptokens = $this->split_sql($this->removeParenthesisFromStart($parseInfo['trim']));
foreach ($tmptokens as $k => $v) {
if (trim($v) == ',') {
unset($tmptokens[$k]);
}
}
$tmptokens = array_values($tmptokens);
$parseInfo['processed'] = $this->process_expr_list($tmptokens);
$last = array_pop($parseInfo['expr']);
$parseInfo['token'] = $last['base_expr'];
$parseInfo['tokenType'] = ($parseInfo['prevTokenType'] === 'colref' ? 'function'
: $parseInfo['prevTokenType']);
$parseInfo['prevTokenType'] = $parseInfo['prevToken'] = "";
}
if ($parseInfo['prevToken'] == 'IN') {
$tmptokens = $this->split_sql($this->removeParenthesisFromStart($parseInfo['trim']));
foreach ($tmptokens as $k => $v) {
if (trim($v) == ',') {
unset($tmptokens[$k]);
}
}
$tmptokens = array_values($tmptokens);
$parseInfo['processed'] = $this->process_expr_list($tmptokens);
$parseInfo['prevTokenType'] = $parseInfo['prevToken'] = "";
$parseInfo['tokenType'] = "in-list";
}
if ($parseInfo['prevToken'] == 'AGAINST') {
$tmptokens = $this->split_sql($this->removeParenthesisFromStart($parseInfo['trim']));
if (count($tmptokens) > 1) {
$match_mode = implode('', array_slice($tmptokens, 1));
$parseInfo['processed'] = array($list[0], $match_mode);
} else {
$parseInfo['processed'] = $list[0];
}
$parseInfo['prevTokenType'] = $parseInfo['prevToken'] = "";
$parseInfo['tokenType'] = "match-arguments";
}
} elseif ($parseInfo['upper'][0] === '@') {
// a variable
$parseInfo['tokenType'] = $this->getVariableType($parseInfo['upper']);
$parseInfo['processed'] = false;
} else {
/* it is either an operator, a colref or a constant */
switch ($parseInfo['upper']) {
case '*':
$parseInfo['processed'] = false; #no subtree
# last token is colref, const or expression
# it is an operator, in all other cases it is an all-columns-alias
# if the previous colref ends with a dot, the * is the all-columns-alias
if (!is_array($parseInfo['expr'])) {
$parseInfo['tokenType'] = "colref"; # single or first element of select -> *
break;
}
$last = array_pop($parseInfo['expr']);
if ($last['expr_type'] !== 'colref' && $last['expr_type'] !== 'const'
&& $last['expr_type'] !== 'expression') {
$parseInfo['expr'][] = $last;
$parseInfo['tokenType'] = "colref";
break;
}
if ($last['expr_type'] === 'colref' && substr($last['base_expr'], -1, 1) === ".") {
$last['base_expr'] .= '*'; # tablealias dot *
$parseInfo['expr'][] = $last;
continue 2;
}
$parseInfo['expr'][] = $last;
$parseInfo['tokenType'] = "operator";
break;
case 'AND':
case '&&':
case 'BETWEEN':
case 'AND':
case 'BINARY':
case '&':
case '~':
case '|':
case '^':
case 'DIV':
case '/':
case '<=>':
case '=':
case '>=':
case '>':
case 'IS':
case 'NOT':
case '<<':
case '<=':
case '<':
case 'LIKE':
case '%':
case '!=':
case '<>':
case 'REGEXP':
case '!':
case '||':
case 'OR':
case '>>':
case 'RLIKE':
case 'SOUNDS':
case 'XOR':
case 'IN':
$parseInfo['processed'] = false;
$parseInfo['tokenType'] = "operator";
break;
case 'NULL':
$parseInfo['processed'] = false;
$parseInfo['tokenType'] = 'const';
break;
case '-':
case '+':
// differ between preceding sign and operator
$parseInfo['processed'] = false;
if ($parseInfo['prevTokenType'] === 'colref' || $parseInfo['prevTokenType'] === 'function'
|| $parseInfo['prevTokenType'] === 'aggregate_function'
|| $parseInfo['prevTokenType'] === 'const'
|| $parseInfo['prevTokenType'] === 'subquery') {
$parseInfo['tokenType'] = "operator";
} else {
$parseInfo['tokenType'] = "sign";
}
break;
default:
switch ($parseInfo['token'][0]) {
case "'":
case '"':
$parseInfo['tokenType'] = 'const';
break;
case '`':
$parseInfo['tokenType'] = 'colref';
break;
default:
if (is_numeric($parseInfo['token'])) {
$parseInfo['tokenType'] = 'const';
if ($parseInfo['prevTokenType'] === 'sign') {
array_pop($parseInfo['expr']);
$parseInfo['token'] = $parseInfo['prevToken'] . $parseInfo['token'];
}
} else {
$parseInfo['tokenType'] = 'colref';
}
break;
}
$parseInfo['processed'] = false;
}
}
/* is a reserved word? */
if ($parseInfo['tokenType'] !== 'operator' && $parseInfo['tokenType'] !== 'in-list'
&& $parseInfo['tokenType'] !== 'function' && $parseInfo['tokenType'] !== 'aggregate_function'
&& in_array($parseInfo['upper'], parent::$reserved)) {
switch ($parseInfo['upper']) {
case 'AVG':
case 'SUM':
case 'COUNT':
case 'MIN':
case 'MAX':
case 'STDDEV':
case 'STDDEV_SAMP':
case 'STDDEV_POP':
case 'VARIANCE':
case 'VAR_SAMP':
case 'VAR_POP':
case 'GROUP_CONCAT':
case 'BIT_AND':
case 'BIT_OR':
case 'BIT_XOR':
$parseInfo['tokenType'] = 'aggregate_function';
break;
case 'NULL':
// it is a reserved word, but we would like to have set it as constant
$parseInfo['tokenType'] = 'const';
break;
default:
if (in_array($parseInfo['upper'], parent::$functions)) {
$parseInfo['tokenType'] = 'function';
} else {
$parseInfo['tokenType'] = 'reserved';
}
break;
}
}
if (!$parseInfo['tokenType']) {
if ($parseInfo['upper'][0] === '(') {
$local_expr = $this->removeParenthesisFromStart($parseInfo['trim']);
$parseInfo['tokenType'] = 'bracket_expression';
} else {
$local_expr = $parseInfo['trim'];
$parseInfo['tokenType'] = 'expression';
}
$parseInfo['processed'] = $this->process_expr_list($this->split_sql($local_expr));
}
$parseInfo = $this->initParseInfoExprList($parseInfo);
} // end of for-loop
return (is_array($parseInfo['expr']) ? $parseInfo['expr'] : false);
}
private function process_update($tokens) {
}
private function process_delete($tokens) {
$tables = array();
$del = $tokens['DELETE'];
foreach ($tokens['DELETE'] as $expression) {
if ($expression != 'DELETE' && trim($expression, ' .*') != "" && $expression != ',') {
$tables[] = trim($expression, '.* ');
}
}
if (empty($tables)) {
foreach ($tokens['FROM'] as $table) {
$tables[] = $table['table'];
}
}
$tokens['DELETE'] = array('TABLES' => $tables);
return $tokens;
}
private function process_insert($tokens, $token_category = 'INSERT') {
$table = "";
$cols = array();
$into = $tokens['INTO'];
foreach ($into as $token) {
if (trim($token) === "")
continue;
if ($table === "") {
$table = $token;
} elseif (empty($cols)) {
$cols[] = $token;
}
}
if (empty($cols)) {
$cols = false;
} else {
$columns = explode(",", $this->removeParenthesisFromStart($cols[0]));
$cols = array();
foreach ($columns as $k => $v) {
$cols[] = array('expr_type' => 'colref', 'base_expr' => trim($v));
}
}
unset($tokens['INTO']);
$tokens[$token_category] = array('table' => $table, 'columns' => $cols, 'base_expr' => $table);
return $tokens;
}
private function process_record($unparsed) {
$unparsed = $this->removeParenthesisFromStart($unparsed);
$values = $this->split_sql($unparsed);
foreach ($values as $k => $v) {
if (trim($v) === ",") {
$values[$k] = "";
}
}
return $this->process_expr_list($values);
}
private function process_values($tokens) {
$unparsed = "";
foreach ($tokens['VALUES'] as $k => $v) {
if (trim($v) === "") {
continue;
}
$unparsed .= $v;
}
$values = $this->split_sql($unparsed);
$parsed = array();
foreach ($values as $k => $v) {
if (trim($v) === ",") {
unset($values[$k]);
} else {
$values[$k] = array('expr_type' => 'record', 'base_expr' => $v, 'data' => $this->process_record($v));
}
}
$tokens['VALUES'] = array_values($values);
return $tokens;
}
/**
* TODO: This is a dummy function, we cannot parse INTO as part of SELECT
* at the moment
*/
private function process_into($tokens) {
$unparsed = $tokens['INTO'];
foreach ($unparsed as $k => $token) {
if ((trim($token) === "") || (trim($token) === ",")) {
unset($unparsed[$k]);
}
}
$tokens['INTO'] = array_values($unparsed);
return $tokens;
}
}
define('HAVE_PHP_SQL_PARSER', 1);
}