Location: PHPKode > projects > Open Media Collectors Database > functions/BooleanParser.class.inc
<?php
/* 	
 	Open Media Collectors Database
	Copyright (C) 2001,2006 by Jason Pell

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to the Free Software
	Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
class BooleanLexer
{
	var $round_brace;
	var $dbl_quote;
	var $error;
	
	var $stackPtr;
	var $tokenStack;
	var $lookahead;
	
	var $tokText;
	var $tokPtr;
	
	var $string;
	var $stringLen;
	
	// Do nothing.
	function BooleanLexer(){}
	
	function parse($string, $lookahead=NULL)
	{
		$this->string = $string;
        $this->stringLen = strlen($string);

		// Initialise		
		$this->round_brace = 0;
		
		if(is_numeric($lookahead))
			$this->lookahead = $lookahead;
		else
			$this->lookahead = 0; // no lookahead
		
		// Initialise lookahead stack
		$this->stackPtr = 0;
		$this->tokenStack = NULL;
	}
	
	/*
	* To get very last character, use get(-1), to get previous character from
	* current one, use -2.  To get next character,without iterating the pointer (to sneek a look),
	* use get(0)
	*/
	function get($idx=NULL)
	{
		if(is_numeric($idx))
		{
			// If idx is negative, this should work as well.
			$index = $this->tokPtr + $idx;
			if($index>=0 && $index < $this->stringLen)
				return $this->string{$index};
			else
				return NULL;
		}
		else
		{
			if($this->tokPtr < $this->stringLen)
				return $this->string{$this->tokPtr++};
			else
				return NULL;// reached end of string
		}
    }
	
	function unget()
	{
        --$this->tokPtr;
	}
	
	function getError()
	{
		return $this->error;
	}
	
	/*
	* Return current token, as returned from nextToken
	*/
	function getToken()
	{
		return $this->tokText;
	}
	
	/**
	*/
	function skipWhiteSpace()
	{
		$c = $this->get();
		if($c != NULL)//end of string
		{
			while($c == ' ' || $c == "\t" || $c == "\n" || $c == "\r")
			{
				$c = $this->get();
			}
		
			// unget last whitespace character.
			$this->unget();
		}			
	}

	/*
	* Convert and / or / not (or any case deviations ) to AND / OR / NOT
	*/
	function normaliseToken($token)
	{
		if(strcasecmp($token,'and')===0)
			return 'AND';
		else if(strcasecmp($token,'or')===0)
			return 'OR';
		else if(strcasecmp($token,'not')===0)
			return 'NOT';
		else
			return $token;
	}
	
	function nextToken()
	{
		if($this->lookahead>0)
		{
        	// The stackPtr, should always be the same as the count of
        	// elements in the tokenStack.  The stackPtr, can be thought
        	// of as pointing to the next token to be added.  If however
        	// a pushBack() call is made, the stackPtr, will be less than the
        	// count, to indicate that we should take that token from the
        	// stack, instead of calling nextToken for a new token.
			if($this->stackPtr < count($this->tokenStack))
			{
				$this->tokText = $this->tokenStack[$this->stackPtr];
            
            	// We have read the token, so now iterate again.
	            $this->stackPtr++;
    	        return $this->tokText;
			}
			else
			{
        	    // If $tokenStack is full (equal to lookahead), pop the oldest
            	// element off, to make room for the new one.
	            if ($this->stackPtr == $this->lookahead)
				{
        	        // For some reason array_shift and
            	    // array_pop screw up the indexing, so we do it manually.
                	for($i=0; $i<(count($this->tokenStack)-1); $i++)
					{
    	                $this->tokenStack[$i] = $this->tokenStack[$i+1];
        	        }
                
            	    // Indicate that we should put the element in
                	// at the stackPtr position.
	                $this->stackPtr--;
    	        }
        
				$this->tokText = $this->normaliseToken($this->_nextToken());
				$this->tokenStack[$this->stackPtr] = $this->tokText;
				$this->stackPtr++;
            	return $this->tokText;
			}				
        }
	    else
    	{
			$this->tokText = $this->normaliseToken($this->_nextToken());
        	return $this->tokText;
		}			
	}
	
	function pushBack()
	{
    	if($this->lookahead>0 && count($this->tokenStack)>0 && $this->stackPtr>0)
		{
        	$this->stackPtr--;
	    }
	}

	/*
	* Hidden function
	*/
	function _nextToken()
	{
		$this->tokText = NULL;
		$dbl_quote = FALSE;
		$expr = "";

		$this->skipWhiteSpace();
		while(true)
		{
			$c = $this->get();
			switch($c)
			{
				case '"':
					if($this->get(-2)=="\\")
						$expr .= $c;
					else
					{
						if($dbl_quote)
							return $expr;
						else
							$dbl_quote = TRUE;
					}
					break;
			
				case "\\":
					if($this->get(0)!="\"")//Only support escaping double quotes, otherwise
					{						//pass through the escaping characters
						$expr .= $c;
					}
					// else ignore
					break;
			
				case '(':
					if(!$dbl_quote)
					{
						if(strlen($expr)>0)
						{						
							// Unget quote
							$this->unget();
							return $expr;
						}
						else
						{
							$this->round_brace++;
							return $c;
						}
					}
					else
						$expr .= $c;
					break;
			
				case ')':
					if(!$dbl_quote)
					{
						if($this->round_brace>0)
						{
							if(strlen($expr)>0)
							{						
								// Unget bracket
								$this->unget();
								return $expr;
							}
							else
							{
								$this->round_brace--;
								return $c;
							}
						}
						else
						{
							$this->error = "Mismatched braces";
							return FALSE;
						}
					}
					else
						$expr .= $c;
					break;
		
				case ' ':
				case "\t":
				case "\n":
				case "\r":
					if($dbl_quote)
						$expr .= $c;
					else
						return $expr; // Indicates end of token
						
					break;
				
				case NULL: // end of string
					if(strlen($expr)>0)
						return $expr;
					else
						return NULL;
					
				default:
					$expr .= $c;
			}//switch
		}//while
	}
}

class BooleanParser
{
	var $lexer = NULL;
	
	// Do nothing.
	function BooleanParser(){}
	
	function parseBooleanStatement($statement)
	{
		if($this->lexer == NULL)
			$this->lexer = new BooleanLexer();
			
		$this->lexer->parse($statement,1);
		
		while( true )
		{
			$statement = $this->parseStatement();
			if($statement===FALSE)
				return FALSE;
			else if($statement!==NULL)
				$statements[] = $statement;
			else
				break; // finished
		}
		
		return $statements;
	}
	
	function getError()
	{
		return $this->lexer->getError();
	}
	
	function parseStatement()
	{
		$conditions[] = $this->parseCompoundStatement();
		$token = $this->lexer->nextToken();
		while($token == 'OR')
		{
			$conditions[] = $this->parseCompoundStatement();
			$token = $this->lexer->nextToken();
		}
		$this->lexer->pushBack();
		
		if(is_array($conditions) && count($conditions)>1)
			return array('or'=>$conditions);
		else
			return $conditions[0];
	}
	
	/*
	* Will parse several basic 'left <op> right' condition statements, as
	* long as they are separated by AND tokens.
	* 
	* Will also support conditions, enclosed in brackets, and treat them
	* as normal compound conditions.
	* 
	* So the following will be supported
	* 
	* 	<left> <op> <right> AND (<left> <op> <right> OR <left> <op> <right>)
	*/
	function parseCompoundStatement()
	{
		$token = $this->lexer->nextToken();
		if($token == 'NOT')
		{
			return array('not'=>
				$this->parseStatement());
		}
		else if($token == '(')
		{
			$condition = $this->parseStatement();
			$token = $this->lexer->nextToken();
			if($token != ')')
			{
				return FALSE; // should never happen!
			}
		}
		else if($this->isTextToken($token))
		{
			$condition = $token;
		}
		
		if($condition !== FALSE)
		{
			$conditions[] = $condition;
			while(true)
			{
				$token = $this->lexer->nextToken();
				if($token == 'AND')
				{
					$condition = $this->parseCompoundStatement();
					if($condition !== FALSE)
						$conditions[] = $condition;
					else
						return FALSE;
				}//if($token == 'and')
				else
				{
					$this->lexer->pushBack();
					break;
				}
			}

			if(is_array($conditions) && count($conditions)>1)
				return array('and'=>$conditions);
			else
				return $conditions[0];
		}
		else//if($condition !== FALSE)
		{
			return FALSE;
		}
	}
	
	function isTextToken($token)
	{
		if($token == NULL || $token == '(' || $token == ')' || $token == 'AND' || $token == 'OR' || $token == 'NOT')
			return FALSE;
		else
			return TRUE;
	}
}

// ------------------------------------------------
// Utility Functions
// ------------------------------------------------

/*
* @param $column_name
* @param $column_value
* @param $match_mode ["word" | "exact" | "partial"]
*/
function get_compare_clause($column_name, $column_value, $match_mode, $case_sensitive)
{
	$column_value_wildcard = FALSE;
	for($i=0; $i<strlen($column_value); $i++)
	{
		if( ($column_value[$i] == '%' || $column_value[$i] == '_') && ($i == 0 || $column_value[$i-1] != '\\'))
		{
			$column_value_wildcard = TRUE;
		}
	}

	if($column_value_wildcard)
	{
		return "UPPER($column_name) LIKE '".trim($column_value)."'";
	}
	else
	{
		if(strcasecmp($match_mode,"word")===0)
		{	
			$column_value = trim($column_value);
			if(is_null($case_sensitive))
			{
				return "($column_name RLIKE '[[:<:]]".$column_value."[[:>:]]')";
			}
			else
			{
				return "($column_name RLIKE BINARY '[[:<:]]".$column_value."[[:>:]]')";
			}
		}
		else if(strcasecmp($match_mode,"partial")===0)
		{
			$column_value = trim($column_value);
			if(is_null($case_sensitive))
			{
				return "$column_name LIKE '%".$column_value."%'";
			}
			else
			{
				return "$column_name LIKE BINARY '%".$column_value."%'";
			}
		}
		else if(strcasecmp($match_mode,"exact")===0)
		{
			if(is_null($case_sensitive))
			{		
				return "$column_name = '".str_replace('\_','_',trim($column_value))."'";
			}
			else
			{
				return "BINARY $column_name = '".str_replace('\_','_',trim($column_value))."'";
			}
		}
		else // plain
		{
			if(is_null($case_sensitive))
			{
				return "$column_name = '".str_replace('\_','_',$column_value)."'";
			}
			else
			{
				return "BINARY $column_name = '".str_replace('\_','_',$column_value)."'";
			}
		}
	}
}

/*
* Builds a where sub-clause based on the statements array returned 
* from BooleanParser::parseBooleanStatement(...)
*/
function build_boolean_clause($statement_rs, $column_name, $match_mode, $mode='AND', $case_sensitive=NULL)
{
	$query = "";
	
	while(list($key,$statement) = each($statement_rs))
	{
		if(strlen($query)>0)
			$query .= " $mode ";
			
		if(is_array($statement['not']))
		{
			$query .= "NOT (".build_boolean_clause($statement['not'], $column_name, $match_mode, $mode, $case_sensitive).")";
		}
		else if(is_array($statement['and']))
		{
			$query .= "(".build_boolean_clause($statement['and'], $column_name, $match_mode, 'AND', $case_sensitive).")";
		}
		else if(is_array($statement['or']))
		{
			$query .= "(".build_boolean_clause($statement['or'], $column_name, $match_mode, 'OR', $case_sensitive).")";
		}
		else if(is_array($statement))
		{
			$query .= "(";
			
			if(isset($statement['not']))
				$query .= "NOT ";
			
			$query .= "(".build_boolean_clause($statement, $column_name, $match_mode, $key=='or'?'OR':$mode, $case_sensitive).")";

			$query .= ")";
		}
		else
		{
			$query .= get_compare_clause($column_name, $statement, $match_mode, $case_sensitive);
		}
	}
	
	return $query;
}
?>
Return current item: Open Media Collectors Database