Location: PHPKode > scripts > ER Manager > ermanager/queryparser.php
<?PHP
/**
 * Searchquerystring Parser
 *
 * Parses a search query into its tokens; can detect the + - AND OR NOT
 * operators and quoted phrases.
 *
 * @todo    remove german errorstrings, maybe handle umlauts and stuff...
 * @package search
 * @version $Id: queryparser.php,v 1.1.1.1 2004/07/28 12:09:06 pbworks Exp $
 */


/**
 * Query string parser
 *
 * Splits a search query into tokens (via tokenizer()) and turns them into a
 * list of search terms. Operator tokens (AND, OR, NOT, case-insensitive) are
 * collected until the next search word appears and are then attached to that
 * word after being normalised by cleanoperators(). A search word that is not
 * preceded by any operator gets 'AND'. Operators that are not followed by a
 * search word are discarded.
 *
 * Each entry of the returned array has the keys:
 *   'ops'      => operator string as returned by cleanoperators()
 *   'token'    => the search word or phrase with every '*' removed
 *   'wildcard' => '' (none), 'l' (token started with '*'),
 *                 'r' (token ended with '*') or 'lr' (both)
 *
 * @param   string  $query    Querystring
 * @param   string  &$errors  Stringreference; cleanoperators() appends its
 *                            messages to it
 * @return  array             parsed querytokens
 */
function queryparser($query,&$errors){
	$query = trim($query);
	// No character filtering is applied to the query; the filter that once
	// stripped disallowed characters here is disabled.

	$ops    = array();
	$struct = array();

	// Iterate with foreach instead of "while ($t = array_shift(...))": the
	// latter stops at the first falsy token (e.g. the string "0") and would
	// silently drop everything after it.
	foreach (tokenizer($query) as $current) {
		if (preg_match("/^(AND|OR|NOT)$/i",$current)) {
			// token is an operator: remember it for the next search word
			$ops[] = strtoupper($current);
			continue;
		}

		// token is a search word
		if (! count($ops)) {
			$ops[] = 'AND';   // default operator
		}
		$cleanops = cleanoperators($ops,$errors);

		// check wildcards at either end of the word
		$wild = '';
		if (substr($current,0,1) === '*') {
			$wild .= 'l';
		}
		if (substr($current,-1) === '*') {
			$wild .= 'r';
		}
		$current = str_replace('*','',$current);

		$struct[] = array('ops'      => $cleanops,
		                  'token'    => $current,
		                  'wildcard' => $wild);
		$ops = array();       // operators are consumed by this word
	}
	return $struct;
}

/**
 * Querystring tokenizer
 *
 * Splits a query string at whitespace into tokens. A '-' or '+' directly in
 * front of a word is rewritten into a separate 'NOT' / 'AND' token. Text
 * enclosed in double quotes becomes one single token (without the quotes,
 * trimmed). Tokens that are empty, or that contain nothing but '*', are
 * dropped.
 *
 * @param   string  $qstring  Querystring
 * @return  array             All tokens of the string, in input order
 */
function tokenizer($qstring){
	$tokens = array();

	// Prepend a blank so that a '-' / '+' at the very start of the query is
	// preceded by whitespace and is matched by the patterns below.
	// Note: the rewrite runs on the whole string, including quoted text.
	$qstring = ' '.$qstring;
	$qstring = preg_replace('/(\s)-(\S)/','\1NOT \2',$qstring);
	$qstring = preg_replace('/(\s)\+(\S)/','\1AND \2',$qstring);
	$qstring = trim($qstring);

	$inphrase = false;              // true between opening and closing quote
	$current  = '';
	$length   = strlen($qstring);   // loop invariant, computed once

	for ($i = 0; $i < $length; $i++) {
		// Square brackets: the curly-brace offset syntax ($str{$i}) is
		// deprecated since PHP 7.4 and a parse error since PHP 8.0.
		$char = $qstring[$i];

		// Inside a phrase only the closing quote separates tokens,
		// otherwise any whitespace character does.
		if ($inphrase) {
			$issep = ($char === '"');
		} else {
			$issep = (preg_match('/\s/',$char) === 1);
		}

		if ($issep) {
			$current = trim($current);
			// empty() also treats the string "0" as empty, so a bare 0
			// is never emitted as a token.
			if (!empty($current) && str_replace('*','',$current) != '') {
				$tokens[] = $current;
			}
			$current  = '';
			$inphrase = false;
		}
		elseif ($char === '"') {
			// opening quote: collect everything up to the next quote
			$inphrase = true;
		}
		else {
			$current .= $char;
		}
	}

	// flush whatever is left after the last separator (this also covers a
	// phrase whose closing quote is missing)
	$current = trim($current);
	if (!empty($current) && str_replace('*','',$current) != '') {
		$tokens[] = $current;
	}
	return $tokens;
}

/**
 * Operator cleaning
 *
 * Reduces the operators collected in front of one search word to a single
 * operator string. Duplicates are ignored and the operators are put into
 * the fixed order AND, OR, NOT. Combinations regarded as illogical are
 * rewritten, and a message describing the rewrite is appended to $errors:
 *   'AND OR' becomes 'OR'
 *   'OR NOT' becomes 'AND'
 * A lone 'NOT' is returned as 'AND NOT' without a message.
 *
 * @param   array   $ops      Operators
 * @param   string  &$errors  Stringreference to write errors back
 * @return  string            cleaned Operators
 */
function cleanoperators($ops,&$errors){
	$distinct = array_unique($ops);

	// bring the operators that are present into canonical order
	$ordered = array();
	foreach (array('AND','OR','NOT') as $candidate) {
		if (in_array($candidate,$distinct)) {
			$ordered[] = $candidate;
		}
	}
	$result = implode(" ",$ordered);

	// rewrite the combinations that are not allowed
	if (strpos($result,"AND OR") !== false) {
		$errors .= "Die logische Verknüpfung 'AND OR' ist nicht erlaubt und wurde in 'OR' umgewandelt.\n";
		$result = str_replace("AND OR","OR",$result);
	}
	if (strpos($result,"OR NOT") !== false) {
		$errors .= "Die logische Verknüpfung 'OR NOT' ist nicht erlaubt und wurde in 'AND' umgewandelt.\n";
		$result = str_replace("OR NOT","AND",$result);
	}

	// a negation on its own is combined with AND
	return ($result === 'NOT') ? 'AND NOT' : $result;
}


?>
Return current item: ER Manager