<?PHP
/**
* Searchquerystring Parser
*
* Parses a search query into its tokens; can detect the + - AND OR NOT operators
* and phrases.
*
* @todo remove german errorstrings, maybe handle umlauts and stuff...
* @package search
* @version $Id: queryparser.php,v 1.1.1.1 2004/07/28 12:09:06 pbworks Exp $
*/
/**
* Query string parser
*
* Splits the query into tokens with tokenizer() and converts them into a list
* of search terms. Operator tokens (AND, OR, NOT, case-insensitive) are
* collected until the next search term and attached to that term after being
* normalised by cleanoperators(). A term without any preceding operator gets
* 'AND'. Operators that are not followed by a search term are dropped.
*
* Every entry of the returned list is an array with the keys:
*  - 'ops'      normalised operator string returned by cleanoperators()
*  - 'token'    the search word or phrase with every '*' removed
*  - 'wildcard' '' (none), 'l' (leading '*'), 'r' (trailing '*') or 'lr'
*
* @param string $query Querystring
* @param string &$errors Stringreference to write errors back
* @return array parsed querytokens
*/
function queryparser($query,&$errors){
    $query = trim($query);

    // A filter for disallowed characters used to live here but was commented
    // out; the query is currently handed to the tokenizer unfiltered.

    $struct = array();
    $ops    = array();

    // Iterate with foreach instead of "while ($t = array_shift(...))": the
    // latter stops at the first falsy token (e.g. "0") and would silently
    // discard the remainder of the query.
    foreach (tokenizer($query) as $current) {
        if (preg_match('/^(AND|OR|NOT)$/i', $current)) {
            // token is an operator: remember it for the next search term
            $ops[] = strtoupper($current);
            continue;
        }

        // token is a search term
        if (count($ops) === 0) {
            $ops[] = 'AND'; // implicit default operator
        }
        $cleanops = cleanoperators($ops, $errors);

        // record on which side(s) the term carried a wildcard
        $wild = '';
        if (substr($current, 0, 1) === '*') {
            $wild .= 'l';
        }
        if (substr($current, -1) === '*') {
            $wild .= 'r';
        }

        $struct[] = array(
            'ops'      => $cleanops,
            'token'    => str_replace('*', '', $current),
            'wildcard' => $wild,
        );

        // operators only apply to the term that directly follows them
        $ops = array();
    }

    return $struct;
}
/**
* Query string tokenizer
*
* Breaks a query string into tokens. Tokens are separated by whitespace;
* text enclosed in double quotes is kept together as one token (the quotes
* themselves are not part of the token). An unterminated quote runs to the
* end of the string.
*
* Before scanning, a '-' or '+' that follows whitespace (or starts the
* string) and is directly followed by a non-whitespace character is rewritten
* to the operator token NOT or AND respectively. This rewrite runs on the raw
* string, so it also affects such characters inside quoted phrases.
*
* Tokens that are empty or consist only of '*' are discarded. The token "0"
* is discarded as well (empty() treats it as empty); this long-standing
* behaviour is preserved so callers receive the same token lists as before.
*
* @param string $qstring Querystring
* @return array All tokens of the string
*/
function tokenizer($qstring){
    $tokens = array();

    // Prepend a blank so the prefix patterns below also match at the start.
    $qstring = ' '.$qstring;
    $qstring = preg_replace('/(\s)-(\S)/','\1NOT \2',$qstring);
    $qstring = preg_replace('/(\s)\+(\S)/','\1AND \2',$qstring);
    $qstring = trim($qstring);

    $inPhrase = false; // true while scanning between double quotes
    $current  = '';
    $length   = strlen($qstring);

    for ($i = 0; $i < $length; $i++) {
        // Square-bracket offset: the curly-brace form is a parse error in PHP 8.
        $char = $qstring[$i];

        // Inside a phrase only the closing quote ends the token,
        // outside of one any whitespace character does.
        if ($inPhrase) {
            $endsToken = ($char === '"');
        } else {
            $endsToken = (preg_match('/\s/', $char) === 1);
        }

        if ($endsToken) {
            $current = trim($current);
            if (!empty($current) && str_replace('*', '', $current) !== '') {
                $tokens[] = $current;
            }
            $current  = '';
            $inPhrase = false;
        } elseif ($char === '"') {
            // opening quote: start a phrase, keep what was collected so far
            $inPhrase = true;
        } else {
            $current .= $char;
        }
    }

    // flush whatever is left after the last separator
    $current = trim($current);
    if (!empty($current) && str_replace('*', '', $current) !== '') {
        $tokens[] = $current;
    }

    return $tokens;
}
/**
* Operator cleaning
*
* Reduces the operators collected in front of a search term to one
* normalised operator string. Duplicates are ignored and the operators are
* put into the fixed order AND, OR, NOT before the following rules are
* applied:
*  - "AND OR" becomes "OR"  (a message is appended to $errors)
*  - "OR NOT" becomes "AND" (a message is appended to $errors)
*  - a lone "NOT" becomes "AND NOT"
*
* @param array $ops Operators
* @param string &$errors Stringreference to write errors back
* @return string cleaned Operators
*/
function cleanoperators($ops,&$errors){
    $given = array_unique($ops);

    // pick the operators that are present, in canonical order
    $ordered = array();
    foreach (array('AND', 'OR', 'NOT') as $candidate) {
        if (in_array($candidate, $given)) {
            $ordered[] = $candidate;
        }
    }
    $result = implode(" ", $ordered);

    // rewrite combinations that are not allowed
    if (strstr($result, "AND OR") !== false) {
        $errors .= "Die logische Verknüpfung 'AND OR' ist nicht erlaubt und wurde in 'OR' umgewandelt.\n";
        $result = str_replace("AND OR", "OR", $result);
    }
    if (strstr($result, "OR NOT") !== false) {
        $errors .= "Die logische Verknüpfung 'OR NOT' ist nicht erlaubt und wurde in 'AND' umgewandelt.\n";
        $result = str_replace("OR NOT", "AND", $result);
    }

    // a bare negation is always combined with AND
    return ($result == 'NOT') ? 'AND NOT' : $result;
}
?>