Location: PHPKode > scripts > XqueryLite > xquerylite/class_xquery_lite.php
<?
// ##################################################################################
// Title                     : XqueryLite (class_xquery_lite.php)
// Version                   : 0.4 beta
// Author                    : Luis Argerich (hide@address.com)
// Last modification date    : 05-21-2002
// Description               : This is an implementation of a subset of the XQuery
//                             language, with the intention of adding new features in
//                             future releases. It is based on FLWR expressions.
// ##################################################################################
// History: 
// 05-21-2002                : First release of this class from my prototype engine
// 05-17-2002                : Some changes to solve W3C use-cases
// ##################################################################################
// To-Dos:
// ##################################################################################
// How to use it:
// ##################################################################################


// XqueryLite evaluates "flwr-lite" queries: XML templates in which every
// top-level { ... } section holds FOR / LET / WHERE / RETURN expressions or a
// $variable/path reference.
//
// XML parsing and XPath evaluation are delegated to xmldoc(), xmldocfile() and
// the xpath_* / node_* methods of the objects they return (the PHP 4 DOM XML
// API); the class does not run without an implementation of those functions.
//
// NOTE(review): WHERE clauses are executed through eval() and FOR function
// names are dispatched to methods of this object, so queries must come from a
// trusted source.
class XqueryLite {
  // Node sets produced by FOR expressions, keyed by variable name without "$".
  // Each entry is a list of strings (serialised nodes or attribute values).
  var $result_sets=Array();
  // Current variable bindings, keyed by variable name without "$".
  var $bindings=Array();

  // Forgets every result set and variable binding.
  function init() {
    $this->result_sets=array();
    $this->bindings=array();
  }

  // Splits a query into its FOR / LET / WHERE / RETURN expressions.
  //
  // The query is cut on whitespace; a new expression starts whenever one of
  // the four keywords appears outside any { } pair. Tokens of one expression
  // are re-joined with single spaces (each expression ends with a space).
  //
  // $exp    : query text
  // returns : list of expression strings, in source order
  function _tokenize($exp) {
    $exprs=array();
    $current='';
    $level=0;
    $tok=strtok($exp," \n\t");
    // Compare against false explicitly: the token "0" is falsy and used to
    // end the loop early, truncating queries such as "where $x = 0 ...".
    while($tok!==false) {
      // Brace depth after the whole token has been read
      $level+=substr_count($tok,'{')-substr_count($tok,'}');

      if( ($level==0) && (strlen($current)>0) ) {
        // trim() also drops a trailing "\r", which is not a strtok delimiter
        if(in_array(trim(strtoupper($tok)),array("FOR","LET","RETURN","WHERE"))) {
          $exprs[]=$current;
          $current='';
        }
      }
      $current.=$tok.' ';
      $tok=strtok(" \n\t");
    }
    if(strlen($current)>0) {
      $exprs[]=$current;
    }

    return $exprs;
  }

  // This function is the "main" function of the flwr-lite engine, it evaluates
  // a flwr expression returning an XML fragment as a string.
  // Text outside braces is copied verbatim; the content of each outermost
  // { ... } pair is handed to _parse_query() and replaced by its result.
  // The function is also called recursively for the body of RETURN
  // expressions.
  //
  // $expr   : template text
  // returns : evaluated text
  function evaluate_xqueryl($expr) {
    $result='';
    $query='';
    $level=0;
    $len=strlen($expr);

    for($i=0;$i<$len;$i++) {
      $chr=substr($expr,$i,1);
      if($chr=='{') {
        $level++;
        // The outermost "{" is only a delimiter, nested ones belong to the query
        if($level>1) {
          $query.=$chr;
        }
      } elseif($chr=='}') {
        $level--;
        if($level>0) {
          $query.=$chr;
        } elseif($level==0) {
          // Closing the outermost pair: evaluate what was collected
          if(strlen($query)>0) {
            $result.=$this->_parse_query($query);
          }
          $query='';
        }
      } elseif($level>0) {
        $query.=$chr;
      } elseif($level==0) {
        $result.=$chr;
      }
    }

    return $result;
  }

  // Returns the tag name of a node (used with the document element to
  // auto-add the root step to path expressions).
  function _get_root_name($node) {
    return $node->node_name();
  }

  // Evaluates an xpath expression against a parsed document.
  //
  // $doc      : document object as returned by xmldoc() / xmldocfile()
  // $path     : xpath expression
  // $add_root : when true "/<root element name>" is prepended to $path
  // returns   : array of matching nodes, empty when the evaluation gives
  //             no node set
  function _select_nodes($doc,$path,$add_root) {
    if($add_root) {
      $path='/'.$this->_get_root_name($doc->document_element()).$path;
    }
    $doc->xpath_init();
    $ctx=$doc->xpath_new_context();
    $found=$ctx->xpath_eval($path);
    if(!is_object($found) || !isset($found->nodeset) || !is_array($found->nodeset)) {
      return array();
    }
    return $found->nodeset;
  }

  // Serialises one node: attributes yield their value, anything else its dump.
  function _node_to_string($node) {
    if($node->node_type()==XML_ATTRIBUTE_NODE) {
      return $node->value;
    }
    return $node->dump_node($node);
  }

  // Applies the functions wrapped around a FOR source to a node list.
  //
  // $nodes     : array of nodes
  // $functions : function names, innermost first
  // returns    : the transformed node list
  function _apply_functions($nodes,$functions) {
    foreach($functions as $f) {
      // The public name of _distinct() is distinct-values
      if($f=="distinct-values") {
        $f="_distinct";
      }
      // NOTE(review): any other name is dispatched to the method of the same
      // name, as before; unknown names now warn instead of aborting.
      if(!method_exists($this,$f)) {
        trigger_error("Unknown function $f <br/>",E_USER_WARNING);
        continue;
      }
      $nodes=$this->$f($nodes);
    }
    return $nodes;
  }

  // Splits "$name/path" into array(name, path); path is '' when absent and
  // starts with "/" otherwise.
  function _split_var_expr($expr) {
    $parts=explode("/",$expr,2);
    $path='';
    if(isset($parts[1]) && (strlen($parts[1])>0)) {
      $path="/".$parts[1];
    }
    return array((string)substr($parts[0],1),$path);
  }

  // This parses a flwr-lite FOR expression binding and returns the name of
  // the flwr-lite variable associated; the node set (as a list of strings)
  // is stored in the result_sets array under that name.
  // A flwr-lite FOR expression can be:
  // FOR $name IN xmlmem($xml)/xpath_expression   ($xml is a GLOBAL string)
  // or
  // FOR $name IN document("file")/xpath_expression
  // or
  // FOR $name IN $name/xpath_expression
  // The source may be wrapped in functions, e.g. distinct-values(...).
  // For a $name source the root element name of the bound XML is prepended
  // to the path; for xmlmem/document the path is used as written.
  //
  // $expr   : the FOR expression
  // returns : variable name without "$", or false (after an E_USER_WARNING)
  //           when the expression or its source is invalid
  function _parse_for($expr) {
    $tokens=preg_split('/\s+/',trim($expr));
    if( (count($tokens)<4) || (strtoupper($tokens[2])<>"IN") ) {
      trigger_error("Invalid FOR expresion $expr <br/>",E_USER_WARNING);
      return false; 
    }
    $name=$tokens[1];
    $path=$tokens[3];

    // While the beginning of path is not a source, peel off one function
    // wrapper and queue its name (innermost ends up first).
    $functions=array();
    while( (substr($path,0,1)<>'$') && (substr($path,0,8)<>"document") && (substr($path,0,6)<>"xmlmem")) {
      if(!preg_match("/([^(]*)\((.*)\)/",$path,$regs)) {
        // Neither a source nor a function call: used to loop forever
        trigger_error("Invalid xml source $path <br/>",E_USER_WARNING);
        return false;
      }
      $path=$regs[2];
      array_unshift($functions,$regs[1]);
    }

    $parts=explode("/",$path,2);
    $xml_source=$parts[0];
    $path='/'.(isset($parts[1]) ? $parts[1] : '');

    $doc=false;
    $add_root=false;
    if(substr($xml_source,0,8)=='document') {
      /* PROCESSING FROM A FILE */
      $name_doc='';
      if(preg_match("/document\((.*)\)/",$xml_source,$regs)) {
        $name_doc=str_replace('"','',$regs[1]);
      }
      if(!file_exists($name_doc)) {
        trigger_error("$name_doc file not found", E_USER_WARNING); 
        return false;
      }
      $doc=xmldocfile($name_doc);
      if(!$doc) {
        trigger_error("XML source document $name_doc was not well formed",E_USER_WARNING); 
        return false;
      }
    } elseif(substr($xml_source,0,6)=='xmlmem') {
      /* PROCESSING FROM MEM */
      $source='';
      if(preg_match("/xmlmem\((.*)\)/",$xml_source,$regs)) {
        $source=str_replace('"','',$regs[1]);
      }
      $name_var=(string)substr($source,1);
      // NOTE THAT THE XML STRING MUST BE GLOBAL
      if(!isset($GLOBALS[$name_var])) {
        trigger_error("$name_var is not visible from here plase use a global string for XML data",E_USER_WARNING);
        return false; 
      }
      $data=$GLOBALS[$name_var];
      if(strlen($data)>0) {
        $doc=xmldoc($data);
        if(!$doc) {
          trigger_error("XML source was not well formed",E_USER_WARNING); 
          return false;
        }
      }
    } elseif(substr($xml_source,0,1)=='$') {
      /* PROCESS FROM A VARIABLE */
      $var_name=(string)substr($xml_source,1);
      $data=isset($this->bindings[$var_name]) ? $this->bindings[$var_name] : '';
      if(strlen($data)>0) {
        $doc=xmldoc($data);
        if(!$doc) {
          trigger_error("XML source variable $var_name was not well formed",E_USER_WARNING); 
          return false;
        }
        $add_root=true;
      }
    } else {
      trigger_error("Invalid xml source $xml_source <br/>",E_USER_WARNING);
      return false; 
    }

    // An empty source yields an empty (but defined) node set
    $nodeset=array();
    if($doc) {
      $nodes=$this->_apply_functions($this->_select_nodes($doc,$path,$add_root),$functions);
      foreach($nodes as $node) {
        $nodeset[]=$this->_node_to_string($node);
      }
    }

    $name_of_name=(string)substr($name,1);
    $this->result_sets[$name_of_name]=$nodeset;
    return $name_of_name;
  }

  // Functions that can be applied to a resulting nodeset of a FOR expression
  // List: distinct-values (_distinct)

  // This function eliminates duplicate results from the nodeset.
  // Two nodes are duplicates when their normalized representations (see
  // _normalize_elements) are the same string; the first one is kept.
  //
  // $nodeset : array of nodes
  // returns  : array of nodes without duplicates, original order preserved
  function _distinct($nodeset) {
    $new_nodeset=array();
    $seen=array();

    foreach($nodeset as $node) {
      $normalized=(string)$this->_normalize_elements($node);
      // Keyed lookup: exact string match and no rescan of earlier nodes
      if(!isset($seen[$normalized])) {
        $seen[$normalized]=true;
        $new_nodeset[]=$node;
      }
    }

    return $new_nodeset; 
  }

  // Normalize can eliminate all the tags.
  // Attributes yield their value. For other nodes the dump is inspected: when
  // it contains exactly one <tag>text</tag> construct the tags are stripped,
  // otherwise (for elements) newlines and tabs become spaces and whitespace
  // between tags is removed.
  //
  // $node   : node object
  // returns : normalized string
  function _normalize_elements($node) {
    if($node->node_type()==XML_ATTRIBUTE_NODE) {
      return $node->value; 
    }
    $data=trim($node->dump_node($node));

    // Count the elements that contain nothing but text
    preg_match_all("/<([^>]*)>[^<]*<\/[^>]*>/",$data,$leaves);

    if(count($leaves[1])==1) {
      return trim(preg_replace("/<.*>(.*)<\/.*>/","$1",$data));
    }
    if($node->node_type()==XML_ELEMENT_NODE) {
      $data=preg_replace("/\n/"," ",$data);
      $data=preg_replace("/\t/"," ",$data);
      $data=preg_replace("/\>\s*\</","><",$data);
    }

    return $data;
  }

  // This function parses an expression in the form:
  // $name/xpath_expression
  // outside a FOR expression so it always returns a string.
  // Without a path the bound value is returned as is. With a path the value
  // is parsed as XML, the root element name is prepended to the path and the
  // matching nodes are concatenated.
  //
  // $expr   : "$name" or "$name/path"
  // $norm   : when true each node is normalized (_normalize_elements),
  //           otherwise it is serialised (_node_to_string)
  // returns : result string; '' when the variable is unbound or empty, when
  //           nothing matches, or (after a warning) when the value is not XML
  function _parse_var($expr,$norm) {
    list($var_name,$path)=$this->_split_var_expr($expr);
    if(!isset($this->bindings[$var_name])) {
      return '';
    }
    $data=$this->bindings[$var_name];
    if(strlen($data)==0) {
      return ''; 
    }
    if(strlen($path)==0) {
      return $data;
    }

    // Check the parse result before touching the document
    $doc=xmldoc($data);
    if(!$doc) {
      trigger_error("cannot evaluate a xpath expression because $data is not xml ",E_USER_WARNING);
      return '';
    }

    $result='';
    foreach($this->_select_nodes($doc,$path,true) as $a_node) {
      if($norm) {
        $result.=$this->_normalize_elements($a_node);
      } else {
        $result.=$this->_node_to_string($a_node);
      }
    }

    return $result;
  }

  // This function is very similar to _parse_var BUT instead of returning the
  // value it just counts the number of nodes selected.
  //
  // $expr   : "$name" or "$name/path"
  // returns : 0 when the variable is unbound or empty (or, after a warning,
  //           not XML), 1 for a variable without path, otherwise the number
  //           of nodes matched by the path. 0 rather than '' is returned so
  //           that the value substituted into a WHERE clause stays valid.
  function _count_var($expr) {
    list($var_name,$path)=$this->_split_var_expr($expr);
    if(!isset($this->bindings[$var_name])) {
      return 0;
    }
    $data=$this->bindings[$var_name];
    if(strlen($data)==0) {
      return 0; 
    }
    if(strlen($path)==0) {
      return 1;
    }

    $doc=xmldoc($data);
    if(!$doc) {
      trigger_error("cannot evaluate a xpath expression because $data is not xml ",E_USER_WARNING);
      return 0;
    }

    return count($this->_select_nodes($doc,$path,true));
  }

  // This function parses a flwr-lite where expression returning 
  // true/false depending on the expression value.
  // Steps:
  //  - and / or are replaced by && / ||, a single = by ==
  //  - every count(...) is replaced by the value of _count_var()
  //  - every $variable[/path] is replaced by its normalized value, written
  //    as a quoted PHP string literal
  //  - the resulting text is evaluated with eval()
  //
  // NOTE(review): the operator rewriting is purely textual, so it also
  // touches string literals and xpath predicates inside the clause.
  // NOTE(review): eval() is used on the rewritten clause; bound values are
  // quoted with var_export() so data cannot inject code, but the clause text
  // itself must be trusted. An invalid clause makes eval() fail.
  //
  // $expr   : the WHERE expression, starting with the keyword
  // returns : value of the evaluated expression
  function _parse_where($expr) {
    $wexpr=substr(ltrim($expr),5);

    $wexpr=preg_replace("/([^A-Za-z0-9])and([^A-Za-z0-9])/","$1&&$2",$wexpr);
    $wexpr=preg_replace("/([^A-Za-z0-9])or([^A-Za-z0-9])/","$1||$2",$wexpr);
    $wexpr=preg_replace("/([^=><!])=([^=])/","$1==$2",$wexpr);

    // count(...) calls become plain numbers
    preg_match_all("/count\(([^)]*)\)/",$wexpr,$counts);
    for($i=0;$i<count($counts[1]);$i++) {
      $cant=$this->_count_var(trim($counts[1][$i]));
      $wexpr=str_replace($counts[0][$i],"$cant",$wexpr);
    }

    // Collect the variable references. A reference starts at "$" and ends at
    // whitespace or ";", or at an operator that is outside a [ ] predicate.
    $vars=array();
    $is_a_var=false;
    $predicate=false;
    $a_var='';
    $len=strlen($wexpr);
    for($i=0;$i<$len;$i++) {
      $chr=substr($wexpr,$i,1);
      if($chr=="$") {
        $a_var=''; 
        $is_a_var=true;
      }  
      if($is_a_var) {
        if(in_array($chr,array(' ',"\t","\n",';'))) {
          $is_a_var=false;  
        }         
        if($chr=="[") {
          $predicate=true;
        } 
        if($chr=="]") {
          $predicate=false; 
        }
        if(!$predicate && in_array($chr,array('=','>','<','+','-','*'))) {
          $is_a_var=false; 
        }
        if(!$is_a_var) {
          $vars[]=$a_var;
          $a_var='';
        }
      }
      if($is_a_var && $chr<>"$") {
        $a_var.=$chr;
      }
    }
    if($is_a_var) {
      $vars[]=$a_var; 
    }

    // Evaluate each reference once and build the substitution table
    $map=array();
    foreach($vars as $a_var) {
      $exp='$'.$a_var;
      if(!isset($map[$exp])) {
        $map[$exp]=var_export((string)$this->_parse_var($exp,1),true);
      }
    }
    // strtr() substitutes the longest reference first and never rescans an
    // inserted value, so "$b" cannot clobber "$b/title".
    if(count($map)>0) {
      $wexpr=strtr($wexpr,$map);
    }

    return eval('return('.$wexpr.');');
  }

  // This function parses a flwr-lite RETURN expression:
  // the word RETURN followed by a template that can contain further
  // { ... } flwr-lite expressions.
  //
  // $expr   : the RETURN expression, starting with the keyword
  // returns : the evaluated template
  function _parse_return($expr) {
    // Drop the 6-character keyword and evaluate what follows
    $retexp=substr(ltrim($expr),6);
    return $this->evaluate_xqueryl($retexp);
  }

  // This function parses a flwr-lite LET expression
  //   LET $name := value
  // and stores the value in the bindings array under name (without "$").
  // A value containing "$" is evaluated with _parse_var(); LET does not
  // normalize path expressions.
  //
  // $expr : the LET expression, starting with the keyword
  function parse_let($expr) {
    $letexpr=substr(ltrim($expr),4);

    // Split on the first ":=" only so that the value may contain ":="
    $tokens=explode(":=",$letexpr,2);
    $var_name=trim($tokens[0]);
    $var_value='';
    if(isset($tokens[1])) {
      $var_value=trim($tokens[1]);
    } else {
      trigger_error("Invalid LET expresion $expr <br/>",E_USER_WARNING);
    }
    if(strstr($var_value,'$')) {
      // We are assigning to a var value
      $var_value=$this->_parse_var($var_value,false); 
    }  
    $var_name=(string)substr($var_name,1);
    $this->bindings[$var_name]=$var_value;
  }

  // Splits a FOR expression with several bindings on the commas that are not
  // inside a [ ] pair. Empty pieces are dropped, pieces are not trimmed.
  //
  // $expr   : text of the FOR expression
  // returns : list of pieces
  function _split_fors($expr) {
    $fors=array();
    $afor='';
    $level=0;
    $len=strlen($expr);
    for($i=0;$i<$len;$i++) {
      $chr=substr($expr,$i,1);
      if($chr=="[") {
        $level++; 
      }
      if($chr=="]") {
        $level--; 
      }
      if( ($chr==',') && ($level==0) ) {
        if(strlen($afor)>0) {
          $fors[]=$afor;
          $afor=''; 
        } 
      } else {
        $afor.=$chr; 
      }
    } 
    if(strlen($afor)>0) {
      $fors[]=$afor;
    } 
    return $fors;
  }

  // This function parses a flwr-lite expression;
  // it is called after filtering out XML constructs from a flwr-lite query.
  // The first expression of the query decides what happens:
  //   $var[/path] : its (non normalized) value is the result
  //   FOR         : the rest is evaluated once per node of the node set
  //   WHERE       : the rest is evaluated only when the condition holds
  //   RETURN      : the template is evaluated, nothing can follow
  //   LET         : the variable is bound, then the rest is evaluated
  //   other       : skipped, the rest is evaluated
  //
  // $query  : query text
  // returns : result string ('' for an empty query)
  function _parse_query($query) {
    $result='';

    $exprs=$this->_tokenize($query);
    if(count($exprs)==0) {
      // Nothing left to evaluate. Without this check the branches below
      // recursed on the empty query forever.
      return $result;
    }

    $expr=trim(array_shift($exprs));
    $tokens=explode(" ",$expr);
    $what=trim($tokens[0]);

    if(substr($what,0,1)=="$") {
      return $this->_parse_var($what,0); 
    }

    switch(strtoupper($what)) {
      case "FOR":
        // Expression should be split in commas
        // but don't count commas inside pairs of []
        $multi_fors=$this->_split_fors($expr);

        if(count($multi_fors)>1) {
          // Every additional binding becomes a FOR expression of its own,
          // queued (in source order) in front of what follows.
          for($i=count($multi_fors)-1;$i>0;$i--) {
            $afor=trim($multi_fors[$i]);
            if(!preg_match('/^for\s/i',$afor)) {
              $afor='FOR '.$afor; 
            }
            array_unshift($exprs,$afor);
          } 
          $expr=rtrim($multi_fors[0]);
        }

        $name=$this->_parse_for($expr);
        if($name===false) {
          // Invalid FOR, already reported by _parse_for
          break;
        }
        $nodes=$this->result_sets[$name];
        $rest=implode("\n",$exprs);    // What follows the FOR expr
        foreach($nodes as $node) {
          $this->bindings[$name]=$node;
          $result.=$this->_parse_query($rest);  // is parsed
        }
        break;
      case "WHERE":
        // If we have a where then the rest is evaluated only if the WHERE is true
        if($this->_parse_where($expr)) {
          $result.=$this->_parse_query(implode("\n",$exprs));  
        }
        break;      
      case "RETURN":
        // If we have a return we parse the return and nothing can follow a return
        $result.=$this->_parse_return($expr);
        break;
      case "LET":
        // Parse the LET statement and continue evaluating the query
        $this->parse_let($expr);
        $result.=$this->_parse_query(implode("\n",$exprs));    
        break;
      default:
        // If we have something else (whitespace I hope) we process what follows
        $result.=$this->_parse_query(implode("\n",$exprs));
    }
    return $result;
  }
}

/*
$case1='
<bib>
 {
  for $b in xmlmem($bib)/bib/book
  where $b/publisher = "Addison-Wesley" and $b/@year > 1991
  return
    <book year="{ $b/@year }">
     { $b/title }
    </book>
 }
</bib> ';

$bib='<bib>
    <book year="1994">
        <title>TCP/IP Illustrated</title>
        <author><last>Stevens</last><first>W.</first></author>
        <publisher>Addison-Wesley</publisher>
        <price> 65.95</price>
    </book>
 
    <book year="1992">
        <title>Advanced XML Programming in the Unix environment</title>
        <author><last>Stevens</last><first>W.</first></author>
        <publisher>Addison-Wesley</publisher>
        <price>65.95</price>
    </book>
 
    <book year="2000">
        <title>Data on the Web</title>
        <author><last>Abiteboul</last><first>Serge</first></author>
        <author><last>Buneman</last><first>Peter</first></author>
        <author><last>Suciu</last><first>Dan</first></author>
        <publisher>Morgan Kaufmann Publishers</publisher>
        <price> 39.95</price>
    </book>
 
    <book year="1999">
        <title>The Economics of Technology and Content for Digital TV</title>
        <editor>
               <last>Gerbarg</last><first>Darcy</first>
                <affiliation>CITI</affiliation>
        </editor>
            <publisher>Kluwer Academic Publishers</publisher>
        <price>129.95</price>
    </book>
 
</bib>';




$xq=new XqueryLite();
$result=$xq->evaluate_xqueryl($case1);
print("Result:<br />");
print("<textarea rows='20' cols='50'>$result</textarea>");
*/
?>
Return current item: XqueryLite