<?php
// ##################################################################################
// Title : RDQL (class_rdql.php)
// Version : 1.0
// Author : Luis Argerich (hide@address.com)
// Last modification date : 06-30-2002
// Description : This class implements the RDQL language
// to query RDF documents from paths or URLs.
// ##################################################################################
// History:
// 06-30-2002 : First release of this class
// ##################################################################################
// To-Dos:
// ##################################################################################
// How to use it: Check rdql_test.php for an example.
// rdql.html describes the RDQL language.
// class_rdql.html contains this class documentation.
// ##################################################################################
include_once("class_rdf_parser.php");
// A wrapper class to Query RDF documents
class RDQL_query_document {
    // Runs an RDQL query against RDF documents located by URL or file path
    // (the documents are named in the FROM part of the query itself).
    //
    // $query : the complete RDQL query string.
    // Returns whatever RDQL_query::parse_query() yields for that query.
    function rdql_query_url($query)
    {
        $engine = new RDQL_query(new RDF_document_iterator());
        return $engine->parse_query($query);
    }
} // end of class
// Abstract class defining methods for an RDF_iterator
// The RDF iterator is used by the RDQL_query class, the iterator
// MUST provide a find_tuples($sources,$subject,$predicate,$object)
// method that returns all the tuples matching subject, predicate and object
// from the designated sources (The FROM part of a RDQL expression)
class RDF_iterator {
    // Hook for preparing an iteration over $sources. Empty in this base class.
    function init($sources) {}

    // Hook for fetching the next tuple. Empty in this base class.
    function get_tuple() {}

    // Hook that concrete iterators override to return the tuples matching
    // subject, predicate and object in the given sources. Empty in this base class.
    function find_tuples($sources,$subject,$predicate,$object) {}

    // Tells whether one element of a tuple satisfies one term of a condition.
    //
    // $condition : the query term; a term starting with '?' is a variable
    //              and matches any value.
    // $tuple     : the corresponding element of the candidate tuple.
    // Returns true when the term is a variable or when both values are the
    // same string once surrounding whitespace is trimmed, false otherwise.
    function tuple_match($condition,$tuple) {
        $condition = (string)$condition;
        // substr() instead of the removed $str{0} offset syntax; it also
        // copes with an empty term without raising a notice.
        if (substr($condition, 0, 1) == '?') {
            return true;
        }
        // Strict comparison: both sides are strings, and a loose == would
        // treat numeric-looking values such as '1e3' and '1000' as equal.
        return trim($condition) === trim((string)$tuple);
    }
}
// This class implements the RDQL engine
class RDQL_query {
    // Object used to fetch tuples; it must provide
    // find_tuples($sources,$subject,$predicate,$object).
    var $iterator;

    // Constructor. Receives an iterator object that must implement
    // find_tuples($sources,$subject,$predicate,$object), returning all the
    // tuples in the RDF sources matching the provided arguments
    // (RDF_document_iterator in this file is one such class; the original
    // notes also mention an RDF_mysql_iterator that is not defined here).
    function __construct($iterator) {
        $this->iterator=$iterator;
    }

    // PHP 4 style constructor name. PHP 8 no longer treats it as a
    // constructor, so the real work lives in __construct(); this method is
    // kept so that code calling it explicitly keeps working.
    function RDQL_query($iterator) {
        $this->__construct($iterator);
    }

    // Parses and executes an RDQL query.
    //
    // $query : the RDQL query string (SELECT / FROM / WHERE / AND / USING).
    // Returns an array of associative arrays (variable name => value, keys
    // sorted) restricted to the SELECTed variables, or false when the query
    // yields no result.
    function parse_query($query) {
        $select_vars=array();
        $sources=array();
        $conditions=array();
        $filters=array();
        $ns=array();
        foreach($this->tokenize($query) as $exp) {
            $exp=trim($exp);
            $words=explode(' ',$exp,2);
            switch(strtoupper($words[0])) {
                case "SELECT":
                    $select_vars=$this->parse_select($exp);
                    break;
                case "FROM":
                    $sources=$this->parse_from($exp);
                    break;
                case "WHERE":
                    $conditions=$this->parse_where($exp);
                    break;
                case "AND":
                    // Accumulate: a query may carry several AND clauses and
                    // every one of them has to be applied.
                    $filters=array_merge($filters,$this->parse_and($exp));
                    break;
                case "USING":
                    $ns=$this->parse_using($exp);
                    break;
            }
        }
        // Everything is parsed: evaluate the conditions against the sources,
        // obtaining one associative array of variable bindings per match.
        $tuples=$this->find_matching_tuples($sources,$conditions,$ns);
        if(!is_array($tuples)) {
            $tuples=array();
        }
        foreach($filters as $filter) {
            if($filter==='') {
                continue; // an empty filter has nothing to evaluate
            }
            // $tuples is passed by reference
            $this->filter_tuples($tuples,$filter);
        }
        $query_results=array();
        foreach($tuples as $a_tuple) {
            $a_result=array();
            foreach($a_tuple as $key=>$val) {
                if(in_array((string)$key,$select_vars,true)) {
                    $a_result[$key]=$val;
                }
            }
            if(count($a_result)>0) {
                ksort($a_result);
                $query_results[]=$a_result;
            }
        }
        if(count($query_results)>0) {
            return $query_results;
        }
        return false;
    }

    // Splits a query into clauses, one per keyword (SELECT, FROM, WHERE,
    // AND, USING; case-insensitive). Inside each clause the tokens are
    // joined by single spaces and a trailing space is left at the end.
    function tokenize($exp) {
        $keywords=array("SELECT","FROM","WHERE","AND","USING");
        $delims=" \r\n\t";
        $exprs=array();
        $current='';
        $tok=strtok((string)$exp,$delims);
        // Compare against false explicitly: a plain truthiness test would
        // stop at a literal "0" token and drop the rest of the query.
        while($tok!==false) {
            if($current!=='' && in_array(trim(strtoupper($tok)),$keywords,true)) {
                $exprs[]=$current;
                $current='';
            }
            $current.=$tok.' ';
            $tok=strtok($delims);
        }
        if($current!=='') {
            $exprs[]=$current;
        }
        return $exprs;
    }

    // Joins two sets of variable bindings: every pair of elements that
    // array_join_elems() can merge contributes one row to the result.
    function array_sql_join($v1, $v2) {
        $result_set=array();
        foreach($v1 as $elemv1) {
            foreach($v2 as $elemv2) {
                $res=$this->array_join_elems($elemv1,$elemv2);
                if($res) {
                    $result_set[]=$res;
                }
            }
        }
        return $result_set;
    }

    // Merges two binding arrays. Returns false when a key present in both
    // holds different values, otherwise the union of both arrays.
    function array_join_elems($v1, $v2) {
        $ret=array();
        foreach($v1 as $k1=>$val1) {
            if(isset($v2[$k1]) && $v2[$k1]!=$val1) {
                return false;
            }
            $ret[$k1]=$val1;
        }
        foreach($v2 as $k2=>$val2) {
            if(!isset($ret[$k2])) {
                $ret[$k2]=$val2;
            }
        }
        return $ret;
    }

    // Helper shared by the parse_* methods: splits a clause into its
    // whitespace separated words, warns when the first word is not the
    // expected keyword (compared case-insensitively, like tokenize() does)
    // and returns the remaining words.
    function _clause_words($exp,$keyword) {
        $words=preg_split('/\s+/',trim((string)$exp),-1,PREG_SPLIT_NO_EMPTY);
        if(count($words)==0 || strtoupper($words[0])!=$keyword) {
            trigger_error("Expected a $keyword token in the query",E_USER_WARNING);
        }
        array_shift($words);
        return $words;
    }

    // Parses a 'SELECT ?x,?y,?z' expression returning an array with variable names.
    function parse_select($exp) {
        return explode(',',implode('',$this->_clause_words($exp,"SELECT")));
    }

    // Parses a 'FROM doc1,doc2' expression returning an array with document URIs/filenames.
    function parse_from($exp) {
        return explode(',',implode('',$this->_clause_words($exp,"FROM")));
    }

    // Parses a 'WHERE (x1,x2,x3),(z1,z2,z3)' construction returning an array
    // of conditions, each one still wrapped in its parentheses. Only commas
    // found outside parentheses separate conditions.
    function parse_where($exp) {
        $expr=implode('',$this->_clause_words($exp,"WHERE"));
        $vars=array();
        $avar='';
        $level=0;
        $len=strlen($expr);
        for($i=0;$i<$len;$i++) {
            $chr=$expr[$i];
            if($chr=="(") {
                $level++;
            } elseif($chr==")") {
                $level--;
            }
            if($chr==',' && $level==0) {
                if($avar!=='') {
                    $vars[]=$avar;
                    $avar='';
                }
            } else {
                $avar.=$chr;
            }
        }
        if($avar!=='') {
            $vars[]=$avar;
        }
        return $vars;
    }

    // Parses an 'AND filter1,filter2' expression returning an array of
    // filter expressions. All whitespace is removed from the filters.
    function parse_and($exp) {
        return explode(',',implode('',$this->_clause_words($exp,"AND")));
    }

    // Parses a 'USING prefix for <URI>, prefix for <URI>' expression
    // returning an array prefix => URI. Declarations that are too short or
    // whose URI is not enclosed in <...> raise a warning and are skipped.
    function parse_using($exp) {
        $ns=array();
        $exp=trim((string)$exp);
        $words=$this->_clause_words($exp,"USING");
        foreach(explode(',',implode(' ',$words)) as $decl) {
            $decl_parts=preg_split('/\s+/',trim($decl),-1,PREG_SPLIT_NO_EMPTY);
            if(count($decl_parts)<3) {
                trigger_error("Expected a for token in the USING part: $exp",E_USER_WARNING);
                continue;
            }
            if(strtoupper($decl_parts[1])!="FOR") {
                trigger_error("Expected a for token in the USING part: $exp",E_USER_WARNING);
            }
            if(!preg_match('/<([^>]*)>/',$decl_parts[2],$reqs)) {
                trigger_error("Expected a <URI> in the USING part: $exp",E_USER_WARNING);
                continue;
            }
            $ns[$decl_parts[0]]=$reqs[1];
        }
        return $ns;
    }

    // Removes from $tuples (passed by reference) every tuple for which the
    // filter expression evaluates to a false value. The surviving tuples are
    // re-indexed from 0 so that several filters can be applied in sequence.
    //
    // SECURITY NOTE: the filter text comes straight from the query and is
    // executed with eval(); never run queries from untrusted users through
    // this method. Only the substituted values are escaped here.
    function filter_tuples(&$tuples,$filter) {
        if(!is_array($tuples)) {
            return;
        }
        $kept=array();
        foreach($tuples as $a_tuple) {
            $replacements=array();
            foreach($a_tuple as $varname=>$value) {
                if((string)$varname==='') {
                    continue;
                }
                // var_export() yields a properly escaped string literal, so
                // quotes or '$' inside a value cannot alter the evaluated code.
                $replacements[$varname]=var_export((string)$value,true);
            }
            // strtr() replaces the longest variable name first and never
            // rescans inserted values, so '?x' cannot corrupt '?xy'.
            $a_filter=count($replacements)>0 ? strtr($filter,$replacements) : $filter;
            $php_code='return('.$a_filter.');';
            if(eval($php_code)) {
                $kept[]=$a_tuple;
            }
        }
        $tuples=$kept;
    }

    // Expands one term of a condition: '<prefix:name>' becomes the URI
    // registered for prefix in $ns followed by name; any other '<...>' term
    // just loses its angle brackets; everything else is returned untouched.
    function _expand_term($term,$ns) {
        $term=(string)$term;
        if(substr($term,0,1)!='<' || !preg_match('/<([^>]*)>/',$term,$reqs)) {
            return $term;
        }
        // Limit of 2 keeps any further ':' inside the local part.
        $parts=explode(':',$reqs[1],2);
        if(count($parts)==2 && isset($ns[$parts[0]])) {
            return $ns[$parts[0]].$parts[1];
        }
        // Not a declared prefix (for instance a full 'http://...' URI).
        return $reqs[1];
    }

    // Evaluates all the conditions and joins their results.
    //
    // IN: $sources    array of sources, handed as is to the iterator
    // IN: $conditions array with the conditions to be evaluated, each one in
    //                 the form '(subject,predicate,object)'
    // IN: $ns         array with the namespaces (prefix => URI)
    // Returns an array of associative arrays (variable name => value) that
    // satisfy every condition; an empty array when nothing matches.
    function find_matching_tuples($sources,$conditions,$ns) {
        $vec=array();
        $first=true;
        foreach($conditions as $condition) {
            $condition=trim($condition);
            $elems=array();
            if(preg_match('/\(([^)]*)\)/',$condition,$reqs)) {
                $elems=explode(',',$reqs[1]);
            }
            if(count($elems)<3) {
                // A malformed condition cannot match anything.
                trigger_error("Expected a (subject,predicate,object) condition: $condition",E_USER_WARNING);
                return array();
            }
            for($i=0;$i<3;$i++) {
                $elems[$i]=$this->_expand_term($elems[$i],$ns);
            }
            $a_vec=$this->iterator->find_tuples($sources,$elems[0],$elems[1],$elems[2]);
            if(!is_array($a_vec)) {
                $a_vec=array();
            }
            // An explicit flag is needed: testing $vec for emptiness would
            // let a later condition replace an empty (no match) result
            // instead of being joined with it.
            if($first) {
                $vec=$a_vec;
                $first=false;
            } else {
                $vec=$this->array_sql_join($a_vec,$vec);
            }
        }
        return $vec;
    }
} //end of class
// This class implements an iterator for RDF documents
// using URLs or filenames (paths) to locate the documents.
class RDF_document_iterator extends RDF_iterator {
    // Parser instance used for the source currently being read.
    var $rdf_parser;
    // Terms of the condition being evaluated; read by _statement_handler().
    var $subject;
    var $object;
    var $predicate;
    // Variable bindings collected by _statement_handler() during find_tuples().
    var $tuples=Array();

    function __construct() {
    }

    // PHP 4 style constructor name, kept for code that calls it explicitly.
    function RDF_document_iterator() {
    }

    // Not used by this iterator.
    function init($sources) {
    }

    // Not used by this iterator.
    function get_tuple() {
    }

    // Parses every source and returns the bindings of the statements that
    // match the given terms.
    //
    // $sources   : array of file paths or URLs, optionally wrapped in <...>.
    // $subject, $predicate, $object : terms of the condition; they are
    //              stored on the object so the statement handler can read them.
    // Returns the array of bindings accumulated in $this->tuples. A source
    // that cannot be opened is skipped (fopen() reports the failure itself).
    function find_tuples($sources,$subject,$predicate,$object) {
        $this->subject=$subject;
        $this->predicate=$predicate;
        $this->object=$object;
        $this->tuples=Array();
        foreach($sources as $source) {
            // Strip the <...> wrapper when present; accept a bare path too.
            if(preg_match('/<([^>]*)>/',$source,$reqs)) {
                $source=$reqs[1];
            } else {
                $source=trim($source);
            }
            $input=fopen($source,"r");
            if(!$input) {
                // Nothing to read: do not call fread()/fclose() on false.
                continue;
            }
            $this->rdf_parser=new Rdf_parser();
            $this->rdf_parser->rdf_parser_create( NULL );
            $this->rdf_parser->rdf_set_statement_handler( "_statement_handler" );
            $this->rdf_parser->rdf_set_user_data( $this );
            $done=false;
            while(!$done) {
                $buf=fread($input,512);
                if($buf===false) {
                    break; // read error
                }
                $is_final=feof($input);
                // Stop at end of input or as soon as the parser reports failure.
                if(!$this->rdf_parser->rdf_parse($buf,strlen($buf),$is_final)) {
                    $done=true;
                } else {
                    $done=$is_final;
                }
            }
            fclose($input);
            $this->rdf_parser->rdf_parser_free();
        }
        return $this->tuples;
    }
} // end of class
// This is the statement handler used by the RDF parser in the Document Iterator
// Statement callback: when the statement matches the condition stored in
// $user_data (its subject, predicate and object properties, compared with
// $user_data->tuple_match()), the values bound to the '?variable' terms are
// appended to $user_data->tuples as one associative array. A match that
// binds no variable adds nothing.
//
// $user_data : object holding the condition terms and the result list.
// $subject, $predicate, $object : the statement being reported.
// The remaining parameters are not used here.
function _statement_handler(&$user_data,$subject_type,$subject,$predicate,$ordinal,$object_type,$object,$xml_lang) {
    // Pair every condition term with the statement value it is checked against.
    $pairs=array(
        array($user_data->subject,$subject),
        array($user_data->predicate,$predicate),
        array($user_data->object,$object),
    );
    foreach($pairs as $pair) {
        if(!$user_data->tuple_match($pair[0],$pair[1])) {
            return;
        }
    }
    $result=array();
    foreach($pairs as $pair) {
        // substr() replaces the removed $str{0} syntax and tolerates empty terms.
        if(substr((string)$pair[0],0,1)=='?') {
            $result[$pair[0]]=$pair[1];
        }
    }
    if(count($result)>0) {
        $user_data->tuples[]=$result;
    }
}
// This is an iterator for RDF triplets. The sources in the
// FROM part of the RDQL expression must be PHP vars in the
// form $var
class RDF_triplets_iterator extends RDF_iterator {
    var $cosa;
    // Position of the next tuple returned by get_tuple().
    var $index=0;
    // Tuples of the source currently being scanned.
    var $tuples=Array();

    // Rewinds the iteration. The optional parameter only exists to stay
    // compatible with RDF_iterator::init($sources); it is not used.
    function init($sources=null) {
        $this->index=0;
    }

    // Scans global PHP arrays of triplets and returns the bindings of the
    // triplets matching the given terms.
    //
    // $sources : array of global variable names, optionally wrapped in
    //            <...> and/or prefixed with '$'. Each named global is
    //            expected to be an array of array(subject,predicate,object).
    // $subject, $predicate, $object : terms of the condition; a term
    //            starting with '?' is a variable.
    // Returns an array of associative arrays (variable name => value).
    // A source that is not a global array raises a warning and is skipped.
    function find_tuples($sources,$subject,$predicate,$object) {
        $ret=Array();
        $elems=array($subject,$predicate,$object);
        foreach($sources as $source) {
            if(preg_match('/<([^>]*)>/',$source,$reqs)) {
                $source=$reqs[1];
            }
            // remove '$' from source
            if(substr($source,0,1)=='$') {
                $source=substr($source,1);
            }
            if($source==='' || !isset($GLOBALS[$source]) || !is_array($GLOBALS[$source])) {
                trigger_error("RDF source is not a global array: $source",E_USER_WARNING);
                continue;
            }
            // Re-index so get_tuple() can walk the list by position.
            $this->tuples=array_values($GLOBALS[$source]);
            $this->init();
            while(($tuple=$this->get_tuple())!==false) {
                if(!isset($tuple[0],$tuple[1],$tuple[2])) {
                    continue; // not a complete triplet
                }
                if($this->tuple_match($elems[0],$tuple[0]) && $this->tuple_match($elems[1],$tuple[1]) && $this->tuple_match($elems[2],$tuple[2])) {
                    $result=Array();
                    for($i=0;$i<3;$i++) {
                        if(substr((string)$elems[$i],0,1)=='?') {
                            $result[$elems[$i]]=$tuple[$i];
                        }
                    }
                    // count($result): the original counted the bare word
                    // 'result', which is an undefined constant.
                    if(count($result)>0) {
                        $ret[]=$result;
                    }
                }
            }
        }
        return $ret;
    }

    // Returns the next tuple of the current source, or false when the
    // source is exhausted.
    function get_tuple() {
        if(!is_array($this->tuples) || $this->index>=count($this->tuples)) {
            return false;
        }
        $elem=$this->tuples[$this->index];
        $this->index++;
        return $elem;
    }
} // end of class
?>