<?php
/*
Search.php, main class for handling the search system
Copyright (C) 2005 Arend van Beelen, Auton Rijnsburg
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
For any questions, comments or whatever, you may mail me at: hide@address.com
*/
require_once('Config.php');
require_once('Database.php');
require_once('Document.php');
require_once('MIME.php');
require_once('URI.php');
/**
 * @brief Plain value object describing a single search hit.
 *
 * The class carries no behavior of its own; it only groups the properties
 * of one result. Every property starts out as @p null until it is filled in.
 *
 * @sa Search
 *
 * @since Aukyla 1.1
 */
class SearchResult
{
    /**
     * Name of the application (or gateway) that produced this result.
     */
    public $application = null;

    /**
     * Location of the found document.
     *
     * This is either a Local URI or, when @p fromGateway is @p true, a
     * regular internet URL.
     */
    public $uri = null;

    /**
     * When @p true, @p uri holds a regular internet URL rather than a
     * Local URI.
     */
    public $fromGateway = null;

    /**
     * Title describing the result.
     */
    public $title = null;

    /**
     * Optional short summary of the result.
     */
    public $summary = null;

    /**
     * MIME type of the result.
     */
    public $mimeType = null;

    /**
     * Any other meta-data that was found.
     */
    public $metaData = null;

    /**
     * Score of the result.
     */
    public $score = null;

    /**
     * Flags which possibly indicate the availability of other properties.
     * Currently unused.
     */
    public $flags = null;
}
/**
* @brief Main class for using the Aukyla General Search System.
*
* This class can be used to perform searches through all Aukyla applications.
* Also, Aukyla applications should push their indexes to this class so Aukyla
* always has an up-to-date index of all content of all applications.
*
* @since Aukyla 1.1
*/
class Search
{
/**
 * Gives access to the single shared Search object, creating it on first
 * use.
 *
 * @return The shared instance of the Search class.
 */
public static function instance()
{
    if(!isset(self::$instance))
    {
        // First call: build the one and only instance.
        self::$instance = new self();
    }
    return self::$instance;
}
/**
 * @internal
 *
 * Constructor.
 *
 * Resets all bookkeeping to an empty state, then connects to the search
 * database and loads the interface plugins. The constructor is private:
 * obtain the shared object through instance() instead of instantiating this
 * class yourself.
 */
private function __construct()
{
    // Connection state; connectToDatabase() below fills these in.
    $this->database = null;
    $this->gatewaysOnly = false;

    // Interface registries, populated by loadInterfaces().
    $this->indexes = array();
    $this->gateways = array();
    $this->availableInterfaceNames = array();
    $this->availableInterfaces = array();

    // Current selection, also populated by loadInterfaces().
    $this->selectedIndexes = array();
    $this->selectedInterfaces = array();

    // Order matters: loadInterfaces() reads the gatewaysOnly flag which
    // connectToDatabase() may set.
    $this->connectToDatabase();
    $this->loadInterfaces();
}
/**
 * @internal
 *
 * Opens the connection to the search database using the
 * @p searchDatabase* settings from Config::globals().
 *
 * When Database::connection() returns @p false, no database is kept and the
 * object switches to gateways-only mode.
 */
private function connectToDatabase()
{
    $connection = Database::connection(
        Config::globals('searchDatabaseType'),
        Config::globals('searchDatabaseServer'),
        Config::globals('searchDatabaseName'),
        Config::globals('searchDatabaseUsername'),
        Config::globals('searchDatabasePassword')
    );
    if($connection === false)
    {
        // No index database available: only gateways can be searched.
        $this->database = null;
        $this->gatewaysOnly = true;
        return;
    }
    $this->database = $connection;
}
/**
 * @internal
 *
 * Discovers and loads all search interface plugins.
 *
 * Two plugin directories below AUKYLA_DIR are scanned for *.php files:
 * - plugins/SearchInterfaces: index interfaces; a file Foo.php must define
 *   the class Foo_SearchInterface. These are skipped (with a notice) when
 *   no database is available.
 * - plugins/SearchGatewayInterfaces: gateway interfaces; a file Foo.php
 *   must define the class Foo_SearchGatewayInterface.
 *
 * Every successfully loaded interface is registered as available and is
 * selected by default.
 */
private function loadInterfaces()
{
    // --- Index interfaces -------------------------------------------------
    $indexFiles = glob(AUKYLA_DIR.'/plugins/SearchInterfaces/*.php');
    if($indexFiles === false)
    {
        $indexFiles = array();
    }
    foreach($indexFiles as $file)
    {
        $interfaceName = basename($file, '.php');
        $className = "{$interfaceName}_SearchInterface";
        $loaded = include_once($file);
        if(!$loaded || !class_exists($className))
        {
            trigger_error("Invalid search interface \"$className\" in plugins/SearchInterfaces");
            continue;
        }
        if($this->gatewaysOnly)
        {
            trigger_error("Interface \"$className\" ignored because no database is available");
            continue;
        }
        $index = new $className();
        $this->indexes[$interfaceName] = $index;
        $this->availableInterfaces[] = $interfaceName;
        $this->availableInterfaceNames[$interfaceName] = $index->name();
        $this->selectedIndexes[] = $interfaceName;
        $this->selectedInterfaces[] = $interfaceName;
    }

    // --- Gateway interfaces -----------------------------------------------
    $gatewayFiles = glob(AUKYLA_DIR.'/plugins/SearchGatewayInterfaces/*.php');
    if($gatewayFiles === false)
    {
        $gatewayFiles = array();
    }
    foreach($gatewayFiles as $file)
    {
        $interfaceName = basename($file, '.php');
        $className = "{$interfaceName}_SearchGatewayInterface";
        $loaded = include_once($file);
        if(!$loaded || !class_exists($className))
        {
            trigger_error("Invalid search interface \"$className\" in plugins/SearchGatewayInterfaces");
            continue;
        }
        $gateway = new $className();
        $this->gateways[$interfaceName] = $gateway;
        $this->availableInterfaces[] = $interfaceName;
        $this->availableInterfaceNames[$interfaceName] = $gateway->name();
        $this->selectedInterfaces[] = $interfaceName;
    }
}
/**
 * Lists the internal names of every interface that was loaded.
 *
 * @return An array containing the names of all available interfaces.
 */
public function availableInterfaces()
{
    $names = $this->availableInterfaces;
    return $names;
}
/**
 * Gives the full display name of every available interface.
 *
 * @return An array mapping the internal interface names, as reported by
 *         availableInterfaces(), to their full display names.
 */
public function interfaceNames()
{
    $displayNames = $this->availableInterfaceNames;
    return $displayNames;
}
/**
 * Sets which interfaces should be used for searching. By default, all
 * available search interfaces are used.
 *
 * Names which do not belong to an available interface are ignored, and a
 * name given more than once is only selected once.
 *
 * @param interfaces An array of strings containing the names of the
 *                   interfaces which should be used when searching.
 */
public function setInterfaces($interfaces)
{
    // Start from a clean slate for BOTH lists. The index selection has to
    // be reset as well, otherwise a deselected index would still be
    // searched and repeated calls would pile up duplicate entries.
    $this->selectedInterfaces = array();
    $this->selectedIndexes = array();

    foreach((array) $interfaces as $interface)
    {
        if(!in_array($interface, $this->availableInterfaces, true))
        {
            // Unknown interface name.
            continue;
        }
        if(in_array($interface, $this->selectedInterfaces, true))
        {
            // Already selected by an earlier entry.
            continue;
        }
        $this->selectedInterfaces[] = $interface;
        // Index interfaces are additionally tracked in their own list,
        // which is the list doSearch() runs its database queries for.
        if(isset($this->indexes[$interface]))
        {
            $this->selectedIndexes[] = $interface;
        }
    }
}
/**
 * Performs a search.
 *
 * All selected gateways are asked first. Unless the object runs in
 * gateways-only mode, the index database is then queried once per selected
 * index interface.
 *
 * @param keywords   A list of space separated keywords to search for.
 * @param maxResults The maximum number of results to return per
 *                   selected interface.
 * @param offset     The offset of the results to return per interface.
 *
 * @return An array of arrays with SearchResult objects. Every key in the
 *         first array corresponds to the name of the interface which returned
 *         the results in the value array. The value arrays are made up of
 *         SearchResult objects. When no usable keyword is given, the value
 *         arrays of the index interfaces are empty.
 */
public function doSearch($keywords, $maxResults = 10, $offset = 0)
{
    $keywords = self::splitKeywords($keywords);
    $results = array();
    foreach($this->gateways as $gatewayName => $gateway)
    {
        if(in_array($gatewayName, $this->selectedInterfaces))
        {
            $results[$gatewayName] = $gateway->search($keywords, $maxResults, $offset);
        }
    }
    if($this->gatewaysOnly == true)
    {
        return $results;
    }

    // LIMIT and OFFSET are spliced into the SQL text, so force them to be
    // non-negative integers instead of trusting the caller.
    $limit = max(0, (int) $maxResults);
    $skip = max(0, (int) $offset);

    // Words are matched case-insensitively against the lower-cased index.
    // splitKeywords() only lets alphanumeric characters through, so the
    // patterns need no further escaping.
    $patterns = array_map('strtolower', $keywords);

    foreach($this->selectedIndexes as $application)
    {
        $results[$application] = array();
        if(count($patterns) == 0)
        {
            // Nothing to search for; avoid building a LIKE '%%' query that
            // would match every indexed word.
            continue;
        }

        // Keep the raw name for the result keys; only the SQL text gets the
        // escaped copy.
        // NOTE(review): addslashes() is not a database-aware escaping
        // function; prefer the Database layer's own escaping or parameter
        // binding if it offers one.
        $escapedApplication = addslashes($application);

        // One sub-select per keyword; a document only survives the
        // intersection when it matches every keyword.
        // NOTE(review): INTERSECT ALL compares whole (uri, weight) rows, so
        // a document matching two keywords with different weights appears
        // to drop out of the result. Confirm and consider intersecting on
        // uri only.
        $subQueries = array();
        foreach($patterns as $pattern)
        {
            $subQueries[] = "SELECT D.uri AS uri, C.weight AS weight ".
                            "FROM Documents D, ContainsWord C, Words W ".
                            "WHERE D.app = '$escapedApplication' AND D.did = C.did AND C.wid = W.wid AND W.word LIKE '%{$pattern}%'";
        }
        $query = "SELECT uri, SUM(weight) AS weight ".
                 "FROM (".implode(" INTERSECT ALL ", $subQueries).") AS results ".
                 "GROUP BY uri ".
                 "ORDER BY weight DESC ".
                 "LIMIT $limit ".
                 "OFFSET $skip;";
        $this->database->query($query);
        for($i = 0; $i < $this->database->numberOfRows(); $i++)
        {
            $row = $this->database->resultArray();
            $result = new SearchResult();
            $result->application = $application;
            $result->uri = $row[0];
            $result->fromGateway = false;
            $result->score = $row[1];
            $result->title = URI::metaData($result->uri, 'name');
            $result->summary = URI::metaData($result->uri, 'comments');
            $result->mimeType = MIME::type($result->uri);
            $results[$application][] = $result;
        }
    }
    return $results;
}
/**
 * Rebuilds the database.
 *
 * Wipes the entire search database and then asks every index interface to
 * re-index all of its content. This can be quite a costly operation and
 * should generally not be done in a regular page load. Nothing happens in
 * gateways-only mode.
 *
 * You can use the base/RebuildSearchDatabase.php script to rebuild the
 * database from the command line.
 */
public function rebuildDatabase()
{
    if(!$this->gatewaysOnly)
    {
        // Start from empty tables, then let every application refill them.
        $this->createNewDatabase();
        foreach($this->indexes as $indexer)
        {
            $indexer->indexAll();
        }
    }
}
/**
 * Removes all tables and creates a new clean database. This will discard your
 * entire search index!
 *
 * The following tables are (re)created:
 * - Documents:    one row per indexed document, unique per (app, uri).
 * - Words:        one row per distinct indexed word.
 * - ContainsWord: weighted association between a document and a word.
 * - MetaData:     key/value properties attached to a document.
 *
 * Nothing happens in gateways-only mode.
 */
public function createNewDatabase()
{
    if($this->gatewaysOnly == true)
    {
        return;
    }

    // Referencing tables go first so the foreign keys never block a drop.
    // IF EXISTS keeps a first-time run, when no table exists yet, from
    // failing on the DROP statements.
    foreach(array('MetaData', 'ContainsWord', 'Words', 'Documents') as $table)
    {
        $this->database->query("DROP TABLE IF EXISTS $table;");
    }

    $this->database->query('CREATE TABLE Documents'.
                           '('.
                           ' did SERIAL NOT NULL,'.
                           ' app VARCHAR(255) NOT NULL,'.
                           ' uri VARCHAR(255) NOT NULL,'.
                           ' PRIMARY KEY(did),'.
                           ' UNIQUE(app, uri)'.
                           ');');
    $this->database->query('CREATE TABLE Words'.
                           '('.
                           ' wid SERIAL NOT NULL,'.
                           ' word VARCHAR(50) NOT NULL,'.
                           ' PRIMARY KEY(wid),'.
                           ' UNIQUE(word)'.
                           ');');
    $this->database->query('CREATE TABLE ContainsWord'.
                           '('.
                           ' did INTEGER NOT NULL,'.
                           ' wid INTEGER NOT NULL,'.
                           ' weight INTEGER NOT NULL DEFAULT 1,'.
                           ' PRIMARY KEY(did, wid),'.
                           ' FOREIGN KEY(did) REFERENCES Documents,'.
                           ' FOREIGN KEY(wid) REFERENCES Words'.
                           ');');
    // MetaData.did refers to an existing document, so it is a plain
    // INTEGER like ContainsWord.did rather than a SERIAL with its own,
    // never used, sequence.
    $this->database->query('CREATE TABLE MetaData'.
                           '('.
                           ' did INTEGER NOT NULL,'.
                           ' key VARCHAR(50) NOT NULL,'.
                           ' value VARCHAR(255) NOT NULL,'.
                           ' PRIMARY KEY(did, key),'.
                           ' FOREIGN KEY(did) REFERENCES Documents'.
                           ');');
}
/**
 * Adds a document to the search index and indexes it.
 *
 * If the document is already known for the given application, its existing
 * ID is reused. This function automatically calls Document::index() for
 * you.
 *
 * @param application The name of the application the document belongs
 *                    to. By supplying this argument, the search engine
 *                    can sort documents from different applications.
 * @param uri         URI of the document to be added to the index.
 *
 * @return The ID of the added document, or @p false on error. This ID
 *         can be used for subsequent indexText() calls.
 *
 * @sa indexText(), removeDocument()
 */
public function addDocument($application, $uri)
{
    if($this->gatewaysOnly == true)
    {
        return false;
    }

    // Reuse the ID when the document was indexed before.
    $did = $this->documentId($application, $uri);
    if($did === false)
    {
        // Escaped copies are used for the SQL text only; the raw values
        // must stay untouched for Document::index() below.
        // NOTE(review): addslashes() is not a database-aware escaping
        // function; prefer the Database layer's own escaping or parameter
        // binding if it offers one.
        $escapedApplication = addslashes($application);
        $escapedUri = addslashes($uri);
        $this->database->query("SELECT nextval('Documents_did_seq') as key;");
        $did = $this->database->result(0, 0);
        $this->database->query("INSERT INTO Documents (did, app, uri) ".
                               "VALUES ($did, '$escapedApplication', '$escapedUri');");
    }

    // Hand over the real URI, not the SQL-escaped one, so that URIs
    // containing quotes or backslashes still resolve to the document.
    Document::index($did, $uri);
    return $did;
}
/**
 * Looks up the ID under which a document is stored in the index.
 *
 * @param application The name of the application the document belongs
 *                    to. This should have the same value as when the
 *                    document was added.
 * @param uri         URI of the document in the index.
 *
 * @return The ID of the document, or @p false when the document is not in
 *         the index or no database is available.
 */
public function documentId($application, $uri)
{
    if($this->gatewaysOnly)
    {
        return false;
    }

    $uri = addslashes($uri);
    $application = addslashes($application);
    $lookup = "SELECT D.did ".
              "FROM Documents D ".
              "WHERE D.app = '$application' AND D.uri = '$uri';";
    $this->database->query($lookup);

    return $this->database->hasResults() ? $this->database->result(0, 0) : false;
}
/**
 * Changes the location of a document in the search index.
 *
 * Only the stored URI is rewritten. Nothing happens in gateways-only mode.
 *
 * @param application The name of the application the document belongs
 *                    to. This should have the same value as when the
 *                    document was added.
 * @param sourceUri   URI of the document to be moved in the index.
 * @param destUri     The new URI of the document.
 *
 * @sa addDocument()
 */
public function moveDocument($application, $sourceUri, $destUri)
{
    if(!$this->gatewaysOnly)
    {
        $destUri = addslashes($destUri);
        $sourceUri = addslashes($sourceUri);
        $application = addslashes($application);
        $update = "UPDATE Documents SET uri = '$destUri' ".
                  "WHERE app = '$application' AND uri = '$sourceUri';";
        $this->database->query($update);
    }
}
/**
 * Deletes a document, together with its meta-data and word associations,
 * from the search index.
 *
 * Nothing happens when the document is not in the index or when the object
 * runs in gateways-only mode.
 *
 * @param application The name of the application the document belonged
 *                    to. This should have the same value as when the
 *                    document was added.
 * @param uri         URI of the document to be removed from the index.
 *
 * @sa addDocument()
 */
public function removeDocument($application, $uri)
{
    if($this->gatewaysOnly)
    {
        return;
    }

    $uri = addslashes($uri);
    $application = addslashes($application);
    $lookup = "SELECT D.did ".
              "FROM Documents D ".
              "WHERE D.app = '$application' AND D.uri = '$uri';";
    $this->database->query($lookup);
    if(!$this->database->hasResults())
    {
        return;
    }

    $did = $this->database->result(0, 0);
    // The referencing tables are emptied before the document row itself.
    foreach(array('MetaData', 'ContainsWord', 'Documents') as $table)
    {
        $this->database->query("DELETE FROM $table ".
                               "WHERE did = $did;");
    }
}
/**
 * Splits a plain text block into words and associates each of them with
 * the document with the given ID, using the given weight.
 *
 * As a rule of thumb, plain words which occur in the content of a
 * document are given a weight of 1, whereas words occurring in the title
 * are given a weight of 50. If a word and a document are associated
 * with each other more than once, the weights of the associations are
 * added up and a single association remains.
 *
 * Nothing happens in gateways-only mode.
 *
 * @param documentId ID of the document the text block should be
 *                   associated with.
 * @param text       The plain text containing the words to associate
 *                   with the document.
 * @param weight     The weight of the association.
 *
 * @sa documentId()
 */
public function indexText($documentId, $text, $weight)
{
    if($this->gatewaysOnly)
    {
        return;
    }

    $words = self::splitKeywords($text);
    $total = count($words);
    for($n = 0; $n < $total; $n++)
    {
        $this->indexWord($documentId, $words[$n], $weight);
    }
}
/**
 * @internal
 *
 * Indexes a single word containing only alphanumeric characters.
 *
 * The word is lower-cased, looked up in (or added to) the Words table and
 * then associated with the document. When the association already exists,
 * the given weight is added to the stored weight.
 *
 * @param documentId ID of the document the word belongs to.
 * @param word       The word to index. It is placed into the SQL text
 *                   as-is, so callers must only pass alphanumeric words
 *                   (as indexText() does through splitKeywords()).
 * @param weight     The weight to add for this association.
 */
private function indexWord($documentId, $word, $weight)
{
    $word = strtolower($word);

    // createNewDatabase() declares Words.word as VARCHAR(50); a longer
    // word would make the INSERT below fail, so only its first 50
    // characters are indexed.
    $maxWordLength = 50;
    if(strlen($word) > $maxWordLength)
    {
        $word = substr($word, 0, $maxWordLength);
    }

    // Find the word's ID, registering the word first when it is new.
    $this->database->query("SELECT W.wid ".
                           "FROM Words W ".
                           "WHERE W.word = '$word';");
    if($this->database->hasResults())
    {
        $wid = $this->database->result(0, 0);
    }
    else
    {
        $this->database->query("SELECT nextval('Words_wid_seq') as key");
        $wid = $this->database->result(0, 0);
        $this->database->query("INSERT INTO Words (wid, word) ".
                               "VALUES ($wid, '$word');");
    }

    // Create the document/word association, or raise its weight when the
    // two are associated already.
    $this->database->query("SELECT C.weight ".
                           "FROM ContainsWord C ".
                           "WHERE C.did = $documentId AND C.wid = $wid;");
    if($this->database->hasResults())
    {
        $newWeight = $weight + $this->database->result(0, 0);
        $this->database->query("UPDATE ContainsWord SET weight = $newWeight ".
                               "WHERE did = $documentId AND wid = $wid;");
    }
    else
    {
        $this->database->query("INSERT INTO ContainsWord (did, wid, weight) ".
                               "VALUES ($documentId, $wid, $weight);");
    }
}
/**
 * Sets a meta-data property on a document in the index.
 *
 * An existing value for the same key is overwritten; otherwise a new
 * property is stored. Nothing happens in gateways-only mode.
 *
 * @param documentId ID of the document to which the meta-data should be
 *                   applied.
 * @param key        Meta-data key to set.
 * @param value      Value of the meta-data to set.
 *
 * @sa documentId()
 */
public function setMetaData($documentId, $key, $value)
{
    if($this->gatewaysOnly == true)
    {
        return;
    }

    // NOTE(review): addslashes() is not a database-aware escaping
    // function; prefer the Database layer's own escaping or parameter
    // binding if it offers one.
    $key = addslashes($key);
    $value = addslashes($value);
    $this->database->query("SELECT M.did ".
                           "FROM MetaData M ".
                           "WHERE M.did = $documentId AND M.key = '$key';");
    if($this->database->hasResults())
    {
        // The UPDATE statement declares no table alias, so the column is
        // referenced as plain "key" here; "M.key" is not a valid reference
        // in this statement.
        $this->database->query("UPDATE MetaData SET value = '$value' ".
                               "WHERE did = $documentId AND key = '$key';");
    }
    else
    {
        $this->database->query("INSERT INTO MetaData (did, key, value) ".
                               "VALUES ($documentId, '$key', '$value');");
    }
}
/**
 * Removes a meta-data property from a document in the index.
 *
 * Nothing happens in gateways-only mode.
 *
 * @param documentId ID of the document of which the meta-data should be
 *                   unset.
 * @param key        Meta-data key to unset.
 *
 * @sa documentId()
 */
public function unsetMetaData($documentId, $key)
{
    if(!$this->gatewaysOnly)
    {
        $key = addslashes($key);
        $delete = "DELETE FROM MetaData ".
                  "WHERE did = $documentId AND key = '$key';";
        $this->database->query($delete);
    }
}
/**
 * @internal
 *
 * Splits a space separated list of keywords into an array.
 *
 * A keyword is a run of characters for which ctype_alnum() is true. Every
 * other character ends the current keyword, except for apostrophes and
 * hyphens: those are dropped without ending the keyword, so "don't"
 * becomes "dont" and "well-known" becomes "wellknown".
 *
 * @param keywords The text to split.
 *
 * @return An array with the keywords in order of appearance; empty when
 *         the text contains no alphanumeric characters.
 */
private static function splitKeywords($keywords)
{
    $keywords = (string) $keywords;
    $length = strlen($keywords);
    $keywordsArray = array();
    $word = '';
    for($i = 0; $i < $length; $i++)
    {
        // Square brackets are required here: the curly brace offset
        // syntax ($keywords{$i}) no longer parses on PHP 8.
        $character = $keywords[$i];
        if(ctype_alnum($character))
        {
            $word .= $character;
            continue;
        }
        if($character == '\'' || $character == '-')
        {
            // Dropped, but the surrounding word parts stay joined.
            continue;
        }
        if($word !== '')
        {
            $keywordsArray[] = $word;
            $word = '';
        }
    }
    // Flush the keyword that runs up to the end of the text.
    if($word !== '')
    {
        $keywordsArray[] = $word;
    }
    return $keywordsArray;
}
/**
 * The shared Search object handed out by instance(); @p null until
 * instance() is called for the first time.
 */
private static $instance = null;
/**
 * Connection to the search database as returned by Database::connection(),
 * or @p null when no connection could be made.
 */
private $database;
/**
 * Loaded gateway interface objects, keyed by interface name.
 */
private $gateways;
/**
 * Loaded index interface objects, keyed by interface name.
 */
private $indexes;
/**
 * @p true when no database is available, in which case only gateways can
 * be searched and all index operations are skipped.
 */
private $gatewaysOnly;
/**
 * Internal names of all loaded interfaces (indexes and gateways).
 */
private $availableInterfaces;
/**
 * Display names of all loaded interfaces, keyed by internal name. The
 * values are what each interface's name() method returned.
 */
private $availableInterfaceNames;
/**
 * Internal names of the interfaces currently selected for searching.
 */
private $selectedInterfaces;
/**
 * Internal names of the index interfaces for which doSearch() queries the
 * database.
 */
private $selectedIndexes;
}
?>