Location: PHPKode > projects > Aukyla Document Management System > base/Search.php
<?php
/*
     Search.php, main class for handling the search system
     Copyright (C) 2005 Arend van Beelen, Auton Rijnsburg

     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU General Public License as published by the Free
     Software Foundation; either version 2 of the License, or (at your option)
     any later version.

     This program is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
     more details.

     You should have received a copy of the GNU General Public License along
     with this program; if not, write to the Free Software Foundation, Inc.,
     59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

     For any questions, comments or whatever, you may mail me at: hide@address.com
*/

require_once('Config.php');
require_once('Database.php');
require_once('Document.php');
require_once('MIME.php');
require_once('URI.php');

/**
 * @brief Plain value object describing a single search hit.
 *
 * Search::doSearch() creates one instance per row found in the index and
 * fills in the properties it knows about. All properties start out as
 * @p null.
 *
 * @sa Search
 *
 * @since Aukyla 1.1
 */
class SearchResult
{
	/**
	 * Name of the application (or gateway) which produced this result.
	 */
	public $application = null;

	/**
	 * Location of the document that was found.
	 *
	 * This is a Local URI, unless @p fromGateway is @p true, in which case
	 * it may be a regular internet URL instead.
	 */
	public $uri = null;

	/**
	 * @p true if @p uri is a regular internet URL rather than a Local URI.
	 */
	public $fromGateway = null;

	/**
	 * Title describing the result.
	 */
	public $title = null;

	/**
	 * Short summary of the result. Optional.
	 */
	public $summary = null;

	/**
	 * MIME type of the document that was found.
	 */
	public $mimeType = null;

	/**
	 * Any other meta-data that was found.
	 */
	public $metaData = null;

	/**
	 * Score of the result.
	 */
	public $score = null;

	/**
	 * Flags which may indicate the availability of other properties.
	 * Currently unused.
	 */
	public $flags = null;
}

/**
 * @brief Main class for using the Aukyla General Search System.
 *
 * This class can be used to perform searches through all Aukyla applications.
 * Also, Aukyla applications should push their indexes to this class so Aukyla
 * always has an up-to-date index of all content of all applications.
 *
 * Two kinds of plugins are loaded on construction:
 *  - search interfaces (plugins/SearchInterfaces), which are backed by the
 *    search database maintained by this class, and
 *  - gateway interfaces (plugins/SearchGatewayInterfaces), which perform the
 *    search themselves.
 *
 * When no database connection can be made, the class switches to
 * "gateways only" mode: all index-related functions become no-ops and only
 * gateways are searched.
 *
 * NOTE(review): string values are escaped for SQL with addslashes() (see
 * escape()). Whether that is sufficient depends on the database server
 * configuration; if the Database class offers a native quoting function or
 * parameterised queries, escape() is the single place to switch over.
 *
 * @since Aukyla 1.1
 */
class Search
{
	/**
	 * Returns the shared instance of the Search class, creating it on first
	 * use.
	 *
	 * @return An instance of the Search class.
	 */
	public static function instance()
	{
		if(self::$instance === null)
		{
			self::$instance = new Search();
		}

		return self::$instance;
	}

	/**
	 * @internal
	 *
	 * Constructor.
	 *
	 * You should not instantiate this class yourself, instead you should use
	 * Search::instance() to obtain the shared instance.
	 */
	private function __construct()
	{
		$this->database = null;
		$this->gateways = array();
		$this->indexes = array();
		$this->gatewaysOnly = false;
		$this->availableInterfaces = array();
		$this->availableInterfaceNames = array();
		$this->selectedInterfaces = array();
		$this->selectedIndexes = array();

		// The connection must be attempted first: loadInterfaces() skips all
		// database-backed interfaces when no database is available.
		$this->connectToDatabase();

		$this->loadInterfaces();
	}

	/**
	 * @internal
	 *
	 * Connects to the search database using the searchDatabase* settings from
	 * the global configuration. If no connection is obtained, the class
	 * switches to gateways-only mode.
	 */
	private function connectToDatabase()
	{
		$type = Config::globals('searchDatabaseType');
		$server = Config::globals('searchDatabaseServer');
		$database = Config::globals('searchDatabaseName');
		$username = Config::globals('searchDatabaseUsername');
		$password = Config::globals('searchDatabasePassword');

		$connection = Database::connection($type, $server, $database, $username, $password);

		// Treat a missing connection (false or null) as "no database" so that
		// later calls never dereference a non-object.
		if($connection === false || $connection === null)
		{
			$this->database = null;
			$this->gatewaysOnly = true;
			return;
		}

		$this->database = $connection;
	}

	/**
	 * @internal
	 *
	 * Searches and loads all interfaces.
	 *
	 * Every PHP file in plugins/SearchInterfaces is expected to define a class
	 * named "<file>_SearchInterface"; every file in
	 * plugins/SearchGatewayInterfaces a class named
	 * "<file>_SearchGatewayInterface". All successfully loaded interfaces are
	 * selected for searching by default.
	 */
	private function loadInterfaces()
	{
		$indexFiles = glob(AUKYLA_DIR.'/plugins/SearchInterfaces/*.php');
		if($indexFiles !== false)
		{
			foreach($indexFiles as $file)
			{
				$interfaceName = basename($file, '.php');
				$className = "{$interfaceName}_SearchInterface";

				// Keep the include on its own statement: include_once binds
				// looser than ||, so it cannot be used inside the condition.
				$include = include_once($file);
				if(!$include || !class_exists($className))
				{
					trigger_error("Invalid search interface \"$className\" in plugins/SearchInterfaces");
					continue;
				}

				if($this->gatewaysOnly)
				{
					trigger_error("Interface \"$className\" ignored because no database is available");
					continue;
				}

				$index = new $className();
				$this->indexes[$interfaceName] = $index;
				$this->availableInterfaces[] = $interfaceName;
				$this->availableInterfaceNames[$interfaceName] = $index->name();
				$this->selectedIndexes[] = $interfaceName;
				$this->selectedInterfaces[] = $interfaceName;
			}
		}

		$gatewayFiles = glob(AUKYLA_DIR.'/plugins/SearchGatewayInterfaces/*.php');
		if($gatewayFiles !== false)
		{
			foreach($gatewayFiles as $file)
			{
				$interfaceName = basename($file, '.php');
				$className = "{$interfaceName}_SearchGatewayInterface";

				$include = include_once($file);
				if(!$include || !class_exists($className))
				{
					trigger_error("Invalid search interface \"$className\" in plugins/SearchGatewayInterfaces");
					continue;
				}

				$gateway = new $className();
				$this->gateways[$interfaceName] = $gateway;
				$this->availableInterfaces[] = $interfaceName;
				$this->availableInterfaceNames[$interfaceName] = $gateway->name();
				$this->selectedInterfaces[] = $interfaceName;
			}
		}
	}

	/**
	 * Returns a list of all available interfaces.
	 *
	 * @return An array containing the internal names of all available
	 *         interfaces.
	 */
	public function availableInterfaces()
	{
		return $this->availableInterfaces;
	}

	/**
	 * Returns the full display names of all available interfaces.
	 *
	 * @return An array containing all the full display names of available
	 *         interfaces. The keys are the internal names of the interfaces
	 *         as reported by availableInterfaces().
	 */
	public function interfaceNames()
	{
		return $this->availableInterfaceNames;
	}

	/**
	 * Sets which interfaces should be used for searching. By default,
	 * all available search interfaces are used.
	 *
	 * Names which do not refer to an available interface are ignored.
	 *
	 * @param interfaces An array of strings containing the names of the
	 *                   interfaces which should be used when searching.
	 */
	public function setInterfaces($interfaces)
	{
		// Both selections are rebuilt from scratch. The index selection has to
		// be reset too, otherwise deselected indexes would still be searched
		// and reselected ones would be searched more than once.
		$this->selectedInterfaces = array();
		$this->selectedIndexes = array();

		foreach($interfaces as $interface)
		{
			if(!in_array($interface, $this->availableInterfaces))
			{
				continue;
			}

			$this->selectedInterfaces[] = $interface;

			if(isset($this->indexes[$interface]))
			{
				$this->selectedIndexes[] = $interface;
			}
		}
	}

	/**
	 * Performs a search.
	 *
	 * Selected gateways are always queried. Selected indexes are only queried
	 * when a database is available; if the keyword list turns out to be empty
	 * they yield an empty result list without touching the database.
	 *
	 * @param keywords   A list of space separated keywords to search for.
	 * @param maxResults The maximum number of results to return per
	 *                   selected interface.
	 * @param offset     The offset of the results to return per interface.
	 *
	 * @return An array of arrays with SearchResult objects. Every key in the
	 *         first array corresponds to the name of the interface which returned
	 *         the results in the value array. The value arrays are made up of
	 *         SearchResult objects.
	 */
	public function doSearch($keywords, $maxResults = 10, $offset = 0)
	{
		$keywords = self::splitKeywords($keywords);

		$results = array();
		foreach($this->gateways as $gatewayName => $gateway)
		{
			if(in_array($gatewayName, $this->selectedInterfaces))
			{
				$results[$gatewayName] = $gateway->search($keywords, $maxResults, $offset);
			}
		}

		if($this->gatewaysOnly)
		{
			return $results;
		}

		// LIMIT and OFFSET are interpolated into the SQL text, so force them
		// to be non-negative integers.
		$limit = max(0, (int) $maxResults);
		$skip  = max(0, (int) $offset);

		foreach($this->selectedIndexes as $application)
		{
			$results[$application] = array();

			if(count($keywords) == 0)
			{
				// Nothing to search for; an empty LIKE pattern would match
				// every indexed word.
				continue;
			}

			// Only the SQL text gets the escaped name; the result key and
			// SearchResult::application keep the real interface name.
			$escapedApplication = self::escape($application);

			// One sub-select per keyword. Keywords contain only alphanumeric
			// characters (see splitKeywords()), so they need no escaping.
			// NOTE(review): INTERSECT ALL compares (uri, weight) pairs, so a
			// document only survives if the weights match for every keyword
			// - confirm that this is intended.
			$subQueries = array();
			foreach($keywords as $keyword)
			{
				$subQueries[] = "SELECT D.uri AS uri, C.weight AS weight ".
				                "FROM Documents D, ContainsWord C, Words W ".
				                "WHERE D.app = '$escapedApplication' AND D.did = C.did AND C.wid = W.wid AND W.word LIKE '%".strtolower($keyword)."%'";
			}

			$query = "SELECT uri, SUM(weight) AS weight ".
			         "FROM (".implode(" INTERSECT ALL ", $subQueries).") AS results ".
			         "GROUP BY uri ".
			         "ORDER BY weight DESC ".
			         "LIMIT $limit ".
			         "OFFSET $skip;";

			$this->database->query($query);

			for($i = 0; $i < $this->database->numberOfRows(); $i++)
			{
				$row = $this->database->resultArray();

				$result = new SearchResult();
				$result->application = $application;
				$result->uri         = $row[0];
				$result->fromGateway = false;
				$result->score       = $row[1];
				$result->title       = URI::metaData($result->uri, 'name');
				$result->summary     = URI::metaData($result->uri, 'comments');
				$result->mimeType    = MIME::type($result->uri);

				$results[$application][] = $result;
			}
		}

		return $results;
	}

	/**
	 * Rebuilds the database.
	 *
	 * Cleans up the entire database and asks all applications to re-index
	 * their content. This can be quite a costly operation and should
	 * generally not be done in a regular page load.
	 *
	 * You can use the base/RebuildSearchDatabase.php script to rebuild the
	 * database from the command line.
	 */
	public function rebuildDatabase()
	{
		if($this->gatewaysOnly)
		{
			return;
		}

		$this->createNewDatabase();

		foreach($this->indexes as $index)
		{
			$index->indexAll();
		}
	}

	/**
	 * Removes all tables and creates a new clean database. This will discard your
	 * entire search index!
	 */
	public function createNewDatabase()
	{
		if($this->gatewaysOnly)
		{
			return;
		}

		// Dropped in reverse dependency order. IF EXISTS makes this work on a
		// database in which the tables have not been created yet.
		$this->database->query('DROP TABLE IF EXISTS MetaData;');
		$this->database->query('DROP TABLE IF EXISTS ContainsWord;');
		$this->database->query('DROP TABLE IF EXISTS Words;');
		$this->database->query('DROP TABLE IF EXISTS Documents;');

		$this->database->query('CREATE TABLE Documents'.
		                       '('.
		                       ' did SERIAL NOT NULL,'.
		                       ' app VARCHAR(255) NOT NULL,'.
		                       ' uri VARCHAR(255) NOT NULL,'.
		                       ' PRIMARY KEY(did),'.
		                       ' UNIQUE(app, uri)'.
		                       ');');
		$this->database->query('CREATE TABLE Words'.
		                       '('.
		                       ' wid SERIAL NOT NULL,'.
		                       ' word VARCHAR(50) NOT NULL,'.
		                       ' PRIMARY KEY(wid),'.
		                       ' UNIQUE(word)'.
		                       ');');
		$this->database->query('CREATE TABLE ContainsWord'.
		                       '('.
		                       ' did INTEGER NOT NULL,'.
		                       ' wid INTEGER NOT NULL,'.
		                       ' weight INTEGER NOT NULL DEFAULT 1,'.
		                       ' PRIMARY KEY(did, wid),'.
		                       ' FOREIGN KEY(did) REFERENCES Documents,'.
		                       ' FOREIGN KEY(wid) REFERENCES Words'.
		                       ');');
		// NOTE(review): did is declared SERIAL here although it always refers
		// to Documents.did (ContainsWord uses INTEGER) - confirm whether the
		// extra sequence is wanted.
		$this->database->query('CREATE TABLE MetaData'.
		                       '('.
		                       ' did SERIAL NOT NULL,'.
		                       ' key VARCHAR(50) NOT NULL,'.
		                       ' value VARCHAR(255) NOT NULL,'.
		                       ' PRIMARY KEY(did, key),'.
		                       ' FOREIGN KEY(did) REFERENCES Documents'.
		                       ');');
	}

	/**
	 * Adds a document to the search index and indexes it.
	 *
	 * If the document is already present in the index its existing ID is
	 * reused. This function automatically calls Document::index() for you.
	 *
	 * @param application The name of the application the document belongs
	 *                    to. By supplying this argument, the search engine
	 *                    can sort documents from different applications.
	 * @param uri         URI of the document to be added to the index.
	 *
	 * @return The ID of the added document, or @p false on error. This ID
	 *         can be used for subsequent indexText() calls.
	 *
	 * @sa indexText(), removeDocument()
	 */
	public function addDocument($application, $uri)
	{
		if($this->gatewaysOnly)
		{
			return false;
		}

		$did = $this->documentId($application, $uri);
		if($did === false)
		{
			$escapedApplication = self::escape($application);
			$escapedUri         = self::escape($uri);

			$this->database->query("SELECT nextval('Documents_did_seq') as key;");
			$did = $this->database->result(0, 0);
			$this->database->query("INSERT INTO Documents (did, app, uri) ".
			                       "VALUES ($did, '$escapedApplication', '$escapedUri');");
		}

		// The document itself is indexed by its real URI, not by the
		// SQL-escaped form used in the queries above.
		Document::index($did, $uri);

		return $did;
	}

	/**
	 * Returns the ID of a document in the index.
	 *
	 * @param application The name of the application the document belongs
	 *                    to. This should have the same value as when the
	 *                    document was added.
	 * @param uri         URI of the document in the index.
	 *
	 * @return The ID of the document, or @p false if the document is not in
	 *         the index or no database is available.
	 */
	public function documentId($application, $uri)
	{
		if($this->gatewaysOnly)
		{
			return false;
		}

		$escapedApplication = self::escape($application);
		$escapedUri         = self::escape($uri);

		$this->database->query("SELECT D.did ".
		                       "FROM Documents D ".
		                       "WHERE D.app = '$escapedApplication' AND D.uri = '$escapedUri';");
		if($this->database->hasResults())
		{
			return $this->database->result(0, 0);
		}

		return false;
	}

	/**
	 * Moves a document from one location to another in the search index.
	 *
	 * @param application The name of the application the document belongs
	 *                    to. This should have the same value as when the
	 *                    document was added.
	 * @param sourceUri   URI of the document to be moved in the index.
	 * @param destUri     The new URI of the document.
	 *
	 * @sa addDocument()
	 */
	public function moveDocument($application, $sourceUri, $destUri)
	{
		if($this->gatewaysOnly)
		{
			return;
		}

		$escapedApplication = self::escape($application);
		$escapedSource      = self::escape($sourceUri);
		$escapedDestination = self::escape($destUri);

		$this->database->query("UPDATE Documents SET uri = '$escapedDestination' ".
		                       "WHERE app = '$escapedApplication' AND uri = '$escapedSource';");
	}

	/**
	 * Removes a document and all its associations from the search index.
	 *
	 * Nothing happens if the document is not in the index.
	 *
	 * @param application The name of the application the document belonged
	 *                    to. This should have the same value as when the
	 *                    document was added.
	 * @param uri         URI of the document to be removed from the index.
	 *
	 * @sa addDocument()
	 */
	public function removeDocument($application, $uri)
	{
		if($this->gatewaysOnly)
		{
			return;
		}

		$did = $this->documentId($application, $uri);
		if($did === false)
		{
			return;
		}

		// Rows referring to the document go first, the document itself last,
		// so the foreign keys are never violated.
		$this->database->query("DELETE FROM MetaData ".
		                       "WHERE did = $did;");
		$this->database->query("DELETE FROM ContainsWord ".
		                       "WHERE did = $did;");
		$this->database->query("DELETE FROM Documents ".
		                       "WHERE did = $did;");
	}

	/**
	 * Adds the words from a given plain text block to the search index and
	 * associates them with the document with the given ID. The associations
	 * are given a weight as specified.
	 *
	 * As a rule of thumb, plain words which occur in the content of a
	 * document are given a weight of 1, whereas words occurring in the title
	 * are given a weight of 50. If a word and a document are associated
	 * with each other more than once, the weight of both associations is
	 * added and a single association remains.
	 *
	 * @param documentId ID of the document the text block should be
	 *                   associated with.
	 * @param text       The plain text containing the words to associate
	 *                   with the document.
	 * @param weight     The weight of the association.
	 *
	 * @sa documentId()
	 */
	public function indexText($documentId, $text, $weight)
	{
		if($this->gatewaysOnly)
		{
			return;
		}

		foreach(self::splitKeywords($text) as $keyword)
		{
			$this->indexWord($documentId, $keyword, $weight);
		}
	}

	/**
	 * @internal
	 *
	 * Indexes a single word containing only alphanumeric characters.
	 *
	 * The word is stored in lower case. The document ID and weight are
	 * forced to integers because they end up in the SQL text.
	 */
	private function indexWord($documentId, $word, $weight)
	{
		$documentId = (int) $documentId;
		$weight     = (int) $weight;
		$word       = strtolower($word);

		// Look up the word, registering it first if it is new.
		$this->database->query("SELECT W.wid ".
		                       "FROM Words W ".
		                       "WHERE W.word = '$word';");
		if($this->database->hasResults())
		{
			$wid = $this->database->result(0, 0);
		}
		else
		{
			$this->database->query("SELECT nextval('Words_wid_seq') as key");
			$wid = $this->database->result(0, 0);
			$this->database->query("INSERT INTO Words (wid, word) ".
			                       "VALUES ($wid, '$word');");
		}

		// Add the weight to an existing association, or create a new one.
		$this->database->query("SELECT C.weight ".
		                       "FROM ContainsWord C ".
		                       "WHERE C.did = $documentId AND C.wid = $wid;");
		if($this->database->hasResults())
		{
			$newWeight = $weight + $this->database->result(0, 0);
			$this->database->query("UPDATE ContainsWord SET weight = $newWeight ".
			                       "WHERE did = $documentId AND wid = $wid;");
		}
		else
		{
			$this->database->query("INSERT INTO ContainsWord (did, wid, weight) ".
			                       "VALUES ($documentId, $wid, $weight);");
		}
	}

	/**
	 * Sets a meta-data property on a document in the index.
	 *
	 * An existing value for the same key is overwritten.
	 *
	 * @param documentId ID of the document to which the meta-data should be
	 *                   applied.
	 * @param key        Meta-data key to set.
	 * @param value      Value of the meta-data to set.
	 *
	 * @sa documentId()
	 */
	public function setMetaData($documentId, $key, $value)
	{
		if($this->gatewaysOnly)
		{
			return;
		}

		$documentId = (int) $documentId;
		$key        = self::escape($key);
		$value      = self::escape($value);

		$this->database->query("SELECT M.did ".
		                       "FROM MetaData M ".
		                       "WHERE M.did = $documentId AND M.key = '$key';");
		if($this->database->hasResults())
		{
			// No table alias here: the UPDATE statement does not declare one.
			$this->database->query("UPDATE MetaData SET value = '$value' ".
			                       "WHERE did = $documentId AND key = '$key';");
		}
		else
		{
			$this->database->query("INSERT INTO MetaData (did, key, value) ".
			                       "VALUES ($documentId, '$key', '$value');");
		}
	}

	/**
	 * Unsets a meta-data property of a document in the index.
	 *
	 * @param documentId ID of the document of which the meta-data should be
	 *                   unset.
	 * @param key        Meta-data key to unset.
	 *
	 * @sa documentId()
	 */
	public function unsetMetaData($documentId, $key)
	{
		if($this->gatewaysOnly)
		{
			return;
		}

		$documentId = (int) $documentId;
		$key        = self::escape($key);

		$this->database->query("DELETE FROM MetaData ".
		                       "WHERE did = $documentId AND key = '$key';");
	}

	/**
	 * @internal
	 *
	 * Splits a list of keywords into an array.
	 *
	 * Apostrophes and hyphens are dropped without ending the current word
	 * ("well-known" becomes "wellknown"); every other non-alphanumeric
	 * character separates two words. Empty words are never returned.
	 */
	private static function splitKeywords($keywords)
	{
		$keywords = (string) $keywords;
		$length = strlen($keywords);

		$keywordsArray = array();
		$word = '';
		for($i = 0; $i < $length; $i++)
		{
			$character = $keywords[$i];

			if(ctype_alnum($character))
			{
				$word .= $character;
				continue;
			}

			if($character == '\'' || $character == '-')
			{
				continue;
			}

			if($word != '')
			{
				$keywordsArray[] = $word;
				$word = '';
			}
		}
		if($word != '')
		{
			$keywordsArray[] = $word;
		}

		return $keywordsArray;
	}

	/**
	 * @internal
	 *
	 * Escapes a string value for use between single quotes in a query.
	 *
	 * NOTE(review): addslashes() is what this class has always used; replace
	 * it here if the Database class provides a proper quoting function.
	 */
	private static function escape($value)
	{
		return addslashes($value);
	}

	// The shared instance returned by instance().
	private static $instance = null;
	// Connection to the search database, or null in gateways-only mode.
	private $database;
	// Gateway interface objects, keyed by interface name.
	private $gateways;
	// Database-backed interface objects, keyed by interface name.
	private $indexes;
	// True when no database is available.
	private $gatewaysOnly;
	// Names of all loaded interfaces (indexes and gateways).
	private $availableInterfaces;
	// Display names of all loaded interfaces, keyed by interface name.
	private $availableInterfaceNames;
	// Names of the interfaces selected for searching.
	private $selectedInterfaces;
	// Names of the selected interfaces which are database-backed.
	private $selectedIndexes;
}

?>
Return current item: Aukyla Document Management System