Location: PHPKode > projects > Taxonomic Search Engine > itis_wrapper.php
<?php

// $Id: itis_wrapper.php,v 1.1.1.1 2005/05/19 10:31:10 rdmpage Exp $


/**
 * @brief Wrapper to talk to the Integrated Taxonomic Information System (ITIS).
 * We use the ITIS*ca server, which can return results in XML.
 * The DTD for ITIS is available at <A HREF="http://www.cbif.gc.ca/xml/itis_our.dtd">
 * http://www.cbif.gc.ca/xml/itis_our.dtd</A>.
 *
 */


class ITISWrapper extends Wrapper {

	var $xpath;
	

	/**
	 * @brief Set the server, authority and namespace used for ITIS, then
	 * start the result XML document.
	 */
	function ITISWrapper ()
	{
	
		$this->server = "www.cbif.gc.ca";

		$this->authority = "itis.usda.gov";
		$this->namespace = "tsn";
		
		$this->StartXML();
	}
	
	/**
	 * @brief Create an HTTP client, configured with the proxy (if any)
	 * @private
	 *
	 * The proxy is only set when $config['proxy_name'] is defined and not empty.
	 *
	 * @return A new Net_HTTP_Client object
	 */
	function newHTTPClient ()
	{
		global $config;
		
		$http = new Net_HTTP_Client();
		if (isset($config['proxy_name']) && $config['proxy_name'] != '')
		{
			$http->setProxy ($config['proxy_name'], $config['proxy_port']);
		}
		return $http;
	}
	
	/**
	 * @brief Try to connect to ITIS*ca server
	 *
	 * Simply tries to open an HTTP connection to the configured server
	 * (www.cbif.gc.ca), going through the proxy if one is configured.
	 *
	 * @return true if service is live, false otherwise
	 */
	function IsAlive ()
	{
		$query = "http://" . $this->server;
		
		// newHTTPClient() reads the global $config, so the proxy settings
		// are honoured here as well as in the search methods.
		$http = $this->newHTTPClient();
		
		$result = true;
		$http->Connect($query, 80 ) or $result = false;
		$http->Disconnect();
		return $result;				
	}
	
	/**
	 * @brief Fix mistakes and problems in ITIS XML
	 * @private
	 *
	 * There are a few "gotchas" to
	 * deal with. Sablotron (the XSL engine used by PHP) doesn't handle URIs 
	 * beginning with http, so we need to replace the line
	 *
	 * <pre><!DOCTYPE itis SYSTEM "http://www.cbif.gc.ca/xml/itis_our.dtd"></pre>
	 *
	 * with
	 *
	 *<pre><!DOCTYPE itis SYSTEM "file://dtd/itis_our.dtd"></pre>
	 *
	 * which points to a local copy of this file in the dtd directory. We need the DTD
	 * to be able to resolve entities in the ITIS XML.
	 *
	 * Some of the text comments (e.g., in a <detail> tag) may contain bare ampersands
	 * (&), which need to be converted to the entity &amp;. Ampersands that
	 * already start an entity or character reference (&name; &#123; &#x1F;) are
	 * left alone, so applying this function twice gives the same result.
	 *
	 * Finally, any <http...> "tags" are removed because they are unbalanced.
	 *
	 * @param itis_xml Raw XML obtained from ITIS server
	 * @return The repaired XML
	 *
	 */
	function fixXML ($itis_xml)
	{
		// Sablot does not handle URIs beginning with http, so use a file DTD
		$itis_xml = str_replace (
			"<!DOCTYPE itis SYSTEM \"http://www.cbif.gc.ca/xml/itis_our.dtd\">", 
			"<!DOCTYPE itis SYSTEM \"file://dtd/itis_our.dtd\">", 
			$itis_xml);
		
		// Comments in text may contain &, rather than &amp;. Escape every
		// ampersand that is not the start of an entity or character reference
		// (this covers " & " as well as cases such as "A&B").
		$itis_xml = preg_replace (
			"/&(?!(?:[A-Za-z_:][A-Za-z0-9_.:-]*|#[0-9]+|#x[0-9A-Fa-f]+);)/",
			"&amp;",
			$itis_xml);
		
		// Strip any <http...> tags as these are unbalanced (see e.g Nimbaphrynoides)
		$itis_xml = preg_replace ("/<http(.*?)>/", "", $itis_xml);
		
		return $itis_xml;
	}

	/**
	 * @brief Transform (fixed) ITIS XML into our format with an XSLT style sheet
	 * @private
	 *
	 * If $config['sabcmd'] is set the external sablot processor is used (via
	 * XSLT_Buffer), otherwise the PHP XSLT extension is used. The style sheet
	 * is passed the parameter "timeused" (the time used to search this service).
	 *
	 * @param itis_xml ITIS XML, already passed through fixXML
	 * @param xslt_file Path of the XSLT style sheet to apply
	 * @return The transformed XML. If the external processor reports an error
	 * the error is recorded and the wrapper's own (closed) XML document is
	 * returned instead.
	 */
	function transformITIS ($itis_xml, $xslt_file)
	{
		global $config;
		
		// Tell XSLT processor the time used to search this service
		$params = array("timeused" => $this->time_used);				
		
		if (isset($config['sabcmd']) && $config['sabcmd'] != '')
		{	
			// Transform using external sablot processor
			$xpresult = XSLT_Buffer ($itis_xml, $xslt_file,'', $params);
			
			// If we have an error then bail out. Compare with false explicitly:
			// strpos returns 0 (which is "falsy") when the marker starts the string.
			if (strpos ( $xpresult,"[code:" ) !== false)
			{				
				$this->Error ("XSLT", $xpresult);
				$this->EndXML();
				return $this->xml;
			}
			
			return $xpresult;
		}
		
		// Use XSLT extension
		$xslt = join ("", file($xslt_file));
		$arg_buffer = array("/xml" => $itis_xml, "/xslt" => $xslt);
		$xp = xslt_create() or die ("Could not create XSLT processor");
		$xpresult = xslt_process($xp, "arg:/xml", "arg:/xslt", NULL, $arg_buffer, $params);
		if (!$xpresult)
		{
			// The error is written straight to the output (not recorded with
			// Error()); the error details must be read before the processor is freed.
			echo "An error occurred: " . xslt_error($xp) . "(error code " . xslt_errno($xp) . ")";
		}
		xslt_free($xp);
		
		return $xpresult;
	}


	/**
	 * @brief Return details for a single record in ITIS, using their XML interface
	 *
	 * The raw ITIS XML is taken from the cache when available, otherwise it is
	 * fetched from the server and stored in the cache. Only the GET request
	 * itself is timed.
	 *
	 * @param id ITIS tsn of taxon name to retrieve
	 * @return Result in my XML format (or the wrapper's XML document holding
	 * the error, if the server could not be reached or the request failed)
	 */
	function GetDataForID ($id)
	{
		$itis_xml = $this->GetDataFromCache ("GetDataForID", "xml", $id);
		if ($itis_xml == "")
		{
			// Get fresh data
			$query = "http://";
			$query .= $this->server;
			$query .= "/pls/itisca/taxa_xml.record?p_tsn=";
			$query .= $id;
			$query .= "&p_type=y&p_lang=";
	
			$http = $this->newHTTPClient();
			if (!$http->Connect($query, 80 ))
			{
				$this->Error ("server", "Can't connect");
				$this->EndXML();
				return $this->xml;
			}
			
			$this->StartTimer();
			$status = $http->Get( $query );	
			$this->StopTimer();
	
			if( $status != 200 )
			{
				$this->Error ("GET", $http->getStatusMessage());
				// Close the connection on the failure path too
				$http->Disconnect();
				$this->EndXML();
				return $this->xml;
			}
	
			$itis_xml = $http->getBody();
			$http->Disconnect();

			// The raw (unfixed) XML is what gets cached
			$this->StoreDataInCache ("GetDataForID", "xml", $id, $itis_xml);
		}
		
		$itis_xml = $this->fixXML ($itis_xml);
		
		return $this->transformITIS ($itis_xml, "xsl/itis_taxon.xsl");
	}
	

	
	/**
	 * @brief Search for name and return an XML document listing the names
	 *
	 * We talk to ITIS using its URL API and get XML back from the server.
	 * This is then transformed into our own format. The raw ITIS XML is cached
	 * under a key derived from the name with nameToSafe. The timer covers the
	 * cache lookup and, if needed, the request to the server.
	 *
	 * @param name The taxon name to search for
	 * @param qualifier Kind of search (default is exact)
	 * @param max_results The maximum number of records to return (default is 1).
	 * Note that this value is not used when building the query.
	 * @return Result in my XML format (or the wrapper's XML document holding
	 * the error, if the server could not be reached or the request failed)
	 */
	function NameSearch ($name, $qualifier = EXACT, $max_results = 1) 
	{
		$id = nameToSafe ($name);
		
		$this->StartTimer();
		
		$itis_xml = $this->GetDataFromCache ("NameSearch", "xml", $id);
		if ($itis_xml == "")
		{
			$query = "http://";
			$query .= $this->server;
			$query .= "/pls/itisca/taxastep?king=every";
			if ($qualifier == EXACT)
			{
				// Return an exact match
				$query .= "&p_action=exactly+for";
			}
			else
			{
				$query .= "&p_action=every";
			}
			$query .= "&taxa=";
			// Literal replacement of spaces; str_replace is used because the
			// ereg functions are deprecated and no longer exist in PHP 7.
			$query .= str_replace (" ", "%20", $name);
			$query .="&p_format=xml&p_ifx=&p_lang=";
	
			$http = $this->newHTTPClient();
			if (!$http->Connect($query, 80 ))
			{
				$this->Error ("server", "Can't connect");
				$this->EndXML();
				return $this->xml;
			}
			
			$status = $http->Get( $query );	
			
			if( $status != 200 )
			{
				$this->Error ("GET", $http->getStatusMessage());
				// Close the connection on the failure path too
				$http->Disconnect();
				$this->EndXML();
				return $this->xml;
			}
			
			// Get the raw ITIS XML
			$itis_xml = $http->getBody();
			$http->Disconnect();
	
			// The raw (unfixed) XML is what gets cached
			$this->StoreDataInCache ("NameSearch", "xml", $id, $itis_xml);
		}
		$this->StopTimer();
				
		$itis_xml = $this->fixXML ($itis_xml);
		
		// Transform ITIS XML into our format
		return $this->transformITIS ($itis_xml, "xsl/itis.xsl");
	}

/*	function GetStandardData ($id)
	{
		// 1. Get data from server

		// 2. extract all info and store in a hash

		// 3. call a method to convert array to XML document

		// to do: store result in a cache
	}

	function GetTimeToGet()
	{
	}
*/

}

?>
Return current item: Taxonomic Search Engine