Location: PHPKode > scripts > KnowMore > knowmore/knowmore.php
<?php

/**
 * KnowMore Class - a class to screen scrape knowmore.org, and link
 * company names to the knowmore.org database.
 *
 * Copyright 2006 John Kawakami
 *
 * This class is licensed under the terms of the GNU General Public License.
 * For details, see http://gnu.org/copyleft/gpl.html
 * 
 * Version 1 - 2-Aug-2006
 */
class KnowMore
{
	/** Raw HTML of the knowmore.org "browse all" page (empty string if unavailable). */
	var $text;
	/** List of array( url, name ) pairs scraped from $text. */
	var $data;

	/**
	 * Loads the browse page (from the /tmp/knowmore cache when present,
	 * otherwise from knowmore.org) and builds the company list.
	 */
	function __construct()
	{
		$this->data = array();

		$page = false;
		if (file_exists('/tmp/knowmore'))
		{
			$page = file_get_contents( '/tmp/knowmore' );
		}
		// A missing, unreadable or empty cache means we have to hit the site.
		if ($page === false || $page === '')
		{
			$page = file_get_contents( 'http://www.knowmore.org/index.php?title=Special:Browse&filter=all&id=1' );
			// Only cache a successful download, so a transient network
			// failure is not remembered forever as an empty page.
			if ($page !== false && $page !== '')
			{
				if (file_put_contents( '/tmp/knowmore', $page ) === false)
				{
					trigger_error( 'KnowMore: could not write cache file /tmp/knowmore', E_USER_WARNING );
				}
			}
		}
		$this->text = ($page === false) ? '' : $page;
		$this->getAllTitles();
	}

	/**
	 * Legacy PHP 4-style constructor name, kept so existing code that
	 * calls it explicitly keeps working. PHP 8 no longer treats a method
	 * named after the class as a constructor, hence __construct() above.
	 */
	function KnowMore()
	{
		$this->__construct();
	}

	/**
	 * Scrapes the data into an array.
	 *
	 * Uses the serialized list in /tmp/knowmore.db when it exists and holds
	 * an array; otherwise parses the page HTML and saves the result there.
	 *
	 * @return array list of array( url, name ) pairs (returned by reference)
	 */
	function &getAllTitles()
	{
		if (file_exists('/tmp/knowmore.db'))
		{
			$raw = file_get_contents( '/tmp/knowmore.db' );
			// NOTE(review): unserialize() on a file in world-writable /tmp is
			// risky if other local users are untrusted; consider a private
			// cache directory and/or a JSON cache.
			$cached = is_string($raw) ? unserialize( $raw ) : false;
			if (is_array($cached))
			{
				$this->data = $cached;
				return $this->data;
			}
			// Corrupt cache: fall through and rebuild it.
		}

		// else parse the data and save it
		if (!is_string($this->text) || $this->text === '')
		{
			$page = file_exists('/tmp/knowmore') ? file_get_contents( '/tmp/knowmore' ) : false;
			$this->text = ($page === false) ? '' : $page;
		}

		$this->data = array();
		$matches = array();
		// Whitespace around "=" is optional so both the `title ="..."`
		// spelling the original pattern required and the usual
		// `title="..."` are recognised.
		preg_match_all( '/a href="([^>]+?)" title\s*=\s*"(.+?)"/im', $this->text, $matches, PREG_SET_ORDER );
		foreach ($matches as $match)
		{
			$this->data[] = array( $match[1], $match[2] );
		}

		// Do not persist an empty list: it would mask a later good download.
		if ($this->data)
		{
			if (file_put_contents( '/tmp/knowmore.db', serialize($this->data) ) === false)
			{
				trigger_error( 'KnowMore: could not write cache file /tmp/knowmore.db', E_USER_WARNING );
			}
		}
		return $this->data;
	}

	/**
	 * Renders the company list as JavaScript source: an array of
	 * [ 'name', 'url' ] pairs followed by a semicolon.
	 *
	 * @return string e.g. "[[ 'Nike', 'http://knowmore.org/wiki/Nike' ]];"
	 */
	function asJsArray()
	{
		$items = array();
		if (is_array($this->data))
		{
			foreach ($this->data as $link)
			{
				// Both values end up inside single-quoted JS literals,
				// so both are escaped.
				$items[] = '[ \'' . addslashes( $link[1] ) . '\', '
					. '\'http://knowmore.org' . addslashes( $link[0] ) . '\' ]';
			}
		}
		return '[' . implode( ', ', $items ) . '];';
	}

	/**
	 * Returns the array( url, name ) pair of the first company that
	 * matches $string, or null when nothing matches.
	 * If there is no exact (case-insensitive) name match, it'll try to
	 * find an inexact match by comparing normalized forms.
	 *
	 * @param string $string company name to look for
	 * @return array|null
	 */
	function select( $string )
	{
		if (!is_array($this->data)) return null;
		$string = (string) $string;

		// exact match on the company name
		foreach ($this->data as $value)
		{
			if (strcasecmp( $value[1], $string ) === 0)
				return $value;
		}

		// normalize search string: drop a standalone "Inc"/"Inc." and
		// all quotes, dots, slashes, commas and spaces
		$needle = preg_replace( "/\\bInc\\b\\.?|['.\\/, ]/", '', $string );
		// An empty needle would match every company; treat it as no match.
		if ($needle === null || $needle === '') return null;

		foreach ($this->data as $value)
		{
			$name = $value[1];
			// Plain substring search: the needle is user text, not a pattern.
			if (stripos( $name, $needle ) !== false)
				return $value;
			// normalize company name
			$modified = preg_replace( "/['.\\/, ]/", '', $name );
			if (stripos( (string) $modified, $needle ) !== false)
				return $value;
		}
		return null;
	}

	/**
	 * Scans text for strings between [braces] and converts them
	 * into knowmore.org links if the company exists on knowmore.org.
	 * Bracketed strings with no matching company are left untouched.
	 *
	 * @param string $string text to scan
	 * @return string the text with recognised [names] replaced by links
	 */
	function knoWiki( $string )
	{
		$found = array();
		if (!preg_match_all( '/\\[.+?\\]/', $string, $found ))
			return $string;

		// str_replace() below handles every occurrence of a token at once,
		// so each distinct token only needs to be looked up one time.
		foreach (array_unique( $found[0] ) as $token)
		{
			$name = ltrim( rtrim( $token, ']' ), '[' );
			$value = $this->select( $name );
			if (!$value) continue;

			// The href is emitted inside single quotes; neutralise any
			// single quote in the scraped URL so it cannot end the attribute.
			$url = str_replace( "'", '&#039;', $value[0] );
			$string = str_replace( $token, "<a href='http://knowmore.org$url'>$name</a>", $string );
		}
		return $string;
	}
}
?>

Return current item: KnowMore