Location: PHPKode > scripts > Moby Thesaurus > MobyThesaurus.php
<?php
/**
 * @author Brent Rossen
 * Website: http://brent.lizandbrent.com
 */

/**
 * Class for accessing the moby thesaurus and parts of speech.
 * This class is capable of retrieving a list of synonyms for a word, or parts of speech for a word.
 *
 */
class MobyThesaurus {
	
	/**
	 * Gets the thesaurus entry whose headword is most similar to the passed word.
	 *
	 * The word is stemmed with the PECL extension php_stem when the stem() function
	 * is available (highly recommended); otherwise the word itself is used. Every
	 * thesaurus line that starts with the stemmed word is considered. A line whose
	 * headword equals $word exactly is returned immediately; otherwise the candidate
	 * whose headword has the smallest Levenshtein distance to $word (strictly below
	 * 10) is returned, the first such candidate winning ties.
	 *
	 * @param string $word
	 * @return array The array of synonyms, array position 0 is the matched word.
	 *               Empty array when $word is empty, the data file cannot be read,
	 *               or no entry is close enough.
	 */
	public static function GetSynonyms($word) {
		$word = ( string ) $word;
		
		//an empty word is a prefix of every line and would match an arbitrary entry
		if ($word === "") {
			return array ();
		}
		
		//get the thesaurus
		$thesaurus_array = self::ReadLines ( "moby_thesaurus.txt" );
		
		//get the stemmed word, requires the PECL extension php_stem
		if (function_exists ( "stem" )) {
			$stemmed_word = stem ( $word );
		} else {
			//can't get the stemmed word
			$stemmed_word = $word;
		}
		
		//the array of potential entries
		$potential_entries = array ();
		
		//loop through the thesaurus entries
		foreach ( $thesaurus_array as $entry ) {
			if (! self::StartsWith ( $stemmed_word, $entry )) {
				continue;
			}
			
			//file() keeps the line ending; strip it so the last synonym is clean.
			//explode() replaces split(), which was removed in PHP 7.
			$entry_arr = explode ( ",", rtrim ( $entry, "\r\n" ) );
			
			//strict comparison: numeric-looking strings must not match loosely
			if ($entry_arr [0] === $word) {
				return $entry_arr;
			}
			$potential_entries [] = $entry_arr;
		}
		
		//anything above 10 is way too far away
		$lowest_distance = 10;
		$best_entry = array ();
		foreach ( $potential_entries as $candidate ) {
			$distance = levenshtein ( $candidate [0], $word );
			//older PHP versions return -1 when an argument is too long; ignore those
			if ($distance < 0) {
				continue;
			}
			//keep only the word that is closest to the original word
			if ($distance < $lowest_distance) {
				$lowest_distance = $distance;
				$best_entry = $candidate;
			}
		}
		
		return $best_entry;
	}
	
	/**
	 * Gets the PartsOfSpeech for the entries that start with the given word.
	 *
	 * Each matching line is split on the backslash character: the text before it is
	 * the headword and the text after it is a run of single-character part-of-speech
	 * symbols. A matching line without a backslash yields the headword with an empty
	 * list of symbols.
	 *
	 * @param string $word
	 * @return array of parts of speech, keyed by headword; each value is the list of
	 *               one-character symbols found for that headword
	 */
	public static function GetPartsOfSpeech($word) {
		$word = ( string ) $word;
		
		//get the parts of speech list
		$pos_array = self::ReadLines ( "moby_part_of_speech.txt" );
		
		$poss = array ();
		foreach ( $pos_array as $entry ) {
			if (! self::StartsWith ( $word, $entry )) {
				continue;
			}
			
			//split the word from its parts of speech (literal backslash delimiter);
			//explode() replaces split(), which was removed in PHP 7
			$line_arr = explode ( "\\", rtrim ( $entry, "\r\n" ) );
			$headword = $line_arr [0];
			//a line without the delimiter has no symbols at all
			$symbols = isset ( $line_arr [1] ) ? trim ( $line_arr [1] ) : "";
			
			$poss [$headword] = array ();
			//go through each part of speech item
			$symbol_count = strlen ( $symbols );
			for($i = 0; $i < $symbol_count; $i ++) {
				$poss [$headword] [] = trim ( $symbols [$i] );
			}
		}
		
		return $poss;
	}
	
	/**
	 * Reads one of the data files from the thesaurus_files directory next to this file.
	 *
	 * @param string $file_name name of the file inside thesaurus_files
	 * @return array the lines of the file (line endings included), or an empty array
	 *               when the file cannot be read (file() itself raises the warning)
	 */
	private static function ReadLines($file_name) {
		$lines = file ( dirname ( __FILE__ ) . "/thesaurus_files/" . $file_name );
		return ($lines === false) ? array () : $lines;
	}
	
	/**
	 * Discovers if haystack starts with needle
	 *
	 * The comparison is byte-wise and case-sensitive; an empty needle matches
	 * every haystack.
	 *
	 * @param string $needle
	 * @param string $haystack
	 * @return boolean
	 */
	private static function StartsWith($needle, $haystack) {
		//strncmp avoids the numeric-string juggling that == applies to substrings
		return strncmp ( $haystack, $needle, strlen ( $needle ) ) === 0;
	}
}

?>
Return current item: Moby Thesaurus