Location: PHPKode > scripts > Similarities Finder > similarity.class.php
<?php 
/*
*********************
* Similarity Class  *
*    Version 1.0    *
*   March 17, 2011  *
*                   *
*     by Razvan     *
*********************

This is the 5th class I've ever made.

The class compares any number of strings and returns the similarities
(those with the highest word count), if any are found.
Similarities = sequences of words that appear in every string

If you have any suggestions or find any bugs please contact me.

=========================================================================================
This is how it works:

	$check	= new Similarity;

First you add the strings using the add() function
	
	$array	= array("one two three four five six",
					"two three four five six one seven",
					"three four five two one six");
			
	$test	= $check -> add($array);

Then, you compare the strings with the compare() function

	$test	= $check -> compare();

And in the end, you have a few options:
	
	If you want the similarities returned as a string, use get_string($delimiter).
	The default delimiter is "|"; it separates the results when multiple similarities (with the same word count) are found.
		$test	= $check -> get_string();
	
	If you want the similarities returned as an array, use get_array().
		$test	= $check -> get_array();
		
	If you want a list of the strings and the similarities, use html().
		$test	= $check -> html();
		
	It will return something like this:
		<h2>The strings:</h2>
		<ul>
			<li>one two three four five six</li>
			<li>two three four five six one seven</li>
			<li>three four five two one six</li>
		</ul>
		<h2>The similarities:</h2>
		<ul>
			<li>three four five</li>
		</ul>
=========================================================================================
*/
/**
 * Finds the longest sequences of consecutive words shared by every string
 * in a set.
 *
 * Usage: add() one or more batches of strings, call compare(), then read the
 * result with get_string(), get_array() or html().
 *
 * Errors are not thrown; they are accumulated as HTML snippets in
 * $err_messages and handed back by the getters.
 */
class Similarity{

	/** Cleaned strings (trimmed, lower-cased, whitespace collapsed) to compare. */
	private $newarray				= array();

	/** Accumulated HTML error messages. */
	private $err_messages			= "";

	/** Error flags; the presence of "1" means add() failed and blocks compare(). */
	private $errors					= array("0");

	/** Result of the last compare(): start index in the first string => phrase. */
	private $founded_similarities	= array();


	/**
	 * Builds the HTML for an error message and records or returns it.
	 *
	 * @param string $err  Error code ("0".."4"); unknown codes give "Unknown error."
	 * @param bool   $show When TRUE the message is appended to $err_messages.
	 * @param bool   $echo When TRUE the message is returned instead of recorded.
	 * @return string|null The HTML snippet when $echo is TRUE, otherwise nothing.
	 */
	private function show_error($err, $show=TRUE, $echo=FALSE){

		$messages = array("0" => "You need to supply an array.",
						  "1" => "Less than two valid strings found in the array!",
						  "2" => "No similarities were found!",
						  "3" => "Add strings first! Use add(somearray) somearray = array(string1, string2, ...)",
						  "4" => "The string is NULL. No similarities were found!",
						  );

		// Kept in its own variable so the $show flag is not overwritten.
		$text = array_key_exists($err, $messages) ? $messages[$err] : "Unknown error.";

		$html = '<div style = "color: #000000;font-weight:bold; background-color: #EBEBEB;font-family:Arial; font-size:10px; border: 4px solid;margin: 10px 0px; padding:10px;"><u>Similarity Class</u>: '.$text."</div>\n";

		if($echo){ return $html; }
		if($show){ $this -> err_messages .= $html; }
	}


	/**
	 * Adds a batch of strings to the comparison set.
	 *
	 * Each element is trimmed, lower-cased and has every run of whitespace
	 * collapsed to a single space; blank and non-stringable elements are
	 * skipped. The batch is accepted only if at least two usable strings
	 * remain, otherwise an error is recorded and compare() is blocked.
	 *
	 * @param array $array Strings to compare.
	 * @return void
	 */
	public function add($array=""){

		if(!is_array($array) || empty($array)){
			$this -> show_error("0", TRUE);
			$this -> errors[] = "1";
			return;
		}

		// Initialised up front so a batch of blank strings yields count 0
		// rather than a count() call on an undefined variable.
		$clean = array();

		foreach($array as $string){
			$stringable = is_scalar($string)
				|| (is_object($string) && method_exists($string, '__toString'));
			if(!$stringable){ continue; }

			$candidate = strtolower(trim((string)$string));
			// Tabs and newlines separate words just like spaces do.
			$candidate = preg_replace("/\s+/", " ", $candidate);

			if($candidate !== ""){ $clean[] = $candidate; }
		}

		if(count($clean) > 1){
			// Appending one by one keeps the keys consecutive from 0.
			foreach($clean as $entry){
				$this -> newarray[] = $entry;
			}
		} else {
			// Fewer than two usable strings: nothing to compare.
			$this -> show_error("1", TRUE);
			$this -> errors[] = "1";
		}
	}


	/**
	 * Finds the longest word sequences of the first string that occur, as
	 * whole words, in every other string.
	 *
	 * Results are stored for the getters, keyed by the start index of the
	 * phrase within the first string, with duplicate phrases reported once.
	 * Any result of an earlier run is discarded first.
	 *
	 * @return string|null The accumulated error messages when comparison is
	 *                     not possible, otherwise nothing.
	 */
	public function compare(){

		if(in_array("1", $this -> errors, true) || empty($this -> newarray)){
			return $this -> err_messages;
		}

		// Forget the previous run so a run without matches reports none.
		$this -> founded_similarities = array();

		$others	= $this -> newarray;
		$first	= array_shift($others);
		$words	= explode(" ", $first);
		$total	= count($words);

		// Pad once with spaces so a phrase can only match on word boundaries.
		$padded = array();
		foreach($others as $other){
			$padded[] = " ".$other." ";
		}

		// Longest first: the first length that yields a hit is the answer.
		for($length = $total; $length >= 1; $length--){
			$found = array();

			for($start = 0; $start + $length <= $total; $start++){
				$phrase = implode(" ", array_slice($words, $start, $length));

				// Report a repeated phrase once, under its first position.
				if(in_array($phrase, $found, true)){ continue; }

				if($this -> in_every_string($phrase, $padded)){
					$found[$start] = $phrase;
				}
			}

			if(!empty($found)){
				$this -> founded_similarities = $found;
				return;
			}
		}

		$this -> show_error("2", TRUE);
	}


	/**
	 * Tells whether a phrase occurs as whole words in every given string.
	 *
	 * @param string $phrase  Space-separated words to look for.
	 * @param array  $strings Haystacks, each already padded with one leading
	 *                        and one trailing space.
	 * @return bool
	 */
	private function in_every_string($phrase, $strings){
		$needle = " ".$phrase." ";

		foreach($strings as $haystack){
			// Strict comparison: position 0 is a valid match.
			if(strpos($haystack, $needle) === false){
				return false;
			}
		}
		return true;
	}


	/**
	 * Returns the similarities joined into one string.
	 *
	 * @param string $delimiter Separator used when several similarities exist.
	 * @return string The joined similarities, or the accumulated error
	 *                messages when there are no strings or no similarities.
	 */
	public function get_string($delimiter="|"){

		if(empty($this -> newarray)){
			$this -> show_error("3", TRUE);
			return $this -> err_messages;
		}

		if(empty($this -> founded_similarities)){
			return $this -> err_messages;
		}

		return implode($delimiter, $this -> founded_similarities);
	}


	/**
	 * Returns the similarities as an array.
	 *
	 * @return array|false|string The similarities keyed by their start index
	 *                            in the first string, FALSE when none were
	 *                            found, or the accumulated error messages
	 *                            when no strings were added.
	 */
	public function get_array(){

		if(empty($this -> newarray)){
			$this -> show_error("3", TRUE);
			return $this -> err_messages;
		}

		if(empty($this -> founded_similarities)){
			return FALSE;
		}

		return $this -> founded_similarities;
	}


	/**
	 * Renders the strings and the similarities as two HTML lists.
	 *
	 * @return string The HTML, or the accumulated error messages when fewer
	 *                than two strings are available.
	 */
	public function html(){

		if(count($this -> newarray) < 2){
			// Same hint as get_string()/get_array() when nothing else explains the failure.
			if($this -> err_messages === ""){
				$this -> show_error("3", TRUE);
			}
			return $this -> err_messages;
		}

		$html  = "<h2>The strings:</h2>\n";
		$html .= $this -> html_list($this -> newarray);
		$html .= "<h2>The similarities:</h2>\n";

		if(!empty($this -> founded_similarities)){
			$html .= $this -> html_list($this -> founded_similarities);
		} else {
			// No similarities found.
			$html .= $this -> err_messages;
		}

		return $html;
	}


	/**
	 * Renders items as an unordered HTML list, escaping each item.
	 *
	 * @param array $items Plain-text items.
	 * @return string
	 */
	private function html_list($items){
		$html = "<ul>\n";

		foreach($items as $item){
			// The items are caller-supplied text, not markup.
			$html .= "\t<li>".htmlspecialchars($item, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')."</li>\n";
		}

		return $html."</ul>\n";
	}

}

?>
Return current item: Similarities Finder