<?php
/*
*********************
* Similarity Class *
* Version 1.0 *
* March 17, 2011 *
* *
* by Razvan *
*********************
This is the 5th class I've ever made.
The class compares an unlimited number of strings
and returns the similarities with the highest word count, if any were found.
Similarities = sequences of consecutive words from the first string that appear as substrings in every other string
If you have any suggestions or find any bugs please contact me.
=========================================================================================
This is how it works:
$check = new Similarity;
First you add the strings using the add() function
$array = array("one two three four five six",
"two three four five six one seven",
"three four five two one six");
$check -> add($array); // add() does not return a value
Then, you compare the strings with the compare() function
$check -> compare(); // returns the error messages only when the comparison could not be run
And in the end, you have a few options:
If you want the similarities to return as a string, you should use get_string($delimiter);
The default delimiter is "|" and it's useful when there are multiple similarities (same word count) found.
$test = $check -> get_string();
If you want the similarities to return as an array, you should use get_array();
$test = $check -> get_array();
If you want a list with the strings and the similarities, you should use html();
$test = $check -> html();
It will return something like this:
<h2>The strings:</h2>
<ul>
<li>one two three four five six</li>
<li>two three four five six one seven</li>
<li>three four five two one six</li>
</ul>
<h2>The similarities:</h2>
<ul>
<li>three four five</li>
</ul>
=========================================================================================
*/
/**
 * Finds the longest word sequences shared by a set of strings.
 *
 * Usage: add() the strings, call compare(), then read the result with
 * get_string(), get_array() or html().
 *
 * The first string added is split into words on single spaces; every run of
 * consecutive words taken from it is searched for in all the other strings.
 * Only the runs with the highest word count that occur in every string are kept.
 *
 * NOTE: the search is a plain, case-insensitive substring test, so a phrase
 * also matches when it occurs inside longer words of another string.
 */
class Similarity{
    /** Cleaned strings (trimmed, lower-cased, repeated spaces collapsed), in the order added. */
    private $newarray = array();

    /** HTML-formatted error messages accumulated so far. */
    private $err_messages = "";

    /**
     * Status flags. A "1" entry is appended whenever add() rejects its input;
     * compare() refuses to run while one is present (the flag is never cleared).
     */
    private $errors = array("0");

    /**
     * Similarities found by the last compare() that ran, keyed by the
     * zero-based word offset of the phrase inside the first string.
     */
    private $founded_similarities = array();

    /**
     * Formats an error message and either stores or returns it.
     *
     * @param string $err  Error code ("0".."4"); any other value yields "Unknown error."
     * @param bool   $show When TRUE the formatted message is appended to the stored messages.
     * @param bool   $echo When TRUE the formatted message is returned and NOT stored.
     * @return string|null The formatted message when $echo is TRUE, otherwise nothing.
     */
    private function show_error($err, $show=TRUE, $echo=FALSE){
        $errors = array(
            "0" => "You need to supply an array.",
            "1" => "Less than two valid strings found in the array!",
            "2" => "No similarities were found!",
            "3" => "Add strings first! Use add(somearray) somearray = array(string1, string2, ...)",
            "4" => "The string is NULL. No similarities were found!",
        );

        // Keep the message in its own variable so the $show flag is not overwritten.
        if(array_key_exists($err, $errors)){
            $message = $errors[$err];
        } else {
            $message = "Unknown error.";
        }

        $formatted = '<div style = "color: #000000;font-weight:bold; background-color: #EBEBEB;font-family:Arial; font-size:10px; border: 4px solid;margin: 10px 0px; padding:10px;"><u>Similarity Class</u>: '.$message."</div>\n";

        if($echo){
            return $formatted;
        }
        if($show){
            $this -> err_messages .= $formatted;
        }
    }

    /**
     * Escapes a user-supplied string for safe inclusion in HTML output.
     *
     * @param string $text Raw text.
     * @return string HTML-escaped text.
     */
    private function escape($text){
        $flags = ENT_QUOTES;
        // ENT_SUBSTITUTE only exists on newer PHP versions; it stops invalid
        // byte sequences from blanking the whole string.
        if(defined('ENT_SUBSTITUTE')){
            $flags = $flags | ENT_SUBSTITUTE;
        }
        return htmlspecialchars($text, $flags, 'UTF-8');
    }

    /**
     * Adds strings to the set that compare() will work on.
     *
     * Each element is trimmed, lower-cased and has runs of spaces collapsed to
     * one space. Blank and non-scalar elements are ignored. The strings are
     * stored only if at least two valid ones remain; otherwise an error is
     * recorded and the error flag is raised.
     *
     * @param array $array List of strings to add.
     * @return void
     */
    public function add($array=""){
        if(!is_array($array) || empty($array)){
            $this -> show_error("0", TRUE);
            $this -> errors[] = "1";
            return;
        }

        // Always start from a real array so an all-blank input cannot reach count() as null.
        $clean = array();
        foreach($array as $string){
            if(!is_scalar($string)){
                // arrays, objects and null cannot be compared as text
                continue;
            }
            $newstring = strtolower(trim((string) $string));
            $newstring = preg_replace("/ {2,}/", " ", $newstring);
            if(is_string($newstring) && $newstring !== ""){
                $clean[] = $newstring;
            }
        }

        if(count($clean) > 1){
            // append one by one so the stored keys stay consecutive
            foreach($clean as $attr){
                $this -> newarray[] = $attr;
            }
        } else {
            // less than 2 valid strings: nothing to compare
            $this -> show_error("1", TRUE);
            $this -> errors[] = "1";
        }
    }

    /**
     * Searches the added strings for the longest word sequences they all share.
     *
     * The result is stored for get_string(), get_array() and html(). When no
     * similarity exists an error message is recorded instead.
     *
     * @return string|null The accumulated error messages when the comparison
     *                     cannot run (a previous add() failed or nothing was
     *                     added); nothing otherwise.
     */
    public function compare(){
        if(in_array("1", $this -> errors, true) || empty($this -> newarray)){
            return $this -> err_messages;
        }

        // Forget any earlier result so a run that finds nothing does not leave stale data.
        $this -> founded_similarities = array();

        $others = $this -> newarray;
        $words = explode(" ", array_shift($others));

        $similar = $this -> longest_common_phrases($words, $others);
        if(empty($similar)){
            $this -> show_error("2", TRUE);
        } else {
            // remove duplicates; the key of the first occurrence is kept
            $this -> founded_similarities = array_unique($similar);
        }
    }

    /**
     * Returns the phrases with the highest word count found in every string.
     *
     * Phrase lengths are tried from longest to shortest and the search stops at
     * the first length that yields a match, because only those are reported.
     *
     * @param array $words  Words of the first string.
     * @param array $others The remaining strings.
     * @return array Phrases keyed by their word offset in $words; empty when nothing is shared.
     */
    private function longest_common_phrases($words, $others){
        if(empty($others)){
            return array();
        }

        $total = count($words);
        for($length = $total; $length >= 1; $length--){
            $matches = array();
            for($start = 0; $start + $length <= $total; $start++){
                $phrase = trim(implode(" ", array_slice($words, $start, $length)));
                // an empty phrase (e.g. a word made only of tabs) is never a similarity
                if($phrase !== "" && $this -> in_every_string($phrase, $others)){
                    $matches[$start] = $phrase;
                }
            }
            if(!empty($matches)){
                return $matches;
            }
        }
        return array();
    }

    /**
     * Tells whether a phrase occurs in every one of the given strings.
     *
     * @param string $phrase Non-empty phrase to look for.
     * @param array  $others Strings to search.
     * @return bool TRUE when every string contains the phrase.
     */
    private function in_every_string($phrase, $others){
        foreach($others as $other){
            // Compare against false explicitly: a match at offset 0 is still a match.
            if(stripos($other, $phrase) === false){
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the similarities joined into one string.
     *
     * @param string $delimiter Separator used when several similarities share the top word count.
     * @return string The joined similarities, or the accumulated error messages
     *                when there are none or no strings were added.
     */
    public function get_string($delimiter="|"){
        if(empty($this -> newarray)){
            $this -> show_error("3", TRUE);
            return $this -> err_messages;
        }
        if(empty($this -> founded_similarities)){
            return $this -> err_messages;
        }
        return implode($delimiter, $this -> founded_similarities);
    }

    /**
     * Returns the similarities as an array.
     *
     * @return array|bool|string The similarities keyed by word offset in the
     *                           first string, FALSE when none were found, or
     *                           the accumulated error messages when no strings
     *                           were added.
     */
    public function get_array(){
        if(empty($this -> newarray)){
            // not enough strings
            $this -> show_error("3", TRUE);
            return $this -> err_messages;
        }
        if(empty($this -> founded_similarities)){
            // no similarities found
            return FALSE;
        }
        return $this -> founded_similarities;
    }

    /**
     * Builds an HTML report listing the added strings and the similarities.
     *
     * The strings and similarities are HTML-escaped; the stored error messages
     * are emitted as-is because they are already HTML.
     *
     * @return string The HTML report, or only the error messages when fewer
     *                than two strings are stored.
     */
    public function html(){
        if(count($this -> newarray) <= 1){
            // not enough strings
            return $this -> err_messages;
        }

        $strings = array();
        foreach($this -> newarray as $item){
            $strings[] = $this -> escape($item);
        }

        $html = "<h2>The strings:</h2>\n<ul>\n\t<li>";
        $html .= implode("</li>\n\t<li>", $strings);
        $html .= "</li>\n</ul>\n";
        $html .= "<h2>The similarities:</h2>\n";

        if(empty($this -> founded_similarities)){
            // no similarities found
            $html .= $this -> err_messages;
            return $html;
        }

        $found = array();
        foreach($this -> founded_similarities as $item){
            $found[] = $this -> escape($item);
        }
        $html .= "<ul>\n\t<li>";
        $html .= implode("</li>\n\t<li>", $found);
        $html .= "</li>\n</ul>\n";

        return $html;
    }
}
?>