<?php
/*
- author: Skakunov Alexander [hide@address.com]
- date: 16.02.2008
- what for: generates phonetic combinations of a given word
example:
$ph = new Phonetic();
$variants = $ph->generate( array('microsoft', 'google') );
//you can edit the replacement rules to customize the replacements
*/
/**
 * Generates alternative spellings of words by applying a table of substring
 * replacement rules (for example "f" -> "ph" or "o" -> "0").
 *
 * Two modes are available through generate():
 *  - simple:  every matching rule is applied on its own, one variant per match;
 *  - complex: every non-overlapping combination of matches is applied.
 */
class Phonetic
{
    /**
     * Replacement rules as array(search, replacement) pairs.
     *
     * The order of this table determines the order of generated variants.
     * generate() lowercases its input before matching, so rules whose search
     * string contains an uppercase letter ("I", "O") never match; they are
     * kept only so the table stays symmetric.
     */
    protected $arrRules = array(
        array('1', 'l'),
        array('l', '1'),
        array('1', 'I'),
        array('I', '1'),
        array('6', 'b'),
        array('b', '6'),
        array('0', 'O'),
        array('O', '0'),
        array('0', 'o'),
        array('o', '0'),
        array('s', '$'),
        array('$', 's'),
        array('y', 'i'),
        array('y', 'j'),
        array('i', 'y'),
        array('c', 'ck'),
        array('k', 'ck'),
        array('f', 'ph'),
        array('ph', 'f'),
        array('j', 'dzh'),
        array('dzh', 'j'),
        array('h', 'kh'),
        array('kh', 'h'),
        array('d', 'dd'),
        array('dd', 'd'),
        array('ll', 'l'),
        array('l', 'll'),
        array('n', 'nn'),
        array('nn', 'n'),
        array('m', 'mm'),
        // Was array("m", "m"): an identity rule that emitted the unchanged
        // word as a "variant". The collapsing counterpart of "m" -> "mm" is meant.
        array('mm', 'm'),
        array('s', 'ss'),
        array('ss', 's'),
        array('t', 'tt'),
        array('tt', 't'),
        array('o', 'ou'),
        array('x', 'ks'),
        array('x', 'kz'),
        array('x', 'gz'),
        array('x', 'gs'),
        array('ks', 'x'),
        array('kz', 'x'),
        array('gz', 'x'),
        array('gs', 'x'),
        array('qu', 'kw'),
        array('qu', 'kv'),
        array('q', 'c'),
        array('kw', 'qu'),
        array('ee', 'i'),
        array('ea', 'i'),
        array('ts', 'c'),
        array('ew', 'ju'),
        array('w', 'v'),
        array('v', 'w'),
        array('u', 'yu'),
        array('u', 'oo'),
        array('oo', 'u'),
        array('u', 'ju'),
        array('ju', 'u'),
        // Disabled rules:
        // array("ei", "ay"),
        // array("ay", "ei"),
        // array("ay", "ey"),
        array('ng', 'n'),
    );

    /**
     * Applies one recorded match to a string.
     *
     * @param int    $num      index into $arrGens of the match to apply
     * @param array  $arrGens  matches, each array('pos' => int, 'rule' => int)
     * @param array  $arrRules rule table the 'rule' indexes refer to
     * @param string $src      string to modify
     * @param int    $offset   shift added to the recorded position, to account
     *                         for length changes made by earlier replacements
     * @return string $src with the matched span replaced by the rule's replacement
     */
    protected function applyRule($num, $arrGens, $arrRules, $src, $offset=0)
    {
        $rule  = $arrRules[$arrGens[$num]['rule']];
        $start = $arrGens[$num]['pos'] + $offset;

        return substr($src, 0, $start) . $rule[1] . substr($src, $start + strlen($rule[0]));
    }

    /**
     * Generates spelling variants for each input word.
     *
     * Each word is trimmed and lowercased before matching.
     *
     * @param array $arrTexts         input words; keys are preserved in the result
     * @param bool  $complexAlgorithm false: apply each matching rule on its own;
     *                                true: also apply every combination of
     *                                non-overlapping matches (work grows as 2^N
     *                                with the number of matches N)
     * @return array for each input key, an array of unique variant strings.
     *               Duplicates are removed with array_unique(), so the inner
     *               keys are not guaranteed to be consecutive.
     */
    public function generate( $arrTexts, $complexAlgorithm = false )
    {
        $arrVariants = array();

        foreach ($arrTexts as $id => $text) {
            $text    = strtolower(trim($text));
            $arrGens = $this->findMatches($text);

            if ($complexAlgorithm) {
                $variants = $this->buildCombinedVariants($text, $arrGens);
            } else {
                $variants = $this->buildSingleVariants($text, $arrGens);
            }

            $arrVariants[$id] = array_unique($variants);
        }

        return $arrVariants;
    }

    /**
     * Finds every place in $text where a rule's search string occurs.
     *
     * @param string $text normalized input word
     * @return array list of array('pos' => int, 'rule' => int), ordered by
     *               position and then by rule index
     */
    protected function findMatches($text)
    {
        $matches   = array();
        $length    = strlen($text);
        $ruleCount = count($this->arrRules);

        for ($pos = 0; $pos < $length; $pos++) {
            for ($key = 0; $key < $ruleCount; $key++) {
                $search = $this->arrRules[$key][0];
                // Strict comparison: loose == would compare numeric-looking
                // strings as numbers.
                if (substr($text, $pos, strlen($search)) === $search) {
                    $matches[] = array('pos' => $pos, 'rule' => $key);
                }
            }
        }

        return $matches;
    }

    /**
     * Simple mode: one variant per match, each applied to the original text.
     *
     * @param string $text    normalized input word
     * @param array  $arrGens matches from findMatches()
     * @return array list of variant strings (may contain duplicates)
     */
    protected function buildSingleVariants($text, $arrGens)
    {
        $variants = array();
        $count    = count($arrGens);

        for ($i = 0; $i < $count; $i++) {
            $variants[] = $this->applyRule($i, $arrGens, $this->arrRules, $text);
        }

        return $variants;
    }

    /**
     * Complex mode: one variant per non-empty combination of matches.
     *
     * Combinations are enumerated like a binary counter (match 0 is the least
     * significant bit). A combination is skipped when two of its matches
     * overlap in the original text, because both replacements cannot be
     * applied to the same characters.
     *
     * @param string $text    normalized input word
     * @param array  $arrGens matches from findMatches(), ordered by position
     * @return array list of variant strings (may contain duplicates)
     */
    protected function buildCombinedVariants($text, $arrGens)
    {
        $count = count($arrGens);
        if ($count === 0) {
            return array();
        }

        $variants = array();
        $selected = array_fill(0, $count, false);
        $total    = pow(2, $count) - 1; // number of non-empty subsets

        for ($f = 0; $f < $total; $f++) {
            // Binary increment with carry: clear trailing set bits, set the next one.
            $i = 0;
            while ($selected[$i]) {
                $selected[$i] = false;
                $i++;
            }
            $selected[$i] = true;

            $str          = $text;
            $offset       = 0;     // net length change caused by replacements so far
            $consumedUpTo = 0;     // end of the last replaced span, in original coordinates
            $overlaps     = false;

            for ($t = 0; $t < $count; $t++) {
                if (!$selected[$t]) {
                    continue;
                }

                $pos = $arrGens[$t]['pos'];
                if ($pos < $consumedUpTo) {
                    $overlaps = true;
                    break;
                }

                $rule = $this->arrRules[$arrGens[$t]['rule']];
                $str  = $this->applyRule($t, $arrGens, $this->arrRules, $str, $offset);

                // Later positions shift by the difference between replacement
                // and search length, which may be negative (e.g. "ph" -> "f").
                $offset      += strlen($rule[1]) - strlen($rule[0]);
                $consumedUpTo = $pos + strlen($rule[0]);
            }

            if (!$overlaps) {
                $variants[] = $str;
            }
        }

        return $variants;
    }
}