Location: PHPKode > scripts > Phonetic > phonetic/phonetic.class.php
<?php
/*
- author: Skakunov Alexander [hide@address.com]
- date: 16.02.2008
- what for: generates phonetic combinations of a given word

example:

$ph = new Phonetic();
$variants = $ph->generate( array('microsoft', 'google') );

//you can edit the replacement rules to customize the replacements

*/

/**
 * Generates look-alike / sound-alike spellings of words by applying
 * substring replacement rules.
 *
 * Usage:
 *   $ph = new Phonetic();
 *   $variants = $ph->generate( array('microsoft', 'google') );
 */
class Phonetic
{
  // Replacement rules. Each rule is array(source, replacement): wherever
  // "source" occurs in the (lowercased) input it may be rewritten to
  // "replacement". Rules are tried in the order listed here, and that order
  // also determines the order of the generated variants.
  //
  // NOTE: generate() lowercases its input before matching and the match is
  // case-sensitive, so the rules whose source is an uppercase letter
  // ("I" -> "1", "O" -> "0") can never match.
  protected $arrRules = array(
    array("1", "l"),
    array("l", "1"),
    array("1", "I"),
    array("I", "1"),
    
    array("6", "b"),
    array("b", "6"),
    
    array("0", "O"),
    array("O", "0"),
    array("0", "o"),
    array("o", "0"),
    
    array("s", "$"),
    array("$", "s"),
    
    array("y", "i"),
    array("y", "j"),
    array("i", "y"),
    array("c", "ck"),
    array("k", "ck"),
    array("f", "ph"),
    array("ph", "f"),
    array("j", "dzh"),
    array("dzh", "j"),
    array("h", "kh"),
    array("kh", "h"),
    array("d", "dd"),
    array("dd", "d"),
    array("ll", "l"),
    array("l", "ll"),
    array("n", "nn"),
    array("nn", "n"),
    array("m", "mm"),
    array("mm", "m"),   // was ("m", "m"): a no-op that returned the input unchanged
    array("s", "ss"),
    array("ss", "s"),
    array("t", "tt"),
    array("tt", "t"),
    array("o", "ou"),
    array("x", "ks"),
    array("x", "kz"),
    array("x", "gz"),
    array("x", "gs"),
    array("ks", "x"),
    array("kz", "x"),
    array("gz", "x"),
    array("gs", "x"),
    array("qu", "kw"),
    array("qu", "kv"),
    array("q", "c"),
    array("kw", "qu"),  // the duplicated qu/kw pair that followed was removed
    array("ee", "i"),
    array("ea", "i"),
    array("ts", "c"),
    array("ew", "ju"),
    array("w", "v"),
    array("v", "w"),
    array("u", "yu"),
    array("u", "oo"),
    array("oo", "u"),
    array("u", "ju"),
    array("ju", "u"),
//    array("ei", "ay"),
//    array("ay", "ei"),
//    array("ay", "ey"),
    array("ng", "n")
  );
  
  
  /**
   * Applies one recorded match to a string and returns the rewritten string.
   *
   * @param int    $num      index of the match inside $arrGens
   * @param array  $arrGens  list of matches; each entry has 'pos' (byte offset
   *                         of the match in the original text) and 'rule'
   *                         (index into $arrRules)
   * @param array  $arrRules rule table of array(source, replacement) pairs
   * @param string $src      string to rewrite
   * @param int    $offset   correction added to 'pos' when $src has already
   *                         been changed in length by earlier replacements
   * @return string $src with the rule's source replaced by its replacement
   */
  protected function applyRule($num, $arrGens, $arrRules, $src, $offset=0)
  {
    $rule  = $arrRules[$arrGens[$num]['rule']];
    $start = $arrGens[$num]['pos'] + $offset;

    return substr($src, 0, $start) . $rule[1] . substr($src, $start + strlen($rule[0]));
  }


  /**
   * Generates variants for every incoming text.
   *
   * Each text is trimmed and lowercased, then scanned for every position at
   * which a rule's source string occurs.
   *
   * - Simple mode (default): one variant per match, with exactly one
   *   replacement applied.
   * - Combined mode: one variant per non-empty set of matches that do not
   *   overlap each other, with all of them applied. The number of sets grows
   *   exponentially with the number of matches, so this mode is only
   *   practical for short words.
   *
   * @param array $arrTexts         incoming lines
   * @param bool  $complexAlgorithm should replacement rules be combined;
   *                                if true, more results are generated
   * @return array variants per input, keyed like $arrTexts. Duplicates are
   *               removed with array_unique(), so the inner keys may have gaps.
   */
  public function generate( $arrTexts, $complexAlgorithm = false )
  {
    $arrVariants = array();
    $ruleCount = count($this->arrRules);

    foreach ($arrTexts as $id => $text)
    {
      $text = strtolower(trim($text));
      $textLen = strlen($text);

      // Record every (position, rule) pair whose source occurs in the text.
      // Entries end up ordered by position, then by rule index.
      $arrGens = array();
      for ($i = 0; $i < $textLen; $i++)
      {
        for ($key = 0; $key < $ruleCount; $key++)
        {
          $from = (string)$this->arrRules[$key][0];
          if (substr($text, $i, strlen($from)) === $from)
          {
            $arrGens[] = array('pos' => $i, 'rule' => $key);
          }
        }
      }

      $N = count($arrGens);
      $arrVariants[$id] = array();

      if ( !$complexAlgorithm ) //use simple algorithm
      {
        for ($i = 0; $i < $N; $i++)
        {
          $arrVariants[$id][] = $this->applyRule($i, $arrGens, $this->arrRules, $text);
        }
      }
      else //use COMBINE, more complex algorithm
      {
        // Every non-empty subset of matches is a bit mask: bit $t set means
        // "apply match $t". Counting upwards keeps the enumeration order of a
        // binary counter whose lowest digit is the first match.
        $limit = pow(2, $N);
        for ($mask = 1; $mask < $limit; $mask++)
        {
          $str      = $text;
          $shift    = 0;    // length difference accumulated by applied rules
          $consumed = 0;    // end of the last applied match, in original coordinates
          $valid    = true;

          // Stop as soon as no higher bits remain set in the mask.
          for ($t = 0; $t < $N && ($mask >> $t) !== 0; $t++)
          {
            if ((($mask >> $t) & 1) === 0)
            {
              continue;
            }

            $pos = $arrGens[$t]['pos'];
            if ($pos < $consumed)
            {
              // This match overlaps text that was already replaced. The same
              // result without it is produced by another mask, so drop this one.
              $valid = false;
              break;
            }

            $rule = $this->arrRules[$arrGens[$t]['rule']];
            $str  = $this->applyRule($t, $arrGens, $this->arrRules, $str, $shift);

            // Later positions move by the difference between replacement and source.
            $shift   += strlen($rule[1]) - strlen($rule[0]);
            $consumed = $pos + strlen($rule[0]);
          }

          if ($valid)
          {
            $arrVariants[$id][] = $str;
          }
        }
      }

      $arrVariants[$id] = array_unique($arrVariants[$id]);
    }
    return $arrVariants;
  }
}
// Return current item: Phonetic