<?php
// ----------------------------------------------------------------------
// Copyright (C) 2006 by Khaled Al-Shamaa.
// http://www.al-shamaa.com/
// ----------------------------------------------------------------------
// LICENSE
// This program is open source product; you can redistribute it and/or
// modify it under the terms of the GNU General Public License (GPL)
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// To read the license please visit http://www.gnu.org/copyleft/gpl.html
// ----------------------------------------------------------------------
// Class Name: Arabic Soundex
// Filename: ASoundex.class.php
// Original Author(s): Khaled Al-Sham'aa <hide@address.com>
// Purpose: The Arabic soundex algorithm takes an Arabic word as input
// and produces a character string which identifies a set of words
// that are (roughly) phonetically alike.
// ----------------------------------------------------------------------
/**
 * Soundex / Phonix style phonetic encoder.
 *
 * A word is reduced to its first character (optionally transliterated)
 * followed by the numeric group codes of the remaining characters, with
 * repeated codes collapsed, zeros dropped, and the result padded with
 * '0' or truncated to a fixed length.
 *
 * NOTE(review): the letter literals below look like single-byte
 * (presumably Windows-1256) Arabic letters displayed in a Latin code page.
 * All string handling in this class is byte-wise and the patterns carry no
 * "u" modifier, so input is expected in the same single-byte encoding as
 * this file -- confirm before converting the file to UTF-8.
 */
class ASoundex
{
    /**
     * Soundex grouping: the array index is the digit that replaces every
     * letter matched by the pattern at that index (see mapCode()).
     *
     * @var array
     */
    public $asoundexCode = array(
        '/Ç|æ|í|Ú|Í|å/',
        '/È|Ý/',
        '/Î|Ì|Ò|Ó|Õ|Ù|Þ|ß|Û|Ô/',
        '/Ê|Ë|Ï|Ð|Ö|Ø|É/',
        '/á/',
        '/ã|ä/',
        '/Ñ/'
    );

    /**
     * Phonix grouping: same layout as $asoundexCode, but with two extra
     * groups (indexes 7 and 8) split out of groups 1 and 2.
     *
     * @var array
     */
    public $aphonixCode = array(
        '/Ç|æ|í|Ú|Í|å/',
        '/È/',
        '/Î|Ì|Õ|Ù|Þ|ß|Û|Ô/',
        '/Ê|Ë|Ï|Ð|Ö|Ø|É/',
        '/á/',
        '/ã|ä/',
        '/Ñ/',
        '/Ý/',
        '/Ò|Ó/'
    );

    /**
     * Letter => Latin capital used for the leading character of the code
     * when $lang is 'en'.
     *
     * @var array
     */
    public $transliteration = array(
        'Ç' => 'A',
        'È' => 'B',
        'Ê' => 'T',
        'Ë' => 'T',
        'Ì' => 'J',
        'Í' => 'H',
        'Î' => 'K',
        'Ï' => 'D',
        'Ð' => 'Z',
        'Ñ' => 'R',
        'Ò' => 'Z',
        'Ó' => 'S',
        'Ô' => 'S',
        'Õ' => 'S',
        'Ö' => 'D',
        'Ø' => 'T',
        'Ù' => 'Z',
        'Ú' => 'A',
        'Û' => 'G',
        'Ý' => 'F',
        'Þ' => 'Q',
        'ß' => 'K',
        'á' => 'L',
        'ã' => 'M',
        'ä' => 'N',
        'å' => 'H',
        'æ' => 'W',
        'í' => 'Y'
    );

    /** @var int Length (in bytes) of the code returned by soundex(). */
    public $len;

    /** @var string 'en' transliterates the leading character; any other value keeps it as-is. */
    public $lang;

    /** @var string 'phonix' selects $aphonixCode; any other value selects $asoundexCode. */
    public $code;

    /**
     * @param int    $len  Length of the generated code.
     * @param string $lang 'en' to transliterate the leading character via $transliteration.
     * @param string $code 'soundex' or 'phonix': which grouping table mapCode() uses.
     */
    public function __construct($len = 4, $lang = 'en', $code = 'soundex')
    {
        $this->len  = $len;
        $this->lang = $lang;
        $this->code = $code;
    }

    /**
     * Legacy PHP 4 style constructor name.
     *
     * PHP 8 no longer treats a method named after the class as a
     * constructor, so the real initialisation lives in __construct(); this
     * method is kept so existing explicit calls keep working.
     *
     * @param int    $len
     * @param string $lang
     * @param string $code
     */
    public function ASoundex($len = 4, $lang = 'en', $code = 'soundex')
    {
        $this->__construct($len, $lang, $code);
    }

    /**
     * Create the soundex/phonix numeric code for a given word.
     *
     * Every letter matched by a group pattern is replaced by that group's
     * index; afterwards every remaining non-digit byte becomes '0'.
     *
     * @param string $word The word to encode.
     *
     * @return string The numeric code, one digit per input byte.
     * @author Khaled Al-Shamaa
     */
    public function mapCode($word)
    {
        $map = ($this->code === 'phonix') ? $this->aphonixCode : $this->asoundexCode;

        $encodedWord = (string) $word;
        foreach ($map as $code => $condition) {
            // The array key is the group digit; cast so preg_replace gets a string.
            $encodedWord = preg_replace($condition, (string) $code, $encodedWord);
        }

        // Anything that did not fall into a group counts as "no code".
        return preg_replace('/\D/', '0', $encodedWord);
    }

    /**
     * Collapse each run of identical consecutive bytes into a single byte.
     *
     * @param string $word
     *
     * @return string The word without adjacent repetitions ('' for empty input).
     */
    public function trimRep($word)
    {
        $word      = (string) $word;
        $cleanWord = '';
        $lastChar  = null;

        $length = strlen($word);
        for ($i = 0; $i < $length; $i++) {
            $char = $word[$i];
            if ($char !== $lastChar) {
                $cleanWord .= $char;
            }
            $lastChar = $char;
        }

        return $cleanWord;
    }

    /**
     * Compute the phonetic code of a word.
     *
     * @param string $word The word to encode.
     *
     * @return string A string of exactly $this->len bytes: the leading
     *                character followed by group digits, right-padded with '0'.
     */
    public function soundex($word)
    {
        $word = (string) $word;

        // Byte-wise split into leading character and remainder.
        $soundex = (string) substr($word, 0, 1);
        $rest    = (string) substr($word, 1);

        if ($this->lang == 'en') {
            // A leading character missing from the table contributes nothing.
            $soundex = isset($this->transliteration[$soundex])
                ? $this->transliteration[$soundex]
                : '';
        }

        $soundex .= $this->trimRep($this->mapCode($rest));

        // Zeros are dropped only after repetitions were collapsed, so an
        // uncoded character still separates two equal digits.
        $soundex = str_replace('0', '', $soundex);

        $totalLen = strlen($soundex);
        if ($totalLen > $this->len) {
            $soundex = substr($soundex, 0, $this->len);
        } else {
            $soundex .= str_repeat('0', $this->len - $totalLen);
        }

        return $soundex;
    }
}