<?php
// Location: PHPKode > scripts > ASoundex > asoundex/php5/ASoundex.class.php
// ----------------------------------------------------------------------
// Copyright (C) 2006 by Khaled Al-Shamaa.
// http://www.al-shamaa.com/
// ----------------------------------------------------------------------
// LICENSE

// This program is open source product; you can redistribute it and/or
// modify it under the terms of the GNU General Public License (GPL)
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// To read the license please visit http://www.gnu.org/copyleft/gpl.html
// ----------------------------------------------------------------------
// Class Name: Arabic Soundex
// Filename:   ASoundex.class.php
// Original    Author(s): Khaled Al-Sham'aa <hide@address.com>
// Purpose:    The Arabic soundex algorithm takes an Arabic word as input
//             and produces a character string that identifies a set of
//             words that are (roughly) phonetically alike.
// ----------------------------------------------------------------------

/**
 * Arabic Soundex / Phonix encoder.
 *
 * Turns a word into a short key made of one leading letter followed by
 * digits, so that words sharing the same key can be treated as sounding
 * (roughly) alike. Typical use:
 *
 *     $encoder = new ASoundex(4, 'en', 'soundex');
 *     $key     = $encoder->soundex($word);
 *
 * NOTE(review): the letter literals below look like Windows-1256 Arabic text
 * rendered in a Latin code page - confirm the intended file encoding. They
 * are kept exactly as found; input words must use the same encoding as this
 * file for the letters to be recognised.
 */
class ASoundex {
    /**
     * Soundex letter groups. The array index of each pattern is the digit
     * that replaces every letter the pattern matches.
     */
    private $asoundexCode = array(
        '/Ç|æ|í|Ú|Í|å/',
        '/È|Ý/',
        '/Î|Ì|Ò|Ó|Õ|Ù|Þ|ß|Û|Ô/',
        '/Ê|Ë|Ï|Ð|Ö|Ø|É/',
        '/á/',
        '/ã|ä/',
        '/Ñ/'
    );

    /**
     * Phonix letter groups (a finer split than the soundex groups). The
     * array index of each pattern is the digit that replaces its letters.
     */
    private $aphonixCode = array(
        '/Ç|æ|í|Ú|Í|å/',
        '/È/',
        '/Î|Ì|Õ|Ù|Þ|ß|Û|Ô/',
        '/Ê|Ë|Ï|Ð|Ö|Ø|É/',
        '/á/',
        '/ã|ä/',
        '/Ñ/',
        '/Ý/',
        '/Ò|Ó/'
    );

    /**
     * Latin letter used for the leading character of the key when the
     * output language is 'en'.
     */
    private $transliteration = array(
        'Ç' => 'A',
        'È' => 'B',
        'Ê' => 'T',
        'Ë' => 'T',
        'Ì' => 'J',
        'Í' => 'H',
        'Î' => 'K',
        'Ï' => 'D',
        'Ð' => 'Z',
        'Ñ' => 'R',
        'Ò' => 'Z',
        'Ó' => 'S',
        'Ô' => 'S',
        'Õ' => 'S',
        'Ö' => 'D',
        'Ø' => 'T',
        'Ù' => 'Z',
        'Ú' => 'A',
        'Û' => 'G',
        'Ý' => 'F',
        'Þ' => 'Q',
        'ß' => 'K',
        'á' => 'L',
        'ã' => 'M',
        'ä' => 'N',
        'å' => 'H',
        'æ' => 'W',
        'í' => 'Y'
    );

    private $len;
    private $lang;
    private $code;

    /**
     * @param  Integer : length of the generated key (leading letter included)
     * @param  String  : 'en' to transliterate the leading letter to Latin,
     *                   any other value keeps the original letter
     * @param  String  : [soundex|phonix] mapping table used for the digits
     * @desc   Uses __construct instead of the former PHP 4-style same-name
     *         method, which PHP 8 no longer treats as a constructor (the
     *         properties were left unset). "new ASoundex(...)" is unchanged.
     */
    public function __construct($len=4, $lang='en', $code='soundex'){
        $this->len  = $len;
        $this->lang = $lang;
        $this->code = $code;
    }

    /**
     * @return String : the word with every letter replaced by its group digit
     * @param  String : the word (without its leading letter) to encode
     * @desc   mapCode : replaces each letter with the index of the group it
     *         belongs to in the active table (phonix or soundex); every
     *         remaining non-digit byte becomes the filler code '0'.
     */
    private function mapCode($word){
        $map = ($this->code === 'phonix') ? $this->aphonixCode : $this->asoundexCode;

        $encodedWord = $word;
        foreach($map as $digit=>$pattern){
            $encodedWord = preg_replace($pattern, (string) $digit, $encodedWord);
        }

        return preg_replace('/\D/', '0', $encodedWord);
    }

    /**
     * @return String : the input with runs of one repeated character
     *                  collapsed to a single occurrence
     * @param  String : digit string produced by mapCode
     * @desc   trimRep : always returns a string (empty input gives '') and
     *         no longer reads variables before they are defined.
     */
    private function trimRep($word){
        return preg_replace('/(.)\1+/s', '$1', $word);
    }

    /**
     * @return Array  : array(leading letter, remainder of the word)
     * @param  String : the word to split
     * @desc   splitFirstLetter : matches the start of the word against the
     *         known letters so that a letter stored as several bytes is
     *         taken whole; an unknown start falls back to a single byte.
     */
    private function splitFirstLetter($word){
        foreach($this->transliteration as $letter=>$latin){
            $size = strlen($letter);
            if(strncmp($word, $letter, $size) === 0){
                return array($letter, (string) substr($word, $size));
            }
        }

        return array((string) substr($word, 0, 1), (string) substr($word, 1));
    }

    /**
     * @return String : key of exactly $len symbols: the leading letter (when
     *                  recognised) followed by digits, right-padded with '0'
     * @param  String : the word to encode
     * @desc   soundex : keeps the first letter, encodes the rest as digits,
     *         collapses repeated digits, drops the filler code '0' and then
     *         truncates or pads the result. In 'en' mode a first letter with
     *         no transliteration is dropped and the key holds digits only.
     *         A non-positive length yields an empty string.
     */
    public function soundex($word){
        $len = (int) $this->len;
        if($len <= 0){ return ''; }

        list($letter, $rest) = $this->splitFirstLetter((string) $word);

        if($this->lang === 'en'){
            $head = isset($this->transliteration[$letter]) ? $this->transliteration[$letter] : '';
        }else{
            // '0' is the filler code and never survives in the key
            $head = ($letter === '0') ? '' : $letter;
        }

        $digits = str_replace('0', '', $this->trimRep($this->mapCode($rest)));

        // the leading letter takes one slot, however many bytes it occupies
        $room   = $len - ($head === '' ? 0 : 1);
        $digits = (string) substr($digits, 0, $room);

        return $head . str_pad($digits, $room, '0');
    }
}
// Return current item: ASoundex