Location: PHPKode > projects > SiteBar Client for Firefox 3.6 > SiteBar-3.3.9/inc/converter.inc.php
<?php
/******************************************************************************
 *  SiteBar 3 - The Bookmark Server for Personal and Team Use.                *
 *  Copyright (C) 2003-2006  Ondrej Brablc <http://brablc.com/mailto?o>       *
 *                                                                            *
 *  This program is free software; you can redistribute it and/or modify      *
 *  it under the terms of the GNU General Public License as published by      *
 *  the Free Software Foundation; either version 2 of the License, or         *
 *  (at your option) any later version.                                       *
 *                                                                            *
 *  This program is distributed in the hope that it will be useful,           *
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of            *
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             *
 *  GNU General Public License for more details.                              *
 *                                                                            *
 *  You should have received a copy of the GNU General Public License         *
 *  along with this program; if not, write to the Free Software               *
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *
 ******************************************************************************/

require_once('./inc/errorhandler.inc.php');

// Identifiers of the charset conversion back-ends. SB_Converter::getEngine()
// returns one of these and toUTF8()/fromUTF8() dispatch on it.
define('SB_CHARSET_IGNORE',   0); // no engine: fall back to utf8_encode()/utf8_decode()
define('SB_CHARSET_ICONV',    1); // convert with iconv()
define('SB_CHARSET_LIBICONV', 2); // convert with libiconv()
define('SB_CHARSET_RECODE',   3); // convert with recode_string()

/**
 * Charset conversion and browser language detection helper.
 *
 * Converts text between the configured charset and UTF-8 using whichever
 * conversion engine is available (iconv, libiconv, recode) and falls back
 * to utf8_encode()/utf8_decode() when none is.
 */
class SB_Converter extends SB_ErrorHandler
{
    // List of available languages, keyed by "<lang>-<charset>" identifier.
    // Each entry is array(pattern, name, charset, code); the pattern is
    // embedded into regular expressions by langDetect().
    var $languages;

    // Default charSet (always stored lower case, see setCharSet())
    var $charSet;

    // Use conversion engine
    var $useEngine = false;


    /**
     * Builds the language table and stores the charset.
     *
     * @param bool        $useEngine false forces the utf8_encode()/utf8_decode()
     *                               fallback regardless of installed extensions
     * @param string|null $charSet   charset of non UTF-8 text; null means 'utf-8'
     */
    function __construct($useEngine=true, $charSet='utf-8')
    {
        $this->useEngine = $useEngine;

        if ($charSet==null)
        {
            $charSet = 'utf-8';
        }

        $this->languages = array(
            'af-iso-8859-1'=> array('af|afrikaans', 'afrikaans', 'iso-8859-1', 'af'),
            'ar-win1256'   => array('ar([-_][[:alpha:]]{2})?|arabic', 'arabic', 'windows-1256', 'ar'),
            'bg-win1251'   => array('bg|bulgarian', 'bulgarian', 'windows-1251', 'bg'),
            'bg-koi8-r'    => array('bg|bulgarian', 'bulgarian', 'koi8-r', 'bg'),
            'ca-iso-8859-1'=> array('ca|catalan', 'catalan', 'iso-8859-1', 'ca'),
            'cs-iso-8859-2'=> array('cs|czech', 'czech', 'iso-8859-2', 'cs'),
            'cs-win1250'   => array('cs|czech', 'czech', 'windows-1250', 'cs'),
            'da-iso-8859-1'=> array('da|danish', 'danish', 'iso-8859-1', 'da'),
            'de-iso-8859-1'=> array('de([-_][[:alpha:]]{2})?|german', 'german', 'iso-8859-1', 'de'),
            'el-iso-8859-7'=> array('el|greek',  'greek', 'iso-8859-7', 'el'),
            'en-iso-8859-1'=> array('en([-_][[:alpha:]]{2})?|english',  'english', 'iso-8859-1', 'en'),
            'es-iso-8859-1'=> array('es([-_][[:alpha:]]{2})?|spanish', 'spanish', 'iso-8859-1', 'es'),
            'et-iso-8859-1'=> array('et|estonian', 'estonian', 'iso-8859-1', 'et'),
            'fi-iso-8859-1'=> array('fi|finnish', 'finnish', 'iso-8859-1', 'fi'),
            'fr-iso-8859-1'=> array('fr([-_][[:alpha:]]{2})?|french', 'french', 'iso-8859-1', 'fr'),
            'gl-iso-8859-1'=> array('gl|galician', 'galician', 'iso-8859-1', 'gl'),
            'he-iso-8859-8-i'=> array('he|hebrew', 'hebrew', 'iso-8859-8-i', 'he'),
            'hr-win1250'   => array('hr|croatian', 'croatian', 'windows-1250', 'hr'),
            'hr-iso-8859-2'=> array('hr|croatian', 'croatian', 'iso-8859-2', 'hr'),
            'hu-iso-8859-2'=> array('hu|hungarian', 'hungarian', 'iso-8859-2', 'hu'),
            'id-iso-8859-1'=> array('id|indonesian', 'indonesian', 'iso-8859-1', 'id'),
            'it-iso-8859-1'=> array('it|italian', 'italian', 'iso-8859-1', 'it'),
            'ja-euc'       => array('ja|japanese', 'japanese', 'euc-jp', 'ja'),
            'ja-sjis'      => array('ja|japanese', 'japanese', 'shift_jis', 'ja'),
            'ko-uhc'       => array('ko|korean', 'korean', 'uhc', 'ko'),
            'lt-win1257'   => array('lt|lithuanian', 'lithuanian', 'windows-1257', 'lt'),
            'lv-win1257'   => array('lv|latvian', 'latvian', 'windows-1257', 'lv'),
            'ms-iso-8859-1'=> array('ms|malay', 'malay', 'iso-8859-1', 'ms'),
            'nl-iso-8859-1'=> array('nl([-_][[:alpha:]]{2})?|dutch', 'dutch', 'iso-8859-1', 'nl'),
            'no-iso-8859-1'=> array('no|norwegian', 'norwegian', 'iso-8859-1', 'no'),
            'pl-iso-8859-2'=> array('pl|polish', 'polish', 'iso-8859-2', 'pl'),
            'pt-br-iso-8859-1' => array('pt[-_]br|brazilian port.', 'brazilian_portuguese', 'iso-8859-1', 'pt-BR'),
            'pt-iso-8859-1'=> array('pt([-_][[:alpha:]]{2})?|portuguese', 'portuguese', 'iso-8859-1', 'pt'),
            'ro-iso-8859-1'=> array('ro|romanian', 'romanian', 'iso-8859-1', 'ro'),
            'ru-win1251'   => array('ru|russian', 'russian', 'windows-1251', 'ru'),
            'ru-dos-866'   => array('ru|russian', 'russian', 'dos-866', 'ru'),
            'ru-koi8-r'    => array('ru|russian', 'russian', 'koi8-r', 'ru'),
            'sk-iso-8859-2'=> array('sk|slovak', 'slovak', 'iso-8859-2', 'sk'),
            'sk-win1250'   => array('sk|slovak', 'slovak', 'windows-1250', 'sk'),
            'sl-iso-8859-2'=> array('sl|slovenian', 'slovenian', 'iso-8859-2', 'sl'),
            'sl-win1250'   => array('sl|slovenian', 'slovenian', 'windows-1250', 'sl'),
            'sq-iso-8859-1'=> array('sq|albanian', 'albanian', 'iso-8859-1', 'sq'),
            'sr-win1250'   => array('sr|serbian', 'serbian', 'windows-1250', 'sr'),
            'sv-iso-8859-1'=> array('sv|swedish', 'swedish', 'iso-8859-1', 'sv'),
            'th-tis-620'   => array('th|thai', 'thai', 'tis-620', 'th'),
            'tr-iso-8859-9'=> array('tr|turkish', 'turkish', 'iso-8859-9', 'tr'),
            'uk-win1251'   => array('uk|ukrainian', 'ukrainian', 'windows-1251', 'uk'),
            'zh-tw'        => array('zh[-_]tw|chinese traditional', 'chinese', 'big5', 'zh-TW'),
            'zh-gbk'       => array('zh|chinese simplified', 'chinese', 'gbk', 'zh')
        );

        $this->setCharSet($charSet);
    }

    /**
     * PHP 4 style constructor.
     *
     * PHP 8 no longer treats a method named after the class as constructor,
     * so the real work lives in __construct(). This wrapper is kept (and
     * declared after __construct()) so that code calling
     * $this->SB_Converter(...) explicitly keeps working.
     */
    function SB_Converter($useEngine=true, $charSet='utf-8')
    {
        $this->__construct($useEngine, $charSet);
    }

    /**
     * Sets the charset used for non UTF-8 text.
     *
     * @param string $charset charset name; stored lower-cased so that the
     *                        'utf-8' comparisons in toUTF8()/fromUTF8() work
     */
    function setCharSet($charset)
    {
        $this->charSet = strtolower($charset);
    }

    /**
     * Returns the conversion engine to use, one of the SB_CHARSET_* constants.
     *
     * The result of the extension detection is cached in a static variable
     * and therefore shared by every instance within the request. The
     * per-instance $useEngine switch is evaluated before the cache so that
     * one instance cannot impose its setting on another.
     *
     * @return int
     */
    function getEngine()
    {
        static $engine = -1;

        if (!$this->useEngine)
        {
            // Deliberately not cached: this is an instance level decision.
            return SB_CHARSET_IGNORE;
        }

        if ($engine !== -1)
        {
            return $engine;
        }

        if (!function_exists('iconv') && !extension_loaded('iconv'))
        {
            $this->_loadExtension('iconv');
        }

        if (function_exists('iconv'))
        {
            $engine = SB_CHARSET_ICONV;
        }
        elseif (function_exists('libiconv'))
        {
            $engine = SB_CHARSET_LIBICONV;
        }
        else
        {
            if (!function_exists('recode_string') && !extension_loaded('recode'))
            {
                $this->_loadExtension('recode');
            }

            if (function_exists('recode_string'))
            {
                $engine = SB_CHARSET_RECODE;
            }
            else
            {
                $engine = SB_CHARSET_IGNORE;
            }
        }

        return $engine;
    }

    /**
     * Best effort attempt to load a PHP extension at run time.
     *
     * Does nothing when dl() is not available in the running SAPI. The
     * library file name is built from PHP_SHLIB_SUFFIX ("php_<name>.dll" on
     * Windows, "<name>.so" or similar elsewhere).
     *
     * @param string $name extension name, e.g. 'iconv'
     */
    function _loadExtension($name)
    {
        if (!function_exists('dl'))
        {
            return;
        }

        $prefix = (PHP_SHLIB_SUFFIX === 'dll') ? 'php_' : '';

        // useHandler() is inherited; presumably it switches the custom error
        // handler off while dl() may fail and back on afterwards.
        $this->useHandler(false);
        @dl($prefix . $name . '.' . PHP_SHLIB_SUFFIX);
        $this->useHandler();
    }

    /**
     * Guesses the visitor's language from the request headers.
     *
     * The entries of the Accept-Language header are tried in the order sent
     * by the browser; if none matches, the User-Agent string is searched for
     * a language code enclosed in brackets or following "; ".
     *
     * @return string key of $this->languages for the first match.
     *                NOTE(review): when nothing matches the current charset
     *                name is returned, not a language key - confirm that
     *                callers expect this.
     */
    function langDetect()
    {
        if (!empty($_SERVER['HTTP_ACCEPT_LANGUAGE']))
        {
            $accepted = explode(',', $_SERVER['HTTP_ACCEPT_LANGUAGE']);
            foreach ($accepted as $item)
            {
                $item = trim($item);
                if ($item === '')
                {
                    continue;
                }

                foreach ($this->languages as $key => $value)
                {
                    // The code must be followed by the end of the entry, a
                    // subtag or the ";q=" weight, so that e.g. "fil" is not
                    // taken for "fi".
                    if (preg_match('/^(' . $value[0] . ')([-_;[:space:]].*)?$/i', $item))
                    {
                        return $key;
                    }
                }
            }
        }

        if (!empty($_SERVER['HTTP_USER_AGENT']))
        {
            $str = $_SERVER['HTTP_USER_AGENT'];
            foreach ($this->languages as $key => $value)
            {
                if (preg_match('/(\(|\[|;[[:space:]])(' . $value[0] . ')(;|\]|\))/i', $str))
                {
                    return $key;
                }
            }
        }

        return $this->charSet;
    }

    /**
     * Converts text from the configured charset to UTF-8.
     *
     * @param  string $text text encoded in $this->charSet
     * @return string|false converted text; the engines may return false
     *                      when the conversion fails
     */
    function toUTF8($text)
    {
        if ($this->charSet == 'utf-8')
        {
            return $text;
        }

        switch ($this->getEngine())
        {
            case SB_CHARSET_ICONV:
                return iconv($this->charSet, 'utf-8', $text);

            case SB_CHARSET_LIBICONV:
                return libiconv($this->charSet, 'utf-8', $text);

            case SB_CHARSET_RECODE:
                return recode_string($this->charSet .'..'. 'utf-8', $text);

            default:
                // No engine: utf8_encode() treats the input as ISO-8859-1
                // whatever $this->charSet says.
                return utf8_encode($text);
        }
    }

    /**
     * Converts UTF-8 text to the configured charset.
     *
     * @param  string $text UTF-8 encoded text
     * @return string|false converted text; the engines may return false
     *                      when the conversion fails
     */
    function fromUTF8($text)
    {
        if ($this->charSet == 'utf-8')
        {
            return $text;
        }

        switch ($this->getEngine())
        {
            case SB_CHARSET_ICONV:
                // //TRANSLIT asks iconv to approximate unmappable characters
                return iconv('utf-8', $this->charSet."//TRANSLIT", $text);

            case SB_CHARSET_LIBICONV:
                return libiconv('utf-8', $this->charSet, $text);

            case SB_CHARSET_RECODE:
                return recode_string('utf-8' .'..'. $this->charSet, $text);

            default:
                // No engine: utf8_decode() always produces ISO-8859-1.
                return utf8_decode($text);
        }
    }

    /**
     * Decodes a URL-escaped string into UTF-8.
     *
     * Understands "%XX" byte escapes and the non-standard "%uXXXX" escapes.
     * Two consecutive "%uXXXX" escapes forming a UTF-16 surrogate pair are
     * combined into a single code point. A '%' that does not start a valid
     * escape is kept literally. Unescaped characters and "%XX" bytes are
     * passed through toUTF8() one byte at a time.
     *
     * @param  string $source escaped text
     * @return string UTF-8 text
     */
    function utf8RawUrlDecode($source)
    {
        $decodedStr = '';
        $pos = 0;
        $len = strlen ($source);

        while ($pos < $len)
        {
            $charAt = substr ($source, $pos, 1);

            if ($charAt == '%')
            {
                if (preg_match ('/^u([0-9a-fA-F]{4})/', substr ($source, $pos + 1, 5), $match))
                {
                    // we got a unicode character
                    $unicode = hexdec ($match[1]);
                    $pos += 6;

                    // A high surrogate immediately followed by a low one
                    // encodes a single character above U+FFFF.
                    if ($unicode >= 0xD800 && $unicode <= 0xDBFF
                    &&  preg_match ('/^%u([dD][c-fC-F][0-9a-fA-F]{2})/', substr ($source, $pos, 6), $low))
                    {
                        $unicode = 0x10000
                                 + (($unicode - 0xD800) << 10)
                                 + (hexdec ($low[1]) - 0xDC00);
                        $pos += 6;
                    }

                    $decodedStr .= $this->_codePointToUtf8($unicode);
                    continue;
                }

                if (preg_match ('/^[0-9a-fA-F]{2}/', substr ($source, $pos + 1, 2), $match))
                {
                    // we have an escaped byte
                    $decodedStr .= $this->toUTF8(chr (hexdec ($match[0])));
                    $pos += 3;
                    continue;
                }

                // Not a valid escape: fall through and keep the '%' as is.
            }

            $decodedStr .= $this->toUTF8($charAt);
            $pos++;
        }
        return $decodedStr;
    }

    /**
     * Replaces decimal numeric character references ("&#NNN;") by the UTF-8
     * encoding of the referenced code point.
     *
     * Everything else - including text in front of the first reference and
     * malformed references such as "&#x41;" or "&#12" - is copied unchanged.
     *
     * @param  string $source text possibly containing "&#NNN;" references
     * @return string text with the references replaced
     */
    function utf8Encode ($source)
    {
        $entityArray = explode ("&#", $source);
        $size = count ($entityArray);

        // The first chunk precedes any "&#" and is always literal text.
        $utf8Str = $entityArray[0];

        for ($i = 1; $i < $size; $i++)
        {
            $subStr = $entityArray[$i];

            if (preg_match ('/^([0-9]+);/', $subStr, $match))
            {
                $utf8Str .= $this->_codePointToUtf8(intval ($match[1]));
                // append whatever follows the terminating ';'
                $utf8Str .= substr ($subStr, strlen ($match[0]));
            }
            else
            {
                // Not a decimal reference: put back the "&#" removed by explode()
                $utf8Str .= "&#" . $subStr;
            }
        }
        return $utf8Str;
    }

    /**
     * Encodes a single Unicode code point as UTF-8.
     *
     * Surrogates (U+D800..U+DFFF) and values above U+10FFFF cannot be
     * represented in UTF-8 and yield U+FFFD REPLACEMENT CHARACTER.
     *
     * @param  int $unicode code point
     * @return string one to four bytes
     */
    function _codePointToUtf8($unicode)
    {
        if ($unicode < 0 || $unicode > 0x10FFFF
        || ($unicode >= 0xD800 && $unicode <= 0xDFFF))
        {
            return "\xEF\xBF\xBD";
        }

        if ($unicode < 0x80)
        {
            return chr ($unicode);
        }

        if ($unicode < 0x800)
        {
            return chr (0xC0 | ($unicode >> 6))
                 . chr (0x80 | ($unicode & 0x3F));
        }

        if ($unicode < 0x10000)
        {
            return chr (0xE0 | ($unicode >> 12))
                 . chr (0x80 | (($unicode >> 6) & 0x3F))
                 . chr (0x80 | ($unicode & 0x3F));
        }

        return chr (0xF0 | ($unicode >> 18))
             . chr (0x80 | (($unicode >> 12) & 0x3F))
             . chr (0x80 | (($unicode >> 6) & 0x3F))
             . chr (0x80 | ($unicode & 0x3F));
    }
}
?>
Return current item: SiteBar Client for Firefox 3.6