Location: PHPKode > scripts > PHPBiDi > phpbidi/unicode.php
<?php

/* vim: set expandtab tabstop=4 shiftwidth=4: */

/**
 * UniChar is a Unicode Character class that supports PHPBiDi.
 *
 * UniChar helps PHPBiDi handle Unicode character code values and their
 * characteristics to create bidirectional texts.
 *
 * @package    PHPBiDi
 * @author     Efthimios Mavrogeorgiadis <hide@address.com>
 * @version    $Id: unicode.php, v 1.00 Wed Jan 09 2008 22:17:46 GMT+0200 Efthimios Mavrogeorgiadis $
 * @since      Wed Jan 09 2008 22:17:46 GMT+0200
 * @access     public
 * @uses       unicode_data.php Bidirectional values, mirrored characters and Arabic letters in accordance with Unicode 5.0
 */

// Load required file
require_once (dirname(__FILE__) . '/unicode_data.php');

class UniChar {

    /**
     * Unicode character code value (empty string until setNumber() is called)
     * @access private
     * @var integer|string
     */
    private $nu = '';

    /**
     * Unicode bidi category looked up from the constant 'U<code value>'
     * (null when no such constant is defined)
     * @access private
     * @var string|null
     */
    private $ty = '';

    /**
     * Unicode bidi category after the Unicode Bidirectional Algorithm has been applied
     * @access private
     * @var string|null
     */
    private $tb = '';

    /**
     * Character's embedding level after the Unicode Bidirectional Algorithm has been applied
     * (empty string until setLevel() is called)
     * @access private
     * @var integer|string
     */
    private $le = '';

    /**
     * Unicode character encoded in UTF-8
     * @access private
     * @var string
     */
    private $lt = '';

    /**
     * Arabic zero-length (non-)joiner (can be either 8204 or 8205; empty string when none is attached)
     * @access private
     * @var integer|string
     */
    private $ar = '';

    /**
     * Tags associated with characters, in the order they were added
     * @access private
     * @var array
     */
    private $ta = array();

    /**
     * Set basic characteristics of Unicode character
     *
     * Stores the code value, looks up its bidi category and uses that
     * category as the initial value of the resolved (BiDi) category.
     *
     * @access  public
     * @param   integer [$number] Unicode character code value
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setNumber($number) {
        $this->nu = $number;
        $this->setTypeChar();
        $this->setTypeBiDi($this->getTypeChar());
    }

    /**
     * Set character's bidi category as resolved by the Unicode Bidirectional Algorithm
     *
     * @access  public
     * @param   string [$type] Unicode bidi category
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setTypeBiDi($type) {
        $this->tb = $type;
    }

    /**
     * Set character's embedding level within paragraph
     *
     * @access  public
     * @param   integer [$level] Embedding level
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setLevel($level) {
        $this->le = $level;
    }

    /**
     * Set character's representation encoded in UTF-8
     *
     * Replaces whatever bytes were stored before; use addLetter() to append.
     *
     * @access  public
     * @param   string [$letter] Unicode character encoded in UTF-8
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setLetter($letter) {
        $this->lt = $letter;
    }

    /**
     * Associate tag with character
     *
     * Tags are appended; existing tags are kept.
     *
     * @access  public
     * @param   string [$ta] Tag
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setTag($ta) {
        $this->ta[] = $ta;
    }

    /**
     * Add byte to UTF-8 encoded character
     *
     * @access  public
     * @param   string [$letter] Byte of UTF-8 encoded character posing as a letter
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function addLetter($letter) {
        $this->lt .= $letter;
    }

    /**
     * Get character's Unicode code value
     *
     * @access  public
     * @return  integer Unicode code value (empty string if never set)
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getNumber() {
        return $this->nu;
    }

    /**
     * Get character's Unicode bidi category
     *
     * @access  public
     * @return  string|null Unicode bidi category, or null when no category constant exists for the code value
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getTypeChar() {
        return $this->ty;
    }

    /**
     * Get character's bidi category after application of the Unicode Bidirectional Algorithm
     *
     * @access  public
     * @return  string|null Unicode bidi category
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getTypeBiDi() {
        return $this->tb;
    }

    /**
     * Get character's embedding level within paragraph
     *
     * @access  public
     * @return  integer Embedding level (empty string if never set)
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getLevel() {
        return $this->le;
    }

    /**
     * Get character's representation encoded in UTF-8
     *
     * @access  public
     * @return  string Character encoded in UTF-8
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getLetter() {
        return $this->lt;
    }

    /**
     * Get tag
     *
     * @access  public
     * @param   integer [$i] Array index
     * @return  string|null Tag, or null when no tag is stored at that index
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getTag($i) {
        // Guard the lookup so an out-of-range index yields null instead of
        // raising an "undefined offset/array key" notice or warning.
        return isset($this->ta[$i]) ? $this->ta[$i] : null;
    }

    /**
     * Get tag array size
     *
     * @access  public
     * @return  integer Array size
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getTagSize() {
        return count($this->ta);
    }

    /**
     * Get character's mirror character
     *
     * The mirrored form is returned only when the resolved bidi category is
     * 'R' and a constant 'M<code value>' holding the mirror's code value is
     * defined; in every other case the character's own UTF-8 bytes are returned.
     *
     * @access  public
     * @return  string UTF-8 encoded Unicode character
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getMirror() {
        // Strict comparison: only the string 'R' may trigger mirroring.
        if ($this->getTypeBiDi() !== 'R') {
            return $this->getLetter();
        }

        // Single lookup; null (undefined constant) means "no mirror".
        $mirror = $this->checkConstant('M' . $this->getNumber());
        if (!$mirror) {
            return $this->getLetter();
        }

        return self::encodeUTF8Num($mirror);
    }

    /**
     * Get number of possible forms an Arabic letter can take
     *
     * @access  public
     * @return  mixed Value of constant 'A<code value>S', or null when it is not defined
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetSize() {
        return $this->checkConstant('A' . $this->getNumber() . 'S');
    }

    /**
     * Get isolated form of Arabic letter
     *
     * @access  public
     * @return  mixed Value of constant 'A<code value>I', or null when it is not defined
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetIsolated() {
        return $this->checkConstant('A' . $this->getNumber() . 'I');
    }

    /**
     * Get final form of Arabic letter
     *
     * @access  public
     * @return  mixed Value of constant 'A<code value>F', or null when it is not defined
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetFinal() {
        return $this->checkConstant('A' . $this->getNumber() . 'F');
    }

    /**
     * Get initial form of Arabic letter
     *
     * @access  public
     * @return  mixed Value of constant 'A<code value>N', or null when it is not defined
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetInitial() {
        return $this->checkConstant('A' . $this->getNumber() . 'N');
    }

    /**
     * Get medial form of Arabic letter
     *
     * @access  public
     * @return  mixed Value of constant 'A<code value>M', or null when it is not defined
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetMedial() {
        return $this->checkConstant('A' . $this->getNumber() . 'M');
    }

    /**
     * Get a UTF-8 encoded character.
     *
     * Feed this function with a Unicode character's code value in decimal
     * and you will get it encoded in UTF-8.
     *
     * Values from 0 up to 2^31 - 1 are accepted and produce sequences of
     * one to six bytes. Note that values above 0x10FFFF and surrogate code
     * values (0xD800-0xDFFF) are NOT rejected, although current UTF-8
     * (RFC 3629) does not allow them.
     *
     * Out-of-range input raises an exception rather than printing a message
     * and terminating the script, so the calling application stays in control.
     *
     * @access  public
     * @param   integer [$char_num] The decimal code value of a Unicode character
     * @return  string The character encoded in UTF-8
     * @throws  InvalidArgumentException When the code value is negative or needs more than six bytes
     * @static
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public static function encodeUTF8Num($char_num) {
        if ($char_num < 0) {
            throw new InvalidArgumentException('Character number too small... Aborted!');
        }

        // $limits[$n] is the first code value that no longer fits in a
        // sequence with $n continuation bytes (2^7, 2^11, 2^16, 2^21, 2^26, 2^31).
        $limits = array(0x80, 0x800, 0x10000, 0x200000, 0x4000000, 0x80000000);

        // Count how many continuation bytes the value needs.
        $trail = 0;
        while ($trail < 6 && !($char_num < $limits[$trail])) {
            $trail++;
        }

        if ($trail === 6) {
            throw new InvalidArgumentException('Character number too large... Aborted!');
        }
        if ($trail === 0) {
            // Single byte: the code value is the byte itself.
            return chr($char_num);
        }

        // Lead byte: ($trail + 1) high bits set, followed by a zero bit
        // (0xC0, 0xE0, 0xF0, 0xF8, 0xFC), then the top payload bits.
        $lead = (0xFF << (7 - $trail)) & 0xFF;
        $utf8 = chr($lead | ($char_num >> (6 * $trail)));

        // Continuation bytes: '10' prefix plus six payload bits each,
        // most significant group first.
        for ($group = $trail - 1; $group >= 0; $group--) {
            $utf8 .= chr(0x80 | (($char_num >> (6 * $group)) & 0x3F));
        }

        return $utf8;
    }

    /**
     * Attach zero-length (non-)joiner to Arabic letter
     *
     * @access  public
     * @param   integer [$num] Unicode code value (8204 for the non-joiner, 8205 for the joiner)
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setJoiner($num) {
        $this->ar = $num;
    }

    /**
     * Retrieve zero-length (non-)joiner's effect on Arabic letter
     *
     * @access  public
     * @return  integer 1 (non-joiner, 8204), 4 (joiner, 8205) or 0 when neither is attached
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getJoiner() {
        // Loose comparison is kept on purpose so numeric strings passed to
        // setJoiner() continue to be recognised.
        if ($this->ar == 8204) {
            return 1;
        }
        if ($this->ar == 8205) {
            return 4;
        }
        return 0;
    }

    /**
     * Look up and store character's Unicode bidi category
     *
     * Reads the constant 'U<code value>'; the category becomes null when
     * that constant is not defined.
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    private function setTypeChar() {
        $this->ty = $this->checkConstant('U' . $this->nu);
    }

    /**
     * Check whether constant exists and return its value
     *
     * @access  private
     * @param   string [$c] Name of constant
     * @return  integer|string|null Value of the constant (Unicode bidi category, code value or number of forms an Arabic letter can take), or null when it is not defined
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    private function checkConstant($c) {
        if (!defined($c)) {
            return null;
        }
        return constant($c);
    }
}
?>
Return current item: PHPBiDi