<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
/**
* UniChar is a Unicode Character class that supports PHPBiDi.
*
* UniChar helps PHPBiDi handle Unicode character code values and their
* characteristics to create bidirectional texts.
*
* @package PHPBiDi
* @author Efthimios Mavrogeorgiadis <hide@address.com>
* @version $Id: unicode.php, v 1.00 Wed Jan 09 2008 22:17:46 GMT+0200 Efthimios Mavrogeorgiadis $
* @since Wed Jan 09 2008 22:17:46 GMT+0200
* @access public
* @uses unicode_data.php Bidirectional values, mirrored characters and Arabic letters in accordance with Unicode 5.0
*/
// Load required file
require_once (dirname(__FILE__) . '/unicode_data.php');
/**
 * A single Unicode character together with the state used while producing
 * bidirectional text: its code value, bidi categories, embedding level,
 * UTF-8 representation, an optional Arabic (non-)joiner and a list of tags.
 *
 * Character properties are looked up through global constants whose names are
 * built from a prefix and the code value ('U<code>', 'M<code>', 'A<code>S',
 * ...). NOTE(review): these constants are presumably defined by
 * unicode_data.php - confirm against that file.
 */
class UniChar {
    /**
     * Unicode character code value (empty string until setNumber() is called)
     * @access private
     * @var integer|string
     */
    private $nu = '';
    /**
     * Unicode bidi category (empty string until setNumber() is called,
     * null when no category constant exists for the code value)
     * @access private
     * @var string|null
     */
    private $ty = '';
    /**
     * Unicode bidi category after the Unicode Bidirectional Algorithm has been applied
     * @access private
     * @var string|null
     */
    private $tb = '';
    /**
     * Character's embedding level after the Unicode Bidirectional Algorithm has been applied
     * (empty string until setLevel() is called)
     * @access private
     * @var integer|string
     */
    private $le = '';
    /**
     * Unicode character encoded in UTF-8
     * @access private
     * @var string
     */
    private $lt = '';
    /**
     * Arabic zero-length (non-)joiner (can be either 8204 or 8205;
     * empty string when none has been attached)
     * @access private
     * @var integer|string
     */
    private $ar = '';
    /**
     * Tags associated with the character, in the order they were added
     * @access private
     * @var array
     */
    private $ta = array();

    /**
     * Set basic characteristics of Unicode character
     *
     * Stores the code value, looks up the character's bidi category and
     * uses that category as the initial resolved bidi category.
     *
     * @access public
     * @param integer [$number] Unicode character code value
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setNumber($number) {
        $this->nu = $number;
        $this->setTypeChar();
        $this->setTypeBiDi($this->getTypeChar());
    }

    /**
     * Set character's bidi category as resolved by the Unicode Bidirectional Algorithm
     *
     * @access public
     * @param string [$type] Unicode bidi category
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setTypeBiDi($type) {
        $this->tb = $type;
    }

    /**
     * Set character's embedding level within paragraph
     *
     * @access public
     * @param integer [$level] Embedding level
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setLevel($level) {
        $this->le = $level;
    }

    /**
     * Set character's representation encoded in UTF-8
     *
     * Replaces any bytes stored so far.
     *
     * @access public
     * @param string [$letter] Unicode character encoded in UTF-8
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setLetter($letter) {
        $this->lt = $letter;
    }

    /**
     * Associate tag with character
     *
     * The tag is appended to the list of tags already stored.
     *
     * @access public
     * @param string [$ta] Tag
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setTag($ta) {
        $this->ta[] = $ta;
    }

    /**
     * Add byte to UTF-8 encoded character
     *
     * @access public
     * @param string [$letter] Byte of UTF-8 encoded character posing as a letter
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function addLetter($letter) {
        $this->lt .= $letter;
    }

    /**
     * Get character's Unicode code value
     *
     * @access public
     * @return integer Unicode code value
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getNumber() {
        return $this->nu;
    }

    /**
     * Get character's Unicode bidi category
     *
     * @access public
     * @return string|null Unicode bidi category, null when none was found for the code value
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getTypeChar() {
        return $this->ty;
    }

    /**
     * Get character's bidi category after application of the Unicode Bidirectional Algorithm
     *
     * @access public
     * @return string|null Unicode bidi category
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getTypeBiDi() {
        return $this->tb;
    }

    /**
     * Get character's embedding level within paragraph
     *
     * @access public
     * @return integer Embedding level
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getLevel() {
        return $this->le;
    }

    /**
     * Get character's representation encoded in UTF-8
     *
     * @access public
     * @return string Character encoded in UTF-8
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getLetter() {
        return $this->lt;
    }

    /**
     * Get tag
     *
     * @access public
     * @param integer [$i] Array index
     * @return string|null Tag, or null when no tag is stored at that index
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getTag($i) {
        // Guard the lookup so that an unknown index yields null without
        // raising an "undefined offset/array key" notice or warning.
        return isset($this->ta[$i]) ? $this->ta[$i] : null;
    }

    /**
     * Get tag array size
     *
     * @access public
     * @return integer Array size
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getTagSize() {
        return count($this->ta);
    }

    /**
     * Get character's mirror character
     *
     * The mirrored form is returned only when the resolved bidi category is
     * 'R' and a (truthy) constant 'M<code value>' exists; in every other case
     * the character's own UTF-8 representation is returned.
     *
     * @access public
     * @return string UTF-8 encoded Unicode character
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getMirror() {
        if ($this->getTypeBiDi() === 'R') {
            // Look the mirror code value up once and reuse it.
            $mirror = $this->checkConstant('M' . $this->getNumber());
            if ($mirror) {
                return self::encodeUTF8Num($mirror);
            }
        }
        return $this->getLetter();
    }

    /**
     * Get number of possible forms an Arabic letter can take
     *
     * @access public
     * @return integer|string|null Value of constant 'A<code value>S', null when it is not defined
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetSize() {
        return $this->checkConstant('A' . $this->getNumber() . 'S');
    }

    /**
     * Get isolated form of Arabic letter
     *
     * @access public
     * @return integer|string|null Value of constant 'A<code value>I', null when it is not defined
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetIsolated() {
        return $this->checkConstant('A' . $this->getNumber() . 'I');
    }

    /**
     * Get final form of Arabic letter
     *
     * @access public
     * @return integer|string|null Value of constant 'A<code value>F', null when it is not defined
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetFinal() {
        return $this->checkConstant('A' . $this->getNumber() . 'F');
    }

    /**
     * Get initial form of Arabic letter
     *
     * @access public
     * @return integer|string|null Value of constant 'A<code value>N', null when it is not defined
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetInitial() {
        return $this->checkConstant('A' . $this->getNumber() . 'N');
    }

    /**
     * Get medial form of Arabic letter
     *
     * @access public
     * @return integer|string|null Value of constant 'A<code value>M', null when it is not defined
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getArLetMedial() {
        return $this->checkConstant('A' . $this->getNumber() . 'M');
    }

    /**
     * Get a UTF-8 encoded character.
     *
     * Feed this function with a Unicode character's code value in decimal
     * and you will get it encoded in UTF-8.
     *
     * Values below 128 are returned as a single byte; larger values are
     * encoded with two to six bytes (the six-byte form covers values below
     * 2^31). A negative or too large value prints a message and terminates
     * the script.
     *
     * @access public
     * @param integer [$char_num] The decimal code value of a Unicode character
     * @return string The character encoded in UTF-8
     * @static
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public static function encodeUTF8Num($char_num) {
        if ($char_num < 0) {
            echo "\nCharacter number too small... Aborted!\n";
            exit;
        }
        // Single-byte range: the code value is the byte itself.
        if ($char_num < 0x80) {
            return chr($char_num);
        }
        // Exclusive upper bound of the values that fit in a sequence of
        // 2..6 bytes (11, 16, 21, 26 and 31 payload bits respectively).
        $limits = array(
            2 => 0x800,
            3 => 0x10000,
            4 => 0x200000,
            5 => 0x4000000,
            6 => 0x80000000,
        );
        $bytes = 0;
        foreach ($limits as $count => $limit) {
            if ($char_num < $limit) {
                $bytes = $count;
                break;
            }
        }
        if ($bytes === 0) {
            echo "\nCharacter number too large... Aborted!\n";
            exit;
        }
        // Lead byte marker: as many high 1-bits as there are bytes in the
        // sequence (0xC0, 0xE0, 0xF0, 0xF8, 0xFC).
        $lead = (0xFF << (8 - $bytes)) & 0xFF;
        $lt = chr(($char_num >> (6 * ($bytes - 1))) | $lead);
        // Continuation bytes: 10xxxxxx, six payload bits each, most
        // significant group first.
        for ($i = $bytes - 2; $i >= 0; $i--) {
            $lt .= chr((($char_num >> (6 * $i)) & 0x3F) | 0x80);
        }
        return $lt;
    }

    /**
     * Attach zero-length (non-)joiner to Arabic letter
     *
     * @access public
     * @param integer [$num] Unicode code value
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function setJoiner($num) {
        $this->ar = $num;
    }

    /**
     * Retrieve zero-length (non-)joiner's effect on Arabic letter
     *
     * @access public
     * @return integer 1 (non-joiner, 8204), 4 (joiner, 8205) or 0 (anything else)
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    public function getJoiner() {
        // Loose comparison is kept on purpose so that numeric strings
        // passed to setJoiner() keep matching.
        if ($this->ar == 8204) {
            return 1;
        }
        if ($this->ar == 8205) {
            return 4;
        }
        return 0;
    }

    /**
     * Set character's Unicode bidi category
     *
     * Reads the constant 'U<code value>' and stores its value, or null
     * when that constant is not defined.
     *
     * @access private
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    private function setTypeChar() {
        $this->ty = $this->checkConstant('U' . $this->nu);
    }

    /**
     * Check whether constant exists and return its value
     *
     * @access private
     * @param string [$c] Name of constant
     * @return integer|string|null Value of the constant (Unicode bidi category, code value or
     *                             number of forms an Arabic letter can take), null when it is not defined
     * @author Efthimios Mavrogeorgiadis <hide@address.com>
     * @since Wed Jan 09 2008 22:25:19 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 22:25:19 GMT+0200
     */
    private function checkConstant($c) {
        return defined($c) ? constant($c) : null;
    }
}
?>