Location: PHPKode > scripts > PHPBiDi > phpbidi/phpbidi.php
<?php

/* vim: set expandtab tabstop=4 shiftwidth=4: */

/**
 * PHPBiDi is an implementation of the Unicode Bidirectional Algorithm in PHP.
 *
 * This class is meant to support external applications that need to handle
 * text whose characters are placed in logical order but some or all
 * characters should be displayed or printed in a right-to-left direction.
 * Plain UTF-8 text and HTML are supported.
 *
 * It parses the text in two stages. As soon as the object is constructed,
 * the text is parsed and tagged in accordance to Unicode's Bidirectional
 * Algorithm. Once the external application has decided where to break each
 * line, the line details are passed to function getLine(), which returns the
 * requested characters in the correct visual order.
 *
 * @package    PHPBiDi
 * @author     Efthimios Mavrogeorgiadis <hide@address.com>
 * @version    $Id: phpbidi.php, v 1.00 Wed Jan 09 2008 21:31:36 GMT+0200 Efthimios Mavrogeorgiadis $
 * @since      Wed Jan 09 2008 01:32:38 GMT+0200
 * @access     public
 * @uses       unicode.php to parse UTF-8 characters
 * @example    example.php  description
 * Displays <a href="http://www.gnu.org/licenses/old-licenses/gpl-2.0.html">GNU Public License</a>
 * @license    http://www.gnu.org/licenses/old-licenses/gpl-2.0.html GNU Public License
 * @copyright  Copyright (c) 2008, Efthimios Mavrogeorgiadis
 */

// Load required file
require_once (dirname(__FILE__) . '/unicode.php');
class PHPBiDi {

    /**
     * The input as it is transformed by the functions of the class.
     * @access private
     * @var string
     */
    private $text = '';

    /**
     * The offset of array $paragraphs, i.e. the number of the paragraph being processed.
     * @access private
     * @var integer
     */
    private $par = 0;

    /**
     * How many characters have been processed?
     * Resets to 0 whenever a new paragraph is loaded.
     * @access private
     * @var integer
     */
    private $char_counter = 0;

    /**
     * The paragraph embedding level (can be either 0 or 1).
     * @access private
     * @var integer
     */
    private $pel = 0;

    /**
     * Is the input HTML text?
     * @access private
     * @var boolean
     */
    private $html = false;

    /**
     * Does the input contain right-to-left characters?
     * @access private
     * @var boolean
     */
    private $rtl = false;

    /**
     * Does the input contain Arabic characters?
     * @access private
     * @var boolean
     */
    private $arabic = false;

    /**
     * The paragraphs comprising the input.
     * @access private
     * @var array
     */
    private $paragraphs = array();

    /**
     * The results of the first stage of parsing.
     * @access private
     * @var array
     */
    private $result = array();

    /**
     * The embedding level of each paragraph.
     * @access private
     * @var array
     */
    private $pels = array();

    /**
     * The tags that are stripped from each paragraph.
     * @access private
     * @var array
     */
    private $tags = array();

    /**
     * The characters found in each line.
     * @access private
     * @var array
     */
    private $line_chars = array();

    /**
     * The tags found in each line.
     * @access private
     * @var array
     */
    private $line_tags = array();

    /**
     * The tags that are open in each line.
     * @access private
     * @var array
     */
    private $open_tags = array();

    /**
     * The tags that split the paragraphs.
     * @access private
     * @var array
     */
    private $split_tags = array();

    /**
     * The characters of each paragraph.
     * @access private
     * @var array
     */
    private $text_array = array();

    /**
     * The embedded levels of each paragraph.
     * @access private
     * @var array
     */
    private $new_levels = array();

    /**
     * Constructor function
     *
     * It feeds on text and HTML (true or false).
     *
     * @access  public
     * @param   string [$text] The input
     * @param   boolean [$html] Is it HTML or not?
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function __construct($text, $html) {
        // Old-style constructors (a method named after the class) are no
        // longer invoked by "new" on PHP 8, so the work lives in __construct().
        // It is declared before the legacy PHPBidi() method on purpose.

        // Pass 1: remember the mode, then inspect and split the whole input.
        $this->html = $html;
        $this->setText($text);
        $this->checkLanguages();
        $this->getParagraphs();

        // Pass 2: run the first parsing stage on each paragraph and store
        // one entry per paragraph in $this->result.
        foreach ($this->paragraphs as &$par) {
            $this->result[] = array();
            $last = count($this->result) - 1;

            // Per-paragraph state starts from scratch.
            $this->pel = 0;
            $this->new_levels = array();
            $this->setText($par);
            $this->checkLanguages();
            if ($this->html) {
                $this->getTags();
            }
            $this->decodeUTF8();
            $this->getBiDiText();
            $this->parseBiDiText();

            // NOTE(review): getText() and fixArabChars() are defined outside
            // this excerpt; the call sequence is kept exactly as it was.
            $this->getText();
            if ($this->arabic) {
                $this->fixArabChars();
            }
            $this->getText();

            $this->pels[] = $this->pel;
            if ($this->html and count($this->tags[$this->par])) {
                $this->result[$last]['text'] = $this->fixTags();
            } else {
                $this->result[$last]['text'] = $this->text;
            }
            $this->result[$last]['newtext'] = $this->text;
            $this->result[$last]['rtl'] = $this->pel ? true : false;
            $this->result[$last]['levels'] = $this->new_levels;
            $this->changeParagraph();
        }
        // Drop the reference so $par cannot alias the last paragraph any longer.
        unset($par);

        // Leave the object positioned on the first paragraph for getLine().
        $this->resetParagraph();
    }

    /**
     * Legacy constructor name, kept for backward compatibility.
     *
     * Forwards to __construct() so code that calls PHPBidi() explicitly
     * keeps working.
     *
     * @access  public
     * @param   string [$text] The input
     * @param   boolean [$html] Is it HTML or not?
     */
    public function PHPBidi($text, $html) {
        $this->__construct($text, $html);
    }

    /**
     * Get your line in right-to-left order.
     *
     * @access  public
     * @param   integer [$start] The position in the paragraph where the new line starts (first character is 0). Only positive values are supported.
     * @param   integer [$length] The length of the new line.
     * @param   boolean [$tags] True if you need tags returned.
     * @param   boolean [$input] True if $start and $length are based on getResultText().
     * @return  string The new line in right-to-left order.
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function getLine($start, $length, $tags = false, $input = false) {
        if (!$input) {
            // Offsets refer to the tagless paragraph text.
            $txt = substr($this->getResultNewText(), $start, $length);
        } else {
            // Offsets refer to the tagged text: cut the slice, drop any tags
            // inside it and measure what is left.
            $slice = substr($this->getResultText(), $start, $length);
            $txt = preg_replace('/<[^>]+>/', '', $slice);
            $length = strlen($txt);
        }

        // Keep track of how much of the paragraph has been consumed so far.
        $this->char_counter = $this->char_counter + $length;
        $this->getStrippedValues($txt, $start, $length);
        $line = $this->getBiDiLine($start, $length);

        // Tags are only put back for HTML input, and only when asked for.
        if (!$this->html or !$tags) {
            return $line;
        }
        return $this->restoreTags($start, $length);
    }

    /**
     * Change paragraph.
     *
     * Use it to notify PHPBiDi that you've finished parsing a paragraph.
     *
     * @access  public
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function changeParagraph() {
        // A new paragraph always starts with nothing consumed yet.
        $this->char_counter = 0;
        // Advance the paragraph pointer by one.
        ++$this->par;
    }

    /**
     * Parse the first paragraph.
     *
     * Use it to notify PHPBiDi that you want to reset its paragraph counter
     * and start from the beginning.
     *
     * @access  public
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function resetParagraph() {
        // Rewind both the paragraph pointer and the per-paragraph character count.
        $this->char_counter = $this->par = 0;
    }

    /**
     * Parse a specific paragraph.
     *
     * Use it to notify PHPBiDi that you want to move to a specific paragraph.
     *
     * @access  public
     * @param   integer [$par] The paragraph number to move to.
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function goToParagraph($par) {
        // Nothing of the target paragraph has been consumed yet.
        $this->char_counter = 0;
        // Point at the requested paragraph; the value is stored as given.
        $this->par = $par;
    }

    /**
     * Get the number of current paragraph.
     *
     * Use it to find out which paragraph PHPBiDi is currently positioned on.
     *
     * @access  public
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function getParagraphNum() {
        // Zero-based index of the paragraph currently selected.
        $current = $this->par;
        return $current;
    }

    /**
     * How big is the $result array?
     *
     * Ask PHPBiDi to tell you how many paragraphs it has identified
     * in your input.
     *
     * @access  public
     * @return  integer The number of paragraphs in your text
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function getResultNum() {
        // One result entry is stored per paragraph, so the size of the
        // result list is the paragraph count.
        return sizeof($this->result);
    }

    /**
     * Get an original paragraph.
     *
     * Get the text of the paragraph you are processing as it was
     * in your original input.
     *
     * @access  public
     * @return  string A paragraph of your original input
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function getResultText() {
        // Look up the entry of the current paragraph, then its 'text' field.
        $paragraph = $this->result[$this->par];
        return $paragraph['text'];
    }

    /**
     * Get a tagless paragraph.
     *
     * Get the text of the paragraph you are processing.
     * If it's HTML text, it's stripped of its tags. Otherwise, you get
     * your original input.
     *
     * @access  public
     * @return  string A tagless paragraph
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function getResultNewText() {
        // Look up the entry of the current paragraph, then its 'newtext' field.
        $paragraph = $this->result[$this->par];
        return $paragraph['newtext'];
    }

    /**
     * Does this paragraph contain right-to-left characters?
     *
     * If it doesn't you may skip the second stage of processing.
     *
     * @access  public
     * @return  boolean True if right-to-left characters are found in the current paragraph.
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    public function getResultRTL() {
        // Look up the entry of the current paragraph, then its 'rtl' flag.
        $paragraph = $this->result[$this->par];
        return $paragraph['rtl'];
    }

    /**
     * Get all HTML tags found in input together with their offset
     * and strip the input tagless.
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function getTags() {
        // The same pattern locates and strips the tags, so every recorded
        // offset refers to the text that is actually left afterwards.
        // "<" and ">" are ASCII, so no UTF-8 mode is needed to match them.
        $tag_pattern = '/<[^<>]+>/';
        preg_match_all($tag_pattern, $this->text, $matches, PREG_OFFSET_CAPTURE);

        // Step 1: byte offset of every tag within the stripped text.
        $removed = 0; // bytes of markup seen before the current tag
        $t = array();
        foreach ($matches[0] as $match) {
            $markup = $match[0];
            $offset = $match[1] - $removed;
            $removed += strlen($markup);

            // A closing tag is moved one byte back so that it lands on the
            // character preceding it. It is never moved below 0: a negative
            // length would make substr() below cut from the END of the text.
            if ($offset > 0 and preg_match('/^<\s*\//', $markup)) {
                $offset--;
            }
            $t[] = array('tag' => $markup, 'offset' => $offset);
        }

        $this->text = preg_replace($tag_pattern, '', $this->text);

        // Step 2: turn byte offsets into character offsets by counting the
        // UTF-8 characters that precede each tag.
        $pattern = constant('UTF8PATTERN');
        foreach ($t as $i => $entry) {
            $sub = substr($this->text, 0, $entry['offset']);
            preg_match_all($pattern, $sub, $chars);
            $t[$i]['offset'] = count($chars[0]);
        }

        // One list of tags is appended per paragraph processed.
        $this->tags[] = $t;
    }

    /**
     * Wrapper function to identify characters.
     *
     * This function calls checkArabic() and checkText() to determine
     * whether Arabic or other right-to-left characters appear in the input.
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function checkLanguages() {
        $has_arabic = $this->checkArabic();
        $this->arabic = $has_arabic;
        // Arabic implies right-to-left; the other right-to-left scripts are
        // only tested when no Arabic was found.
        $this->rtl = $has_arabic ? true : $this->checkText();
    }

    /**
     * Set the $text variable.
     *
     * @access  private
     * @param   string [$text] The input as it is processed by the functions
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function setText($text) {
        // Replace the working text wholesale; no validation is performed.
        $working = $text;
        $this->text = $working;
    }

    /**
     * Set the $text_array.
     *
     * The input split in characters with their Unicode characteristics
     * and bidirectional information.
     *
     * @access  private
     * @param   array [$text_array] The input split in characters
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function setTextArray($text_array) {
        // Store the per-character list of the paragraph being processed.
        $characters = $text_array;
        $this->text_array = $characters;
    }

    /**
     * Get the input as an array of characters.
     *
     * @access  private
     * @return  array The input as an array of characters
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function getTextArray() {
        // Returned by value: callers receive their own copy of the list.
        $characters = $this->text_array;
        return $characters;
    }

    /**
     * Decode the UTF-8 encoded input into an array of Unicode character code values
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function decodeUTF8() {
        $str = $this->UTF8Text2Array();
        $open = array(); // opening tags not yet closed, most recent first

        // Tags are collected for HTML input only; for plain text there is no
        // entry for the current paragraph, so fall back to an empty list
        // instead of reading an undefined index and counting null.
        $t = array();
        if ($this->html and isset($this->tags[$this->par]) and is_array($this->tags[$this->par])) {
            $t = $this->tags[$this->par];
        }
        $tag = '';
        if (count($t)) {
            $tag = array_shift($t);
        }

        for ($i = 0; $i < count($str); $i++) {
            $char = new UniChar();

            // Attach every tag recorded at this character offset.
            while ($this->html and is_array($tag) and $tag['offset'] == $i) {
                $closing = preg_match('/<\s*\//', $tag['tag']);
                $solo = preg_match('/<[^>]+\/\s*>/', $tag['tag']);
                if ($closing) {
                    // Find the most recent open tag with the same name. The
                    // whole name is compared ($m[1], not only its first
                    // letter) and quoted before being used in a pattern.
                    if (count($open) and preg_match('/<\s*\/\s*([^\s>]+)/', $tag['tag'], $m)) {
                        $tm = preg_quote($m[1], '/');
                        foreach ($open as $k => $candidate) {
                            if (preg_match('/<\s*' . $tm . '(\s+[^>]+)*>/', $candidate)) {
                                $char->setTag($candidate);
                                array_splice($open, $k, 1);
                                break;
                            }
                        }
                    }
                }
                elseif ($solo) {
                    // Self-closing tag: attach it, nothing to remember.
                    $char->setTag($tag['tag']);
                }
                else {
                    // Opening tag: attach it and remember it as open.
                    $char->setTag($tag['tag']);
                    array_unshift($open, $tag['tag']);
                }
                $tag = '';
                if (count($t)) {
                    $tag = array_shift($t);
                }
            }

            $length = strlen($str[$i]);
            if ($length > 6) {
                echo "\nCharacter out of range... Aborted!\n";
                exit;
            } elseif ($length == 1) {
                // Single byte: the code value is the byte itself.
                $char->setNumber(ord($str[$i]));
                $char->setLetter($str[$i]);
                $str[$i] = $char;
            } else {
                // Multi-byte sequence: the lead byte contributes its low
                // (7 - $length) bits, every following byte its low 6 bits.
                $string = $str[$i];
                $j = $length;
                $mask = (1 << (7 - $length)) - 1;
                while ($j) {
                    $pos = $length - $j;
                    $lshift = 6 * $j - 6;
                    // Square brackets: the {} string offset syntax is a parse
                    // error on PHP 8.
                    $char_num = ord($string[$pos]);
                    $char->setNumber($char->getNumber() | (($char_num & $mask) << $lshift));
                    $mask = 63;
                    $j--;
                }
                $char->setLetter($str[$i]);
                $str[$i] = $char;
            }

            // U+200C (8204) and U+200D (8205) are recorded on the character
            // that precedes them, when there is one.
            if ($char->getNumber() == 8204 or $char->getNumber() == 8205) {
                if ($i) {
                    $str[$i-1]->setJoiner($char->getNumber());
                }
            }
        }
        $this->setTextArray($str);
    }

    /**
     * Check whether input contains Arabic characters
     *
     * @access  private
     * @return  boolean Returns true if input contains Arabic characters
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function checkArabic() {
        // Byte-level alternatives, written in extended (/x) syntax, for the
        // UTF-8 sequences tagged AL and AN in the inline comments.
        $arabic_pattern = '/(
			  \xD8[\x80-\x83\x8B\x8D\x9B\x9E\x9F\xA1-\xBA]	# AL
			| \xD9[\x80-\x8A\xAD-\xAF\xB1-\xBF]		# AL
			| \xDA[\x80-\xBF]				# AL
			| \xDB[\x80-\x95\x9D\xA5\xA6\xAE\xAF\xBA-\xBF]	# AL
			| \xDC[\x80-\x8D\x90\x92-\xAF]			# AL
			| \xDD[\x8D-\xAD]				# AL
			| \xDE[\x80-\xA5\xB1]				# AL
			| \xEF\xAD[\x90-\xBF]				# AL
			| \xEF\xAE[\x80-\xB1]				# AL
			| \xEF\xAF[\x93-\xBF]				# AL
			| \xEF[\xB0-\xB3][\x80-\xBF]			# AL
			| \xEF\xB4[\x80-\xBD]				# AL
			| \xEF\xB5[\x90-\xBF]				# AL
			| \xEF\xB6[\x80-\x8F\x92-\xBF]			# AL
			| \xEF\xB7[\x80-\x87\xB0-\xBC]			# AL
			| \xEF\xB9[\xB0-\xB4\xB6-\xBF]			# AL
			| \xEF\xBA[\x80-\xBF]				# AL
			| \xEF\xBB[\x80-\xBC]				# AL
			| \xD9[\xA0-\xA9\xAB\xAC]			# AN
			)/x';
        // preg_match() yields 1 on a hit and 0 or false otherwise.
        return (bool) preg_match($arabic_pattern, $this->text);
    }

    /**
     * Check whether input contains other right-to-left characters apart from Arabic
     *
     * @access  private
     * @return  boolean Returns true if input contains right-to-left characters
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function checkText() {
        // Byte-level alternatives, written in extended (/x) syntax, for the
        // UTF-8 sequences tagged R, RLE and RLO in the inline comments.
        $rtl_pattern = '/(
			  \xD6\xBE						# R
			| \xD7[\x80\x83\x86\x90-\xAA\xB0-\xB4]			# R
			| \xDF[\x80-\xAA\xB4\xB5\xBA]				# R
			| \xE2\x80\x8F						# R
			| \xEF\xAC[\x9D\x9F\xA0-\xA8\xAA-\xB6\xB8-\xBC\xBE]	# R
			| \xEF\xAD[\x80\x81\x83\x84\x86-\x8F]			# R
			| \xF0\x90\xA0[\x80-\x85\x88\x8A-\xB5\xB7\xB8\xBC\xBF]	# R
			| \xF0\x90\xA4[\x80-\x99]				# R
			| \xF0\x90\xA8[\x80\x90-\x93\x95-\x97\x99-\xB3]		# R
			| \xF0\x90\xA9[\x80-\x87\x90-\x98]			# R
			| \xE2\x80[\xAB\xAE]					# RLE & RLO
			)/x';
        // preg_match() yields 1 on a hit and 0 or false otherwise.
        return (bool) preg_match($rtl_pattern, $this->text);
    }

    /**
     * Check whether the current paragraph contains any right-to-left character
     *
     * @access  private
     * @return  boolean Returns true if input contains any right-to-left character
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function checkFull() {
        // Union of the alternatives used by checkArabic() and checkText(),
        // applied to the tagless text of the current paragraph.
        $any_rtl_pattern = '/(
			  \xD8[\x80-\x83\x8B\x8D\x9B\x9E\x9F\xA1-\xBA]		# AL
			| \xD9[\x80-\x8A\xAD-\xAF\xB1-\xBF]			# AL
			| \xDA[\x80-\xBF]					# AL
			| \xDB[\x80-\x95\x9D\xA5\xA6\xAE\xAF\xBA-\xBF]		# AL
			| \xDC[\x80-\x8D\x90\x92-\xAF]				# AL
			| \xDD[\x8D-\xAD]					# AL
			| \xDE[\x80-\xA5\xB1]					# AL
			| \xEF\xAD[\x90-\xBF]					# AL
			| \xEF\xAE[\x80-\xB1]					# AL
			| \xEF\xAF[\x93-\xBF]					# AL
			| \xEF[\xB0-\xB3][\x80-\xBF]				# AL
			| \xEF\xB4[\x80-\xBD]					# AL
			| \xEF\xB5[\x90-\xBF]					# AL
			| \xEF\xB6[\x80-\x8F\x92-\xBF]				# AL
			| \xEF\xB7[\x80-\x87\xB0-\xBC]				# AL
			| \xEF\xB9[\xB0-\xB4\xB6-\xBF]				# AL
			| \xEF\xBA[\x80-\xBF]					# AL
			| \xEF\xBB[\x80-\xBC]					# AL
			| \xD9[\xA0-\xA9\xAB\xAC]				# AN
			| \xD6\xBE						# R
			| \xD7[\x80\x83\x86\x90-\xAA\xB0-\xB4]			# R
			| \xDF[\x80-\xAA\xB4\xB5\xBA]				# R
			| \xE2\x80\x8F						# R
			| \xEF\xAC[\x9D\x9F\xA0-\xA8\xAA-\xB6\xB8-\xBC\xBE]	# R
			| \xEF\xAD[\x80\x81\x83\x84\x86-\x8F]			# R
			| \xF0\x90\xA0[\x80-\x85\x88\x8A-\xB5\xB7\xB8\xBC\xBF]	# R
			| \xF0\x90\xA4[\x80-\x99]				# R
			| \xF0\x90\xA8[\x80\x90-\x93\x95-\x97\x99-\xB3]		# R
			| \xF0\x90\xA9[\x80-\x87\x90-\x98]			# R
			| \xE2\x80[\xAB\xAE]					# RLE & RLO
			)/x';
        // preg_match() yields 1 on a hit and 0 or false otherwise.
        return (bool) preg_match($any_rtl_pattern, $this->getResultNewText());
    }

    /**
     * Turns UTF-8 encoded input into an array of Unicode characters encoded in UTF-8
     *
     * @access  private
     * @return  array Unicode characters encoded in UTF-8
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function UTF8Text2Array() {
        // UTF8PATTERN is a constant defined outside this class.
        preg_match_all(constant('UTF8PATTERN'), $this->text, $found);
        $characters = $found[0];

        // Non-empty text that yields no character at all cannot be processed.
        if (strlen($this->text) and !count($characters)) {
            echo "\nInvalid text... Aborted!\n";
            exit;
        }
        return $characters;
    }

    // P1. Split the text into separate paragraphs. A paragraph separator is kept with the previous paragraph.
    // Within each paragraph, apply all the other rules of this algorithm.

    
    /**
     * Split input into paragraphs
     *
     * Rule P1 of the Unicode Bidirectional Algorithm:
     * Split the text into separate paragraphs.
     * A paragraph separator is kept with the previous paragraph.
     * Within each paragraph, apply all the other rules of this algorithm.
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function getParagraphs() {
        $this->checkEntities();

        if (!$this->html) {
            // Plain text: collapse runs of three line feeds down to two, then
            // split on blank lines.
            while (preg_match('/\n\n\n/', $this->text)) {
                $this->text = preg_replace('/(\r*\n\r*\n)\r*\n/', '\\1', $this->text);
            }
            $this->paragraphs = preg_split('/\r*\n\r*\n/', $this->text);
            return;
        }

        // HTML: line breaks carry no meaning, so they become spaces.
        $this->text = preg_replace('/\r*\n/', ' ', $this->text);

        // U+202C POP DIRECTIONAL FORMATTING, UTF-8 encoded.
        $pdf = chr(226) . chr(128) . chr(172);

        // Rewrite every dir="rtl|ltr" attribute: remove it from the tag, put
        // the value returned by changeDir() right after the tag (presumably
        // the matching embedding character - changeDir() is defined outside
        // this excerpt) and put a PDF in front of the matching closing tag.
        $dir_pattern = '/(<\s*(\w+?)\s*[^>]*(\s*dir\s*=\s*["\']\s*(rtl|ltr)\s*["\'])[^>]*>)/Uu';
        while (preg_match($dir_pattern, $this->text, $matches, PREG_OFFSET_CAPTURE)) {
            $start = $matches[0][1];
            $tag = $matches[2][0];
            $newtag = str_replace($matches[3][0], '', $matches[0][0]);
            $mark = $this->changeDir($matches[4][0]);
            $this->text = substr($this->text, 0, $start) . $newtag . $mark . substr($this->text, $start + strlen($matches[0][0]));

            // Walk the tags of the same name that FOLLOW the rewritten tag and
            // stop at the first one that balances it. All matches are
            // examined, not just as many as the pattern has capture groups.
            $pat = '/<\s*\/*\s*' . $tag . '(\s*>|\s+[^>]*>)/Uu';
            preg_match_all($pat, $this->text, $same_tags, PREG_OFFSET_CAPTURE);
            $depth = 1; // the rewritten tag itself is open
            $end = strlen($this->text); // never closed: embedding runs to the end
            if (!empty($same_tags[0])) {
                foreach ($same_tags[0] as $found) {
                    if ($found[1] <= $start) {
                        continue;
                    }
                    if (preg_match('/<\s*\//', $found[0])) {
                        $depth--;
                    } else {
                        $depth++;
                    }
                    if ($depth == 0) {
                        $end = $found[1];
                        break;
                    }
                }
            }
            $this->text = substr($this->text, 0, $end) . $pdf . substr($this->text, $end);
        }

        // Block-level tags delimit paragraphs; the tags themselves are kept
        // in $this->split_tags.
        preg_match_all('/(<\s*(p|div|br|td|th|hr|h\d|legend|input|ol|ul|pre)\s*[^>]*>)/Uu', $this->text, $this->split_tags);
        $this->paragraphs = preg_split('/(<\s*(p|div|br|td|th|hr|h\d|legend|input|ol|ul|pre)\s*[^>]*>)/Uu', $this->text);
    }

    /**
     * Turn HTML Entities into Unicode character code values
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function checkEntities() {
        // The /e (eval) modifier was removed in PHP 7, where preg_replace()
        // would return NULL and wipe the text. Callbacks do the same job
        // without evaluating a code string. Named methods are used rather
        // than closures so the code also runs on early PHP 5.
        $this->text = preg_replace_callback('/&(\w+);/', array($this, 'decodeNamedEntityMatch'), $this->text);
        $this->text = preg_replace_callback('/&#(\d+);/', array($this, 'decodeNumericEntityMatch'), $this->text);
    }

    /**
     * Callback adapter: decode one named entity match.
     *
     * @access  private
     * @param   array [$match] Match array; index 1 holds the entity name
     * @return  string The replacement text
     */
    private function decodeNamedEntityMatch($match) {
        return $this->decodeEntity($match[1]);
    }

    /**
     * Callback adapter: decode one decimal numeric entity match.
     *
     * @access  private
     * @param   array [$match] Match array; index 1 holds the decimal digits
     * @return  string The replacement text
     */
    private function decodeNumericEntityMatch($match) {
        return UniChar::encodeUTF8Num($match[1]);
    }

    /**
     * Decode an HTML Entity
     *
     * @access  private
     * @param   string [$entity] The name of the HTML Entity
     * @return  string The character encoded in UTF-8
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function decodeEntity($entity) {
        // Decode straight to UTF-8. Taking ord() of the decoded string looks
        // only at its first byte, which is wrong as soon as the result is a
        // multi-byte UTF-8 sequence.
        $reference = '&' . $entity . ';';
        // An unknown name comes back unchanged from html_entity_decode(), so
        // the original "&name;" text is preserved rather than cut to "&".
        return html_entity_decode($reference, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Set the paragraph embedding level
     *
     * Rule P2 of the Unicode Bidirectional Algorithm:
     * In each paragraph, find the first character of type L, AL, or R.
     *
     * Rule P3 of the Unicode Bidirectional Algorithm:
     * If a character is found in P2 and it is of type AL or R, then set
     * the paragraph embedding level to one; otherwise, set it to zero.
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function getBiDiText() {
        // getTextArray() returns by value, so assigning its result by
        // reference raised "Only variables should be assigned by reference".
        // The characters are only read here, so a plain iteration is enough.
        foreach ($this->getTextArray() as $char) {
            $type = $char->getTypeChar();

            // P2/P3: the first character of type L, AL or R decides the level.
            if ($type == 'L') {
                $this->pel = 0;
                return;
            }
            if ($type == 'AL' or $type == 'R') {
                $this->pel = 1;
                return;
            }
        }
        // No such character found: $this->pel is left as it was.
    }

    /**
     * Complete the first stage of parsing
     *
     * Apply rules X1-X10, W1-W7, N1-N2 and I1-I2
     * of the Unicode Bidirectional Algorithm
     *
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function parseBiDiText() {
        $neutral = array('B', 'S', 'WS', 'ON');
        $lrm = 8206;
        $rlm = 8207;
        $lre = 8234;
        $rle = 8235;
        $pdf = 8236;
        $lro = 8237;
        $rlo = 8238;
        $ta = &$this->getTextArray();

        // X1. Begin by setting the current embedding level to the paragraph embedding level.
        // Set the directional override status to neutral. Process each character iteratively, applying rules X2 through X9.
        // Only embedding levels from 0 to 61 are valid in this phase.
        $cel = $this->pel;
        $dos = 'N';
        $remember = array();
        $sor = $this->pel%2 ? 'R' : 'L';
        $levels = array(array('level' => $cel, 'sor' => $sor, 'eor' => '', 'chars' => array()));
        $current_level = &$levels[count($levels) -1];
        $i = 0;
        while ($i < count($ta)) {
            $char = &$ta[$i];

            // X2. With each RLE, compute the least greater odd embedding level.
            // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status.
            //Reset the current level to this new level, and reset the override status to neutral.
            // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
            if ($char->getNumber() == $rle) {
                $next_level = $cel+($cel%2) +1;
                if ($next_level < 62) {
                    $remember[] = array('num' => $rle, 'cel' => $cel, 'dos' => $dos);
                    $cel = $next_level;
                    $sor = $cel%2 ? 'R' : 'L';
                    $current_level['eor'] = $sor;
                    $levels[] = array('level' => $cel, 'sor' => '', 'eor' => '', 'chars' => array());
                    $current_level = &$levels[count($levels) -1];
                    $current_level['sor'] = $sor;
                    $dos = 'N';
                }
            }

            // X3. With each LRE, compute the least greater even embedding level.
            // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status.
            // Reset the current level to this new level, and reset the override status to neutral.
            // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
            elseif ($char->getNumber() == $lre) {
                $next_level = $cel+2-($cel%2);
                if ($next_level < 62) {
                    $remember[] = array('num' => $lre, 'cel' => $cel, 'dos' => $dos);
                    $cel = $next_level;
                    $sor = $cel%2 ? 'R' : 'L';
                    $current_level['eor'] = $sor;
                    $levels[] = array('level' => $cel, 'sor' => '', 'eor' => '', 'chars' => array());
                    $current_level = &$levels[count($levels) -1];
                    $current_level['sor'] = $sor;
                    $dos = 'N';
                }
            }

            // X4. With each RLO, compute the least greater odd embedding level.
            // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status.
            // Reset the current level to this new level, and reset the override status to right-to-left.
            elseif ($char->getNumber() == $rlo) {
                $next_level = $cel+($cel%2) +1;
                if ($next_level < 62) {
                    $remember[] = array('num' => $rlo, 'cel' => $cel, 'dos' => $dos);
                    $cel = $next_level;
                    $sor = $cel%2 ? 'R' : 'L';
                    $current_level['eor'] = $sor;
                    $levels[] = array('level' => $cel, 'sor' => '', 'eor' => '', 'chars' => array());
                    $current_level = &$levels[count($levels) -1];
                    $current_level['sor'] = $sor;
                    $dos = 'R';
                }
            }

            // X5. With each LRO, compute the least greater even embedding level.
            // a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status.
            // Reset the current level to this new level, and reset the override status to left-to-right.
            // b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
            elseif ($char->getNumber() == $lro) {
                $next_level = $cel+2-($cel%2);
                if ($next_level < 62) {
                    $remember[] = array('num' => $lro, 'cel' => $cel, 'dos' => $dos);
                    $cel = $next_level;
                    $sor = $cel%2 ? 'R' : 'L';
                    $current_level['eor'] = $sor;
                    $levels[] = array('level' => $cel, 'sor' => '', 'eor' => '', 'chars' => array());
                    $current_level = &$levels[count($levels) -1];
                    $current_level['sor'] = $sor;
                    $dos = 'L';
                }
            }

            // X7. With each PDF, determine the matching embedding or override code.
            // If there was a valid matching code, restore (pop) the last remembered (pushed) embedding level and directional override.
            elseif ($char->getNumber() == $pdf) {
                if (count($remember)) {
                    $last = count($remember) -1;
                    if ($remember[$last]['num'] == $rle or $remember[$last]['num'] == $lre or $remember[$last]['num'] == $rlo or $remember[$last]['num'] == $lro) {
                        $match = array_pop($remember);
                        $sor = ($cel > $match['cel'] ? $cel : $match['cel']) %2 ? 'R' : 'L';
                        $cel = $match['cel'];
                        $current_level['eor'] = $sor;
                        $levels[] = array('level' => $cel, 'sor' => '', 'eor' => '', 'chars' => array());
                        $current_level = &$levels[count($levels) -1];
                        $current_level['sor'] = $sor;
                        $dos = $match['dos'];
                    }
                }
            }

            // X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
            // a. Set the level of the current character to the current embedding level.
            // b. Whenever the directional override status is not neutral, reset the current character type to the directional override status.
            elseif ($char->getTypeChar() != 'BN') {
                $char->setLevel($cel);
                $current_level['chars'][] = $char;
                if ($dos != 'N') {
                    $char->setTypeBiDi($dos);
                }
            }

            // X8. All explicit directional embeddings and overrides are completely terminated at the end of each paragraph.
            // Paragraph separators are not included in the embedding.
            $i++;
        }
        $last = &$levels[count($levels) -1];
        $last['eor'] = ($last['level'] > $this->pel ? $last['level'] : $this->pel) %2 ? 'R' : 'L';

        // X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
        // Note that an implementation does not have to actually remove the codes; it just has to behave as though the codes were not present for the remainder of the algorithm.
        // Conformance does not require any particular placement of these codes as long as all other characters are ordered correctly.
        // The zero width joiner and non-joiner affect the shaping of the adjacent characters - those that are adjacent in the original backing-store order,
        // even though those characters may end up being rearranged to be non-adjacent by the Bidirectional Algorithm.
        // X10. The remaining rules are applied to each run of characters at the same level.
        // For each run, determine the start-of-level-run (sor) and end-of-level-run (eor) type, either L or R.
        // This depends on the higher of the two levels on either side of the boundary
        // (at the start or end of the paragraph, the level of the "other" run is the base embedding level).
        // If the higher level is odd, the type is R; otherwise, it is L.
        $i = 0;
        while ($i < count($levels)) {
            $current = &$levels[$i];
            $chars = &$current['chars'];
            $num_of_chars = count($chars);

            // W1. Examine each nonspacing mark (NSM) in the level run, and change the type of the NSM to the type of the previous character.
            // If the NSM is at the start of the level run, it will get the type of sor.
            // W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sor) is found.
            // If an AL is found, change the type of the European number to Arabic number.
            $j = 0;
            while ($j < $num_of_chars) {
                if ($chars[$j]->getTypeBiDi() == 'NSM' and $j == 0) {
                    $chars[$j]->setTypeBiDi($current['sor']);
                } elseif ($chars[$j]->getTypeBiDi() == 'NSM' and $j > 0) {
                    $chars[$j]->setTypeBiDi($chars[$j-1]->getTypeBiDi());
                }
                if ($chars[$j]->getTypeBiDi() == 'EN' and $j > 0) {
                    $k = $j;
                    while ($k) {
                        $k--;
                        if ($chars[$k]->getTypeBiDi() == 'R' or $chars[$k]->getTypeBiDi() == 'L') {
                            break;
                        } elseif ($chars[$k]->getTypeBiDi() == 'AL') {
                            $chars[$j]->setTypeBiDi('AN');
                            break;
                        }
                    }
                }
                $j++;
            }

            // W3. Change all ALs to R.
            // W4. A single European separator between two European numbers changes to a European number.
            // A single common separator between two numbers of the same type changes to that type.
            // W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
            // W6. Otherwise, separators and terminators change to Other Neutral.
            $j = 0;
            while ($j < $num_of_chars) {
                if ($chars[$j]->getTypeBiDi() == 'AL') {
                    $chars[$j]->setTypeBiDi('R');
                } elseif ($chars[$j]->getTypeBiDi() == 'EN' and $j < $num_of_chars-2) {
                    if (($chars[$j+1]->getTypeBiDi() == 'ES' or $chars[$j+1]->getTypeBiDi() == 'CS') and $chars[$j+2]->getTypeBiDi() == 'EN') {
                        $chars[$j+1]->setTypeBiDi('EN');
                    } elseif ($chars[$j+1]->getTypeBiDi() == 'ES' or $chars[$j+1]->getTypeBiDi() == 'CS') {
                        $chars[$j+1]->setTypeBiDi('ON');
                    }
                } elseif ($chars[$j]->getTypeBiDi() == 'AN' and $j < $num_of_chars-2) {
                    if ($chars[$j+1]->getTypeBiDi() == 'CS' and $chars[$j+2]->getTypeBiDi() == 'AN') {
                        $chars[$j+1]->setTypeBiDi('AN');
                    } elseif ($chars[$j+1]->getTypeBiDi() == 'CS') {
                        $chars[$j+1]->setTypeBiDi('ON');
                    }
                } elseif ($chars[$j]->getTypeBiDi() == 'ET' and $num_of_chars > 1) {
                    if ($j == $num_of_chars-1) {
                        if ($chars[$j-1]->getTypeBiDi() == 'EN') {
                            $chars[$j]->setTypeBiDi('EN');
                        } else {
                            $chars[$j]->setTypeBiDi('ON');
                        }
                    } elseif ($j == 0) {
                        if ($chars[$j+1]->getTypeBiDi() == 'EN') {
                            $chars[$j]->setTypeBiDi('EN');
                        } elseif ($chars[$j+1]->getTypeBiDi() == 'ET') {
                            $k = $j+1;
                            while ($chars[$k]->getTypeBiDi() == 'ET') {
                                $k++;
                                if ($k < $num_of_chars and $chars[$k]->getTypeBiDi() == 'EN') {
                                    for ($m = $k-1;$m < $j;$m--) {
                                        $chars[$m]->setTypeBiDi('EN');
                                    }
                                } elseif ($k < $num_of_chars and $chars[$k]->getTypeBiDi() == 'ET') {
                                    continue;
                                } else {
                                    for ($m = $k-1;$m < $j;$m--) {
                                        $chars[$m]->setTypeBiDi('ON');
                                    }
                                }
                            }
                        } else {
                            $chars[$j]->setTypeBiDi('ON');
                        }
                    } else {
                        if ($chars[$j-1]->getTypeBiDi() == 'EN' or $chars[$j+1]->getTypeBiDi() == 'EN') {
                            $chars[$j]->setTypeBiDi('EN');
                        } elseif ($chars[$j+1]->getTypeBiDi() == 'ET') {
                            $k = $j+1;
                            while ($chars[$k]->getTypeBiDi() == 'ET') {
                                $k++;
                                if ($k < $num_of_chars and $chars[$k]->getTypeBiDi() == 'EN') {
                                    for ($m = $k-1;$m < $j;$m--) {
                                        $chars[$m]->setTypeBiDi('EN');
                                    }
                                } elseif ($k < $num_of_chars and $chars[$k]->getTypeBiDi() == 'ET') {
                                    continue;
                                } else {
                                    for ($m = $k-1;$m < $j;$m--) {
                                        $chars[$m]->setTypeBiDi('ON');
                                    }
                                }
                            }
                        } else {
                            $chars[$j]->setTypeBiDi('ON');
                        }
                    }
                } elseif ($chars[$j]->getTypeBiDi() == 'ES' or $chars[$j]->getTypeBiDi() == 'CS' or $chars[$j]->getTypeBiDi() == 'ET') {
                    $chars[$j]->setTypeBiDi('ON');
                }
                $j++;
            }

            // W7. Search backward from each instance of a European number until the first strong type (R, L, or sor) is found.
            // If an L is found, then change the type of the European number to L.
            $j = 0;
            while ($j < $num_of_chars) {
                if ($chars[$j]->getTypeBiDi() == 'EN' and $j > 0) {
                    $k = $j;
                    while ($k) {
                        $k--;
                        if ($chars[$k]->getTypeBiDi() == 'R') {
                            break;
                        } elseif ($chars[$k]->getTypeBiDi() == 'L' or ($k == 0 and $current['sor'] == 'L')) {
                            $chars[$j]->setTypeBiDi('L');
                            break;
                        }
                    }
                } elseif ($chars[$j]->getTypeBiDi() == 'EN' and $current['sor'] == 'L') {
                    $chars[$j]->setTypeBiDi('L');
                }
                $j++;
            }

            // N1. A sequence of neutrals takes the direction of the surrounding strong text if the text on both sides has the same direction.
            // European and Arabic numbers act as if they were R in terms of their influence on neutrals.
            // Start-of-level-run (sor) and end-of-level-run (eor) are used at level run boundaries.
            // N2. Any remaining neutrals take the embedding direction.
            $j = 0;
            while ($j < $num_of_chars) {
                if (in_array($chars[$j]->getTypeBiDi(), $neutral)) {
                    if ($num_of_chars == 1) {
                        $chars[$j]->setTypeBiDi($current['level']%2 ? 'R' : 'L');
                    } else {
                        $start_char = $j;
                        if ($j == 0) {
                            $first_char = $current['sor'];
                        } else {
                            $first_char = $chars[$j-1]->getTypeBiDi();
                        }
                        while (1) {
                            $j++;
                            $end_char = $j;
                            if ($j == $num_of_chars) {
                                $last_char = $current['eor'];
                                break;
                            } elseif (in_array($chars[$j]->getTypeBiDi(), $neutral)) {
                                continue;
                            } else {
                                $last_char = $chars[$j]->getTypeBiDi();
                                break;
                            }
                        }
                        $right_context = array('AN', 'EN', 'R');
                        if ($first_char == 'L' and $last_char == 'L') {
                            for ($k = $start_char;$k < $end_char;$k++) {
                                $chars[$k]->setTypeBiDi('L');
                            }
                        } elseif (in_array($first_char, $right_context) and in_array($last_char, $right_context)) {
                            for ($k = $start_char;$k < $end_char;$k++) {
                                $chars[$k]->setTypeBiDi('R');
                            }
                        } else {
                            for ($k = $start_char;$k < $end_char;$k++) {
                                $chars[$k]->setTypeBiDi($current['level']%2 ? 'R' : 'L');
                            }
                        }
                    }
                }
                $j++;
            }
            $i++;
        }

        // I1. For all characters with an even (left-to-right) embedding direction, those of type R go up one level and those of type AN or EN go up two levels.
        // I2. For all characters with an odd (right-to-left) embedding direction, those of type L, EN or AN go up one level.
        $i = 0;
        $j = 0;
        $this->new_levels = array();
        $x = 0;
        while ($i < count($levels)) {
            $current = &$levels[$i];
            if (!count($current['chars'])) {
                $i++;
                continue;
            }
            $odd = $current['level']%2;
            $this->new_levels[] = array('level' => '', 'chars' => array());
            $last_level = &$this->new_levels[count($this->new_levels) -1];
            $first_char = &$current['chars'][0];
            $previous_bidi = $first_char->getTypeBiDi();
            $previous_level = $first_char->getLevel();
            if ($odd) {
                if ($previous_bidi == 'L' or $previous_bidi == 'AN' or $previous_bidi == 'EN') {
                    $previous_level = $first_char->getLevel() +1;
                    $first_char->setLevel($previous_level);
                }
            } else {
                if ($previous_bidi == 'R') {
                    $previous_level = $first_char->getLevel() +1;
                    $first_char->setLevel($previous_level);
                } elseif ($previous_bidi == 'AN' or $previous_bidi == 'EN') {
                    $previous_level = $first_char->getLevel() +2;
                    $first_char->setLevel($previous_level);
                }
            }
            $last_level['level'] = $first_char->getLevel();
            $last_level['chars'][] = $first_char;
            $k = 1;
            while ($k < count($current['chars'])) {
                if (isset($char)) {
                    unset($char);
                }
                $char = &$current['chars'][$k];
                if ($previous_bidi == $char->getTypeBiDi()) {
                    $char->setLevel($previous_level);
                    $last_level['chars'][] = $char;
                } else {
                    $this->new_levels[] = array('level' => '', 'chars' => array());
                    if (isset($last_level)) {
                        unset($last_level);
                    }
                    $last_level = &$this->new_levels[count($this->new_levels) -1];
                    $first_char = &$current['chars'][$k];
                    $previous_bidi = $char->getTypeBiDi();
                    $previous_level = $first_char->getLevel();
                    if ($odd) {
                        if ($previous_bidi == 'L' or $previous_bidi == 'AN' or $previous_bidi == 'EN') {
                            $previous_level = $char->getLevel() +1;
                            $char->setLevel($previous_level);
                        }
                    } else {
                        if ($previous_bidi == 'R') {
                            $previous_level = $char->getLevel() +1;
                            $char->setLevel($previous_level);
                        } elseif ($previous_bidi == 'AN' or $previous_bidi == 'EN') {
                            $previous_level = $char->getLevel() +2;
                            $char->setLevel($previous_level);
                        }
                    }
                    $last_level['level'] = $char->getLevel();
                    $last_level['chars'][] = $char;
                }
                $k++;
            }
            $i++;
        }
        
        if ($this->html) {
            $open = array();
            for ($i = 0;$i < count($this->new_levels);$i++) {
                $current = &$this->new_levels[$i];
                $chars = &$current['chars'];
                $last = count($chars) - 1;
                for ($j = 0;$j <= $last;$j++) {
                    $char = &$chars[$j];
                    $ts = $char->getTagSize();
                    for ($k = 0;$k < $ts;$k++) {
                        $cur_tag = $char->getTag($k);
                        $op = 1;
                        $l = 0;
                        while ($l < count($open)) {
                            if ($open[$l] == $cur_tag) {
                                $op = 0;
                                array_splice($open, $l, 1);
                                if (!$j) {
                                    $char->setTag($cur_tag);
                                }
                                break;
                            }
                            $l++;
                        }
                        if ($op) {
                            array_unshift($open, $cur_tag);
                        }
                    }
                    if (!$j) {
                        $l = 0;
                        while ($l < count($open)) {
                            $char->setTag($open[$l]);
                            $l++;
                        }
                    }
                }
                $char = &$chars[$last];
                $l = 0;
                while ($l < count($open)) {
                    $char->setTag($open[$l]);
                    $l++;
                }
            }
        }
    }

    /**
     * Complete the second stage of parsing
     *
     * Apply rules L1-L4 of the Unicode Bidirectional Algorithm
     *
     * @access  private
     * @param   integer [$start] Offset where line begins in paragraph
     * @param   integer [$length] Length of line
     * @return  string The line in visual order
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function getBiDiLine($start, $length) {
        $lev = &$this->result[$this->par]['levels'];

        // A length of 0 means "up to the end of the paragraph": a negative counter
        // never reaches zero, so collection only stops when the characters run out.
        if (!$length) {
            $length = -1;
        }

        // Collect the requested slice of the paragraph, still in logical order,
        // walking across the level runs produced by the first parsing stage.
        $temp = array();
        $num_of_runs = count($lev);
        for ($i = 0; $i < $num_of_runs and $length; $i++) {
            $l = $lev[$i]['chars'];
            $run_size = count($l);
            if ($start >= $run_size) {
                // The line begins after this run; skip it entirely.
                $start -= $run_size;
                continue;
            }
            $j = $start;
            $start = 0;
            while ($length and $j < $run_size) {
                $temp[] = $l[$j];
                $length--;
                $j++;
            }
        }

        // An empty slice has nothing to reorder. Return early and leave any
        // pending line_tags untouched so they are applied to the next non-empty
        // line instead of calling setTag() on a non-existent first character.
        if (!count($temp)) {
            $this->line_chars = $temp;
            return '';
        }
        $num_of_chars = count($temp);
        $pel = $this->pels[$this->par];

        // Tags that were still open at the end of the previous line are attached
        // to the first character of this line.
        if (count($this->line_tags)) {
            foreach ($this->line_tags as $tag) {
                $temp[0]->setTag($tag);
            }
            $this->line_tags = array();
        }

        // L1. On each line, reset the embedding level of the following characters to the paragraph embedding level:
        // 1. Segment separators,
        // 2. Paragraph separators,
        // 3. Any sequence of whitespace characters preceding a segment separator or paragraph separator, and
        // 4. Any sequence of white space characters at the end of the line.
        // The types of characters used here are the original types, not those modified by the previous phase.
        $open = array();
        for ($i = 0; $i < $num_of_chars; $i++) {
            $char = $temp[$i];
            $type = $char->getTypeChar();
            if ($type == 'B' or $type == 'S') {
                $char->setLevel($pel);
            } elseif ($type == 'WS') {
                // Find the end of the whitespace run starting here.
                $j = $i + 1;
                while ($j < $num_of_chars and $temp[$j]->getTypeChar() == 'WS') {
                    $j++;
                }
                // Only a run that reaches the end of the line or a separator is reset.
                if ($j == $num_of_chars or $temp[$j]->getTypeChar() == 'B' or $temp[$j]->getTypeChar() == 'S') {
                    for ($k = $i; $k < $j; $k++) {
                        $temp[$k]->setLevel($pel);
                    }
                }
            }

            // Track which tags are open: a tag seen while already open closes it,
            // otherwise it is recorded as newly opened. The "newly opened" flag is
            // evaluated per tag, so closing one tag on a character does not stop
            // another tag on the same character from being recorded.
            $ts = $char->getTagSize();
            for ($j = 0; $j < $ts; $j++) {
                $cur_tag = $char->getTag($j);
                $op = 1;
                for ($k = 0; $k < count($open); $k++) {
                    if ($open[$k] == $cur_tag) {
                        $op = 0;
                        array_splice($open, $k, 1);
                        break;
                    }
                }
                if ($op) {
                    array_unshift($open, $cur_tag);
                }
            }
        }

        // Tags still open at the end of the line are attached to its last character
        // and remembered so that the next line can pick them up.
        foreach ($open as $tag) {
            $temp[$num_of_chars - 1]->setTag($tag);
        }
        $this->line_tags = $open;

        // L2. From the highest level found in the text to the lowest odd level on each line, including intermediate levels not actually present in the text,
        // reverse any contiguous sequence of characters that are at that level or higher.
        $highest = 0;
        $lowest = 1000; // Sentinel: stays above $highest when the line has no odd level.
        foreach ($temp as $char) {
            $level = $char->getLevel();
            $highest = $highest >= $level ? $highest : $level;
            if ($level % 2) {
                $lowest = $lowest <= $level ? $lowest : $level;
            }
        }
        for ($level = $highest; $level >= $lowest; $level--) {
            $i = 0;
            while ($i < $num_of_chars) {
                if ($temp[$i]->getLevel() < $level) {
                    $i++;
                    continue;
                }
                // Locate the maximal run at this level or higher...
                $run_start = $i;
                while ($i < $num_of_chars and $temp[$i]->getLevel() >= $level) {
                    $i++;
                }
                // ...and reverse it in place.
                for ($a = $run_start, $b = $i - 1; $a < $b; $a++, $b--) {
                    $swap = $temp[$a];
                    $temp[$a] = $temp[$b];
                    $temp[$b] = $swap;
                }
            }
        }

        // L3. Combining marks applied to a right-to-left base character will at this point precede their base character.
        // If the rendering engine expects them to follow the base characters in the final display process, then the ordering of the marks and the base character must be reversed.
        // L4. A character is depicted by a mirrored glyph if and only if (a) the resolved directionality of that character is R,
        // and (b) the Bidi_Mirrored property value of that character is true.
        $ret = '';
        foreach ($temp as $char) {
            $ret .= $char->getMirror();
        }

        // Keep the visually ordered characters for later processing of this line.
        $this->line_chars = $temp;
        return $ret;
    }

    /**
     * Create Arabic ligatures
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function fixArabChars() {
        // Works directly on $this->new_levels: characters are modified in place and
        // some may be removed from their run (see the ligature handling below).
        $levels = &$this->new_levels;
        $i = 0;
        while ($i < count($levels)) {
            // Only runs with an odd level are processed; even-level runs are skipped.
            if (!($levels[$i]['level']%2)) {
                $i++;
                continue;
            }
            $chars = &$levels[$i]['chars'];
            // $arabic holds groups ("words") of consecutive joinable characters;
            // $chars_j holds, in parallel, the index of each of them inside $chars.
            $arabic = array(array());
            $chars_j = array(array());
            $last_word = &$arabic[count($arabic) -1];
            $last_j = &$chars_j[count($chars_j) -1];
            $j = 0;
            while ($j < count($chars)) {
                // A character belongs to a word when its code point is in 1536..1791 (U+0600..U+06FF)
                // with character type 'AL', or when it is 8204 (U+200C) or 8205 (U+200D).
                if (($chars[$j]->getNumber() > 1535 and $chars[$j]->getNumber() < 1792 and $chars[$j]->getTypeChar() == 'AL') or $chars[$j]->getNumber() == 8204 or $chars[$j]->getNumber() == 8205) {
                    $last_word[] = &$chars[$j];
                    $last_j[] = $j;
                } else {
                    // Any other character ends the current word; open a new, empty group
                    // unless the current one is still empty.
                    if (count($last_word)) {
                        $arabic[] = array();
                        $chars_j[] = array();
                        $last_word = &$arabic[count($arabic) -1];
                        $last_j = &$chars_j[count($chars_j) -1];
                    }
                }
                $j++;
            }
            // Drop the trailing group if nothing was collected into it.
            if (!count($last_word)) {
                unset($last_word);
                unset($last_j);
                array_pop($arabic);
                array_pop($chars_j);
            }
            // Indexes (into $chars) of characters absorbed into a ligature and to be removed.
            $spl = array();
            $j = 0;
            while ($j < count($arabic)) {
                // Whether the previous letter of the word joins to the current one.
                $link_prev = 0;
                // Single-character words are left untouched.
                if (count($arabic[$j]) > 1) {
                    for ($k = 0;$k < count($arabic[$j]);$k++) {
                        // Remember the bidi type so it can be put back after the code point is changed.
                        $oldtb = $arabic[$j][$k]->getTypeBiDi();
                        // Decide whether this letter joins to the next one.
                        // NOTE(review): the meaning of getJoiner() and of its value 4 is defined in
                        // unicode.php, not here; only the value 4 makes the letter link forward.
                        if ($join = $arabic[$j][$k]->getJoiner()) {
                            if ($join == 4) {
                                $link_next = 1;
                            } else {
                                $link_next = 0;
                            }
                        } else {
                            // No joiner value: link forward only if this is not the last letter, this letter's
                            // getArLetSize() is above 2 and the next letter's getArLetSize() is neither 0 nor 1.
                            // NOTE(review): getArLetSize() presumably counts the available presentation forms - confirm in unicode.php.
                            if ($k != count($arabic[$j]) -1 and $arabic[$j][$k]->getArLetSize() > 2 and $arabic[$j][$k+1]->getArLetSize() and $arabic[$j][$k+1]->getArLetSize() != 1) {
                                $link_next = 1;
                            } else {
                                $link_next = 0;
                            }
                        }
                        // Pick the presentation form from the joining context:
                        // joined on both sides -> medial, forward only -> initial,
                        // backward only -> final, otherwise -> isolated.
                        if ($link_prev and $link_next and $arabic[$j][$k]->getArLetSize() > 2) {
                            if ($arabic[$j][$k]->getArLetMedial()) {
                                $arabic[$j][$k]->setNumber($arabic[$j][$k]->getArLetMedial());
                            }
                            $link_prev = 1;
                        } elseif (!$link_prev and $link_next and $arabic[$j][$k]->getArLetSize() > 2) {
                            if ($arabic[$j][$k]->getArLetInitial()) {
                                $arabic[$j][$k]->setNumber($arabic[$j][$k]->getArLetInitial());
                            }
                            $link_prev = 1;
                        } elseif ($link_prev and !$link_next and $arabic[$j][$k]->getArLetSize() > 1) {
                            if ($arabic[$j][$k]->getArLetFinal()) {
                                $arabic[$j][$k]->setNumber($arabic[$j][$k]->getArLetFinal());
                            }
                            // Lam-alef ligature: when this letter became 65166 (U+FE8E, alef final form) and the
                            // previous letter is 65247 (U+FEDF, lam initial form) or 65248 (U+FEE0, lam medial form),
                            // replace it with 65275 (U+FEFB) or 65276 (U+FEFC) respectively and schedule the
                            // preceding lam for removal from $chars.
                            if ($k and $arabic[$j][$k]->getNumber() == 65166) {
                                switch ($arabic[$j][$k-1]->getNumber()) {
                                    case "65247":
                                        $arabic[$j][$k]->setNumber(65275);
                                        $spl[] = $chars_j[$j][$k-1];
                                    break;
                                    case "65248":
                                        $arabic[$j][$k]->setNumber(65276);
                                        $spl[] = $chars_j[$j][$k-1];
                                    break;
                                    default:
                                    break;
                                }
                            }
                            $link_prev = 0;
                        } else {
                            if ($arabic[$j][$k]->getArLetIsolated()) {
                                $arabic[$j][$k]->setNumber($arabic[$j][$k]->getArLetIsolated());
                            }
                            $link_prev = 0;
                        }
                        // Put back the bidi type saved above.
                        // NOTE(review): presumably setNumber() can alter it - confirm in unicode.php.
                        $arabic[$j][$k]->setTypeBiDi($oldtb);
                        // NOTE(review): presumably refreshes the character's UTF-8 form from its (possibly new) code point - confirm in unicode.php.
                        $arabic[$j][$k]->encodeUTF8Num($arabic[$j][$k]->getNumber());
                    }
                }
                $j++;
            }
            // $spl was filled in ascending index order; removing from the end first
            // keeps the remaining recorded indexes valid.
            for ($k = count($spl) -1;$k >= 0;$k--) {
                array_splice($chars, $spl[$k], 1);
            }
            $i++;
        }
    }

    /**
     * Create 'newtext' at the end of the first stage of processing
     *
     * @access  private
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function getText() {
        // Gather the letter of every character of every run, in run order,
        // and hand the joined result to setText().
        $runs = &$this->new_levels;
        $pieces = array();
        for ($r = 0, $run_total = count($runs); $r < $run_total; $r++) {
            $members = &$runs[$r]['chars'];
            for ($c = 0, $member_total = count($members); $c < $member_total; $c++) {
                $pieces[] = $members[$c]->getLetter();
            }
            // Detach the reference before it is bound to the next run.
            unset($members);
        }
        $this->setText(implode('', $pieces));
    }

    /**
     * Place the tags back where they were stripped from.
     *
     * @access  private
     * @return  string Processed text with tags
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function fixTags() {
        $levels = &$this->new_levels;
        $ret = '';
        $char_counter = 0;

        // Queue of stripped tags for the current paragraph; each entry is an array
        // with an 'offset' (character position) and the 'tag' markup itself.
        // A paragraph without recorded tags is treated as an empty queue.
        $t = (isset($this->tags[$this->par]) and is_array($this->tags[$this->par])) ? $this->tags[$this->par] : array();

        // The tag currently waiting for its offset to be reached (null when none).
        $tag = null;

        $i = 0;
        while ($i < count($levels)) {
            $chars = &$levels[$i]['chars'];
            $j = 0;
            while ($j < count($chars)) {
                // Fetch the next pending tag, merging into it every following
                // queue entry recorded at exactly the same offset.
                while (count($t) and !is_array($tag)) {
                    $tag = array_shift($t);
                    while (count($t) and $tag['offset'] === $t[0]['offset']) {
                        $tag['tag'].= $t[0]['tag'];
                        array_shift($t);
                    }
                }

                $letter = $chars[$j]->getLetter();
                // Only index into $tag when one is actually pending; once the queue
                // is exhausted the remaining characters are copied through as-is.
                if (is_array($tag) and $tag['offset'] == $char_counter) {
                    // The pattern is unanchored: markup containing a closing tag anywhere
                    // is appended after the character, any other markup goes before it.
                    if (preg_match('/<\s*\//', $tag['tag'])) {
                        $ret.= $letter . $tag['tag'];
                    } else {
                        $ret.= $tag['tag'] . $letter;
                    }
                    $tag = null;
                } else {
                    $ret.= $letter;
                }
                $char_counter++;
                $j++;
            }
            $i++;
        }
        return $this->split_tags[0][$this->par] . $ret;
    }

    /**
     * Create hexadecimal pattern for preg_match()
     *
     * @access  private
     * @param   string [$str] String to be turned into pattern
     * @return  string Pattern of hexadecimal character representations
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function getPattern($str) {
        // Every byte of $str becomes a '\x' escape followed by its upper-case hex
        // value (not zero-padded, so byte 0x0A yields '\xA').
        $ret = '';
        $size = strlen($str);
        for ($i = 0; $i < $size; $i++) {
            // Square brackets instead of $str{$i}: the curly-brace offset syntax is
            // deprecated since PHP 7.4 and no longer parses on PHP 8.
            $ret.= '\x' . strtoupper(dechex(ord($str[$i])));
        }
        return $ret;
    }

    /**
     * Get RLO or LRO
     *
     * @access  private
     * @param   string [$dir] Tag attribute 'dir' value (either 'rtl' or 'ltr')
     * @return  string RLO or LRO encoded in UTF-8
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function changeDir($dir) {
        // 'rtl' selects U+202E (RIGHT-TO-LEFT OVERRIDE); every other value selects
        // U+202D (LEFT-TO-RIGHT OVERRIDE). Both share the first two UTF-8 bytes.
        $last_byte = ($dir == 'rtl') ? "\xAE" : "\xAD";
        return "\xE2\x80" . $last_byte;
    }

    /**
     * Restore tags in tagless text.
     *
     * @access  private
     * @param   integer [$start] Offset where line begins in paragraph
     * @param   integer [$length] Length of line
     * @return  string The new line in right-to-left order with tags.
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function restoreTags($start, $length) {
        $ret = '';
        // Tags that are open at the current position, most recently opened first.
        $open = array();
        $chars = &$this->line_chars;
        for ($i = 0; $i < count($chars); $i++) {
            $char = $chars[$i];
            $ts = $char->getTagSize();
            $c = $char->getMirror();
            if (!$ts) {
                // No tag attached to this character: emit it as is.
                $ret.= $c;
                continue;
            }
            // $op is reset once per character, not once per tag: as soon as one of this
            // character's tags is found open, its remaining tags are not opened any more.
            // NOTE(review): kept exactly as in the original; confirm this is intended.
            $op = 1;
            // State of the open-tag stack before this character touches it.
            $snap = $open;
            // Closing tags produced by this character.
            $clt = array();
            for ($j = 0; $j < $ts; $j++) {
                $cur_tag = $char->getTag($j);
                // Loose comparison, as in the original element-by-element search.
                $k = array_search($cur_tag, $open);
                if ($k !== false) {
                    // The tag is already open, so this character closes it.
                    $op = 0;
                    $clt[] = $this->buildClosingTag($cur_tag);
                    array_splice($open, $k, 1);
                } elseif ($op) {
                    // The tag is not open yet: open it in front of the character.
                    $c = $cur_tag . $c;
                    array_unshift($open, $cur_tag);
                }
            }
            // Walk the snapshot from its last entry to its first and move the matching
            // closing tags into $nclt in that order.
            $nclt = array();
            for ($j = count($snap) - 1; $j >= 0; $j--) {
                $k = array_search($this->buildClosingTag($snap[$j]), $clt);
                if ($k !== false) {
                    $nclt[] = $clt[$k];
                    array_splice($clt, $k, 1);
                }
            }
            // Closing tags without a counterpart in the snapshot come first.
            $nclt = array_merge($clt, $nclt);
            $ret.= $c . join('', $nclt);
        }
        // Tag split off the beginning of this paragraph, if one was recorded.
        $wrapper = isset($this->split_tags[0][$this->par]) ? $this->split_tags[0][$this->par] : '';
        if ($wrapper !== '') {
            if (!$start) {
                // First line of the paragraph: put the tag back in front.
                // NOTE(review): a line that is both first and last gets no closing tag
                // here, exactly as in the original; confirm against the caller.
                $ret = $wrapper . $ret;
            } else {
                $total = 0;
                foreach ($this->result[$this->par]['levels'] as $level) {
                    $total+= count($level['chars']);
                }
                if ($start + $length == $total) {
                    // Last line of the paragraph: close the tag.
                    $ret.= $this->buildClosingTag($wrapper);
                }
            }
        }
        // Collapse '<tag>text</tag><tag>' into '<tag>text' until nothing is left to collapse.
        while (preg_match('/(<\s*([^\s>]+)(\s+[^>]+)*>)[^<]+<\s*\/\s*\2\s*>\1/Uu', $ret)) {
            $ret = preg_replace('/(<\s*([^\s>]+)(\s+[^>]+)*>)([^<]+)<\s*\/\s*\2\s*>\1/Uu', '\\1\\4', $ret);
        }
        return $ret;
    }

    /**
     * Build the closing tag that belongs to an opening tag.
     *
     * The complete tag name is used. Reading $m[1][0] after preg_match() only
     * yields the first character of the name, which turned '<span ...>' into '</s>'.
     *
     * @access  private
     * @param   string [$tag] Opening tag, with or without attributes
     * @return  string Closing tag, or an empty string if $tag is not recognised as a tag
     */
    private function buildClosingTag($tag) {
        if (!preg_match('/<\s*([^\s>]+)(\s+[^>]+)*>/', (string) $tag, $m)) {
            return '';
        }
        return '</' . $m[1] . '>';
    }

    /**
     * Get substr values for tagless text based on original HTML input.
     *
     * All three parameters are required. By providing the substring of the
     * paragraph that is to be displayed as a new line, you may avoid the
     * parsing process.
     *
     * @access  private
     * @param   string [$txt] The new line.
     * @param   integer [$start] The position in the paragraph where the new line starts (first character is 0). Only positive values are supported.
     * @param   integer [$length] The length of the new line.
     * @author  Efthimios Mavrogeorgiadis <hide@address.com>
     * @since   Wed Jan 09 2008 02:35:24 GMT+0200
     * @version v 1.00 Wed Jan 09 2008 02:35:24 GMT+0200
     */
    private function getStrippedValues($txt, &$start, &$length) {
        // Search for the exact bytes of $txt in the tagless text; the search begins at
        // the running counter minus the incoming $length.
        // NOTE(review): a negative search offset is not guarded against - confirm the
        // caller can never produce one.
        $pattern = '/' . $this->getPattern($txt) . '/';
        preg_match($pattern, $this->getResultNewText(), $matches, PREG_OFFSET_CAPTURE, $this->char_counter - $length);
        // Byte offset of the match. Falling back to 0 when nothing matched is what the
        // previous code effectively did through an undefined-index read.
        $start = isset($matches[0][1]) ? $matches[0][1] : 0;
        $length = strlen($txt);
        if ($start) {
            $prev = substr($this->getResultNewText(), 0, $start);
        } else {
            $prev = '';
        }
        $main = substr($this->getResultNewText(), $start, $length);
        // From here on byte positions are turned into character counts by counting the
        // matches of UTF8PATTERN in the text before the line and in the line itself.
        $pattern = constant('UTF8PATTERN');
        // Pre-shaped result so that count() below always receives an array; the former
        // empty array made count() operate on null (TypeError on PHP 8).
        $prev_matches = array(array());
        // Compare against '' explicitly: a preceding text of exactly "0" is not empty.
        if ($prev !== '' && $prev !== false) {
            preg_match_all($pattern, $prev, $prev_matches);
        }
        $main_matches = array(array());
        if ($main !== '' && $main !== false) {
            preg_match_all($pattern, $main, $main_matches);
        }
        $start = count($prev_matches[0]);
        $length = count($main_matches[0]);
    }
}
?>
Return current item: PHPBiDi