Location: PHPKode > scripts > wpSearch > wpsearch/WPSearch/Backends/Phplucene/Zend/Search/Lucene/Index/SegmentInfo.php
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to hide@address.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */

/** Zend_Search_Lucene_Index_DictionaryLoader */
require_once dirname(__FILE__) . '/../../../../Zend/Search/Lucene/Index/DictionaryLoader.php';


/** Zend_Search_Lucene_Exception */
require_once dirname(__FILE__) . '/../../../../Zend/Search/Lucene/Exception.php';

/** Zend_Search_Lucene_LockManager */
require_once dirname(__FILE__) . '/../../../../Zend/Search/Lucene/LockManager.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_SegmentInfo
{
    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Segment name
     *
     * @var string
     */
    private $_name;

    /**
     * Term Dictionary Index
     *
     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
     * of performance considerations)
     * [0] -> $termFieldNum
     * [1] -> $termValue
     *
     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
     *
     * @var array
     */
    private $_termDictionary;

    /**
     * Term Dictionary Index TermInfos
     *
     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
     * of performance considerations)
     * [0] -> $docFreq
     * [1] -> $freqPointer
     * [2] -> $proxPointer
     * [3] -> $skipOffset
     * [4] -> $indexPointer
     *
     * @var array
     */
    private $_termDictionaryInfos;

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
     *
     * @var array
     */
    private $_fields;

    /**
     * Field positions in a dictionary.
     * (Term dictionary contains fields ordered by names)
     *
     * @var array
     */
    private $_fieldsDicPositions;


    /**
     * Associative array where the key is the file name and the value is data offset
     * in a compound segment file (.cfs).
     *
     * @var array
     */
    private $_segFiles;

    /**
     * Associative array where the key is the file name and the value is file size (.cfs).
     *
     * @var array
     */
    private $_segFileSizes;

    /**
     * Delete file generation number
     *
     * -2 means autodetect latest delete generation
     * -1 means 'there is no delete file'
     *  0 means pre-2.1 format delete file
     *  X specifies used delete file
     *
     * @var integer
     */
    private $_delGen;

    /**
     * Segment has single norms file
     *
     * If true then one .nrm file is used for all fields
     * Otherwise .fN files are used
     *
     * @var boolean
     */
    private $_hasSingleNormFile;

    /**
     * Use compound segment file (*.cfs) to collect all other segment files
     * (excluding .del files)
     *
     * @var boolean
     */
    private $_isCompound;


    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
     */
    private $_directory;

    /**
     * Normalization factors.
     * An array fieldName => normVector
     * normVector is a binary string.
     * Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    private $_norms = array();

    /**
     * List of deleted documents.
     * bitset if bitset extension is loaded or array otherwise.
     *
     * @var mixed
     */
    private $_deleted = null;

    /**
     * $this->_deleted update flag
     *
     * @var boolean
     */
    private $_deletedDirty = false;


    /**
     * Zend_Search_Lucene_Index_SegmentInfo constructor
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     * @param integer $docCount
     * @param integer $delGen
     * @param boolean $hasSingleNormFile
     * @param boolean|null $isCompound  null means 'unknown, detect from the directory'
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $hasSingleNormFile = false, $isCompound = null)
    {
        // Segment-level settings supplied by the caller.
        $this->_directory         = $directory;
        $this->_name              = $name;
        $this->_docCount          = $docCount;
        $this->_hasSingleNormFile = $hasSingleNormFile;
        $this->_delGen            = $delGen;
        $this->_termDictionary    = null; // loaded lazily by getTermInfo()

        if ($isCompound !== null) {
            $this->_isCompound    = $isCompound;
        } else {
            // It's a pre-2.1 segment or isCompound is set to 'unknown'
            // Detect if segment uses compound file
            try {
                // Try to open compound file
                $this->_directory->getFileObject($name . '.cfs');

                // Compound file is found
                $this->_isCompound = true;
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'is not readable') !== false) {
                    // Compound file is not found or is not readable
                    $this->_isCompound = false;
                } else {
                    throw $e;
                }
            }
        }

        // Build the compound file directory: name => data offset and name => size.
        // Both maps are initialised so later isset() lookups never touch null.
        $this->_segFiles     = array();
        $this->_segFileSizes = array();
        if ($this->_isCompound) {
            $cfsFile = $this->_directory->getFileObject($name . '.cfs');
            $segFilesCount = $cfsFile->readVInt();

            for ($count = 0; $count < $segFilesCount; $count++) {
                $dataOffset = $cfsFile->readLong();
                if ($count != 0) {
                    // Size of the previous entry = distance between the two data offsets
                    $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
                }
                $fileName = $cfsFile->readString();
                $this->_segFiles[$fileName] = $dataOffset;
            }
            if ($count != 0) {
                // The last entry extends up to the end of the compound file
                $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
            }
        }

        // Read field infos (.fnm)
        $fnmFile = $this->openCompoundFile('.fnm');
        $fieldsCount = $fnmFile->readVInt();
        $fieldNames = array();
        $fieldNums  = array();
        $this->_fields = array();
        for ($count=0; $count < $fieldsCount; $count++) {
            $fieldName = $fnmFile->readString();
            $fieldBits = $fnmFile->readByte();
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                            $fieldBits & 1,
                                                                            $count,
                                                                            $fieldBits & 2 );
            if ($fieldBits & 0x10) {
                // norms are omitted for the indexed field
                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
            }

            $fieldNums[$count]  = $count;
            $fieldNames[$count] = $fieldName;
        }
        // Sort field numbers by field name, then invert the result:
        // field number => position in the name-ordered sequence.
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
        $this->_fieldsDicPositions = array_flip($fieldNums);

        if ($this->_delGen == -2) {
            $this->_detectLatestDelGen();
        }

        if ($this->_delGen == -1) {
            // There is no delete file for this segment
            // Do nothing
        } else if ($this->_delGen == 0) {
            // It's a segment with pre-2.1 format delete file
            // Try to find delete file
            try {
                // '.del' files always stored in a separate file
                // Segment compound is not used
                $delFile = $this->_directory->getFileObject($this->_name . '.del');

                $byteCount = $delFile->readInt();
                $byteCount = (int) ceil($byteCount/8);
                $bitCount  = $delFile->readInt();

                if ($bitCount == 0) {
                    $delBytes = '';
                } else {
                    $delBytes = $delFile->readBytes($byteCount);
                }

                if (extension_loaded('bitset')) {
                    $this->_deleted = $delBytes;
                } else {
                    $this->_deleted = array();
                    for ($count = 0; $count < $byteCount; $count++) {
                        // Square-bracket offset: the '{}' form is a parse error on PHP 8
                        $byte = ord($delBytes[$count]);
                        for ($bit = 0; $bit < 8; $bit++) {
                            if ($byte & (1<<$bit)) {
                                $this->_deleted[$count*8 + $bit] = 1;
                            }
                        }
                    }
                }
            } catch (Zend_Search_Lucene_Exception $e) {
                // Same exception type as the compound-file probe above, which
                // catches the identical getFileObject() failure.
                if (strpos($e->getMessage(), 'is not readable') === false ) {
                    throw $e;
                }
                // There is no delete file
                // Do nothing
            }
        } else {
            // It's 2.1+ format delete file
            $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');

            $format = $delFile->readInt();

            if ($format == (int)0xFFFFFFFF) {
                // Sparse representation: a sequence of (byte gap, non-zero byte) pairs
                if (extension_loaded('bitset')) {
                    $this->_deleted = bitset_empty();
                } else {
                    $this->_deleted = array();
                }

                // Header values are read to advance the file pointer; they are not used below
                $byteCount = $delFile->readInt();
                $bitCount  = $delFile->readInt();

                $delFileSize = $this->_directory->fileLength($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
                $byteNum = 0;

                do {
                    $dgap = $delFile->readVInt();
                    $nonZeroByte = $delFile->readByte();

                    $byteNum += $dgap;

                    for ($bit = 0; $bit < 8; $bit++) {
                        if ($nonZeroByte & (1<<$bit)) {
                            if (extension_loaded('bitset')) {
                                bitset_incl($this->_deleted, $byteNum*8 + $bit);
                            } else {
                                $this->_deleted[$byteNum*8 + $bit] = 1;
                            }
                        }
                    }
                } while ($delFile->tell() < $delFileSize);

            } else {
                // $format is actually byte count
                $byteCount = (int) ceil($format/8);
                $bitCount  = $delFile->readInt();

                if ($bitCount == 0) {
                    $delBytes = '';
                } else {
                    $delBytes = $delFile->readBytes($byteCount);
                }

                if (extension_loaded('bitset')) {
                    $this->_deleted = $delBytes;
                } else {
                    $this->_deleted = array();
                    for ($count = 0; $count < $byteCount; $count++) {
                        // Square-bracket offset: the '{}' form is a parse error on PHP 8
                        $byte = ord($delBytes[$count]);
                        for ($bit = 0; $bit < 8; $bit++) {
                            if ($byte & (1<<$bit)) {
                                $this->_deleted[$count*8 + $bit] = 1;
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * Opens index file stored within compound index file
     *
     * @param string $extension
     * @param boolean $shareHandler
     * @throws Zend_Search_Lucene_Exception
     * @return Zend_Search_Lucene_Storage_File
     */
    public function openCompoundFile($extension, $shareHandler = true)
    {
        $segmentFile = $this->_name . $extension;

        if ($this->_isCompound) {
            // The requested file is stored inside <segment>.cfs, so it has to be
            // present in the offset table filled by the constructor.
            if (!isset($this->_segFiles[$segmentFile])) {
                throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
                                           . $segmentFile . ' file.' );
            }

            // Position the compound file handle at the start of the embedded file
            $cfsHandle = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
            $cfsHandle->seek($this->_segFiles[$segmentFile]);

            return $cfsHandle;
        }

        // Non-compound segment: every segment file is a separate directory entry
        return $this->_directory->getFileObject($segmentFile, $shareHandler);
    }

    /**
     * Get compound file length
     *
     * @param string $extension
     * @return integer
     */
    public function compoundFileLength($extension)
    {
        $segmentFile = $this->_name . $extension;

        if (!$this->_directory->fileExists($segmentFile)) {
            // No stand-alone file: fall back to the sizes recorded for the compound file
            if (isset($this->_segFileSizes[$segmentFile])) {
                return $this->_segFileSizes[$segmentFile];
            }

            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                       . $segmentFile . ' file.' );
        }

        // A stand-alone file takes precedence over the compound file entry
        return $this->_directory->fileLength($segmentFile);
    }

    /**
     * Returns field index or -1 if field is not found
     *
     * @param string $fieldName
     * @return integer
     */
    public function getFieldNum($fieldName)
    {
        // Compare names as strings. A loose '==' treats numeric-looking names
        // such as "100" and "1e2" as equal and could return the wrong field.
        $wantedName = (string) $fieldName;

        foreach ($this->_fields as $field) {
            if ((string) $field->name === $wantedName) {
                return $field->number;
            }
        }

        // Field is not present in this segment
        return -1;
    }

    /**
     * Returns field info for specified field
     *
     * @param integer $fieldNum
     * @return Zend_Search_Lucene_Index_FieldInfo
     */
    public function getField($fieldNum)
    {
        // Direct lookup in the field list; no bounds checking is done here
        $fieldInfo = $this->_fields[$fieldNum];

        return $fieldInfo;
    }

    /**
     * Returns array of fields.
     * if $indexed parameter is true, then returns only indexed fields.
     *
     * @param boolean $indexed
     * @return array
     */
    public function getFields($indexed = false)
    {
        // The result maps each field name onto itself (name => name)
        $names = array();

        foreach ($this->_fields as $fieldInfo) {
            if ($indexed && !$fieldInfo->isIndexed) {
                // Only indexed fields were requested
                continue;
            }

            $names[$fieldInfo->name] = $fieldInfo->name;
        }

        return $names;
    }

    /**
     * Returns array of FieldInfo objects.
     *
     * @return array
     */
    public function getFieldInfos()
    {
        // Field list as filled by the constructor, indexed by field number
        $fieldInfos = $this->_fields;

        return $fieldInfos;
    }

    /**
     * Returns actual deletions file generation number.
     *
     * @return integer
     */
    public function getDelGen()
    {
        // Current value; _detectLatestDelGen() and writeChanges() may update it
        $generation = $this->_delGen;

        return $generation;
    }

    /**
     * Returns the total number of documents in this segment (including deleted documents).
     *
     * @return integer
     */
    public function count()
    {
        // Document count passed to the constructor; deletions are not subtracted
        $total = $this->_docCount;

        return $total;
    }

    /**
     * Returns number of deleted documents.
     *
     * @return integer
     */
    private function _deletedCount()
    {
        if ($this->_deleted === null) {
            // No deletions information at all
            return 0;
        }

        // With the bitset extension the deletions are a bitset that has to be
        // expanded first; otherwise they are already an array keyed by doc id.
        $deletedIds = extension_loaded('bitset')
                    ? bitset_to_array($this->_deleted)
                    : $this->_deleted;

        return count($deletedIds);
    }

    /**
     * Returns the total number of non-deleted documents in this segment.
     *
     * @return integer
     */
    public function numDocs()
    {
        if (!$this->hasDeletions()) {
            // Nothing was deleted: every document counts
            return $this->_docCount;
        }

        return $this->_docCount - $this->_deletedCount();
    }

    /**
     * Get field position in a fields dictionary
     *
     * @param integer $fieldNum
     * @return integer
     */
    private function _getFieldPosition($fieldNum) {
        if (isset($this->_fieldsDicPositions[$fieldNum])) {
            return $this->_fieldsDicPositions[$fieldNum];
        }

        // Values missing from the translation table are used as they are
        return $fieldNum;
    }

    /**
     * Return segment name
     *
     * @return string
     */
    public function getName()
    {
        // Segment name as passed to the constructor
        $segmentName = $this->_name;

        return $segmentName;
    }


    /**
     * TermInfo cache
     *
     * Size is 1024.
     * Numbers are used instead of class constants because of performance considerations
     *
     * @var array
     */
    private $_termInfoCache = array();

    private function _cleanUpTermInfoCache()
    {
        // Drop entries from the front of the cache (the least recently used ones,
        // since getTermInfo() re-appends entries on every hit) until 768 remain.
        foreach (array_keys($this->_termInfoCache) as $staleKey) {
            unset($this->_termInfoCache[$staleKey]);

            if (count($this->_termInfoCache) == 768) {
                break;
            }
        }
    }

    /**
     * Load terms dictionary index
     * 
     * @throws Zend_Search_Lucene_Exception
     */
    private function _loadDictionaryIndex()
    {
        // Check, if index is already serialized
        if ($this->_directory->fileExists($this->_name . '.sti')) {
            // Load serialized dictionary index data
            $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
            $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));

            // NOTE(review): unserialize() is applied to data read from the index
            // directory. That is only safe while the directory is not writable by
            // untrusted parties - confirm for the deployment.
            $unserializedData = @unserialize($stiFileData);

            // Accept the cached data only if it has the expected shape
            // (array(termDictionary, termDictionaryInfos)). A truncated or foreign
            // .sti file is ignored and the index is rebuilt from the .tii file
            // instead of leaving the dictionary set to null.
            if (is_array($unserializedData)
                && isset($unserializedData[0], $unserializedData[1])
                && is_array($unserializedData[0])
                && is_array($unserializedData[1])) {
                list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
                return;
            }
        }

        // Load data from .tii file and generate .sti file

        // Prefetch dictionary index data
        $tiiFile = $this->openCompoundFile('.tii');
        $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

        // Load dictionary index data
        list($this->_termDictionary, $this->_termDictionaryInfos) =
                    Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

        // Store the parsed index so the next load can skip parsing the .tii file
        $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
        $stiFile = $this->_directory->createFile($this->_name . '.sti');
        $stiFile->writeBytes($stiFileData);
    }

    /**
     * Scans terms dictionary and returns term info
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return Zend_Search_Lucene_Index_TermInfo
     */
    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
    {
        $termKey = $term->key();
        if (isset($this->_termInfoCache[$termKey])) {
            $termInfo = $this->_termInfoCache[$termKey];

            // Move termInfo to the end of cache
            unset($this->_termInfoCache[$termKey]);
            $this->_termInfoCache[$termKey] = $termInfo;

            return $termInfo;
        }


        if ($this->_termDictionary === null) {
            $this->_loadDictionaryIndex();
        }

        $searchField = $this->getFieldNum($term->field);

        if ($searchField == -1) {
            // The field is not present in this segment
            return null;
        }
        $searchDicField = $this->_getFieldPosition($searchField);

        // Binary search in the dictionary index. Entries are ordered by field
        // position first and by term text second.
        $lowIndex = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            $mid = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($term->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid+1;
            } else {
                // We got it!
                $a = $this->_termDictionaryInfos[$mid];
                $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

                // Put loaded termInfo into cache and enforce the size limit here
                // too, otherwise this path could grow the cache past the limit.
                $this->_termInfoCache[$termKey] = $termInfo;

                if (count($this->_termInfoCache) >= 1024) {
                    $this->_cleanUpTermInfoCache();
                }

                return $termInfo;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            return null;
        }

        // $highIndex now points to the last index entry that precedes the term;
        // scan the .tis file sequentially starting from that entry.
        $prevPosition = $highIndex;
        $prevTerm = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        $tisFile = $this->openCompoundFile('.tis');
        $tiVersion = $tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $termCount     = $tisFile->readLong();
        $indexInterval = $tisFile->readInt();
        $skipInterval  = $tisFile->readInt();
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            // Value is not used, but it has to be read to advance the file pointer
            $maxSkipLevels = $tisFile->readInt();
        }

        // indexPointer is relative to the start of the file, while the header
        // (24 bytes for 2.1+ format, 20 bytes otherwise) has already been consumed.
        $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);

        $termValue    = $prevTerm[1] /* text */;
        $termFieldNum = $prevTerm[0] /* field */;
        $freqPointer = $prevTermInfo[1] /* freqPointer */;
        $proxPointer = $prevTermInfo[2] /* proxPointer */;

        // Defined up front so they exist even if the scan loop never runs
        $docFreq    = 0;
        $skipOffset = 0;

        for ($count = $prevPosition*$indexInterval + 1;
             $count <= $termCount &&
             ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
              ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
               strcmp($termValue, $term->text) < 0) );
             $count++) {
            $termPrefixLength = $tisFile->readVInt();
            $termSuffix       = $tisFile->readString();
            $termFieldNum     = $tisFile->readVInt();
            $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

            $docFreq      = $tisFile->readVInt();
            $freqPointer += $tisFile->readVInt();
            $proxPointer += $tisFile->readVInt();
            if( $docFreq >= $skipInterval ) {
                $skipOffset = $tisFile->readVInt();
            } else {
                $skipOffset = 0;
            }
        }

        // Use strcmp() as in the searches above. A loose '==' compares
        // numeric-looking strings as numbers ("100" == "1e2") and could
        // report a different term as a match.
        if ($termFieldNum == $searchField && strcmp($termValue, $term->text) == 0) {
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
        } else {
            $termInfo = null;
        }

        // Put loaded termInfo into cache
        $this->_termInfoCache[$termKey] = $termInfo;

        // '>=' rather than '==': once the size steps past the limit an equality
        // test never fires again and the cache would grow without bound.
        if (count($this->_termInfoCache) >= 1024) {
            $this->_cleanUpTermInfoCache();
        }

        return $termInfo;
    }

    /**
     * Returns term freqs array.
     * Result array structure: array(docId => freq, ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $shift
     * @return array
     */
    public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0)
    {
        $termInfo = $this->getTermInfo($term);

        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            // Term is not present in this segment
            return array();
        }

        $frqFile = $this->openCompoundFile('.frq');
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);

        $freqsByDoc = array();
        $docId      = 0;

        for ($entry = 0; $entry < $termInfo->docFreq; $entry++) {
            $docDelta = $frqFile->readVInt();

            // An odd delta marks a frequency of exactly one; otherwise the
            // frequency follows as a separate VInt.
            $occursOnce = ($docDelta % 2 == 1);

            $docId += $occursOnce ? ($docDelta-1)/2 : $docDelta/2;
            $freqsByDoc[$shift + $docId] = $occursOnce ? 1 : $frqFile->readVInt();
        }

        return $freqsByDoc;
    }

    /**
     * Returns term positions array.
     * Result array structure: array(docId => array(pos1, pos2, ...), ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $shift
     * @return array
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0)
    {
        $termInfo = $this->getTermInfo($term);

        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            // Term is not present in this segment
            return array();
        }

        // Step 1: collect per-document frequencies (segment-local doc ids) from .frq
        $frqFile = $this->openCompoundFile('.frq');
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);

        $freqsByDoc = array();
        $docId      = 0;

        for ($entry = 0; $entry < $termInfo->docFreq; $entry++) {
            $docDelta = $frqFile->readVInt();

            // An odd delta marks a frequency of exactly one; otherwise the
            // frequency follows as a separate VInt.
            $occursOnce = ($docDelta % 2 == 1);

            $docId += $occursOnce ? ($docDelta-1)/2 : $docDelta/2;
            $freqsByDoc[$docId] = $occursOnce ? 1 : $frqFile->readVInt();
        }

        // Step 2: read that many delta-encoded positions per document from .prx
        $prxFile = $this->openCompoundFile('.prx');
        $prxFile->seek($termInfo->proxPointer, SEEK_CUR);

        $positionsByDoc = array();
        foreach ($freqsByDoc as $docId => $freq) {
            $positions    = array();
            $termPosition = 0;

            for ($occurrence = 0; $occurrence < $freq; $occurrence++) {
                $termPosition += $prxFile->readVInt();
                $positions[]   = $termPosition;
            }

            // $shift is applied only to the keys of the returned array
            $positionsByDoc[$shift + $docId] = $positions;
        }

        return $positionsByDoc;
    }

    /**
     * Load normalization factors from an index file
     *
     * @param integer $fieldNum
     * @throws Zend_Search_Lucene_Exception
     */
    private function _loadNorm($fieldNum)
    {
        if (!$this->_hasSingleNormFile) {
            // One '.fN' file per field: load only the requested field
            $fFile = $this->openCompoundFile('.f' . $fieldNum);
            $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);

            return;
        }

        // Single '.nrm' file: 'NRM' signature, a version byte, then one
        // norm vector of _docCount bytes for every indexed field.
        $normfFile = $this->openCompoundFile('.nrm');

        $signature     = $normfFile->readBytes(3);
        $formatVersion = $normfFile->readByte();

        if ($signature != 'NRM'  ||  $formatVersion != (int)0xFF) {
            throw new  Zend_Search_Lucene_Exception('Wrong norms file format.');
        }

        // Norms of all indexed fields are loaded at once, not only the requested one
        foreach ($this->_fields as $indexedFieldNum => $fieldInfo) {
            if ($fieldInfo->isIndexed) {
                $this->_norms[$indexedFieldNum] = $normfFile->readBytes($this->_docCount);
            }
        }
    }

    /**
     * Returns normalization factor for specified documents
     *
     * @param integer $id
     * @param string $fieldName
     * @return float|null  null if the field is not indexed
     */
    public function norm($id, $fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        // getFieldNum() returns -1 for an unknown field. Guard it explicitly,
        // as normVector() does, instead of indexing $this->_fields[-1].
        if ($fieldNum == -1  ||  !($this->_fields[$fieldNum]->isIndexed)) {
            return null;
        }

        if (!isset($this->_norms[$fieldNum])) {
            // Norm vectors are loaded lazily on first access
            $this->_loadNorm($fieldNum);
        }

        // One byte per document. Square-bracket offset is used because the
        // '{}' form is a parse error on PHP 8.
        return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
    }

    /**
     * Returns norm vector, encoded in a byte string
     *
     * @param string $fieldName
     * @return string
     */
    public function normVector($fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        $hasStoredNorms = ($fieldNum != -1) && $this->_fields[$fieldNum]->isIndexed;

        if ($hasStoredNorms) {
            if (!isset($this->_norms[$fieldNum])) {
                // Norm vectors are loaded lazily on first access
                $this->_loadNorm($fieldNum);
            }

            return $this->_norms[$fieldNum];
        }

        // Unknown or non-indexed field: build a vector holding the default
        // similarity's encoded norm for a zero-length field, one byte per document.
        $similarity  = Zend_Search_Lucene_Search_Similarity::getDefault();
        $defaultNorm = $similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) );

        return str_repeat(chr($defaultNorm), $this->_docCount);
    }


    /**
     * Returns true if any documents have been deleted from this index segment.
     *
     * @return boolean
     */
    public function hasDeletions()
    {
        // $_deleted stays null until a delete file is loaded or delete() is called
        $noDeletionsData = ($this->_deleted === null);

        return !$noDeletionsData;
    }


    /**
     * Returns true if segment has single norms file.
     *
     * @return boolean
     */
    public function hasSingleNormFile()
    {
        // The flag is reported as integer 1/0 rather than as a boolean
        if ($this->_hasSingleNormFile) {
            return 1;
        }

        return 0;
    }

    /**
     * Returns true if segment is stored using compound segment file.
     *
     * @return boolean
     */
    public function isCompound()
    {
        // Either passed to the constructor or detected there by probing for '.cfs'
        $usesCompoundFile = $this->_isCompound;

        return $usesCompoundFile;
    }

    /**
     * Deletes a document from the index segment.
     * $id is an internal document id
     *
     * @param integer
     */
    public function delete($id)
    {
        // Mark deletions as modified so writeChanges() stores them
        $this->_deletedDirty = true;

        if (!extension_loaded('bitset')) {
            // Plain PHP representation: array keyed by document id
            if ($this->_deleted === null) {
                $this->_deleted = array();
            }
            $this->_deleted[$id] = 1;

            return;
        }

        // bitset extension representation
        if ($this->_deleted === null) {
            $this->_deleted = bitset_empty($id);
        }
        bitset_incl($this->_deleted, $id);
    }

    /**
     * Checks, that document is deleted
     *
     * @param integer
     * @return boolean
     */
    public function isDeleted($id)
    {
        if ($this->_deleted === null) {
            // No deletions information: nothing is deleted
            return false;
        }

        if (!extension_loaded('bitset')) {
            // Array representation: deleted ids are the array keys
            return isset($this->_deleted[$id]);
        }

        return bitset_in($this->_deleted, $id);
    }


    /**
     * Detect latest delete generation
     *
     * Is actually used from writeChanges() method or from the constructor if it's invoked from
     * Index writer. In both cases index write lock is already obtained, so we shouldn't care
     * about it
     */
    private function _detectLatestDelGen()
    {
        // The segment name is escaped so it is always matched literally,
        // whatever characters it contains.
        $genFilePattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\.del$/i';

        $delFileList = array();
        foreach ($this->_directory->fileList() as $file) {
            if ($file == $this->_name . '.del') {
                // Matches <segment_name>.del file name
                $delFileList[] = 0;
            } else if (preg_match($genFilePattern, $file, $matches)) {
                // Matches <segment_name>_NNN.del file names (NNN is a base-36 generation number)
                $delFileList[] = (int)base_convert($matches[1], 36, 10);
            }
        }

        if (count($delFileList) == 0) {
            // There is no deletions file for current segment in the directory
            // Set deletions file generation number to -1 ('there is no delete file')
            $this->_delGen = -1;
        } else {
            // There are some deletions files for current segment in the directory
            // Set deletions file generation number to the highest number
            $this->_delGen = max($delFileList);
        }
    }

    /**
     * Write changes if it's necessary.
     *
     * This method must be invoked only from the Writer _updateSegments() method,
     * so index Write lock has to be already obtained.
     *
     * @internal
     */
    public function writeChanges()
    {
        if (!$this->_deletedDirty) {
            // Nothing was deleted since the last write
            return;
        }

        if (extension_loaded('bitset')) {
            // Deletions are written in the form the bitset extension keeps them
            $delBytes = $this->_deleted;
            $bitCount = count(bitset_to_array($delBytes));
        } else {
            // Pack the array of deleted ids into a bit vector, 8 documents per byte
            $byteCount = (int) floor($this->_docCount/8) + 1;
            $delBytes = str_repeat(chr(0), $byteCount);
            for ($count = 0; $count < $byteCount; $count++) {
                $byte = 0;
                for ($bit = 0; $bit < 8; $bit++) {
                    if (isset($this->_deleted[$count*8 + $bit])) {
                        $byte |= (1<<$bit);
                    }
                }
                // Square-bracket offset: the '{}' form is a parse error on PHP 8
                $delBytes[$count] = chr($byte);
            }
            $bitCount = count($this->_deleted);
        }


        // Get new generation number
        $this->_detectLatestDelGen();

        if ($this->_delGen == -1) {
            // Set delete file generation number to 1
            $this->_delGen = 1;
        } else {
            // Increase delete file generation number by 1
            $this->_delGen++;
        }

        // Write <segment>_<generation in base 36>.del: doc count, bit count, bit vector
        $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
        $delFile->writeInt($this->_docCount);
        $delFile->writeInt($bitCount);
        $delFile->writeBytes($delBytes);

        $this->_deletedDirty = false;
    }



    /**
     * Term Dictionary File object for stream like terms reading
     *
     * Opened by reset() (and re-opened by skipTo() when needed);
     * set to null when the terms stream is finished or closed.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tisFile = null;

    /**
     * Actual offset of the .tis file data
     *
     * Position reported by the .tis file object right after it is opened in reset().
     * Used by skipTo() as the base for seeking within the term dictionary.
     *
     * @var integer
     */
    private $_tisFileOffset;

    /**
     * Frequencies File object for stream like terms reading
     *
     * Only opened for the self::SM_FULL_INFO and self::SM_MERGE_INFO scan modes.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_frqFile = null;

    /**
     * Actual offset of the .frq file data
     *
     * Position reported by the .frq file object right after it is opened in reset().
     * Added to a term's freqPointer when seeking to its frequency data.
     *
     * @var integer
     */
    private $_frqFileOffset;

    /**
     * Positions File object for stream like terms reading
     *
     * Only opened for the self::SM_FULL_INFO and self::SM_MERGE_INFO scan modes.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_prxFile = null;

    /**
     * Actual offset of the .prx file in the compound file
     *
     * Position reported by the .prx file object right after it is opened in reset().
     * Added to a term's proxPointer when seeking to its positions data.
     *
     * @var integer
     */
    private $_prxFileOffset;


    /**
     * Number of terms which are still to be read from the term stream
     *
     * Initialized with the overall terms count by reset(), recalculated by skipTo()
     * and decremented by each successful nextTerm() call.
     *
     * @var integer
     */
    private $_termCount = 0;

    /**
     * Overall number of terms in term stream
     *
     * Read from the .tis file header by reset().
     *
     * @var integer
     */
    private $_termNum = 0;

    /**
     * Segment index interval
     *
     * Read from the .tis file header by reset(). skipTo() multiplies it by the
     * dictionary index position to calculate the number of remaining terms.
     *
     * @var integer
     */
    private $_indexInterval;

    /**
     * Segment skip interval
     *
     * Read from the .tis file header by reset(). nextTerm() reads a skip offset
     * only for terms whose document frequency is not less than this value.
     *
     * @var integer
     */
    private $_skipInterval;

    /**
     * Last TermInfo in a terms stream
     *
     * Null when the stream is finished or closed.
     *
     * @var Zend_Search_Lucene_Index_TermInfo
     */
    private $_lastTermInfo = null;

    /**
     * Last Term in a terms stream
     *
     * Null when the stream is finished or closed.
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_lastTerm = null;

    /**
     * Map of the document IDs
     * Used to get the new docID after removing deleted documents.
     * It's not very efficient from a memory usage point of view,
     * but much faster than other methods.
     *
     * Array structure: array( docId within the segment => resulting docId, ...).
     * Deleted documents have no entry. Filled by reset() for the
     * self::SM_FULL_INFO and self::SM_MERGE_INFO scan modes.
     *
     * @var array|null
     */
    private $_docMap = null;

    /**
     * An array of all term positions in the documents.
     * Array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * Keys are document IDs taken from the $_docMap map.
     *
     * Is set to null if term positions loading has to be skipped
     *
     * @var array|null
     */
    private $_lastTermPositions;


    /**
     * Terms scan mode
     *
     * Values:
     *
     * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
     * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
     * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
     *                       document numbers are compacted (shifted if segment has deleted documents)
     *
     * @var integer
     */
    private $_termsScanMode;

    /** Scan modes */
    const SM_TERMS_ONLY = 0;    // terms are scanned, no additional info is retrieved
    const SM_FULL_INFO  = 1;    // terms are scanned, frequency and position info is retrieved
                                // document numbers are only shifted by the start id (not compacted)
    const SM_MERGE_INFO = 2;    // terms are scanned, frequency and position info is retrieved
                                // document numbers are compacted (shifted if segment contains deleted documents)

    /**
     * Reset terms stream
     *
     * $startId - id for the first document
     * $mode    - terms scan mode (one of the self::SM_* constants);
     *            self::SM_MERGE_INFO compacts document ids by skipping deleted documents
     *
     * Returns start document id for the next segment
     *
     * @param integer $startId
     * @param integer $mode
     * @throws Zend_Search_Lucene_Exception
     * @return integer
     */
    public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
    {
        // Release previously opened term dictionary (if any) before opening it again
        $this->_tisFile = null;

        $this->_tisFile = $this->openCompoundFile('.tis', false);
        $this->_tisFileOffset = $this->_tisFile->tell();

        // Check term dictionary format marker
        $tiVersion = $this->_tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $this->_termCount     =
              $this->_termNum = $this->_tisFile->readLong(); // Read terms count
        $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
        $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            // Max skip levels value is not used, but it has to be read
            // to move file pointer to the first term entry
            $this->_tisFile->readInt();
        }

        $this->_frqFile = null;
        $this->_prxFile = null;
        $this->_docMap  = array();

        // Start values used by nextTerm() to decode the first (delta encoded) term entry
        $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
        $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
        $this->_lastTermPositions = null;

        $this->_termsScanMode = $mode;

        switch ($mode) {
            case self::SM_TERMS_ONLY:
                // Do nothing
                break;

            case self::SM_FULL_INFO:
                // break intentionally omitted
            case self::SM_MERGE_INFO:
                $this->_frqFile = $this->openCompoundFile('.frq', false);
                $this->_frqFileOffset = $this->_frqFile->tell();

                $this->_prxFile = $this->openCompoundFile('.prx', false);
                $this->_prxFileOffset = $this->_prxFile->tell();

                // Build document ids map (deleted documents get no entry).
                // Merge mode numbers documents sequentially, full info mode keeps the gaps.
                for ($count = 0; $count < $this->_docCount; $count++) {
                    if (!$this->isDeleted($count)) {
                        $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
                    }
                }
                break;

            default:
                throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
        }

        // Start id for the next segment has to be calculated before nextTerm() is invoked:
        // nextTerm() sets the document map to null if the terms stream is empty,
        // so counting the map afterwards would give a wrong result (or fail).
        $nextStartId = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);

        // Position the stream at the first term
        $this->nextTerm();

        return $nextStartId;
    }


    /**
     * Skip terms stream up to the specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     * @throws Zend_Search_Lucene_Exception
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        if ($this->_termDictionary === null) {
            $this->_loadDictionaryIndex();
        }

        $searchField = $this->getFieldNum($prefix->field);

        if ($searchField == -1) {
            /**
             * Field is not presented in this segment
             * Go to the end of dictionary
             */
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;

            return;
        }
        $searchDicField = $this->_getFieldPosition($searchField);

        // Binary search in the dictionary index.
        // After the loop $highIndex points to the exactly matched entry or
        // to the last entry which is less than the searched one (-1 if there is no such entry).
        $lowIndex = 0;
        $highIndex = count($this->_termDictionary)-1;
        while ($highIndex >= $lowIndex) {
            $mid = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            // Compare by field position first, then by term text
            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
            $delta = $searchDicField - $fieldNum;
            if ($delta == 0) {
                $delta = strcmp($prefix->text, $midTerm[1] /* text */);
            }

            if ($delta < 0) {
                $highIndex = $mid-1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid+1;
            } else {
                // We have reached term we are looking for.
                // The matched entry has to be used as the start position; without this
                // $highIndex may still point to a later entry and the stream would be
                // positioned after the requested term.
                $highIndex = $mid;
                break;
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;

            return;
        }

        $prevPosition = $highIndex;
        $prevTerm = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        if ($this->_tisFile === null) {
            // The end of terms stream is reached and terms dictionary file is closed
            // Perform mini-reset operation
            $this->_tisFile = $this->openCompoundFile('.tis', false);

            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
                $this->_frqFile = $this->openCompoundFile('.frq', false);
                $this->_prxFile = $this->openCompoundFile('.prx', false);
            }
        }
        // Continue reading of the term dictionary from the position stored for the index entry
        $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);

        $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
                                                                 ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
                                                                     $prevTermInfo[1] /* freqPointer */,
                                                                     $prevTermInfo[2] /* proxPointer */,
                                                                     $prevTermInfo[3] /* skipOffset */);
        // Number of terms remaining in the stream starting from the selected index entry
        $this->_termCount  =  $this->_termNum - $prevPosition*$this->_indexInterval;

        if ($highIndex == 0) {
            // skip start entry
            $this->nextTerm();
        } else if ($prefix->field == $this->_lastTerm->field  &&  $prefix->text  == $this->_lastTerm->text) {
            // We got exact match in the dictionary index

            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
                $this->_lastTermPositions = array();

                // Load term frequencies. Document ids are stored as deltas shifted left by one bit,
                // the lowest bit set means "frequency is 1" (no separate frequency value follows).
                $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
                $freqs = array();   $docId = 0;
                for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
                    $docDelta = $this->_frqFile->readVInt();
                    if( $docDelta % 2 == 1 ) {
                        $docId += ($docDelta-1)/2;
                        $freqs[ $docId ] = 1;
                    } else {
                        $docId += $docDelta/2;
                        $freqs[ $docId ] = $this->_frqFile->readVInt();
                    }
                }

                // Load term positions (stored as deltas) for each document
                $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
                foreach ($freqs as $docId => $freq) {
                    $termPosition = 0;  $positions = array();

                    for ($count = 0; $count < $freq; $count++ ) {
                        $termPosition += $this->_prxFile->readVInt();
                        $positions[] = $termPosition;
                    }

                    // Documents missing in the map (deleted ones) are skipped
                    if (isset($this->_docMap[$docId])) {
                        $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
                    }
                }
            }

            return;
        }

        // Search term matching specified prefix
        while ($this->_lastTerm !== null) {
            if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
                 ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
                    // Current term matches or is greater than the pattern
                    return;
            }

            $this->nextTerm();
        }
    }


    /**
     * Scans terms dictionary and returns next term
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        $streamIsFinished = ($this->_tisFile === null  ||  $this->_termCount == 0);
        if ($streamIsFinished) {
            // Nothing more to read: drop current term state and the document map
            $this->_lastTerm          = null;
            $this->_lastTermInfo      = null;
            $this->_lastTermPositions = null;
            $this->_docMap            = null;

            // Files may still be opened for an "empty" segment
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;

            return null;
        }

        // Term text is stored as a length of the prefix shared with the previous term plus a suffix
        $sharedPrefixLength = $this->_tisFile->readVInt();
        $suffix             = $this->_tisFile->readString();
        $fieldNumber        = $this->_tisFile->readVInt();
        $text               = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $sharedPrefixLength)
                            . $suffix;

        $this->_lastTerm = new Zend_Search_Lucene_Index_Term($text, $this->_fields[$fieldNumber]->name);

        // Frequency and position pointers are stored as deltas to the previous term info
        $docFreq     = $this->_tisFile->readVInt();
        $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
        $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
        // Skip offset is only present if document frequency reaches the skip interval
        $skipOffset  = ($docFreq >= $this->_skipInterval) ? $this->_tisFile->readVInt() : 0;

        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

        $scanMode = $this->_termsScanMode;
        if ($scanMode == self::SM_FULL_INFO  ||  $scanMode == self::SM_MERGE_INFO) {
            $this->_lastTermPositions = array();

            // Collect term frequencies: array( docId => frequency, ...)
            $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
            $frequencies = array();
            $currentDoc  = 0;
            $docsLeft    = $this->_lastTermInfo->docFreq;
            while ($docsLeft > 0) {
                $docDelta = $this->_frqFile->readVInt();

                // The lowest bit set means that frequency is 1 and is not stored separately
                $singleOccurrence = ($docDelta % 2 == 1);
                $currentDoc += ($singleOccurrence ? $docDelta - 1 : $docDelta) / 2;
                $frequencies[$currentDoc] = $singleOccurrence ? 1 : $this->_frqFile->readVInt();

                $docsLeft--;
            }

            // Collect term positions (stored as deltas) for each document
            $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
            foreach ($frequencies as $docNumber => $frequency) {
                $docPositions = array();
                $position     = 0;

                for ($read = 0; $read < $frequency; $read++) {
                    $position      += $this->_prxFile->readVInt();
                    $docPositions[] = $position;
                }

                // Documents without a map entry (deleted ones) are skipped
                if (!isset($this->_docMap[$docNumber])) {
                    continue;
                }
                $this->_lastTermPositions[$this->_docMap[$docNumber]] = $docPositions;
            }
        }

        // Close files as soon as the last term is read
        $this->_termCount--;
        if ($this->_termCount == 0) {
            $this->_tisFile = null;
            $this->_frqFile = null;
            $this->_prxFile = null;
        }

        return $this->_lastTerm;
    }

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end
     */
    public function closeTermsStream()
    {
        // Stream files, current term state and the document map are all dropped,
        // in this order
        $streamState = array(
            '_tisFile',
            '_frqFile',
            '_prxFile',
            '_lastTerm',
            '_lastTermInfo',
            '_lastTermPositions',
            '_docMap',
        );

        foreach ($streamState as $property) {
            $this->$property = null;
        }
    }


    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        // Null if the terms stream is finished or closed
        $term = $this->_lastTerm;

        return $term;
    }


    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * @return array|null
     */
    public function currentTermPositions()
    {
        // Null if positions were not loaded or the terms stream is finished or closed
        $positions = $this->_lastTermPositions;

        return $positions;
    }
}

// Return current item: wpSearch