Location: PHPKode > scripts > wpSearch > wpsearch/WPSearch/Backends/Phplucene/Zend/Search/Lucene/Search/Query/Boolean.php
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to hide@address.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */


/** Zend_Search_Lucene_Search_Query */
require_once dirname(__FILE__) . '/../../../../../Zend/Search/Lucene/Search/Query.php';

/** Zend_Search_Lucene_Search_Weight_Boolean */
require_once dirname(__FILE__) . '/../../../../../Zend/Search/Lucene/Search/Weight/Boolean.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_Query
{

    /**
     * Subqueries
     * Array of Zend_Search_Lucene_Search_Query
     *
     * @var array
     */
    private $_subqueries = array();

    /**
     * Subqueries signs.
     * If true then subquery is required.
     * If false then subquery is prohibited.
     * If null then subquery is neither prohibited, nor required
     *
     * If array is null then all subqueries are required
     *
     * @var array
     */
    private $_signs = array();

    /**
     * Result vector.
     *
     * @var array
     */
    private $_resVector = null;

    /**
     * A score factor based on the fraction of all query subqueries
     * that a document contains.
     * float for conjunction queries
     * array of float for non conjunction queries
     *
     * @var mixed
     */
    private $_coord = null;


    /**
     * Class constructor.  Create a new Boolean query object.
     *
     * if $signs array is omitted then all subqueries are required
     * it differs from addSubquery() behavior, but should never be used
     *
     * @param array $subqueries    Array of Zend_Search_Search_Query objects
     * @param array $signs    Array of signs.  Sign is boolean|null.
     * @return void
     */
    public function __construct($subqueries = null, $signs = null)
    {
        if (is_array($subqueries)) {
            $this->_subqueries = $subqueries;

            $this->_signs = null;
            // Check if all subqueries are required
            if (is_array($signs)) {
                foreach ($signs as $sign ) {
                    if ($sign !== true) {
                        $this->_signs = $signs;
                        break;
                    }
                }
            }
        }
    }


    /**
     * Add a $subquery (Zend_Search_Lucene_Search_Query) to this query.
     *
     * The sign is specified as:
     *     TRUE  - subquery is required
     *     FALSE - subquery is prohibited
     *     NULL  - subquery is neither prohibited, nor required
     *
     * @param  Zend_Search_Lucene_Search_Query $subquery
     * @param  boolean|null $sign
     * @return void
     */
    public function addSubquery(Zend_Search_Lucene_Search_Query $subquery, $sign=null) {
        if ($sign !== true || $this->_signs !== null) {       // Skip, if all subqueries are required
            if ($this->_signs === null) {                     // Check, If all previous subqueries are required
                $this->_signs = array();
                foreach ($this->_subqueries as $prevSubquery) {
                    $this->_signs[] = true;
                }
            }
            $this->_signs[] = $sign;
        }

        $this->_subqueries[] = $subquery;
    }

    /**
     * Re-write queries into primitive queries
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        $query = new Zend_Search_Lucene_Search_Query_Boolean();
        $query->setBoost($this->getBoost());

        foreach ($this->_subqueries as $subqueryId => $subquery) {
            $query->addSubquery($subquery->rewrite($index),
                                ($this->_signs === null)?  true : $this->_signs[$subqueryId]);
        }

        return $query;
    }

    /**
     * Optimize query in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        $subqueries = array();
        $signs      = array();

        // Optimize all subqueries
        foreach ($this->_subqueries as $id => $subquery) {
            $subqueries[] = $subquery->optimize($index);
            $signs[]      = ($this->_signs === null)? true : $this->_signs[$id];
        }

        // Remove insignificant subqueries
        foreach ($subqueries as $id => $subquery) {
            if ($subquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
                // Insignificant subquery has to be removed anyway
                unset($subqueries[$id]);
                unset($signs[$id]);
            }
        }
        if (count($subqueries) == 0) {
            // Boolean query doesn't has non-insignificant subqueries
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }
        // Check if all non-insignificant subqueries are prohibited
        $allProhibited = true;
        foreach ($signs as $sign) {
            if ($sign !== false) {
                $allProhibited = false;
                break;
            }
        }
        if ($allProhibited) {
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }


        // Check for empty subqueries
        foreach ($subqueries as $id => $subquery) {
            if ($subquery instanceof Zend_Search_Lucene_Search_Query_Empty) {
                if ($signs[$id] === true) {
                    // Matching is required, but is actually empty
                    return new Zend_Search_Lucene_Search_Query_Empty();
                } else {
                    // Matching is optional or prohibited, but is empty
                    // Remove it from subqueries and signs list
                    unset($subqueries[$id]);
                    unset($signs[$id]);
                }
            }
        }

        // Check, if reduced subqueries list is empty
        if (count($subqueries) == 0) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }

        // Check if all non-empty subqueries are prohibited
        $allProhibited = true;
        foreach ($signs as $sign) {
            if ($sign !== false) {
                $allProhibited = false;
                break;
            }
        }
        if ($allProhibited) {
            return new Zend_Search_Lucene_Search_Query_Empty();
        }


        // Check, if reduced subqueries list has only one entry
        if (count($subqueries) == 1) {
            // It's a query with only one required or optional clause
            // (it's already checked, that it's not a prohibited clause)

            if ($this->getBoost() == 1) {
                return reset($subqueries);
            }

            $optimizedQuery = clone reset($subqueries);
            $optimizedQuery->setBoost($optimizedQuery->getBoost()*$this->getBoost());

            return $optimizedQuery;
        }


        // Prepare first candidate for optimized query
        $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
        $optimizedQuery->setBoost($this->getBoost());


        $terms        = array();
        $tsigns       = array();
        $boostFactors = array();

        // Try to decompose term and multi-term subqueries
        foreach ($subqueries as $id => $subquery) {
            if ($subquery instanceof Zend_Search_Lucene_Search_Query_Term) {
                $terms[]        = $subquery->getTerm();
                $tsigns[]       = $signs[$id];
                $boostFactors[] = $subquery->getBoost();

                // remove subquery from a subqueries list
                unset($subqueries[$id]);
                unset($signs[$id]);
           } else if ($subquery instanceof Zend_Search_Lucene_Search_Query_MultiTerm) {
                $subTerms = $subquery->getTerms();
                $subSigns = $subquery->getSigns();

                if ($signs[$id] === true) {
                    // It's a required multi-term subquery.
                    // Something like '... +(+term1 -term2 term3 ...) ...'

                    // Multi-term required subquery can be decomposed only if it contains
                    // required terms and doesn't contain prohibited terms:
                    // ... +(+term1 term2 ...) ... => ... +term1 term2 ...
                    //
                    // Check this
                    $hasRequired   = false;
                    $hasProhibited = false;
                    if ($subSigns === null) {
                        // All subterms are required
                        $hasRequired = true;
                    } else {
                        foreach ($subSigns as $sign) {
                            if ($sign === true) {
                                $hasRequired   = true;
                            } else if ($sign === false) {
                                $hasProhibited = true;
                                break;
                            }
                        }
                    }
                    // Continue if subquery has prohibited terms or doesn't have required terms
                    if ($hasProhibited  ||  !$hasRequired) {
                        continue;
                    }

                    foreach ($subTerms as $termId => $term) {
                        $terms[]        = $term;
                        $tsigns[]       = ($subSigns === null)? true : $subSigns[$termId];
                        $boostFactors[] = $subquery->getBoost();
                    }

                    // remove subquery from a subqueries list
                    unset($subqueries[$id]);
                    unset($signs[$id]);

                } else { // $signs[$id] === null  ||  $signs[$id] === false
                    // It's an optional or prohibited multi-term subquery.
                    // Something like '... (+term1 -term2 term3 ...) ...'
                    // or
                    // something like '... -(+term1 -term2 term3 ...) ...'

                    // Multi-term optional and required subqueries can be decomposed
                    // only if all terms are optional.
                    //
                    // Check if all terms are optional.
                    $onlyOptional = true;
                    if ($subSigns === null) {
                        // All subterms are required
                        $onlyOptional = false;
                    } else {
                        foreach ($subSigns as $sign) {
                            if ($sign !== null) {
                                $onlyOptional = false;
                                break;
                            }
                        }
                    }

                    // Continue if non-optional terms are presented in this multi-term subquery
                    if (!$onlyOptional) {
                        continue;
                    }

                    foreach ($subTerms as $termId => $term) {
                        $terms[]  = $term;
                        $tsigns[] = ($signs[$id] === null)? null  /* optional */ :
                                                            false /* prohibited */;
                        $boostFactors[] = $subquery->getBoost();
                    }

                    // remove subquery from a subqueries list
                    unset($subqueries[$id]);
                    unset($signs[$id]);
                }
            }
        }


        // Check, if there are no decomposed subqueries
        if (count($terms) == 0 ) {
            // return prepared candidate
            return $optimizedQuery;
        }


        // Check, if all subqueries have been decomposed and all terms has the same boost factor
        if (count($subqueries) == 0  &&  count(array_unique($boostFactors)) == 1) {
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
            $optimizedQuery->setBoost(reset($boostFactors)*$this->getBoost());

            return $optimizedQuery;
        }


        // This boolean query can't be transformed to Term/MultiTerm query and still contains
        // several subqueries

        // Separate prohibited terms
        $prohibitedTerms        = array();
        foreach ($terms as $id => $term) {
            if ($tsigns[$id] === false) {
                $prohibitedTerms[]        = $term;

                unset($terms[$id]);
                unset($tsigns[$id]);
                unset($boostFactors[$id]);
            }
        }

        if (count($terms) == 1) {
            $clause = new Zend_Search_Lucene_Search_Query_Term(reset($terms));
            $clause->setBoost(reset($boostFactors));

            $subqueries[] = $clause;
            $signs[]      = reset($tsigns);

            // Clear terms list
            $terms = array();
        } else if (count($terms) > 1  &&  count(array_unique($boostFactors)) == 1) {
            $clause = new Zend_Search_Lucene_Search_Query_MultiTerm($terms, $tsigns);
            $clause->setBoost(reset($boostFactors));

            $subqueries[] = $clause;
            // Clause sign is 'required' if clause contains required terms. 'Optional' otherwise.
            $signs[]      = (in_array(true, $tsigns))? true : null;

            // Clear terms list
            $terms = array();
        }

        if (count($prohibitedTerms) == 1) {
            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new Zend_Search_Lucene_Search_Query_Term(reset($prohibitedTerms));
            $signs[]      = false;

            // Clear prohibited terms list
            $prohibitedTerms = array();
        } else if (count($prohibitedTerms) > 1) {
            // prepare signs array
            $prohibitedSigns = array();
            foreach ($prohibitedTerms as $id => $term) {
                // all prohibited term are grouped as optional into multi-term query
                $prohibitedSigns[$id] = null;
            }

            // (boost factors are not significant for prohibited clauses)
            $subqueries[] = new Zend_Search_Lucene_Search_Query_MultiTerm($prohibitedTerms, $prohibitedSigns);
            // Clause sign is 'prohibited'
            $signs[]      = false;

            // Clear terms list
            $prohibitedTerms = array();
        }

        /** @todo Group terms with the same boost factors together */

        // Check, that all terms are processed
        // Replace candidate for optimized query
        if (count($terms) == 0  &&  count($prohibitedTerms) == 0) {
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Boolean($subqueries, $signs);
            $optimizedQuery->setBoost($this->getBoost());
        }

        return $optimizedQuery;
    }

    /**
     * Returns subqueries
     *
     * @return array
     */
    public function getSubqueries()
    {
        return $this->_subqueries;
    }


    /**
     * Return subqueries signs
     *
     * @return array
     */
    public function getSigns()
    {
        return $this->_signs;
    }


    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        $this->_weight = new Zend_Search_Lucene_Search_Weight_Boolean($this, $reader);
        return $this->_weight;
    }


    /**
     * Calculate result vector for Conjunction query
     * (like '<subquery1> AND <subquery2> AND <subquery3>')
     */
    private function _calculateConjunctionResult()
    {
        $this->_resVector = null;

        if (count($this->_subqueries) == 0) {
            $this->_resVector = array();
        }

        $resVectors      = array();
        $resVectorsSizes = array();
        $resVectorsIds   = array(); // is used to prevent arrays comparison
        foreach ($this->_subqueries as $subqueryId => $subquery) {
            $resVectors[]      = $subquery->matchedDocs();
            $resVectorsSizes[] = count(end($resVectors));
            $resVectorsIds[]   = $subqueryId;
        }
        // sort resvectors in order of subquery cardinality increasing
        array_multisort($resVectorsSizes, SORT_ASC, SORT_NUMERIC,
                        $resVectorsIds,   SORT_ASC, SORT_NUMERIC,
                        $resVectors);
        
        foreach ($resVectors as $nextResVector) {
            if($this->_resVector === null) {
                $this->_resVector = $nextResVector;
            } else {
                //$this->_resVector = array_intersect_key($this->_resVector, $nextResVector);
                
                /**
                 * This code is used as workaround for array_intersect_key() slowness problem.
                 */
                $updatedVector = array();
                foreach ($this->_resVector as $id => $value) {
                    if (isset($nextResVector[$id])) {
                        $updatedVector[$id] = $value;
                    }
                }
                $this->_resVector = $updatedVector;
            }

            if (count($this->_resVector) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }
        }

        // ksort($this->_resVector, SORT_NUMERIC);
        // Used algorithm doesn't change elements order
    }


    /**
     * Calculate result vector for non Conjunction query
     * (like '<subquery1> AND <subquery2> AND NOT <subquery3> OR <subquery4>')
     */
    private function _calculateNonConjunctionResult()
    {
        $requiredVectors      = array();
        $requiredVectorsSizes = array();
        $requiredVectorsIds   = array(); // is used to prevent arrays comparison

        $optional = array();

        foreach ($this->_subqueries as $subqueryId => $subquery) {
            if ($this->_signs[$subqueryId] === true) {
                // required
                $requiredVectors[]      = $subquery->matchedDocs();
                $requiredVectorsSizes[] = count(end($requiredVectors));
                $requiredVectorsIds[]   = $subqueryId;
            } elseif ($this->_signs[$subqueryId] === false) {
                // prohibited
                // Do nothing. matchedDocs() may include non-matching id's
                // Calculating prohibited vector may take significant time, but do not affect the result
                // Skipped.  
            } else {
                // neither required, nor prohibited
                // array union
                $optional += $subquery->matchedDocs();
            }
        }

        // sort resvectors in order of subquery cardinality increasing
        array_multisort($requiredVectorsSizes, SORT_ASC, SORT_NUMERIC,
                        $requiredVectorsIds,   SORT_ASC, SORT_NUMERIC,
                        $requiredVectors);
        
        $required = null;
        foreach ($requiredVectors as $nextResVector) {
            if($required === null) {
                $required = $nextResVector;
            } else {
                //$required = array_intersect_key($required, $nextResVector);
                
                /**
                 * This code is used as workaround for array_intersect_key() slowness problem.
                 */
                $updatedVector = array();
                foreach ($required as $id => $value) {
                    if (isset($nextResVector[$id])) {
                        $updatedVector[$id] = $value;
                    }
                }
                $required = $updatedVector;
            }

            if (count($required) == 0) {
                // Empty result set, we don't need to check other terms
                break;
            }
        }
                
        
        if ($required !== null) {
            $this->_resVector = &$required;
        } else {
            $this->_resVector = &$optional;
        }

        ksort($this->_resVector, SORT_NUMERIC);
    }


    /**
     * Score calculator for conjunction queries (all subqueries are required)
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _conjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = $reader->getSimilarity()->coord(count($this->_subqueries),
                                                            count($this->_subqueries) );
        }

        $score = 0;

        foreach ($this->_subqueries as $subquery) {
            $subscore = $subquery->score($docId, $reader);

            if ($subscore == 0) {
                return 0;
            }

            $score += $subquery->score($docId, $reader) * $this->_coord;
        }

        return $score * $this->_coord * $this->getBoost();
    }


    /**
     * Score calculator for non conjunction queries (not all subqueries are required)
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function _nonConjunctionScore($docId, Zend_Search_Lucene_Interface $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = array();

            $maxCoord = 0;
            foreach ($this->_signs as $sign) {
                if ($sign !== false /* not prohibited */) {
                    $maxCoord++;
                }
            }

            for ($count = 0; $count <= $maxCoord; $count++) {
                $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
            }
        }

        $score = 0;
        $matchedSubqueries = 0;
        foreach ($this->_subqueries as $subqueryId => $subquery) {
            $subscore = $subquery->score($docId, $reader);

            // Prohibited
            if ($this->_signs[$subqueryId] === false && $subscore != 0) {
                return 0;
            }

            // is required, but doen't match
            if ($this->_signs[$subqueryId] === true &&  $subscore == 0) {
                return 0;
            }

            if ($subscore != 0) {
                $matchedSubqueries++;
                $score += $subscore;
            }
        }

        return $score * $this->_coord[$matchedSubqueries] * $this->getBoost();
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * @param Zend_Search_Lucene_Interface $reader
     */
    public function execute(Zend_Search_Lucene_Interface $reader)
    {
        // Initialize weight if it's not done yet
        $this->_initWeight($reader);

        foreach ($this->_subqueries as $subquery) {
            $subquery->execute($reader);
        }

        if ($this->_signs === null) {
            $this->_calculateConjunctionResult();
        } else {
            $this->_calculateNonConjunctionResult();
        }
    }



    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * @return array
     */
    public function matchedDocs()
    {
        return $this->_resVector;
    }

    /**
     * Score specified document
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        if (isset($this->_resVector[$docId])) {
            if ($this->_signs === null) {
                return $this->_conjunctionScore($docId, $reader);
            } else {
                return $this->_nonConjunctionScore($docId, $reader);
            }
        } else {
            return 0;
        }
    }

    /**
     * Return query terms
     *
     * @return array
     */
    public function getQueryTerms()
    {
        $terms = array();

        foreach ($this->_subqueries as $id => $subquery) {
            if ($this->_signs === null  ||  $this->_signs[$id] !== false) {
                $terms = array_merge($terms, $subquery->getQueryTerms());
            }
        }

        return $terms;
    }

    /**
     * Highlight query terms
     *
     * @param integer &$colorIndex
     * @param Zend_Search_Lucene_Document_Html $doc
     */
    public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
    {
        foreach ($this->_subqueries as $id => $subquery) {
            if ($this->_signs === null  ||  $this->_signs[$id] !== false) {
                $subquery->highlightMatchesDOM($doc, $colorIndex);
            }
        }
    }

    /**
     * Print a query
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping

        $query = '';

        foreach ($this->_subqueries as $id => $subquery) {
            if ($id != 0) {
                $query .= ' ';
            }

            if ($this->_signs === null || $this->_signs[$id] === true) {
                $query .= '+';
            } else if ($this->_signs[$id] === false) {
                $query .= '-';
            }

            $query .= '(' . $subquery->__toString() . ')';

            if ($subquery->getBoost() != 1) {
                $query .= '^' . round($subquery->getBoost(), 4);
            }
        }

        return $query;
    }
}

Return current item: wpSearch