Location: PHPKode > projects > Yioop! > yioop-v0.78/lib/crawl_constants.php
<?php
/** 
 *  SeekQuarry/Yioop --
 *  Open Source Pure PHP Search Engine, Crawler, and Indexer
 *
 *  Copyright (C) 2009, 2010, 2011  Chris Pollett hide@address.com
 *
 *  LICENSE:
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  END LICENSE
 *
 * @author Chris Pollett hide@address.com
 * @package seek_quarry
 * @subpackage library
 * @license http://www.gnu.org/licenses/ GPL3
 * @link http://www.seekquarry.com/
 * @copyright 2009, 2010, 2011
 * @filesource
 */

if(!defined('BASE_DIR')) {echo "BAD REQUEST"; exit();}

/**
 * Shared constants and enums used by components that are involved in the
 * crawling process
 *
 * @author Chris Pollett
 * @package seek_quarry
 * @subpackage library
 */ 
 
interface CrawlConstants
{
    const queue_base_name = "QueueBundle";
    const archive_base_name = "Archive";
    const schedule_data_base_name = "ScheduleData";
    const schedule_name = "FetchSchedule";
    const robot_data_base_name = "RobotData";
    const index_data_base_name = "IndexData";
    const index_closed_name = "IndexClosed";
    const fetch_batch_name = "FetchBatch";
    const fetch_crawl_info = "FetchInfo";
    const fetch_closed_name = "FetchClosed";
    const data_base_name = "At";
    const schedule_start_name = "StartCrawlSchedule.txt";

    const MAX = 1;
    const MIN = -1;

    const STOP_STATE = -1;
    const CONTINUE_STATE = 1;
    const NO_DATA_STATE = 2;
    const WAITING_START_MESSAGE_STATE = 3; 
    
    const STATUS = 'a';
    const CRAWL_TIME = 'b';

    const HTTP_CODE = 'c';
    const TIMESTAMP = 'd';
    const TYPE = 'e';
    const ENCODING = 'f';

    const SEEN_URLS = 'g';
    const MACHINE = 'h';
    const INVERTED_INDEX = 'i';

    const SAVED_CRAWL_TIMES= 'j';
    const SCHEDULE_TIME = 'k';
    const URL = 'l';
    const WEIGHT = 'm';
    const ROBOT_PATHS = 'n';
    const HASH = 'o';
    const GOT_ROBOT_TXT = 'p';
    const PAGE = 'q';
    const DOC_INFO = 'r';
    const TITLE = 's';
    const DESCRIPTION = 't';
    const THUMB = 'u';
    const CRAWL_DELAY = 'v';
    const LINKS = 'w';
    const ROBOT_TXT = 'x';
    const TO_CRAWL = 'y';
    const INDEX = 'z';

    const AVERAGE_TITLE_LENGTH = 'A';
    const AVERAGE_DESCRIPTION_LENGTH = 'B';
    const AVERAGE_TOTAL_LINK_TEXT_LENGTH = 'C';
    const TITLE_LENGTH = 'D';
    const DESCRIPTION_LENGTH = 'E';
    const LINK_LENGTH = 'F';
    const TITLE_WORDS = 'G';
    const DESCRIPTION_WORDS = 'H';
    const LINK_WORDS = 'I';
    const TITLE_WORD_SCORE = 'J';
    const DESCRIPTION_WORD_SCORE = 'K';
    const LINK_WORD_SCORE = 'L';
    const DOC_DEPTH = 'M';
    const DOC_RANK = 'N';
    const URL_WEIGHT = 'O';
    const INLINKS = 'P';

    const NEW_CRAWL = 'Q';
    const OFFSET = 'R';
    const PATHS = 'S';
    const HASH_URL = 'T';
    const SUMMARY_OFFSET = 'U';
    const DUMMY = 'V';
    const SITES = 'W';
    const SCORE = 'X';

    const CRAWL_ORDER = 'Y';
    const RESTRICT_SITES_BY_URL = 'Z';
    const ALLOWED_SITES = 'aa';
    const DISALLOWED_SITES = 'ab';
    const BREADTH_FIRST = 'ac';
    const PAGE_IMPORTANCE = 'ad';

    const MACHINE_URI = 'ae';
    const SITE_INFO = 'af';
    const FILETYPE = 'ag';
    const SUMMARY = 'ah';
    const URL_INFO = 'ai';
    const HASH_SEEN_URLS ='aj';
    const RECENT_URLS ='ak';
    const MEMORY_USAGE ='al';
    const DOC_ID ='am';
    const RELEVANCE ='an';
    const META_WORDS ='ao';
    const CACHE_PAGE_PARTITION = 'ap';
    const GENERATION = 'aq';
    const HASH_SUM_SCORE = 'ar';
    const HASH_URL_COUNT = 'as';
    const IS_DOC = 'at';
    const BOOST = 'av';
    const IP_ADDRESSES = 'au';
    const JUST_METAS = 'aw';
    const WEB_CRAWL = 'ax';
    const ARCHIVE_CRAWL = 'ay';
    const CRAWL_TYPE = 'az';
    const CRAWL_INDEX = 'ba';
    const HEADER = 'bb';
    const SERVER = 'bc';
    const SERVER_VERSION = 'bd';
    const OPERATING_SYSTEM = 'be';
    const MODIFIED = 'bf';
    const LANG = 'bg';
    const ROBOT_INSTANCE = 'bh';
    const DOC_LEN = 'bi';
    const SUBDOCS = 'bj';
    const SUBDOCTYPE = 'bk';
    const INDEXING_PLUGINS = 'bl';
    const DOMAIN_WEIGHTS = 'bm';
    const POSITION_LIST = 'bn';
    const PROXIMITY = 'bo';
    const LOCATION = 'bp';

    const NEEDS_OFFSET_FLAG = 0x7FFFFFFF;

} 
?>
Return current item: Yioop!