Location: PHPKode > projects > Sphider Plus > sphider-plus_v.2.9/admin/spider.php
<?php

    //  Bootstrap of the spider script: paths, database selection and configuration.
    //  Indexing may run for a long time, so the execution time limit is disabled.
    set_time_limit (0);
    define("_SECURE",1);    // define secure constant

    //  remains empty if the configuration file included below does not set it
    $plus_nr        = '';

    //  directory layout, relative to the admin folder this script runs in
    $include_dir    = "../include";
    $settings_dir   = "../settings";
    $converter_dir  = "../converter";
    $dict_dir       = "$converter_dir/dictionaries";
    $stem_dir       = "$include_dir/stemming";
    $textcache_dir  = "$include_dir/textcache";
    $mediacache_dir = "$include_dir/mediacache";

    include "$settings_dir/database.php";
    include "$include_dir/commonfuncs.php";

    //      get active database for this task
    //  $dba_act names one of five database slots. For the matching slot N this calls
    //  dbN_connect(), selects $databaseN and takes over $mysql_table_prefixN.
    //  The loose comparison against the string form of N is kept from the original code.
    for ($spider_db_nr = 1; $spider_db_nr <= 5; $spider_db_nr++) {
        if ($dba_act == (string) $spider_db_nr) {
            $spider_db_connect  = "db".$spider_db_nr."_connect";
            $db_con             = $spider_db_connect() ;
            $success            = @mysql_select_db (${"database".$spider_db_nr}, $db_con);
            $mysql_table_prefix = ${"mysql_table_prefix".$spider_db_nr};
        }
    }
    unset($spider_db_nr, $spider_db_connect);   //  do not leave helper variables in global scope

    //  configuration belonging to the active database / table prefix (silently skipped if missing)
    @include "".$settings_dir."/db".$dba_act."/conf_".$mysql_table_prefix.".php";
    if (!$plus_nr) {
        //  No individual configuration was loaded: fall back to the default one.
        //  The path is built from $settings_dir; the former absolute path
        //  "/settings/backup/..." pointed at the filesystem root and could not be found.
        include "$settings_dir/backup/Sphider-plus_default-configuration.php";
    }

    if ($default_agent == 1) {
        //  identify as a common desktop browser instead of the configured user agent
        $user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20100101 Firefox/12.0';
    }

    include "messages.php";
    include "spiderfuncs.php";

    $com_in = array();
    $local_redir    = '';
    $url            = '';
    $multi          = '';

    //  now replace some variables with actual Admin settings
    //  Reads the word / suffix lists stored in $common_dir and turns each of them into a
    //  trimmed, de-duplicated and sorted array used by the indexer.
    if (is_dir($common_dir)) {
        $handle = opendir($common_dir);

        //  Only enter the readdir() loop with a valid handle, so a failed opendir()
        //  cannot feed an invalid handle into the loop condition.
        if ($handle && $use_common == 'all') {
            while (false !== ($common_file = readdir($handle))) {   //  get all common files
                //  "ommon_" is found at offset 1 in names starting with "common_", which makes strpos() truthy
                if (strpos($common_file, "ommon_")) {
                    $act = @file($common_dir.$common_file);         //  get content of actual common file
                    if (is_array($act)) {                           //  skip unreadable files instead of destroying $com_in
                        $com_in = array_merge($com_in, $act);       //  build a complete array of common words
                    }
                }
            }
        }

        if ($use_common != 'all' && $use_common != 'none') {
            $com_in = @file("".$common_dir."common_".$use_common.".txt");         //  get content of language specific common file
        }

        $suffix = @file($common_dir.'suffix.txt');      //  get all file suffixes to be ignored during index procedure
        if ($use_white1 == '1' || $use_white2 == '1') {
            $white_in = @file($common_dir.'whitelist.txt');    //  get all words to enable page indexing
        }
        if ($use_black == '1') {
            $black_in = @file($common_dir.'blacklist.txt');    //  get all words to prevent indexing of page
        }

        if ($index_image) {
            $image = @file($common_dir.'image.txt');       //  get all image suffixes to be indexed
        }
        if ($index_audio) {
            $audio = @file($common_dir.'audio.txt');       //  get all audio suffixes to be indexed
        }
        if ($index_video) {
            $video = @file($common_dir.'video.txt');       //  get all video suffixes to be indexed
        }

        $divs_not   = @file($common_dir.'divs_not.txt');    //  get all div's to not to be indexed (Admin selected)
        $divs_use   = @file($common_dir.'divs_use.txt');    //  get all div's to be indexed (Admin selected)
        $sld        = @file($common_dir.'sld.txt');         //  get all SLDs

        if ($handle) {
            closedir($handle);
        }

        //  common words become the keys of $common for fast lookup
        if (is_array($com_in)) {
            foreach ($com_in as $word) {
                $common[trim($word)] = 1;
            }
        }

        //  file suffixes to be ignored
        if (is_array($suffix)) {
            foreach ($suffix as $word) {
                $ext[] = trim($word);
            }
            if (!empty($ext)) {         //  an empty list file must not run array_unique()/sort() on an undefined array
                $ext = array_unique($ext);
                sort($ext);
            }
        }

        //  whitelist: optionally lower-cased, converted to UTF-8 and slash-escaped
        if (isset($white_in) && is_array($white_in)) {
            foreach ($white_in as $val) {
                if ($case_sensitive == '0') {
                    $val = lower_case($val);
                }
                $val = @iconv($home_charset,"UTF-8",$val);

                $white[] = addslashes($val);
            }

            if (!empty($white)) {
                foreach ($white as $word) {
                    $whitelist[] = trim($word);
                }
                $whitelist = array_unique($whitelist);
                sort($whitelist);
            }
        }

        //  blacklist: optionally lower-cased and converted to UTF-8
        if (isset($black_in) && is_array($black_in)) {
            foreach ($black_in as $val) {
                if ($case_sensitive == '0') {
                    $val = lower_case($val);
                }
                $val = @iconv($home_charset,"UTF-8",$val);
                $black[] = $val;
            }

            if (!empty($black)) {
                foreach ($black as $word) {
                    $blacklist[] = trim($word);
                }
                $blacklist = array_unique($blacklist);
                sort($blacklist);
            }
        }

        //  media suffixes are compared in lower case
        if (isset($image) && is_array($image)) {
            foreach ($image as $word) {
                $imagelist[] = trim(strtolower($word));
            }
            if (!empty($imagelist)) {
                $imagelist = array_unique($imagelist);
                sort($imagelist);
            }
        }

        if (isset($audio) && is_array($audio)) {
            foreach ($audio as $word) {
                $audiolist[] = trim(strtolower($word));
            }
            if (!empty($audiolist)) {
                $audiolist = array_unique($audiolist);
                sort($audiolist);
            }
        }

        if (isset($video) && is_array($video)) {
            foreach ($video as $word) {
                $videolist[] = trim(strtolower($word));
            }
            if (!empty($videolist)) {
                $videolist = array_unique($videolist);
                sort($videolist);
            }
        }

        //  div's excluded from / included into indexing
        if (is_array($divs_not)) {
            foreach ($divs_not as $word) {
                $not_divlist[] = trim($word);
            }
            if (!empty($not_divlist)) {
                $not_divlist = array_unique($not_divlist);
                sort($not_divlist);
            }
        }

        if (is_array($divs_use)) {
            foreach ($divs_use as $word) {
                $use_divlist[] = trim($word);
            }
            if (!empty($use_divlist)) {
                $use_divlist = array_unique($use_divlist);
                sort($use_divlist);
            }
        }

        //  second level domains
        if (is_array($sld)) {
            foreach ($sld as $word) {
                $sldlist[] = trim($word);
            }
            if (!empty($sldlist)) {
                $sldlist = array_unique($sldlist);
                sort($sldlist);
            }
        }
    }

    //  define standard charset for mb functions
    if ($mb == 1) {
        mb_internal_encoding("UTF-8");
    }

    //  Connect to the active database slot once more, read the 'addurl' table and
    //  ask the MySQL server for a query cache. Every slot 1..5 is compared against
    //  $dba_act in turn, exactly like the former five separate branches did.
    for ($spider_slot = 1; $spider_slot <= 5; $spider_slot++) {
        if ($dba_act != (string) $spider_slot) {
            continue;
        }

        $spider_connect     = "db".$spider_slot."_connect";
        $db_con             = $spider_connect() ;
        $success            = @mysql_select_db (${"database".$spider_slot}, $db_con);
        $mysql_table_prefix = ${"mysql_table_prefix".$spider_slot};
        $tables             = mysql_query("select * from ".$mysql_table_prefix."addurl");

        //  try to initialize a 10 MByte MySQL cache (might not work on shared hosting systems) for the active database
        $mysql_csize        = mysql_query("SET GLOBAL query_cache_size = 10485760");
        $mysql_cacheon      = mysql_query("SET GLOBAL query_cache_type = ON") ;
    }
    unset($spider_slot, $spider_connect);   //  helper variables are not needed any longer

    //  Import the request parameters into the global scope.
    //  extract() overwrites existing variables, so the names below are removed from the
    //  request first: they are used to build include paths and SQL statements in this
    //  script and must never be controllable by the client.
    $spider_request_vars = getHttpVars();
    if (is_array($spider_request_vars)) {
        $spider_locked_names = array(
            'include_dir', 'settings_dir', 'converter_dir', 'dict_dir', 'stem_dir',
            'textcache_dir', 'mediacache_dir', 'mysql_table_prefix', 'db_con'
        );
        foreach ($spider_locked_names as $spider_locked) {
            unset($spider_request_vars[$spider_locked]);
        }
        extract ($spider_request_vars);
    }
    unset($spider_request_vars, $spider_locked_names, $spider_locked);

    $id = $multi;   //  update $id

    require_once ("$converter_dir/ConvertCharset.class.php");

    $template_dir   = "../".$templ_dir."";
    $template_path  = "$template_dir/$template";
    $id3_dir        = "./getid3";

    if ($all == '1' && $multi_indexer > '1'){    //  'index/re-index all' was initialized by Admin interface, but not by command line operation
        pre_all();      //  define all sites as erased, but don't erase the content
        $all = '3';     //  now re-index all with support of multithreaded indexer
    }

    //  optional parsers are only loaded when the corresponding feature is enabled
    if ($index_rss == '1') {
        include "$converter_dir/feed_parser.php";
    }

    if ($index_id3 == '1') {
        include "$id3_dir/getid3.php";
    }

    //  delete complete query log in database
    if ($clear_query == '1') {
        mysql_query ("truncate ".$mysql_table_prefix."query_log");
        if ($debug > '0') echo mysql_error();
    }

    //  defaults for this run; the command line parser and the multi-indexer setup may change them
    $delay_time     = 0;
    $command_line   = 0;
    $copy           = '1';
    $omit           = '';
    $cl             = '0';
    $tmp_urls       = Array();

    //  Command line operation: translate the arguments into the same variables the
    //  Admin interface would deliver.
    //      -all / ...new<ID> / ...erased<ID> / -eall / -erase / -preall    select a batch job ($all = 1..6) and stop parsing
    //      -u <url>  -d <depth>  -m <must include>  -n <must not include>  options followed by a value
    //      -f  -l  -r                                                      flags: full depth, may leave domain, re-index
    //  Anything else prints the command line help and terminates.
    if (isset($_SERVER['argv']) && $_SERVER['argc'] >= 2) {
        $multi_indexer = 1;     //  command line operation does not require interactive start of indexer
        $id = '0';
        $started = time();
        $command_line = 1;
        $cl = 1;
        $ac = 1; 	//argument counter
        while ($ac < (count($_SERVER['argv']))) {
            $arg = $_SERVER['argv'][$ac];

            //  options that need a value must not be the last argument;
            //  otherwise an undefined argv index would be read below
            if (in_array($arg, array('-u', '-d', '-m', '-n'), true) && !isset($_SERVER['argv'][$ac+1])) {
                commandline_help();
                die();
            }

            if ($arg  == '-all') {
                $all = 1;
                if ($clear_cache == '1') {
                    clear_TextCache();
                    clear_MediaCache();
                }
                //pre_all();      //  define all sites as erased, but don't erase the content
                $log_handle = create_logFile($id);
                break;

            } else if (strpos($arg, "new")) {       //  "new" must not start at offset 0 (strpos() would be falsy)
                $all= 2;
                $id  = substr($arg, strpos($arg, "new")+3);     //  extract ID from command line
                break;

            } else if (strpos($arg, "erased")) {    //  same offset rule as for "new"
                $all = 3;
                $id  = substr($arg, strpos($arg, "erased")+6);  //  extract ID from command line
                break;

            } else if ($arg  == '-eall') {
                $all= 4;
                break;

            } else if ($arg  == '-erase') {
                $all= 5;
                break;

            } else if ($arg  == '-preall') {
                $all= 6;
                break;

            } else if ($arg  == '-u') {
                $url = $_SERVER['argv'][$ac+1];
                $ac= $ac+2;
            } else if ($arg  == '-f') {
                $soption = 'full';
                $ac++;
            } else if ($arg == '-d') {
                $soption = 'level';
                $maxlevel =  $_SERVER['argv'][$ac+1];
                $ac= $ac+2;
            } else if ($arg == '-l') {
                $can_leave = 1;
                $ac++;
            } else if ($arg == '-r') {
                $reindex = 1;
                $ac++;
            } else if ($arg  == '-m') {
                //  a literal "\n" typed on the command line separates several entries
                $in =  str_replace("\\n", chr(10), $_SERVER['argv'][$ac+1]);
                $ac= $ac+2;
            } else if ($arg  == '-n') {
                $out =  str_replace("\\n", chr(10), $_SERVER['argv'][$ac+1]);
                $ac= $ac+2;
            } else {
                commandline_help();
                die();
            }
        }
    }

    /*
     // simulate command line  operation
     $started = time();
     $multi_indexer = 1;
     $command_line = 1;
     $cl = 1;
     */

    //  Fill in defaults for everything that was delivered neither by the request
    //  nor by the command line, then prepare logging and the multi-indexer state.
    if (isset($soption) && $soption == 'full') {
        $maxlevel = '-1';
    }

    if (!isset($can_leave)) {
        $can_leave = '0';
    }

    if (!isset($use_pref)) {
        $use_pref = '0';
    }

    if(!isset($reindex)) {
        $reindex = '0';
    }

    //  robots.txt is respected unless explicitly switched off.
    //  The second test only runs when $not_use_robot exists, so no undefined variable is read.
    if(!isset($not_use_robot) || $not_use_robot == '0') {
        $use_robot = '1';
    } elseif ($not_use_robot == '1') {
        $use_robot = '0';
    }

    //  the same rule for the nofollow switch
    if(!isset($not_use_nofollow) || $not_use_nofollow == '0') {
        $use_nofollow = '1';
    } elseif ($not_use_nofollow == '1') {
        $use_nofollow = '0';
    }

    if(!isset($maxlevel)) {
        $maxlevel = '0';
    }

    if ($multi_indexer > '1') { //  multithreaded indexing?
        if (!$multi) {          //  first loop in multi-indexer
            $multi = '0';
        }
        $multi++;
    }

    //  A log file is opened when logging is enabled for an indexing job, or whenever
    //  a single URL is given. The parentheses spell out the precedence of the former
    //  unbracketed "&& ... || $url" expression; the result is unchanged.
    //  NOTE(review): "|| $url" bypasses $keep_log - confirm that this is intended.
    if (($keep_log && $multi != '1' && ($all < '4' || $all >= '20')) || $url) {

        $log_handle = create_logFile($id);
    }

    if (empty($started)){
        $started = '0'; //  initialize this variable(will become timestamp when first indexer was started)
    }
    if ($multi == '2'){ //  Admin started the first indexer
        $started = time();
    }
    if ($all != '6') {
        printHTMLHeader($omit, $url, $cl, $multi, $all, $started) ;
    }

    if ($multi == '1' && !$url) {    //  Wait for admin's first thread activation
        die();
    }

    if ($multi_indexer > '1') { // suppress output for multithreaded indexing
        $cl = '1';
        $command_line = '1';
    }

    //  Dispatch the batch job selected by $all. The tests run one after another
    //  (no else-chain), so each one sees the value of $all at that moment.
    //  The called functions are not defined in this file (presumably in spiderfuncs.php).
    if ($all == '1') {      //  for command line operation: index all sites in database
        index_all();
    }

    if ($all == '2') {      //  index all new sites, never indexed before
        index_new();
    }

    if ($all == '3') {      //  index all erased sites
        index_erased();
    }

    if ($all == '4') {      //  'Erase & Re-index all' for command line operation
        erase();
        $log_handle = create_logFile($id);      //  open a log file after erasing, before indexing starts
        index();
    }

    if ($all == '5') {
        erase();            //  erase for command line operation
    }

    if ($all == '6') {
        pre_all();          //  clear 'last indexed' for command line operation
        die();              //  nothing else to do; no report is printed for this job
    }

    if ($all == '20') {     //  index all suspended sites
        index_suspended();
    }

    if ($all == '21') {     //  index all sites shown on one page
        index_these();
    }

    //  No batch job selected: index the single site given by $url.
    if ($all != '1' && $all != '2' && $all != '3' && $all != '4' && $all != '5' && $all != '20' && $all != '21') {
        if ($reindex == 1 && $command_line == 1) {
            //  Re-index: take the spider options stored for this site from the database.
            mysqltest();

            //  $url arrives from the command line or from the request, so it is escaped
            //  before it becomes part of the SQL statement.
            $result=mysql_query("select url, spider_depth, required, disallowed, can_leave_domain, use_prefcharset from ".$mysql_table_prefix."sites where url='".mysql_real_escape_string($url)."'");
            if ($debug > '0') echo mysql_error();
            //  a failed query returns false, which must not be passed to mysql_fetch_row()
            if($result && ($row=mysql_fetch_row($result))) {
                $url = $row[0];
                $maxlevel = $row[1];
                $in= $row[2];
                $out = $row[3];
                $can_leave = $row[4];
                $use_pref = $row[5];

                if ($can_leave=='') {
                    $can_leave=0;
                }
                //  a stored depth of -1 stands for 'full' indexing, everything else is a level limit
                if ($maxlevel == -1) {
                    $soption = 'full';
                } else {
                    $soption = 'level';
                }
            }
            if (isset($clear) && $clear == 1) clean_resource($result, '01') ;
        }

        if (!isset($in)) {
            $in = "";
        }
        if (!isset($out)) {
            $out = "";
        }

        //  measure the time consumed by this site and report it
        $started = time();
        index_site($url, $reindex, $maxlevel, $soption, $in, $out, $can_leave, $use_robot, $use_nofollow, $cl, $all, $use_pref);
        $ended = time();

        $consumed = $ended - $started;
        printConsumedReport('consumed', $cl, '0', $consumed);
        printStandardReport('ReindexFinish',$command_line, '0');
    }

    //  Final report, optional e-mail notification and closing of the log file.
    printStandardReport('quit',$command_line, '0');

    if ($email_log) {
        $indexed = ($all==1) ? 'ALL' : $url;
        $log_report = "";

        //  $log_handle only exists when a log file was opened during this run
        //  NOTE(review): $log_file is not assigned in this script - presumably set elsewhere, confirm.
        if (!empty($log_handle)) {
            $log_report = "Log saved into $log_file";
        }
        mail($admin_email, "Sphider indexing report", "Sphider has finished indexing $indexed at ".date("y-m-d H:i:s").". ".$log_report);
    }

    if (!empty($log_handle)) {
        fclose($log_handle);
    }

?>
Return current item: Sphider Plus