<?php
// Bootstrap of the spider script: lifts the execution time limit, declares the
// directory layout (relative to this script) and pulls in the shared includes.
set_time_limit(0);      // 0 = no execution time limit
define('_SECURE', 1);   // define secure constant

// Remains empty unless one of the files included later assigns it.
$plus_nr = '';

// Directory layout
$include_dir    = '../include';
$settings_dir   = '../settings';
$converter_dir  = '../converter';
$dict_dir       = $converter_dir . '/dictionaries';
$stem_dir       = $include_dir . '/stemming';
$textcache_dir  = $include_dir . '/textcache';
$mediacache_dir = $include_dir . '/mediacache';

// Database settings first, then the common helper functions.
include $settings_dir . '/database.php';
include $include_dir . '/commonfuncs.php';
// get active database for this task
// $dba_act is compared (loosely, as before) against each of the five possible
// database slots; for a matching slot N the script calls dbN_connect(), selects
// $databaseN on that link and adopts $mysql_table_prefixN as the table prefix.
foreach (array('1', '2', '3', '4', '5') as $active_db_slot) {
    if ($dba_act == $active_db_slot) {
        $connect_active_db  = 'db' . $active_db_slot . '_connect';
        $db_con             = $connect_active_db();
        $success            = @mysql_select_db(${'database' . $active_db_slot}, $db_con);
        $mysql_table_prefix = ${'mysql_table_prefix' . $active_db_slot};
    }
}
// do not leave the loop helpers behind in the global scope
unset($active_db_slot, $connect_active_db);
// Load the configuration stored for the active database and table prefix.
// The @ suppresses the warning when that file does not exist.
@include $settings_dir . "/db" . $dba_act . "/conf_" . $mysql_table_prefix . ".php";
if (!$plus_nr) {
    // $plus_nr is still empty, so fall back to the default configuration.
    // The path is resolved below $settings_dir like every other settings file;
    // the former "/settings/backup/..." pointed at the filesystem root.
    include $settings_dir . "/backup/Sphider-plus_default-configuration.php";
}
if ($default_agent == 1) {
    $user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20100101 Firefox/12.0';
}
include "messages.php";
include "spiderfuncs.php";

// Initial state; $url and $multi may be replaced later by request or command line values.
$com_in = array();
$local_redir = '';
$url = '';
$multi = '';
// now replace some variables with actual Admin settings
// Reads the list files below $common_dir and turns each of them into a trimmed,
// de-duplicated and sorted global array ($common is a lookup map word => 1).
// Files that are missing or unreadable are skipped; the matching list is then left untouched.
if (is_dir($common_dir)) {
    $handle = opendir($common_dir);
    if ($use_common == 'all' && $handle) {
        while (false !== ($common_file = readdir($handle))) {   // get all common files
            // strpos() is used as a boolean here: "ommon_" has to be found behind
            // at least one leading character (as in "common_xx.txt").
            if (strpos($common_file, "ommon_")) {
                $act = @file($common_dir.$common_file);         // get content of actual common file
                if (is_array($act)) {
                    // merge only readable files; merging a failed read would discard the words collected so far
                    $com_in = array_merge($com_in, $act);       // build a complete array of common words
                }
            }
        }
    }
    if ($use_common != 'all' && $use_common != 'none') {
        $com_in = @file("".$common_dir."common_".$use_common.".txt");   // get content of language specific common file
    }
    $suffix = @file($common_dir.'suffix.txt');                  // get all file suffixes to be ignored during index procedure
    if ($use_white1 == '1' || $use_white2 == '1') {
        $white_in = @file($common_dir.'whitelist.txt');         // get all words to enable page indexing
    }
    if ($use_black == '1') {
        $black_in = @file($common_dir.'blacklist.txt');         // get all words to prevent indexing of page
    }
    if ($index_image) {
        $image = @file($common_dir.'image.txt');                // get all image suffixes to be indexed
    }
    if ($index_audio) {
        $audio = @file($common_dir.'audio.txt');                // get all audio suffixes to be indexed
    }
    if ($index_video) {
        $video = @file($common_dir.'video.txt');                // get all video suffixes to be indexed
    }
    $divs_not = @file($common_dir.'divs_not.txt');              // get all div's to not to be indexed (Admin selected)
    $divs_use = @file($common_dir.'divs_use.txt');              // get all div's to be indexed (Admin selected)
    $sld = @file($common_dir.'sld.txt');                        // get all SLDs
    if ($handle) {
        closedir($handle);
    }

    // common words: lookup map
    if (is_array($com_in)) {
        foreach ($com_in as $word) {
            $common[trim($word)] = 1;
        }
    }

    // file suffixes to be ignored
    if (is_array($suffix)) {
        foreach ($suffix as $word) {
            $ext[] = trim($word);
        }
        if (isset($ext)) {      // an empty file adds nothing, so there is nothing to sort
            $ext = array_unique($ext);
            sort($ext);
        }
    }

    // whitelist: optional lower-casing, conversion from $home_charset to UTF-8, escaped with addslashes()
    if (isset($white_in) && is_array($white_in)) {
        foreach ($white_in as $val) {
            if ($case_sensitive == '0') {
                $val = lower_case($val);
            }
            $val = @iconv($home_charset, "UTF-8", $val);
            $white[] = addslashes($val);
        }
        if (isset($white)) {
            foreach ($white as $word) {
                $whitelist[] = trim($word);
            }
            $whitelist = array_unique($whitelist);
            sort($whitelist);
        }
    }

    // blacklist: same preparation as the whitelist, but without addslashes()
    if (isset($black_in) && is_array($black_in)) {
        foreach ($black_in as $val) {
            if ($case_sensitive == '0') {
                $val = lower_case($val);
            }
            $val = @iconv($home_charset, "UTF-8", $val);
            $black[] = $val;
        }
        if (isset($black)) {
            foreach ($black as $word) {
                $blacklist[] = trim($word);
            }
            $blacklist = array_unique($blacklist);
            sort($blacklist);
        }
    }

    // media suffix lists are lower-cased
    if (isset($image) && is_array($image)) {
        foreach ($image as $word) {
            $imagelist[] = trim(strtolower($word));
        }
        if (isset($imagelist)) {
            $imagelist = array_unique($imagelist);
            sort($imagelist);
        }
    }
    if (isset($audio) && is_array($audio)) {
        foreach ($audio as $word) {
            $audiolist[] = trim(strtolower($word));
        }
        if (isset($audiolist)) {
            $audiolist = array_unique($audiolist);
            sort($audiolist);
        }
    }
    if (isset($video) && is_array($video)) {
        foreach ($video as $word) {
            $videolist[] = trim(strtolower($word));
        }
        if (isset($videolist)) {
            $videolist = array_unique($videolist);
            sort($videolist);
        }
    }

    // div lists selected by the Admin
    if (is_array($divs_not)) {
        foreach ($divs_not as $word) {
            $not_divlist[] = trim($word);
        }
        if (isset($not_divlist)) {
            $not_divlist = array_unique($not_divlist);
            sort($not_divlist);
        }
    }
    if (is_array($divs_use)) {
        foreach ($divs_use as $word) {
            $use_divlist[] = trim($word);
        }
        if (isset($use_divlist)) {
            $use_divlist = array_unique($use_divlist);
            sort($use_divlist);
        }
    }

    // SLD list
    if (is_array($sld)) {
        foreach ($sld as $word) {
            $sldlist[] = trim($word);
        }
        if (isset($sldlist)) {
            $sldlist = array_unique($sldlist);
            sort($sldlist);
        }
    }

    // the loop variable is scratch only; do not leave the last list entry in the global scope
    unset($word);
}
if ($mb == 1) {
    mb_internal_encoding("UTF-8");      // define standard charset for mb functions
}

// Connect to the active database once more. For the slot N matching $dba_act
// (loose comparison, as before) this calls dbN_connect(), selects $databaseN,
// adopts $mysql_table_prefixN, queries the 'addurl' table and then
// tries to initialize a 10 MByte MySQL cache (might not work on shared hosting systems).
foreach (array('1', '2', '3', '4', '5') as $active_db_slot) {
    if ($dba_act == $active_db_slot) {
        $connect_active_db  = 'db' . $active_db_slot . '_connect';
        $db_con             = $connect_active_db();
        $success            = @mysql_select_db(${'database' . $active_db_slot}, $db_con);
        $mysql_table_prefix = ${'mysql_table_prefix' . $active_db_slot};
        $tables             = mysql_query("select * from " . $mysql_table_prefix . "addurl");
        $mysql_csize        = mysql_query("SET GLOBAL query_cache_size = 10485760");
        $mysql_cacheon      = mysql_query("SET GLOBAL query_cache_type = ON");
    }
}
// do not leave the loop helpers behind in the global scope
unset($active_db_slot, $connect_active_db);
// Import everything returned by getHttpVars() into the global scope.
// NOTE(review): extract() overwrites variables that are already set (configuration,
// $url, $multi, ...) - confirm that getHttpVars() only returns trusted / cleaned values.
extract(getHttpVars());
$id = $multi;       // update $id

require_once($converter_dir . "/ConvertCharset.class.php");

$template_dir  = "../" . $templ_dir;
$template_path = $template_dir . "/" . $template;
$id3_dir       = "./getid3";

// 'index/re-index all' was initialized by Admin interface, but not by command line operation
if ($all == '1' && $multi_indexer > '1') {
    pre_all();      // define all sites as erased, but don't erase the content
    $all = '3';     // now re-index all with support of multithreaded indexer
}

// optional parsers, loaded only when the matching index option is enabled
if ($index_rss == '1') {
    include $converter_dir . "/feed_parser.php";
}
if ($index_id3 == '1') {
    include $id3_dir . "/getid3.php";
}

// delete complete query log in database
if ($clear_query == '1') {
    mysql_query("truncate {$mysql_table_prefix}query_log");
    if ($debug > '0') {
        echo mysql_error();
    }
}
// Defaults for a run started by the Admin interface; replaced below for command line operation.
$delay_time = 0;
$command_line = 0;
$copy = '1';
$omit = '';
$cl = '0';
$tmp_urls = Array();

// Command line operation: translate the arguments into the same variables
// that the Admin interface would otherwise deliver.
if (isset($_SERVER['argv']) && $_SERVER['argc'] >= 2) {
    $multi_indexer = 1;     // command line operation does not require interactive start of indexer
    $id = '0';
    $started = time();
    $command_line = 1;
    $cl = 1;
    $ac = 1;                //argument counter
    while ($ac < count($_SERVER['argv'])) {
        $arg = $_SERVER['argv'][$ac];

        // -u, -d, -m and -n consume the following argument as their value.
        // Without that value the call is malformed: show the help like for any other invalid argument.
        if (in_array($arg, array('-u', '-d', '-m', '-n'), true) && !isset($_SERVER['argv'][$ac + 1])) {
            commandline_help();
            die();
        }

        if ($arg == '-all') {
            $all = 1;
            if ($clear_cache == '1') {
                clear_TextCache();
                clear_MediaCache();
            }
            //pre_all();    // define all sites as erased, but don't erase the content
            $log_handle = create_logFile($id);
            break;
        } else if (strpos($arg, "new")) {
            // strpos() is used as a boolean: "new" has to follow at least one leading character
            $all = 2;
            $id = substr($arg, strpos($arg, "new") + 3);        // extract ID from command line
            break;
        } else if (strpos($arg, "erased")) {
            // same boolean use of strpos() as above
            $all = 3;
            $id = substr($arg, strpos($arg, "erased") + 6);     // extract ID from command line
            break;
        } else if ($arg == '-eall') {
            $all = 4;
            break;
        } else if ($arg == '-erase') {
            $all = 5;
            break;
        } else if ($arg == '-preall') {
            $all = 6;
            break;
        } else if ($arg == '-u') {
            $url = $_SERVER['argv'][$ac + 1];
            $ac = $ac + 2;
        } else if ($arg == '-f') {
            $soption = 'full';
            $ac++;
        } else if ($arg == '-d') {
            $soption = 'level';
            $maxlevel = $_SERVER['argv'][$ac + 1];
            $ac = $ac + 2;
        } else if ($arg == '-l') {
            $can_leave = 1;
            $ac++;
        } else if ($arg == '-r') {
            $reindex = 1;
            $ac++;
        } else if ($arg == '-m') {
            // a literal \n in the argument is turned into a line feed
            $in = str_replace("\\n", chr(10), $_SERVER['argv'][$ac + 1]);
            $ac = $ac + 2;
        } else if ($arg == '-n') {
            // a literal \n in the argument is turned into a line feed
            $out = str_replace("\\n", chr(10), $_SERVER['argv'][$ac + 1]);
            $ac = $ac + 2;
        } else {
            commandline_help();
            die();
        }
    }
}
/*
// simulate command line operation
$started = time();
$multi_indexer = 1;
$command_line = 1;
$cl = 1;
*/
// Fill in defaults for every option that was delivered neither by the
// Admin interface nor by the command line.
if (isset($soption) && $soption == 'full') {
    $maxlevel = '-1';
}
if (!isset($can_leave)) {
    $can_leave = '0';
}
if (!isset($use_pref)) {
    $use_pref = '0';
}
if (!isset($reindex)) {
    $reindex = '0';
}

// $use_robot is the inverse of $not_use_robot and defaults to '1'.
// The elseif keeps the script from reading $not_use_robot while it is undefined.
if (!isset($not_use_robot) || $not_use_robot == '0') {
    $use_robot = '1';
} elseif ($not_use_robot == '1') {
    $use_robot = '0';
}

// $use_nofollow is the inverse of $not_use_nofollow and defaults to '1'.
if (!isset($not_use_nofollow) || $not_use_nofollow == '0') {
    $use_nofollow = '1';
} elseif ($not_use_nofollow == '1') {
    $use_nofollow = '0';
}

if (!isset($maxlevel)) {
    $maxlevel = '0';
}

if ($multi_indexer > '1') {     // multithreaded indexing?
    if (!$multi) {              // first loop in multi-indexer
        $multi = '0';
    }
    $multi++;
}

// The parentheses spell out the precedence PHP applies anyway (&& binds tighter than ||):
// a given $url opens a log file even when $keep_log is switched off.
// NOTE(review): confirm that this is intended.
if (($keep_log && $multi != '1' && ($all < '4' || $all >= '20')) || $url) {
    $log_handle = create_logFile($id);
}

if (empty($started)) {
    $started = '0';             // initialize this variable(will become timestamp when first indexer was started)
}
if ($multi == '2') {            // Admin started the first indexer
    $started = time();
}

if ($all != '6') {
    printHTMLHeader($omit, $url, $cl, $multi, $all, $started);
}
if ($multi == '1' && !$url) {   // Wait for admin's first thread activation
    die();
}
if ($multi_indexer > '1') {     // suppress output for multithreaded indexing
    $cl = '1';
    $command_line = '1';
}
// Dispatch on $all: every value selects one kind of index or erase task.
if ($all == '1') {      // for command line operation: index all sites in database
    index_all();
}
if ($all == '2') {      // index all new sites, never indexed before
    index_new();
}
if ($all == '3') {      // index all erased sites
    index_erased();
}
if ($all == '4') {      // 'Erase & Re-index all' for command line operation
    erase();
    $log_handle = create_logFile($id);
    index();
}
if ($all == '5') {
    erase();            // erase for command line operation
}
if ($all == '6') {
    pre_all();          // clear 'last indexed' for command line operation
    die();
}
if ($all == '20') {     // index all suspended sites
    index_suspended();
}
if ($all == '21') {     // index all sites shown on one page
    index_these();
}

// None of the bulk tasks above was selected: index the single site given in $url.
if ($all != '1' && $all != '2' && $all != '3' && $all != '4' && $all != '5' && $all != '20' && $all != '21') {
    if ($reindex == 1 && $command_line == 1) {
        // Re-index: take the index options from the row stored for this URL.
        mysqltest();
        // $url arrives from the command line or from the request, so it is escaped before it becomes part of the SQL string.
        $result = mysql_query("select url, spider_depth, required, disallowed, can_leave_domain, use_prefcharset from ".$mysql_table_prefix."sites where url='".mysql_real_escape_string($url)."'");
        if ($debug > '0') echo mysql_error();
        // mysql_query() returns false on failure; fetch only from a valid result
        if ($result && ($row = mysql_fetch_row($result))) {
            $url       = $row[0];
            $maxlevel  = $row[1];
            $in        = $row[2];
            $out       = $row[3];
            $can_leave = $row[4];
            $use_pref  = $row[5];
            if ($can_leave == '') {
                $can_leave = 0;
            }
            if ($maxlevel == -1) {
                $soption = 'full';
            } else {
                $soption = 'level';
            }
        }
        if ($clear == 1) clean_resource($result, '01');
    }
    if (!isset($in)) {
        $in = "";
    }
    if (!isset($out)) {
        $out = "";
    }
    // measure the time consumed by this single-site run
    $started = time();
    index_site($url, $reindex, $maxlevel, $soption, $in, $out, $can_leave, $use_robot, $use_nofollow, $cl, $all, $use_pref);
    $ended = time();
    $consumed = $ended - $started;
    printConsumedReport('consumed', $cl, '0', $consumed);
    printStandardReport('ReindexFinish', $command_line, '0');
}
// Final report: print the closing message, optionally mail a summary to the
// Admin and close the log file if one was opened during this run.
printStandardReport('quit', $command_line, '0');
if ($email_log) {
    $indexed = ($all == 1) ? 'ALL' : $url;
    $log_report = "";
    // $log_handle exists only when a log file was created; empty() avoids reading it while undefined
    if (!empty($log_handle)) {
        $log_report = "Log saved into $log_file";
    }
    mail($admin_email, "Sphider indexing report", "Sphider has finished indexing $indexed at ".date("y-m-d H:i:s").". ".$log_report);
}
if (!empty($log_handle)) {
    fclose($log_handle);
}
?>