<?php
// Raise the script execution time limit to 2000 seconds; SearchLinks::Search()
// downloads pages one after another and can run for a long time.
set_time_limit(2000);
// Presumably defines the Queue class that SearchLinks::Search() instantiates
// (it is used there via Put(), Get() and GetCurrentSize()).
require_once("queue.php");
class SearchLinks
{
    /******************************************************************************************************************
    -Search configuration.
    -page_url          : page whose links seed the search.
    -depth             : number of link levels to process before the search stops.
    -link_to_search    : lower-cased link text to follow; empty string means "follow every link".
    -regex_search_text : PCRE pattern (with delimiters) matched against the tag-stripped page body.
    /******************************************************************************************************************/
    public $page_url = "";
    public $depth = 4;
    public $link_to_search = "";
    public $regex_search_text = "";

    /******************************************************************************************************************
    -Class constructor.
    -$page_url          : URL (or anything file_get_contents() accepts) of the start page.
    -$depth             : number of link levels to process.
    -$link_to_search    : only links with exactly this text (case-insensitive) are followed; "" follows all links.
    -$regex_search_text : either a complete "/pattern/flags" regex, or plain pattern text that is wrapped in
    -                     delimiters with the "iU" flags.
    /******************************************************************************************************************/
    public function __construct($page_url, $depth, $link_to_search, $regex_search_text)
    {
        $this->page_url = $page_url;
        $this->depth = $depth;
        $this->link_to_search = strtolower($link_to_search);
        $this->regex_search_text = $this->NormaliseRegex($regex_search_text);
    }

    /******************************************************************************************************************
    -Legacy PHP 4 style constructor name, kept so code calling it explicitly keeps working.
    -PHP 8 no longer treats it as a constructor, which is why __construct() above does the real work.
    /******************************************************************************************************************/
    public function SearchLinks($page_url, $depth, $link_to_search, $regex_search_text)
    {
        $this->__construct($page_url, $depth, $link_to_search, $regex_search_text);
    }

    /******************************************************************************************************************
    -Used to perform breadth first search on the links.
    -Every page whose tag-stripped body matches regex_search_text is echoed as a numbered HTML link.
    -Each URL is fetched at most once; the search stops after "depth" levels or when the queue is empty.
    /******************************************************************************************************************/
    public function Search()
    {
        // The start page is supplied by the caller, so it is fetched without scheme restrictions.
        $body_str = file_get_contents($this->page_url);
        if ($body_str === false) {
            return;
        }

        $url_queue = new Queue;
        $this->AddLinksToQueue($this->GetLinks($body_str, $this->page_url), $url_queue);

        $visited = array((string)$this->page_url => true);
        $max_depth = (int)$this->depth;
        $counter = $url_queue->GetCurrentSize(); // entries left on the current level
        $current_depth = 0;
        $display_count = 1;

        while ($url_queue->GetCurrentSize() > 0) {
            $entry = $url_queue->Get();
            if ($this->ProcessEntry($entry, $url_queue, $visited, $display_count)) {
                $display_count++;
            }

            // Level bookkeeping must run for every dequeued entry, including skipped ones,
            // otherwise the depth counter drifts.
            $counter--;
            if ($counter <= 0) {
                $current_depth++;
                $counter = $url_queue->GetCurrentSize();
            }
            if ($current_depth === $max_depth) {
                break;
            }
        }
    }

    /******************************************************************************************************************
    -Used to add list of links to the queue.
    -$url_list : array as returned by GetLinks() ("<link text>_<index>" => absolute URL).
    -$queue    : object with a Put() method.
    -Each entry is stored as rawurlencode(<link text>) . "*" . <url>, so the first "*" is always the separator
    -even when the link text itself contains "*".
    /******************************************************************************************************************/
    public function AddLinksToQueue($url_list, &$queue)
    {
        if (!is_array($url_list) || count($url_list) == 0) {
            return;
        }
        foreach ($url_list as $key => $value) {
            $key = (string)$key;
            // Strip only the trailing "_<index>" suffix; the link text may contain underscores itself.
            $suffix_pos = strrpos($key, "_");
            $text = ($suffix_pos === false) ? $key : substr($key, 0, $suffix_pos);
            $queue->Put(rawurlencode($text) . "*" . $value);
        }
    }

    /******************************************************************************************************************
    -Used to retrieve links from html body.
    -$body_str   : HTML source of the page.
    -$parent_url : URL of that page; relative hrefs are resolved against it.
    -Returns an array "<link text>_<match index>" => absolute http(s) URL. Anchors whose href cannot be turned
    -into an http(s) URL (mailto:, javascript:, bare fragments, ...) are left out.
    /******************************************************************************************************************/
    public function GetLinks($body_str, $parent_url)
    {
        $url_list = array();
        if (!is_string($body_str) || $body_str === '') {
            return $url_list;
        }

        // "<a" must be followed by whitespace so <abbr>/<area> do not match; the "s" flag lets anchors span lines.
        $pattern = '/<a(?=\s)[^>]*?\shref\s*=\s*(["\'])(.*?)\1[^>]*>(.*?)<\/a\s*>/is';
        if (!preg_match_all($pattern, $body_str, $matches, PREG_SET_ORDER)) {
            return $url_list;
        }

        foreach ($matches as $count => $match) {
            // Attribute values are HTML-encoded ("&amp;"); decode before using them as URLs.
            $raw_href = html_entity_decode($match[2], ENT_QUOTES, 'UTF-8');
            $href = $this->ResolveLink($raw_href, $parent_url);
            if ($href === '') {
                continue;
            }
            $url_list[$match[3] . "_" . $count] = $href;
        }
        return $url_list;
    }

    /******************************************************************************************************************
    -Turns the user supplied search text into a delimited PCRE pattern.
    -Text without "/" is wrapped as "/text/iU"; text already written as "/pattern/flags" is used unchanged;
    -any other text containing "/" is wrapped in a delimiter that does not occur in it.
    /******************************************************************************************************************/
    private function NormaliseRegex($regex_search_text)
    {
        $text = (string)$regex_search_text;
        if (strpos($text, "/") === false) {
            return "/" . $text . "/iU";
        }
        if (preg_match('/^\/.*\/[a-zA-Z]*$/s', $text) === 1) {
            return $text;
        }
        foreach (array('#', '~', '%', '@', '!') as $delimiter) {
            if (strpos($text, $delimiter) === false) {
                return $delimiter . $text . $delimiter . "iU";
            }
        }
        // Every candidate delimiter occurs in the text: fall back to "/" and escape the slashes.
        return "/" . str_replace("/", "\\/", $text) . "/iU";
    }

    /******************************************************************************************************************
    -Handles one queue entry: fetches the page (if its link text qualifies and it was not fetched before),
    -echoes it when the body matches the search regex and enqueues the links found on it.
    -Returns true when a result line was echoed.
    /******************************************************************************************************************/
    private function ProcessEntry($entry, $url_queue, &$visited, $display_count)
    {
        $parts = explode('*', (string)$entry, 2);
        if (count($parts) < 2 || $parts[1] === '') {
            return false;
        }
        $text = trim(strip_tags(rawurldecode($parts[0])));
        $url = $parts[1];

        if ((string)$this->link_to_search !== '' && strtolower($text) !== $this->link_to_search) {
            return false;
        }
        // Discovered links are untrusted: only fetch http(s) URLs, never local paths or other wrappers.
        if (isset($visited[$url]) || preg_match('/^https?:\/\//i', $url) !== 1) {
            return false;
        }
        $visited[$url] = true;

        $body_str = file_get_contents($url);
        if ($body_str === false) {
            return false;
        }

        $displayed = false;
        if (preg_match($this->regex_search_text, strip_tags($body_str)) === 1) {
            // Both values come from remote pages, so escape them for the HTML context.
            $href_html = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            // The link text may still carry the page's own entities; do not double-encode them.
            $text_html = htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
            echo $display_count . ") <a href='" . $href_html . "' target='_new'>" . $text_html . "</a><br/>";
            flush();
            $displayed = true;
        }

        $this->AddLinksToQueue($this->GetLinks($body_str, $url), $url_queue);
        return $displayed;
    }

    /******************************************************************************************************************
    -Resolves an href found on $parent_url into an absolute http(s) URL.
    -Returns "" when the href is empty, is only a fragment, uses another scheme, or cannot be resolved because
    -$parent_url has no host.
    /******************************************************************************************************************/
    private function ResolveLink($href, $parent_url)
    {
        $href = trim((string)$href);
        $hash_pos = strpos($href, '#');
        if ($hash_pos !== false) {
            $href = substr($href, 0, $hash_pos);
        }
        if ($href === '') {
            return '';
        }
        if (preg_match('/^https?:\/\//i', $href) === 1) {
            return $href;
        }
        // Scheme-less "www.example.com/..." links are treated as absolute, as the original heuristic did.
        if (stripos($href, 'www.') === 0) {
            return "http://" . $href;
        }
        // Any other scheme (mailto:, javascript:, ftp:, ...) is not crawlable.
        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href) === 1) {
            return '';
        }

        $base = parse_url((string)$parent_url);
        if (!is_array($base) || empty($base['host'])) {
            return '';
        }
        $scheme = isset($base['scheme']) ? strtolower($base['scheme']) : 'http';
        $root = $scheme . "://" . $base['host'] . (isset($base['port']) ? ":" . $base['port'] : "");
        $path = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';

        if (strpos($href, '//') === 0) {
            return $scheme . ":" . $href; // protocol-relative
        }
        if ($href[0] === '/') {
            return $root . $this->RemoveDotSegments($href);
        }
        if ($href[0] === '?') {
            return $root . $path . $href;
        }

        $slash_pos = strrpos($path, '/');
        $directory = ($slash_pos === false) ? '/' : substr($path, 0, $slash_pos + 1);
        return $root . $this->RemoveDotSegments($directory . $href);
    }

    /******************************************************************************************************************
    -Collapses "." and ".." segments in an absolute path; a query string, if present, is left untouched.
    /******************************************************************************************************************/
    private function RemoveDotSegments($path)
    {
        $query = '';
        $query_pos = strpos($path, '?');
        if ($query_pos !== false) {
            $query = substr($path, $query_pos);
            $path = substr($path, 0, $query_pos);
        }

        $segments = explode('/', $path);
        $last = count($segments) - 1;
        $kept = array();
        foreach ($segments as $index => $segment) {
            if ($segment === '.' || $segment === '..') {
                // Never pop the leading empty segment that represents the root "/".
                if ($segment === '..' && count($kept) > 1) {
                    array_pop($kept);
                }
                if ($index === $last) {
                    $kept[] = ''; // keep the trailing slash
                }
                continue;
            }
            $kept[] = $segment;
        }
        return implode('/', $kept) . $query;
    }
    /******************************************************************************************************************/
}
?>