Location: PHPKode > scripts > Link Searcher > link-searcher/link_searcher.php
<?php
// The crawl below downloads pages one after another with file_get_contents(),
// so raise PHP's script execution time limit to 2000 seconds.
set_time_limit(2000);

require_once("queue.php");

/******************************************************************************************************************
-Breadth-first link crawler.
 Starting from $page_url it follows the links found on each page, level by level, up to $depth levels and
 prints (as HTML) every visited page whose tag-stripped text matches $regex_search_text.
 Relies on a Queue class (presumably the one loaded from queue.php) offering Put(), Get() and GetCurrentSize().
******************************************************************************************************************/
class SearchLinks
{
/******************************************************************************************************************
-Configuration, filled in by the constructor.
******************************************************************************************************************/
	/* URL of the page the crawl starts from. */
	public $page_url="";
	/* Number of link levels to process below the start page. */
	public $depth=4;
	/* Lower-cased link text to follow; "" means every link is followed. */
	public $link_to_search="";
	/* Delimited PCRE pattern matched against the tag-stripped page text. */
	public $regex_search_text="";

/******************************************************************************************************************
-Class constructor.
 $page_url          : URL of the start page.
 $depth             : number of link levels to process (0 or less: no link is followed).
 $link_to_search    : only links with this text (case-insensitive) are followed; "" follows every link.
 $regex_search_text : either a complete delimited pattern (e.g. "/foo/i") or a bare regex body, which is
                      wrapped as "/body/iU". A bare body may contain "/"; it is escaped automatically.
******************************************************************************************************************/
	function __construct($page_url,$depth,$link_to_search,$regex_search_text)
	{
		$this->page_url=$page_url;
		$this->depth=$depth;
		$this->link_to_search=strtolower((string)$link_to_search);

		$pattern=(string)$regex_search_text;
		if(strpos($pattern,"/")===false)
		{
			$pattern="/".$pattern."/iU";
		}
		elseif(!$this->IsDelimitedRegex($pattern))
		{
			// Bare text containing "/" (e.g. "foo/bar"): escape every slash that is not already
			// escaped, otherwise it would terminate the pattern early and preg_match() would fail.
			$pattern="/".preg_replace('~(?<!\\\\)/~','\\\\/',$pattern)."/iU";
		}
		$this->regex_search_text=$pattern;
	}

/******************************************************************************************************************
-Legacy PHP 4 style constructor name.
 PHP 8 no longer treats a method named after its class as the constructor, so the real work lives in
 __construct(); this method is kept only for code that calls it explicitly.
******************************************************************************************************************/
	function SearchLinks($page_url,$depth,$link_to_search,$regex_search_text)
	{
		$this->__construct($page_url,$depth,$link_to_search,$regex_search_text);
	}

/******************************************************************************************************************
-Used to perform breadth first search on the links.
 Prints one numbered "<a>" line per matching page and flushes after each, so results appear while the crawl
 is still running. Each URL is downloaded at most once. Returns nothing.
******************************************************************************************************************/
	function Search()
	{
		$body_str=file_get_contents($this->page_url);
		if($body_str===false)return;	// start page unreachable: nothing to crawl

		$url_list=$this->GetLinks($body_str,$this->page_url);
		$url_queue=new Queue;
		$this->AddLinksToQueue($url_list,$url_queue);

		$counter=count($url_list);	// entries still to be processed on the current level
		$current_depth=0;
		$display_count=1;		// ordinal of the next printed result
		$visited=array((string)$this->page_url=>true);

		while($url_queue->GetCurrentSize()>0&&$current_depth<$this->depth)
		{
			$result=$url_queue->Get();

			// Entries are "text*url". '*' inside the URL is percent-encoded by AddLinksToQueue(),
			// so the last '*' is always the separator even when the link text contains one.
			$separator=is_string($result)?strrpos($result,"*"):false;
			if($separator!==false)
			{
				$key=trim(strip_tags(substr($result,0,$separator)));
				$value=(string)substr($result,$separator+1);

				$wanted=($this->link_to_search===""||strtolower($key)===$this->link_to_search);
				// Only http(s) targets are ever fetched: the URLs come from remote pages and must
				// not be able to make file_get_contents() read local files or other stream wrappers.
				$fetchable=(preg_match('~^https?://~i',$value)===1);

				if($wanted&&$fetchable&&!isset($visited[$value]))
				{
					$visited[$value]=true;
					$body_str=file_get_contents($value);
					if($body_str!==false)
					{
						if(preg_match($this->regex_search_text,strip_tags($body_str))===1)
						{
							// ISO-8859-1 keeps htmlspecialchars() byte-transparent whatever the page
							// encoding is. The link text is already HTML, so existing entities in it
							// are not encoded a second time.
							$safe_url=htmlspecialchars($value,ENT_QUOTES,"ISO-8859-1");
							$safe_text=htmlspecialchars($key,ENT_QUOTES,"ISO-8859-1",false);
							echo $display_count.") <a href='".$safe_url."' target='_new'>".$safe_text."</a><br/>";
							flush();
							$display_count++;	// advance only when a result was actually printed
						}
						$this->AddLinksToQueue($this->GetLinks($body_str,$value),$url_queue);
					}
				}
			}

			// Level bookkeeping runs for every dequeued entry, including skipped ones; once the
			// current level is exhausted, whatever is queued now forms the next level.
			$counter--;
			if($counter<=0)
			{
				$current_depth++;
				$counter=$url_queue->GetCurrentSize();
			}
		}
	}

/******************************************************************************************************************
-Used to add list of links to the queue.
 $url_list : array as returned by GetLinks(), i.e. "<link text>_<index>" => url.
 $queue    : object with a Put() method; receives one "<link text>*<url>" string per link.
 Anything that is not a non-empty array is ignored.
******************************************************************************************************************/
	function AddLinksToQueue($url_list,&$queue)
	{
		if(!is_array($url_list)||count($url_list)==0)return;
		foreach($url_list as $key=>$value)
		{
			// Remove only the trailing "_<index>" added by GetLinks(); underscores that belong to
			// the link text itself are kept.
			$text=preg_replace('/_\d+\z/','',(string)$key);
			// '*' separates text and URL inside the queue entry, so encode it inside the URL.
			$url_info=$text."*".str_replace("*","%2A",(string)$value);
			$queue->Put($url_info);
		}
	}

/******************************************************************************************************************
-Used to retrieve links from html body.
 $body_str   : HTML source of a page.
 $parent_url : URL the page was loaded from; relative links are resolved against it.
 Returns an array "<anchor inner html>_<match index>" => absolute http(s) URL. Links that cannot be turned
 into an http(s) URL (mailto:, javascript:, fragment-only, ...) are left out.
******************************************************************************************************************/
	function GetLinks($body_str,$parent_url)
	{
		$url_list=array();
		if(!is_string($body_str)||$body_str==="")return $url_list;

		// <a ... href="..." ...>text</a>; the closing quote must equal the opening one, the tag name
		// must be exactly "a", and both tag and text may span several lines.
		$anchor_regex='/<\s*a\s+(?:[^>]*?\s)?href\s*=\s*(["\'])(.*?)\1[^>]*>(.*?)<\s*\/a\s*>/is';
		if(!preg_match_all($anchor_regex,$body_str,$matches,PREG_SET_ORDER))return $url_list;

		foreach($matches as $count=>$match)
		{
			$href=$this->ResolveUrl($match[2],$parent_url);
			if($href===null)continue;
			// The index suffix keeps links that share the same text from overwriting each other.
			$url_list[$match[3]."_".$count]=$href;
		}
		return $url_list;
	}

/******************************************************************************************************************
-Tells whether $pattern already looks like a complete delimited PCRE pattern ("/x/i", "#x#", "{x}i", ...):
 it starts with a non-alphanumeric, non-backslash delimiter, that delimiter (or its closing bracket) occurs
 again later, and only modifier letters follow it.
******************************************************************************************************************/
	private function IsDelimitedRegex($pattern)
	{
		$probe=ltrim($pattern);
		if($probe===""||preg_match('/^[a-zA-Z0-9\\\\]/',$probe)===1)return false;

		$pairs=array("("=>")","["=>"]","{"=>"}","<"=>">");
		$open=$probe[0];
		$close=isset($pairs[$open])?$pairs[$open]:$open;
		$end=strrpos($probe,$close);
		if($end===false||$end===0)return false;

		return preg_match('/^[a-zA-Z]*\s*$/',(string)substr($probe,$end+1))===1;
	}

/******************************************************************************************************************
-Turns the raw value of an href attribute into an absolute http(s) URL.
 Returns null when the link should not be crawled: empty or fragment-only href, a scheme other than
 http/https, or a relative link whose parent URL has no usable http(s) host.
******************************************************************************************************************/
	private function ResolveUrl($href,$parent_url)
	{
		// Attribute values are HTML-encoded ("&amp;" etc.); the fragment never reaches the server.
		$href=trim(htmlspecialchars_decode((string)$href,ENT_QUOTES));
		$fragment=strpos($href,"#");
		if($fragment!==false)$href=substr($href,0,$fragment);
		if($href==="")return null;

		if(preg_match('~^https?://~i',$href)===1)return $href;
		// Scheme-less "www.host/..." links are meant as absolute URLs.
		if(stripos($href,"www.")===0)return "http://".$href;
		// Any other scheme (mailto:, javascript:, file:, ...) is not crawlable.
		if(preg_match('~^[a-z][a-z0-9+.\-]*:~i',$href)===1)return null;

		$parts=parse_url((string)$parent_url);
		if(!is_array($parts)||empty($parts["host"]))return null;
		$scheme=isset($parts["scheme"])?strtolower($parts["scheme"]):"http";
		if($scheme!=="http"&&$scheme!=="https")return null;

		// Protocol-relative link: "//host/path".
		if(substr($href,0,2)==="//")return $scheme.":".$href;

		$root=$scheme."://".$parts["host"].(isset($parts["port"])?":".$parts["port"]:"");
		$path=(isset($parts["path"])&&$parts["path"]!=="")?$parts["path"]:"/";
		if($path[0]!=="/")$path="/".$path;

		// Keep the query string out of the path handling below.
		$query="";
		$mark=strpos($href,"?");
		if($mark!==false)
		{
			$query=substr($href,$mark);
			$href=(string)substr($href,0,$mark);
		}

		// Query-only link ("?page=2") refers to the parent document itself.
		if($href==="")return $root.$path.$query;
		// Document-relative link: resolve against the directory of the parent document.
		if($href[0]!=="/")$href=substr($path,0,strrpos($path,"/")+1).$href;

		return $root.$this->RemoveDotSegments($href).$query;
	}

/******************************************************************************************************************
-Collapses "." and ".." segments of an absolute path ("/a/b/../c" becomes "/a/c"); ".." never climbs above
 the root.
******************************************************************************************************************/
	private function RemoveDotSegments($path)
	{
		$segments=explode("/",$path);
		$last=end($segments);
		$output=array();
		foreach($segments as $segment)
		{
			if($segment===".")continue;
			if($segment==="..")
			{
				// Index 0 is the empty segment in front of the leading "/"; never remove it.
				if(count($output)>1)array_pop($output);
				continue;
			}
			$output[]=$segment;
		}
		// A path ending in "." or ".." denotes a directory, so keep the trailing slash.
		if($last==="."||$last==="..")$output[]="";
		return implode("/",$output);
	}
/******************************************************************************************************************/
}
?>
Return current item: Link Searcher