Location: PHPKode > scripts > Mini spiders and bots class > mini-spiders-and-bots-class/minibots.class.php
<?
/* ---------------------------------------------------------- */
/* minibots.class.php Ver.1                                   */
/* ---------------------------------------------------------- */
/* Mini Bots class is a small php class that allows you to    */
/* use some free web services online to retrieve useful data  */
/* and infos. This version includes:                          */
/* smtp validation, check spelling, meteo, exchange rates,    */
/* shorten urls, and geo referencing with IP address.         */
/* Feel free to use in your applications, but link my blog:   */
/* http://www.barattalo.it                                    */
/* Giulio Pons                                                */
/* ---------------------------------------------------------- */
/**
 * Collection of small "bots" that scrape or query free online services:
 * spelling suggestions, exchange rates, weather, URL shortening, IP
 * geolocation and SMTP-level e-mail validation.
 *
 * All remote lookups are done with plain HTTP requests (or a raw SMTP
 * socket) and parse whatever the remote side answers, so every public
 * method degrades to an "empty" result when the service cannot be reached
 * or its markup does not match what the parser expects.
 */
class Minibots 
{

	/**
	 * Syntax check shared by the recipient and the probe address.
	 * The D modifier makes "$" match only at the very end of the subject;
	 * without it "user@host.tld\n" would pass and the newline would end up
	 * inside the SMTP commands built from the address.
	 */
	const EMAIL_PATTERN = '/^([a-zA-Z0-9\._\+-]+)\@((\[?)[a-zA-Z0-9\-\.]+\.([a-zA-Z]{2,63}|[0-9]{1,3})(\]?))$/D';

	public function __construct () {
	}

	/**
	 * Best-effort detection of the client IP address.
	 *
	 * Looks at HTTP_CLIENT_IP, HTTP_X_FORWARDED_FOR and REMOTE_ADDR, in that
	 * order, and returns the first one that holds a syntactically valid IP.
	 * The first two are client-controlled headers, so the value is validated
	 * before it is handed to callers that embed it in a URL.
	 *
	 * @return string the IP address, or "" when none is available
	 */
	private function getIP() {
		$sources = array("HTTP_CLIENT_IP", "HTTP_X_FORWARDED_FOR", "REMOTE_ADDR");
		foreach ($sources as $name) {
			$value = getenv($name);
			if (!$value) continue;
			// X-Forwarded-For may be a list "client, proxy1, proxy2": keep the first entry
			$parts = explode(",", $value);
			$candidate = trim($parts[0]);
			if (filter_var($candidate, FILTER_VALIDATE_IP) !== false) return $candidate;
		}
		return "";
	}


	/**
	 * Add (or subtract, with a negative value) a number of days to a date.
	 *
	 * @param int         $days   number of days to add
	 * @param string|null $date   start date in any format strtotime() understands; today when empty
	 * @param string      $format date() format of the returned string
	 * @return string the shifted date, or "" when $date cannot be parsed
	 */
	private function dayadd($days,$date=null , $format="d/m/Y"){
		// Start from the current timestamp instead of formatting and re-parsing
		// today's date: strtotime() reads "d/m/Y" output as m/d/Y.
		$base = $date ? strtotime($date) : time();
		if ($base === false) return "";
		$shifted = strtotime($days." days", $base);
		if ($shifted === false) return "";
		return date($format, $shifted);
	}


	/**
	 * Extract the value of an attribute from a piece of HTML.
	 *
	 * @param string $s        HTML fragment (typically a single tag)
	 * @param string $attrname attribute name, matched case-insensitively
	 * @return string value of the first quoted occurrence, or "" when absent
	 */
	private function attr($s,$attrname) {
		// The look-behind keeps "src" from matching inside "data-src";
		// the back-reference requires the closing quote to equal the opening one.
		$pattern = '#(?<![\w-])' . preg_quote($attrname, '#') . '\s*=\s*(["\'])(.*?)\1#is';
		if (preg_match($pattern, $s, $x)) return $x[2];
		return "";
	}



	/**
	 * Ask Google for a spelling suggestion.
	 *
	 * @param string $q text to check
	 * @return string the text of the first link carrying class=spell in the
	 *                result page, or $q unchanged when there is none or the
	 *                page cannot be fetched
	 */
	public function doSpelling($q) {
		// (thanks to google)
		$web_page = file_get_contents( "http://www.google.it/search?q=" . urlencode($q) );
		if ($web_page === false) return $q;
		// collect every anchor tag of the page
		if (!preg_match_all('#<a([^>]*)?>(.*)</a>#Us', $web_page, $a_array)) return $q;
		foreach ($a_array[0] as $anchor) {
			// the spelling suggestion is the link marked with class=spell
			if (stristr($anchor,"class=spell")) return strip_tags($anchor);
		}
		return $q;	//if no results returns the q value
	}


	/**
	 * Look up an exchange rate against the euro in the Bank of Italy CSV feed.
	 *
	 * The feed is queried for the day before $d.
	 *
	 * @param string $m value to look for in the third CSV column (the currency identifier used by the feed)
	 * @param string $d reference date, any format strtotime() understands; today when empty
	 * @return string fifth CSV column of the matching line, or "" when not found
	 */
	public function doExchangeRate($m,$d) {
		// (thanks to bank of italy)
		$day = $this->dayadd(-1,$d,"Y-m-d");
		if ($day === "") return "";
		$dar = explode("-" , $day);
		$web_page = file_get_contents( "http://uif.bancaditalia.it/UICFEWebroot/QueryOneDateAllCur?lang=en&rate=0&initDay=".$dar[2]."&initMonth=".$dar[1]."&initYear=".$dar[0]."&refCur=euro&R1=csv");
		if ($web_page === false) return "";
		// parse csv results
		foreach (explode("\n",$web_page) as $line) {
			$fields = explode(",",$line);
			// headers, blank lines and truncated rows have fewer than five columns
			if (!isset($fields[4])) continue;
			if (trim($fields[2]) === (string)$m) return trim($fields[4]);
		}
		return "";
	}


	/**
	 * Scrape the Google weather box for a place.
	 *
	 * @param string $q    place name
	 * @param string $date day wanted as "Y-m-d"; today when empty
	 * @return array|string array with the keys "title", "img", "min" and "max"
	 *                      (as far as they could be parsed) for the requested day;
	 *                      empty array when nothing is available for that day;
	 *                      "" when $date is more than three days ahead
	 */
	public function doMeteo($q,$date="") {
		//(thanks to google)
		$today = date("Y-m-d");
		if (!$date) $date = $today;
		// "Y-m-d" strings sort chronologically, so a plain string comparison is enough
		if ($date>$this->dayadd(3,$today,"Y-m-d")) return "";
		// grab google page with meteo query
		$web_page = file_get_contents( "http://www.google.it/search?q=meteo+" . urlencode($q) );
		if ($web_page === false) return array();
		//parse to find data
		if (!preg_match_all('#<div class=e>(.*)</table>#Us', $web_page, $m)) return array();
		$box = $m[0][0];
		$p = array();
		// the n-th image of the box describes today + n days
		if (preg_match_all('#<img([^>]*)?>#Us', $box, $img)) {
			foreach ($img[0] as $i => $tag) {
				$day = $this->dayadd($i,$today,"Y-m-d");
				// make root-relative image paths absolute
				$tag = str_replace("src=\"/","src=\"http://www.google.it/",$tag);
				$p[$day]["title"] = $this->attr($tag,"title");
				$p[$day]["img"] = $this->attr($tag,"src");
			}
		}
		// the n-th <nobr> cell holds "max | min" for today + n days
		if (preg_match_all('#<nobr>(.*)</nobr>#Uis', $box, $nobr)) {
			foreach ($nobr[1] as $i => $cell) {
				$day = $this->dayadd($i,$today,"Y-m-d");
				$temp = explode("|",$cell);
				$p[$day]["min"] = utf8_encode( trim(isset($temp[1]) ? $temp[1] : "") );
				$p[$day]["max"] = utf8_encode( trim($temp[0]) );
			}
		}
		return isset($p[$date]) ? $p[$date] : array();
	}


	/**
	 * Shorten a URL through tinyurl.com.
	 *
	 * @param string $ToConvert the plain (not URL-encoded) URL to shorten
	 * @return string|false body of the API answer, false when the request fails
	 */
	public function doShortURL($ToConvert) {
		//(thanks to tinyurl.com)
		// encode the target, otherwise everything after its first "&" or "#"
		// is read as part of the API request and not of the URL to shorten
		$short_url= file_get_contents('http://tinyurl.com/api-create.php?url=' . urlencode($ToConvert));
		return $short_url;
	}


	/**
	 * Geolocate an IP address by scraping geoiptool.com.
	 *
	 * @param string $ip address to look up; the detected client address when empty
	 * @return array map "label" => "value" built from the rows of the result
	 *               table; empty when the page cannot be fetched or parsed
	 */
	public function doGeoIp($ip="") {
		//(thanks to geoiptool)
		if (!$ip) $ip = $this->getIP();
		$ar = array();
		$web_page = file_get_contents( "http://www.geoiptool.com/en/?IP=".urlencode($ip) );
		if ($web_page === false) return $ar;
		if (!preg_match_all('#<table([^>]*)tbl_style([^>]*)?>(.*)</table>#Us', $web_page, $t_array)) return $ar;
		foreach ($t_array[0] as $table) {
			//find table with data
			if (!stristr($table,"IP Address")) continue;
			if (!preg_match_all('#<tr([^>]*)?>(.*)</tr>#Us', $table, $tr_array)) continue;
			foreach ($tr_array[0] as $row) {
				// split on the first colon only: values (IPv6 addresses, times) may contain colons
				$tar = explode(":", strip_tags($row), 2);
				$ar[ trim($tar[0]) ] = isset($tar[1]) ? trim($tar[1]) : "";
			}
		}
		return $ar;
	}

	/**
	 * List the mail servers to try for a domain.
	 *
	 * @param string $domain domain part of the address
	 * @return array MX hosts sorted by ascending weight; when there is no MX
	 *               record but an A record exists, the resolved address of the
	 *               domain itself; empty array otherwise
	 */
	private function findMailers($domain) {
		$mxhosts = array();
		$mxweight = array();
		if (getmxrr($domain, $mxhosts, $mxweight)) {
			$mxs = array();
			foreach ($mxhosts as $i => $host) {
				$mxs[$host] = $mxweight[$i];
			}
			asort($mxs);
			return array_keys($mxs);
		}
		if (checkdnsrr($domain, 'A')) {
			return array(gethostbyname($domain));
		}
		return array();
	}

	/**
	 * Read one complete SMTP reply from the socket.
	 *
	 * A reply can span several lines: every line but the last has "-" right
	 * after the three digit code ("250-..."), the last one has a space.
	 * Reading only one line would leave the rest in the buffer and make the
	 * following commands read the wrong answer.
	 *
	 * @param resource $sock open connection to the mail server
	 * @return string the raw reply, "" when nothing could be read
	 */
	private function readSmtpReply($sock) {
		$reply = "";
		while (($line = fgets($sock, 4096)) !== false) {
			$reply .= $line;
			if (strlen($line) < 4 || $line[3] != "-") break;
		}
		return $reply;
	}

	/**
	 * Validate an e-mail address by talking SMTP to the mail server of its
	 * domain (HELO, MAIL FROM, RCPT TO, QUIT) without sending a message.
	 *
	 * The check is optimistic: only a hard refusal is reported as an error.
	 * When the DNS functions are not available in this PHP build nothing
	 * beyond the syntax can be checked and the address is reported as valid.
	 *
	 * @param string $email         address to validate
	 * @param string $probe_address sender used in MAIL FROM; defaults to $_SERVER["SERVER_ADMIN"]
	 * @param bool   $debug         when true, prints the SMTP dialogue as HTML
	 * @return true|array true when no hard error was found, otherwise
	 *                    array(message, code) where code is the SMTP reply
	 *                    code, "0" when the failure is unclear or no server
	 *                    could be contacted, "n.a." when no SMTP code applies
	 */
	public function doSMTPValidation($email, $probe_address="", $debug=false) {
		$output = "";
		if (!$probe_address) {
			$probe_address = isset($_SERVER["SERVER_ADMIN"]) ? $_SERVER["SERVER_ADMIN"] : "";
		}
		# --------------------------------
		# Check syntax with regular expression
		# --------------------------------
		if (!preg_match(self::EMAIL_PATTERN, $email, $matches)) {
			$error = 'Address syntax not correct';
		} elseif (function_exists('checkdnsrr')) {
			$domain = $matches[2];
			# --------------------------------
			# Construct array of available mailservers
			# --------------------------------
			$mailers = $this->findMailers($domain);
			$total = count($mailers);
			if ($total <= 0) {
				$error = "No usable DNS records found for domain '$domain'";
			} elseif (!preg_match(self::EMAIL_PATTERN, $probe_address, $probematches)) {
				# the probe address is the same for every server: check it once
				$error = "Il probe_address non è una mail valida.";
			} else {
				$probe_domain = $probematches[2];
				$connect_timeout = 2;
				# --------------------------------
				# Be sure to set this correctly!
				# --------------------------------
				$cmds = array(
					"HELO $probe_domain",
					"MAIL FROM: <$probe_address>",
					"RCPT TO: <$email>",
					"QUIT",
				);
				# --------------------------------
				# Query the mailservers until one answers
				# --------------------------------
				for($n=0; $n < $total; $n++) {
					$mailer = $mailers[$n];
					if($debug) { $output .= "Checking server $mailer...\n";}
					$errno = 0;
					$errstr = "";
					# --------------------------------
					# Try to open up socket; failure is expected for
					# unreachable servers, so the warning is suppressed
					# --------------------------------
					$sock = @fsockopen($mailer, 25, $errno , $errstr, $connect_timeout);
					if (!$sock) {
						if ($n == $total-1) {
							$error = "None of the mailservers listed for $domain could be contacted";
							$codice = "0";
						}
						continue;
					}
					$response = $this->readSmtpReply($sock);
					if($debug) {$output .= "Opening up socket to $mailer... Success!\n";}
					stream_set_timeout($sock, 5);
					$meta = stream_get_meta_data($sock);
					if($debug) { $output .= "$mailer replied: $response\n";}
					# --------------------------------
					# Hard error on connect -> break out
					# --------------------------------
					if(!$meta['timed_out'] && !preg_match('/^2\d\d[ -]/', $response)) {
						$codice = trim(substr(trim($response),0,3));
						if ($codice === "421") {
							//421 #4.4.5 Too many connections to this host.
							$error = $response;
						} else {
							if($response=="" || $codice=="") {
								// something went wrong but the server gave no usable code
								$codice = "0";
							}
							$error = "Error: $mailer said: $response\n";
						}
						fclose($sock);
						break;
					}
					foreach($cmds as $cmd) {
						$before = microtime(true);
						fputs($sock, "$cmd\r\n");
						$response = $this->readSmtpReply($sock);
						// the timeout flag belongs to the last read: refresh it for every reply
						$meta = stream_get_meta_data($sock);
						$t = 1000*(microtime(true)-$before);
						if($debug) {$output .= "$cmd\n$response" . "(" . sprintf('%.2f', $t) . " ms)\n";}
						if(!$meta['timed_out'] && preg_match('/^5\d\d[ -]/', $response)) {
							$codice = trim(substr(trim($response),0,3));
							# --------------------------------
							// 552 is a quota problem (the mailbox exists), e.g.
							// 552 RCPT TO: Mailbox disk quota exceeded
							// so the raw reply is passed on instead of "Unverified address"
							# --------------------------------
							if ($codice !== "552") {
								$error = "Unverified address: $mailer said: $response";
							} else {
								$error = $response;
							}
							fclose($sock);
							break 2;
						}
					}
					fclose($sock);
					if($debug) { $output .= "Succesful communication with $mailer, no hard errors, assuming OK\n";}
					break;
				}
			}
		}
		if($debug) {
			print nl2br(htmlentities($output));
		}
		if(!isset($codice)) {$codice="n.a.";}
		if(isset($error)) return array($error,$codice);
		return true;
	}

}

?>
Return current item: Mini spiders and bots class