Location: PHPKode > scripts > File Host Link Tester > class.linktest.php
<?php

/**
 * linktest class
 * 
 * This class is used to check the validity of file host links (also called one-click hosts). 
 * It does so by looking for the file size that the host displays. It requires that you have
 * curl installed and a version of PHP that supports preg_match and preg_replace.
 * 
 * @license 	GPL license
 * @author	 	Max Power
 * @copyright	2008, Max Power
 */
class linktest {
	
	// request state shared by all methods; test() fills these in for the link being checked
	public $url, $method, $format, $domain, $adjustment, $filters;
	
	// class constants
	// PATTERN captures a run of digits, dots, commas and whitespace that is followed by a
	// KB, MB or GB unit; CONVERSION is the factor between two adjacent units
	const PATTERN = "@([\d\.,\s]+)(KB|MB|GB)@i";
	const CONVERSION = 1024;
	
	/**
	 * The test method is the only public method and the only method necessary for interfacing
	 * with this class. It validates the url, picks the matching host entry (see hosts()) and
	 * dispatches to the handler method registered for that host.
	 * 
	 * @param string url (required) - must be a full url including http://
	 * @param string format (optional) - only accepted strings are 'KB', 'MB', and 'GB'
	 * @param boolean supported (optional) - to only allow supported hosts or not
	 * @return array result - zero index is either a number or false; on failure index one
	 *                        holds a short error code and index two a readable message
	 */ 
	public function test($url, $format = 'MB', $supported = true) {
		
		// VARIABLE SETUP
		
		// check for valid hostname in url (a non-string can never be a valid url)
		if (!is_string($url) || !preg_match('@^https?://?([^/]+)@i', $url, $matches)) {
			return $this->failure('invalid url', "The link provided is not a valid url");
		}
		$hostname = $matches[1];
		
		// set format to 'MB' if variable is not KB, MB or GB
		$format = is_string($format) ? strtoupper($format) : 'MB';
		if ($format !== 'KB' && $format !== 'MB' && $format !== 'GB') {
			$format = 'MB';
		}
		
		// set supported to true if variable is not true or false
		if ($supported !== true && $supported !== false) {
			$supported = true;
		}
		
		// set global variables
		$this->url = $url;
		$this->format = $format;
		
		// FILE HOST PROCESSING
		
		// find out which host to check; this also sets domain, method, adjustment and filters
		$host = $this->resolveHost($hostname);
		
		// return false if no supported hosts were matched or set default variables if supported is false
		if (!$host) {
			if ($supported) {
				return $this->failure('invalid host', "The domain $hostname is not a supported host");
			}
			$host = 'other';
			$this->domain = $hostname;
			$this->method = 'curl';
			$this->adjustment = 1;
			$this->filters = null;
		}
		
		// dynamic function call; $host is always a key of the hosts() table or 'other'
		return $this->$host();
	}
	
	/**
	 * Returns the table of supported hosts. Only six hosts are used as default but many
	 * others are available as needed and need to be uncommented for use.
	 * 
	 * important: do not change key names
	 * the following is the format of the hosts array:
	 * $hosts[method name][domain name] = array(domain pattern, url retrieve method, size adjustment, filters array);
	 * 
	 * NOTE(review): in the copy of this file that was reviewed, every domain pattern and
	 * several filters had been mangled into "hide@address.com" (an invalid regex, so no host
	 * ever matched). The domain patterns below were rebuilt from the domain names. Filters
	 * that could not be rebuilt are flagged and must be restored from the original
	 * distribution.
	 * 
	 * @return array hosts
	 */
	private function hosts() {
		
		$hosts = array();
		
		// most popular hosts
		$hosts['rapidshare']['rapidshare.com'] 		= array("@rapidshare\.com@i", 'curl', 1000/self::CONVERSION, array('@<u>100 MB</u>@i'));
		$hosts['rapidshare']['rapidshare.de'] 		= array("@rapidshare\.de@i", 'curl', 1, array('@>300 MB<@i'));
		$hosts['other']['megaupload.com'] 			= array("@megaupload\.com@i", 'curl', 1);
		$hosts['other']['megarotic.com'] 			= array("@megarotic\.com@i", 'curl', 1);
		$hosts['other']['depositfiles.com'] 		= array("@depositfiles\.com@i", 'file', 1);
		// NOTE(review): megashares originally carried one filter, but its text is lost (it
		// reads '@ hide@address.com' in the reviewed copy). As written it was an invalid
		// regex that was silently skipped, so leaving it out keeps the effective behavior.
		$hosts['other']['megashares.com'] 			= array("@megashares\.com@i", 'curl', 1);
		
		// lesser known hosts
		// these hosts are commented out but can be used as needed
		// entries marked [filter lost] contain a corrupted filter that must be restored first
		//$hosts['other']['filefactory.com'] 		= array("@filefactory\.com@i", 'curl', 1);
		//$hosts['other']['sendspace.com'] 			= array("@sendspace\.com@i", 'file', 1);
		//$hosts['other']['badongo.com'] 			= array("@badongo\.com@i", 'curl', 1);
		//$hosts['other']['filecloud.com'] 			= array("@filecloud\.com@i", 'curl', 1);
		//$hosts['other']['filefront.com'] 			= array("@filefront\.com@i", 'curl', 1);
		//$hosts['other']['gigasize.com'] 			= array("@gigasize\.com@i", 'curl', 1);
		//$hosts['other']['uploadmb.com'] 			= array("@uploadmb\.com@i", 'curl', pow(1000/self::CONVERSION, 2));
		//$hosts['other']['speedshare.org'] 		= array("@speedshare\.org@i", 'curl', 1);
		//$hosts['other']['uploading.com'] 			= array("@uploading\.com@i", 'curl', 1);
		//$hosts['other']['furk.net'] 				= array("@furk\.net@i", 'curl', 1);
		//$hosts['other']['savefile.info'] 			= array("@savefile\.info@i", 'curl', 1);
		//$hosts['other']['arbup.org'] 				= array("@arbup\.org@i", 'curl', 1, array('@x 120MB hide@address.com')); // [filter lost]
		//$hosts['other']['getupload.com']			= array("@getupload\.com@i", 'curl', 1);
		//$hosts['other']['turboupload.com'] 		= array("@turboupload\.com@i", 'curl', 1);
		//$hosts['other']['titanicshare.com'] 		= array("@titanicshare\.com@i", 'curl', 1);
		//$hosts['other']['file2you.net'] 			= array("@file2you\.net@i", 'curl', 1);
		//$hosts['other']['upitus.com'] 			= array("@upitus\.com@i", 'curl', 1, array('@o 80 MB \(@i'));
		//$hosts['other']['egoshare.com'] 			= array("@egoshare\.com@i", 'curl', 1);
		//$hosts['other']['tornadodrive.com'] 		= array("@tornadodrive\.com@i", 'curl', 1);
		//$hosts['other']['uploadpalace.com'] 		= array("@uploadpalace\.com@i", 'curl', 1);
		//$hosts['other']['4filehosting.com'] 		= array("@4filehosting\.com@i", 'curl', 1);
		//$hosts['other']['primeupload.com'] 		= array("@primeupload\.com@i", 'curl', 1);
		//$hosts['other']['yousendit.com'] 			= array("@yousendit\.com@i", 'file', 1);
		//$hosts['other']['transferbigfiles.com']	= array("@transferbigfiles\.com@i", 'file', 1, array('@o 1gb hide@address.com', '@>~300kb<@i')); // [filter lost]
		//$hosts['other']['mailbigfile.com'] 		= array("@mailbigfile\.com@i", 'curl', 1);
		//$hosts['other']['friendlyfiles.net'] 		= array("@friendlyfiles\.net@i", 'curl', 1);
		//$hosts['other']['bigupload.com'] 			= array("@bigupload\.com@i", 'file', 1);
		//$hosts['other']['axifile.com'] 			= array("@axifile\.com@i", 'curl', 1, array('@ 200 @i'));
		//$hosts['other']['speedyshare.com'] 		= array("@speedyshare\.com@i", 'curl', 1);
		//$hosts['other']['justupit.com'] 			= array("@justupit\.com@i", 'curl', 1, array('@>170mb!!<@i'));
		//$hosts['other']['momoshare.com'] 			= array("@momoshare\.com@i", 'curl', 1);
		//$hosts['other']['internetfiles.org'] 		= array("@internetfiles\.org@i", 'curl', 1);
		//$hosts['other']['ultrashare.net'] 		= array("@ultrashare\.net@i", 'curl', 1, array('@ 100MB hide@address.com')); // [filter lost]
		//$hosts['other']['upload2.net'] 			= array("@upload2\.net@i", 'curl', 1, array('@s hide@address.com')); // [filter lost]
		//$hosts['other']['webfilehost.com'] 		= array("@webfilehost\.com@i", 'curl', 1, array('@500\s?hide@address.com')); // [filter lost]
		//$hosts['other']['rapidfile.net'] 			= array("@rapidfile\.net@i", 'file', 1, array('@o 300 MB hide@address.com')); // [filter lost]
		//$hosts['other']['zshare.net'] 			= array("@zshare\.net@i", 'file', 1);
		
		return $hosts;
	}
	
	/**
	 * Looks the hostname up in the hosts table. On a match the global domain, method,
	 * adjustment and filters variables are set from the matching entry. If several entries
	 * match, the last one in the table wins.
	 * 
	 * @param string hostname (required) - the host part taken from the url
	 * @return mixed handler - name of the method that handles the host, or false if none matched
	 */
	private function resolveHost($hostname) {
		
		$handler = false;
		foreach ($this->hosts() as $name => $domains) {
			foreach ($domains as $domain => $entry) {
				if (preg_match($entry[0], $hostname)) {
					$handler = $name;
					$this->domain = $domain;
					$this->method = $entry[1];
					$this->adjustment = $entry[2];
					// the filters array is optional in a host entry
					$this->filters = isset($entry[3]) ? $entry[3] : null;
				}
			}
		}
		
		return $handler;
	}
	
	/**
	 * Builds the failure result that the public test method returns.
	 * 
	 * @param string code (required) - short error code such as 'invalid link'
	 * @param string message (required) - readable description of the problem
	 * @return array result - array(false, code, message)
	 */
	private function failure($code, $message) {
		return array(false, $code, $message);
	}
	
	/**
	 * Builds the failure result used whenever no usable file size can be read for the link.
	 * 
	 * @return array result
	 */
	private function invalidLink() {
		return $this->failure('invalid link', "This link for $this->domain is invalid");
	}
	
	/**
	 * Rapidshare requires a two step process in order to view the file size. To make it more
	 * complicated, the second page can only be reached using POST. This method gathers the 
	 * information required to make the POST call and passes it to the other function, which
	 * the other domains use. If other hosts require a two step process, this rapidshare method
	 * can be used as a template.
	 * 
	 * @return array result
	 */
	private function rapidshare() {
		
		// download the first page once; both patterns below are applied to the same html
		$html = $this->fetch($this->url);
		if (!is_string($html)) {
			return $this->invalidLink();
		}
		$html = $this->clean($html);
		
		// get rapidshare submit form url
		// NOTE(review): the original pattern was corrupted in the reviewed copy; this one is
		// a reconstruction that captures the action attribute of the first form tag
		if (!preg_match('@<form[^>]*\saction="([^"]+)"@i', $html, $matches)) {
			// without the form there is no second page to ask for the file size
			return $this->invalidLink();
		}
		$url = $matches[1];
		$params = "dl.start=Free";
		
		// get rapidshare.de hidden param
		if ($this->domain == 'rapidshare.de') {
			if (preg_match('@<input.*.hidden.*.value="(.*)">@i', $html, $matches)) {
				$params = "$params&uri=$matches[1]";
			}
		}
		
		// get file size
		$this->url = $url;
		
		return $this->other($params);
	}
	
	/**
	 * The other method is used to get the file size by all domains other than rapidshare. It
	 * contains the standard pattern for finding the file size and also makes the call to the
	 * match method and convertSize method.
	 * 
	 * @param string params (optional) - params is used for passing POST parameters
	 * @return array result
	 */
	private function other($params = null) {
		
		// get file size and format
		$matches = $this->match($this->url, self::PATTERN, $params);
		if ($matches === false) {
			return $this->invalidLink();
		}
		
		// the captured text can hold several whitespace separated tokens (for example
		// "100 12.5 "); the size is the token directly in front of the unit
		$tokens = preg_split('@\s+@', $matches[1], -1, PREG_SPLIT_NO_EMPTY);
		$size = empty($tokens) ? '' : str_replace(',', '', end($tokens));
		if (!is_numeric($size)) {
			return $this->invalidLink();
		}
		
		// convert size to requested format
		return $this->convertSize($size, strtoupper($matches[2]));
	}
	
	/**
	 * The fetch method downloads the HTML of a url using curl or file_get_contents according
	 * to the global method variable. When params is given the request is sent as a POST.
	 * 
	 * @param string url (required) - the url to download
	 * @param string params (optional) - params is used for passing POST parameters
	 * @return mixed html - the response body, or false if the download failed
	 */
	private function fetch($url, $params = null) {
		
		if ($this->method == 'curl') {
			$curl = curl_init();
			if ($curl === false) {
				return false;
			}
			if (!is_null($params)) {
				curl_setopt($curl, CURLOPT_POST, 1);
				curl_setopt($curl, CURLOPT_POSTFIELDS, $params);
			}
			curl_setopt($curl, CURLOPT_URL, $url);
			curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
			$html = curl_exec($curl);
			curl_close($curl);
			return $html;
		}
		
		if (is_null($params)) {
			return file_get_contents($url);
		}
		
		// naming the content type explicitly keeps PHP from raising a notice about having
		// to assume it for the POST body
		$options = array('http' => array(
			'method' => 'POST',
			'header' => "Content-Type: application/x-www-form-urlencoded\r\n",
			'content' => $params
		));
		
		return file_get_contents($url, false, stream_context_create($options));
	}
	
	/**
	 * The clean method filters the HTML for common problems and then for the domain specific
	 * filters that are stored in the global filters array. Every match of a filter is
	 * replaced by a single space. A filter that fails to run is skipped.
	 * 
	 * @param string html (required) - the unfiltered HTML
	 * @return string html - the filtered HTML
	 */
	private function clean($html) {
		
		// uncomment line below to test unfiltered html
		// echo "<xmp>$html</xmp>"; exit;
		
		// setup patterns for preg_replace to remove common-problem text
		$patterns = array(
			'@<title>.*?</title>@i',
			'@<meta.*?>@i',
			'@<noscript>(.|\n)*?</noscript>@i',
			'@&nbsp;@i',
			'@</b>@i'
		);
		
		// add custom patterns from filters array
		if (is_array($this->filters)) {
			foreach ($this->filters as $filter) {
				$patterns[] = $filter;
			}
		}
		
		// process patterns with preg_replace; null means the pattern failed, so keep the html
		foreach ($patterns as $filter) {
			$filtered = preg_replace($filter, ' ', $html);
			if (!is_null($filtered)) {
				$html = $filtered;
			}
		}
		
		// uncomment line below to test filtered html
		// echo "<xmp>$html</xmp>"; exit;
		
		return $html;
	}
	
	/**
	 * The match method is used by the other methods to get the HTML and perform the preg_replace
	 * and preg_match. First, it gets the HTML with the fetch method. Next, the HTML is filtered
	 * with the clean method. Finally, the filtered HTML is matched against the pattern that is
	 * passed into the method.
	 * 
	 * @param string url (required) - this url may not always be the same as the global url so it must be passed
	 * @param string pattern (required) - the regular expression to look for in the filtered HTML
	 * @param string params (optional) - params is used for passing POST parameters
	 * @return mixed result - the preg_match matches array, or false if the download or the match failed
	 */
	private function match($url, $pattern, $params = null) {
		
		// get html from url
		$html = $this->fetch($url, $params);
		if (!is_string($html)) {
			return false;
		}
		
		// check filtered html against pattern and return result
		if (preg_match($pattern, $this->clean($html), $matches)) {
			return $matches;
		}
		
		return false;
	}
	
	/**
	 * The convertSize method is used to change the file size from the format that the host
	 * uses (KB, MB, or GB) to the file size format that was requested from the test method.
	 * It also uses the adjustment variable, which is used if the host converts their file
	 * sizes wrong (the most notable example is rapidshare.com, which needs adjustment).
	 * 
	 * @param number size (required) - the file size that was matched from the host
	 * @param string sourceFormat (required) - the format that the host uses (not the requested format)
	 * @return array result - zero index is the converted size, or the invalid link result
	 *                        if the size or one of the formats cannot be used
	 */
	private function convertSize($size, $sourceFormat) {
		
		// number of KB in one unit of each format
		$conversion = self::CONVERSION;
		$factors = array('KB' => 1, 'MB' => $conversion, 'GB' => $conversion * $conversion);
		
		// refuse anything that cannot take part in the equation instead of failing in it
		$size = str_replace(',', '', trim((string) $size));
		if (!is_numeric($size) || !isset($factors[$sourceFormat]) || !isset($factors[$this->format])) {
			return $this->invalidLink();
		}
		
		// convert size to KB then convert to final format
		$size = ($size + 0) * $this->adjustment;
		$size = ($size * $factors[$sourceFormat]) / $factors[$this->format];
		
		return array($size);
	}

}

?>
Return current item: File Host Link Tester