<?php
/**
* linktest class
*
* This class is used to check the validity of file host links (also called one-click hosts).
* It does so by looking for the file size that the host displays. It requires that you have
* curl installed and a version of PHP that supports preg_match and preg_replace.
*
* @license GPL license
* @author Max Power
* @copyright 2008, Max Power
*/
class linktest {
    // Working state shared by all methods. test() fills these in (directly or through
    // selectHost()) and the private helpers read them.
    public $url, $method, $format, $domain, $adjustment, $filters;

    // Finds a number (digits, dots, commas, whitespace) immediately followed by a unit.
    const PATTERN = "@([\d\.,\s]+)(KB|MB|GB)@i";
    // Number of smaller units in the next larger one (KB -> MB -> GB).
    const CONVERSION = 1024;

    /**
    * The test method is the only public method and the only method necessary for interfacing
    * with this class. It validates the url, normalises the optional arguments, picks the
    * matching host definition from hosts() and then calls the handler method for that host.
    *
    * @param string url (required) - must be a full url including http://
    * @param string format (optional) - only accepted strings are 'KB', 'MB', and 'GB';
    *                                   anything else falls back to 'MB'
    * @param boolean supported (optional) - true to only allow hosts listed in hosts(),
    *                                       false to try any host with default settings
    * @return array result - on success a single element: index 0 is the file size in the
    *                        requested format. On failure index 0 is false, index 1 is a
    *                        short error code ('invalid url', 'invalid host', 'invalid link')
    *                        and index 2 is a readable message.
    */
    public function test($url, $format = 'MB', $supported = true) {
        // check for valid hostname in url
        if (!preg_match('@^https?://?([^/]+)@i', $url, $matches)) {
            return $this->failure('invalid url', "The link provided is not a valid url");
        }
        $hostname = $matches[1];

        // set format to 'MB' if variable is not KB, MB or GB
        $format = is_string($format) ? strtoupper($format) : 'MB';
        if (!in_array($format, array('KB', 'MB', 'GB'), true)) {
            $format = 'MB';
        }

        // set supported to true if variable is not true or false
        if (!is_bool($supported)) {
            $supported = true;
        }

        $this->url = $url;
        $this->format = $format;

        // find out which host definition applies; this also sets domain/method/adjustment/filters
        $host = $this->selectHost($hostname);
        if ($host === false) {
            if ($supported) {
                return $this->failure('invalid host', "The domain $hostname is not a supported host");
            }
            // unsupported hosts are tried with the generic handler and neutral settings
            $host = 'other';
            $this->domain = $hostname;
            $this->method = 'curl';
            $this->adjustment = 1;
            $this->filters = null;
        }

        // dynamic method call: $host is a first-level key of the hosts() table and must
        // therefore be the name of a handler method of this class
        return $this->$host();
    }

    /**
    * Returns the table of known hosts. Only six hosts are active by default; the others are
    * commented out and can be enabled as needed.
    *
    * Format: $hosts[handler method name][domain name] =
    *             array(domain pattern, url retrieve method, size adjustment, filters array);
    * The filters array is optional. Important: the first-level keys are method names of this
    * class (see the dynamic call in test()), so do not rename them.
    *
    * The domain patterns found in this copy of the file ended in the literal text
    * "hide@address.com", which PCRE rejects as unknown modifiers, so no host could ever
    * match. They are restored here as "@<name>\.<tld>@i", using each entry's own domain key.
    * NOTE(review): entries marked "filter lost" carried a custom filter that is unreadable in
    * this copy (the surviving beginning is quoted); restore it from the upstream release.
    *
    * @return array hosts
    */
    private function hosts() {
        $hosts = array();
        // most popular hosts
        $hosts['rapidshare']['rapidshare.com'] = array('@rapidshare\.com@i', 'curl', 1000 / self::CONVERSION, array('@<u>100 MB</u>@i'));
        $hosts['rapidshare']['rapidshare.de'] = array('@rapidshare\.de@i', 'curl', 1, array('@>300 MB<@i'));
        $hosts['other']['megaupload.com'] = array('@megaupload\.com@i', 'curl', 1);
        $hosts['other']['megarotic.com'] = array('@megarotic\.com@i', 'curl', 1);
        $hosts['other']['depositfiles.com'] = array('@depositfiles\.com@i', 'file', 1);
        $hosts['other']['megashares.com'] = array('@megashares\.com@i', 'curl', 1); // filter lost: '@ ...'
        // lesser known hosts
        // these hosts are commented out but can be used as needed
        //$hosts['other']['filefactory.com'] = array('@filefactory\.com@i', 'curl', 1);
        //$hosts['other']['sendspace.com'] = array('@sendspace\.com@i', 'file', 1);
        //$hosts['other']['badongo.com'] = array('@badongo\.com@i', 'curl', 1);
        //$hosts['other']['filecloud.com'] = array('@filecloud\.com@i', 'curl', 1);
        //$hosts['other']['filefront.com'] = array('@filefront\.com@i', 'curl', 1);
        //$hosts['other']['gigasize.com'] = array('@gigasize\.com@i', 'curl', 1);
        //$hosts['other']['uploadmb.com'] = array('@uploadmb\.com@i', 'curl', pow(1000 / self::CONVERSION, 2));
        //$hosts['other']['speedshare.org'] = array('@speedshare\.org@i', 'curl', 1);
        //$hosts['other']['uploading.com'] = array('@uploading\.com@i', 'curl', 1);
        //$hosts['other']['furk.net'] = array('@furk\.net@i', 'curl', 1);
        //$hosts['other']['savefile.info'] = array('@savefile\.info@i', 'curl', 1);
        //$hosts['other']['arbup.org'] = array('@arbup\.org@i', 'curl', 1); // filter lost: '@x 120MB ...'
        //$hosts['other']['getupload.com'] = array('@getupload\.com@i', 'curl', 1);
        //$hosts['other']['turboupload.com'] = array('@turboupload\.com@i', 'curl', 1);
        //$hosts['other']['titanicshare.com'] = array('@titanicshare\.com@i', 'curl', 1);
        //$hosts['other']['file2you.net'] = array('@file2you\.net@i', 'curl', 1);
        //$hosts['other']['upitus.com'] = array('@upitus\.com@i', 'curl', 1, array('@o 80 MB \(@i'));
        //$hosts['other']['egoshare.com'] = array('@egoshare\.com@i', 'curl', 1);
        //$hosts['other']['tornadodrive.com'] = array('@tornadodrive\.com@i', 'curl', 1);
        //$hosts['other']['uploadpalace.com'] = array('@uploadpalace\.com@i', 'curl', 1);
        //$hosts['other']['4filehosting.com'] = array('@4filehosting\.com@i', 'curl', 1);
        //$hosts['other']['primeupload.com'] = array('@primeupload\.com@i', 'curl', 1);
        //$hosts['other']['yousendit.com'] = array('@yousendit\.com@i', 'file', 1);
        //$hosts['other']['transferbigfiles.com'] = array('@transferbigfiles\.com@i', 'file', 1, array('@>~300kb<@i')); // filter lost: '@o 1gb ...'
        //$hosts['other']['mailbigfile.com'] = array('@mailbigfile\.com@i', 'curl', 1);
        //$hosts['other']['friendlyfiles.net'] = array('@friendlyfiles\.net@i', 'curl', 1);
        //$hosts['other']['bigupload.com'] = array('@bigupload\.com@i', 'file', 1);
        //$hosts['other']['axifile.com'] = array('@axifile\.com@i', 'curl', 1, array('@ 200 @i'));
        //$hosts['other']['speedyshare.com'] = array('@speedyshare\.com@i', 'curl', 1);
        //$hosts['other']['justupit.com'] = array('@justupit\.com@i', 'curl', 1, array('@>170mb!!<@i'));
        //$hosts['other']['momoshare.com'] = array('@momoshare\.com@i', 'curl', 1);
        //$hosts['other']['internetfiles.org'] = array('@internetfiles\.org@i', 'curl', 1);
        //$hosts['other']['ultrashare.net'] = array('@ultrashare\.net@i', 'curl', 1); // filter lost: '@ 100MB ...'
        //$hosts['other']['upload2.net'] = array('@upload2\.net@i', 'curl', 1); // filter lost: '@s ...'
        //$hosts['other']['webfilehost.com'] = array('@webfilehost\.com@i', 'curl', 1); // filter lost: '@500\s...'
        //$hosts['other']['rapidfile.net'] = array('@rapidfile\.net@i', 'file', 1); // filter lost: '@o 300 MB ...'
        //$hosts['other']['zshare.net'] = array('@zshare\.net@i', 'file', 1);
        return $hosts;
    }

    /**
    * Looks the hostname up in the hosts() table. On the first match the domain, method,
    * adjustment and filters properties are set from the matching entry.
    *
    * @param string hostname (required) - the host part taken from the url
    * @return mixed host - name of the handler method, or false if no entry matched
    */
    private function selectHost($hostname) {
        foreach ($this->hosts() as $handler => $domains) {
            foreach ($domains as $domain => $settings) {
                if (preg_match($settings[0], $hostname)) {
                    $this->domain = $domain;
                    $this->method = $settings[1];
                    $this->adjustment = $settings[2];
                    // the filters element is optional in the table
                    $this->filters = isset($settings[3]) ? $settings[3] : null;
                    return $handler;
                }
            }
        }
        return false;
    }

    /**
    * Builds the three element array that is returned for every failure.
    *
    * @param string code (required) - short error code
    * @param string message (required) - readable message
    * @return array result
    */
    private function failure($code, $message) {
        return array(false, $code, $message);
    }

    /**
    * Rapidshare requires a two step process in order to view the file size. To make it more
    * complicated, the second page can only be reached using POST. This method gathers the
    * information required to make the POST call and passes it to the other method, which
    * the other domains use. If other hosts require a two step process, this rapidshare method
    * can be used as a template.
    *
    * @return array result
    */
    private function rapidshare() {
        // get rapidshare submit form url
        // NOTE(review): the original expression is unreadable in this copy (it ended in
        // "hide@address.com" and was rejected by PCRE). This replacement captures the action
        // attribute of the first form tag - confirm against the upstream release.
        $matches = $this->match($this->url, '@<form[^>]*\saction="([^"]*)"@i');
        if ($matches === false || $matches[1] === '') {
            // without a form there is no second page to ask for the size
            return $this->failure('invalid link', "This link for $this->domain is invalid");
        }
        $url = $matches[1];
        $params = "dl.start=Free";

        // get rapidshare.de hidden param
        if ($this->domain == 'rapidshare.de') {
            $matches = $this->match($this->url, '@<input.*.hidden.*.value="(.*)">@i');
            if ($matches !== false) {
                $params = "$params&uri=" . $matches[1];
            }
        }

        // get file size from the second page
        $this->url = $url;
        return $this->other($params);
    }

    /**
    * The other method is used to get the file size by all domains other than rapidshare. It
    * matches the standard size pattern against the page at the current url and hands the
    * matched number and unit to the convertSize method.
    *
    * @param string params (optional) - params is used for passing POST parameters
    * @return array result
    */
    private function other($params = null) {
        // get file size and format
        $matches = $this->match($this->url, self::PATTERN, $params);

        // PATTERN also accepts runs of whitespace or punctuation in front of a unit, so a
        // match only counts as a size when it contains at least one digit
        if ($matches === false || !preg_match('/\d/', $matches[1])) {
            return $this->failure('invalid link', "This link for $this->domain is invalid");
        }

        // convert size to requested format
        return $this->convertSize($matches[1], strtoupper($matches[2]));
    }

    /**
    * The match method is used by the other methods to get the HTML and perform the preg_replace
    * and preg_match. First, it gets the HTML through the fetch method. Next, the HTML is
    * filtered for common problems and then for the domain specific filters that are stored in
    * the filters property. Finally, the filtered HTML is matched against the pattern that is
    * passed into the method.
    *
    * @param string url (required) - this url may not always be the same as the url property so it must be passed
    * @param string pattern (required) - the regular expression to look for in the filtered HTML
    * @param string params (optional) - params is used for passing POST parameters
    * @return mixed result - the preg_match matches array, or false if nothing matched or the
    *                        page could not be retrieved
    */
    private function match($url, $pattern, $params = null) {
        // get html from url
        $html = $this->fetch($url, $params);
        if (!is_string($html)) {
            return false;
        }
        // uncomment line below to test unfiltered html
        // echo "<xmp>$html</xmp>"; exit;

        // setup patterns for preg_replace to remove common-problem text
        $patterns = array(
            '@<title>.*?</title>@i',
            '@<meta.*?>@i',
            '@<noscript>(.|\n)*?</noscript>@i',
            // non-breaking spaces (as entity or as UTF-8 bytes) between number and unit are
            // not matched by \s in PATTERN; the expression found here replaced a plain space
            // with a plain space, which had no effect
            '@&nbsp;|\xC2\xA0@i',
            '@</b>@i',
        );
        // add custom patterns from filters array
        if (is_array($this->filters)) {
            foreach ($this->filters as $filter) {
                $patterns[] = $filter;
            }
        }

        // process patterns one at a time so that a broken custom filter (preg_replace
        // returns null) is skipped instead of discarding the HTML
        foreach ($patterns as $filter) {
            $filtered = preg_replace($filter, ' ', $html);
            if (!is_null($filtered)) {
                $html = $filtered;
            }
        }
        // uncomment line below to test filtered html
        // echo "<xmp>$html</xmp>"; exit;

        // check html against pattern and return result
        if (preg_match($pattern, $html, $matches)) {
            return $matches;
        }
        return false;
    }

    /**
    * Downloads a page using curl or file_get_contents according to the method property.
    * When params is given the request is sent as a POST with params as the body.
    *
    * @param string url (required) - the address to retrieve
    * @param string params (optional) - POST body, or null for a plain GET
    * @return mixed html - the response body, or false on failure
    */
    private function fetch($url, $params = null) {
        if ($this->method == 'curl') {
            $curl = curl_init();
            if ($curl === false) {
                return false;
            }
            if (!is_null($params)) {
                curl_setopt($curl, CURLOPT_POST, 1);
                curl_setopt($curl, CURLOPT_POSTFIELDS, $params);
            }
            curl_setopt($curl, CURLOPT_URL, $url);
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
            $html = curl_exec($curl);
            curl_close($curl);
            return $html;
        }

        if (is_null($params)) {
            return file_get_contents($url);
        }
        $options = array('http' => array(
            'method' => 'POST',
            // stated explicitly; PHP otherwise assumes this type and raises a notice
            'header' => 'Content-Type: application/x-www-form-urlencoded',
            'content' => $params,
        ));
        return file_get_contents($url, false, stream_context_create($options));
    }

    /**
    * The convertSize method is used to change the file size from the format that the host
    * uses (KB, MB, or GB) to the file size format that was requested from the test method.
    * It also applies the adjustment property, which is used if the host converts their file
    * sizes wrong (the most notable example is rapidshare.com, which needs adjustment).
    *
    * @param string size (required) - the file size text that was matched from the host; may
    *                                 contain thousands commas and surrounding whitespace
    * @param string sourceFormat (required) - the format that the host uses (not the requested format)
    * @return array result - index 0 is the converted size
    */
    private function convertSize($size, $sourceFormat) {
        // number of KB in one unit of each format
        $kilobytes = array(
            'KB' => 1,
            'MB' => self::CONVERSION,
            'GB' => self::CONVERSION * self::CONVERSION,
        );

        // drop thousands separators and the padding that PATTERN captures with the number
        $clean = trim(str_replace(',', '', $size));
        // "+ 0" keeps whole numbers as integers; text that is not fully numeric falls back
        // to its leading number through the float cast
        $number = is_numeric($clean) ? $clean + 0 : (float) $clean;

        // convert size to KB then convert to final format
        $number = $number * $this->adjustment;
        $number = ($number * $kilobytes[$sourceFormat]) / $kilobytes[$this->format];

        return array($number);
    }
}
?>