<?php
/** fetch-script: imdb.com
*
* $Id: fetch-imdb.php,v 1.17 2006/01/31 18:03:12 michasch Exp $
*
* create your own fetch-scripts for other webpages based on this one,
* but please don't make any changes to this file; always make
* a copy of it first.
*
* please send us your fetch-scripts so we can add them to
* powermovielist! you can also export them with the export-settings feature
*
* @package fetch
*/
//include guard: when the class has already been declared, stop processing this file here
if (class_exists("pmlfetch_imdb")) {
    return;
}
//IMPORTANT: the class-name (pmlfetch_*) has to be the same as the file-name (fetch-*.php)
class pmlfetch_imdb extends pml_fetch {
/**
* FieldNames - the names of all fields this script can fetch
* IMPORTANT: Add new fields here and in function DoFetch
*
* @access private
* @var array
**/
var $FieldNames = array("Title",
"Year",
"Poster",
"Director",
"Genre",
"Rating",
"Starring",
"Plot",
"Release",
"Runtime",
"MPAA",
"Akas",
"Country",
"imdbid");
/**
* actorLimit - maximum number of actors returned for the "Starring" field
* (assigned in setSettings, read in DoFetch)
* @access private
* @var int numeric value as handed to setSettings
**/
var $actorLimit;
/**
* akaCountry - pattern an alternative title has to match to be returned for
* the "Akas" field (assigned in setSettings, used in DoFetch as a
* case-insensitive regular expression)
* @access private
* @var string
**/
var $akaCountry;
/**
 * doSearch - perform the search on the page to fetch from
 *
 * Sends a title search to akas.imdb.com and appends either a status message
 * or an HTML table with all hits (grouped by imdb's result categories) to $out.
 *
 * @param &$out return-string: the HTML-code displayed when searching
 * @param $SearchString string: the string sent to the page to search for
 * @param $EntryUrl string: url used for links in HTML-code
 * @access public
 * @return const PML_FETCH_EXACTMATCH when imdb redirected straight to a title
 *         page (its id is then stored in $this->FetchID), otherwise
 *         PML_FETCH_SEARCHDONE (also when nothing was found)
 **/
function doSearch(&$out, $SearchString, $EntryUrl) {
    //the search string is user input: escape it before echoing it into the page.
    //ISO-8859-1 is a byte-wise charset, so the call never rejects the input and
    //only touches the ASCII characters & < > " ' whatever the real encoding is
    $out .= $GLOBALS['strSearchingInfoFor'] ." <b>". htmlspecialchars($SearchString, ENT_QUOTES, 'ISO-8859-1') . "</b> ... ";

    $data = "GET /find?q=". rawurlencode($SearchString) .";tt=on;mx=20 HTTP/1.0\r\n";
    $data .= "Accept: text/html, image/png, image/x-xbitmap, image/gif, image/jpeg, */*\r\n";
    $data .= "Referer: http://akas.imdb.com/Find\r\n";
    $data .= "Content-type: application/x-www-form-urlencoded\r\n";
    $data .= "Accept-Encoding: *;q=0\r\n";
    $data .= "User-Agent: Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90)\r\n";
    $data .= "Host: akas.imdb.com\r\n";
    $data .= "Connection: Keep-Alive\r\n";
    $data .= "Cache-Control: no-cache\r\n";
    $data .= "\r\n";
    $site = $this->FetchPage($data, "akas.imdb.com:80");

    // $sections[0]: part with movie matches
    //          [1]: part with actor matches
    $sections = preg_split("/Names \(Exact Matches\)/i", $site);
    $site = $sections[0];

    //when you use the search-form on imdb.com and you search for a title that was exactly found
    //imdb uses a 302-found-page to redirect to the Title-page of this movie.
    //if this happens, we can use this imdb-id too
    if(strstr($site, "HTTP/1.0 302") || strstr($site, "HTTP/1.1 302")) { //exact match?
        //only report an exact match when the redirect really carries a title id;
        //otherwise fall through to the normal "nothing found" handling below
        if(preg_match('#/title/tt([0-9]+)/#', $site, $x)) {
            $out .= $GLOBALS['strExactMatch'];
            $this->FetchID = $x[1]; //save the id in $FetchID
            return(PML_FETCH_EXACTMATCH); //editentry can fetch the data now - search is already done
        }
    }

    //collect the hits of every category block
    //("Popular Titles" and/or "Exact Matches" and/or "Partial Matches")
    $SearchData = array();
    $tlBlocks = preg_match_all('/(<b>P?o?p?u?l?a?r? ?Titles.*?<\/b>).*?<\/ol>/msi', $site, $categoryBlocks);
    if ($tlBlocks > 0) {
        // remove text "Titles" and () from the category-names
        $categoryBlocks[1] = preg_replace('/(Titles \()|\)|;/i','',$categoryBlocks[1]);
        foreach($categoryBlocks[0] as $currentBlock => $block){
            preg_match_all('/<li> <a href=\"\/title\/tt([0-9]+)\/.*?\">(.*?)<\/a>(.*?)<\/li>/i', $block, $imdbMovies);
            foreach($imdbMovies[1] as $key => $value){
                $SearchData[] = array("id" => $value,
                                      "name" => $imdbMovies[2][$key],
                                      "help" => str_replace(" aka","<br> ",strip_tags($imdbMovies[3][$key])),
                                      "cat" => '- ' . $categoryBlocks[1][$currentBlock]);
            }
        }
    }

    //no category block at all, or blocks without a single usable title link
    if(count($SearchData) == 0) {
        $out .= $GLOBALS['strErrNothingFoundEnterNewString'];
        return(PML_FETCH_SEARCHDONE);
    }

    $out .= $GLOBALS['strFoundMore'];

    //name of this fetch-script: file-name without leading "fetch-" and trailing ".php"
    $Page = substr(basename(__FILE__), 6, -4);

    //print out all the movies found:
    $out .= "<table>";
    $LastCat = "";
    $j = 1; //row counter for the alternating row classes, restarted for every category
    foreach($SearchData as $Dat) {
        if($Dat['cat']!=$LastCat) {
            $out .= "<tr class='top'><td colspan=2 align='left'>$Dat[cat]</td></tr>\n";
            $j=1;
        }
        $LastCat = $Dat['cat'];
        $out .= "<tr class='row";
        if($j++%2) $out .= "1"; else $out .= "2";
        $out .= "'><td width='20'> </td><td>";
        $out .= "<a href=\"$EntryUrl&" . urlencode("fid[{$Page}]")."=".urlencode($Dat['id'])."\">$Dat[name]</a>";
        $out .= " - [<a href=\"http://akas.imdb.com/Title?$Dat[id]\" target=_blank>$GLOBALS[strInfo]</a>]\n";
        if(strlen($Dat['help'])>2) $out .= "<i>". $Dat['help'] ."</i>";
        $out .= "</td></tr>\n";
    }
    $out .= "</table>";
    return(PML_FETCH_SEARCHDONE);
}
/**
 * GetCachedPage
 *
 * Requests a path on akas.imdb.com through fetchCachedUrl() (not defined in
 * this file) and hands back whatever that call returns.
 *
 * @param string the url (path) to fetch
 * @param string the Referrer sent along; defaults to the imdb start page
 * @return the result of fetchCachedUrl()
 **/
function GetCachedPage($Url, $Referer="http://akas.imdb.com/") {
    $host = "akas.imdb.com";
    $page = $this->fetchCachedUrl($Url, $host, $Referer);
    return($page);
}
/**
 * DoFetch - fetch one field of the movie identified by $this->FetchID
 *
 * IMPORTANT if you want to add some fields:
 * if you add new fields, add them to var $FieldNames on top of this file
 *
 * Every field is cut out of an imdb page with a regular expression. Text
 * values are passed through addslashes(); fields that can hold several values
 * (Genre, Starring, Akas, Country) are returned as arrays. Before returning,
 * every value is passed through ReplaceUnicodeChars().
 *
 * @param string the fetched value (return-string), only set on PML_FETCH_OK
 * @param string the FieldName
 * @access public
 * @return const PML_FETCH_ERROR, PML_FETCH_OK or PML_FETCH_ITEMNOTFOUND
 **/
function DoFetch(&$ret, $FieldName) {
    //page and referrers shared by most fields
    $titlePath = "/title/tt" . $this->FetchID . "/";
    $titleUrl = "http://akas.imdb.com" . $titlePath;
    $findUrl = "http://akas.imdb.com/Find";

    switch($FieldName) {
        case "Title":
        case "Year":
            //title and year are taken from the same headline
            //get this url, cached if already used:
            $site = $this->GetCachedPage($titlePath, $findUrl);
            if(!preg_match('#<STRONG CLASS="title">([^<]*) <SMALL>\(<A HREF="/Sections/Years/([0-9]{4})#i', $site, $x)) {
                return(PML_FETCH_ERROR);
            }
            if($FieldName == "Title") {
                $ret = addslashes($x[1]);
            } else {
                $ret = $x[2];
            }
            break;

        case "Poster":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            if(!preg_match('/<a name="poster".+?<img .*?src="([^"]+)"/is', $site, $x)) {
                return(PML_FETCH_ERROR);
            }
            $ret = addslashes($x[1]);
            break;

        case "Director":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            if(!preg_match('#Directed by.*\n[^<]*<a href="/Name?[^"]*">([^<]*)</a>#i', $site, $x)) {
                return(PML_FETCH_ERROR);
            }
            $ret = addslashes($x[1]);
            break;

        case "Genre":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            if(!preg_match('#Genre:</b>(.*?)<br>#msi', $site, $gen)) {
                return(PML_FETCH_ERROR);
            }
            //every genre link inside the "Genre:" line
            if(!preg_match_all('#<A HREF="/Sections/Genres/[a-zA-Z\-]*/">([a-zA-Z\-]*)</A>#i', $gen[1], $x)) {
                return(PML_FETCH_ERROR);
            }
            $ret = array_map('addslashes', $x[1]);
            break;

        case "Rating":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            //"7.5/10 (1,234 votes)"; the integer part may have two digits (10.0)
            if(!preg_match('#<B>([0-9]+)[.,]([0-9])/10</B> \([0-9,]+ votes\)#i', $site, $x)) {
                return(PML_FETCH_ERROR);
            }
            $ret = ($x[1] * 10 + $x[2]) / 10;
            break;

        case "Starring":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            //setSettings is not called when no setting was saved: use the same
            //default of 3 actors that printSettings shows
            $limit = (int)$this->actorLimit;
            if($limit < 1) $limit = 3;
            if(!preg_match_all('#<td valign="top"><a href="/name/nm([^"]+)">([^<]*)</a></td>#i', $site, $x)) {
                return(PML_FETCH_ERROR);
            }
            $ret = array_map('addslashes', array_slice($x[2], 0, $limit));
            break;

        case "Plot":
            $site = $this->GetCachedPage($titlePath . "plotsummary", $titleUrl);
            if(preg_match('#<p class="plotpar">([^<]*)</p>#i', $site, $x)) {
                //plot exists:
                $ret = addslashes($x[1]);
                break;
            }
            //plot doesn't exist: fall back to plot-outline, plot-summary and
            //finally the tagline from the title-page, in this order
            $site = $this->GetCachedPage($titlePath, $findUrl);
            $found = false;
            foreach(array("Plot Outline", "Plot Summary", "Tagline") as $label) {
                if(preg_match('#' . $label . ':</b>([^<]*)#', $site, $x)) {
                    $ret = addslashes($x[1]);
                    $found = true;
                    break;
                }
            }
            if(!$found) {
                //error, no plot found
                return(PML_FETCH_ERROR);
            }
            break;

        case "Release":
            $site = $this->GetCachedPage($titlePath . "releaseinfo", $titleUrl);
            //month names are looked up in lower case because the match is case-insensitive
            $convert = array("january" => "01", "february" => "02", "march" => "03",
                             "april" => "04", "may" => "05", "june" => "06",
                             "july" => "07", "august" => "08", "september" => "09",
                             "october" => "10", "november" => "11", "december" => "12");
            if(!preg_match('#<a href="/BusinessThisDay[^>]*>([0-9]+) ([A-Za-z]+)</a>#i', $site, $date)) {
                return(PML_FETCH_ERROR);
            }
            if(!preg_match('#<a href="/Sections/Years[^>]*>([^<]*)</A>#i', $site, $year)) {
                return(PML_FETCH_ERROR);
            }
            $month = strtolower($date[2]);
            if(!isset($convert[$month])) {
                //unknown month name: no usable date
                return(PML_FETCH_ERROR);
            }
            //year-month-day with a two-digit day
            $ret = $year[1] . "-" . $convert[$month] . "-" . sprintf("%02d", $date[1]);
            break;

        case "imdbid":
            $ret = $this->FetchID;
            break;

        case "Runtime":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            if(!preg_match('#<b class="ch">Runtime:</b>\n([0-9]+) min#i', $site,$x)) {
                return(PML_FETCH_ERROR);
            }
            $ret = addslashes($x[1]);
            break;

        case "MPAA":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            if(!preg_match('#<b class="ch"><a href="/mpaa">MPAA</a>:</b> Rated ([^ ]+) for (.+).<br>#i', $site,$x)) {
                return(PML_FETCH_ERROR);
            }
            $ret = addslashes($x[1]);
            break;

        case "Akas":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            $ret = array();
            //setSettings is not called when no setting was saved: use the default "USA"
            $country = $this->akaCountry;
            if($country == "") $country = "USA";
            //the setting is used as a case-insensitive regular expression;
            //only the pattern delimiter has to be escaped
            $countryPattern = '~' . str_replace('~', '\\~', $country) . '~i';
            //the aka list is taken from the part between "Also Known As" and the MPAA line
            if(preg_match('#<b class="ch">Also Known As</b>(.*)<b class="ch"><a href="/mpaa">MPAA</a>:</b>#is', $site, $y)) {
                if(preg_match_all('#<i([^>]*)>([^<]*)</i>#i', $y[0], $x)) {
                    foreach($x[2] as $aka) {
                        if(preg_match($countryPattern, $aka)) {
                            //NOTE(review): as written this replacement looks like a no-op; it was
                            //presumably meant to normalise non-breaking spaces - confirm against upstream
                            $ret[] = addslashes(str_replace(" ", " ", $aka));
                        }
                    }
                }
            }
            if(sizeof($ret)==0) {
                return(PML_FETCH_ERROR);
            }
            break;

        case "Country":
            $site = $this->GetCachedPage($titlePath, $findUrl);
            if(!preg_match_all('#<a href="/Sections/Countries/([^>]*)>([^<]*)</a>#i', $site, $x)) {
                return(PML_FETCH_ERROR);
            }
            $ret = array_map('addslashes', $x[2]);
            break;

        default:
            return(PML_FETCH_ITEMNOTFOUND);
    }//end switch $FieldName

    if(is_array($ret)) {
        foreach($ret as $key => $value) {
            $ret[$key] = $this->ReplaceUnicodeChars($value);
        }
    } else {
        $ret = $this->ReplaceUnicodeChars($ret);
    }
    return(PML_FETCH_OK);
}//end function DoFetch
/**
 * getUseSettings
 *
 * tells the caller whether a field offers additional settings;
 * this is the case for "Starring" and "Akas" only
 *
 * @access public
 * @param string the field
 * @return boolean
 **/
function getUseSettings($field) {
    if($field == "Starring" || $field == "Akas") {
        return(true);
    }
    return(false);
}
/**
 * printSettings
 *
 * builds the HTML-code of the input field for the custom setting of a field;
 * an empty setting is shown as its default ("3" actors, "USA" akas)
 *
 * @access public
 * @param string the field
 * @param string the current setting from the database
 * @return string the HTML-code, empty for fields without settings
 **/
function printSettings($field, $set) {
    switch($field) {
        case "Starring":
            if($set=="") $set="3";
            $label = "Actors to fetch";
            $size = "3";
            break;
        case "Akas":
            if($set=="") $set="USA";
            $label = "Aka(s) to fetch";
            $size = "32";
            break;
        default:
            return("");
    }
    //the stored value ends up inside an HTML attribute: escape it so that quotes or
    //markup in the setting cannot break out of it. ISO-8859-1 is byte-wise, so the
    //call accepts any input encoding and only replaces & < > " '
    $value = htmlspecialchars($set, ENT_QUOTES, 'ISO-8859-1');
    return("<input type=\"text\" name=\"set{$field}\" size=\"{$size}\" value=\"{$value}\"> {$label}\n");
}
/**
 * saveSettings
 *
 * reads the submitted setting of a field from $_POST (key "set" followed by
 * the field-name), validates it and returns the one string that will be
 * saved in the database (only one field is available for saving the data!)
 *
 * @access public
 * @param string the field
 * @return string the string that will be saved in the db; empty when nothing
 *         usable was submitted, so that the defaults apply
 **/
function saveSettings($field) {
    $key = 'set' . $field;
    //a missing or non-string value (e.g. an array) counts as "nothing submitted"
    $value = "";
    if(isset($_POST[$key]) && is_string($_POST[$key])) {
        $value = trim($_POST[$key]);
    }
    switch($field) {
        case "Starring":
            //number of actors: only a positive whole number is accepted
            if(!preg_match('/^[0-9]+$/', $value)) {
                return("");
            }
            $limit = (int)$value;
            if($limit < 1) {
                return("");
            }
            return((string)$limit);
        case "Akas":
            return($value);
        default:
            return("");
    }
}
/**
 * setSettings
 *
 * will be called from editentry.php before calling DoFetch
 * (only if $set is not empty).
 * The setting is processed here and stored in the class-vars
 * $actorLimit ("Starring") and $akaCountry ("Akas"); other fields are ignored.
 *
 * @access public
 * @param string the field
 * @param string the setting as saved in the database
 **/
function setSettings($field, $set) {
    switch($field) {
        case "Starring":
            //anything that is not a positive number falls back to 3 actors;
            //the explicit cast keeps this independent of how the PHP version
            //compares strings with numbers
            $limit = (int)$set;
            if($limit < 1) $limit = 3;
            $this->actorLimit = $limit;
            break;
        case "Akas":
            if($set=="") $set="USA";
            $this->akaCountry = $set;
            break;
    }
}
}
?>