<?php
/**
* Amazon Parser
*
* Parses data from Amazon.de
*
* @package Engines
* @author Andreas Goetz <hide@address.com>
* @link http://www.amazon.de Amazon
* @version $Id: amazon.php,v 1.23 2008/06/29 11:13:02 andig2 Exp $
*/
// Engine-wide settings. They are written through $GLOBALS (presumably so that
// they reach the global scope even when this file is included from inside a
// function - confirm against the including code); the functions below read
// them back with `global`.

// Base URL of the Amazon site; every request and visit URL built in this file
// starts with it.
$GLOBALS['amazonServer'] = 'http://www.amazon.de';
// Marker prepended to the ids returned by amazonSearch() and stripped again by
// amazonContentUrl() and amazonData() before the id is placed into a URL.
$GLOBALS['amazonIdPrefix'] = 'amazon:';
/**
 * Describe this engine
 *
 * Returns a small associative array with the engine's display name and its
 * 'stable' flag (0 here - presumably marking the engine as not stable;
 * confirm against the code consuming the meta information).
 *
 * @todo    Include image search capabilities etc in meta information
 * @return  array   Associative array with the keys 'name' and 'stable'
 */
function amazonMeta()
{
    $meta = array();
    $meta['name']   = 'Amazon (de)';
    $meta['stable'] = 0;

    return $meta;
}
/**
 * Get the URL used to search Amazon
 *
 * The search string is not encoded into the URL: whatever $title is, the
 * function hands back the bare server address stored in the global
 * $amazonServer.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string  $title  The search string (currently ignored)
 * @return  string  The search URL (GET)
 */
function amazonSearchUrl($title)
{
    global $amazonServer;

    $searchUrl = $amazonServer;

    return $searchUrl;
}
/**
 * Get Url to visit the product page on the external site
 *
 * Strips the engine prefix (global $amazonIdPrefix) from the front of the id,
 * if present, and builds the product URL below the global $amazonServer. The
 * AMAZON_ASSOCIATE constant is appended as the last path segment; it is not
 * defined in this file and has to be provided by the including application.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string  $id     The movie's external id, with or without the engine prefix
 * @return  string          The visit URL
 */
function amazonContentUrl($id)
{
    global $amazonServer, $amazonIdPrefix;

    // Quote the prefix so that it is matched literally: without this, a
    // prefix containing a regex metacharacter or the '|' delimiter would
    // either strip the wrong text or produce an invalid pattern.
    $prefixPattern = '|^'.preg_quote($amazonIdPrefix, '|').'|';
    $asin          = preg_replace($prefixPattern, '', $id);

    return $amazonServer.'/exec/obidos/ASIN/'.$asin.'/'.AMAZON_ASSOCIATE;
}
/**
 * Search a Movie/DVD/Book etc
 *
 * Posts the title to the Amazon search form and scrapes the result rows out
 * of the returned HTML. A failed request is reported by appending the error
 * text to the global $CLIENTERROR; the function then still returns the
 * (otherwise empty) result array.
 *
 * @author  Andreas Goetz (hide@address.com)
 * @param   string  $title  The search string
 * @return  array           Entry 'encoding' (as reported by engine_get_encoding())
 *                          followed by one array('id' => ..., 'title' => ...)
 *                          per usable hit under numeric keys
 */
function amazonSearch($title)
{
    global $amazonServer, $amazonIdPrefix, $cache;
    global $CLIENTERROR;

    $post = 'size=10'.
            '&url='.urlencode('index=blended').
            '&field-keywords='.urlencode($title);

    $resp = httpClient($amazonServer.'/exec/obidos/search-handle-form/ref=sr_sp_go_qs/028-5046340-6062930', $cache, array('post' => $post));
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    $data = array();

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    // a failed request may come back without any body
    $html = isset($resp['data']) ? $resp['data'] : '';

    // per row: [1] link target, [2] title markup, [3] remainder of the cell
    if (!preg_match_all('/<td class="dataColumn">.+?<a href="(.+?)"><span class="srTitle">(.+?)<\/span>(.+?)<\/td><\/tr>/is', $html, $m, PREG_SET_ORDER))
    {
        return $data;
    }

    foreach ($m as $row)
    {
        // rows whose title is an image carry no usable text
        // (strpos replaces ereg(), which no longer exists as of PHP 7)
        if (strpos($row[2], '<img') !== false) continue;

        // the product id is the path segment following /dp/
        if (!preg_match('#/dp/(.+?)/#', $row[1], $m2)) continue;

        // start from a fresh record for every row
        $info = array(
            'id'    => $amazonIdPrefix.$m2[1],
            'title' => trim(strip_tags($row[2]))
        );

        // Adds type of media and date if available to aid in searching
        if (preg_match('/\((.+?)\)/is', $row[3], $tempdata))
        {
            $extra = str_replace(array('<span class="binding">', '</span>'), '', $tempdata[1]);
            $info['title'] .= ' ('.$extra.')';
        }

        $data[] = $info;
    }

    return $data;
}
/**
 * Fetches the data for a given Amazon ID (equals ISBN)
 *
 * Loads the product page and the product description page and scrapes the
 * item details out of the HTML. Failed requests are reported by appending the
 * error text to the global $CLIENTERROR; whatever could be extracted is still
 * returned.
 *
 * Keys of the result (apart from 'encoding', each one is only present when
 * the matching markup was found): encoding, title, subtitle, coverurl,
 * director, language, year, runtime, rating, genres (array), cast
 * (newline separated), plot.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @author  Victor La <hide@address.com>
 * @param   string  $amazonID   Amazon-ID, with or without the engine prefix
 * @return  array               Result data
 */
function amazonData($amazonID)
{
    global $amazonServer, $amazonIdPrefix;
    global $CLIENTERROR;

    // strip the engine prefix; it is quoted so that it is matched literally
    $amazonID = preg_replace('/^'.preg_quote($amazonIdPrefix, '/').'/', '', $amazonID);

    // German language names as printed on the page => names stored in the result
    $languages = array('deutsch' => 'german', 'englisch' => 'english', 'spanisch' => 'spanish');

    // Genre keywords found on the page => genre names stored in the result.
    // An empty target means the keyword has no counterpart and is dropped.
    $genres = array(
        'Anime'              => 'Animation',
        'Horror'             => 'Horror',
        'Monumental'         => '',
        'Abenteuer'          => 'Adventure',
        'Abenteuerfilm'      => 'Adventure',
        'Eastern'            => '',
        'Kriegsfilm'         => 'War',
        'Krieg'              => 'War',
        'War'                => 'War',
        'World War'          => 'War',
        'Action'             => 'Action',
        'Historienfilm'      => 'History',
        'Historical'         => 'History',
        'Krimi'              => 'Crime',
        'Kriminalfilm'       => 'Crime',
        'Thriller'           => 'Thriller',
        'Dokumentation'      => 'Documentary',
        'Dokumentarfilm'     => 'Documentary',
        'Erotik'             => 'Adult',
        'Geschichte'         => 'History',
        'Musik'              => 'Music',
        'Musivideo'          => 'Music',   // historic misspelling, kept for compatibility
        'Musikvideo'         => 'Music',
        'Reise'              => '',
        'Biografie'          => 'Biography',
        'Biographie'         => 'Biography',
        'Sport'              => 'Sport',
        'Sports'             => 'Sport',
        'Zeitgeschichte'     => 'History',
        'Fantasy'            => 'Fantasy',
        'Komödie'            => 'Comedy',
        'Humor'              => 'Comedy',
        'Science Fiction'    => 'Sci-Fi',
        'Trickfilm'          => 'Animation',
        'Zeichentrick'       => 'Animation',
        'Kinder'             => 'Family',
        'Familienfilm'       => 'Family',
        'Drama'              => 'Drama',
        'Liebesfilm'         => 'Romance',
        'Romantic'           => 'Romance',
        'Western'            => 'Western',
        'Horror/Occult'      => 'Mystery',
        'Comedy'             => 'Comedy',
        // Didn't check these, took 'em from dvdb.php
        'Familie'            => 'Family',
        'Actionkomödie'      => 'Comedy',
        'Westernkomödie'     => 'Western',
        'Musikfilm'          => 'Musical',
        'Animation'          => 'Animation',
        'Splatter'           => 'Horror',
        'Filmoperette'       => 'Musical',
        'Horrorkomödie'      => 'Comedy',
        'Musikdokumentation' => 'Music',
        'Mystery'            => 'Mystery',
        'Roadmovie'          => 'Thriller',
        'Satire'             => 'Comedy',
    );

    // fetch mainpage
    $resp = httpClient($amazonServer.'/exec/obidos/ASIN/'.$amazonID, 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    // a failed request may come back without any body
    $html = isset($resp['data']) ? $resp['data'] : '';

    $data = array();

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    // Titles: "Title - Subtitle"; the subtitle is empty when there is no separator
    // (explode replaces split(), which no longer exists as of PHP 7)
    if (preg_match("/<b class=\"sans\">(.+?)<\/b>/", $html, $ary))
    {
        $parts            = explode(' - ', $ary[1]);
        $data['title']    = trim($parts[0]);
        $data['subtitle'] = isset($parts[1]) ? trim($parts[1]) : '';
    }

    // Cover URL
    if (preg_match('/registerImage\("original_image", "(.+?)"/i', $html, $ary))
    {
        $data['coverurl'] = trim($ary[1]);
    }

    // Director
    if (preg_match('/<b>Regisseur\(e\):<\/b>.+?>(.+?)<\/a>/is', $html, $ary))
    {
        $data['director'] = trim($ary[1]);
    }

    // Language
    if (preg_match("/<b>Sprache:<\/b>(.*?)<\/li>/is", $html, $ary))
    {
        $found = array();
        foreach (preg_split("/, /", trim(strtolower($ary[1]))) as $templang)
        {
            $templang = trim($templang);
            if (!$templang) continue;
            if (isset($languages[$templang])) $templang = $languages[$templang];
            if (!$templang) continue;
            $found[] = $templang;
        }
        // NOTE(review): every blank becomes a separator, so an entry that
        // itself contains blanks is broken up as well. Kept as is because
        // consumers may rely on this output - confirm before changing.
        $data['language'] = str_replace(' ', ', ', implode(' ', $found));
    }

    // Year: first four-digit number after the release date label
    if (preg_match("/<b>(DVD-)?Erscheinungs(datum|termin):<\/b>.*?(\d{4})(<\/li>)?/is", $html, $ary))
    {
        $data['year'] = trim($ary[3]);
    }

    // Runtime
    if (preg_match("/<b>Spieldauer:<\/b>(.+?)Minuten<\/li>/is", $html, $ary))
    {
        $data['runtime'] = trim($ary[1]);
    }

    // Rating: the star image name carries "<digit>-<digit>", read as
    // <digit>.<digit> and doubled
    if (preg_match('/<b>Durchschnittliche Kundenbewertung:<\/b>.+?<img src=".+?customer-reviews\/stars-(\d)-(\d)./is', $html, $ary))
    {
        $data['rating'] = 2 * (float) (trim($ary[1]).'.'.trim($ary[2]));
    }

    // Genres (as array)
    // TODO: There are a lot of comma separated genres, they should be parsed too
    if (preg_match_all('|<input[^>]*?name="field.keywords"[^>]*?value="([^"]*?)"|si', $html, $ary, PREG_SET_ORDER))
    {
        // Compare case-insensitively. Lower-casing everything after the first
        // character, as was done before, can never hit multi-word keys such
        // as 'Science Fiction'.
        $genreLookup = array();
        foreach ($genres as $keyword => $mapped)
        {
            $genreLookup[strtolower($keyword)] = $mapped;
        }

        foreach ($ary as $row)
        {
            $keyword = strtolower(trim($row[1]));
            if (!$keyword || !isset($genreLookup[$keyword])) continue;

            // keywords without a counterpart are mapped to '' - skip them
            if ($genreLookup[$keyword] === '') continue;

            $data['genres'][] = $genreLookup[$keyword];
        }
    }

    // Cast: one actor per line, duplicates removed, page order kept
    if (preg_match('/<li><b>Darsteller:<\/b>(.+?)<\/li>/si', $html, $ary))
    {
        preg_match_all('/<a href=".+?>(.+?)<\/a>/si', $ary[1], $links, PREG_SET_ORDER);

        $cast = array();
        foreach ($links as $row)
        {
            $actor = trim($row[1]);
            if (!in_array($actor, $cast, true)) $cast[] = $actor;
        }
        $data['cast'] = join("\n", $cast);
    }

    // Fetch plot
    $resp = httpClient($amazonServer.'/gp/product/product-description/'.$amazonID, 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    $plotHtml = isset($resp['data']) ? $resp['data'] : '';

    // Plot: the line following the "Kurzbeschreibung" heading; when the
    // heading shows up more than once the last occurrence wins
    if (preg_match_all('/Kurzbeschreibung.+?\n(.+?)\n(.+?)\n/si', $plotHtml, $ary, PREG_SET_ORDER))
    {
        $last         = end($ary);
        $data['plot'] = trim(html_clean($last[1]));
    }

    return $data;
}
?>