<?php
/**
* Amazon Parser
*
* Parses data from Amazon.com
*
* @package Engines
* @author Andreas Goetz <hide@address.com>
* @link http://www.amazon.com Amazon
* @version $Id: amazoncom.php,v 1.10 2009/02/28 12:09:50 andig2 Exp $
*/
// Base URL of the Amazon site; every request and link built in this file starts with it.
// NOTE(review): written through $GLOBALS rather than a plain assignment, presumably so the
// values reach global scope even when this file is included from inside a function - confirm.
$GLOBALS['amazonComServer'] = 'http://www.amazon.com';
// Prefix prepended to ids returned by amazoncomSearch() and stripped again by
// amazoncomContentUrl() / amazoncomData() before the id is placed into an Amazon URL.
$GLOBALS['amazonComIdPrefix'] = 'amazoncom:';
/**
 * Describe this engine.
 *
 * @todo Include image search capabilities etc in meta information
 * @return array Map with the engine's display 'name' and its 'stable' flag (currently 0)
 */
function amazoncomMeta()
{
    $meta = array();
    $meta['name'] = 'Amazon';
    $meta['stable'] = 0;

    return $meta;
}
/**
 * Get the URL used to start a search on Amazon.
 *
 * The title is not encoded into the result: the function always hands back
 * the bare server URL. (amazoncomSearch() submits its query as a POST body,
 * which is presumably why no GET query string is built here.)
 *
 * @author Andreas Goetz <hide@address.com>
 * @param string $title The search string (currently unused)
 * @return string The Amazon server base URL
 */
function amazoncomSearchUrl($title)
{
    global $amazonComServer;

    $searchUrl = $amazonComServer;

    return $searchUrl;
}
/**
 * Get the URL of an item's product page on Amazon.
 *
 * The engine prefix ($amazonComIdPrefix) is removed from the start of the id
 * if present, and the remainder is placed into an /exec/obidos/ASIN/ link.
 * The value of the AMAZON_ASSOCIATE constant is appended as the last path
 * segment; the constant is defined outside this file, so when it is missing
 * the link is built without it instead of failing.
 *
 * @author Andreas Goetz <hide@address.com>
 * @param string $id The item's external id, with or without the engine prefix
 * @return string The visit URL
 */
function amazoncomContentUrl($id)
{
    global $amazonComServer, $amazonComIdPrefix;

    // Quote the prefix so it is always matched literally, whatever characters it contains.
    $asin = preg_replace('|^'.preg_quote($amazonComIdPrefix, '|').'|', '', $id);

    // An undefined constant is a fatal Error on PHP 8 (and a bogus literal before that).
    $associate = defined('AMAZON_ASSOCIATE') ? AMAZON_ASSOCIATE : '';

    return $amazonComServer.'/exec/obidos/ASIN/'.$asin.'/'.$associate;
}
/**
 * Search a Movie/DVD/Book etc
 *
 * Posts the title to Amazon's search form and scrapes the result table.
 * If the request fails, the error text is appended to the global
 * $CLIENTERROR and only the 'encoding' entry is returned.
 *
 * @author Andreas Goetz (hide@address.com)
 * @param string $title The search string
 * @return array 'encoding' => charset name, followed by one numerically indexed
 *               entry per hit, each an array with 'id' (engine prefix + the
 *               /dp/ path segment of the hit's link) and 'title'
 */
function amazoncomSearch($title)
{
    global $amazonComServer, $amazonComIdPrefix, $cache;
    global $CLIENTERROR;

    $post = 'size=10'.
            '&url='.urlencode('index=blended').
            '&field-keywords='.urlencode($title);

    $resp = httpClient($amazonComServer.'/exec/obidos/search-handle-form/102-9499445-6725730', $cache, array('post' => $post));

    $data = array();
    // The encoding is hard-coded rather than detected from the response.
    $data['encoding'] = 'iso-8859-1';

    if (!$resp['success'])
    {
        $CLIENTERROR .= $resp['error']."\n";
        // Nothing to parse; callers still get the encoding entry as before.
        return $data;
    }

    $html = isset($resp['data']) ? $resp['data'] : '';

    if (!preg_match_all('#<td class="dataColumn">.+?<a href="(.+?)"><span class="srTitle">(.+?)</span>(.+?)<\/td><\/tr>#is', $html, $rows, PREG_SET_ORDER))
    {
        return $data;
    }

    foreach ($rows as $row)
    {
        $href      = $row[1];
        $titleHtml = $row[2];
        $trailer   = $row[3];

        // Skip hits whose title cell contains an image (ereg() no longer exists in PHP 7+).
        if (strpos($titleHtml, '<img') !== false) continue;

        // Only links with a /dp/<id>/ segment yield a usable id.
        if (!preg_match('#/dp/(.+?)/#', $href, $asin)) continue;

        $info = array();
        $info['id'] = $amazonComIdPrefix.$asin[1];
        $info['title'] = trim(strip_tags($titleHtml));

        // Adds type of media and date if available to aid in searching
        if (preg_match('/\((.+?)\)/is', $trailer, $extra))
        {
            $detail = str_replace(array('<span class="binding">', '</span>'), '', $extra[1]);
            $info['title'] .= ' ('.$detail.')';
        }

        $data[] = $info;
    }

    return $data;
}
/**
 * Fetches the data for a given Amazon ID (equals ISBN)
 *
 * Downloads the item's product page and scrapes title, cover URL, director,
 * language, year, runtime, rating, genres and cast from it, then downloads
 * the product-description page to extract the plot. Fields that cannot be
 * found are simply left out of the result.
 *
 * Request errors are appended to the global $CLIENTERROR. If the product
 * page cannot be fetched an empty array is returned; if only the description
 * page fails, everything gathered so far is returned without a plot.
 *
 * @author Andreas Goetz <hide@address.com>
 * @author Victor La <hide@address.com>
 * @param string $amazonComID Amazon id, with or without the engine prefix
 * @return array Result data keyed by field name ('encoding', 'title', 'coverurl',
 *               'director', 'language', 'year', 'runtime', 'rating', 'genres',
 *               'cast', 'plot')
 */
function amazoncomData($amazonComID)
{
    global $amazonComServer, $amazonComIdPrefix;
    global $CLIENTERROR;

    // Quote the prefix so it is matched literally inside the pattern.
    $amazonComID = preg_replace('/^'.preg_quote($amazonComIdPrefix, '/').'/', '', $amazonComID);

    $data = array(); // result

    // fetch mainpage
    // NOTE(review): the second argument is a literal 1 where amazoncomSearch() passes $cache;
    // presumably this forces caching of detail pages - confirm against httpClient().
    $resp = httpClient($amazonComServer.'/exec/obidos/ASIN/'.$amazonComID, 1);
    if (!$resp['success'])
    {
        $CLIENTERROR .= $resp['error']."\n";
        return $data;
    }
    $page = $resp['data'];

    // The encoding is hard-coded rather than detected from the response.
    $data['encoding'] = 'iso-8859-1';

    // Title
    if (preg_match("/<b class=\"sans\">(.+?)<\/b>/", $page, $ary))
    {
        $data['title'] = trim($ary[1]);
    }

    // Cover URL
    if (preg_match('/registerImage\("original_image", "(.+?)"/i', $page, $ary))
    {
        $data['coverurl'] = trim($ary[1]);
    }

    // Director (text of the first link after the label)
    if (preg_match('/<b>Directors:<\/b>.+?>(.+?)<\/a>/is', $page, $ary))
    {
        $data['director'] = trim($ary[1]);
    }

    // Language
    if (preg_match("/<b>Language:<\/b>(.*?)<\/li>/is", $page, $ary))
    {
        $data['language'] = trim(strtolower($ary[1]));
    }

    // Year (group 1 is the optional "DVD " label, so the year is group 2)
    if (preg_match("/<b>(DVD )?Release Date:<\/b>.*?(\d{4})(\n|<br>|<li>)/is", $page, $ary))
    {
        $data['year'] = trim($ary[2]);
    }

    // Runtime
    if (preg_match("/<b>Run Time:<\/b>\s*(\d{2,}) minutes<\/li>/is", $page, $ary))
    {
        $data['runtime'] = trim($ary[1]);
    }

    // Rating: the star image name encodes e.g. "4-5" for 4.5 stars; the value is doubled
    if (preg_match('/<b>Average Customer Review:<\/b> <img src=".+?customer-reviews\/stars-(\d)-(\d)./is', $page, $ary))
    {
        $data['rating'] = 2 * (float) ($ary[1].'.'.$ary[2]);
    }

    // Genres (as Array) - also handles a single genre
    if (preg_match('/<b>Genres:<\/b>.*?>(.*?)<\/li>/is', $page, $ary))
    {
        // Splitting on ">" and "," leaves tag fragments; strip_tags() drops those and
        // the unterminated "</a" tails, so only the link texts survive.
        foreach (preg_split("/>|,/", $ary[1]) as $genre)
        {
            $genre = strip_tags(trim($genre));
            if (!$genre) continue;
            $data['genres'][] = $genre;
        }
    }

    // Cast: link texts inside the Actors item, de-duplicated, one per line
    if (preg_match('/<li><b>Actors:<\/b>(.+?)<\/li>/si', $page, $ary)
        && preg_match_all('/<a href=".+?>(.+?)<\/a>/si', $ary[1], $links, PREG_SET_ORDER))
    {
        $cast = array();
        foreach ($links as $link)
        {
            $actor = trim($link[1]);
            if (!in_array($actor, $cast, true)) $cast[] = $actor;
        }
        $data['cast'] = join("\n", $cast);
    }

    // Fetch plot
    $resp = httpClient($amazonComServer.'/gp/product/product-description/'.$amazonComID, 1);
    if (!$resp['success'])
    {
        $CLIENTERROR .= $resp['error']."\n";
        return $data;
    }

    // Plot - known to be incomplete for some titles, eg. The Illusionist
    if (preg_match('/Editorial Reviews(.+?)<\/div>/is', $resp['data'], $ary)
        && preg_match_all('/Amazon.com<\/b>.+?\n(.+?)\n(.+?)\n/si', $ary[1], $reviews, PREG_SET_ORDER))
    {
        // Each match overwrites the previous one, so the last review found wins.
        foreach ($reviews as $review)
        {
            $data['plot'] = html_clean(preg_replace('/<I>--/', '', $review[1]));
        }
    }

    return $data;
}
?>