Location: PHPKode > projects > VideoDB > videodb/engines/amazoncom.php
<?php
/**
 * Amazon Parser
 *
 * Parses data from Amazon.com
 *
 * @package Engines
 * @author  Andreas Goetz <hide@address.com>
 * @link    http://www.amazon.com Amazon
 * @version $Id: amazoncom.php,v 1.10 2009/02/28 12:09:50 andig2 Exp $
 */

// Engine settings, published through $GLOBALS so the functions below can
// pick them up with `global`.
$GLOBALS['amazonComIdPrefix']   = 'amazoncom:';             // prefix carried by ids of this engine
$GLOBALS['amazonComServer']     = 'http://www.amazon.com';  // base URL every request is built on

/**
 * Return the meta information of this engine
 *
 * @todo    Include image search capabilities etc in meta information
 * @return  array   Associative array with the keys 'name' and 'stable'
 */
function amazoncomMeta()
{
    $meta = array();

    $meta['name']   = 'Amazon';
    $meta['stable'] = 0;

    return $meta;
}

/**
 * Get the search Url for an Amazon product
 *
 * The title is not encoded into the result: the configured server
 * root is handed back unchanged.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string  $title  The search string (not used by this function)
 * @return  string          The search URL (GET)
 */
function amazoncomSearchUrl($title)
{
    global $amazonComServer;

    $searchUrl = $amazonComServer;

    return $searchUrl;
}

/**
 * Get the Url to visit the external site for a given item
 *
 * Strips the engine's id prefix (if present) from the id and builds the
 * product URL from the configured server, the remaining id and the
 * AMAZON_ASSOCIATE constant, which must be defined by the including code.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string  $id The movie's external id, with or without the engine prefix
 * @return  string      The visit URL
 */
function amazoncomContentUrl($id)
{
    global $amazonComServer, $amazonComIdPrefix;

    // preg_quote() keeps the prefix a literal string even if it ever
    // contains regex metacharacters or the '|' delimiter
    $asin = preg_replace('|^'.preg_quote($amazonComIdPrefix, '|').'|', '', $id);

    return $amazonComServer.'/exec/obidos/ASIN/'.$asin.'/'.AMAZON_ASSOCIATE;
}

/**
 * Search a Movie/DVD/Book etc
 *
 * Posts the title to the Amazon search form and scrapes the result rows
 * out of the returned HTML. A failed request appends the client's error
 * message to the global $CLIENTERROR.
 *
 * @author  Andreas Goetz (hide@address.com)
 * @param   string  $title  The search string
 * @return  array           'encoding' entry plus one array('id' =>, 'title' =>)
 *                          per result row that could be parsed
 */
function amazoncomSearch($title)
{
    global $amazonComServer, $amazonComIdPrefix, $cache;
    global $CLIENTERROR;

    $post = 'size=10'.
            '&url='.urlencode('index=blended').
            '&field-keywords='.urlencode($title);

    $resp = httpClient($amazonComServer.'/exec/obidos/search-handle-form/102-9499445-6725730', $cache, array('post' => $post));
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    $data = array();

    // encoding is hard-coded rather than detected from the response
    $data['encoding'] = 'iso-8859-1';

    // a failed request may not carry a body at all
    $html = isset($resp['data']) ? $resp['data'] : '';

    if (preg_match_all('#<td class="dataColumn">.+?<a href="(.+?)"><span class="srTitle">(.+?)</span>(.+?)<\/td><\/tr>#is', $html, $m, PREG_SET_ORDER))
    {
        foreach ($m as $row)
        {
            // skip rows whose title cell contains an image
            // (strpos replaces ereg(), which no longer exists in PHP 7+)
            if (strpos($row[2], '<img') !== false) continue;

            // the id is the path segment following /dp/ in the result link
            if (!preg_match('#/dp/(.+?)/#', $row[1], $m2)) continue;

            // start from a clean record so nothing leaks over from the previous row
            $info           = array();
            $info['id']     = $amazonComIdPrefix.$m2[1];
            $info['title']  = trim(strip_tags($row[2]));

            // Adds type of media and date if available to aid in searching
            if (preg_match('/\((.+?)\)/is', $row[3], $tempdata))
            {
                $binding = str_replace(array('<span class="binding">', '</span>'), '', $tempdata[1]);
                $info['title'] .= ' ('.$binding.')';
            }

            $data[] = $info;
        }
    }

    return $data;
}

/**
 * Fetches the data for a given Amazon ID
 *
 * Loads the product page for the id (the ASIN segment of the product URL)
 * and scrapes title, cover, director, language, year, runtime, rating,
 * genres and cast from it; then loads the product description page and
 * scrapes the plot. Only the fields that could be found are set.
 *
 * If the first request fails an empty array is returned; if the second
 * one fails the data collected so far is returned. In both cases the
 * client's error message is appended to the global $CLIENTERROR.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @author  Victor La <hide@address.com>
 * @param   string  $amazonComID    Amazon id, with or without the engine prefix
 * @return  array                   Result data
 */
function amazoncomData($amazonComID)
{
    global $amazonComServer, $amazonComIdPrefix;
    global $CLIENTERROR;

    // preg_quote() keeps the prefix literal should it ever contain regex metacharacters
    $amazonComID = preg_replace('/^'.preg_quote($amazonComIdPrefix, '/').'/', '', $amazonComID);

    $data = array();  // result

    // fetch mainpage
    $resp = httpClient($amazonComServer.'/exec/obidos/ASIN/'.$amazonComID, 1);
    if (!$resp['success'])
    {
        $CLIENTERROR .= $resp['error']."\n";
        return $data;
    }

    $page = $resp['data'];

    // encoding is hard-coded rather than detected from the response
    $data['encoding'] = 'iso-8859-1';

    // Title
    if (preg_match("/<b class=\"sans\">(.+?)<\/b>/", $page, $ary))
    {
        $data['title'] = trim($ary[1]);
    }

    // Cover URL
    if (preg_match('/registerImage\("original_image", "(.+?)"/i', $page, $ary))
    {
        $data['coverurl'] = trim($ary[1]);
    }

    // Director (first linked name only)
    if (preg_match('/<b>Directors:<\/b>.+?>(.+?)<\/a>/is', $page, $ary))
    {
        $data['director'] = trim($ary[1]);
    }

    // Language
    if (preg_match("/<b>Language:<\/b>(.*?)<\/li>/is", $page, $ary))
    {
        $data['language'] = trim(strtolower($ary[1]));
    }

    // Year
    if (preg_match("/<b>(DVD )?Release Date:<\/b>.*?(\d{4})(\n|<br>|<li>)/is", $page, $ary))
    {
        $data['year'] = trim($ary[2]);
    }

    // Runtime
    if (preg_match("/<b>Run Time:<\/b>\s*(\d{2,}) minutes<\/li>/is", $page, $ary))
    {
        $data['runtime'] = trim($ary[1]);
    }

    // Rating: the star image name carries "<whole>-<fraction>";
    // the value is doubled before it is stored
    if (preg_match('/<b>Average Customer Review:<\/b> <img src=".+?customer-reviews\/stars-(\d)-(\d)./is', $page, $ary))
    {
        $data['rating'] = 2 * (float) ($ary[1].'.'.$ary[2]);
    }

    // Genres (as Array) - Fixed 5-24-07 - Check for a single Genre
    if (preg_match('/<b>Genres:<\/b>.*?>(.*?)<\/li>/is', $page, $ary))
    {
        foreach (preg_split("/>|,/", $ary[1]) as $genre)
        {
            $genre = strip_tags(trim($genre));
            if (!$genre) continue;
            $data['genres'][] = $genre;
        }
    }

    // Cast: every linked name in the Actors item, duplicates removed
    if (preg_match('/<li><b>Actors:<\/b>(.+?)<\/li>/si', $page, $ary))
    {
        if (preg_match_all('/<a href=".+?>(.+?)<\/a>/si', $ary[1], $actors, PREG_SET_ORDER))
        {
            $cast = array();

            foreach ($actors as $row)
            {
                $actor = trim($row[1]);
                if (!in_array($actor, $cast, true)) $cast[] = $actor;
            }

            $data['cast'] = join("\n", $cast);
        }
    }

    // Fetch plot
    $resp = httpClient($amazonComServer.'/gp/product/product-description/'.$amazonComID, 1);
    if (!$resp['success'])
    {
        $CLIENTERROR .= $resp['error']."\n";
        return $data;
    }

    // Plot - Fixed 5-24-07 - Not complete, eg. The Illusionist
    if (preg_match('/Editorial Reviews(.+?)<\/div>/is', $resp['data'], $ary))
    {
        if (preg_match_all('/Amazon.com<\/b>.+?\n(.+?)\n(.+?)\n/si', $ary[1], $reviews, PREG_SET_ORDER))
        {
            // when several reviews match, the last one ends up as plot
            foreach ($reviews as $row)
            {
                $data['plot'] = html_clean(preg_replace('/<I>--/', '', $row[1]));
            }
        }
    }

    return $data;
}

?>
Return current item: VideoDB