Location: PHPKode > projects > VideoDB > videodb/engines/amazon.php
<?php
/**
 * Amazon Parser
 *
 * Parses data from Amazon.de
 *
 * @package Engines
 * @author  Andreas Goetz <hide@address.com>
 * @link    http://www.amazon.de Amazon
 * @version $Id: amazon.php,v 1.23 2008/06/29 11:13:02 andig2 Exp $
 */

// Base address of the Amazon site; every URL built by this engine starts with it
$GLOBALS['amazonServer']	= 'http://www.amazon.de';
// Marker put in front of ids returned by the search; it is stripped again
// before an id is placed into an Amazon URL
$GLOBALS['amazonIdPrefix']  = 'amazon:';

/**
 * Describe this engine
 *
 * @todo    Include image search capabilities etc in meta information
 * @return  array   Map with the display name ('name') and the stability
 *                  flag ('stable', currently 0) of the engine
 */
function amazonMeta()
{
    $meta = array();

    $meta['name']   = 'Amazon (de)';
    $meta['stable'] = 0;

    return $meta;
}

/**
 * Get the URL offered for searching a title on the external site
 *
 * The title is not used: the function hands back the plain address of the
 * configured Amazon server.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string  $title  The search string (ignored)
 * @return  string          The configured Amazon server address
 */
function amazonSearchUrl($title)
{
    global $amazonServer;

    $searchUrl = $amazonServer;

    return $searchUrl;
}

/**
 * Get the URL to visit an item on the external site
 *
 * Strips the engine's id prefix (if present) from the id and builds the
 * ASIN detail URL on the configured Amazon server, followed by the
 * AMAZON_ASSOCIATE constant.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string  $id The movie's external id, with or without the id prefix
 * @return  string      The visit URL
 */
function amazonContentUrl($id)
{
    global $amazonServer, $amazonIdPrefix;

    // The prefix is configuration data, not a pattern: quote it so that any
    // regex metacharacter in it is matched literally
    $prefixPattern = '|^'.preg_quote($amazonIdPrefix, '|').'|';
    $id = preg_replace($prefixPattern, '', $id);

    return $amazonServer.'/exec/obidos/ASIN/'.$id.'/'.AMAZON_ASSOCIATE;
}

/**
 * Search a Movie/DVD/Book etc
 *
 * Posts the title to the blended search form of the configured Amazon
 * server and scrapes the result table of the returned page. A failed
 * request is reported by appending the error text to the global
 * $CLIENTERROR; the function then still returns the (empty) result.
 *
 * @author  Andreas Goetz (hide@address.com)
 * @param   string  $title  The search string
 * @return  array           Key 'encoding' holds the page encoding; the
 *                          numeric keys hold one array per hit with the
 *                          prefixed 'id' and the 'title'
 */
function amazonSearch($title)
{
    global $amazonServer, $amazonIdPrefix, $cache;
    global $CLIENTERROR;

    $post = 'size=10'.
            '&url='.urlencode('index=blended').
            '&field-keywords='.urlencode($title);

    $resp = httpClient($amazonServer.'/exec/obidos/search-handle-form/ref=sr_sp_go_qs/028-5046340-6062930', $cache, array('post' => $post));
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    $data = array();

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    // a failed request may not carry a body at all
    $html = isset($resp['data']) ? $resp['data'] : '';

    // one match per result row: [1] link, [2] title markup, [3] rest of the cell
    if (preg_match_all('/<td class="dataColumn">.+?<a href="(.+?)"><span class="srTitle">(.+?)<\/span>(.+?)<\/td><\/tr>/is', $html, $m, PREG_SET_ORDER))
    {
        foreach ($m as $row)
        {
            // rows whose title is an image carry no usable text
            // (strpos replaces ereg(), which no longer exists since PHP 7)
            if (strpos($row[2], '<img') !== false) continue;

            // the item id is the path segment following /dp/
            if (!preg_match('#/dp/(.+?)/#', $row[1], $m2)) continue;

            // start from a clean entry so nothing leaks over from the previous row
            $info               = array();
            $info['id']         = $amazonIdPrefix.$m2[1];
            $info['title']      = trim(strip_tags($row[2]));

            // Adds type of media and date if available to aid in searching
            if (preg_match('/\((.+?)\)/is', $row[3], $tempdata))
            {
                $binding = str_replace(array('<span class="binding">', '</span>'), '', $tempdata[1]);
                $info['title'] .= ' ('.$binding.')';
            }

            $data[] = $info;
        }
    }

    return $data;
}

/**
 * Fetches the data for a given Amazon ID
 *
 * Loads the item page and the product description page from the configured
 * Amazon server and scrapes the details out of the returned HTML. Failed
 * requests are reported by appending the error text to the global
 * $CLIENTERROR; fields that cannot be found are left out of the result.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @author  Victor La <hide@address.com>
 * @param   string  $amazonID   Amazon-ID, with or without the engine's id prefix
 * @return  array               Result data: always 'encoding'; when found
 *                              'title', 'subtitle', 'coverurl', 'director',
 *                              'language' (comma separated), 'year', 'runtime',
 *                              'rating', 'genres' (array),
 *                              'cast' (newline separated) and 'plot'
 */
function amazonData($amazonID)
{
    global $amazonServer, $amazonIdPrefix;
    global $CLIENTERROR;

    // the prefix is plain text, so quote it before using it in a pattern
    $amazonID = preg_replace('/^'.preg_quote($amazonIdPrefix, '/').'/', '', $amazonID);

    // German language names as shown on the page => names stored in the result
    $languages = array('deutsch' => 'german', 'englisch' => 'english', 'spanisch' => 'spanish');

    // Keywords found on the page => genre names stored in the result
    $genres = array(
                    'Anime' => 'Animation',
                    'Horror' => 'Horror',
                    'Monumental' => '',
                    'Abenteuer' => 'Adventure',
                    'Abenteuerfilm' => 'Adventure',
                    'Eastern' => '',
                    'Kriegsfilm' => 'War',
                    'Krieg' => 'War',
                    'War' => 'War',
                    'World War' => 'War',
                    'Action' => 'Action',
                    'Historienfilm' => 'History',
                    'Historical' => 'History',
                    'Krimi' => 'Crime',
                    'Kriminalfilm' => 'Crime',
                    'Thriller' => 'Thriller',
                    'Dokumentation' => 'Documentary',
                    'Dokumentarfilm' => 'Documentary',
                    'Erotik' => 'Adult',
                    'Geschichte' => 'History',
                    'Musik' => 'Music',
                    'Musivideo' => 'Music',
                    'Reise' => '',
                    'Biografie' => 'Biography',
                    'Biographie' => 'Biography',
                    'Sport' => 'Sport',
                    'Sports' => 'Sport',
                    'Zeitgeschichte' => 'History',
                    'Fantasy' => 'Fantasy',
                    'Komödie' => 'Comedy',
                    'Humor' => 'Comedy',
                    'Science Fiction' => 'Sci-Fi',
                    'Trickfilm' => 'Animation',
                    'Zeichentrick' => 'Animation',
                    'Kinder' => 'Family',
                    'Familienfilm' => 'Family',
                    'Drama' => 'Drama',
                    'Liebesfilm' => 'Romance',
                    'Romantic' => 'Romance',
                    'Western' => 'Western',
                    'Horror/Occult' => 'Mystery',
                    'Comedy' => 'Comedy',

                    // Didn't check these, took 'em from dvdb.php
                    'Familie' => 'Family',
                    'Actionkomödie' => 'Comedy',
                    'Westernkomödie' => 'Western',
                    'Musikfilm' => 'Musical',
                    'Animation' => 'Animation',
                    'Splatter' => 'Horror',
                    'Filmoperette' => 'Musical',
                    'Horrorkomödie' => 'Comedy',
                    'Musikdokumentation' => 'Music',
                    'Mystery' => 'Mystery',
                    'Roadmovie' => 'Thriller',
                    'Satire' => 'Comedy',
                   );

    // Genres are looked up case-insensitively. Lowercasing only the tail of
    // the keyword made keys with inner capitals ('Science Fiction',
    // 'World War', 'Horror/Occult') impossible to hit.
    $genreLookup = array();
    foreach ($genres as $keyword => $mapped)
    {
        $genreLookup[strtolower($keyword)] = $mapped;
    }

    // fetch mainpage
    $resp = httpClient($amazonServer.'/exec/obidos/ASIN/'.$amazonID, 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    // a failed request may not carry a body at all
    $html = isset($resp['data']) ? $resp['data'] : '';

    $data = array();  //result

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    // Titles: "<title> - <subtitle>", the subtitle being optional
    if (preg_match('/<b class="sans">(.+?)<\/b>/', $html, $ary))
    {
        // explode replaces split(), which no longer exists since PHP 7
        $parts = explode(' - ', $ary[1]);
        $data['title']    = trim($parts[0]);
        $data['subtitle'] = isset($parts[1]) ? trim($parts[1]) : '';
    }

    // Cover URL
    if (preg_match('/registerImage\("original_image", "(.+?)"/i', $html, $ary))
    {
        $data['coverurl'] = trim($ary[1]);
    }

    // Director (text of the first link following the label)
    if (preg_match('/<b>Regisseur\(e\):<\/b>.+?>(.+?)<\/a>/is', $html, $ary))
    {
        $data['director'] = trim($ary[1]);
    }

    // Language
    if (preg_match('/<b>Sprache:<\/b>(.*?)<\/li>/is', $html, $ary))
    {
        $found = array();
        foreach (explode(', ', trim(strtolower($ary[1]))) as $templang)
        {
            $templang = trim($templang);
            if (!$templang) continue;
            if (isset($languages[$templang])) $templang = $languages[$templang];
            if (!$templang) continue;
            $found[] = $templang;
        }
        // Entries are glued with blanks and every blank then becomes ", ";
        // this keeps the established output format
        $data['language'] = str_replace(' ', ', ', implode(' ', $found));
    }

    // Year (first four digit number after the release date label)
    if (preg_match('/<b>(DVD-)?Erscheinungs(datum|termin):<\/b>.*?(\d{4})(<\/li>)?/is', $html, $ary))
    {
        $data['year'] = trim($ary[3]);
    }

    // Runtime
    if (preg_match('/<b>Spieldauer:<\/b>(.+?)Minuten<\/li>/is', $html, $ary))
    {
        $data['runtime'] = trim($ary[1]);
    }

    // Rating: the star image name encodes e.g. 4-5 for 4.5 stars; the value is doubled
    if (preg_match('/<b>Durchschnittliche Kundenbewertung:<\/b>.+?<img src=".+?customer-reviews\/stars-(\d)-(\d)./is', $html, $ary))
    {
        $data['rating'] = 2 * (float) ($ary[1].'.'.$ary[2]);
    }

    // Genres (as array)
    // TODO: There are a lot of comma separated genres, they should be parsed too
    if (preg_match_all('|<input[^>]*?name="field.keywords"[^>]*?value="([^"]*?)"|si', $html, $ary, PREG_SET_ORDER))
    {
        foreach ($ary as $row)
        {
            $genre = strtolower(trim($row[1]));
            if (!$genre) continue;
            if (isset($genreLookup[$genre]))
                $data['genres'][] = $genreLookup[$genre];
        }
    }

    // Cast (every actor listed once, in page order)
    if (preg_match('/<li><b>Darsteller:<\/b>(.+?)<\/li>/si', $html, $ary))
    {
        preg_match_all('/<a href=".+?>(.+?)<\/a>/si', $ary[1], $links, PREG_SET_ORDER);

        $cast = array();
        foreach ($links as $row)
        {
            $actor = trim($row[1]);
            if (!in_array($actor, $cast, true)) $cast[] = $actor;
        }
        $data['cast'] = join("\n", $cast);
    }

    // Fetch plot
    $resp = httpClient($amazonServer.'/gp/product/product-description/'.$amazonID, 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    $html = isset($resp['data']) ? $resp['data'] : '';

    // Plot: the line following the heading; with several headings the last one wins
    if (preg_match_all('/Kurzbeschreibung.+?\n(.+?)\n(.+?)\n/si', $html, $ary, PREG_SET_ORDER))
    {
        $last = end($ary);
        $data['plot'] = trim(html_clean($last[1]));
    }

    return $data;
}

?>
Return current item: VideoDB