Location: PHPKode > projects > VideoDB > videodb/engines/dvdpalace.php
<?php
/**
 * DVD Palace Parser
 *
 * Parse data from a german DVD Database
 *
 * @author  Chinamann <hide@address.com>
 * @package Engines
 * @link    http://www.dvd-palace.de
 */

// Base URL of the DVD Palace site; the search, detail and cover URLs built in this file all start with it.
$GLOBALS['dvdpalaceServer']   = 'http://www.dvd-palace.de';
// Prefix put in front of the numeric DVD Palace id in ids returned by this engine
// (and stripped again before an id is used to build a URL).
$GLOBALS['dvdpalaceIdPrefix'] = 'dvdpalace:';
// Page size used to compute the `start` offset when paging through result lists.
// NOTE(review): the global name is not engine-specific - verify no other engine defines the same global.
$GLOBALS['resultsPerPage']    = 15; // DVD Palace Config

/**
 * Describe this engine
 *
 * @todo    Include image search capabilities etc in meta information
 * @return  array   Display name of the engine plus its 'stable' and
 *                  'supportsEANSearch' flags
 */
function dvdpalaceMeta()
{
    $meta = array();

    $meta['name']              = 'DVD Palace (de)';
    $meta['stable']            = 1;
    $meta['supportsEANSearch'] = 1;

    return $meta;
}

/**
 * Get Url to search DVD Palace for a movie
 *
 * The search term is percent-encoded with urlencode() so that spaces, '&',
 * '#' and non-ASCII characters cannot break or split the query string. The
 * actor search in this file builds its URL the same way.
 *
 * @author  Chinamann <hide@address.com>
 * @param   string  $title       The search string
 * @param   string  $searchType  Not needed for this engine; only accepted so
 *                               the signature is compatible with other engines
 * @return  string               The search URL (GET)
 */
function dvdpalaceSearchUrl($title, $searchType = 'title')
{
    global $dvdpalaceServer;

    // htmlentities() escapes for HTML output, not for URLs: it leaves spaces
    // untouched and turns '&' into '&amp;', which starts a new query parameter.
    return $dvdpalaceServer.'/dvddatabase/dbsearch.php?action=1&suchbegriff='.urlencode($title);
}

/**
 * Build the URL of the DVD Palace page for one movie
 *
 * Accepts ids carrying the legacy "DP" prefix, the current engine prefix, or
 * no prefix at all.
 *
 * @author  Chinamann <hide@address.com>
 * @param   string  $id The movie's external id
 * @return  string      The visit URL
 */
function dvdpalaceContentUrl($id)
{
    global $dvdpalaceServer, $dvdpalaceIdPrefix;

    // patterns are applied in order: legacy prefix first, then the current one
    $prefixPatterns = array('/^DP/', '/^'.$dvdpalaceIdPrefix.'/');
    $bareId         = preg_replace($prefixPatterns, '', $id);

    return $dvdpalaceServer.'/dvd-datenbank/'.$bareId.'.html';
}

/**
 * Search a Movie
 *
 * Searches for a given title on the DVD Palace and returns the found links in
 * an array. Result pages are fetched one after another until the page count
 * announced in the response ("Seiten (<b>N</b>)") is reached. If the response
 * URL is a getinfo.php title page, only that title is returned.
 *
 * @author  Chinamann <hide@address.com>
 * @param   string  $title       The search string
 * @param   string  $searchType  Passed through to dvdpalaceSearchUrl()
 * @return  array                Numerically indexed entries of the form
 *                               array('id' => ..., 'title' => ...) plus an
 *                               'encoding' entry; empty array if nothing was found
 */
function dvdpalaceSearch($title, $searchType = 'title')
{
    global $dvdpalaceServer;
    global $dvdpalaceIdPrefix;
    global $resultsPerPage;
    global $CLIENTERROR;

    // FSK logo codes that do not carry the age as a number
    $fsks = array("OA"=>" FSK0", "KJ"=>" FSK18", "JK"=>" FSK18 (indiziert?)", "NG"=>" FSK18 (indiziert?)");

    $ary    = array(); // has to exist before count() below, even without any hit
    $resp   = array();
    $pageNo = 1;

    while (true)
    {
        $url = dvdpalaceSearchUrl($title, $searchType).'&start='.(($pageNo - 1) * $resultsPerPage);

        $resp = httpClient($url, 1);
        if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

        // flatten the page to one line so that '.' in the patterns spans former line breaks
        $resp['data'] = preg_replace('/[\r\n\t]/', ' ', isset($resp['data']) ? $resp['data'] : '');
        $respUrl      = isset($resp['url']) ? $resp['url'] : '';

        // direct match (redirecting to individual title)?
        $single = array();
        if (preg_match('/^'.preg_quote($dvdpalaceServer,'/').'\/dvddatabase\/getinfo.php\?dvdid=([0-9]+)/i', $respUrl, $single))
        {
            if (preg_match('#<TR>.*?<TD CLASS="tabheader".*?>.*?<b>(.*?)<#i', $resp['data'], $matches)) {
                $ary[0] = array(
                    'id'    => $dvdpalaceIdPrefix.$single[1],
                    'title' => trim($matches[1])
                );
            }
            break;
        }

        // multiple matches
        if (preg_match_all('/<TR.*?<TD class="tabcontentbgwhite".*?<TD.*?\/dvd-datenbank\/([0-9]+)\.html">(.*?)<(.*?)<TD.*?<TD.*?<b>(.*?)<\/b>.*?<td.*?>(.*?)<\/tr>/i', $resp['data'], $data, PREG_SET_ORDER))
        {
            foreach ($data as $row)
            {
                // age rating, taken from the name of the FSK logo image
                $fsk = "";
                if (preg_match('/<IMG.*?fsk(.*?)logo/i', $row[5], $logo))
                {
                    $code = trim($logo[1]);
                    $key  = strtoupper($code);
                    $fsk  = isset($fsks[$key]) ? $fsks[$key] : " FSK".$code;
                }

                // third <br>-separated chunk holds "genre, <b>minutes</b> ..."
                $chunks  = preg_split('/<br>/i', $row[3], 4);
                $match   = isset($chunks[2]) ? $chunks[2] : '';
                $pieces  = explode(',', $match);
                $genre   = $pieces[0];
                $mlength = isset($pieces[1]) ? $pieces[1] : '';

                $min = "";
                if (preg_match('/<b>(.*?)<\/b>/i', $mlength, $m))
                {
                    $min = " ".trim($m[1])."min";
                }

                // release info is either a dotted date (year = third part) or a plain text
                $dateParts = preg_split('/\./', $row[4]);
                $text      = $dateParts[0];
                $year      = isset($dateParts[2]) ? $dateParts[2] : "";
                if ($year == "") $year = $text; // "out now"-text
                if ($year != "") $year = " (".trim($year).")";

                $info          = array();
                $info['id']    = $dvdpalaceIdPrefix.$row[1];
                $info['title'] = trim($row[2]).$year.$fsk." ".trim($genre).$min;
                $info['title'] = preg_replace('/  /', ' ', $info['title']);
                $ary[]         = $info;
            }
        }

        // continue with the next page while the announced page count is not reached
        if (preg_match('#Seiten \(<b>([0-9]+)</b>\)#i', $resp['data'], $single))
        {
            if ($pageNo >= $single[1]) break;
            else $pageNo++;
        }
        else break;
    }

    // do not return an array which contains only an encoding attribute
    if (count($ary) == 0) return array();

    // add encoding (taken from the last response that was fetched)
    $ary['encoding'] = engine_get_encoding($resp);

    return $ary;
}

/**
 * Fetches the data for a given DVD Palace-ID
 *
 * Downloads the getinfo.php page of the title and extracts the fields with
 * regular expressions. 'encoding', 'imdbID', 'title', 'subtitle', 'director',
 * 'language', 'fsk' and 'cast' are always present in the result; the other
 * keys ('orgtitle', 'country', 'year', 'coverurl', 'runtime', 'rating',
 * 'plot', 'genres', 'barcode', 'comment') are only set when the matching
 * part of the page was found.
 *
 * @author  Chinamann <hide@address.com>
 * @param   string  $dvdpalaceID  DVD Palace-ID, with or without the "DP" or
 *                                current engine prefix
 * @return  array                 Result data
 */
function dvdpalaceData($dvdpalaceID)
{
    global $dvdpalaceServer;
    global $dvdpalaceIdPrefix;
    global $CLIENTERROR;

    $dvdpalaceID = preg_replace('/^DP/', '', $dvdpalaceID); // old prefix
    $dvdpalaceID = preg_replace('/^'.preg_quote($dvdpalaceIdPrefix, '/').'/', '', $dvdpalaceID);

    $data = array(); // result
    $ary  = array(); // temp

    // fetch mainpage
    $resp = httpClient($dvdpalaceServer.'/dvddatabase/getinfo.php?dvdid='.$dvdpalaceID, 0); // don't use cache -> cover img
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    // flatten the page to one line so that '.' in the patterns spans former line breaks
    $html         = preg_replace('/[\r\n\t]/', ' ', isset($resp['data']) ? $resp['data'] : '');
    $resp['data'] = $html;

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    // add engine ID -> important for non edit.php refetch
    $data['imdbID'] = $dvdpalaceIdPrefix.$dvdpalaceID;

    // Titles: "Title - Subtitle"; the subtitle part is optional
    $fullTitle = '';
    if (preg_match('#<TR>.*?<TD CLASS="tabheader".*?>.*?<b>(.*?)<#i', $html, $ary))
    {
        $fullTitle = trim($ary[1]);
    }
    $titleParts       = array_pad(explode(' - ', $fullTitle, 2), 2, '');
    $data['title']    = trim($titleParts[0]);
    $data['subtitle'] = trim($titleParts[1]);

    // Country and Year
    if (preg_match('/<TD.*?>Originaltitel<\/TD>.*?<TD.*?>.*?<b>(.*?)<\/b>.*?\((.*?)\)/i', $html, $ary))
    {
        $data['orgtitle'] = trim($ary[1]);

        $countries = array();
        foreach (preg_split('/[\/,:]/', $ary[2]) as $country)
        {
            // drop the year digits that share the parenthesis with the countries
            $country = trim(preg_replace('/[0-9]*/', '', $country));
            if ($country != '') $countries[] = $country;
        }
        $data['country'] = trim(join(', ', array_unique($countries)));

        // take first year occurrence
        if (preg_match('/([0-9][0-9][0-9][0-9])/', $ary[2], $yearMatch))
        {
            $data['year'] = $yearMatch[1];
        }
    }

    // Cover URL
    if (preg_match('/src="(\/showcover\.php.*?)"/i', $html, $ary))
    {
        // hack so that extension is validated correctly.
        $data['coverurl'] = $dvdpalaceServer.trim($ary[1])."#pic.jpg";
    }

    // Runtime
    if (preg_match('#<TD.*?>Laufzeit</TD>.*?<TD.*?>(.*?)min</TD>#i', $html, $ary)) {
        if (preg_match('#([0-9]+)#', $ary[1], $minutes)) {
            $data['runtime'] = trim($minutes[1]);
        }
    }

    // Director
    // NOTE(review): only the first linked name of the cell is used, as before - confirm
    // whether titles with several directors should list all of them.
    $data['director'] = '';
    if (preg_match('/<TD.*?>Regisseur\(e\)<\/TD>.*?<TD.*?>(.*?)<\/TD>/i', $html, $ary)
        && preg_match('/<a.*?>(.*?)<\/a>/si', $ary[1], $link))
    {
        $data['director'] = trim($link[1]);
    }
    if ($data['director'] == 'keine Angabe') $data['director'] = '';

    // Rating: rounded average of all "NN von NN" film ratings found on the page
    preg_match_all('/<h5>Bewertung des Films.*?<br>.*?<IMG.*?>.*?([0-9][0-9]) von [0-9][0-9]/si', $html, $ratings, PREG_PATTERN_ORDER);
    $count = count($ratings[1]);
    if ($count != 0)
    {
        $sum = 0;
        foreach ($ratings[1] as $value) $sum += intval($value);
        $data['rating'] = round($sum / $count);
    }

    // Languages: german name on the page => english name stored in the result
    $languages = array(
        'arabisch' => 'arabic',
        'bulgarisch' => 'bulgarian',
        'chinesisch' => 'chinese',
        'tschechisch' => 'czech',
        'dänisch' => 'danish',
        'holändisch' => 'dutch',
        'englisch' => 'english',
        'französisch' => 'french',
        'deutsch' => 'german',
        'griechisch' => 'greek',
        'ungarisch' => 'hungarian',
        'isländisch' => 'icelandic',
        'indisch' => 'indian',
        'israelisch' => 'israeli',
        'italienisch' => 'italian',
        'japanisch' => 'japanese',
        'koreanisch' => 'korean',
        'norwegisch' => 'norwegian',
        'polnisch' => 'polish',
        'portugisisch' => 'portuguese',
        'rumänisch' => 'romanian',
        'russisch' => 'russian',
        'serbisch' => 'serbian',
        'spanisch' => 'spanish',
        'schwedisch' => 'swedish',
        'thailändisch' => 'thai',
        'türkisch' => 'turkish',
        'vietnamesisch' => 'vietnamese',
        'kantonesisch' => 'cantonese',
        'katalanisch' => 'catalan',
        'zypriotisch' => 'cypriot',
        'zyprisch' => 'cypriot',
        'esperanto' => 'esperanto',
        'gälisch' => 'gaelic',
        'hebräisch' => 'hebrew',
        'hindi' => 'hindi',
        'jüdisch' => 'jewish',
        'lateinisch' => 'latin',
        'mandarin' => 'mandarin',
        'serbokroatisch' => 'serbo-croatian',
        'somalisch' => 'somali'
    );
    $lang_list = array();
    if (preg_match('/<TD.*?>Tonformat\(e\)<\/TD>.*?<TD.*?>(.*?)<\/TD>/i', $html, $ary))
    {
        // every word directly followed by ':' is treated as a language name
        preg_match_all('/(\w+):/si', $ary[1], $langs, PREG_PATTERN_ORDER);
        foreach ($langs[1] as $language) {
            $language = trim(strtolower($language));
            $language = html_entity_decode(strip_tags($language));
            $language = preg_replace('/\s+$/', '', $language);
            if (!$language) continue;
            if (!isset($languages[$language])) continue; // unknown names are dropped
            $lang_list[] = $languages[$language];
        }
    }
    $data['language'] = trim(join(', ', array_unique($lang_list)));

    // Plot
    if (
        preg_match('#alt="Studio/Label"></div><br>(.*?)</TD>#i', $html, $ary) ||
        preg_match('#<TD VALIGN="TOP" class="db_white_1" width="95%">(.*?)&nbsp;.*?</TD>#i', $html, $ary)
    )
    {
        $plot = preg_replace('#<em>.*?</em>#', ' ', $ary[1]);
        $plot = preg_replace('/<br.*?>/', "\n", $plot);
        $plot = preg_replace('/\s*?$/', '', html_entity_decode(strip_tags($plot)));
        $data['plot'] = trim($plot);
    }

    // FSK
    $fsk = "";
    if (preg_match('#<TD.*>Altersfreigabe \(FSK\)</TD>.*?<TD.*?>(.*?)&nbsp;#', $html, $ary))
    {
        $rated = $ary[1];

        if (preg_match('#([0-9]+)#', $rated, $age)) {
            $fsk = $age[1];
        }
        elseif (preg_match('#o\.A\.#i', $rated) || preg_match('#o\. A\.#i', $rated)) {
            $fsk = "0";
        }
        elseif (preg_match('#indiziert#i', $rated)) {
            $fsk = "18";
            $data['subtitle'] .= ' (indiziert)';
        }
        elseif (preg_match('#Keine Jugendfreigabe#i', $rated)) {
            $fsk = "18";
        }
        else {
            // nothing recognised: keep the cell text itself
            $rated = preg_replace('/&nbsp;/', ' ', $rated);
            $rated = preg_replace('/<br.*?>/', ' ', $rated);
            $rated = preg_replace('/\s*?$/', '', html_entity_decode(strip_tags($rated)));
            $fsk   = trim($rated);
        }
    }
    $data['fsk'] = $fsk;

    // Genres: german name on the page => english genre ('' = no counterpart)
    $genres = array(
        'Reise' => '',
        'Ratgeber' => '',
        'Revuefilm' => '',
        'Serie' => '',
        'Special' => '',
        'Sport' => 'Sport',
        'TV-Movie' => '',
        'Unterhaltung' => '',
        'Biographie' => 'Biography',
        'Thriller' => 'Thriller',
        'Kriminalfilm' => 'Crime',
        'Science Fiction' => 'Sci-Fi',
        'Kinderfilm' => 'Family',
        'Familie' => 'Family',
        'Dokumentation' => 'Documentary',
        'Action' => 'Action',
        'Actionkomödie' => 'Comedy',
        'Drama' => 'Drama',
        'Abenteuer' => 'Adventure', // key was misspelled 'Abemnteuer' and therefore never matched
        'Historienfilm' => 'History',
        'Monumentalfilm' => '',
        'Komödie' => 'Comedy',
        'Romanze' => 'Romance',
        'Horror' => 'Horror',
        'Splatter' => 'Horror',
        'Western' => 'Western',
        'Erotik' => 'Adult',
        'Klassiker' => '',
        'Eastern' => '',
        'Musikfilm' => 'Musical',
        'Trickfilm' => 'Animation',
        'Anime' => 'Animation',
        'Animation' => 'Animation',
        'Fantasy' => 'Fantasy',
        'Filmoperette' => 'Musical',
        'Horrorkomödie' => 'Comedy',
        'Kriegsfilm' => 'War',
        'Musikdokumentation' => 'Music',
        'Mystery' => 'Mystery',
        'Roadmovie' => 'Thriller',
        'Satire' => 'Comedy',
        'Westernkomödie' => 'Western',
        'Musik' => 'Music',
        'Musik (Oper' => 'Music',
        'Musik (Pop' => 'Music',
        'Musik (Rock' => 'Music',
        'Zeichentrick' => 'Animation'
    );
    if (preg_match('#<TD.*>Genre.*?</TD>.*?<TD.*?>(.*?)&nbsp;.*?<#', $html, $ary))
    {
        foreach (preg_split('/[\/,]/', $ary[1]) as $genre) {
            $genre = trim(html_entity_decode($genre));
            $genre = strip_tags($genre);
            if (!$genre) continue;
            // skip unknown genres and those without an english counterpart,
            // the same way unmapped languages are skipped above
            if (!isset($genres[$genre]) || $genres[$genre] == '') continue;
            $data['genres'][] = $genres[$genre];
        }
    }

    // Cast: one actor per line
    $data['cast'] = '';
    if (preg_match('/<TD.*?>Darsteller \/ Sprecher<\/TD>.*?<TD.*?>(.*?)&nbsp;<\/TD>/i', $html, $ary))
    {
        $castText = preg_replace('#<em>.*?</em>#', ' ', $ary[1]);
        $castText = trim(html_entity_decode(strip_tags($castText)));

        $casts = "";
        foreach (preg_split("/, /", $castText) as $actor) {
            $actor = preg_replace('/\s+$/', '', $actor);
            if (preg_match('/:$/', $actor)) continue; // entries ending in ':' are not added to the cast
            $casts .= trim($actor)."\n";
        }
        $data['cast'] = trim($casts);
    }

    // EAN-Code
    if (preg_match('#/dvddatabase/ean.php\?(.*?)["&]#i', $html, $ary))
    {
        $data['barcode'] = dvdpalaceDecodeEAN($ary[1]);
    }

    // Aspect Ratio
    if (preg_match('/<TD.*?>Bildformat\(e\)<\/TD>.*?<TD.*?>(.*?)<\/TD>/i', $html, $ary) && preg_match('#:#', $ary[1]))
    {
        // only trust data if ':' is present
        $aspect          = preg_replace('/\s*?$/', '', html_entity_decode(strip_tags($ary[1])));
        $data['comment'] = trim($aspect)."\n";
    }

    return $data;
}


/**
 * Decode DVD-Palace EAN-KEY
 *
 * The key is 20 characters long and ends in "==". Characters 0-15 form four
 * groups of four characters, each group yielding three digits via the lookup
 * tables below; characters 16 and 17 yield the 13th digit. The tables are
 * exactly the characters Base64 produces for ASCII digits.
 *
 * @author  Chinamann <hide@address.com>
 * @param  string  $key  KEY
 * @return string        EAN-String (13 digits), or "" if the key does not
 *                       have the expected shape or contains a character that
 *                       cannot be decoded
 */
function dvdpalaceDecodeEAN($key)
{
    //$code1 = 'AQtw'; // (until March 2005)
    $code2 = 'DTjz';        // 2nd char of a group: lower part of the 1st digit
    $code3 = 'AEIMQUYcgk';  // 3rd char of a group: 2nd digit
    $code4 = 'MNO';         // 1st char of a group: upper part of the 1st digit (times 4)
    $code5 = 'wxyz012345';  // 4th char of a group: 3rd digit

    // precheck
    if (strlen($key) != 20) return "";
    if (substr($key, -2) != "==") return "";

    $ean = '';

    // digits 1-12: four groups of four key characters, three digits each
    for ($pos = 0; $pos < 16; $pos += 4)
    {
        $upper  = strpos($code4, $key[$pos]);
        $lower  = strpos($code2, $key[$pos + 1]);
        $second = strpos($code3, $key[$pos + 2]);
        $third  = strpos($code5, $key[$pos + 3]);

        // strpos() returns false for characters outside the tables; appending
        // that would silently produce a truncated or wrong EAN
        if ($upper === false || $lower === false || $second === false || $third === false) return "";

        $first = $upper * 4 + $lower;
        if ($first > 9) return ""; // not a decimal digit

        $ean .= $first.$second.$third;
    }

    // digit 13 (since April 2005 the second character needs range decoding)
    //$ean .= strpos($code4,$key[16])*4 + strpos($code1,$key[17]); // 13 (until March 2005)
    $upper = strpos($code4, $key[16]);
    $lower = dvdpalaceDecodeEANChecksum($key[17]);
    if ($upper === false || $lower === null) return "";

    $last = $upper * 4 + $lower;
    if ($last > 9) return "";

    return $ean.$last;
}

/**
 * Sub function of dvdpalaceDecodeEAN()
 *
 * Maps the 18th key character to the lower part (0-3) of the last EAN digit.
 * The lookup tables in dvdpalaceDecodeEAN() are the Base64 characters of
 * ASCII digits, so the value is the character's position in the Base64
 * alphabet divided by 16. This gives the ranges A-P => 0, Q-Z and a-f => 1,
 * g-v => 2, w-z, 0-9, '+' and '/' => 3. The previous range checks returned 1
 * for 'P' and nothing for '+' and '/'.
 *
 * @author  Chinamann <hide@address.com>
 * @param   string  $lastChar  last character of the KEY (only the first byte is looked at)
 * @return  int|null           substitution value for last EAN digit, null if
 *                             the character is not part of the Base64 alphabet
 */
function dvdpalaceDecodeEANChecksum($lastChar) {
    $alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

    $char = substr((string) $lastChar, 0, 1);
    if ($char === '' || $char === false) return null;

    $index = strpos($alphabet, $char);
    if ($index === false) return null;

    // the upper two of the six Base64 bits carry the value
    return $index >> 4;
}

/**
 * Parses Actor-Details
 *
 * Fetch covers and URLs of movies with this actor. All result pages are
 * fetched; the entries of every page are appended to the result.
 *
 * The result list is read as groups of three "tabcontentbgwhite" cells per
 * movie (picture, link & name, additional info).
 * NOTE(review): the three-cells-per-movie layout is taken from the original
 * loop's case labels - verify against the live page.
 *
 * @author  Chinamann <hide@address.com>
 * @param  string  $name  Name of the Actor
 * @return array          list of array(0 => Movie-URL, 1 => Thumbnail-URL);
 *                        the thumbnail is '' if the picture cell has no image,
 *                        the list is empty if nothing was found
 */
function dvdpalaceMoviesByActor($name)
{
    global $dvdpalaceServer;
    global $resultsPerPage;
    global $CLIENTERROR;

    $ary = array();

    // fetch data by name
    $pageNo = 1;
    while (true) {
        $url  = '/dvddatabase/dbsearch.php?action=2&suchbegriff='.urlencode($name).'&start='.(($pageNo - 1) * $resultsPerPage);
        $resp = httpClient($dvdpalaceServer.$url, 1);
        if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

        // flatten the page to one line so that '.' in the patterns spans former line breaks
        $html = preg_replace('/[\r\n\t]/', ' ', isset($resp['data']) ? $resp['data'] : '');

        // multiple matches: collect ALL cells (preg_match would only return the first one)
        if (preg_match_all('/<td class="tabcontentbgwhite".*?>(.*?)<\/td>/si', $html, $cells, PREG_PATTERN_ORDER))
        {
            foreach (array_chunk($cells[1], 3) as $row)
            {
                // $row[0] = Picture, $row[1] = Link & Name, $row[2] = Adds (not needed)
                if (!isset($row[1]) || !preg_match('/<a.*?href="(.*?)"/i', $row[1], $lurl)) continue;

                $thumb = '';
                if (preg_match('/<img.*?src="(.*?)"/i', $row[0], $purl)) {
                    $thumb = $dvdpalaceServer.$purl[1];
                }

                // append, so that later pages do not overwrite earlier ones
                $ary[] = array(0 => $dvdpalaceServer.$lurl[1], 1 => $thumb);
            }
        }

        // continue with the next page while the announced page count is not reached
        if (preg_match('#Seiten \(<b>([0-9]+)</b>\)#i', $html, $single)) {
            if ($pageNo >= $single[1]) break;
            else $pageNo++;
        }
        else break;
    }

    return $ary;
}

/**
 * List every id prefix this engine has used for the ImdbId
 *
 * @author  Chinamann <hide@address.com>
 * @return  array     The current prefix followed by the old "DP" prefix
 */
function dvdpalaceImdbIdPrefixes()
{
    global $dvdpalaceIdPrefix;

    $prefixes   = array();
    $prefixes[] = $dvdpalaceIdPrefix; // current prefix
    $prefixes[] = 'DP';               // old prefix

    return $prefixes;
}

?>
Return current item: VideoDB