Location: PHPKode > projects > VideoDB > videodb/engines/allocine.php
<?php
/**
 * Allocine Parser
 *
 * Parses data from the Allocine.fr
 *
 * @package Engines
 * @author  Douglas Mayle   <hide@address.com>
 * @author  Andreas Gohr    <hide@address.com>
 * @author  tedemo          <hide@address.com>
 * @link    http://www.allocine.fr  Allocine (French movie database)
 * @version $Id: allocine.php,v 1.13 2009/03/02 20:40:41 andig2 Exp $
 */

// Base URL of the Allocine site; the engine functions in this file build their request URLs from it.
$GLOBALS['allocineServer']	    = 'http://www.allocine.fr';
// Prefix that marks an external id as belonging to this engine (e.g. "allocine:1234");
// allocineContentUrl() and allocineData() strip it to obtain the bare film number.
$GLOBALS['allocineIdPrefix']    = 'allocine:';

/**
 * Describe this engine
 *
 * @todo    Include image search capabilities etc in meta information
 * @return  array   Engine meta data; currently only the display name
 */
function allocineMeta()
{
    $meta         = array();
    $meta['name'] = 'Allocine (fr)';

    return $meta;
}

/**
 * Replace accented Latin letters by their unaccented counterparts so that
 * titles containing accented characters can be used in a search query.
 *
 * The letters are matched as literal byte sequences in the encoding of this
 * source file, so $title has to be in the same encoding.
 *
 * @author Martin Vauchel <hide@address.com>
 * @param  string  $title  The search string
 * @return string          The search string with no accents
 */
function removeAccents($title)
{
    // Every accented letter sits right next to its replacement. The previous
    // implementation used two parallel lists that had drifted out of step
    // (a stray empty entry), which shifted all replacements from "ï" onwards.
    static $map = array(
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a',
        'ç' => 'c',
        'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
        'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o',
        'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
        'ý' => 'y', 'ÿ' => 'y',
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A',
        'Ç' => 'C',
        'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O',
        'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
        'Ý' => 'Y',
    );

    return str_replace(array_keys($map), array_values($map), $title);
}

/**
 * Build the URL of the Allocine search page for a title
 *
 * @author  Douglas Mayle <hide@address.com>
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    $title    The search string
 * @return  string              The search URL (GET)
 */
function allocineSearchUrl($title)
{
    global $allocineServer;

    // accents are stripped first, then the keyword is URL-encoded
    $keyword = urlencode(removeAccents($title));

    return $allocineServer.'/recherche?motcle='.$keyword.'&rub=1';
}

/**
 * Build the URL of the Allocine page of a specific movie
 *
 * @author  Douglas Mayle <hide@address.com>
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    $id    The movie's external id, with or without the engine prefix
 * @return  string           The visit URL
 */
function allocineContentUrl($id)
{
    global $allocineServer, $allocineIdPrefix;

    // drop a leading engine prefix to obtain the bare Allocine film number
    $prefixPattern = '/^'.$allocineIdPrefix.'/';
    $filmNumber    = preg_replace($prefixPattern, '', $id);

    return $allocineServer.'/film/fichefilm_gen_cfilm='.$filmNumber.'.html';
}


/**
 * Search a Movie
 *
 * Searches for a given title on Allocine and returns the found links in
 * an array. The result always carries an 'encoding' entry; the matches are
 * stored under numeric keys, each with an 'id' (engine prefix + film number)
 * and a 'title'. For multiple matches the title is HTML: the scraped title
 * followed by a <small> element holding the additional details captured from
 * the result entry (e.g. the year).
 *
 * @author  Douglas Mayle <hide@address.com>
 * @author  Tiago Fonseca <hide@address.com>
 * @author  Charles Morgan <hide@address.com>
 * @param   string    $title    The search string
 * @return  array               Associative array with id and title
 */
function allocineSearch($title)
{
    global $allocineServer;
    global $allocineIdPrefix;
    global $CLIENTERROR;

    // same URL (including accent removal) as offered to the user as search link
    $resp = httpClient(allocineSearchUrl($title), 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    $data = array();

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    // direct match (redirecting to individual title)?
    $single        = array();
    $directPattern = '/^'.preg_quote($allocineServer, '/').'\/film\/fichefilm_gen_cfilm=(\d+)\.html/';
    if (isset($resp['url']) && preg_match($directPattern, $resp['url'], $single))
    {
        $data[0]['id']    = $allocineIdPrefix.$single[1];
        $data[0]['title'] = $title;
        return $data;
    }

    // multiple matches
    $html = isset($resp['data']) ? $resp['data'] : '';
    $m    = array();
    preg_match_all('/<A HREF="\/film\/fichefilm_gen_cfilm=(\d+)\.html" class="link1">(.*?)<\/A><\/h4>(<div[^>]*><h4[^>]*>(\d+)<\/h4><\/div>)?(<div[^>]*><h5[^>]*>([^<]+))?/i', $html, $m, PREG_SET_ORDER);
    foreach ($m as $row)
    {
        $info          = array();
        $info['id']    = $allocineIdPrefix.$row[1];
        $info['title'] = $row[2];

        // add year (helpful in case of multiple matches); an optional group
        // that did not take part in the match may still be present as ''
        $info['title'] .= '<small>';
        if (isset($row[4]) && $row[4] !== '') $info['title'] .= ' '.$row[4];
        if (isset($row[6]) && $row[6] !== '') $info['title'] .= ' '.$row[6];
        $info['title'] .= '</small>';

        $data[] = $info;
    }

    return $data;
}

/**
 * Fetches the data for a given Allocine-ID
 *
 * Downloads the movie page and the casting page and scrapes them with
 * regular expressions. The result always contains 'encoding', 'title',
 * 'subtitle', 'language', 'year', 'coverurl', 'runtime' (minutes),
 * 'director' and 'plot'; 'country', 'genres' and 'cast' are only set when
 * the corresponding section was found on the page. HTTP errors are appended
 * to the global $CLIENTERROR and scraping continues on whatever was returned.
 *
 * @author  Douglas Mayle <hide@address.com>
 * @author  Tiago Fonseca <hide@address.com>
 * @param   string  $imdbID   Allocine film id, with or without the engine prefix
 * @return  array             Result data
 */
function allocineData($imdbID)
{
    global $allocineServer;
    global $allocineIdPrefix;
    global $CLIENTERROR;

    $allocineID = preg_replace('/^'.$allocineIdPrefix.'/', '', $imdbID);

    // fetch mainpage
    $resp = httpClient($allocineServer.'/film/fichefilm_gen_cfilm='.$allocineID.'.html', 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    $data = array(); // result

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    $html = isset($resp['data']) ? $resp['data'] : '';
    $ary  = array(); // temp

    // Titles ("title - subtitle"); pad so that a title without separator
    // yields an empty subtitle instead of an undefined offset
    preg_match('/<h1 class=\"TitleFilm\">(.*)<\/h1>/iU', $html, $ary);
    $fullTitle        = isset($ary[1]) ? trim($ary[1]) : '';
    list($t, $s)      = array_pad(explode(' - ', $fullTitle, 2), 2, '');
    $data['title']    = html_clean($t);
    $data['subtitle'] = html_clean($s);

    $data['language'] = 'french';

    // Year: release date link first, production year as fallback
    preg_match('/<a href="\/film\/agenda_gen_date=\d+\/\d+\/(\d+)\.html".*?>/i', $html, $ary);
    if (empty($ary[1]))
    {
        preg_match('/<h3 class=\"SpProse\">Ann.{1}e de production : (\d+)<\/h3>/i', $html, $ary);
    }
    $data['year'] = isset($ary[1]) ? trim($ary[1]) : '';

    // Cover URL
    preg_match('/<IMG SRC="([^"]+)?" (border|alt)(.*\n){2,10}.*Date de sortie/i', $html, $ary);
    $data['coverurl'] = isset($ary[1]) ? trim($ary[1]) : '';

    // Runtime in minutes; the minutes part is optional and the whole
    // line may be missing, so default both components to 0
    preg_match('/Dur.{1}e : (\d+)h\.{0,1}( (\d+)min\.{0,1}){0,1}/si', $html, $ary);
    $hours           = isset($ary[1]) ? (int) $ary[1] : 0;
    $minutes         = isset($ary[3]) ? (int) $ary[3] : 0;
    $data['runtime'] = $hours * 60 + $minutes;

    // Director
    preg_match('/R.{1}alis.{1} par\s*<A.+HREF="\/personne[?\/].+">(.+)<\/A>/iU', $html, $ary);
    $data['director'] = isset($ary[1]) ? trim($ary[1]) : '';

    // Countries: French nationality adjective (regex, "." stands for an
    // accented letter) => English country name. Change the values to get
    // localized country names.
    $map_countries = array(
        'am.ricain'         => 'USA',
        'indien'            => 'India',
        'allemand'          => 'Germany',
        'britannique'       => 'UK',
        'fran.ais'          => 'France',
        'Ta.wan'            => 'Taiwan',
        'n.o z.landais'     => 'New Zealand',
        'hong-kongais'      => 'Hong-Kong',
        'mexicain'          => 'Mexico',
        'roumain'           => 'Romania',
        'belge'             => 'Belgium',
        'luxembourgeois'    => 'Luxembourg',
        'irlandais'         => 'Republic of Ireland',
        'tha.landais'       => 'Thailand',
        'danois'            => 'Denmark',
        'tch.que'           => 'Czech Republic',
        'chinois'           => 'China',
        'japonais'          => 'Japan',
        'espagnol'          => 'Spain',
        'canadien'          => 'Canada',
        'italien'           => 'Italy',
        'australien'        => 'Australia',
        'portugais'         => 'Portugal',
        'norv.gien'         => 'Norway',
        'bulgare'           => 'Bulgaria',
        'hollandais'        => 'Netherlands',
        'gr.c'              => 'Greece',
        'hongrois'          => 'Hungary',
        'islandais'         => 'Iceland',
        'isra.lien'         => 'Israel',
        'cor.en'            => 'South Korea',
        'polonais'          => 'Poland',
        'russe'             => 'Russia',
        'ukrainien'         => 'Ukraine',
        'serbe'             => 'Serbia',
        'su.dois'           => 'Sweden',
        'turc'              => 'Turkey',
        'vietnamien'        => 'Vietnam');

    if (preg_match_all('/<div style=\"padding: 2 0 2 0;\"><h3 class=\"SpProse\">Film (.+).&nbsp;<\/h3><h3 class=\"SpProse\">Genre :/iU', $html, $ary, PREG_PATTERN_ORDER) > 0)
    {
        $countries = array();
        foreach (explode(',', join(', ', $ary[1])) as $origin)
        {
            $country = allocineMapTerm($origin, $map_countries);

            // skip nationalities without a mapping and countries already
            // listed; compare whole names so "UK" is not mistaken for a
            // duplicate of "Ukraine"
            if ($country !== '' && !in_array($country, $countries, true))
            {
                $countries[] = $country;
            }
        }
        $data['country'] = implode(', ', $countries);
    }

    // Plot
    $data['plot'] = '';
    preg_match('/<h2 class=\"SpBlocTitle\" >Synopsis<\/h2>[^\n]+\n[^\n]*\n[^\n]*<div align=\"justify\"><h4>(.+)<\/h4>/Us', $html, $ary);
    if (!empty($ary[1])) $data['plot'] = trim($ary[1]);

    // And cleanup
    $data['plot'] = preg_replace('/[\n\r]/', ' ', $data['plot']);
    $data['plot'] = preg_replace('/  /', ' ', $data['plot']);
    $data['plot'] = trim($data['plot']);

    // French genre (regex) => English genre; '-' means "keep the French name".
    // Order matters: the first matching pattern wins.
    $map_genres = array(
        'Com.die Dramatique' => 'Drama',
        'Com.die'            => 'Comedy',
        'Dessin anim.'       => 'Animation',
        'Famille'            => 'Family',
        'Animation'          => 'Animation',
        'Espionnage'         => '-',
        'Karat.'             => 'Action',
        'Divers'             => '-',
        'Biopic'             => 'Biography',
        'Historique'         => 'History',
        'Guerre'             => 'War',
        'Documentaire'       => 'Documentary',
        'Science fiction'    => 'Sci-Fi',
        'Policier'           => 'Crime',
        'Thriller'           => 'Thriller',
        'Musique'            => 'Musical',
        'horreur'            => 'Horror',
        'Aventure'           => 'Adventure',
        'Fantastique'        => 'Fantasy',
        'Drame'              => 'Drama');

    // Genres (as Array)
    if (preg_match_all('/<h3 class=\"SpProse\">Genre : (.+)<\/h3>/iU', $html, $ary, PREG_PATTERN_ORDER) > 0)
    {
        $data['genres'] = array();
        foreach (explode(',', join(', ', $ary[1])) as $genre)
        {
            $mapped = allocineMapTerm($genre, $map_genres);

            // genres without an English equivalent (or without any mapping)
            // are kept under their scraped name
            $data['genres'][] = ($mapped === '' || $mapped === '-') ? trim($genre) : $mapped;
        }
    }

    // If sub-title is blank, we'll try to fill in the original title for foreign films.
    if (empty($data['subtitle']))
    {
        preg_match('/<h3 class=\"SpProse\">Titre original : (.*?)<\/h3>/', $html, $ary);
        if (!empty($ary[1])) $data['subtitle'] = trim($ary[1]);
    }

    // fetch credits
    $resp = httpClient($allocineServer.'/film/casting_gen_cfilm='.$allocineID.'.html', 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
    $html = isset($resp['data']) ? $resp['data'] : '';

    // Cast: one line per actor, "name::role::<prefix><person id>"
    $section = array();
    if (preg_match('/(Acteurs<\/h2>.*?<\/table>(Acteurs<\/h2>.*?<\/table>)?)/is', $html, $section))
    {
        preg_match_all('/<h5>([^<].*)<\/h5>.*\n.*<h5><a href="\/personne\/fichepersonne_gen_cpersonne=(\d+)\.html(.*)>(.*)?<\/a>/iU', $section[1], $ary, PREG_PATTERN_ORDER);

        $castLines = array();
        foreach ($ary[1] as $idx => $role)
        {
            $castLines[] = $ary[4][$idx].'::'.$role.'::'.$allocineIdPrefix.$ary[2][$idx];
        }
        $data['cast'] = trim(implode("\n", $castLines));
    }

    return $data;
}

/**
 * Translate a scraped term with a pattern map
 *
 * Returns the value of the first entry of $map whose key, used as a
 * case-insensitive regular expression, matches $term.
 *
 * @param   string  $term   Scraped text, e.g. a nationality or a genre
 * @param   array   $map    Regex fragment => translated value
 * @return  string          The translated value, '' if no pattern matches
 */
function allocineMapTerm($term, $map)
{
    foreach ($map as $pattern => $mapped)
    {
        if (preg_match('/'.$pattern.'/i', $term))
        {
            return $mapped;
        }
    }

    return '';
}

/**
 * Parses Actor-Details
 *
 * Find image and detail URL for actor, not sure if this can be made
 * a one-step process?  Completion waiting on update of actor
 * functionality to support more than one engine.
 *
 * @author  Douglas Mayle <hide@address.com>
 * @author  Andreas Goetz <hide@address.com>
 * @param   string  $name     Name of the Actor (not used for the lookup)
 * @param   string  $actorid  Allocine person id; nothing is fetched when empty
 * @return  array|null        array(array(Actor-URL, Thumbnail-URL)), or null
 *                            if no id was given or no portrait was found
 */
function allocineActor($name, $actorid)
{
    global $allocineServer;

    if (empty($actorid)) {
        return null;
    }

    // build the person page URL from the configured server like the other engine functions
    $url  = $allocineServer.'/personne/fichepersonne_gen_cpersonne='.urlencode($actorid).'.html';
    $resp = httpClient($url, 1);
    $html = isset($resp['data']) ? $resp['data'] : '';

    // the portrait is the image from the media server that is rendered 120 pixels wide
    $single = array();
    if (!preg_match('/src="([^"]+allocine\.fr\/acmedia\/medias\/nmedia\/[^"]+\/[0-9]+\.jpg)[^>]+width="120"/', $html, $single)) {
        return null;
    }

    return array(array($url, $single[1]));
}


?>
Return current item: VideoDB