<?php
/**
* Allocine Parser
*
* Parses data from the Allocine.fr
*
* @package Engines
* @author Douglas Mayle <hide@address.com>
* @author Andreas Gohr <hide@address.com>
* @author tedemo <hide@address.com>
* @link http://www.allocine.fr Allocine
* @version $Id: allocine.php,v 1.13 2009/03/02 20:40:41 andig2 Exp $
*/
// Base URL of the Allocine site; the engine functions in this file read it via `global $allocineServer`
$GLOBALS['allocineServer'] = 'http://www.allocine.fr';
// Prefix that marks an external id as an Allocine id; it is stripped before URLs are built
$GLOBALS['allocineIdPrefix'] = 'allocine:';
/**
 * Describe this engine
 *
 * @todo   Include image search capabilities etc in meta information
 * @return array Engine meta data; currently only the display name under 'name'
 */
function allocineMeta()
{
    $meta = array();
    $meta['name'] = 'Allocine (fr)';

    return $meta;
}
/**
 * Strip accents from a search title so that titles typed with accented
 * characters still produce results
 *
 * Every accented letter listed in the table below is replaced by its
 * unaccented ASCII base letter; all other characters are returned untouched.
 *
 * The table is spelled out as UTF-8 byte escapes on purpose: an earlier
 * version held the letters as literals and was silently corrupted when the
 * file was re-saved in another encoding, which shifted the search and
 * replacement lists against each other.
 *
 * NOTE(review): $title is assumed to be UTF-8 encoded - confirm against callers.
 *
 * @author Martin Vauchel <hide@address.com>
 * @param  string $title The search string
 * @return string The search string with no accents
 */
function removeAccents($title)
{
    // accented character (UTF-8 bytes) => ASCII replacement, built on first call
    static $map = null;

    if ($map === null)
    {
        // Each value is a run of 2-byte UTF-8 sequences sharing one replacement
        $groups = array(
            'a' => "\xC3\xA0\xC3\xA1\xC3\xA2\xC3\xA3\xC3\xA4", // U+00E0..U+00E4 (a grave, acute, circumflex, tilde, diaeresis)
            'c' => "\xC3\xA7",                                 // U+00E7 (c cedilla)
            'e' => "\xC3\xA8\xC3\xA9\xC3\xAA\xC3\xAB",         // U+00E8..U+00EB
            'i' => "\xC3\xAC\xC3\xAD\xC3\xAE\xC3\xAF",         // U+00EC..U+00EF
            'n' => "\xC3\xB1",                                 // U+00F1 (n tilde)
            'o' => "\xC3\xB2\xC3\xB3\xC3\xB4\xC3\xB5\xC3\xB6", // U+00F2..U+00F6
            'u' => "\xC3\xB9\xC3\xBA\xC3\xBB\xC3\xBC",         // U+00F9..U+00FC
            'y' => "\xC3\xBD\xC3\xBF",                         // U+00FD, U+00FF
            'A' => "\xC3\x80\xC3\x81\xC3\x82\xC3\x83\xC3\x84", // U+00C0..U+00C4
            'C' => "\xC3\x87",                                 // U+00C7
            'E' => "\xC3\x88\xC3\x89\xC3\x8A\xC3\x8B",         // U+00C8..U+00CB
            'I' => "\xC3\x8C\xC3\x8D\xC3\x8E\xC3\x8F",         // U+00CC..U+00CF
            'N' => "\xC3\x91",                                 // U+00D1
            'O' => "\xC3\x92\xC3\x93\xC3\x94\xC3\x95\xC3\x96", // U+00D2..U+00D6
            'U' => "\xC3\x99\xC3\x9A\xC3\x9B\xC3\x9C",         // U+00D9..U+00DC
            'Y' => "\xC3\x9D",                                 // U+00DD
        );

        $map = array();
        foreach ($groups as $plain => $accented)
        {
            // all listed characters are exactly two bytes long in UTF-8
            foreach (str_split($accented, 2) as $char)
            {
                $map[$char] = $plain;
            }
        }
    }

    // strtr() with an array replaces whole keys, so multi-byte characters are safe
    return strtr((string) $title, $map);
}
/**
 * Build the URL that searches Allocine for a movie title
 *
 * The title is run through removeAccents() and url-encoded before it is
 * appended as the 'motcle' query parameter.
 *
 * @author Douglas Mayle <hide@address.com>
 * @author Andreas Goetz <hide@address.com>
 * @param  string $title The search string
 * @return string The search URL (GET)
 */
function allocineSearchUrl($title)
{
    global $allocineServer;

    // accents are stripped first, then the keyword is made URL-safe
    $keyword = urlencode(removeAccents($title));
    $url     = $allocineServer.'/recherche?motcle='.$keyword;
    $url    .= '&rub=1';

    return $url;
}
/**
 * Build the URL of the Allocine page for one specific movie
 *
 * A leading engine prefix (see $allocineIdPrefix) is removed from the id
 * before it is placed into the URL.
 *
 * @author Douglas Mayle <hide@address.com>
 * @author Andreas Goetz <hide@address.com>
 * @param  string $id The movie's external id
 * @return string The visit URL
 */
function allocineContentUrl($id)
{
    global $allocineServer, $allocineIdPrefix;

    // only a prefix at the very start of the id is stripped
    $prefixPattern = '/^'.$allocineIdPrefix.'/';
    $bareId        = preg_replace($prefixPattern, '', $id);

    $path = '/film/fichefilm_gen_cfilm='.$bareId.'.html';

    return $allocineServer.$path;
}
/**
 * Search a Movie
 *
 * Searches for a given title on Allocine and returns the found links in
 * an array. Besides the numerically indexed hits the array carries the
 * page encoding under the key 'encoding'.
 *
 * If the HTTP request fails, the error text is appended to the global
 * $CLIENTERROR and whatever could be parsed (usually nothing) is returned.
 *
 * @author Douglas Mayle <hide@address.com>
 * @author Tiago Fonseca <hide@address.com>
 * @author Charles Morgan <hide@address.com>
 * @param  string $title The search string
 * @return array  'encoding' plus one array('id' => ..., 'title' => ...) per hit
 */
function allocineSearch($title)
{
    global $allocineServer;
    global $allocineIdPrefix;
    global $CLIENTERROR;

    // Same URL the user gets offered as search link (accents already removed there)
    $resp = httpClient(allocineSearchUrl($title), 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    // A failed request may come back without these keys
    $html     = isset($resp['data']) ? $resp['data'] : '';
    $finalUrl = isset($resp['url']) ? $resp['url'] : '';

    $data = array();

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    // direct match (redirecting to individual title)?
    // The pattern needs its closing delimiter, and the id is capture group 1
    $single        = array();
    $directPattern = '/^'.preg_quote($allocineServer, '/').'\/film\/fichefilm_gen_cfilm=(\d+)\.html/';
    if (preg_match($directPattern, $finalUrl, $single))
    {
        $data[0]['id']    = $allocineIdPrefix.$single[1];
        $data[0]['title'] = $title;
        return $data;
    }

    // multiple matches
    $m = array();
    preg_match_all('/<A HREF="\/film\/fichefilm_gen_cfilm=(\d+)\.html" class="link1">(.*?)<\/A><\/h4>(<div[^>]*><h4[^>]*>(\d+)<\/h4><\/div>)?(<div[^>]*><h5[^>]*>([^<]+))?/i', $html, $m, PREG_SET_ORDER);

    foreach ($m as $row)
    {
        $info          = array();
        $info['id']    = $allocineIdPrefix.$row[1];
        $info['title'] = $row[2];

        // add year and extra info (helpful in case of multiple matches);
        // optional groups in the middle of a match come back as '' rather than unset
        $info['title'] .= '<small>';
        if (isset($row[4]) && $row[4] !== '') $info['title'] .= ' '.$row[4];
        if (isset($row[6]) && $row[6] !== '') $info['title'] .= ' '.$row[6];
        $info['title'] .= '</small>';

        $data[] = $info;
    }

    return $data;
}
/**
 * Fetches the data for a given Allocine-ID
 *
 * Downloads the movie page and the casting page and scrapes both with
 * regular expressions. HTTP errors are appended to the global $CLIENTERROR.
 *
 * Fields whose pattern does not match come back empty ('title', 'subtitle',
 * 'year', 'coverurl', 'director', 'plot'), as 0 ('runtime') or are left out
 * of the result altogether ('country', 'genres', 'cast').
 *
 * @author Douglas Mayle <hide@address.com>
 * @author Tiago Fonseca <hide@address.com>
 * @param  string $imdbID Allocine id, with or without the engine prefix
 *                        (parameter name kept for backward compatibility)
 * @return array  Result data
 */
function allocineData($imdbID)
{
    global $allocineServer;
    global $allocineIdPrefix;
    global $CLIENTERROR;

    // preg_quote keeps the prefix literal even if it ever contains regex characters
    $allocineID = preg_replace('/^'.preg_quote($allocineIdPrefix, '/').'/', '', $imdbID);

    // fetch mainpage
    $resp = httpClient($allocineServer.'/film/fichefilm_gen_cfilm='.$allocineID.'.html', 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    // a failed request may come back without a body
    $html = isset($resp['data']) ? $resp['data'] : '';

    $data = array(); // result
    $ary  = array(); // temp

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    // Titles: "Title - Subtitle"; array_pad supplies '' when there is no subtitle part
    $heading = _allocineFirstMatch('/<h1 class=\"TitleFilm\">(.*)<\/h1>/iU', $html);
    list($t, $s) = array_pad(explode(' - ', $heading, 2), 2, '');
    $data['title']    = html_clean($t);
    $data['subtitle'] = html_clean($s);
    $data['language'] = 'french';

    // Year: release date link first, production year as fallback
    $year = _allocineFirstMatch('/<a href="\/film\/agenda_gen_date=\d+\/\d+\/(\d+).html".*?>/i', $html);
    if (empty($year))
    {
        $year = _allocineFirstMatch('/<h3 class=\"SpProse\">Ann.{1}e de production : (\d+)<\/h3>/i', $html);
    }
    $data['year'] = $year;

    // Cover URL
    $data['coverurl'] = _allocineFirstMatch('/<IMG SRC="([^"]+)?" (border|alt)(.*\n){2,10}.*Date de sortie/i', $html);

    // Runtime in minutes; explicit int casts because '' * 60 is a TypeError on PHP 8
    $data['runtime'] = 0;
    if (preg_match('/Dur.{1}e : (\d+)h\.{0,1}( (\d+)min\.{0,1}){0,1}/si', $html, $ary))
    {
        $hours   = (int) $ary[1];
        $minutes = isset($ary[3]) ? (int) $ary[3] : 0;
        $data['runtime'] = $hours * 60 + $minutes;
    }

    // Director
    $data['director'] = _allocineFirstMatch('/R.{1}alis.{1} par\s*<A.+HREF="\/personne[?\/].+">(.+)<\/A>/iU', $html);

    // Countries: French nationality adjective (regex, '.' stands for an
    // accented letter) => English country name
    $map_countries = array(
        'am.ricain'      => 'USA',
        'indien'         => 'India',
        'allemand'       => 'Germany',
        'britannique'    => 'UK',
        'fran.ais'       => 'France',
        'Ta.wan'         => 'Taiwan',
        'n.o z.landais'  => 'New Zealand',
        'hong-kongais'   => 'Hong-Kong',
        'mexicain'       => 'Mexico',
        'roumain'        => 'Romania',
        'belge'          => 'Belgium',
        'luxembourgeois' => 'Luxembourg',
        'irlandais'      => 'Republic of Ireland',
        'tha.landais'    => 'Thailand',
        'danois'         => 'Denmark',
        'tch.que'        => 'Czech Republic',
        'chinois'        => 'China',
        'japonais'       => 'Japan',
        'espagnol'       => 'Spain',
        'canadien'       => 'Canada',
        'italien'        => 'Italy',
        'australien'     => 'Australia',
        'portugais'      => 'Portugal',
        'norv.gien'      => 'Norge',
        'bulgare'        => 'Bulgaria',
        'hollandais'     => 'Netherlands',
        'gr.c'           => 'Greece',
        'hongrois'       => 'Hungary',
        'islandais'      => 'Iceland',
        'isra.lien'      => 'Israel',
        'cor.en'         => 'South Korea',
        'polonais'       => 'Poland',
        'russe'          => 'Russia',
        'ukrainien'      => 'Ukraine',
        'serbe'          => 'Serbia',
        'su.dois'        => 'Sweden',
        'turc'           => 'Turkey',
        'vietnamien'     => 'Vietnam');

    if (preg_match_all('/<div style=\"padding: 2 0 2 0;\"><h3 class=\"SpProse\">Film (.+). <\/h3><h3 class=\"SpProse\">Genre :/iU', $html, $ary, PREG_PATTERN_ORDER) > 0)
    {
        $countries = array();
        foreach (explode(',', implode(',', $ary[1])) as $origin)
        {
            $country = _allocineTranslate($map_countries, $origin);

            // Skip nationalities without a mapping and exact duplicates. A
            // substring test is not good enough here: 'UK' is contained in 'Ukraine'.
            if ($country !== '' && !in_array($country, $countries, true))
            {
                $countries[] = $country;
            }
        }
        $data['country'] = implode(', ', $countries);
    }

    // Plot
    $plot = _allocineFirstMatch('/<h2 class=\"SpBlocTitle\" >Synopsis<\/h2>[^\n]+\n[^\n]*\n[^\n]*<div align=\"justify\"><h4>(.+)<\/h4>/Us', $html);

    // And cleanup: line breaks become spaces, runs of spaces are collapsed
    $plot = preg_replace('/[\n\r]/', ' ', $plot);
    $plot = preg_replace('/ {2,}/', ' ', $plot);
    $data['plot'] = trim($plot);

    // French genre (regex) => English genre. '-' means "no English
    // equivalent, keep the French name". Order matters: the first matching
    // pattern wins, so 'Com.die Dramatique' has to precede 'Com.die'.
    $map_genres = array(
        'Com.die Dramatique' => 'Drama',
        'Com.die'            => 'Comedy',
        'Dessin anim.'       => 'Animation',
        'Famille'            => 'Family',
        'Animation'          => 'Animation',
        'Espionnage'         => '-',
        'Karat.'             => 'Action',
        'Divers'             => '-',
        'Biopic'             => 'Biography',
        'Historique'         => 'History',
        'Guerre'             => 'War',
        'Documentaire'       => 'Documentary',
        'Science fiction'    => 'Sci-Fi',
        'Policier'           => 'Crime',
        'Thriller'           => 'Thriller',
        'Musique'            => 'Musical',
        'horreur'            => 'Horror',
        'Aventure'           => 'Adventure',
        'Fantastique'        => 'Fantasy',
        'Drame'              => 'Drama');

    // Genres (as Array)
    if (preg_match_all('/<h3 class=\"SpProse\">Genre : (.+)<\/h3>/iU', $html, $ary, PREG_PATTERN_ORDER) > 0)
    {
        foreach (explode(',', implode(',', $ary[1])) as $genre)
        {
            $mapped = _allocineTranslate($map_genres, $genre);
            if ($mapped === '-')
            {
                $mapped = trim($genre);
            }

            // genres without any mapping are dropped instead of being added as ''
            if ($mapped !== '')
            {
                $data['genres'][] = $mapped;
            }
        }
    }

    // If sub-title is blank, we'll try to fill in the original title for foreign films.
    if (empty($data['subtitle']))
    {
        $original = _allocineFirstMatch('/<h3 class=\"SpProse\">Titre original : (.*?)<\/h3>/', $html);
        if (!empty($original)) $data['subtitle'] = $original;
    }

    // fetch credits
    $resp = httpClient($allocineServer.'/film/casting_gen_cfilm='.$allocineID.'.html', 1);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
    $castHtml = isset($resp['data']) ? $resp['data'] : '';

    // Cast: one "actor::role::allocine:<person id>" line per actor
    $Section = array();
    if (preg_match('/(Acteurs<\/h2>.*?<\/table>(Acteurs<\/h2>.*?<\/table>)?)/is', $castHtml, $Section))
    {
        preg_match_all('/<h5>([^<].*)<\/h5>.*\n.*<h5><a href="\/personne\/fichepersonne_gen_cpersonne=(\d+)\.html(.*)>(.*)?<\/a>/iU', $Section[1], $ary, PREG_PATTERN_ORDER);

        $cast = '';
        foreach ($ary[1] as $idx => $role)
        {
            $cast .= $ary[4][$idx]."::".$role."::allocine:".$ary[2][$idx]."\n";
        }
        $data['cast'] = trim($cast);
    }

    return $data;
}

/**
 * Return one trimmed capture group of the first match of $pattern in
 * $subject, or '' when the pattern or that group did not match
 *
 * @param  string $pattern PCRE pattern including delimiters
 * @param  string $subject Text to search
 * @param  int    $group   Capture group to return
 * @return string
 */
function _allocineFirstMatch($pattern, $subject, $group = 1)
{
    $found = array();
    if (preg_match($pattern, $subject, $found) && isset($found[$group]))
    {
        return trim($found[$group]);
    }
    return '';
}

/**
 * Look a label up in a "regex fragment => translation" table
 *
 * @param  array  $map   Table, tested in order, case-insensitively
 * @param  string $label Text to translate
 * @return string Translation of the first matching fragment, '' if none matches
 */
function _allocineTranslate($map, $label)
{
    foreach ($map as $fragment => $translation)
    {
        if (preg_match('/'.$fragment.'/i', $label))
        {
            return $translation;
        }
    }
    return '';
}
/**
 * Parses Actor-Details
 *
 * Fetches the actor's Allocine page and looks for the 120px wide portrait
 * image on it. Completion waiting on update of actor functionality to
 * support more than one engine.
 *
 * @author Douglas Mayle <hide@address.com>
 * @author Andreas Goetz <hide@address.com>
 * @param  string $name    Name of the Actor (not used for the lookup)
 * @param  string $actorid Allocine person id
 * @return array|null array(array(Actor-URL, Thumbnail-URL)), or null when no
 *                    id was given or no portrait was found
 */
function allocineActor($name, $actorid)
{
    global $allocineServer;
    global $allocineIdPrefix;

    if (empty($actorid))
    {
        return null;
    }

    // NOTE(review): cast entries are stored as "allocine:<id>"; presumably the
    // caller strips the prefix - it is removed here as well so that a prefixed
    // id does not end up url-encoded inside the URL. Confirm against the caller.
    $personId = preg_replace('/^'.preg_quote((string) $allocineIdPrefix, '/').'/', '', $actorid);

    // use the configured server like the other functions of this engine
    $url  = $allocineServer.'/personne/fichepersonne_gen_cpersonne='.urlencode($personId).'.html';
    $resp = httpClient($url, 1);

    // a failed request may come back without a body
    $html = isset($resp['data']) ? $resp['data'] : '';

    $single = array();
    if (!preg_match('/src="([^"]+allocine.fr\/acmedia\/medias\/nmedia\/[^"]+\/[0-9]+\.jpg)[^>]+width="120"/', $html, $single))
    {
        return null;
    }

    return array(array($url, $single[1]));
}
?>