<?php
/**
* Multi-engine glue logic
*
* @package Engines
* @todo Add function comments
* @author Andreas Goetz <hide@address.com>
* @version $Id: engines.php,v 1.35 2009/02/28 12:09:50 andig2 Exp $
*/
require_once './core/httpclient.php';
require_once './core/encoding.php';
/**
 * Determine the default engine
 *
 * Resolution order:
 *  1. the configured default in $config['enginedefault'], if non-empty
 *  2. the first key of the global $engines list, if it is a non-empty array
 *  3. 'imdb' as last resort
 *
 * @author  Andreas Goetz <hide@address.com>
 * @return  string    engine name
 */
function engineGetDefault()
{
    // $engines has to be imported explicitly, otherwise the fallback below
    // would look at an undefined local variable and never find an engine
    global $config, $engines;

    if (!empty($config['enginedefault']))
    {
        return $config['enginedefault'];
    }

    // NOTE(review): this takes the first listed engine regardless of its
    // value; engineGetLookup() treats the values as enabled flags - confirm
    // whether disabled engines should be skipped here as well
    if (is_array($engines) && count($engines))
    {
        $engine_list = array_keys($engines);
        return $engine_list[0];
    }

    return 'imdb';  // last resort
}
/**
 * Determine engine from id
 *
 * The engine is recognized from the shape of the id:
 *  - "<engine>:<id>" prefix (e.g. imdb:081547) selects <engine>
 *  - "<digits>-<digits>" selects tvcom
 *  - ids starting with DP/GR/DI followed by a digit select dvdpalace,
 *    gamerankings and dvdinside respectively
 *  - 10 or more characters of 0-9/A-Z select the first Amazon engine flagged
 *    in $config['engine'] (amazona2s, amazonxml, amazoncom), else 'amazon'
 * Anything else, including an empty id, yields 'imdb'.
 *
 * @todo    Enhance DB schema to store engine type explicitly
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    item id
 * @return  string    engine name
 */
function engineGetEngine($id)
{
    global $config;

    $engine = '';

    // recognize engine from id
    if ($id)
    {
        // engine prefixed (imdb:081547)
        // currently working for imdb, amazon, amazoncom and tvcom
        if (preg_match('/^(\w+):/', $id, $match))
        {
            $engine = $match[1];
        }
        elseif (preg_match('/^\d+-\d+$/', $id))
        {
            $engine = 'tvcom';
        }
        elseif (preg_match('/^DP[0-9]/', $id))
        {
            $engine = 'dvdpalace';  // German Movie Database
        }
        elseif (preg_match('/^[0-9A-Z]{10,}$/', $id))
        {
            // first Amazon flavour flagged in the configuration wins;
            // !empty() keeps missing config keys from raising warnings
            $engine = 'amazon';
            foreach (array('amazona2s', 'amazonxml', 'amazoncom') as $candidate)
            {
                if (!empty($config['engine'][$candidate]))
                {
                    $engine = $candidate;
                    break;
                }
            }
        }
        elseif (preg_match('/^GR[0-9]/', $id))
        {
            $engine = 'gamerankings';
        }
        elseif (preg_match('/^DI[0-9]/', $id))
        {
            $engine = 'dvdinside';
        }
        # elseif (preg_match('/^[0-9a-z]{6,}$/', $id)) $engine = 'freedb';
    }

    // empty() on purpose: a prefix of "0" also falls back to imdb
    return empty($engine) ? 'imdb' : $engine;
}
/**
 * Include engine file and retrieve item data
 *
 * Loads "<engine>.php", calls "<engine>Data($id)" if that function exists,
 * converts the result to utf-8 and passes it through engine_clean_input().
 * If the engine function does not exist an empty array is returned.
 *
 * Side effect: the global $cache is set to true before the engine is called.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    item id
 * @param   string    engine name
 * @return  array     item data
 */
function engineGetData($id, $engine = 'imdb')
{
    global $lang, $cache;

    // NOTE(review): $engine ends up in an include path - callers must make
    // sure it never comes unchecked from user input
    require_once($engine.'.php');

    $func   = $engine.'Data';
    $result = array();

    if (function_exists($func))
    {
        $cache  = true;
        $result = $func($id);
    }

    // an engine returning null/false must not break the array handling below
    if (!is_array($result)) $result = array();

    // make sure all engines properly return the encoding type
    # if (empty($result['encoding'])) errorpage('Engine Error', 'Engine does not properly return encoding');

    // engines not reporting an encoding are assumed to use the language encoding
    $source_encoding = (!empty($result['encoding'])) ? $result['encoding'] : $lang['encoding'];
    $target_encoding = 'utf-8';
    unset($result['encoding']);

    // convert to unicode; compared case-insensitively so that 'UTF-8' does
    // not trigger a pointless conversion
    if (strtolower((string) $source_encoding) != $target_encoding)
    {
        #dump("Converting from $source_encoding to $target_encoding");
        $result = iconv_array($source_encoding, $target_encoding, $result);
    }

    engine_clean_input($result);

    return $result;
}
/**
 * Include engine file and execute item search
 *
 * Loads "<engine>.php", calls "<engine>Search()" if that function exists,
 * converts the result to utf-8, removes duplicate ids and passes the result
 * through engine_clean_input(). If the engine function does not exist an
 * empty array is returned.
 *
 * Side effect: the global $cache is set to true before the engine is called.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    search string
 * @param   string    engine name
 * @param   mixed     optional engine specific parameter; only forwarded when set
 * @param   mixed     optional engine specific parameter; only forwarded together with the first one
 * @return  array     list of item data
 */
function engineSearch($find, $engine = 'imdb', $para1 = null, $para2 = null)
{
    global $lang, $cache;

    // NOTE(review): $engine ends up in an include path - callers must make
    // sure it never comes unchecked from user input
    require_once($engine.'.php');

    $func   = $engine.'Search';
    $result = array();

    if (function_exists($func))
    {
        $cache  = true;

        // check if additional parameters given to avoid overriding default values
        $result = (isset($para1)) ? $func($find, $para1, $para2) : $func($find);
    }

    // an engine returning null/false must not break the array handling below
    if (!is_array($result)) $result = array();

    // make sure all engines properly return the encoding type
    # if (empty($result['encoding'])) errorpage('Engine Error', 'Engine does not properly return encoding');

    // engines not reporting an encoding are assumed to use the language encoding
    $source_encoding = (!empty($result['encoding'])) ? $result['encoding'] : $lang['encoding'];
    $target_encoding = 'utf-8';
    unset($result['encoding']);

    // convert to unicode; compared case-insensitively so that 'UTF-8' does
    // not trigger a pointless conversion
    if (strtolower((string) $source_encoding) != $target_encoding)
    {
        #dump("Converting from $source_encoding to $target_encoding");
        $result = iconv_array($source_encoding, $target_encoding, $result);
    }

    // obtain unique entries
    $result = engine_deduplicate_result($result);

    engine_clean_input($result);

    return $result;
}
/**
 * Get item details URL in external site
 *
 * Delegates to "<engine>ContentUrl($id)" from "<engine>.php". An empty id or
 * an engine without that function yields an empty string.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    item id
 * @param   string    engine name
 * @return  string    item details url
 */
function engineGetContentUrl($id, $engine = 'imdb')
{
    if (!empty($id))
    {
        require_once($engine.'.php');

        $callback = $engine.'ContentUrl';
        if (function_exists($callback))
        {
            return $callback($id);
        }
    }

    return '';
}
/**
 * Get complete search URL for external site
 *
 * Delegates to "<engine>SearchUrl($find)" from "<engine>.php"; an engine
 * without that function yields an empty string.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    search string
 * @param   string    engine name
 * @return  string    item search url
 */
function engineGetSearchUrl($find, $engine = 'imdb')
{
    require_once($engine.'.php');

    $callback = $engine.'SearchUrl';

    return function_exists($callback) ? $callback($find) : '';
}
/**
 * Check if string is a valid utf-8 byte sequence
 *
 * Plain ASCII text and the empty string are accepted as well. ASCII control
 * characters other than tab, line feed and carriage return are rejected.
 *
 * @param   string    string to check
 * @return  mixed     result of preg_match(): 1 if valid, 0 if not, false on regex error
 */
function is_utf8($str)
{
    // From http://w3.org/International/questions/qa-forms-utf-8.html
    $utf8_pattern = '%^(?:
[\x09\x0A\x0D\x20-\x7E] # ASCII
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
)*$%xs';

    return preg_match($utf8_pattern, $str);
}
/**
 * Decode string if it is utf-8. Typically used for later URL encoding of the string
 *
 * Strings that do not pass is_utf8() are returned unchanged.
 *
 * NOTE(review): utf8_decode() is deprecated as of PHP 8.2 - needs a
 * replacement before moving to newer PHP versions.
 *
 * @param   string    string to decode
 * @return  string    decoded string, or the input if it is not valid utf-8
 */
function utf8_smart_decode($str)
{
    return is_utf8($str) ? utf8_decode($str) : $str;
}
/**
 * Constructs engines array for use in lookup template
 *
 * For every entry of the global $engines list with a truthy value one
 * element is created, keyed by engine, holding the tab CSS class ('CLASS'),
 * the lookup url ('url') and the engine's display name ('name').
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    search string
 * @param   string    search type (for Amazon)
 * @param   string    engine name
 * @return  array     engines array for lookup template
 */
function engineGetLookup($find, $searchtype, $engine = 'imdb')
{
    global $config, $engines, $lang;

    $lookup = array();

    // url- make sure this is non-unicode
    // the search string part is identical for all engines, so build it once
    $query = 'lookup.php?find='.urlencode(utf8_smart_decode(trim($find)));

    foreach ($engines as $name => $enabled)
    {
        if (!$enabled) continue;

        // NOTE(review): engine name and $searchtype are appended to the url
        // without urlencode() - confirm they can never contain special characters
        $lookup[$name] = array(
            // active indicator
            'CLASS' => ($name == $engine) ? 'tabActive' : 'tabInactive',
            'url'   => $query.'&engine='.$name.'&searchtype='.$searchtype,
            // title
            'name'  => $config['engines'][$name]['name']
        );
    }

    return $lookup;
}
/**
 * Retrieve meta information about all available engines
 *
 * Scans ./engines (relative to the current working directory) for *.php
 * files, includes each of them except engines.php and collects the return
 * value of "<engine>Meta()". Files without such a function are included but
 * not listed, so that non-engine helpers can live in the same folder.
 * Engines whose meta data demands a newer PHP version (key 'php') than the
 * running one are left out.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @return  array     engines array containing engine meta data, keyed by engine name
 */
function engineMeta()
{
    $engines = array();

    // a missing or unreadable engines folder simply means "no engines"
    $dh = @opendir('./engines');
    if (!$dh)
    {
        return $engines;
    }

    while (($file = readdir($dh)) !== false)
    {
        // engine files only, skipping this glue file itself
        if (!preg_match('/(.*)\.php$/', $file, $matches) || ($matches[1] == 'engines'))
        {
            continue;
        }

        $engine = $matches[1];

        // get meta data
        require_once('./engines/'.$engine.'.php');

        // Engine only properly defined if it has a Meta function
        $func = $engine.'Meta';
        if (!function_exists($func))
        {
            continue;
        }

        $meta = $func();

        // required php version present? The 'php' key is optional, hence
        // !empty() instead of a plain array access
        if (!empty($meta['php']) && (version_compare(phpversion(), $meta['php']) < 0))
        {
            continue;
        }

        $engines[$engine] = $meta;
    }

    closedir($dh);

    return $engines;
}
/**
 * Determine actor engine from actor id, defaults to imdb
 *
 * Ids of the form "<engine>:<id>" (e.g. imdb:nm0347149) select <engine>,
 * ids of the form "tv<digits>" select tvcom.
 *
 * @author  Michael Kollmann <hide@address.com>
 * @param   string    actor id
 * @return  string    engine name
 */
function engineGetActorEngine($id)
{
    $detected = '';

    // recognize engine from id
    if ($id)
    {
        $parts = array();

        if (preg_match('/^(\w+):/', $id, $parts))
        {
            // actor engine prefixed, too? (imdb:nm0347149)
            $detected = $parts[1];
        }
        elseif (preg_match('/^tv\d+$/', $id))
        {
            $detected = 'tvcom';
        }
    }

    // empty() on purpose: a prefix of "0" also falls back to imdb
    return empty($detected) ? 'imdb' : $detected;
}
/**
 * Get actors details URL in external site
 *
 * Delegates to "<engine>ActorUrl($name, $id)" from "<engine>.php" after
 * stripping a leading "<engine>:" prefix from the id. An engine without that
 * function yields an empty string.
 *
 * @author  Michael Kollmann <hide@address.com>
 * @param   string    actor name
 * @param   string    actor id
 * @param   string    engine name
 * @return  string    actor details url
 */
function engineGetActorUrl($name, $id, $engine = 'imdb')
{
    require_once($engine.'.php');

    $callback = $engine.'ActorUrl';
    if (!function_exists($callback))
    {
        return '';
    }

    // the engine itself expects the id without the engine prefix
    $bare_id = preg_replace('|^'.$engine.':|', '', $id);

    return $callback($name, $bare_id);
}
/**
 * Include engine file and retrieve actor data
 *
 * Delegates to "<engine>Actor($name, $id)" from "<engine>.php" after
 * stripping a leading "<engine>:" prefix from the id. An engine without that
 * function yields an empty array.
 *
 * @author  Michael Kollmann <hide@address.com>
 * @param   string    actor name
 * @param   string    actor id
 * @param   string    engine name
 * @return  array     array with Actor-URL and Thumbnail
 */
function engineActor($name, $id, $engine = 'imdb')
{
    require_once($engine.'.php');

    $callback = $engine.'Actor';
    if (!function_exists($callback))
    {
        return array();
    }

    // the engine itself expects the id without the engine prefix
    $bare_id = preg_replace('|^'.$engine.':|', '', $id);

    return $callback($name, $bare_id);
}
/**
 * Clean HTML tags from hierarchical associative array
 *
 * Works in place: nested arrays are processed recursively, every other
 * value is passed through html_to_text() and then html_clean_utf8()
 * (both defined outside this file).
 *
 * @param   array   $data   hierarchical array to convert, modified by reference
 */
function engine_clean_input(&$data)
{
    foreach ($data as $key => $val)
    {
        if (is_array($val))
        {
            // descend using the original element so changes are kept
            engine_clean_input($data[$key]);
            continue;
        }

        $data[$key] = html_clean_utf8(html_to_text($val));
    }
}
/**
 * Extract source encoding from HTML code or HTTP header otherwise
 *
 * A charset found in a meta tag of $resp['data'] takes precedence. Otherwise
 * the charset from $resp['header'] is used, or 'iso-8859-1' if the header
 * names none. Without any header errorpage() is called.
 *
 * @param   array   $resp   response array with 'data' and 'header' elements
 * @return  string          lower case encoding name
 */
function engine_get_encoding(&$resp)
{
    $found = array();

    # <meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
    if (preg_match('#<meta.+?\scontent.+?charset=\s*([a-zA-Z0-9-]+)#is', $resp['data'], $found))
    {
        return strtolower($found[1]);
    }

    if ($resp['header'])
    {
        # Content-Type: text/html; charset=UTF-8
        $declared = preg_match('#charset=\s*([a-zA-Z0-9-]+)#is', $resp['header'], $found);

        // no charset implies default charset
        return $declared ? strtolower($found[1]) : 'iso-8859-1';
    }

    errorpage('Unknown encoding', $resp);
}
/**
 * Filter result set for unique engine ids.
 * This avoids deduplication of search results inside every single engine.
 *
 * The first entry of every id is kept and the original array keys are
 * preserved, so the result may have gaps in its numbering. As soon as an
 * entry without id is found the data is returned as it is at that point.
 *
 * @param   array   $data   list of result entries, each with an 'id' element
 * @return  array           result entries with duplicate ids removed
 */
function engine_deduplicate_result($data)
{
    if (!is_array($data))
    {
        return $data;
    }

    $seen = array();

    // foreach walks a snapshot of the array, so removing entries cannot cut
    // the loop short the way a counter compared against a shrinking count() does
    foreach ($data as $idx => $item)
    {
        $id = (is_array($item) && isset($item['id'])) ? $item['id'] : null;

        // early exit if engine (e.g. google images) doesn't return ids
        if (!$id)
        {
            return $data;
        }

        // ids are compared as exact strings: '0123' and '123' are different
        $key = (string) $id;

        // exclude duplicates
        if (isset($seen[$key]))
        {
            unset($data[$idx]);
        }
        else
        {
            $seen[$key] = true;
        }
    }

    return $data;
}
?>