Location: PHPKode > projects > VideoDB > videodb/engines/engines.php
<?php
/**
 * Multi-engine glue logic
 *
 * @package Engines
 * @todo    Add function comments
 * @author  Andreas Goetz <hide@address.com>
 * @version $Id: engines.php,v 1.35 2009/02/28 12:09:50 andig2 Exp $
 */

require_once './core/httpclient.php';
require_once './core/encoding.php';

/**
 * Determine the default engine
 *
 * Resolution order:
 *  1. $config['enginedefault'] if it is set and non-empty
 *  2. the first key of the global $engines array
 *  3. 'imdb' as last resort
 *
 * @author  Andreas Goetz <hide@address.com>
 * @return  string    engine name
 */
function engineGetDefault()
{
    // $engines has to be imported explicitly; otherwise the fallback below
    // works on an undefined local variable and can never find an engine
    global $config, $engines;

    if (!empty($config['enginedefault']))
    {
        return $config['enginedefault'];
    }

    // first engine from list (the list is keyed by engine name)
    $engine_list = is_array($engines) ? array_keys($engines) : array();
    if (count($engine_list))
    {
        return $engine_list[0];
    }

    return 'imdb'; // last resort
}

/**
 * Determine engine from id
 *
 * The id format is inspected in a fixed order; the first matching rule wins:
 *  - "<engine>:..."               explicit engine prefix (imdb:081547)
 *  - "<digits>-<digits>"          tvcom
 *  - "DP<digit>..."               dvdpalace
 *  - 10+ upper case letters/digits  one of the Amazon engines, depending on
 *                                 which one is enabled in $config['engine']
 *  - "GR<digit>..."               gamerankings
 *  - "DI<digit>..."               dvdinside
 * Anything else (including an empty id) resolves to 'imdb'.
 *
 * @todo    Enhance DB schema to store engine type explicitly
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    item id
 * @return  string    engine name
 */
function engineGetEngine($id)
{
    global $config;

    $engine = '';

    // recognize engine from id
    if ($id)
    {
        // engine prefixed (imdb:081547)
        // currently working for imdb, amazon, amazoncom and tvcom
        if (preg_match('/^(\w+):/', $id, $match))
        {
            $engine = $match[1];
        }
        elseif (preg_match('/^\d+-\d+$/', $id))
        {
            $engine = 'tvcom';
        }
        elseif (preg_match('/^DP[0-9]/', $id))
        {
            $engine = 'dvdpalace'; // German Movie Database
        }
        elseif (preg_match('/^[0-9A-Z]{10,}$/', $id))
        {
            // Amazon style id: pick the first enabled Amazon engine.
            // The flags are optional, so they are tested with empty() to
            // avoid undefined index warnings for engines not configured.
            $enabled = (isset($config['engine']) && is_array($config['engine'])) ? $config['engine'] : array();

            $engine = 'amazon';
            foreach (array('amazona2s', 'amazonxml', 'amazoncom') as $candidate)
            {
                if (!empty($enabled[$candidate]))
                {
                    $engine = $candidate;
                    break;
                }
            }
        }
        elseif (preg_match('/^GR[0-9]/', $id))
        {
            $engine = 'gamerankings';
        }
        elseif (preg_match('/^DI[0-9]/', $id))
        {
            $engine = 'dvdinside';
        }
        // freedb detection is disabled:
#       elseif (preg_match('/^[0-9a-z]{6,}$/', $id)) $engine = 'freedb';
    }

    if (empty($engine)) $engine = 'imdb';

    return $engine;
}

/**
 * Include engine file and retrieve item data
 *
 * Loads "<engine>.php", calls "<engine>Data($id)" if that function exists,
 * converts the result to utf-8 and strips HTML from all values.
 * Sets the global $cache flag to true before calling the engine.
 *
 * NOTE(review): $engine is used to build an include path; callers should
 * only pass known engine names - verify against callers.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    item id
 * @param   string    engine name
 * @return  array     item data
 */
function engineGetData($id, $engine = 'imdb')
{
    global $lang, $cache;

    require_once($engine.'.php');
    $func = $engine.'Data';

    $result = array();
    if (function_exists($func))
    {
        $cache  = true;
        $result = $func($id);
    }

    // make sure all engines properly return the encoding type
#    if (empty($result['encoding'])) errorpage('Engine Error', 'Engine does not properly return encoding');

    // Engines are not forced to report an encoding (see disabled check above);
    // when it is missing the encoding of the current language is assumed.
    // empty() avoids an undefined index warning in that case.
    $source_encoding = !empty($result['encoding']) ? $result['encoding'] : $lang['encoding'];
    $target_encoding = 'utf-8';
    unset($result['encoding']);

    // convert to unicode
    if ($source_encoding != $target_encoding)
    {
        #dump("Converting from $source_encoding to $target_encoding");
        $result = iconv_array($source_encoding, $target_encoding, $result);
    }

    // strip HTML from all values
    engine_clean_input($result);

    return $result;
}

/**
 * Include engine file and execute item search
 *
 * Loads "<engine>.php", calls "<engine>Search()" if that function exists,
 * converts the result to utf-8, removes entries with duplicate ids and
 * strips HTML from all values.
 * Sets the global $cache flag to true before calling the engine.
 *
 * NOTE(review): $engine is used to build an include path; callers should
 * only pass known engine names - verify against callers.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    search string
 * @param   string    engine name
 * @param   mixed     optional 2nd argument for the engine's search function
 * @param   mixed     optional 3rd argument for the engine's search function,
 *                    only forwarded if the 2nd one is set
 * @return  array     list of item data
 */
function engineSearch($find, $engine = 'imdb', $para1 = null, $para2 = null)
{
    global $lang, $cache;

    require_once($engine.'.php');
    $func = $engine.'Search';

    $result = array();
    if (function_exists($func))
    {
        $cache  = true;
        // check if additional parameters given to avoid overriding default values
        $result = (isset($para1)) ? $func($find, $para1, $para2) : $func($find);
    }

    // make sure all engines properly return the encoding type
#    if (empty($result['encoding'])) errorpage('Engine Error', 'Engine does not properly return encoding');

    // Engines are not forced to report an encoding (see disabled check above);
    // when it is missing the encoding of the current language is assumed.
    // empty() avoids an undefined index warning in that case.
    $source_encoding = !empty($result['encoding']) ? $result['encoding'] : $lang['encoding'];
    $target_encoding = 'utf-8';
    unset($result['encoding']);

    // convert to unicode
    if ($source_encoding != $target_encoding)
    {
        #dump("Converting from $source_encoding to $target_encoding");
        $result = iconv_array($source_encoding, $target_encoding, $result);
    }

    // obtain unique entries
    $result = engine_deduplicate_result($result);

    // strip HTML from all values
    engine_clean_input($result);

    return $result;
}

/**
 * Get item details URL in external site
 *
 * Delegates to "<engine>ContentUrl($id)" from "<engine>.php". The engine file
 * is only loaded when an id is given.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    item id
 * @param   string    engine name
 * @return  string    item details url, empty string if there is no id or
 *                    the engine does not provide the function
 */
function engineGetContentUrl($id, $engine = 'imdb')
{
    $url = '';

    if (!empty($id))
    {
        require_once($engine.'.php');

        $callback = $engine.'ContentUrl';
        if (function_exists($callback))
        {
            $url = $callback($id);
        }
    }

    return $url;
}

/**
 * Get complete search URL for external site
 *
 * Delegates to "<engine>SearchUrl($find)" from "<engine>.php".
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    search string
 * @param   string    engine name
 * @return  string    item search url, empty string if the engine does not
 *                    provide the function
 */
function engineGetSearchUrl($find, $engine = 'imdb')
{
    require_once($engine.'.php');

    $callback = $engine.'SearchUrl';
    if (!function_exists($callback))
    {
        return '';
    }

    return $callback($find);
}

/**
 * Check if a string consists entirely of well-formed UTF-8 sequences
 *
 * Plain ASCII strings pass as well; of the ASCII range only tab, line feed,
 * carriage return and the printable characters 0x20-0x7E are accepted.
 *
 * @param   string    string to check
 * @return  int|bool  result of preg_match(): 1 if the string matches,
 *                    0 if it does not, false on error
 */
function is_utf8($str)
{
    // From http://w3.org/International/questions/qa-forms-utf-8.html
    $well_formed = '%^(?:
         [\x09\x0A\x0D\x20-\x7E]            # ASCII
       | [\xC2-\xDF][\x80-\xBF]            # non-overlong 2-byte
       |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
       | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
       |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
       |  \xF0[\x90-\xBF][\x80-\xBF]{2}    # planes 1-3
       | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
       |  \xF4[\x80-\x8F][\x80-\xBF]{2}    # plane 16
   )*$%xs';

    return preg_match($well_formed, $str);
}

/**
 * Decode string if it is utf-8. Typically used for later URL encoding of the string
 *
 * Strings that do not pass is_utf8() are returned unchanged.
 *
 * @param   string    string to decode
 * @return  string    result of utf8_decode() for utf-8 input, otherwise the input
 */
function utf8_smart_decode($str)
{
    return is_utf8($str) ? utf8_decode($str) : $str;
}

/**
 * Constructs engines array for use in lookup template
 *
 * Creates one entry per enabled engine of the global $engines array
 * (engine name => truthy flag). Each entry contains:
 *  - CLASS  'tabActive' for the currently selected engine, else 'tabInactive'
 *  - url    lookup.php link which repeats the search for that engine
 *  - name   display name from $config['engines'][<engine>]['name'],
 *           null if not available
 *
 * @author  Andreas Goetz <hide@address.com>
 * @param   string    search string
 * @param   string    search type (for Amazon)
 * @param   string    engine name
 * @return  array     engines array for lookup template
 */
function engineGetLookup($find, $searchtype, $engine = 'imdb')
{
    global $config, $engines;

    $tpl = array();
    if (!is_array($engines))
    {
        return $tpl;
    }

    // query values are the same for every tab, so encode them only once
    // search string- make sure this is non-unicode
    $find_param = urlencode(utf8_smart_decode(trim($find)));
    // search type is inserted into the query string and must be encoded, too
    $type_param = urlencode((string) $searchtype);

    foreach ($engines as $key => $value)
    {
        // skip disabled engines
        if (!$value) continue;

        $tpl[$key] = array(
            // active indicator
            'CLASS' => ($key == $engine) ? 'tabActive' : 'tabInactive',
            // url
            'url'   => 'lookup.php?find='.$find_param.'&engine='.urlencode($key).'&searchtype='.$type_param,
            // title- guarded as meta data may be missing for an engine
            'name'  => isset($config['engines'][$key]['name']) ? $config['engines'][$key]['name'] : null,
        );
    }

    return $tpl;
}

/**
 * Retrieve meta information about all available engines
 *
 * Scans ./engines for *.php files (except engines.php), includes each of them
 * and calls "<engine>Meta()" if that function exists. Engines whose meta data
 * contain a 'php' entry newer than the running PHP version are left out.
 *
 * @author  Andreas Goetz <hide@address.com>
 * @return  array     engines array: engine name => meta data returned by
 *                    the engine's Meta function
 */
function engineMeta()
{
    $engines = array();

    // directory may be missing- this is not an error, there are just no engines
    $dh = @opendir('./engines');
    if (!$dh)
    {
        return $engines;
    }

    while (($file = readdir($dh)) !== false)
    {
        // engine files only, skip this glue file
        if (!preg_match("/(.*)\.php$/", $file, $matches) || $matches[1] == 'engines')
        {
            continue;
        }
        $engine = $matches[1];

        // get meta data
        require_once('./engines/'.$engine.'.php');
        $func = $engine.'Meta';

        // Engine only properly defined if it has a Meta function
        // this is needed to allow placing the youtube trailer functionality into the engines folder, too
        if (!function_exists($func))
        {
            continue;
        }
        $meta = $func();

        // required php version present? The 'php' entry is optional, so it
        // is tested with empty() to avoid an undefined index warning
        if (!empty($meta['php']) && (version_compare(phpversion(), $meta['php']) < 0))
        {
            continue;
        }

        $engines[$engine] = $meta;
    }
    closedir($dh);

    return $engines;
}

/**
 * Determine actor engine from actor id, defaults to imdb
 *
 * Recognized formats: "<engine>:..." (explicit prefix, e.g. imdb:nm0347149)
 * and "tv<digits>" (tvcom).
 *
 * @author  Michael Kollmann <hide@address.com>
 * @param   string    actor id
 * @return  string    engine name
 */
function engineGetActorEngine($id)
{
    $detected = '';

    // recognize engine from id
    if ($id)
    {
        $parts = array();

        if (preg_match('/^(\w+):/', $id, $parts))
        {
            // actor engine prefixed, too? (imdb:nm0347149)
            $detected = $parts[1];
        }
        elseif (preg_match('/^tv\d+$/', $id))
        {
            $detected = 'tvcom';
        }
    }

    return empty($detected) ? 'imdb' : $detected;
}

/**
 * Get actors details URL in external site
 *
 * Delegates to "<engine>ActorUrl($name, $id)" from "<engine>.php". A leading
 * "<engine>:" prefix is removed from the id before it is handed over.
 *
 * @author  Michael Kollmann <hide@address.com>
 * @param   string    actor name
 * @param   string    actor id
 * @param   string    engine name
 * @return  string    actor details url, empty string if the engine does not
 *                    provide the function
 */
function engineGetActorUrl($name, $id, $engine = 'imdb')
{
    require_once($engine.'.php');
    $func = $engine.'ActorUrl';

    if (!function_exists($func))
    {
        return '';
    }

    // strip engine prefix; the engine name is quoted so that it is always
    // matched literally and cannot break the pattern
    $id = preg_replace('|^'.preg_quote($engine, '|').':|', '', $id);

    return $func($name, $id);
}

/**
 * Include engine file and retrieve actor data
 *
 * Delegates to "<engine>Actor($name, $id)" from "<engine>.php". A leading
 * "<engine>:" prefix is removed from the id before it is handed over.
 *
 * @author  Michael Kollmann <hide@address.com>
 * @param   string    actor name
 * @param   string    actor id
 * @param   string    engine name
 * @return  array     array with Actor-URL and Thumbnail, empty array if the
 *                    engine does not provide the function
 */
function engineActor($name, $id, $engine = 'imdb')
{
    require_once($engine.'.php');
    $func = $engine.'Actor';

    if (!function_exists($func))
    {
        return array();
    }

    // strip engine prefix; the engine name is quoted so that it is always
    // matched literally and cannot break the pattern
    $id = preg_replace('|^'.preg_quote($engine, '|').':|', '', $id);

    return $func($name, $id);
}

/**
 * Clean HTML tags from hierarchical associative array
 *
 * Works in place: nested arrays are processed recursively, every other value
 * is passed through html_to_text() and then html_clean_utf8().
 *
 * @param   array	$data	hierarchical array to convert (by reference)
 */
function engine_clean_input(&$data)
{
    foreach ($data as $field => $value)
    {
        if (!is_array($value))
        {
            // leaf value
            $data[$field] = html_clean_utf8(html_to_text($value));
            continue;
        }

        // descend into the original element, not the loop copy
        engine_clean_input($data[$field]);
    }
}

/**
 * Extract source encoding from HTML code or HTTP header otherwise
 *
 * Lookup order:
 *  1. charset of a <meta ... content=...> tag in $resp['data']
 *  2. charset given in $resp['header']
 *  3. 'iso-8859-1' if there is a header without charset
 * Without meta tag and without header errorpage() is called.
 *
 * @param   array     $resp  response, 'data' and 'header' entries are read
 * @return  string    lower case encoding name
 */
function engine_get_encoding(&$resp)
{
    $found = array();

#	<meta http-equiv="content-type" content="text/html; charset=iso-8859-1">
#    if (preg_match('#<meta.+?\scontent\s*=\s*[\'"]text/html;\s*charset=\s*(.+?)\s*[\'"]\s*/?\s*>#is', $data, $m))
    if (preg_match('#<meta.+?\scontent.+?charset=\s*([a-zA-Z0-9-]+)#is', $resp['data'], $found))
    {
        return strtolower($found[1]);
    }

    if (!$resp['header'])
    {
        errorpage('Unknown encoding', $resp);
        return;
    }

    # Content-Type: text/html; charset=UTF-8
    // no charset implies default charset
    return preg_match('#charset=\s*([a-zA-Z0-9-]+)#is', $resp['header'], $found)
        ? strtolower($found[1])
        : 'iso-8859-1';
}

/**
 * Filter result set for unique engine ids.
 * This avoids deduplication of search results inside every single engine.
 *
 * Only the first entry for each id is kept. Keys of the remaining entries
 * are preserved, i.e. the returned array is not renumbered. If an entry
 * without id is found the data is returned as it is at that point.
 *
 * @param   array     $data  list of search results (numerically indexed)
 * @return  array     search results without duplicate ids
 */
function engine_deduplicate_result($data)
{
    if (!is_array($data))
    {
        return $data;
    }

    $seen  = array();
    // The number of entries has to be determined up front: unset() below
    // shrinks the array, re-evaluating count() in the loop condition would
    // end the loop too early and leave trailing duplicates untouched.
    $total = count($data);

    for ($i = 0; $i < $total; $i++)
    {
        $id = isset($data[$i]['id']) ? $data[$i]['id'] : null;

        // early exit if engine (e.g. google images) doesn't return ids
        if (!$id) return $data;

        // exclude duplicates, lookup by key instead of searching a list
        if (isset($seen[$id]))
        {
            unset($data[$i]);
        }
        else
        {
            $seen[$id] = true;
        }
    }

    return $data;
}

?>
Return current item: VideoDB