Location: PHPKode > projects > Nzbirc > nzbirc-1.4/modules/imdb.php
<?php

/**************************************************
 * NZBirc v1
 * Copyright (c) 2006 Harry Bragg
 * tiberious.org
 * Module: imdb
 **************************************************
 *
 * Full GPL License: <http://www.gnu.org/licenses/gpl.txt>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

/**
 * IMDb lookup module.
 *
 * Resolves a free-text film name (via a Google "I'm Feeling Lucky" search
 * restricted to imdb.com) or an explicit "tt1234567" ID to an IMDb title
 * page, scrapes title / year / rating / genres / plot from that page and
 * replies with the formatted result.
 */
class Net_SmartIRC_module_imdb extends Net_SmartIRC_module_base
{

	// default module variables
	var $name = 'imdb';
	var $version = 'v0.1';
	var $description = 'searches and gets information from imdb';
	
	/**
	 * URL templates and regular expressions used to query and scrape IMDb.
	 *
	 *  - url.search           : sprintf template for the search request (%s = url-encoded query)
	 *  - url.id               : sprintf template for a title page (%s = IMDb ID, e.g. tt0133093)
	 *  - regex.id             : patterns tried in order to extract the numeric part of an IMDb ID
	 *  - regex.film           : patterns applied to a title page
	 *                           ('genreContainer' is defined but not referenced in this class)
	 *  - command.id           : recognises a bare "tt<digits>" argument
	 *
	 * @var array of regular expressions
	 * @access public
	 */
	var $_def = array(
		'url' => array(
			'search' => 'http://www.google.com/search?hl=en&q=%s+site:imdb.com&btnI=I\'m+Feeling+Lucky',
			'id' => 'http://www.imdb.com/title/%s/'
		),
		'regex' => array(
			'id' => array(
				'/http:\/\/www.imdb.com\/title\/tt(\d+)\//i',
				'/http:\/\/www.imdb.com\/Title\?(\d+)/i'
				//'/<p><b>Popular Titles<\/b> \(Displaying \d+ Results?\)<ol><li>\s*<a href="\/title\/([^\/]+)\//i',
				//'/<p><b>Titles \(Exact Matches\)<\/b> \(Displaying \d+ Results?\)<ol><li>\s*<a href="\/title\/([^\/]+)\//i',
				//'/<p><b>Titles \(Partial Matches\)<\/b> \(Displaying \d+ Results?\)<ol><li>\s*<a href="\/title\/([^\/]+)\//i'
				),
			'film' => array(
				'title' => '/<div id="tn15crumbs">\s+<a href="\/">IMDb<\/a>\s+&gt;\s+<b>(.+?) \((\d+)\)<\/b>\s+<\/div>/i',
				'rating' => '/<div class="meta">\s+<b>([0-9.]+)\/10<\/b>/i',
				'genreContainer' => '/<div class="info">\s+<h5>Genre:<\/h5>(.+)<\/div>/i',
				'genre' => '/<a href="\/Sections\/Genres\/([^\/]+)\/">.+?<\/a>/i',
				'plot' => '/<div class="info">\s+<h5>Plot:<\/h5>\s+(.+?)\s*<a class="tn15more inline" .+\s+<\/div>/i',
			),
		),
		'command' => array(
			'id' => '/^\s*(tt\d+)\s*$/i'
		)
	);
		
	/**
	 * IMDb module configuration
	 *
	 * STARTCONFIG	
	 * @var array publicCommand  - Lists the commands that trigger this module, and return publicly
	 * @var array privateCommand - Lists the commands that trigger this module privatly (notice you)
	 * @var double dataCache     - The amount of time (in days) to keep the actual data cache for (this does not include imdb id caching, which is forever)
	 * @var double cacheChance   - Probability of using the cache instead of re-querying (between 0 and 1, 0.9 = 90%)
	 * ENDCONFIG
	 */
	var $_config = array(
		'publicCommand' => array( '^imdb', '^!imdb'),
		'privateCommand' => array( '^@imdb' ),
		'dataCache' => 4,
		'cacheChance' => 0.9
	);
	
	/**
	 * Reply templates ({name} placeholders are filled by parseTemplate()).
	 *
	 * @var array
	 * @access private
	 */
	var $_template = array(
		'film' => '{title} ({year}) | {rating}/10 | {genre} | {plot} | {url}',
		'help' => array(
			'{command} film - list information about a film',
			'{command} id - get information from a film ID'
			),
		'error' => '**Error:** {errormsg}',
	);
	
	/**
	 * Registers the 'imdb' handler and declares the two cache variables
	 * (_imdbIDCache, _imdbFilmCache) with the 'store' module.
	 *
	 * @return void
	 */
	function _registerCommands()
	{
	 	global $irc;
	 	
		$this->_setHandler( '.*', $this, 'imdb' );
		
		$irc->_modules['store']->addVariable( '_imdbIDCache' );
		$irc->_modules['store']->addVariable( '_imdbFilmCache' );
	}
	
	/*****************************************************
	 * Bot Calls
	 *****************************************************/
	
	/**
	 * Command handler: answers "help", a "tt<digits>" ID lookup, or a
	 * free-text film search, and sends the rendered template back through
	 * the 'func' module's reply().
	 *
	 * @param object $irc    - bot instance
	 * @param object $data   - incoming message; messageex, strippedMessage and trigger are read
	 * @param bool   $notice - forwarded unchanged to reply()
	 * @return void
	 */
	function imdb( &$irc, &$data, $notice = false)
	{	
		if ( isset( $data->messageex[1] ) )
		{		
			$query = $data->strippedMessage;
		
			if ( $query == 'help' )
			{
				// explicit help request
				$this->parseTemplate( $msg, 'help', array( 'command' => $data->trigger ) );
			}
			else if ( preg_match( $this->_def['command']['id'], $query, $match ) )
			{
				// argument is an IMDb ID (tt1234567)
				if ( ( $film = $this->getFilm( $match[1] ) ) !== false )
				{		
					$this->parseTemplate( $msg, 'film', $film );
				}
				else
				{
					$this->parseTemplate( $msg, 'error', array( 'errormsg' => sprintf( 'No film found for id: %s', $match[1] ) ) );
				}		
			}
			else
			{
				// argument is a film name: search first, then fetch
				if ( ( $film = $this->getSFilm( $query ) ) !== false )
				{
					$this->parseTemplate( $msg, 'film', $film );
				}
				else
				{
					$this->parseTemplate( $msg, 'error', array( 'errormsg' => sprintf( 'No film found for: %s', $query ) ) );
				}		
			}		
		}
		else
		{		
			// no argument given: show help
			$this->parseTemplate( $msg, 'help', array( 'command' => $data->trigger ) );
		}	
		
		// $msg is populated by parseTemplate() through its first argument
		if ( isset( $msg ) )
		{
			$irc->_modules['func']->reply( $irc, $data, $msg, $notice );
		}	
	}

	/*****************************************************
	 * Main functions
	 *****************************************************/
	
	/**
	 * Fetch a URL with PEAR HTTP_Request, using the configured proxy if any.
	 *
	 * If the response body is empty, the value of the 'location' response
	 * header is returned instead, so callers can still extract a redirect
	 * target.
	 *
	 * @param string $url - url to get
	 * @return mixed - response body (or 'location' header value), false on request error
	 * @access public
	 */
	function getUrl( $url )
	{
		global $irc;
		
		// plain assignment: "=& new" is a parse error on PHP 7+
		$req = new HTTP_Request( );
		$req->setMethod(HTTP_REQUEST_METHOD_GET);
			
		if ( isset( $irc->config->proxy ) )
		{
			$proxy = $irc->config->proxy;
			
			if ( isset( $proxy['host'] ) && strlen( $proxy['host'] ) > 0 )
			{
				if ( isset( $proxy['username'] ) && strlen( $proxy['username'] ) > 0 )
				{
					// authenticated proxy
					$req->setProxy( $proxy['host'],
						$proxy['port'],
						$proxy['username'],
						$proxy['password'] );
				}
				else
				{
					$req->setProxy( $proxy['host'],
						$proxy['port'] );
				}
			}
		}
				
		// NOTE(review): PEAR HTTP_Request::setURL() appears to take only the URL, so this
		// options array is probably ignored (options normally go to the constructor) - confirm.
		// Left as-is on purpose: enabling redirects would change what findFilm() parses.
		$req->setURL( $url, array( 'timeout' => '30', 'readTimeout' => 30, 'allowRedirects' => true ) );
		$irc->modDebug( 'imdb', 'request sent for: '.$url, __FILE__, __LINE__ );
		$request = $req->sendRequest();
		
		if ( PEAR::isError( $request ) )
		{
			$irc->modDebug( 'imdb', 'failed to get '.$url.', error: '.$request->getMessage(), __FILE__, __LINE__ );
			unset( $req, $request );
			return false;
		}
		
		$body = $req->getResponseBody();
		if ( empty( $body ) )
		{
			// nothing in the body: fall back to the redirect target
			$body = $req->getResponseHeader( 'location');
		}
		unset( $req, $request );
		
		return $body;
	}
	
	/**
	 * Look for a film by name and resolve it to an IMDb ID.
	 *
	 * A previously resolved query is answered from _imdbIDCache with
	 * probability $_config['cacheChance']; otherwise the search URL is
	 * fetched and the first matching 'id' pattern wins.
	 *
	 * @param string $query - film search query
	 * @return mixed - imdb ID (string, "tt" prefixed), or false if nothing was found
	 * @access public
	 */
	function findFilm( $query )
	{
		global $irc;
	
		$irc->_modules['store']->manualUpdate('_imdbIDCache');
		
		// check the cache
		if ( ( isset( $irc->_imdbIDCache[$query] ) ) &&
		     ( mt_rand(1, 100) <= (100 * $this->_config['cacheChance']) ) ) // more likely to requery as cache doesnt go away
		{
			$irc->modDebug( 'imdb', sprintf( 'Film search: %s, using cache: %s', $query, $irc->_imdbIDCache[$query] ), __FILE__, __LINE__ );
			return $irc->_imdbIDCache[$query];
		}
	
		// find film
		$url = sprintf( $this->_def['url']['search'], urlencode(strtolower($query)) );
		$page = $this->getUrl( $url );
		if ( $page === false )
		{
			return false;
		}
		
		foreach( $this->_def['regex']['id'] as $regex )
		{
			if ( preg_match( $regex, $page, $found ) )
			{
				// patterns capture only the digits; restore the "tt" prefix
				$imdbID = 'tt'.$found[1];
				$irc->_modules['store']->setVariable( '_imdbIDCache', $imdbID, $query );
				$irc->modDebug( 'imdb', sprintf( 'Film search: %s, Found: %s', $query, $imdbID ), __FILE__, __LINE__ );
				return $imdbID;
			}
		}
		
		$irc->modDebug( 'imdb', sprintf( 'Film search: %s Failed', $query ), __FILE__, __LINE__ );
		return false;
	}

	/**
	 * Search for a film by name and return its information.
	 *
	 * @param string $query       - film search query
	 * @param bool   $ignoreCache - passed to getFilm(); the ID lookup in findFilm() still uses its cache
	 * @return mixed - film information array, or false if the search or the fetch failed
	 * @access public
	 */
	function getSFilm( $query, $ignoreCache = false )
	{
		$imdbID = $this->findFilm( $query );
		if ( $imdbID === false )
		{
			return false;
		}
		
		return $this->getFilm( $imdbID, $ignoreCache );
	}

	/**
	 * Get Film
	 *
	 * Returns the cached entry with probability $_config['cacheChance']
	 * (unless $ignoreCache is set); otherwise fetches and scrapes the title
	 * page, stores the result in _imdbFilmCache and prunes old entries.
	 *
	 * @param string $imdbID      - IMDb ID (e.g. tt0133093)
	 * @param bool   $ignoreCache - true to skip the cache lookup and always re-fetch
	 * @return mixed - array with imdbID, title, year, rating, genre (array), plot, url, searched;
	 *                 false if the page could not be fetched or no title was found
	 * @access public
	 */
	function getFilm( $imdbID, $ignoreCache = false )
	{
		global $irc;
	
		$irc->_modules['store']->manualUpdate('_imdbFilmCache');
		
		// check the cache
		if ( !$ignoreCache &&
		     isset( $irc->_imdbFilmCache[$imdbID] ) &&
		     ( mt_rand(1, 100) <= (100 * $this->_config['cacheChance']) ) ) // more likely to requery as cache doesnt go away
		{
			$irc->modDebug( 'imdb', sprintf( 'Film ID: %s, using cache', $imdbID ), __FILE__, __LINE__ );
			return $irc->_imdbFilmCache[$imdbID];
		}

		$url = sprintf( $this->_def['url']['id'], urlencode( $imdbID ) );
		$page = $this->getUrl( $url );
		if ( $page === false )
		{
			return false;
		}
		
		$patterns = $this->_def['regex']['film'];
		
		// without a title this is not a usable film page
		if ( !preg_match( $patterns['title'], $page, $title ) )
		{
			return false;
		}
		
		// rating and plot are optional; $rating / $plot stay empty when absent
		preg_match( $patterns['rating'], $page, $rating );
		preg_match( $patterns['plot'], $page, $plot );
		
		// always an array, even when the page lists no genres
		$genre = array();
		if ( preg_match_all( $patterns['genre'], $page, $gList ) )
		{
			foreach( $gList[1] as $genreName )
			{
				$genre[] = $this->stringDecode( $genreName );
			}
		}
					
		$film = array(
			'imdbID' => $imdbID,
			'title' => $this->stringDecode( $title[1] ),
			'year' => $this->stringDecode( $title[2] ),
			'rating' => $this->stringDecode( isset( $rating[1] ) ? $rating[1] : '' ),
			'genre' => $genre,
			'plot' => $this->stringDecode( isset( $plot[1] ) ? $plot[1] : '' ),
			'url' => sprintf( $this->_def['url']['id'], $imdbID ) );
		
		if ( empty( $film['title'] ) )
		{
			return false;
		}
		
		// timestamp used by _checkCache() to expire the entry
		$film['searched'] = time();
		
		$irc->_imdbFilmCache[$imdbID] = $film;
		$this->_checkCache();
		
		return $film;
	}

	/**
	 * Check the data cache, for old data
	 *
	 * Removes every _imdbFilmCache entry whose 'searched' timestamp is older
	 * than $_config['dataCache'] days (entries without a timestamp count as
	 * expired), then calls the store's manualUpdate() for the cache.
	 *
	 * @return void
	 */
	function _checkCache()
	{
	 	global $irc;
	 
	 	if ( is_array( $irc->_imdbFilmCache ) )
	 	{
			// anything searched before this moment is stale
			$cutoff = time() - ( $this->_config['dataCache'] * 3600 * 24 );
			
			foreach( $irc->_imdbFilmCache as $id => $film )
			{
				if ( !isset( $film['searched'] ) || $film['searched'] < $cutoff )
				{
					$irc->_modules['store']->unsetVariable( '_imdbFilmCache', $id );
				}
			}			
		}
				
		$irc->_modules['store']->manualUpdate( '_imdbFilmCache', true );
	}	
}

?>
Return current item: Nzbirc