Location: PHPKode > projects > VideoDB > videodb/engines/tvcom.php
<?php
/**
 * TV.com Parser
 *
 * Parses data from the TV Tome Movie Database
 *
 * @package Engines
 * @author  Andreas Goetz   <hide@address.com>
 * @link    http://www.tv.com  TV Tome
 * @version $Id: tvcom.php,v 1.11 2008/08/02 10:26:40 andig2 Exp $
 */

// Base URL of the TV.com site; the search, episode, show and person URLs of
// this engine are built from it.
$GLOBALS['tvcomServer']		= 'http://www.tv.com';
// Prefix prepended to the ids this engine generates and stripped again
// before an id is turned back into a URL.
$GLOBALS['tvcomIdPrefix']	= 'tvcom:';

/**
 * Get meta information about the TV.com engine
 *
 * @todo    Include image search capabilities etc in meta information
 *
 * @return  array   Meta data with the keys 'name' and 'stable'
 */
function tvcomMeta()
{
    $meta           = array();
    $meta['name']   = 'TV.com';
    $meta['stable'] = 0;

    return $meta;
}

/**
 * Get search Url for a TV.com movie
 *
 * @param   string	$title	The search string
 * @return  string		The search URL (GET)
 */
function tvcomSearchUrl($title)
{
	global $tvcomServer;

	// urlencode() already encodes spaces as '+', so the title needs no
	// separate space replacement before it is put into the query string.
	return $tvcomServer.'/search.php?qs='.urlencode($title).'&stype=all';
}

/**
 * Get Url to visit the episode summary on the external site
 *
 * The id has the form "<prefix><showid>-<episodeid>"; the engine prefix is
 * optional. An id without an episode part yields a URL with an empty episode
 * segment.
 *
 * @todo    Have to store episode url, probably.
 *
 * @param   string	$id	The movie's external id
 * @return  string		The visit URL
 */
function tvcomContentUrl($id)
{
	global $tvcomServer;
	global $tvcomIdPrefix;

	// strip the engine prefix; quote it so it is always matched literally
	$id = preg_replace('|^'.preg_quote($tvcomIdPrefix, '|').'|', '', $id);

	// split into show and episode; pad the result so an id without '-'
	// does not read an undefined offset
	list($showid, $episodeid) = array_pad(explode('-', $id), 2, '');

	return $tvcomServer.'/show/'.$showid.'/episode/'.$episodeid.'/summary.html';
}

/**
 * Search a Movie
 *
 * Searches for a given title on tv.com, takes the first show that is found
 * and returns the episodes listed for that show. Request errors are appended
 * to the global $CLIENTERROR.
 *
 * @param   string	$title	The search string
 * @return  array		List of associative arrays with the keys id, title
 *				and subtitle; empty if no show or episode was found
 */
function tvcomSearch($title)
{
	global $tvcomIdPrefix;
	global $CLIENTERROR;

	// always hand back an array, even if nothing is found
	$ary = array();

	// search for series
	$resp = httpClient(tvcomSearchUrl($title), 1);
	if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
	$body = isset($resp['data']) ? $resp['data'] : '';

	// take the first match; without a show there are no episodes to list
	if (!preg_match('|Show:.*?<a[^>]*href="(http.*?/summary.html)[^>]*>(.*?)</a>|s', $body, $series))
	{
		return $ary;
	}
	$showurl	= $series[1];
	$showtitle	= $series[2];

	// get show id; episode ids cannot be built without it
	if (!preg_match('|/([^/]+)/show/(\d+)/|i', $showurl, $series))
	{
		return $ary;
	}
	$showid = $series[2];

	// fetch the show page
	$resp = httpClient($showurl, 1);
	if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
	$body = isset($resp['data']) ? $resp['data'] : '';

	// look for episode listing
	if (preg_match('|<a href="(http.*?/show/\d+/episode_listings.html)[^>]*>.*?Episode|', $body, $series))
	{
		// TODO: field to enter season (and/or get and strip from title)
		$listurl = $series[1]."?season=0";
		$resp = httpClient($listurl, 1);
		if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
		$body = isset($resp['data']) ? $resp['data'] : '';
	}

	// collect the episodes from whichever page was fetched last
	if (preg_match_all('|<td class="f-bold">\s*<a href=".*?/episode/(\d+)/[^>]*>(.*?)</a>|i', $body, $data, PREG_SET_ORDER))
	{
		foreach ($data as $row)
		{
			$ary[] = array(
				'id'		=> $tvcomIdPrefix.$showid.'-'.$row[1],	// prefix:show-episode
				'title'		=> $showtitle,
				'subtitle'	=> $row[2],
			);
		}
	}

	return $ary;
}

/**
 * Fetches the data for a given TV.com id
 *
 * Loads the episode summary page and then the show summary page and
 * extracts the fields with regular expressions. Request errors are appended
 * to the global $CLIENTERROR. Fields that cannot be found on the page are
 * returned as empty strings (episode and season as null); genres, coverurl
 * and runtime are only set when found.
 *
 * @param   string	$id	External id (show-episode), with or without engine prefix
 * @return  array		Result data
 */
function tvcomData($id)
{
	global $CLIENTERROR;
	global $tvcomServer;
	global $tvcomIdPrefix;

	$data= array(); //result
	$ary = array(); //temp

	// strip the engine prefix; quote it so it is always matched literally
	$id = preg_replace('|^'.preg_quote($tvcomIdPrefix, '|').'|', '', $id);

	// fetch mainpage
	$resp = httpClient(tvcomContentUrl($id), 1);
	if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
	$body = isset($resp['data']) ? $resp['data'] : '';

	// Title: the first <h1> becomes the title, the second the subtitle
	preg_match_all('/<h1>(.+?)<\/h1>/si', $body, $ary, PREG_PATTERN_ORDER);
	$data['title']    = isset($ary[1][0]) ? html_entity_decode(trim($ary[1][0])) : '';
	$data['subtitle'] = isset($ary[1][1]) ? html_entity_decode(trim($ary[1][1])) : '';

	// Year
	preg_match('|First Aired:.*\ (\d{4})\ |i', $body, $ary);
	$data['year'] = isset($ary[1]) ? trim($ary[1]) : '';

	// Director (trailing dots are removed)
	preg_match('|Director:\s*</td>\s*<td>\s*<a href="[^>]*>([^<]*)</a>|si', $body, $ary);
	$data['director'] = isset($ary[1]) ? preg_replace('/\.*$/','',trim($ary[1])) : '';

	// Plot
	preg_match('|<div id="main-col">\s*<div>\s*([^<]*)|si', $body, $ary);
	if (empty($ary[1])) {
		// Match if a video link is shown
		preg_match('|Watch\ Video\s*</a>\s*</div>([^<]*)|si', $body, $ary);
	}
	$data['plot'] = isset($ary[1]) ? trim($ary[1]) : '';

	// Remove single word plots (two/three word plots, too?)
	if (preg_match('|^\S+$|', $data['plot'])) $data['plot'] = '';

/*
	// TODO: update to new layout
	// expose this to append the notes to the plot
	if (preg_match('/Notes<\/strong>.*?<tr><td>\s+(.+?)\s+<\/td><\/tr>/is', $resp['data'], $ary))
	{
		$data['plot'] .= "\n<b>Note:</b>\n".'<ul>'.trim(preg_replace('/\s{2,}/s', ' ', $ary[1])).'</ul>';
	}
*/

	// Cast: one line per person in the form "name::role::prefixed person id"
	$cast = '';
	foreach (array('Star', 'Recurring Role', 'Guest Star') as $cat)
	{
		if (preg_match('|<td class[^<]*'.$cat.':[^<]*</td>(.*?)</td>|si', $body, $ary))
		{
			preg_match_all('|<a href="http.*?/person/(\d+)/summary.html">(.*?)</a>\s*\(([^,)(]*)\)?|si',$ary[0],$stars,PREG_SET_ORDER);
			foreach ($stars as $s)
			{
				$cast .= trim($s[2]) . "::";
				$cast .= trim($s[3]) . "::";
				$cast .= $tvcomIdPrefix.trim($s[1])."\n";
			}
		}
	}
	$data['cast'] = trim($cast);

	// Rating
	preg_match('|<span[^>]*>(\d[^<]*)|si', $body, $ary);
	$data['rating'] = isset($ary[1]) ? trim($ary[1]) : '';

	// Episode number (currently not used)
	preg_match('|Episode Number: (\d+)|i', $body, $ary);
	$data['episode'] = isset($ary[1]) ? $ary[1] : null;

	// Season number (currently not used)
	preg_match('|Season Num: (\d+)|i', $body, $ary);
	$data['season'] = isset($ary[1]) ? $ary[1] : null;

	// Fetch show summary to get cover and other data from;
	// the show id is the part of the id before the first '-'
	list($showid) = explode('-', $id);
	$resp = httpClient($tvcomServer.'/show/'.$showid.'/summary.html', 1);
	if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
	$body = isset($resp['data']) ? $resp['data'] : '';

	// Genres (as Array)
	preg_match_all('|<a href=".*?genre">(.+?)</a>|si', $body, $ary, PREG_PATTERN_ORDER);
	foreach ($ary[1] as $genre)
	{
		$genre = trim($genre);
		if ($genre == 'Action/Adventure')
		{
			// Add two genres for one
			$data['genres'][] = 'Action';
			$genre = 'Adventure';
		}
		else if ($genre == 'Science-Fiction') $genre = 'Sci-Fi';

		$data['genres'][] = $genre;
	}

	// Cover URL
	if (preg_match('|<a\s+class="default-image[^<]+<img\s+src="(.*?)"|s', $body, $m))
	{
		$data['coverurl'] = trim($m[1]);
	}

	// Runtime
	if (preg_match('|\(\s*(\d+)\s*min.\s*\)|i', $body, $m))
	{
		$data['runtime'] = trim($m[1]);
	}

/*
	// Production code (currently not used)
	preg_match('|Prod Code: ([^<]+)|i', $body, $ary);
	$data['prodno'] = $ary[1];
*/

/*
	More data from IMDB currently not retrievable via tvtome:

	//MPAA Rating
	preg_match('/<A HREF="\/mpaa">MPAA<\/A>: ?<\/B>(.+?)<br>/i',$resp['data'],$ary);
	$data['mpaa']     = trim($ary[1]);

	//Countries
	preg_match_all('/<A HREF="\/Sections\/Countries\/.+?\/">(.+?)<\/A>/i',$resp['data'],$ary,PREG_PATTERN_ORDER);
	$data['country']  = trim(join(' ',$ary[1]));
	}
*/

	return $data;
}

/**
 * Get actor details URL from tv.com
 *
 * @param   string	$name	The actor's name (not used to build the URL)
 * @param   string	$id	The actor's external id, with or without engine prefix
 * @return  string		The visit URL
 */
function tvcomActorUrl($name, $id)
{
	global $tvcomServer;
	global $tvcomIdPrefix;

	// Person ids in the cast list carry the full engine prefix, so strip
	// that first; otherwise the remainder ends up inside the URL.
	$personid = preg_replace('|^'.preg_quote($tvcomIdPrefix, '|').'|', '', $id);

	// For backward compatibility also accept ids with a bare 'tv' prefix.
	if ($personid === $id) $personid = preg_replace('|^tv|', '', $id);

	return $tvcomServer.'/person/'.urlencode($personid).'/summary.html';
}

/**
 * Parses Actor-Details
 *
 * Find image and detail URL for actor, not sure if this can be made
 * a one-step process? Request errors are appended to the global
 * $CLIENTERROR.
 *
 * @param  string  $name  Name of the Actor (not used for the lookup)
 * @param  string  $id    The actor's external id, with or without engine prefix
 * @return array          array with Actor-URL and Thumbnail as first entry,
 *                        empty if no image was found
 */
function tvcomActor($name, $id)
{
	global $tvcomServer;
	global $tvcomIdPrefix;
	global $CLIENTERROR;

	$ary = array();

	// Person ids in the cast list carry the full engine prefix, so strip
	// that first; otherwise the remainder ends up inside the URL.
	$personid = preg_replace('|^'.preg_quote($tvcomIdPrefix, '|').'|', '', $id);

	// For backward compatibility also accept ids with a bare 'tv' prefix.
	if ($personid === $id) $personid = preg_replace('|^tv|', '', $id);

	$url = $tvcomServer."/person/".urlencode($personid)."/summary.html";

	// report request errors the same way the other functions of this engine do
	$resp = httpClient($url, 1);
	if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
	$body = isset($resp['data']) ? $resp['data'] : '';

	if (preg_match('|<a\s+class="default-image[^<]+<img\s+src="(.*?)"|s', $body, $m))
	{
		$ary[0][0] = $url;
		$ary[0][1] = $m[1];
	}

	return $ary;
}

?>
Return current item: VideoDB