Location: PHPKode > scripts > Kolosej Parser > kolosej-parser/KolosejParser.php
<?php

/**
 * KolosejParser class
 *
 * Fetches movie information from kolosej.si.
 *
 * @copyright  2009 Tomaž Muraus
 * @license    http://www.gnu.org/copyleft/gpl.html   GPL License
 * @version    Release: 1.1
 * @link       http://www.tomaz-muraus.info
 */
class KolosejParser
{
    protected $_movieListUrl = 'http://www.kolosej.si/filmi/A-Z/slovensko/';
    protected $_movieUrlPrefix = 'http://www.kolosej.si/filmi/film/';
    protected $_movieTitle;
    protected $_information = array();

    /**
     * Constructor.
     *
     * @param string $movieTitle Original or Slovene movie title.
     */
    public function __construct($movieTitle)
    {
        $this->_movieTitle = $movieTitle;
    }

    /**
     * Fetches the information about movie.
     *
     * Looks the title up in the movie list first and, when a matching entry
     * is found, downloads and parses the movie page.
     *
     * @return array Movie information on success, FALSE otherwise.
     */
    public function fetchMovieData()
    {
        $movieUrl = $this->_getMovieUrl();

        if ($movieUrl === FALSE)
        {
            return FALSE;
        }

        return $this->_parseMovieData($movieUrl);
    }

    /**
     * Retrieves kolosej.si movie URL for the provided movie title
     *
     * The title is matched literally (case-insensitively) against the link
     * text of a list row (Slovene title) and, failing that, against the cell
     * following the link (original title).
     *
     * @return string Movie URL on success, FALSE otherwise.
     */
    protected function _getMovieUrl()
    {
        $movieList = file_get_contents($this->_movieListUrl);

        if ($movieList === FALSE)
        {
            // The list could not be downloaded, so there is nothing to search.
            return FALSE;
        }

        // The title is user input: escape it so that characters such as
        // "?", "(", ")" or the "#" delimiter are matched literally instead of
        // being interpreted as regular expression syntax.
        $title = preg_quote($this->_movieTitle, '#');

        $patterns = array(
            // Slovene title
            '#<td class="link"><a href="/filmi/film/(.*?)/">' . $title . '</a></td>#i',
            // Original title
            '#<td class="link"><a href="/filmi/film/(.*?)/">.*?</a></td>\n<td>' . $title . '</td>#i',
        );

        // The Slovene title takes precedence over the original title.
        foreach ($patterns as $pattern)
        {
            $matches = array();

            if (preg_match($pattern, $movieList, $matches) === 1)
            {
                return $this->_movieUrlPrefix . $matches[1];
            }
        }

        return FALSE;
    }

    /**
     * Parses the movie information and saves it into an array.
     *
     * Only the fields whose pattern matched the page are present in the
     * result; the array is empty when none of them matched.
     *
     * @param string $movieUrl kolosej.si movie URL.
     *
     * @return array Movie information, FALSE if the page could not be fetched.
     */
    protected function _parseMovieData($movieUrl)
    {
        // Start from a clean slate so that a repeated call never returns
        // fields left over from an earlier parse.
        $this->_information = array();

        $movieData = file_get_contents($movieUrl . '/');

        if ($movieData === FALSE)
        {
            return FALSE;
        }

        // Result key => pattern whose single capture group holds the value.
        $patterns = array(
            'slovene_title'  => '#<h1>(.*?)</h1>#i',
            'original_title' => '#<span class="title-orig">.*?<a href=".*?">(.*?)</a>.*?</span>#is',
            'genre'          => '#<span class="genre">(.*?)</span>#i',
            'duration'       => '#<span class="duration"><span class="label">Dolžina:</span>.*?/ (\d+)min\s</span>#is',
            'year'           => '#<span class="year"><span class="label">Leto:</span> (\d+)</span>#is',
            'language'       => '#<span class="language"><span class="label">Jezik:</span>(.*?)</span>#is',
            'screenplay'     => '#<span class="screenplay"><span class="label">Scenarij:</span>(.*?)</span>#is',
            'director'       => '#<span class="director"><span class="label">Režija:</span>.*?<a href=".*?">(.*?)</a>.*?</span>#is',
            'imdb_link'      => '#<span class="links">.*?<span class="label">Povezavi:</span>.*?, <a href="(.*?)">IMDB</a>.*?</span>#is',
            'trailer_link'   => '#<span class="label">Napovednik:</span> <a href="(.*?)">Apple.com</a>#is',
            'summary'        => '#<div class="summary">(.*?)</div>#is',
        );

        foreach ($patterns as $key => $pattern)
        {
            $matches = array();

            if (preg_match($pattern, $movieData, $matches) !== 1)
            {
                // Field not present on the page (or the pattern failed).
                continue;
            }

            $value = $matches[1];

            if ($key === 'language')
            {
                // The language value is surrounded by (and may contain) line
                // breaks and indentation; strip all whitespace from it.
                $value = preg_replace('#\s+#', '', $value);
            }

            $this->_information[$key] = $value;
        }

        return $this->_information;
    }
}

/* End of file KolosejParser.php */
/* Location: ./KolosejParser.php */
/* Return current item: Kolosej Parser */