Location: PHPKode > projects > PBBoard > PBBoard_v2.1.4/Upload/includes/FeedParser.php
<?php
/**
* Event-driven RSS 1.0 / RSS 2.0 / Atom 1 feed parser built on PHP's XML extension.
*
* Usage: call parse($url), then read the result through getChannels(), getChannel(),
* getItems(), getItem() and getFeedVersion().
*
* All tag and attribute names are stored upper-cased, because the XML parser is used
* with its default case folding. Text values are converted from HTML to BBCode by
* html2bb() and then stripped of any remaining tags; values of date tags are stored
* as Unix timestamps (as strings).
*
* NOTE(review): the URL handed to parse() is passed straight to file_get_contents(),
* so local paths and any registered stream wrapper are readable through it. Validate
* the URL in the caller if it can come from an untrusted source.
*/
class FeedParser{

	private $xmlParser      = null;                     // Parser handle, only set while parse() is running
	private $insideItem     = array();                  // Keep track of current position in tag tree
	private $attrStack      = array();                  // Attributes of every open tag, parallel to $insideItem
	private $currentTag     = null;                     // Innermost open tag name
	private $currentAttr    = null;                     // Attributes array of the innermost open tag
	private $textBuffer     = null;                     // Character data collected for the current tag (null = none yet)
	private $pendingEmpty   = false;                    // True while the last opened tag has had neither text nor children

	private $namespaces     = array(
							'http://purl.org/rss/1.0/'                  => 'RSS 1.0',
							'http://purl.org/rss/1.0/modules/content/'  => 'RSS 2.0',
							'http://www.w3.org/2005/Atom'               => 'ATOM 1',
							);                          // Namespaces used to detect the feed version
	private $itemTags       = array('ITEM','ENTRY');    // List of tag names which hold a feed item
	private $channelTags    = array('CHANNEL','FEED');  // List of tag names which hold all channel elements
	private $dateTags       = array('UPDATED','PUBDATE','DC:DATE');
	private $hasSubTags     = array('IMAGE','AUTHOR');  // List of tag names which have sub tags
	private $channels       = array();
	private $items          = array();
	private $itemIndex      = 0;

	private $url            = null;                     // The parsed url
	private $version        = null;                     // Detected feed version


	/**
	* Constructor - starts with an empty parsing state.
	*
	* The XML parser itself is created inside parse(), so that one instance can
	* parse several feeds one after another.
	*/
	public function __construct()
	{
		$this->resetState();
	}

	/*-----------------------------------------------------------------------+
	|  Public functions. Use to parse feed and get informations.             |
	+-----------------------------------------------------------------------*/

	/**
	* Get all channel elements
	*
	* @access   public
	* @return   array   - All channel elements as associative array
	*/
	public function getChannels()
	{
		return $this->channels;
	}

	/**
	* Get all feed items
	*
	* @access   public
	* @return   array   - All feed items as associative array
	*/
	public function getItems()
	{
		return $this->items;
	}

	/**
	* Get total number of feed items
	*
	* @access   public
	* @return   number
	*/
	public function getTotalItems()
	{
		return count($this->items);
	}

	/**
	* Get a feed item by index
	*
	* @access   public
	* @param    number  index of feed item
	* @return   array   feed item as associative array of its elements
	* @throws   Exception when no item is stored under the given index
	*/
	public function getItem($index)
	{
		// isset() also rejects negative indexes and gaps, which a plain
		// "$index < total" comparison would let through.
		if(is_numeric($index) && isset($this->items[(int) $index]))
		{
			return $this->items[(int) $index];
		}

		throw new Exception("Item index is learger then total items.");
	}

	/**
	* Get a channel element by name
	*
	* @access   public
	* @param    string  the name of channel tag (case-insensitive)
	* @return   string
	* @throws   Exception when the channel has no such element
	*/
	public function getChannel($tagName)
	{
		$key = strtoupper($tagName);

		if(array_key_exists($key, $this->channels))
		{
			return $this->channels[$key];
		}

		throw new Exception("Channel tag $tagName not found.");
	}

	/**
	* Get the parsed URL
	*
	* @access   public
	* @return   string
	* @throws   Exception when parse() has not been given a URL yet
	*/
	public function getParsedUrl()
	{
		if(empty($this->url))
		{
			throw new Exception("Feed URL is not set yet.");
		}

		return $this->url;
	}

	/**
	* Get the detected Feed version
	*
	* @access   public
	* @return   string  one of 'RSS 1.0', 'RSS 2.0', 'ATOM 1', or null before a successful parse
	*/
	public function getFeedVersion()
	{
		return $this->version;
	}

	/**
	* Parses a feed url
	*
	* Any result of a previous parse() call on this instance is discarded first.
	*
	* @access   public
	* @param    string  the feed url
	* @return   void
	* @throws   Exception when the url cannot be loaded, the XML is not well formed,
	*                     or the feed version cannot be detected
	*/
	public function parse($url)
	{
		$this->url = $url;
		$this->resetState();

		$content = $this->getUrlContent();

		if(empty($content))
		{
			throw new Exception('Sorry! cannot load the feed url.');
		}

		$parser = xml_parser_create();

		// Array callables replace xml_set_object(); they resolve to the private
		// handlers because they are set and invoked from inside this class.
		xml_set_element_handler($parser, array($this, 'startElement'), array($this, 'endElement'));
		xml_set_character_data_handler($parser, array($this, 'characterData'));

		$this->xmlParser = $parser;

		$error = null;
		if(! xml_parse($parser, $content, true))
		{
			// Read the error details before the parser is released
			$error = sprintf("XML error: %s at line %d",
				xml_error_string(xml_get_error_code($parser)),
				xml_get_current_line_number($parser));
		}

		$this->xmlParser = null;

		// Only PHP < 8 hands out a resource that has to be freed explicitly
		if(is_resource($parser))
		{
			xml_parser_free($parser);
		}

		if($error !== null)
		{
			throw new Exception($error);
		}

		if(empty($this->version))
		{
			throw new Exception('Sorry! cannot detect the feed version.');
		}
	}

	// End public functions -------------------------------------------------

	/*-----------------------------------------------------------------------+
	| Private functions. Be careful to edit them.                            |
	+-----------------------------------------------------------------------*/

	/**
	* Forget everything collected by a previous parse
	*
	* @access   private
	* @return   void
	*/
	private function resetState()
	{
		$this->insideItem   = array();
		$this->attrStack    = array();
		$this->currentTag   = null;
		$this->currentAttr  = null;
		$this->textBuffer   = null;
		$this->pendingEmpty = false;
		$this->channels     = array();
		$this->items        = array();
		$this->itemIndex    = 0;
		$this->version      = null;
	}

	/**
	* Load the whole contents of a RSS/ATOM page
	*
	* Tries file_get_contents() first and falls back to cURL when that yields nothing.
	*
	* @access   private
	* @return   string|false    the content; false or an empty string when nothing could be loaded
	* @throws   Exception when the url is empty or cURL reports an error
	*/
	private function getUrlContent()
	{
		if(empty($this->url))
		{
			throw new Exception("URL to parse is empty!.");
		}

		// The warning is suppressed on purpose: a failure here is not fatal,
		// it only triggers the cURL fallback below.
		$content = @file_get_contents($this->url);

		if($content)
		{
			return $content;
		}

		if(! function_exists('curl_init'))
		{
			// No fallback available; parse() reports the load failure
			return false;
		}

		$ch = curl_init();

		curl_setopt($ch, CURLOPT_URL, $this->url);
		curl_setopt($ch, CURLOPT_HEADER, false);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

		$content = curl_exec($ch);
		$error   = curl_error($ch);

		// Only PHP < 8 hands out a resource that has to be closed explicitly
		if(is_resource($ch))
		{
			curl_close($ch);
		}

		if(! empty($error))
		{
			throw new Exception("Erroe occured while loading url by cURL. <br />\n" . $error);
		}

		return $content;
	}

	/**
	* Handle the start event of a tag while parsing
	*
	* @access   private
	* @param    object  the xmlParser object
	* @param    string  name of currently entering tag
	* @param    array   array of attributes
	* @return   void
	*/
	private function startElement($parser, $tagName, $attrs)
	{
		// Text collected so far belongs to the enclosing tag; store it before descending
		$this->flushText();

		if(! $this->version)
		{
			$this->findVersion($tagName, $attrs);
		}

		$this->insideItem[] = $tagName;
		$this->attrStack[]  = $attrs;

		$this->currentTag   = $tagName;
		$this->currentAttr  = $attrs;
		$this->pendingEmpty = true;
	}

	/**
	* Handle the end event of a tag while parsing
	*
	* @access   private
	* @param    object  the xmlParser object
	* @param    string  name of currently ending tag
	* @return   void
	*/
	private function endElement($parser, $tagName)
	{
		// A tag without any text (e.g. <link href="..."/>) never triggers the
		// character data handler; give it an empty value so that its attributes
		// are still recorded.
		if($this->textBuffer === null && $this->pendingEmpty && ! empty($this->currentAttr))
		{
			$this->textBuffer = '';
		}

		$this->flushText();
		$this->pendingEmpty = false;

		if(in_array($tagName, $this->itemTags, true))
		{
			$this->itemIndex++;
		}

		array_pop($this->insideItem);
		array_pop($this->attrStack);

		// Step back to the enclosing tag, or to "no tag" after the root element
		$depth = count($this->insideItem);
		$this->currentTag  = ($depth > 0) ? $this->insideItem[$depth - 1] : null;
		$this->currentAttr = ($depth > 0) ? $this->attrStack[$depth - 1]  : null;
	}

	/**
	* Handle character data of a tag while parsing
	*
	* The XML parser may deliver the text of one tag in several pieces (for example
	* around entities and line breaks), so the pieces are only collected here.
	* They are processed as a whole by flushText().
	*
	* @access   private
	* @param    object  the xmlParser object
	* @param    string  a piece of the tag value
	* @return   void
	*/
	private function characterData($parser, $data)
	{
		$this->pendingEmpty = false;
		$this->textBuffer   = ($this->textBuffer === null) ? $data : $this->textBuffer . $data;
	}

	/**
	* Store the collected character data under the current tag
	*
	* Does nothing when no character data has been collected, or when the current
	* position is neither inside the channel nor inside an item.
	*
	* @access   private
	* @return   void
	*/
	private function flushText()
	{
		if($this->textBuffer === null)
		{
			return;
		}

		$text = $this->textBuffer;
		$this->textBuffer = null;

		if($this->currentTag === null)
		{
			return;
		}

		if($this->inChannel())
		{
			$this->storeValue($this->channels, $text);
		}
		elseif($this->inItem())
		{
			if(! isset($this->items[$this->itemIndex]))
			{
				$this->items[$this->itemIndex] = array();
			}

			$this->storeValue($this->items[$this->itemIndex], $text);
		}
	}

	/**
	* Write the value and attributes of the current tag into a channel or item array
	*
	* @access   private
	* @param    array   the channel array or one item array (modified in place)
	* @param    string  raw text of the current tag
	* @return   void
	*/
	private function storeValue(array &$target, $text)
	{
		$tag    = $this->currentTag;
		$parent = $this->getParentTag();
		$value  = $this->normaliseValue($tag, $text);

		// If the parent has sub tags, make it an array and assign this tag as its element
		if($parent !== null && in_array($parent, $this->hasSubTags, true))
		{
			if(! isset($target[$parent]) || ! is_array($target[$parent]))
			{
				$target[$parent] = array();
			}

			$this->appendString($target[$parent], $tag, $value);
			return;
		}

		if(! in_array($tag, $this->hasSubTags, true))
		{
			$this->appendString($target, $tag, $value);
		}

		if(empty($this->currentAttr))
		{
			return;
		}

		$attrs = $this->currentAttr;
		$target[$tag . '_ATTRS'] = $attrs;

		// If the tag has (almost) no text value, use its attributes as the value.
		// An array value holds sub tags collected earlier and must not be replaced.
		$existing = isset($target[$tag]) ? $target[$tag] : '';

		if(is_string($existing) && strlen($existing) < 2)
		{
			// A single attribute is stored as its plain value, several as the whole array
			$target[$tag] = (count($attrs) == 1) ? reset($attrs) : $attrs;
		}
	}

	/**
	* Append text to a string element, creating the element when necessary
	*
	* @access   private
	* @param    array   array to modify
	* @param    string  element key
	* @param    string  text to append
	* @return   void
	*/
	private function appendString(array &$target, $key, $value)
	{
		if(! isset($target[$key]) || ! is_string($target[$key]))
		{
			$target[$key] = '';
		}

		$target[$key] .= $value;
	}

	/**
	* Convert the raw text of a tag into the value that gets stored
	*
	* @access   private
	* @param    string  tag name
	* @param    string  raw text of the tag
	* @return   string  a Unix timestamp for date tags (empty string when the date
	*                   cannot be parsed), BBCode text without HTML tags otherwise
	*/
	private function normaliseValue($tag, $text)
	{
		$text = trim($text);

		// Converting all date formats to timestamp
		if(in_array($tag, $this->dateTags, true))
		{
			$timestamp = strtotime($text);
			return ($timestamp === false) ? '' : (string) $timestamp;
		}

		return strip_tags($this->html2bb($text));
	}

	/**
	* Find out the feed version
	*
	* Compares the attribute values of a tag (where the namespace declarations
	* show up) with the list of known namespaces.
	*
	* @access   private
	* @param    string  name of current tag
	* @param    array   array of attributes
	* @return   void
	*/
	private function findVersion($tagName, $attrs)
	{
		$declared = array_values($attrs);

		foreach($this->namespaces as $uri => $version)
		{
			if(in_array($uri, $declared))
			{
				$this->version = $version;
				return;
			}
		}
	}

	/**
	* Get the name of the tag enclosing the current tag
	*
	* @access   private
	* @return   string|null     null when the current tag is the root or no tag is open
	*/
	private function getParentTag()
	{
		$depth = count($this->insideItem);

		return ($depth >= 2) ? $this->insideItem[$depth - 2] : null;
	}

	/**
	* Detect if current position is in channel element
	*
	* @access   private
	* @return   bool
	*/
	private function inChannel()
	{
		if($this->version === 'RSS 1.0')
		{
			return in_array('CHANNEL', $this->insideItem)
				&& $this->currentTag != 'CHANNEL';
		}

		if($this->version === 'RSS 2.0')
		{
			return in_array('CHANNEL', $this->insideItem)
				&& ! in_array('ITEM', $this->insideItem)
				&& $this->currentTag != 'CHANNEL';
		}

		if($this->version === 'ATOM 1')
		{
			return in_array('FEED', $this->insideItem)
				&& ! in_array('ENTRY', $this->insideItem)
				&& $this->currentTag != 'FEED';
		}

		return false;
	}

	/**
	* Detect if current position is in Item element
	*
	* @access   private
	* @return   bool
	*/
	private function inItem()
	{
		if($this->version === 'RSS 1.0' || $this->version === 'RSS 2.0')
		{
			return in_array('ITEM', $this->insideItem) && $this->currentTag != 'ITEM';
		}

		if($this->version === 'ATOM 1')
		{
			return in_array('ENTRY', $this->insideItem) && $this->currentTag != 'ENTRY';
		}

		return false;
	}

	//This function is taken from lastRSS
	/**
	* Replace HTML entities &something; by real characters
	*
	* Not called from within this class.
	*
	* @access   private
	* @author   Vojtech Semecky <hide@address.com>
	* @link     http://lastrss.oslab.net/
	* @param    string
	* @return   string
	*/
	private function unhtmlentities($string)
	{
		// Get HTML entities table
		$trans_tbl = get_html_translation_table (HTML_ENTITIES, ENT_QUOTES);
		// Flip keys<==>values
		$trans_tbl = array_flip ($trans_tbl);
		// Add support for &apos; entity (missing in HTML_ENTITIES)
		$trans_tbl += array('&apos;' => "'");
		// Replace entities by values
		return strtr ($string, $trans_tbl);
	}

	/**
	* Run a regular expression replacement that can never lose the subject
	*
	* @access   private
	* @param    string          the pattern
	* @param    string|Closure  replacement string, or a callback receiving the matches
	* @param    string          the text to work on
	* @return   string          the result, or the unchanged subject when PCRE reports an error
	*/
	private function pregReplace($pattern, $replacement, $subject)
	{
		$result = ($replacement instanceof Closure)
			? preg_replace_callback($pattern, $replacement, $subject)
			: preg_replace($pattern, $replacement, $subject);

		return ($result === null) ? $subject : $result;
	}

	/**
	* HTML conversion common to both bbcode and text result
	*
	* Cuts the text down to the content of <body> (if there is one), lower-cases
	* all tag names, turns line breaks and repeated spaces into single spaces and
	* <br> into CR LF. The content of <pre> blocks is left untouched.
	*
	* @access   public
	* @param    string
	* @return   string
	*/
	public function html_common($string)
	{
		$string = (string) $string;

		// First extract just the body of the message
		if (stristr($string, "<body"))
		{
			if (preg_match('#<body.*?>(.*)</body>#is', $string, $matches))
			{
				$string = $matches[1];
			}
			elseif (preg_match('#<body.*?>(.*)#is', $string, $matches))
			{
				$string = $matches[1];
			}
		}

		// Convert all HTML tags to lower case
		$string = $this->pregReplace('#(</?)(\w+)([^>]*>)#', function ($m)
		{
			return $m[1] . strtolower($m[2]) . $m[3];
		}, $string);

		//
		// Save the pre-formatted text
		//
		// The placeholder ends with "***" so that number 1 can never be mistaken
		// for the beginning of number 10.
		$preBlocks = array();
		$string = $this->pregReplace('#<pre(| .*?)>(.*?)</pre>#s', function ($m) use (&$preBlocks)
		{
			$preBlocks[] = $m[2];
			return '***pre_string***' . (count($preBlocks) - 1) . '***';
		}, $string);

		//
		// Replace formatting elements
		//
		// Replace CR LF, and then CR or LF alone, with a single space
		$string = str_replace(array("\r\n", "\r", "\n"), ' ', $string);
		// Replace line break with a CR LF
		$string = str_replace(array('<br>', '<br />', '<br/>'), "\r\n", $string);
		// replace spaces
		$string = str_replace('&nbsp;', ' ', $string);
		// replace multiple spaces
		$string = $this->pregReplace('/ {2,}/', ' ', $string);

		//
		// Restore the preformatted text
		//
		foreach ($preBlocks as $number => $block)
		{
			$string = str_replace('***pre_string***' . $number . '***', '<pre>' . $block . '</pre>', $string);
		}

		return $string;
	}

	/**
	* Converts HTML into bbcode
	*
	* This function is loosely based on cbparser.php by corz.org
	*
	* @access   public
	* @param    string  HTML text
	* @return   string  text with BBCode tags; HTML tags are removed and HTML entities decoded
	*/
	public function html2bb($string)
	{
		// Do common conversion stuff
		$string = $this->html_common($string);

		// Font settings are dropped, only the enclosed text is kept;
		// alignment becomes a tag named after the alignment value
		$string = $this->pregReplace('#<font color="(.*?)">(.*?)</font>#i', ' $2 ', $string);
		$string = $this->pregReplace('#<font size="(.*?)">(.*?)</font>#i', ' $2 ', $string);
		$string = $this->pregReplace('#<font face="(.*?)">(.*?)</font>#i', ' $2 ', $string);
		$string = $this->pregReplace('#<p align="(.*?)">(.*?)</p>#i', '[$1] $2 [/$1]', $string);
		$string = $this->pregReplace('#<div align="(.*?)">(.*?)</div>#i', '[$1] $2 [/$1]', $string);

		// Do simple string replacements (closing tags)
		$closingTags = array(
			'</b>'          => '[/b]',
			'</i>'          => '[/i]',
			'</u>'          => '[/u]',
			'</ul>'         => '[/list]',
			'</ol>'         => '[/list]',
			'</em>'         => '[/i]',
			'</strong>'     => '[/b]',
			'</blockquote>' => '[/quote]',
			'</pre>'        => '[/quote]',
		);
		$string = str_replace(array_keys($closingTags), array_values($closingTags), $string);

		// Do simple reg expr replacements (opening tags, with or without attributes)
		$openingTags = array(
			'b'          => '[b]',
			'i'          => '[i]',
			'u'          => '[u]',
			'ul'         => '[list]',
			'ol'         => '[list=1]',
			'li'         => '[*]',
			'em'         => '[i]',
			'strong'     => '[b]',
			'blockquote' => '[quote]',
			'pre'        => '[quote]',
		);
		foreach ($openingTags as $htmlTag => $bbTag)
		{
			$string = $this->pregReplace('#<' . $htmlTag . '(| .*?)>#', $bbTag, $string);
		}

		// replace multiple instances of [b] or [i] with single tags
		$string = $this->pregReplace('#(\[b\])+#',  '[b]',  $string);
		$string = $this->pregReplace('#(\[i\])+#',  '[i]',  $string);
		$string = $this->pregReplace('#(\[/b\])+#', '[/b]', $string);
		$string = $this->pregReplace('#(\[/i\])+#', '[/i]', $string);

		// fix for thunderbird which chops up quotes into little chunks for some reason. Remove if necessary!
		$string = $this->pregReplace('#\[\/quote\]\s*?\[quote\]#', '', $string);

		// Replace email address
		$string = $this->pregReplace('#<a .*?href=.*?"mailto:(.*?)".*?>(.*?)</a>#i', '$2 ([email]$1[/email])', $string);

		// Replace links
		$string = $this->pregReplace('#<a .*href=.*"(.*)".*>(.*)</a>#iU', function ($m)
		{
			$target = trim($m[1]);
			return '[url' . ($target !== '' ? '=' . $target : '') . ']' . trim($m[2]) . '[/url]';
		}, $string);

		// Remove any image tags whose source starts with 'cid:' - this is an inline attachment, and will be added to the post as a normal attachment.
		$string = $this->pregReplace('#<img[^>]+src="cid:[^>]+>#i', '', $string);

		// Replace image references
		$string = $this->pregReplace('#<img .*src="(.*)".*>#iU', function ($m)
		{
			return '[img]' . trim($m[1]) . '[/img]';
		}, $string);

		// Remove all remaining HTML tags
		$string = $this->pregReplace('#<(/?\w+|!--)[^>]*>#', '', $string);

		// Convert HTML entities. Flags and charset are given explicitly because
		// their defaults differ between PHP versions.
		return html_entity_decode($string, ENT_QUOTES, 'UTF-8');
	}


}
?>
Return current item: PBBoard