<?php
/* WikiRetriever
 * Version 1.0
 * Copyright 2005, Steve Blinch
 * http://code.blitzaffe.com
 *
 * This class fetches one or more documents from Wikipedia (or any other MediaWiki wiki),
 * translates them into HTML, and returns them.
 *
 *
 * EXAMPLE
 *
 * //
 * // Simple WikiRetriever usage example
 * //
 * require_once('class_WikiRetriever.php');
 *
 * $url = "http://www.yourwikisite.com/wiki/"; // must end with the wiki's article path
 * $titles = array("First_article","Second_article");
 * $wiki = new WikiRetriever();
 * if (!$wiki->retrieve($url,$titles)) {
 * 		die("Error: $wiki->error");
 * } else {
 * 		var_dump($wiki->pages);
 * }
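 *
 * //
 * // retrieve_urls() accepts a newline-separated string of full article
 * // URLs instead of bare titles (a sketch; the URLs are hypothetical and
 * // assume the same wiki as above):
 * //
 * $urls = "http://www.yourwikisite.com/wiki/First_article\n"
 *        ."http://www.yourwikisite.com/wiki/Second_article";
 * if (!$wiki->retrieve_urls($url,$urls)) {
 * 		die("Error: $wiki->error");
 * }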
 *
 *
 * LICENSE
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */
require_once("libcurlemu/class_HTTPRetriever.php");
require_once("class_XMLParser.php");

//require_once(dirname(__FILE__)."/class_WikiWrapper.php");
require_once(dirname(__FILE__)."/class_WikiParser.php");
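// HTTPRetriever provides the HTTP transport (with response caching),
// XMLParser provides xmlize() for the Special:Export XML, and WikiParser
// renders wikitext to HTML.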

class WikiRetriever {
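	// Runtime state.
	var $http;            // HTTPRetriever instance used for all requests
	var $xml;             // XMLParser instance used on the Special:Export XML
	var $wikiurl;         // base URL of the wiki being queried
	var $error;           // last error message, if any
	var $pages;           // retrieved pages: title, timestamp, text, html
	var $xmldata;         // raw xmlize()d Special:Export response
	var $_recursionlevel; // current redirect-following depth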
	
	// Sets up the HTTP retriever (with a one-hour response cache) and the
	// XML parser.
	function __construct() {
		$this->http = new HTTPRetriever();
		$this->http->caching = 3600; // cache responses for one hour
		$this->xml = new XMLParser();
	}
	
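	// Fetches one or more articles by POSTing their titles to the wiki's
	// Special:Export page. $wikiurl must end with the wiki's article path
	// (e.g. ".../wiki/"), $titles may be a single title or an array of
	// titles, and $level tracks redirect-recursion depth. On success the
	// pages are available in $this->pages; on failure $this->error is set.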
	function retrieve($wikiurl,$titles,$level=0) {
		
		$this->_recursionlevel = $level;
		
		$this->wikiurl = $wikiurl;
		
		//if (substr($wikiurl,-1)!='/') $wikiurl .= '/';
		if (!is_array($titles)) $titles = array($titles);
		$titles = implode("\r\n",$titles);

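		// Special:Export form fields: 'pages' is the newline-separated list
		// of titles to export; 'curonly' restricts the export to each page's
		// current revision.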
		$values = array(
			'action'=>'submit',
			'pages'=>$titles,
			'curonly'=>'true',
		);

		if ($this->http->post($wikiurl."Special:Export",$this->http->make_query_string($values))) {
			$this->parse_xml($this->http->response);

			return true;
		} else {
			$this->error = "HTTP request error: #{$this->http->result_code}: {$this->http->result_text}";
			return false;
		}
	}
	

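	// Convenience wrapper: takes a newline-separated string of full article
	// URLs, reduces each to the title after its last slash, and passes the
	// resulting titles to retrieve().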
	function retrieve_urls($wikiurl,$urls) {
		$urls = explode("\n",$urls);
		foreach ($urls as $k=>$url) {
			$url = trim($url);
			$p = strrpos($url,'/');
			// keep the full string if there is no slash (already a bare title)
			$urls[$k] = ($p === false) ? $url : substr($url,$p+1);
		}
		return $this->retrieve($wikiurl,$urls);
	}
	
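	// Parses the Special:Export XML into $this->pages, an array of arrays
	// with 'title', 'timestamp', 'text' (raw wikitext), and 'html' (the
	// wikitext rendered by wiki_html()).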
	function parse_xml($xml) {
		$this->xmldata = $this->xml->xmlize($xml);
		
		$this->pages = array();
		
		// guard against an empty export (e.g. no matching titles)
		if (!isset($this->xmldata['mediawiki']['#']['page'])) return true;
		
		foreach ($this->xmldata['mediawiki']['#']['page'] as $k=>$page) {
			$page = $page['#'];
			$pagedata = array(
				'title'=>$page['title'][0]['#'],
				'timestamp'=>$page['revision'][0]['#']['timestamp'][0]['#'],
				'text'=>$page['revision'][0]['#']['text'][0]['#'],
			);
			$pagedata['html'] = $this->wiki_html($pagedata['text'],$pagedata['title']);
			$this->pages[] = $pagedata;	
		}
		
		return true;
	}
	
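	// Renders raw wikitext to HTML via WikiParser. If the page is a
	// redirect, recursively retrieves the target page, giving up after
	// ten levels of redirection.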
	function wiki_html($text,$title) {
		$parser = new WikiParser();
		$output = $parser->parse($text,$title);
		
		// handle redirection
		if ($parser->redirect) {
			$this->_recursionlevel++;
			if ($this->_recursionlevel >= 10) {
				$output = "Redirection limit exceeded";
			} else {
				$sub = new WikiRetriever();
				$sub->retrieve($this->wikiurl,$parser->redirect,$this->_recursionlevel);
				$output = $sub->pages[0]['html'];
				unset($sub);
			}
		}
		
		return $output;
		
		/*
		//$text = preg_replace('/\[\[([^\|\]]*?\|)?([^\]]+?)\]\]/i','\\2',$text);
		$parser = &new Parser();
		$options = &new WikiWrapperOptions();
		$title = &new Title();
		$content = $parser->parse($text,$title,$options);
		return $content->mText;
		*/
	}
}
?>