Location: PHPKode > scripts > Craigslist XML > craigslist.php
<?PHP
/**
 * Minimal page scraper: downloads one URL with cURL into $html and extracts
 * every fragment that lies between a start marker and an end marker.
 */
class tagSpider {
	public $crl = null; // cURL handle; only set while fetchPage() is running
	public $html = ""; // body of the last fetched page ("" when nothing was fetched)
	public $binary = 0; // kept for backward compatibility; no longer forwarded to cURL (see fetchPage)
	public $url = ""; // URL passed to the most recent fetchPage() call

	/**
	 * Reset the spider to an empty state.
	 *
	 * A real __construct() is used because a method named after the class is
	 * not run as a constructor on PHP 8, which left the properties as null.
	 */
	public function __construct() {
		$this->crl = null;
		$this->html = "";
		$this->binary = 0;
		$this->url = "";
	}

	/**
	 * Download $url and store the response body in $this->html.
	 *
	 * $this->html is reset to "" first, so an empty/non-string URL or a failed
	 * transfer never leaves markup from an earlier call behind.
	 *
	 * @param string $url URL to download
	 * @return void
	 */
	public function fetchPage($url) {
		$this->url = $url;
		$this->html = "";

		if (!is_string($url) || $url === "") {
			return; // nothing usable to fetch
		}

		$this->crl = curl_init(); // start cURL instance
		if ($this->crl === false) {
			$this->crl = null;
			return;
		}

		curl_setopt($this->crl, CURLOPT_RETURNTRANSFER, 1); // return the data instead of printing it
		curl_setopt($this->crl, CURLOPT_URL, $this->url); // set the url to download
		// CURLOPT_BINARYTRANSFER is intentionally not set: the PHP manual lists the
		// constant as no longer used and as deprecated in recent PHP versions.

		$body = curl_exec($this->crl); // string on success, false on failure

		// Only resource-type handles (PHP < 8) need an explicit close.
		if (is_resource($this->crl)) {
			curl_close($this->crl);
		}
		$this->crl = null; // drop the reference so the handle can be released

		if (is_string($body)) {
			$this->html = $body;
		}
	}

	/**
	 * Return every fragment of $this->html that starts with $beg_tag and ends
	 * at the nearest following $close_tag (both markers included).
	 *
	 * The markers are interpolated into the regular expression unescaped, so
	 * regex metacharacters in them are interpreted as regex syntax. Matching is
	 * case-insensitive, ungreedy, and "." also matches newlines.
	 *
	 * @param string $beg_tag   start marker
	 * @param string $close_tag end marker
	 * @return array list of matched fragments; empty when nothing matched or the match failed
	 */
	public function parse_array($beg_tag, $close_tag) {
		$matching_data = array();
		$found = preg_match_all("($beg_tag.*$close_tag)siU", (string) $this->html, $matching_data);
		if ($found === false || !isset($matching_data[0])) {
			return array();
		}
		return $matching_data[0];
	}
}

/**
 * Return capture group 1 of the first match of $pattern in $subject,
 * or null when the pattern does not match.
 */
function craigslist_first_group($pattern, $subject) {
	$found = array();
	if (preg_match($pattern, $subject, $found) === 1 && isset($found[1])) {
		return $found[1];
	}
	return null;
}

/**
 * Return $fields[$key] when it exists and is a string, otherwise null.
 * (parse_str() can produce nested arrays, which cannot be written as text.)
 */
function craigslist_string_field($fields, $key) {
	return (isset($fields[$key]) && is_string($fields[$key])) ? $fields[$key] : null;
}

/**
 * Turn the user-supplied url parameter into something safe to hand to cURL.
 *
 * Only http and https are accepted, so schemes such as file:// are rejected.
 * A URL without a scheme gets "http://" prefixed so the protocol is explicit.
 *
 * @param mixed $url raw request value
 * @return string|null URL to fetch, or null when the value is unusable
 */
function craigslist_fetchable_url($url) {
	if (!is_string($url) || $url === '') {
		return null;
	}
	$scheme = parse_url($url, PHP_URL_SCHEME);
	if ($scheme === false) {
		return null; // seriously malformed URL
	}
	if ($scheme === null) {
		return 'http://' . ltrim($url, '/');
	}
	$scheme = strtolower($scheme);
	return ($scheme === 'http' || $scheme === 'https') ? $url : null;
}

header('Content-Type: application/xml; charset=UTF-8');
$writer = new XMLWriter();
$writer->openURI('php://output');
$writer->setIndent(true);
$writer->startDocument('1.0', 'utf-8');

$writer->startElement('XML');
$writer->writeElement('document', 'Craigslist XML');
$writer->writeElement('description', 'Craigslist XML Lists');

$page = isset($_GET['url']) ? $_GET['url'] : null; //http://fortmyers.craigslist.org/web

// NOTE(review): any http(s) host is still accepted; consider restricting the
// host to the Craigslist domains this script is meant for.
$urlrun = craigslist_fetchable_url($page);
$writer->writeElement('link', $urlrun);

// A URL containing ".html" is treated as a single posting, anything else as a listing page.
$is_post_page = ($urlrun !== null && stripos($urlrun, '.html') !== false);

if ($is_post_page) {
	$stag='<section class="body">';
	$etag='<footer>';
} else {
	$stag='<p';
	$etag='</p>';
}

$linkarray = array();
if ($urlrun !== null) {
	$tspider = new tagSpider();
	$tspider->fetchPage($urlrun);
	$linkarray = $tspider->parse_array($stag, $etag);
	if (!is_array($linkarray)) {
		$linkarray = array(); // a failed match must not break the loops below
	}
}

if ($is_post_page) {
	foreach ($linkarray as $list) {
		// The reply link is a mailto: URL; rewrite it into a query string so
		// parse_str() can split it into mailto / subject / body.
		$reply_array = array();
		$reply_href = craigslist_first_group("'<span id=\"replytext\">Reply to:</span> <a href=\"(.*?)\">'si", $list);
		if ($reply_href !== null) {
			$reply_url = str_replace('?', '&', $reply_href);
			$reply_url = str_replace('mailto:', 'mailto=', $reply_url);
			parse_str(str_replace('amp;', '&', $reply_url), $reply_array);
		}

		$compensation = craigslist_first_group("' -->Compensation: (.*?)</li>'si", $list);
		$location = craigslist_first_group("' -->Location: (.*?)</li>'si", $list);
		$post_body = craigslist_first_group("'<section id=\"postingbody\">(.*?)</section>'si", $list);

		$images = array();
		preg_match_all("'href=\"http:\/\/images.craigslist.org\/(.*?)\"'si", $list, $images);
		$image_paths = (isset($images[1]) && is_array($images[1])) ? $images[1] : array();

		// Fields that were not found are written as empty elements.
		$writer->startElement('post');
			$writer->writeElement('reply-mailto', craigslist_string_field($reply_array, 'mailto'));
			$writer->writeElement('reply-subject', craigslist_string_field($reply_array, 'subject'));
			$writer->writeElement('reply-body', craigslist_string_field($reply_array, 'body'));
			$writer->writeElement('compensation', $compensation);
			$writer->writeElement('location', $location);
			$writer->writeElement('post-body', $post_body);
			foreach ($image_paths as $img) {
				$writer->writeElement('image', 'http://images.craigslist.org/'.$img);
			}
		$writer->endElement();
	}
} else {
	$server_name = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : '';

	foreach ($linkarray as $list) {
		$href = craigslist_first_group('/href="([^"]*)"/i', $list);

		// Links whose href contains "index" are skipped.
		if ($href !== null && stripos($href, 'index') !== false) {
			continue;
		}

		$title = craigslist_first_group("'<a href=\".*?\">(.*?)</a>'si", $list);
		$price = craigslist_first_group("'<span class=\"itempp\"> (.*?)</span>'si", $list);
		$loc = craigslist_first_group("'<span class=\"itempn\"><font size=\"-1\"> (.*?)</font></span>'si", $list);

		$writer->startElement('item');
			$writer->writeElement('url', $href);
			$writer->writeElement('title', $title);
			// The target URL is a query-string value, so it must be URL-encoded
			// or any "&" / "?" inside it would split the parameter.
			$writer->writeElement('new_url', $server_name.'/craigslist.php?url='.urlencode((string) $href));
			$writer->writeElement('price', $price);
			$writer->writeElement('location', $loc);
		$writer->endElement();
	}
}

$writer->endElement();
$writer->endDocument();
?>
Return current item: Craigslist XML