Location: PHPKode > projects > FarODP > farodp/includes/html/OpenDirectoryHTMLInterpreter.php
<?php

// Literal HTML fragments used as cut points when trimming fetched directory pages.
// They are matched byte-for-byte with strpos(), so any change in the remote markup
// (attribute order, quoting, whitespace) makes the corresponding marker stop matching.

// Start markers tried by preprocessBrowse() (after the A-Z link checks); the page is cut
// so that it begins at the first marker found. BROWSE_START_2 is only tried when
// BROWSE_START is absent.
define('BROWSE_START', "<table border=0>\n<tr><td valign=top><ul>");
define('BROWSE_START_2', '<table align=center width="100%" cellpadding=0 cellspacing=0>');

// End markers used by preprocessBrowse(); everything from the first marker found onwards
// is discarded. BROWSE_END_2 is only tried when BROWSE_END is absent.
define('BROWSE_END', '<table width="95%" cellpadding=0');
define('BROWSE_END_2', '"&nbsp;search');

// Start and end markers used by preprocessSearch() to isolate the search results.
define('SEARCH_START', '<CENTER>Search: ');
define('SEARCH_END', '<TABLE cellpadding=0 ');

// Hack to remove other languages. This only works for the English categories,
// because the marker below contains the English heading text.

// Opening markup of the "This category in other languages:" block that
// preprocessBrowse() removes from a category page.
define('OTHER_LANG_STR', '<p><table><tr><td valign=top colspan=2>This category in other languages:');

/**
 * Isolates the category table of the main page.
 *
 * Keeps the text from the opening table marker up to and including the first
 * '</td></tr></table>' that follows it.
 *
 * @param string $data Raw page markup.
 * @return string|false The isolated table markup, or false when either marker is
 *                      missing (in which case the global action is set to 'cannotBrowse').
 */
function preprocessMainPage($data) {
	$opening = '<table cellspacing="4" cellpadding="4"><tr><td valign=top>';
	$closing = '</td></tr></table>';

	$from = strpos($data, $opening);

	// Only look for the closing markup once the opening one is known, and only after it.
	$to = ($from === false) ? false : strpos($data, $closing, $from);

	if ($to === false)
	{
		$GLOBALS['action']->setAction('cannotBrowse');
		return false;
	}

	return substr($data, $from, $to - $from + strlen($closing));
}

/**
 * Converts the main page markup into category blocks attached to $node.
 *
 * Every match of the pattern yields one AstCategoryBlock (bold top-level link) that
 * carries three AstCategoryBlockExample children (the three comma-separated links in
 * the <small> section that follows it).
 *
 * @param string $data Markup to scan.
 * @param object $node Receives the blocks through addNode(); passed by reference.
 * @return bool Always true.
 */
function processMainPage($data, &$node) {
	$pattern = '#<b><a href\="(.*?)">(.*?)</a></b><br>'
		. '.*?<small>.*?<a href\="(.*?)">(.*?)</a>,.*?'
		. '<a href\="(.*?)">(.*?)</a>,.*?<a href\="(.*?)">(.*?)</a>#s';

	preg_match_all($pattern, $data, $hits, PREG_SET_ORDER);

	foreach ($hits as $hit) {
		// strip_tags(): strip the colored "Kids and Teens" category.
		$category = new AstCategoryBlock(urlCat(dehtml($hit[1])), dehtml(strip_tags($hit[2])));

		// Capture groups (3,4), (5,6) and (7,8) are the href/label pairs of the examples.
		for ($group = 3; $group <= 7; $group += 2) {
			$category->addNode(
				new AstCategoryBlockExample(urlCat(dehtml($hit[$group])), dehtml($hit[$group + 1]))
			);
		}

		$node->addNode($category);
	}

	return true;
}

/**
 * Trims a fetched category page down to the part processBrowse() parses.
 *
 * Steps:
 *  1. Reject empty input and "Page Has Moved" stubs.
 *  2. Cut everything before the A-Z link bar (prefixing '<hr><center>'), or, when there
 *     is no A-Z bar, before BROWSE_START / BROWSE_START_2.
 *  3. Cut everything from BROWSE_END (or BROWSE_END_2) onwards.
 *  4. Remove the "This category in other languages:" block.
 *  5. Cut everything from the usenet links table onwards.
 *
 * @param string $data Raw page markup.
 * @return string|false The trimmed markup, or false when the page cannot be browsed
 *                      (the global action is then set to 'cannotBrowse').
 */
function preprocessBrowse($data) {
	// Loose comparison on purpose: a failed fetch passed in as false/null is treated as empty.
	if ($data == '' || substr($data, 0, 33) === '<html><head><title>Page Has Moved')
	{
		$GLOBALS['action']->setAction('cannotBrowse');
		return false;
	}

	// don't skip A-Z links if there are any
	if (($pos = strpos($data, '[ <a href="')) !== false
		|| ($pos = strpos($data, '[ <b>')) !== false)
	{
		$data = '<hr><center>' . substr($data, $pos);
	}
	elseif (($pos = strpos($data, BROWSE_START)) !== false
		|| ($pos = strpos($data, BROWSE_START_2)) !== false)
	{
		$data = substr($data, $pos);
	}
	else
	{
		// None of the known start markers is present.
		$GLOBALS['action']->setAction('cannotBrowse');
		return false;
	}

	// delete dmoz header and footer
	if (($pos = strpos($data, BROWSE_END)) !== false
		|| ($pos = strpos($data, BROWSE_END_2)) !== false)
	{
		$data = substr($data, 0, $pos);
	}

	// hide "This category in other languages:" sections
	if (($t = strpos($data, OTHER_LANG_STR)) !== false)
	{
		$closing = '</table></td></tr></table>';

		// Look for the closing markup only AFTER the block starts; an earlier occurrence
		// belongs to some other table and must not be used as the end of this block.
		$close = strpos($data, $closing, $t);

		if ($close !== false)
		{
			// The original skipped 27 bytes here: the 26-byte closing markup plus the one
			// byte that follows it (presumably a newline -- confirm against live markup).
			$data = substr($data, 0, $t) . substr($data, $close + strlen($closing) + 1);
		}
		// Without a closing marker the extent of the block is unknown, so nothing is removed.
	}

	// remove usenet links
	if (($t = strpos($data, '<table cellpadding=0 cellspacing=0><tr><td width="100%" valign=top>')) !== false)
	{
		$data = substr($data, 0, $t);
	}

	return $data;
}

/**
 * Parses a preprocessed category page and attaches the result to $node.
 *
 * Nodes are added in this order:
 *  1. AstAToZ                     - the A-Z link bar, only when one is present.
 *  2. AstBrowseCategoryContainer  - one AstBrowseCategorySection per sub-category table.
 *  3. AstBrowseCategoryRelated    - one per entry of the "See also:" list, if present.
 *  4. AstBrowseListingContainer   - one AstBrowseListing per site listing line.
 *
 * @param string $data Markup as returned by preprocessBrowse().
 * @param object $node Receives the parsed nodes through addNode(); passed by reference.
 * @return bool True on success; false when an A-Z bar starts but its end marker is missing
 *              or precedes it (the global action is then set to 'formatNotSupported').
 */
function processBrowse($data, &$node) {
	// ---- A-Z link bar -------------------------------------------------------------

	$AToZStart = strpos($data, '[ <a href="');
	if ($AToZStart === false)
	{
		// The bar may also open with the current (bold, unlinked) letter.
		$AToZStart = strpos($data, '[ <b>');
	}

	if ($AToZStart !== false)
	{
		$AToZEnd = strpos($data, '] </center>');
		if ($AToZEnd === false || $AToZStart >= $AToZEnd)
		{
			$GLOBALS['action']->setAction('formatNotSupported');
			return false;
		}

		// Skip the leading "[ " and stop right before "] </center>".
		$contents = substr($data, $AToZStart + 2, $AToZEnd - $AToZStart - 2);

		$AToZ = new AstAToZ();

		foreach (preg_split('#> | <#', $contents) as $item) {
			if (preg_match('#a href\="(.*?)"><b>(.*?)</b>#', $item, $m)) {
				// Linked letter.
				$AToZ->addNode(new AstAToZItem(dehtml($m[2]), urlCat(dehtml($m[1]))));
			} elseif (preg_match('#b>(.*?)</b#', $item, $m)) {
				// Bold letter without a link: the letter currently shown.
				$AToZ->addNode(new AstCurCat(dehtml($m[1])));
			}
		}

		$node->addNode($AToZ);
	}

	// ---- Sub-categories -----------------------------------------------------------

	$contCatPosRelated = strpos($data, '<p>See also:<ul>');

	// Sub-category tables are only looked for in the part before the "See also:" list.
	$catData = ($contCatPosRelated !== false) ? substr($data, 0, $contCatPosRelated) : $data;

	// Same text as the BROWSE_START marker; each occurrence opens one section.
	$catSections = explode("<table border=0>\n<tr><td valign=top><ul>", $catData);

	// Whatever precedes the first table is not a section.
	array_shift($catSections);

	$contCat = new AstBrowseCategoryContainer();

	foreach ($catSections as $section) {
		$rawSecCat = array();

		// some symlinks have different names
		// Groups: 1 = href, 2 = label, 3 = '@' for symbolic links (empty otherwise), 4 = text in parentheses.
		preg_match_all('#<li><a href\="(.+?)"><b>(.+?)</b></a>(@)?\s+\&nbsp;<i>\((.+?)\)</i>#s', $section,
			$rawSecCat, PREG_SET_ORDER);

		$secCat = new AstBrowseCategorySection();

		foreach ($rawSecCat as $v) {
			$cat = urlCat($v[1]);

			if (strpos($v[2], ':') !== false) {
				// Hack to detect "see also" categories in foreign languages. It's not catch-all.
				$secCat->addNode(
					new AstBrowseCategoryRelated($cat, dehtml($v[2]), dehtml($v[4]))
				);
			} elseif ($v[3] == '@') {
				$secCat->addNode(
					new AstBrowseCategorySymbolic($cat, dehtml($v[2]), dehtml($v[4]))
				);
			} else {
				$secCat->addNode(
					new AstBrowseCategory($cat, dehtml($v[2]), dehtml($v[4]))
				);
			}
		}

		$contCat->addNode($secCat);
	}

	$node->addNode($contCat);

	// ---- "See also:" list ---------------------------------------------------------

	if ($contCatPosRelated !== false) {
		$end = strpos($data, '</ul>', $contCatPosRelated);

		// When the list is never closed, scan to the end of the data instead of handing
		// substr() a negative length derived from a false position.
		$catsRelatedData = ($end !== false)
			? substr($data, $contCatPosRelated, $end - $contCatPosRelated)
			: substr($data, $contCatPosRelated);

		$catsRelated = array();
		preg_match_all('#<li><a href\="(.+?)"><b>(.+?)</b></a> \&nbsp;<i>\((.+?)\)</i>#s',
			$catsRelatedData,
			$catsRelated, PREG_SET_ORDER);

		foreach ($catsRelated as $v) {
			$cat = urlCat($v[1]);
			$node->addNode(new AstBrowseCategoryRelated($cat, dehtml($v[2]), dehtml($v[3])));
		}
	}

	// ---- Site listings ------------------------------------------------------------

	// trim the age range from the kids and teens sections
	$data = preg_replace('#<small>\[.*?\]</small> #', '', $data);

	$lines = explode("\n", $data);

	// The last two lines are never treated as listings; this also guarantees that
	// $lines[$i + 1] exists inside the loop.
	$count = count($lines) - 2;

	$contBrowse = new AstBrowseListingContainer();

	for ($i = 0; $i < $count; $i++)
	{
		// Only lines containing an absolute URL are considered listings.
		if (strpos($lines[$i], '://') === false)
		{
			continue;
		}

		$sections = explode('">', $lines[$i], 2);
		if (!isset($sections[1])) continue;

		// Drops the first 13 bytes, i.e. the length of '<li><a href="'.
		// NOTE(review): assumes every line containing '://' starts with that prefix -- confirm.
		$url = substr($sections[0], 13);

		$linkEnd = strpos($sections[1], '</a>');
		$title = ($linkEnd !== false) ? substr($sections[1], 0, $linkEnd) : '';

		// The description follows the first ' - ' after the link. When there is no
		// separator the description is empty (previously false + 3 made it start at
		// byte 3, in the middle of the title).
		$descPos = strpos($sections[1], ' - ', ($linkEnd !== false) ? $linkEnd : 0);
		$desc = ($descPos !== false) ? substr($sections[1], $descPos + 3) : '';

		// A following line that starts with ' - ' carries the description instead; such
		// listings are flagged with $star and the extra line is consumed.
		$star = false;
		if (substr($lines[$i + 1], 0, 3) === ' - ') {
			$desc = substr($lines[$i + 1], 3);
			$star = true;
			$i++;
		}

		// Featured sites have bold tags
		$contBrowse->addNode(new AstBrowseListing(dehtml($url), dehtml(strip_tags($title)), dehtml(strip_tags($desc)), $star, true));
	}

	$node->addNode($contBrowse);

	return true;
}

/**
 * Trims a fetched search result page down to the part processSearch() parses.
 *
 * Outcomes, checked in this order:
 *  - empty input, or the "temporarily unavailable" notice is present
 *        -> action 'searchHeavyLoad', returns false
 *  - 'Try your search on:' is present
 *        -> action 'searchNoResults', returns false
 *  - SEARCH_START is missing
 *        -> action 'cannotSearch', returns false
 *  - otherwise the text from SEARCH_START up to (not including) the next SEARCH_END,
 *    or up to the end of the data when SEARCH_END does not follow.
 *
 * @param string $data Raw page markup.
 * @return string|false The trimmed markup, or false on any of the failures above.
 */
function preprocessSearch($data) {
	$unavailable = '<CENTER><I>The Open Directory search is temporarily unavailable. Please try back later.</I></CENTER>';

	// Loose comparison on purpose: a failed fetch passed in as false/null is treated as empty.
	// (The former "!= '' || ... == false" test was always true, so this branch never ran.)
	if ($data == '' || strpos($data, $unavailable) !== false)
	{
		$GLOBALS['action']->setAction('searchHeavyLoad');
		return false;
	}

	// Strict check so that a match at position 0 is not mistaken for "not found".
	if (strpos($data, 'Try your search on:') !== false)
	{
		$GLOBALS['action']->setAction('searchNoResults');
		return false;
	}

	// delete dmoz header and footer

	$start = strpos($data, SEARCH_START);
	if ($start === false)
	{
		$GLOBALS['action']->setAction('cannotSearch');
		return false;
	}

	// Only an end marker located after the start marker can close the result section.
	$end = strpos($data, SEARCH_END, $start);
	if ($end === false)
	{
		return substr($data, $start);
	}

	return substr($data, $start, $end - $start);
}

/**
 * Parses preprocessed search results line by line and adds one AstSearchListing
 * to $node for every site listing found.
 *
 * A listing line starts with '<li><a href="' and contains '://'. From it the URL,
 * title, description, a "star" flag and the category path (split on ': ') are taken.
 *
 * @param string $data Markup as returned by preprocessSearch().
 * @param object $node Receives the listings through addNode(); passed by reference.
 * @return bool Always true.
 */
function processSearch($data, &$node) {

	$lines = explode("\n", $data);
	// The last three lines of the data are never examined.
	$count = count($lines) - 3;
	$i = 0;

	/**
	 * Since category search results have the same categories as the categories in the top listings,
	 * category results are omitted.
	 */

	// NOTE(review): incremented for every listing line below but never read in this function.
	$listingNum = 0;

	for (; $i < $count; $i++)
	{
		// note that this is not the same as the browse listings code

		// A line that starts with '<li><a href="' cannot also start with '<li><b>' or '</b>',
		// so the last two checks are always true once the first one holds.
		if (strpos($lines[$i], '<li><a href="') === 0 && strpos($lines[$i], '://') !== false && substr($lines[$i], 0, 7) !== '<li><b>' && substr($lines[$i], 0, 4) !== '</b>')
		{
			++$listingNum;
			$star = false;
			// $sections[0] = '<li><a href="' + URL, $sections[1] = everything after the first '">'.
			$sections = explode('">', $lines[$i], 2);
			if (!isset($sections[1])) continue;
			// 13 is the length of '<li><a href="'.
			$url = substr($sections[0], 13);
			$linkEnd = strpos($sections[1], '</a>');
			$title = substr($sections[1], 0, $linkEnd);
			// An <img> directly after the link marks the listing as starred.
			if (substr($sections[1], $linkEnd + 4, 4) === '<img') {
			    $star = true;
				// NOTE(review): the new start offset is (position of '&nbsp;' minus $linkEnd), and
				// $linkEnd is still used further down as a search offset into this shortened string.
				// The intent is not evident from here -- confirm against real result markup. Under
				// PHP 8 strpos() throws a ValueError when the offset exceeds the string length.
				$sections[1] = substr($sections[1], strpos($sections[1], '&nbsp;') - $linkEnd);
				// The line after a starred listing is skipped.
				$i++;
			}

			// Position of the trailing category line ('<br><small><i>-- ...'), if any.
			$ss = strpos($sections[1], '<br><small><i>--');

			// NOTE(review): when ' - ' is absent strpos() returns false and this evaluates to 3.
			$descStart = strpos($sections[1], ' - ', $linkEnd) + 3;

			if ($ss !== false) {
				// Description runs up to the category line; the category is the text of the
				// first link after it (between the next '">' and the following '</a>').
				// $first is assigned inside the second argument and reused in the third, which
				// relies on arguments being evaluated left to right.
				$desc = substr($sections[1], $descStart, $ss - $descStart);
				$cat = substr($sections[1],
					$first = strpos($sections[1],
					 '">', $ss) + 2,
					 strpos($sections[1],
					 '</a>', $first) - $first);
			} else {
				// No category line: description runs up to the first '<br>', and the category is
				// taken from the first '">' ... '</a>' pair found in the remaining text.
				$desc = substr($sections[1], $descStart, strpos($sections[1], '<br>') - $descStart);
				$cat = substr($sections[1],
					$first = strpos($sections[1],
					'">') + 2,
					strpos($sections[1],
				 	'</a>', $first) - $first);
			}

			// The category path is split into its components on ': '.
			$node->addNode(new AstSearchListing(dehtml(strip_tags($url)), dehtml(strip_tags($title)), dehtml(strip_tags($desc)), $star, explode(': ', dehtml(strip_tags(($cat)))), true));
		}
	}

	return true;
}

?>
Return current item: FarODP