<?
/*
////////////////////////////////////////////////////////////////////////////////
// Jazarsoft HTML Parser //
////////////////////////////////////////////////////////////////////////////////
// //
// VERSION : 1.0 //
// AUTHOR : James Azarja //
// CREATED : 2 May 2001 //
// WEBSITE : http://www.jazarsoft.com/ //
// SUPPORT : hide@address.com //
// BUG-REPORT : hide@address.com //
// COMMENT : hide@address.com //
// LEGAL : Copyright (C) 2001 Jazarsoft. //
// //
////////////////////////////////////////////////////////////////////////////////
// //
// This code may be used and modified by anyone so long as this header and //
// copyright information remains intact. //
// //
// The code is provided "as-is" and without warranty of any kind, //
// expressed, implied or otherwise, including and without limitation, any //
// warranty of merchantability or fitness for a particular purpose. //
// //
// In no event shall the author be liable for any special, incidental, //
// indirect or consequential damages whatsoever (including, without //
// limitation, damages for loss of profits, business interruption, loss //
// of information, or any other loss), whether or not advised of the //
// possibility of damage, and on any theory of liability, arising out of //
// or in connection with the use or inability to use this software. //
// //
////////////////////////////////////////////////////////////////////////////////
// HISTORY : //
////////////////////////////////////////////////////////////////////////////////
// //
// 1.0, May 2001 //
// - Initial Development (Convert from Pascal/Delphi) //
// //
////////////////////////////////////////////////////////////////////////////////
*/
/**
 * Minimal HTML tokenizer.
 *
 * Splits an HTML string into a flat, ordered list of elements: every tag
 * (from "<" up to and including the matching ">") and every run of text
 * between tags becomes one string in $elements.
 *
 * Typical use: InsertHTML() or LoadHTML(), then Parse(), then GetElements().
 *
 * NOTE: each input line is trimmed and lines are joined without a separator,
 * so text or tag attributes that continue on the next line end up directly
 * adjacent to each other. This matches the historical output of the class.
 */
class htmlparser_class
{
    /** HTML source to tokenize; set by InsertHTML() or LoadHTML(). */
    public $html = "";

    /** Not read or written by any method of this class. */
    public $ontagfound = "";

    /** Not read or written by any method of this class. */
    public $ontextfound = "";

    /** Tags and text runs produced by the most recent Parse() call. */
    public $elements = array();

    /**
     * Sets the HTML source from a string.
     *
     * @param string $htmlcode HTML to tokenize.
     * @return bool Always true.
     */
    public function InsertHTML($htmlcode)
    {
        $this->html = $htmlcode;
        return true;
    }

    /**
     * Loads the HTML source from a file.
     *
     * Every line is trimmed and the lines are concatenated without line
     * breaks. The previous source is cleared even when loading fails.
     *
     * @param string $filename Path of the file to read.
     * @return bool True on success, false when the path does not exist,
     *              is a directory, or cannot be opened.
     */
    public function LoadHTML($filename)
    {
        $this->html = "";

        // Reading from a directory handle never yields data, so reject it.
        if (!file_exists($filename) || is_dir($filename)) {
            return false;
        }

        $fh = fopen($filename, "r");
        if ($fh === false) {
            return false;
        }

        // A shared lock is sufficient for reading and is the only lock type
        // that is valid on a read-only handle on every platform.
        flock($fh, LOCK_SH);

        // Loop on the fgets() result rather than feof(): a failing read
        // returns false without ever reaching EOF, which would spin forever.
        // No length limit, so long lines are not trimmed mid-line.
        while (($buffer = fgets($fh)) !== false) {
            $this->html .= trim($buffer);
        }

        flock($fh, LOCK_UN);
        fclose($fh);
        return true;
    }

    /**
     * Copies the parsed elements into $result.
     *
     * @param array $result Receives the element list; set to an empty array
     *                      when there are no elements.
     * @return bool True when at least one element is available.
     */
    public function GetElements(&$result)
    {
        if (empty($this->elements)) {
            $result = array();
            return false;
        }

        $result = $this->elements;
        return true;
    }

    /**
     * Tokenizes $html into $elements.
     *
     * The element list is rebuilt from scratch on every call. A tag that is
     * still open when the input ends (no closing ">") is discarded; text
     * after the last tag is kept.
     *
     * @return bool False when there is no HTML to parse, true otherwise.
     */
    public function Parse()
    {
        if ((string) $this->html === "") {
            return false;
        }

        // Rebuild the list so that calling Parse() twice does not append a
        // second copy of every element.
        $this->elements = array();

        $intag = false;
        $text = "";
        $tag = "";

        // Tag/text state deliberately carries over from one line to the next.
        foreach (explode("\r\n", $this->html) as $line) {
            $line = trim($line);
            $length = strlen($line);

            for ($i = 0; $i < $length; $i++) {
                $char = $line[$i];

                if (!$intag) {
                    if ($char === "<") {
                        // A tag starts: emit the text collected so far.
                        if ($text !== "") {
                            $this->elements[] = $text;
                            $text = "";
                        }
                        $intag = true;
                        $tag = "<";
                    } else {
                        $text .= $char;
                    }
                } elseif ($char === ">") {
                    // The tag is complete: emit it including the brackets.
                    $this->elements[] = $tag . ">";
                    $tag = "";
                    $intag = false;
                } else {
                    $tag .= $char;
                }
            }
        }

        // Text following the last tag has no "<" to trigger the flush above.
        if ($text !== "") {
            $this->elements[] = $text;
        }

        return true;
    }
}
?>