Location: PHPKode > projects > KnProxy > includes/module_parser.php
<?php
/****************************
* Parser to parse a webpage
****************************/
/**
 * Rewrites a fetched document so that the URLs it references are routed back
 * through the proxy.
 *
 * Collaborators are not defined in this file; the notes below describe only how
 * this class uses them:
 *  - $url              object exposing getAbsolute($relative), output($x) and ->base.
 *  - $stdEncoder       false, or an object exposing encode($url), can($feature),
 *                      getKey() and encrypt_page($html,$key).
 *  - $extraParseEngine false, or a plugin exposing pre($source,$type) and
 *                      post($output,$type).
 *
 * Typical use: construct, setMimeType()/setCharset()/setEncoder(), parse(), then
 * read ->output.
 */
class knParser{
	/** @var string Text prepended to every encoded URL. */
	public $url_prefix = '';
	/** @var mixed URL helper for the page being parsed (see class notes). */
	public $url='';
	/** @var string Raw document as received. */
	public $source='';
	/** @var string MIME type without parameters or whitespace. */
	public $type='';
	/** @var string Rewritten document, filled in by parse(). */
	public $output='';
	/** @var string Character set name detected by setCharset(). */
	public $charset='';
	/** @var mixed Optional plugin engine, or false. */
	public $extraParseEngine=false;
	/** @var mixed Optional URL encoder, or false. */
	public $stdEncoder=false;
	/** @var array Free-form options, see set_value()/get_value(). */
	public $values=array();

	/**
	 * @param mixed  $page_url   URL helper object for the page.
	 * @param string $page_data  Raw page content.
	 * @param string $url_prefix Prefix for rewritten URLs.
	 */
	function __construct($page_url,$page_data,$url_prefix=""){
		$this->url = $page_url;
		$this->source = $page_data;
		$this->stdEncoder = false;
		$this->url_prefix = $url_prefix;
	}

	/**
	 * Attach a plugin engine and hand it the current URL, prefix and encoder.
	 * The values are copied at call time; later setEncoder() calls are not
	 * forwarded to the engine.
	 */
	function setPluginEngine($engine){
		$engine->url = $this->url;
		$engine->url_prefix = $this->url_prefix;
		$engine->stdEncoder = $this->stdEncoder;
		$this->extraParseEngine=$engine;
	}

	/**
	 * Store the MIME type, dropping any ";parameter" suffix and all whitespace.
	 */
	function setMimeType($mime_type){
		$mime_type=preg_replace('~;.*$~','',$mime_type);
		$this->type=preg_replace('~\s~','',$mime_type);
	}

	/**
	 * Detect the character set from the Content-Type header value, falling back
	 * to a <meta ... charset=...> declaration in the page.
	 *
	 * @param string $mime_raw Raw Content-Type header value.
	 * @param string $page_raw Raw page content.
	 * @return string Detected charset, or the previous value ('' by default) if none found.
	 */
	function setCharset($mime_raw,$page_raw){
		// Case-insensitive, tolerates optional quotes and names such as Shift_JIS.
		if(preg_match('~^.*;\s*charset\s*=\s*["\']?([a-zA-Z0-9_:.\-]*)["\']?\s*;*$~i',$mime_raw,$matches)){
			$this->charset = $matches[1];
		}
		if($this->charset==""){
			// Handles both content="text/html; charset=x" and the HTML5 form
			// <meta charset="x">; the optional quote is kept out of the capture.
			if(preg_match('~<meta[^>]*charset\s*=\s*["\']?\s*([a-zA-Z0-9_:.\-]+)~i',$page_raw,$pmatch)){
				$this->charset = $pmatch[1];
			}
		}
		return $this->charset;
	}

	/** Set the URL encoder used when rewriting links. */
	function setEncoder($encoder){
		$this->stdEncoder = $encoder;
	}

	/**
	 * Convert ->output to another character set.
	 *
	 * With iconv the source charset is the 'charset_iconv' option (default GBK);
	 * with mbstring it is ->charset.
	 *
	 * @param string $to Target charset.
	 * @return bool False when no conversion function exists or the conversion
	 *              failed (->output is left untouched in that case).
	 */
	function convertCharset($to='UTF-8'){
		if(function_exists('iconv')){
			$converted = iconv($this->get_value('charset_iconv','GBK'),$to . '//IGNORE//TRANSLIT',$this->output);
		}elseif(function_exists('mb_convert_encoding')){
			$converted = mb_convert_encoding($this->output,$to,$this->charset);
		}else{
			return false;
		}
		// iconv() reports failure with false; do not destroy the page with it.
		if($converted===false)
			return false;
		$this->output = $converted;
		return true;
	}

	/** Store an option value. */
	function set_value($name,$val){
		$this->values[$name]=$val;
	}

	/** Read an option value, or $def when it was never set. */
	function get_value($name,$def){
		if(isset($this->values[$name]))
			return $this->values[$name];
		return $def;
	}

	/**
	 * Run the whole pipeline: plugin pre-hook, type-specific rewrite, plugin
	 * post-hook and optional page encryption. The result is left in ->output.
	 */
	function parse(){
		if($this->extraParseEngine)
			$this->source = $this->extraParseEngine->pre($this->source,$this->type);
		switch($this->type){
			case 'knproxy/noparse':$this->output = $this->source;break;
			case 'text/css':$this->parseCss();break;
			case 'text/javascript':
			case 'application/javascript':
			case 'application/x-javascript':$this->parseJS();break;
			case 'image/gif':
			case 'image/png':
			case 'image/jpeg':$this->output = $this->source;break;
			case 'text/html':
			default:$this->parseHTML();break;
		}
		if($this->extraParseEngine){
			$this->output = $this->extraParseEngine->post($this->output,$this->type);
		}
		// Encryption needs an encoder; without one the page is delivered as is.
		if($this->get_value('use_page_encryption',false) && $this->stdEncoder){
			if($this->stdEncoder->can('page_encrypt') && ($this->type=="text/html" || $this->type=="")){
				$key = $this->stdEncoder->getKey();
				// Charset names are case-insensitive (e.g. "GB2312").
				if(strtolower((string)$this->charset)=="gb2312")
					$this->convertCharset();
				$this->output = $this->stdEncoder->encrypt_page($this->output,$key);
				$this->set_value('key',$key);
			}
		}
	}

	/**
	 * Rewrite url(...)/src(...) references and @import targets in a CSS string.
	 *
	 * @param string $css
	 * @return string
	 */
	function parseRawCSS($css){
		$css = preg_replace_callback('~(url|src)(\()\s*([^\s].*)\s*\)~iUs',array($this,'parseSimpleURL'),$css);
		$css = preg_replace_callback('~(@import\s*)(["\'\(])\s*([^\s].*)\s*["\'\)]~iUs',array($this,'parseSimpleURL'),$css);
		return $css;
	}

	/**
	 * Regex callback for an inline style attribute:
	 * [1] = 'style=', [2] = quote character, [3] = CSS text.
	 */
	function parseStyle($callback){
		return $callback[1] . $callback[2]. $this->parseRawCSS($callback[3]) . $callback[2];
	}

	/**
	 * Rewrite CSS.
	 *
	 * Called without an argument it rewrites ->source into ->output and returns
	 * nothing. Called as a regex callback with a non-empty match array
	 * ([1] = opening <style> tag, [2] = CSS text) it returns the rewritten
	 * <style> element. Any other argument is treated like "no argument".
	 */
	function parseCss($source = false){
		if(is_array($source) && $source){
			return $source[1] . $this->parseRawCSS($source[2]) . '</style>';
		}
		$this->output = $this->parseRawCSS($this->source);
	}

	/** JavaScript documents are passed through unchanged (not implemented). */
	function parseJS(){
		$this->output = $this->source;//NOT IMPLEMENTED YET
	}

	/**
	 * Regex callback for location.replace('...') inside a script:
	 * [1] = quote character, [2] = URL text (may contain escape markers).
	 */
	function parseScriptTagURLReloc($url){
		$delim = $url[1];
		$decoded = $this->decodeJSURL($url[2]);
		$target = $this->url->getAbsolute($decoded[0]);
		if($this->stdEncoder)
			$target = $this->stdEncoder->encode($target);
		$target = $this->escapeJS($this->url_prefix . $target);
		return 'location.replace(' . $delim . $target . $delim . ')';
	}

	/**
	 * Regex callback for a quoted string inside a script:
	 * [1] = quote character, [2] = string content. URL-carrying attributes found
	 * in the content (HTML built in JS) are rewritten.
	 */
	function parseScriptStrings($strings){
		$str = preg_replace_callback('~(href|src|codebase|url|action)\s*=\s*([\'\"])?(?(2) (.*?)\\2 | ([^\s\>]+))~isx',array($this,'parseExtURL'),$strings[2]);
		return $strings[1] . $str . $strings[1];
	}

	/**
	 * Regex callback for a <script> element: [1] = attributes, [2] = body.
	 *
	 * Restores the '<' characters masked by parseScriptPre(), temporarily swaps
	 * backslashes for a marker so escaped quotes do not confuse the string
	 * matching, rewrites URLs, then restores the backslashes.
	 */
	function parseScriptTag($matches){
		$tagInner = preg_replace('~#knproxy_script_lt#~iUs','<',$matches[2]);
		$tagInner = preg_replace('~\\\~','#knproxy_script_escape#',$tagInner);//REMOVE ESCAPES
		$tagInner = preg_replace_callback('~([\'\"])(.+)\\1~',array($this,'parseScriptStrings'),$tagInner);
		$tagInner = preg_replace_callback('~location\.replace\(([\'"])(.*)\\1\)~iUs',array($this,'parseScriptTagURLReloc'),$tagInner);
		$tagInner = preg_replace('~#knproxy_script_escape#~','\\',$tagInner);
		return '<script' . $matches[1] . '>' . $tagInner . '</script>';
	}

	/**
	 * Regex callback for a <script> element: masks every '<' in the body so the
	 * generic tag pass in parseHTML() does not treat script text as markup.
	 */
	function parseScriptPre($matches){
		$tagInner = preg_replace('~<~iUs','#KNPROXY_SCRIPT_LT#',$matches[2]);
		return '<script' . $matches[1] . '>' . $tagInner . '</script>';
	}

	/**
	 * Regex callback for one HTML tag; $match[1] is the text between '<' and '>'.
	 *
	 * Forms that would submit via GET (explicitly or by default) are switched to
	 * POST and get a hidden knproxy_gettopost marker field. URL attributes and
	 * inline styles are rewritten.
	 */
	function parseUrlHTML($match){
		$is_pform=false;
		// Only a real <form ...> tag, not e.g. a tag whose name merely starts with "form".
		if(preg_match('~^\s*form(?=[\s/]|$)~i',$match[1])){
			// Attribute names are case-insensitive in HTML.
			if(!preg_match('~method\s*=~i',$match[1])){
				$match[1].= ' method="POST"';
				$is_pform=true;
			}elseif(preg_match('~method\s*=\s*([\'\"]?)get~isx',$match[1])){
				$match[1] = preg_replace('~(method)\s*=\s*([\'\"])?(?(2) (.*?)\\2 | ([^\s\>]+))~isx','$1="POST"',$match[1]);
				$is_pform = true;
			}
		}
		$code = preg_replace_callback('~(href|src|codebase|url|action)\s*=\s*([\'\"])?(?(2) (.*?)\\2 | ([^\s\>]+))~isx',array($this,'parseExtURL'),$match[1]);
		$code = preg_replace_callback('~(style\s*=\s*)([\'\"])(.*)\2~iUs',array($this,'parseStyle'),$code);
		if($is_pform)
			return '<' . $code . '><input type="hidden" name="knproxy_gettopost" value="true">';
		return '<' . $code . '>';
	}

	/**
	 * Rewrite an HTML document from ->source into ->output.
	 *
	 * Order matters: script bodies are masked first, then every tag is rewritten,
	 * then <style> blocks, and finally the script bodies are unmasked and
	 * rewritten. If masking fails with a PCRE error, scripts are left alone.
	 */
	function parseHTML(){
		$noJS = false;//FOR PREVENTING ERROR IN JS
		// Mask '<' inside scripts so the tag pass below skips script text.
		$code = preg_replace_callback('~<script([^>]*)>(.*)</script>~iUs',array($this,'parseScriptPre'),$this->source);
		if(preg_last_error()!=PREG_NO_ERROR){
			$noJS = true;
			$code = $this->source;
		}
		$code = preg_replace_callback('~<([^!].*)>~iUs',array($this,'parseUrlHTML'),$code);
		if(defined('ALLOW_NAVBAR') && ALLOW_NAVBAR=="true")
			$code = preg_replace('~<\s*/\s*head\s*>~iUs','<script type="text/javascript" language="javascript">parent.fixed.document.getElementById(\'url_\').value=parent.fixed.knEncode.unBase64("' . base64_encode($this->url->output($this->url->base)) . '");</script></head>',$code);
		// Method names are case-insensitive, so 'parseCss' resolves as before.
		$code = preg_replace_callback('~(<\s*style[^>]*>)(.*)<\s*/style\s*>~iUs',array($this,'parseCss'),$code);
		if(!$noJS){
			$code = preg_replace_callback('~<script([^>]*)>(.*)<\s*/\s*script>~iUs',array($this,'parseScriptTag'),$code);
		}
		$this->output = $code;
	}

	/**
	 * Regex callback for CSS references:
	 * [1] = keyword (url, src or '@import '), [2] = opening delimiter, [3] = URL.
	 *
	 * Surrounding quotes and whitespace are removed from the URL. data: URLs are
	 * kept verbatim; everything else is made absolute and, when an encoder is
	 * set, encoded and prefixed.
	 */
	function parseSimpleURL($matches){
		$method = $matches[1];
		$delimiter = $matches[2];
		$url = $matches[3];
		$url = preg_replace('~^"(.*)"$~iUs','$1',$url);//REMOVE FILTERS
		// Same characters as PCRE \s; strips both ends.
		$url = trim($url," \t\n\r\f\v");
		$url = preg_replace('~^\'(.*)\'$~iUs','$1',$url);
		if(strtolower(substr($url,0,5))!='data:'){
			$url = $this->url->getAbsolute($url);
			if($this->stdEncoder)
				$url = $this->url_prefix . $this->stdEncoder->encode($url);
		}
		// Re-emit with the delimiter style the reference was written in.
		if($delimiter=='(')
			return $method . '(' . $url . ')';
		return $method . $delimiter . $url . $delimiter;
	}

	/**
	 * Strip escape markers and quotes from a URL found inside a JS string.
	 *
	 * @param string $jsURL
	 * @return array [0] = bare URL, [1] = the leading quote character or ''.
	 */
	function decodeJSURL($jsURL){
		$purl = preg_replace('~#knproxy_script_escape#~iUs','',$jsURL);
		$sep = '';
		if(isset($purl[0]) && ($purl[0]=='"' || $purl[0]=="'"))
			$sep = $purl[0];
		$purl = preg_replace('~["\']~iUs','',$purl);
		return array($purl,$sep);
	}

	/** Backslash-escape '/', '"' and "'" for use inside a JS string. */
	function escapeJS($text){
		return preg_replace('~([/"\'])~','\\\$1',$text);
	}

	/**
	 * Regex callback for URL-carrying attributes:
	 * [1] = attribute name, [2] = quote ('' when unquoted),
	 * [3] = quoted value, [4] = unquoted value.
	 *
	 * Empty, javascript:, data: and fragment-only values are returned untouched.
	 * Unquoted values starting with the script escape marker come from HTML
	 * embedded in a JS string (e.g. href=\"x\") and are re-escaped on output.
	 */
	function parseExtURL($matches){
		$method = $matches[1];
		$delim = $matches[2];
		if($delim=='')
			$url = $matches[4];
		else
			$url = $matches[3];
		if(substr($url,0,9)!='#knproxy_'){
			if($url == '' || strtolower(substr($url,0,11)) == 'javascript:' || (isset($url[0]) && $url[0]=='#') || strtolower(substr($url,0,5))=='data:'){
				return $method . '=' . $delim . $url . $delim . ' ';//NO PARSE
			}
		}
		if(substr($url,0,23)=='#knproxy_script_escape#' && $delim==''){
			$tpurl = $this->decodeJSURL($url);
			// Compare the decoded URL itself, not the [url, quote] pair.
			if($tpurl[0]!=='#'){
				$u = $this->url->getAbsolute($tpurl[0]);
				if($this->stdEncoder)
					$u = $this->url_prefix . $this->stdEncoder->encode($u);
				return $method . '=' . $this->escapeJS($tpurl[1].$u.$tpurl[1]);
			}
			return $method . '=' . $this->escapeJS($tpurl[1] . '#' . $tpurl[1]);
		}
		$url = $this->url->getAbsolute($url);
		if($this->stdEncoder){
			$url = $this->url_prefix . $this->stdEncoder->encode($url);
		}
		return $method . '=' . $delim . $url . $delim . ' ';
	}
}

?>
Return current item: KnProxy