Location: PHPKode > projects > OpenWolf Guidelines Validator > openWolf 0.9.9/basic_functions.php
<?php

/*************************************************

openWolf - an HTML accessibility guidelines validator
Author: Geoff Munn (hide@address.com)
Version: 0.9.9

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

You may contact the author of openWolf by e-mail at: hide@address.com

The latest version of openWolf can be obtained from:
http://openwolf.sourceforge.net/

*************************************************/

/*
Returns the next chunk of markup found in $this_element, starting at offset $pos.

$this_element - the full document text being scanned
$pos          - offset of the first character of the chunk
$isScript     - true when the scan is currently inside a <script> element
$isStyle      - true when the scan is currently inside a <style> element

Three kinds of chunk are produced:
 - when $pos points at '<' (and we are not inside script/style): the whole tag,
   comment or CDATA section, up to and including its terminator.  Quoted
   attribute values are skipped over so that a '>' inside quotes does not end the tag.
 - when inside script/style: everything up to (not including) the closing
   </script> or </style> tag.
 - otherwise: a text node, i.e. everything up to (not including) the next '<'.

The returned string has the same length as the text it was read from.
*/
function get_element($this_element, $pos, $isScript, $isStyle){
	
	$char=substr($this_element,$pos,1);
	$element='';
	$has_equals=false;
	$element_length=strlen($this_element);
	
	if($char=='<' && !$isScript && !$isStyle){
		//go forth until a closing bracket is found
		//if a quote mark of either kind is found then skip the contents
		//If another opening bracket is found then exit
		$i=$pos;
		//set when the terminator has already been appended inside the loop
		$bad_escape=false;
		$tagname='';
		$isComment=false;
		$is_cdata=false;
		
		while($char!='>' || $isComment){
			//the tag name is only known once the first delimiter after it is seen
			if($tagname==''){
				//ok, so we have found a space, a dash or a bracket.  Get the tagname:
				if($char==' ' || $char=='-' || $char=='[')
					$tagname=tag_name($element);
			}
			
			if($tagname=='!'){
				$isComment=true;
				//we have found a comment
				//is this a CDATA section?
				if($char=='['){
					//get the next chunk:
					$opening=strtolower(substr($this_element, $i, 7));
					if($opening=='[cdata[')
						$is_cdata=true;			
				}
				
				if($is_cdata){
					//right, get everything up to the closing element (]]>)
					if($char==']' && substr($this_element, $i+1, 1)==']'){
						$temp=substr($this_element, $i+2, 1);
						if($temp=='>'){
							$element.=']]>';
							$bad_escape=true;
							$isComment=false;
							break;
						}
					}
				}
				
				if(($char=='-' && substr($this_element, $i+1, 1)=='-') || !$is_cdata){
					$temp=substr($this_element, $i, 2);
					if($temp=='--'){
						//if the next character is a '>', then close this comment.
						//Whitespace between the '--' and the '>' is tolerated.
						$spaceString='';
						$j=$i+2;
						$temp=substr($this_element, $j, 1);
						while($temp==' '){
							//keep the whitespace character exactly as it appears in the source,
							//so that the rebuilt terminator matches the original text
							$spaceString.=substr($this_element, $j, 1);
							$j++;
							if($j>$element_length){
								$bad_escape=true;
								$isComment=false;
								break;
							}
								
							$temp=substr($this_element, $j, 1);
							//newlines and tabs after the first space count as whitespace too
							if($temp=="\n" || $temp=="\t")
								$temp=' ';
						}
						
						if($temp=='>'){
							$element.='--' . $spaceString . '>';
							$bad_escape=true;
							$isComment=false;
							break;
						}
					}
				}
				$element.=$char;
				$i++;
				
				if($i>$element_length)
					break;
				
				$char=substr($this_element, $i, 1);
			} else {
				//if we have found another opening bracket, then reset the string:
				//We have found a normal element... Comments are not handled in this section
				if($char=='<')
					$element='';
				
				$element.=$char;
				
				if($char=='=')
					$has_equals=true;
					
				//a quote directly following an '=' starts an attribute value: copy it verbatim
				if(($char=="'" || $char=="\"") && $has_equals){
					$quote=$char;
					$i++;
					$char=substr($this_element, $i, 1);
					while($char!=$quote){
						$element.=$char;
						$i++;
						
						//an unterminated quote runs to the end of the text; past the end
						//substr() yields nothing, so this guard ends the inner loop
						if($i>$element_length)
							break;	
						
						$char=substr($this_element, $i, 1);
					}
					$element.=$char;
					$has_equals=false;
				}
				$i++;
				 
				if($i>$element_length)
					break;
					
				$char=substr($this_element, $i, 1);
			}
		}
		
		//append the closing '>' unless a comment/CDATA terminator was already added
		if(!$bad_escape)
			$element.=$char;
		
	} else {
		if(!$isScript && !$isStyle){
			//this will build text nodes:
			$i=$pos;
			while($char!='<'){
				$element.=$char;
				$i++;
				
				if($i>$element_length)
					break;
				
				$char=substr($this_element, $i, 1);
			}
		} else {
			//Inside a script or style element everything is copied verbatim
			//until the matching closing tag is reached (case-insensitive)
			if($isStyle)
				$criteria='</style>';
			else
				$criteria='</script>';
			
			$criteria_length=strlen($criteria);
			$i=$pos;
			$chars=strtolower(substr($this_element, $i, $criteria_length));

			while($chars!=$criteria){
				
				$element.=substr($this_element, $i, 1);
				$i++;
				
				if($i>$element_length)
					break;
				
				$chars=strtolower(substr($this_element, $i, $criteria_length));
			}	
		}
	}

	return $element;
}

/*
Tells whether the given chunk is markup rather than text:
true when its first non-whitespace character is '<'.
*/
function is_element($element){
	$first_char=substr(trim($element), 0, 1);
	return ($first_char=='<');
}

/*
Returns 1 when the second character of the trimmed chunk is '/', otherwise 0.

Only meaningful when the chunk is already known to be an element: plain text
whose second character happens to be a slash would also yield 1.
*/
function is_closing_element($element){
	$second_char=substr(trim($element), 1, 1);
	return ($second_char=='/') ? 1 : 0;
}

/*
Returns true when the trimmed chunk ends in '/>' or in a bare '/'.
*/
function is_self_closing_element($element){
	$trimmed=trim($element);
	return (substr($trimmed, -2)=='/>' || substr($trimmed, -1)=='/');
}

/*
Classifies a tag name by whether HTML 4 requires, permits omitting, or forbids
its end tag.

$tagname - the tag name, any case

Returns ELEMENT_END_TAG_OPTIONAL, ELEMENT_END_TAG_FORBIDDEN or
ELEMENT_END_TAG_REQUIRED (the default for any name not in either list).

Source: http://www.w3.org/TR/html4/index/elements.html
*/
function end_tag_required($tagname){
	
	$tagname=strtolower($tagname);
	
	//the lookup tables are built once and reused on later calls
	static $tags_optional=null;
	static $tags_forbidden=null;
	
	if(is_null($tags_optional)){
		//'th' is listed alongside 'td': both have an optional end tag in the HTML 4 index
		$tags_optional=array_flip(array(
			'body', 'colgroup', 'dd', 'dt', 'head', 'html', 'li', 'option',
			'p', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
		));
	}
	
	if(is_null($tags_forbidden)){
		//comments, doctype and CDATA are treated like empty elements: they never have an end tag
		$tags_forbidden=array_flip(array(
			'!', '!doctype', '![cdata[',
			'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img',
			'input', 'isindex', 'link', 'keygen', 'meta', 'param', 'spacer'
		));
	}
	
	if(isset($tags_optional[$tagname]))
		return ELEMENT_END_TAG_OPTIONAL;
	
	if(isset($tags_forbidden[$tagname]))	
		return ELEMENT_END_TAG_FORBIDDEN;
		
	return ELEMENT_END_TAG_REQUIRED;
}

/*
Extracts the lower-cased tag name from an element string such as '<p class="x">'.

Scanning starts after the first character (assumed to be '<').  Slashes are
skipped, so '</div>' gives 'div' and '<br/>' gives 'br'.  The name ends at the
first whitespace, '>' or '-'.  A CDATA opener yields '![cdata[' and a comment
yields '!'.
*/
function tag_name($element){
	
	$name='';
	$total=strlen($element);
	for($idx=1; $idx<$total; $idx++){
		$current=substr($element, $idx, 1);
		//the character code is taken before any slash is skipped
		$code=ord($current);
		if($current=='/'){
			$idx++;
			$current=substr($element, $idx, 1);
		}
		
		$is_whitespace=($current==' ' || $code==9 || $code==10 || $code==13);
		if($is_whitespace || $current=='>' || $current=='-')
			break;
		
		if($current=='[' && strtolower(substr($element, $idx, 7))=='[cdata['){
			$name.='[cdata[';
			break;
		}
			
		$name.=$current;
	}
	return strtolower(trim($name));
}

/*
Parses the attributes of a single element string and returns them as an array.

$element - the raw element text, e.g. '<a href="x" disabled>'

The result is keyed by the lower-cased attribute name.  Each entry holds:
  'name'            - the attribute name as written in the source
  'attribute_start' - offset of the first character of the name within $element
  'attribute_end'   - offset at which the name was considered finished
  'value'           - the attribute value; true for a standalone attribute (no '=')
  'specified'       - always true
  'value_start'     - offset of the first character of the value (-1 if standalone)
  'value_end'       - offset of the terminating quote / delimiter (-1 if standalone)

Comments and CDATA sections are not parsed and give an empty array.
*/
function get_attributes($element) {
	//Get all the attributes of an element sans tag name and return an array
	
	//NOTE:		This will ignore any attribute starting with '/'
	//			This will mangle a DOCTYPE declaration
	
	$results=Array();
	//First, get the first space after the tagname:
	$i=0;
	//if this is a comment then ignore the attributes
	$tagname=tag_name($element);
	$element_length=strlen($element);
	//we do not want to check comments - they may contain scripts or really mangled HTML
	if($tagname!='!' && $tagname!='![cdata['){
		//move to the end of the tagname (the 1 accounts for the leading '<'):
		$i=1+strlen($tagname);
		//check to see if this is not an end bracket:
		$char=substr($element,$i,1);
		if($char!='>'){
			//set up all the initial values:
			$attribute='';
			$value='';
			$attribute_start=$i;
			$attribute_end=$i;
			$value_start=$i;
			$value_end=$i;
			//true while the scan is inside an attribute name
			$in_attribute=false;
			
			//go through and get the attributes:
			for($j=$i; $j<$element_length; $j++){
				$char=substr($element, $j, 1);
				$ord=ord($char);
				
				//whitespace or '>' while inside a name: the name has ended, but we do not yet
				//know whether a value follows
				if($in_attribute && ($char==' ' || $ord==9 || $ord==10 || $ord==13 || $char=='>')){
					//whoa!  It appears that it's ok for spaces to appear here before the '=' character,
					//so we'll go through until we find either an equals character or something else
					$k=$j;
					$char2='';
					//doctype elements get caught out here, include any other special cases in this check
					if($tagname!='!doctype'){
						//skip forward over any further whitespace
						do {
							$k++;
							if($k>$element_length)
								break;
							$char2=substr($element, $k, 1);
							$ord2=ord($char2);
							
						} while ($char2==' ' || $ord2==9 || $ord2==10 || $ord2==13);
					}
					$j=$k;
					//an '=' after the whitespace means a value follows; anything else means standalone
					($char2=='=') ? $standalone=false : $standalone=true;
										
					if($standalone){
						$attribute_end=$j;
						$in_attribute=false;
						
						$attribute=substr($element, $attribute_start, $attribute_end-$attribute_start);
						$attribute_lower=strtolower($attribute);
						
						$results[$attribute_lower]['name']=$attribute;
						$results[$attribute_lower]['attribute_start']=$attribute_start;
						$results[$attribute_lower]['attribute_end']=$attribute_end;
						$results[$attribute_lower]['value']=true;
						$results[$attribute_lower]['specified']=true;
						$results[$attribute_lower]['value_start']=-1;
						$results[$attribute_lower]['value_end']=-1;
					} else
						//continue below as if the '=' had been read directly
						$char=$char2;
				}
				
				//NOTE: $ord still holds the code of the character read at the top of this
				//iteration, even if $j was moved forward above
				if(!$in_attribute && $char!=' ' && $ord!=9 && $ord!=10 && $ord!=13 && $char!='>' && $char!='/'){
					//we have the beginning of a new attribute
					$attribute='';
					$attribute_start=$j;
					$in_attribute=true;
				}
				
				if($char=='=' && $in_attribute){
					//this is the begining of the value, and is not a standalone attribute
					$attribute_end=$j;
					$in_attribute=false;
					
					$attribute=trim(substr($element, $attribute_start, $attribute_end-$attribute_start));
					$attribute_lower=strtolower($attribute);
					
					$results[$attribute_lower]['name']=$attribute;
					$results[$attribute_lower]['attribute_start']=$attribute_start;
					$results[$attribute_lower]['attribute_end']=$attribute_end;
					
					//okay, grab the value...
					//The first step is to find out what the quote mark is (if any).
					$quote=substr($element,$j+1,1);
					$has_quote=true;
					if($quote!="'" && $quote!="\""){
						//unquoted value: it is terminated by a space (or by '>', see below)
						$quote=' ';	
						$j++;
						$has_quote=false;
					} else
						//skip both the '=' and the opening quote
						$j=$j+2;
					
					//ok so we have skipped to the start of the value, let's get it
					$value='';
					$value_start=$j;
					$ignore_next=false;
					
					for($k=$j;$k<strlen($element);$k++){
						$char=substr($element, $k, 1);
						$ord=ord($char);
						
						//if the previous charact was an escape slash and this character is not a quote, then
						//remove the ignore flag.  This check MUST go before the next...
						if($ignore_next && $char!=$quote)
							$ignore_next=false;
							
						//We need to detect backslashes because they can be used to escape quote marks
						//note the double slash for escape purposes :)
						if($char=='\\')
							$ignore_next=true;
												
						//NOTE: the value_end value might need to be -1 to compensate for the offset?
						//an unquoted value also ends at the closing bracket of the element
						if($char=='>' && $has_quote==false){
							$value_end=$k;
							break;
						}
						
						//an unescaped terminator ends the value
						if($char==$quote && !$ignore_next){
							$value_end=$k;
							break;
						}
						
						if($k>$element_length)
							break;
							
						$value.=$char;
						
					}
					
					//resume the outer scan from where the value ended
					$j=$k;
									
					$results[$attribute_lower]['value']=$value;
					$results[$attribute_lower]['specified']=true;
					$results[$attribute_lower]['value_start']=$value_start;
					//NOTE: if the value ran to the end of the element without a terminator,
					//$value_end still holds whatever it was set to previously
					$results[$attribute_lower]['value_end']=$value_end;
					
				}
			}
		}
	}
	
	return $results;
}


/*
Works out the nesting of a flat list of parsed elements and records it on each entry.

$elements - passed by reference and modified in place.  Each entry is read for
            'tagname', 'is_self_closing', 'is_closing' and 'elementIndex'
            (and 'orphan' where already set), and receives some of:
              'parentElement' - elementIndex of the enclosing open element (-1 at the top level)
              'openingIndex'  - elementIndex of the opening tag (-1 for an orphan closing tag)
              'closingIndex'  - index of the tag that closes it (-1 for an orphan closing tag)
              'orphan'        - true for a closing tag with no usable opening tag

A stack of currently open elements is kept in $parents, with $current pointing
at its top.  Nothing is returned.

NOTE(review): 'elementIndex' values are used as indexes into $elements in several
places, so they are presumably identical to the array positions - confirm against the caller.
*/
function get_parent_elements(&$elements){
	//start off with the first element.
	//Any opening element off this inherits this element as a parent element
	
	$length=sizeof($elements);
	
	//set up the default parent information
	//(slot 0 of the stack is a sentinel representing "no parent")
	$current=0;
	$parents=Array();
	$parents[0]['elementIndex']=-1;
	for($i=0; $i<$length; $i++){
		$tagname=$elements[$i]['tagname'];
		if($elements[$i]['is_self_closing']==true || end_tag_required($tagname)==ELEMENT_END_TAG_FORBIDDEN){
			//This is a self-closing element or is not allowed an end tag:
			//it opens and closes at itself and never becomes a parent
			$elements[$i]['openingIndex']=$elements[$i]['elementIndex'];
			$elements[$i]['closingIndex']=$elements[$i]['elementIndex'];
			$elements[$i]['parentElement']=$parents[$current]['elementIndex'];
		} else {
			//move on to elements that open or close:
			if($elements[$i]['is_closing']==false){
				//This is an opening element: record its parent, then push it on the stack
				$elements[$i]['parentElement']=$parents[$current]['elementIndex'];
				$elements[$i]['openingIndex']=$elements[$i]['elementIndex'];
				$current++;
				
				$parents[$current]['tagname']=$tagname;
				$parents[$current]['elementIndex']=$elements[$i]['elementIndex'];
				$parents[$current]['openingIndex']=$elements[$i]['elementIndex'];
				$parents[$current]['arrayPos']=$i;
											
				//If this is a block element, then we automatically close any block elements that are still open.
				//When (if) we find one, we can stop looking further up the tree, since the block element we found
				//would have closed any previous block tags.
				
				//Divs don't get automatically closed - we shall treat them as being inline element.
				//Tables close block elements except for divs and other tables
				//We are going to treat forms as being inline elements...
				$tagtype=tagType($tagname, true);
				if($tagname=='div' || $tagname=='form' || $tagname=='table'){
					//echo 'found a ' . $tagname . ' tag at element index ' . $i . '<br>';
					//we need to go back up the tree and find any opening <p> tags.  If there are any, then we close them
					//(the search stops at the nearest earlier 'p' entry, whether or not it was closed here)
					$elementIndex=$i;
					do {
						$parent_tagname=$elements[$elementIndex]['tagname'];
						//echo 'the parent tagname is ' . $parent_tagname . '<br>';
						if($parent_tagname=='p'){
							if(!isset($elements[$elementIndex]['closingIndex']) && @$elements[$elementIndex]['orphan']==false){
								//echo 'marking element ' . $elementIndex . ' as having the closing index of ' . $i . ' (child block element issue)<br>';
								$elements[$elementIndex]['closingIndex']=$i;
							}
							break;
						}
						$elementIndex--;
					} while ($elementIndex>-1);
					//from here on this tag is handled as if it were inline
					$tagtype=PARSE_INLINE_ELEMENT;
				}
				
				if($tagtype==PARSE_BLOCK_ELEMENT){
					$j=$i-1;
					//go backwards to find the most recent opening block element...
					//echo 'we have the tag ' . $tagname . ' (treated as a block element)<br>';
					
					//NOTE(review): when $i is 0 this loop reads $elements[-1], and because the loop
					//condition is $j>0, index 0 is only examined when $i is 1 - confirm this is intended
					do {
						//echo 'we are checking ' . $elements[$j]['tagname'] . '<br>';
						$tagtype=tagType($elements[$j]['tagname'], true);

						//NOTE(review): the first two tests look at the current tag ($tagname), which cannot be
						//'div' or 'form' in this branch; possibly $elements[$j]['tagname'] was meant - confirm
						if($tagname=='div' || $tagname=='form' || $elements[$j]['tagname']=='noscript')
							$tagtype=PARSE_INLINE_ELEMENT;
							
						//echo 'the tagtype is ' . $tagtype . ' (blockelement=' . PARSE_BLOCK_ELEMENT . ')<br>';
						if(!$elements[$j]['is_self_closing'] && end_tag_required($elements[$j]['tagname'])!=ELEMENT_END_TAG_FORBIDDEN && $tagtype==PARSE_BLOCK_ELEMENT){
							//ok so this is a block element, is it an opening tag?
							if($elements[$j]['is_closing']==false){
								//ok, we have found an unclosed block element.
								//We shall mark it's closing element as being the current block element, only if it's not already marked
								//as being closed
								if(@$elements[$j]['orphan']==false){
									if(!isset($elements[$j]['closingIndex'])){
										//echo 'marking element ' . $j . ' as having the closing index of ' . $i . ' (opening tag)<br>';
										$elements[$j]['closingIndex']=$i;
									}
									break;
								}
							} else {
								//This is an closing block element, we can stop looking...
								if(@$elements[$j]['orphan']==false)
									break;
							}
						}
						$j--;
					} while ($j>0);
				}
				
			} else {
				//This is a closing element
				//If this closing tag matches the current parent, then all is well.
				//If we get to the absolute top of the parent tree, the parent index will be -1 and there is no tagname
				if(@$parents[$current]['tagname']==$tagname){
					//Sometimes this closing element's opening conterpart might already have been closed due to
					//a nesting error, so this element is technically an orphan
					if(!isset($elements[$parents[$current]['openingIndex']]['closingIndex'])){
						//This sets this element's opening tag's closing index as being this element:
						$elements[$parents[$current]['elementIndex']]['closingIndex']=$elements[$i]['elementIndex'];
						//the closing tag shares the parent of its opening tag
						$elements[$i]['parentElement']=$elements[$parents[$current]['arrayPos']]['parentElement'];
						//Update the closing tag with the opening (and closing) details:
						$elements[$i]['closingIndex']=$elements[$i]['elementIndex'];
						//echo 'marking element ' . $i . ' as having the closing index of ' . $elements[$i]['elementIndex'] . ' (closing tag)<br>';
						$elements[$i]['openingIndex']=$elements[$parents[$current]['arrayPos']]['elementIndex'];
					} else {
						$elements[$i]['closingIndex']=-1;
						$elements[$i]['openingIndex']=-1;
						$elements[$i]['orphan']=true;	
					}
					
					//pop the matched element off the stack in either case
					$current--;
					
				} else {
					//We have a problem... this could be an extra element,
					//or it could be an improperly nested element.
					
					//It doesn't matter what the reasoning behind the incorrect tag is...
					//Take this tag and move down the stack of open elements.
					//If the same opening element is found, then mark all the elements from there to the
					//top of the stack as being closed by this tag
					//If the bottom is reached and nothing is found, then mark this tag as an orphan and
					//then move on to the next element
					$is_orphan=true;
					for($j=$current;$j>=0; $j--){
						if($parents[$j]['elementIndex']>-1 && $parents[$j]['tagname']==$tagname){
							//we have a match... go from here to the end and mark them all as
							//being closed.
							//Then set $current as being $j
							//$closing_index=$i;
							$bad_nesting=true;
							for($k=$j;$k<=$current;$k++){
								$elements[$parents[$k]['arrayPos']]['openingIndex']=$parents[$k]['openingIndex'];
								if(!isset($elements[$parents[$k]['elementIndex']]['closingIndex'])){
									$elements[$parents[$k]['elementIndex']]['closingIndex']=$i;
									$bad_nesting=false;
								} else {
									//ok, so if the closing index is already set, then it might be a block element
									//which wasn't closed properly.  Therefore, we shall not overwrite this value
									//and we will use this closing index from here on...
									//$closing_index=$elements[$parents[$k]['arrayPos']]['closingIndex'];
									break;
								}
							}
							//the tag only counts as matched if it actually closed at least one element
							if(!$bad_nesting)
								$is_orphan=false;
							$current=$j;
							break;
						}
					}
					if($is_orphan){ 
						//If this is true, then there is no opening version for this tag, it's a complete orphan
						$elements[$i]['closingIndex']=-1;
						$elements[$i]['openingIndex']=-1;
						$elements[$i]['orphan']=true;
					} else
						//pop the matched opening element itself
						$current--;	
				}
				
			}
		}
	}
}

/*
Returns the last path segment of $file (the "basename"), with its case preserved.

The scheme://host prefix reported by get_base_url() is stripped first.
*/
function get_filename($file){
	$without_host=str_replace(get_base_url($file), '', $file);
	$info=pathinfo($without_host);

	//DO NOT convert to lower case.
	//This is used to construct the href values.
	return $info['basename'];
}

/*Returns the lower-cased file extension found in the path part of an URL.
A valid URL is assumed.
An empty string is returned when the URL ends with a slash, has no path,
or its last path segment has no extension.*/
function get_extension($file){
	
	if(substr($file,-1)=='/')
		return '';

	$url_bits=parse_url($file);
	
	//no usable path means there is nothing to take an extension from
	if(!isset($url_bits['path']) || $url_bits['path']=='')
		return '';
	
	$info=pathinfo($url_bits['path']);
	
	return isset($info['extension']) ? strtolower($info['extension']) : '';
}

/*
Returns the lower-cased "scheme://host" part of an URL, e.g. http://www.cnn.com

$file            - the URL to inspect
$include_subdirs - when true, the directory part of the path is appended
                   (as written, not lower-cased)

An empty string is returned when the URL has no scheme or no host.
*/
function get_base_url($file, $include_subdirs=false){
	$parts=parse_url($file);
	
	//without both a scheme and a host this is not an absolute URL
	if(!isset($parts['scheme']) || !isset($parts['host']))
		return '';
	
	$root=strtolower($parts['scheme'] . "://" . $parts['host']);
	
	if(!$include_subdirs || !isset($parts['path']))
		return $root;
	
	$path_info=pathinfo($parts['path']);
	if(isset($path_info['dirname']))
		return $root . $path_info['dirname'];
	
	return $root;
}

/*
Returns the lower-cased scheme of an URL ('http', 'ftp', ...),
or an empty string when the URL has none.
*/
function get_protocol($file){
	$parts=parse_url($file);
	if(!isset($parts['scheme']))
		return '';
	
	return strtolower($parts['scheme']);
}

/*
Normalises a slash-separated path by resolving '.' and '..' segments.

$link - a path such as '/a/./b/../c' (not a full URL)

Empty segments and '.' are dropped; '..' removes the previous segment, and is
ignored when there is nothing left to remove.  The result always starts with
a single '/', never ends with one, and is '' when no segments remain.
*/
function strip_dots($link){
	//the segments kept so far, used as a stack
	$dirs=Array();
	foreach(explode('/', $link) as $this_directory){
		
		if($this_directory==='' || $this_directory==='.')
			continue;
		
		if($this_directory==='..'){
			//step up one level; popping an empty stack is a harmless no-op
			array_pop($dirs);
			continue;
		}
		
		$dirs[]=$this_directory;
	}
	
	if(count($dirs)==0)
		return '';
		
	return '/' . implode('/', $dirs);
}

/*
Resolves a relative URI against an absolute base URL.

$rel_uri             - the relative reference ('page.html', '../x', '/root.html', ...)
$base                - the absolute URL of the referring document
$REMOVE_LEADING_DOTS - when true (default), '.' / '..' segments that would climb
                       above the site root are discarded

Returns "scheme://host" followed by the resolved path.
*/
function make_abs($rel_uri, $base, $REMOVE_LEADING_DOTS = true) {
	// A base with no path at all ("http://host") is treated as "http://host/"
	if (preg_match('#^[^:]+://[^/]+$#', $base))
		$base .= '/';

	// scheme://host of the base; stays empty if the base is not an absolute URL
	$base_start = '';
	if (preg_match('#^([^:]+://[^/]+)/#', $base, $m))
		$base_start = $m[1];

	// A root-relative reference only needs the scheme and host
	if (preg_match('#^/#', $rel_uri))
		return $base_start . $rel_uri;

	// Drop the document name from the base, append the reference,
	// then work on the path part only
	$base = preg_replace('{[^/]+$}', '', $base);
	$base .= $rel_uri;
	$base = preg_replace('{^[^:]+://[^/]+}', '', $base);
	$base_array = explode('/', $base);
	if (count($base_array) and !strlen($base_array[0]))
		array_shift($base_array);

	// Collapse "." and "name/.." pairs
	$i = 1;
	while ($i < count($base_array)) {
		if ($base_array[$i - 1] == ".") {
			array_splice($base_array, $i - 1, 1);
			if ($i > 1) $i--;
		} elseif ($base_array[$i] == ".." and $base_array[$i - 1] != "..") {
			array_splice($base_array, $i - 1, 2);
			if ($i > 1) {
				$i--;
				if ($i == count($base_array)) array_push($base_array, "");
			}
		} else
			$i++;
	}

	// A trailing "." refers to the directory itself: keep the slash, drop the dot
	$last = count($base_array) - 1;
	if ($last >= 0 and $base_array[$last] == ".")
		$base_array[$last] = "";

	/* How do we treat the case where there are still some leading ../
	   segments left? According to RFC2396 we are free to handle that
	   any way we want. The default is to remove them.

	   "If the resulting buffer string still begins with one or more
	   complete path segments of "..", then the reference is considered
	   to be in error. Implementations may handle this error by
	   retaining these components in the resolved path (i.e., treating
	   them as part of the final URI), by removing them from the
	   resolved path (i.e., discarding relative levels above the root),
	   or by avoiding traversal of the reference."

	   http://www.faqs.org/rfcs/rfc2396.html  5.2.6.g
	*/
	if ($REMOVE_LEADING_DOTS) {
		while (count($base_array) and preg_match('/^\.\.?$/', $base_array[0]))
			array_shift($base_array);
	}

	return($base_start . '/' . implode("/", $base_array));
}

/*
Rebuilds an URL so that it always names a file, and reports the file name found.

$link            - the URL to check; expected to be a correct absolute URL
$default_page    - file name to add when the URL names a directory
$return_filename - unused, kept for compatibility with existing callers
$site_domain     - the default page is only added when this equals get_domain()
                   of the link's own scheme://host

Returns an array with:
  'href'     - scheme://host[:port]/dir/file[?query][#fragment]
  'filename' - the file name present in the original link ('' if there was none)

The user / password parts of an URL are not carried over.
*/
function check_filename($link, $default_page, $return_filename=false, $site_domain=''){
	
	$url_parts=parse_url($link);
	if(!is_array($url_parts))
		$url_parts=Array();
	
	//if the link is warped (ie, is missing the 'http' bit), then the scheme and host will be missing
	$scheme=isset($url_parts['scheme']) ? $url_parts['scheme'] : '';
	$host=isset($url_parts['host']) ? $url_parts['host'] : '';
	$domain=$scheme . '://' . $host;
	if(isset($url_parts['port']))
		$domain.=':' . $url_parts['port'];
	
	//parse_url() has already separated the query and fragment from the path,
	//so the path can be split on its last slash without any text substitution
	$path=isset($url_parts['path']) ? $url_parts['path'] : '';
	$last_slash=strrpos($path, '/');
	if($last_slash===false){
		$filename=$path;
		$dirname='';
	} else {
		//a path ending in a slash names a directory, so the file name is empty
		$filename=(string)substr($path, $last_slash+1);
		$dirname=trim((string)substr($path, 0, $last_slash), '/');
	}
	
	if($filename==''){
		$original_filename='';
		//only pages on our own site are assumed to have the default page
		if($site_domain==get_domain($domain))
			$filename=$default_page;
		
	} else
		$original_filename=$filename;
		
	$new_link=$domain;
	if($dirname!='')
		$new_link.='/'. $dirname;
	
	$new_link.='/' . $filename;
	if(isset($url_parts['query']))
		$new_link.='?' . $url_parts['query'];
	
	if(isset($url_parts['fragment']))
		$new_link.='#' . $url_parts['fragment'];
	
	$results=Array();
	$results['href']=$new_link;
	$results['filename']=$original_filename;
	
	return $results;
}

/*
Splits a DOCTYPE declaration into its named parts.

$docType   - the complete declaration, starting with '<!DOCTYPE'
$lowercase - when true, every returned value is lower-cased

Returns an array keyed by part name (only the parts actually present), or an
empty array when $docType is not a DOCTYPE declaration.

http://www.blooberry.com/indexdot/html/tagpages/d/doctype.htm
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
          (1)  (2)     (3) (4) (5) (6)                   (7)  (8)
*/
function getDoctype($docType, $lowercase=false){
	
	$bits=Array(
		1=>'Top Element',
		2=>'Availability',
		3=>'Registration',
		4=>'Organisation',
		5=>'Type',
		6=>'Label',
		7=>'Language',
		8=>'URL'
	);
		
	$results=Array();
	
	//This is not a doctype tag!  Return an empty array
	if(substr(strtolower($docType),0,9)!='<!doctype')
		return $results;
	
	//line breaks and tabs separate tokens just like spaces do, so turn them into
	//spaces (rather than removing them) before collapsing runs of spaces
	$docType=str_replace(Array("\r", "\n", "\t"), ' ', $docType);
	while(strpos($docType,'  ')!==false){
		$docType=str_replace('  ', ' ', $docType);
	}
	
	$bit='';
	$bitCount=1;
	$length=strlen($docType);
	//10 is the next character after the word '<!doctype' and its trailing space
	for($i=10; $i<$length; $i++){
		$char=substr($docType, $i, 1);
		
		//a double slash separates the parts of the public identifier
		$doubles=($char=='/' && substr($docType, $i+1, 1)=='/');
		
		//This will ensure that quote marks do not become part of the results
		if($char=="\"")
			$char='';
		
		if($doubles){
			if($bitCount!=8){
				//we are looking at a new 'bit' - a text fragment has just been finished
				if(isset($bits[$bitCount]))
					$results[$bits[$bitCount]]=$lowercase ? strtolower($bit) : $bit;
				
				$bitCount++;
				$bit='';
				//skip the second slash of the pair
				$i++;
			} else {
				//inside the URL a double slash is ordinary text (http://...), so keep it
				$bit.=$char;
			}
		} elseif($char==' ' || $char=='>'){
			if($bitCount!=6){
				//a space or the closing bracket finishes every part except the label
				if(isset($bits[$bitCount]))
					$results[$bits[$bitCount]]=$lowercase ? strtolower($bit) : $bit;
				
				$bitCount++;
				$bit='';
			} else {
				//the label (6) may contain spaces; it is only finished by a double slash
				$bit.=$char;
			}
		} else {
			$bit.=$char;
		}
	}

	return $results;
}

/*
Returns the lower-cased host name of an http/https URL with any 'www' label removed,
e.g. 'http://www.cnn.com/world' gives 'cnn.com'.

The last label of the host is always kept as it is.  An empty string is returned
for other protocols and for links that are not absolute URLs.
*/
function get_domain($link){
	$url_parts=parse_url($link);
	if(!isset($url_parts['scheme']) || !isset($url_parts['host']))
		return '';
	
	$protocol=strtolower($url_parts['scheme']);
	if($protocol!='http' && $protocol!='https')
		return '';
	
	//working from the host alone means the protocol prefix can never leak into
	//the result, even for single-label hosts such as 'localhost'
	$labels=explode('.', strtolower($url_parts['host']));
	$last_label=array_pop($labels);
	
	$domain='';
	foreach($labels as $label){
		if($label!='www')
			$domain.=$label . '.';
	}
	
	return $domain . $last_label;
}

/*
Fetches an URL with Snoopy and returns its response headers and body as an array.

$url           - the address to fetch
$cache         - when true, results are kept (serialized) in
                 $_SESSION['cached_files'] and reused on later calls
$max_redirects - how many 301/302/307 redirects may be followed before the
                 redirect response itself is returned

The returned array contains:
  'response_code'        - the value of Snoopy's response_code property
  '<header name>'        - one lower-cased entry per "name: value" header line
  0, 1, 2, ...           - every raw header line, by position
  'contents'             - the response body
An empty array is returned when the fetch fails.
*/
function fetch_file($url, $cache=false, $max_redirects=10){

	if($cache && isset($_SESSION['cached_files'][$url]) && $_SESSION['cached_files'][$url]!='')
		return unserialize($_SESSION['cached_files'][$url]);

	$snoopy = new Snoopy;
	$headers=Array();
	
	if($snoopy->fetch($url)) {
		$headers['response_code']=$snoopy->response_code;
		if(isset($snoopy->headers) && sizeof($snoopy->headers)>0){
			//check for redirects!
			$bits=explode(' ', $snoopy->headers[0]);
			$status=isset($bits[1]) ? $bits[1] : '';
			if(($status=='301' || $status=='302' || $status=='307') && $max_redirects>0){
				//righty, go and get the new location.  The Location header can be at
				//any position, so it is looked up by name.
				$location='';
				foreach($snoopy->headers as $line){
					if(strtolower(substr($line, 0, 9))=='location:'){
						$location=trim(substr($line, 9));
						break;
					}
				}
				
				if($location!=''){
					$headers=fetch_file($location, $cache, $max_redirects-1);
					if($cache)
						$_SESSION['cached_files'][$url]=serialize($headers);
					return $headers;
				}
				//a redirect without a Location header is reported like any other response
			}
			
			foreach($snoopy->headers as $key => $val){
				//split on the first colon only, so values that contain colons
				//(URLs, dates) are kept whole
				$temp=explode(':', $val, 2);
				if(count($temp)>1){
					$headers[trim(strtolower($temp[0]))]=trim(strtolower($temp[1]));
				}
				$headers[trim($key)]=trim($val);
				
			}
			$headers['contents']=$snoopy->results;
		}
	}
	
	//failed fetches are cached too (as an empty array)
	if($cache)
		$_SESSION['cached_files'][$url]=serialize($headers);
	return $headers;
}

/*
Decides whether a response, in the array form built by fetch_file(), is usable.

$headers      - the response array; must hold 'response_code' (a status line such
                as 'HTTP/1.1 200 OK') and may hold 'content-type'
$content_type - when not empty, the media type the response must have
                (parameters such as '; charset=...' are ignored)

Returns true only when the content type matches (if requested) and the status
code is between 200 and 399 inclusive.
*/
function is_response_good($headers, $content_type=''){
	if(!is_array($headers) || sizeof($headers)==0)
		return false;
	
	//the media type is whatever precedes the first ';' of the content-type header
	$received_content_type='';
	if(isset($headers['content-type'])){
		$bits=explode(';', $headers['content-type']);
		$received_content_type=trim($bits[0]);
	}
	
	if($content_type!='' && $received_content_type!=$content_type)
		return false;
	
	if(!isset($headers['response_code']) || $headers['response_code']=='')
		return false;
	
	//the status code is the second word of the status line
	$temp=explode(' ', $headers['response_code']);
	if(!isset($temp[1]))
		return false;
	
	$num=(int)$temp[1];
	return ($num>=200 && $num<=399);
}

/*
Returns the trimmed POST field named $value, or $default when it was not submitted.
*/
function get_post($value, $default=''){
	return isset($_POST[$value]) ? trim($_POST[$value]) : $default;
}

/*
Returns the trimmed query-string (GET) parameter named $value, or $default when it is absent.
*/
function get_get($value, $default=''){
	return isset($_GET[$value]) ? trim($_GET[$value]) : $default;
}

/*
Returns the trimmed $_REQUEST entry named $value, or $default when it is absent.
*/
function get_request($value, $default=''){
	return isset($_REQUEST[$value]) ? trim($_REQUEST[$value]) : $default;
}

/*********************************
//Readability functions go here://
*********************************/

/*
Estimates the average number of words per sentence.

Words are estimated by counting spaces, and sentences by counting the
characters '.', '!' and '?'.  Text without any sentence terminator is
treated as a single sentence.
*/
function average_words_sentence($text) {
  $sentence_count = preg_match_all('/[.!?]/', $text, $terminators);
  $word_count = substr_count($text, ' ');
  if ($sentence_count == 0) {
    $sentence_count = 1;
  }
  return ($word_count/$sentence_count);
}

/*
Returns the average number of syllables per word of $text.

Words are the pieces obtained by splitting the text on single spaces; each one
is measured with count_syllables().
*/
function average_syllables_word($text) {
  $words = explode(' ', $text);
  // numeric accumulator: adding to an empty string is an error on PHP 8
  $syllables = 0;
  foreach ($words as $word) {
    $syllables += count_syllables($word);
  }
  // explode() always returns at least one piece, so the divisor is never zero
  return ($syllables/count($words));
}

/**
 * Estimate the number of syllables in an English word.
 *
 * Based on Greg Fast's Perl module Lingua::EN::Syllables: the count of vowel
 * groups is adjusted down for each matching $subsyl pattern and up for each
 * matching $addsyl pattern. The result is never less than 1.
 *
 * @param string $word Any characters other than a-z are ignored.
 * @return int
 */
function count_syllables($word) {

	//regular-expression fragments whose presence means one syllable fewer than the vowel groups suggest
	$subsyl = array(
		'cial'
		,'tia'
		,'cius'
		,'cious'
		,'giu'
		,'ion'
		,'iou'
		,'sia$'
		,'.ely$'
	);

	//regular-expression fragments whose presence means one syllable more than the vowel groups suggest
	$addsyl = array(
		'ia'
		,'riet'
		,'dien'
		,'iu'
		,'io'
		,'ii'
		,'[aeiouym]bl$'
		,'[aeiou]{3}'
		,'^mc'
		,'ism$'
		,'([^aeiouy])\1l$'
		,'[^l]lien'
		,'^coa[dglx].'
		,'[^gq]ua[^auieo]'
		,'dnt$'
	);

	$word = preg_replace('/[^a-z]/is', '', strtolower($word));

	//each run of vowels counts as one syllable; empty pieces are discarded by the flag
	$valid_word_parts = preg_split('/[^aeiouy]+/', $word, -1, PREG_SPLIT_NO_EMPTY);

	$syllables = 0;

	//the fragments are regular expressions, so they must be matched with preg_match();
	//a literal strpos() search can never find entries such as 'ism$' or '^mc'
	foreach ($subsyl as $syl) {
		if (preg_match('/' . $syl . '/', $word)) {
			$syllables--;
		}
	}
	foreach ($addsyl as $syl) {
		if (preg_match('/' . $syl . '/', $word)) {
			$syllables++;
		}
	}

	if (strlen($word) == 1) {
		$syllables++;
	}

	$syllables += count($valid_word_parts);

	//every word has at least one syllable
	if ($syllables < 1) {
		$syllables = 1;
	}

	return $syllables;
}

/**
 * Compute the rounded Flesch score of a text as
 * 206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word).
 *
 * The text must be cleaned (clean_text()) first, to remove entities and so forth.
 *
 * @param string $text
 * @return float
 */
function calculate_flesch($text) {
	$words_per_sentence = average_words_sentence($text);
	$syllables_per_word = average_syllables_word($text);

	$score = 206.835 - (1.015 * $words_per_sentence) - (84.6 * $syllables_per_word);

	return round($score);
}

/*********************************
//Readability functions end here//
*********************************/

/**
 * Lower-case a text and strip HTML entities from it.
 *
 * Every entity string from PHP's HTML_ENTITIES translation table (plus
 * '&nbsp' without the semicolon) is replaced by a single space. Encoded
 * references starting with '&#' are removed entirely, up to and including the
 * closing ';' (or to the end of the text when the ';' is missing).
 *
 * @param string $text
 * @return string
 */
function clean_text($text){
	$entities=get_html_translation_table(HTML_ENTITIES);
	$entities['&nbsp']='&nbsp';

	$text=str_replace($entities, ' ', strtolower($text));

	//remove any encoded values. The whole string is processed: the previous
	//character loop stopped two characters short and so cut off the end of the text
	return preg_replace('/&#[^;]*;?/', '', $text);
}

/**
 * Normalise or remove whitespace-like sequences.
 *
 * By default every '&nbsp;' is turned into a plain space. In thorough mode
 * '&nbsp;', newlines, tabs, carriage returns and spaces are all removed.
 * NOTE(review): a literal non-breaking-space character is not handled here -
 * confirm whether that was intended.
 *
 * @param string $text
 * @param bool   $thorough
 * @return string
 */
function strip_spaces($text, $thorough=false){
	if($thorough)
		return str_replace(array('&nbsp;', "\n", "\t", "\r", ' '), '', $text);

	return str_replace('&nbsp;', ' ', $text);
}

/**
 * Take a CSS property and return a list of HTML equivalents.
 * The property should not be a shortcut value.
 *
 * Each returned entry is an array with the keys:
 *   'value' - the attribute name or tag name,
 *   'type'  - 'attribute' or 'tagname',
 *   'tags'  - the tags the entry applies to ('*' presumably means any tag - confirm against callers).
 *
 * @param string $property CSS property name.
 * @return array Empty when the property has no known equivalent.
 */
function get_attribute_equivilent($property){
	static $map=null;

	if(is_null($map)){
		$any=array('*');
		$table_parts=array('table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td');
		$background_tags=array_merge(array('body'), $table_parts);
		$list_tags=array('ul', 'ol', 'li');
		$border=array(
			array('border', 'attribute', array('img', 'table')),
		);

		//property => list of (value, type, tags)
		$map=array(
			'font' => array(
				array('size', 'attribute', $any),
				array('face', 'attribute', $any),
			),
			'font-size' => array(
				array('size', 'attribute', array('font')),
			),
			'font-style' => array(
				array('i', 'tagname', array('i')),
				array('em', 'tagname', array('em')),
			),
			'font-variant' => array(
				array('size', 'attribute', $any),
			),
			//font-weight and font-family used to fall through into the following
			//cases (missing break), which overwrote their entries
			'font-weight' => array(
				array('b', 'tagname', array('b')),
				array('strong', 'tagname', array('strong')),
			),
			'font-family' => array(
				array('face', 'attribute', $any),
			),
			'white-space' => array(
				array('pre', 'tagname', array('pre')),
				array('nobr', 'tagname', array('nobr')),
			),
			'text-align' => array(
				array('center', 'tagname', array('center')),
				array('align', 'attribute', array_merge(array('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div'), $table_parts)),
			),
			'text-decoration' => array(
				array('u', 'tagname', array('u')),
				array('s', 'tagname', array('s')),
				array('strike', 'tagname', array('strike')),
				array('blink', 'tagname', array('blink')),
			),
			'color' => array(
				array('text', 'attribute', array('body')),
				array('color', 'attribute', array('font')),
			),
			'layer-background-color' => array(
				array('bgcolor', 'attribute', array('layer')),
			),
			'layer-background-image' => array(
				array('background', 'attribute', array('layer')),
			),
			'background' => array(
				array('bgcolor', 'attribute', $background_tags),
				array('background', 'attribute', $background_tags),
				array('bgproperties', 'attribute', array('body')),
			),
			//'tr' was missing from this list although 'background' maps bgcolor onto it
			'background-color' => array(
				array('bgcolor', 'attribute', $background_tags),
			),
			'background-image' => array(
				array('background', 'attribute', array('body')),
			),
			'background-attachment' => array(
				array('background', 'attribute', array('body')),
				array('bgproperties', 'attribute', array('body')),
			),
			'list-style' => array(
				array('type', 'attribute', $list_tags),
			),
			'caption-side' => array(
				array('align', 'attribute', array('caption')),
			),
			'list-style-type' => array(
				array('type', 'attribute', $list_tags),
			),
			'visibility' => array(
				array('visibility', 'attribute', array('layer')),
			),
			'float' => array(
				array('align', 'attribute', array('applet', 'iframe', 'img', 'object', 'spacer', 'table')),
			),
			'clear' => array(
				array('clear', 'attribute', $any),
			),
			'width' => array(
				array('width', 'attribute', $any), //Is this correct?
			),
			'height' => array(
				array('height', 'attribute', $any), //Is this correct?
			),
			'top' => array(
				array('top', 'attribute', array('layer')),
			),
			'left' => array(
				array('left', 'attribute', array('layer')),
			),
			'vertical-align' => array(
				array('align', 'attribute', array('applet', 'iframe', 'img', 'input', 'object', 'spacer')),
				array('valign', 'attribute', array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'th', 'td', 'tr')),
			),
			'clip' => array(
				array('clip', 'attribute', array('layer')),
			),
			'border' => $border,
			'border-left' => $border,
			'border-top' => $border,
			'border-right' => $border,
			'border-bottom' => $border,
		);

		//the CSS property is spelled 'z-index'; the old 'zindex' spelling is kept for
		//existing callers. The value used to be a copy of the 'clip' entry.
		$z_index=array(
			array('z-index', 'attribute', array('layer', 'ilayer')),
		);
		$map['z-index']=$z_index;
		$map['zindex']=$z_index;
	}

	$html=array();

	if(!is_string($property) || !isset($map[$property]))
		return $html;

	foreach($map[$property] as $entry){
		$html[]=array(
			'value' => $entry[0],
			'type' => $entry[1],
			'tags' => $entry[2],
		);
	}

	return $html;
}

/**
 * Take the array of doctype bits and check it against a known list.
 *
 * The public identifier is rebuilt from the bits as
 *   Registration//Organisation//Type Label//Language
 * (for example "-//W3C//DTD XHTML 1.0 Strict//EN") and compared, ignoring
 * case, with the list below.
 *
 * NOTE: Some doctypes will not be correctly identified (ie, SYSTEM, and some
 * others that dont fully conform to the usual layout). The rebuilt identifier
 * always contains '//' separators, so list entries without them, such as
 * 'HTML' and 'SYSTEM', cannot match.
 *
 * @param array $bits Doctype parts; the keys read here are 'Registration',
 *                    'Organisation', 'Type', 'Label' and 'Language'.
 * @return bool
 */
function is_doctype_valid($bits){
	//lower-cased list of accepted identifiers, built once per request
	static $known=null;

	if(is_null($known)){
		$known=array_map('strtolower', array(
			'-//W3C//DTD HTML 4.01//EN',
			'-//W3C//DTD HTML 4.0//EN',
			'-//W3C//DTD XHTML 1.0 Strict//EN',
			'ISO/IEC 15445:2000//DTD HyperText Markup Language//EN',
			'ISO/IEC 15445:2000//DTD HTML//EN',
			'-//IETF//DTD HTML i18n//EN',
			'-//W3C//DTD XHTML 1.0 Transitional//EN',
			'-//W3C//DTD XHTML 1.0 Frameset//EN',
			'-//W3C//DTD HTML 4.01 Transitional//EN',
			'-//W3C//DTD HTML 4.01 Frameset//EN',
			'-//W3C//DTD HTML 4.0 Transitional//EN',
			'-//W3C//DTD HTML 4.0 Frameset//EN',
			'-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//EN',
			'-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//EN',
			'-//IETF//DTD HTML//EN//3.0',
			'-//W3O//DTD W3 HTML 3.0//EN//',
			'-//W3O//DTD W3 HTML 3.0//EN',
			'-//W3C//DTD HTML 3 1995-03-24//EN',
			'-//IETF//DTD HTML 3.0//EN',
			'-//IETF//DTD HTML 3.0//EN//',
			'-//IETF//DTD HTML 3//EN',
			'-//IETF//DTD HTML Level 3//EN',
			'-//IETF//DTD HTML Level 3//EN//3.0',
			'-//IETF//DTD HTML 3.2//EN',
			'-//AS//DTD HTML 3.0 asWedit + extensions//EN',
			'-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN',
			'-//IETF//DTD HTML Strict//EN//3.0',
			'-//W3O//DTD W3 HTML Strict 3.0//EN//',
			'-//IETF//DTD HTML Strict Level 3//EN',
			'-//IETF//DTD HTML Strict Level 3//EN//3.0',
			'HTML',
			'SYSTEM',
			'-//IETF//DTD HTML//EN',
			'-//IETF//DTD HTML//EN//2.0',
			'-//IETF//DTD HTML 2.0//EN',
			'-//IETF//DTD HTML Level 2//EN',
			'-//IETF//DTD HTML Level 2//EN//2.0',
			'-//IETF//DTD HTML 2.0 Level 2//EN',
			'-//IETF//DTD HTML Level 1//EN',
			'-//IETF//DTD HTML Level 1//EN//2.0',
			'-//IETF//DTD HTML 2.0 Level 1//EN',
			'-//IETF//DTD HTML Level 0//EN',
			'-//IETF//DTD HTML Level 0//EN//2.0',
			'-//IETF//DTD HTML Strict//EN',
			'-//IETF//DTD HTML Strict//EN//2.0',
			'-//IETF//DTD HTML Strict Level 2//EN',
			'-//IETF//DTD HTML Strict Level 2//EN//2.0',
			'-//IETF//DTD HTML 2.0 Strict//EN',
			'-//IETF//DTD HTML 2.0 Strict Level 2//EN',
			'-//IETF//DTD HTML Strict Level 1//EN',
			'-//IETF//DTD HTML Strict Level 1//EN//2.0',
			'-//IETF//DTD HTML 2.0 Strict Level 1//EN',
			'-//IETF//DTD HTML Strict Level 0//EN',
			'-//IETF//DTD HTML Strict Level 0//EN//2.0',
			'-//WebTechs//DTD Mozilla HTML//EN',
			'-//WebTechs//DTD Mozilla HTML 2.0//EN',
			'-//Netscape Comm. Corp.//DTD HTML//EN',
			'-//Netscape Comm. Corp.//DTD Strict HTML//EN',
			'-//Microsoft//DTD Internet Explorer 2.0 HTML//EN',
			'-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN',
			'-//Microsoft//DTD Internet Explorer 2.0 Tables//EN',
			'-//Microsoft//DTD Internet Explorer 3.0 HTML//EN',
			'-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN',
			'-//Microsoft//DTD Internet Explorer 3.0 Tables//EN',
			'-//Sun Microsystems Corp.//DTD HotJava HTML//EN',
			'-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN',
			'-//IETF//DTD HTML 2.1E//EN',
			"-//O'Reilly and Associates//DTD HTML Extended 1.0//EN",
			"-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN",
			"-//O'Reilly and Associates//DTD HTML 2.0//EN",
			'-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN',
			'-//Spyglass//DTD HTML 2.0 Extended//EN',
			'+//Silmaril//DTD HTML Pro v0r11 19970101//EN',
			'-//W3C//DTD HTML Experimental 19960712//EN',
			'-//W3C//DTD HTML 3.2//EN',
			'-//W3C//DTD HTML 3.2 Final//EN',
			'-//W3C//DTD HTML 3.2 Draft//EN',
			'-//W3C//DTD HTML Experimental 970421//EN',
			'-//W3C//DTD HTML 3.2S Draft//EN',
			'-//W3C//DTD W3 HTML//EN',
			'-//Metrius//DTD Metrius Presentational//EN',
			'-//W3C//DTD XHTML 1.1//EN',
			'-//W3C//DTD MathML 2.0//EN',
			'-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN',
			'-//W3C//DTD SVG 1.0//EN',
			'-//W3C//DTD SVG 1.1//EN',
			'-//W3C//DTD SVG 1.1 Basic//EN',
			'-//W3C//DTD SVG 1.1 Tiny//EN',
		));
	}

	$identifier=strtolower($bits['Registration'] . '//' . $bits['Organisation'] . '//' . $bits['Type'] . ' ' . $bits['Label'] . '//' . $bits['Language']);

	return in_array($identifier, $known, true);
}

/**
 * Guess whether a table element is a data table, based on its borders.
 *
 * The table is treated as a data table when its border attribute is absent,
 * non-numeric, or at least 1. Otherwise the CSS borders are examined: if ANY
 * of the four sides has a visible border, it is a data table.
 *
 * A side is visible when
 *   - its style is set and is neither 'none' nor 'hidden',
 *   - its width is not an explicit zero (0, 0px, 0em, ...), and
 *   - its colour is not 'transparent'.
 *
 * TODO/NOTE: maybe we should check for borders that are the same colour as the background
 *
 * @param object $parse        Parser exposing specified(), getAttribute() and getStyleValue().
 * @param mixed  $this_element Element handle passed straight through to $parse.
 * @return bool
 */
function is_data_table($parse, $this_element){
	$border_specified=$parse->specified($this_element, 'border');
	$border=$parse->getAttribute($this_element, 'border');

	if(!$border_specified || !is_numeric($border))
		return true;

	if($border>=1)
		return true;

	//the border attribute says "no border", so do a CSS check on each of the four sides
	$sides=array('top', 'right', 'bottom', 'left');
	$parts=array('style', 'width', 'color');

	foreach($sides as $side){
		$css=array();
		foreach($parts as $part){
			$found=$parse->getStyleValue($this_element, 'border-' . $side . '-' . $part);
			//a missing value is treated as "not set"
			$css[$part]=(is_array($found) && isset($found['value'])) ? (string)$found['value'] : '';
		}

		//It MUST have a style for a border to be visible
		if($css['style']=='' || $css['style']=='none' || $css['style']=='hidden')
			continue;

		//an explicit zero width, with or without a unit, hides the border; an unset
		//width or a keyword such as 'thin' does not. The leading number is parsed
		//explicitly because loose comparison of '0px' or 'thin' with 0 differs between PHP versions
		if(preg_match('/^\s*[+-]?(\d+\.?\d*|\.\d+)/', $css['width'], $number) && (float)$number[1]==0)
			continue;

		if($css['color']=='transparent')
			continue;

		return true;
	}

	return false;
}

/**
 * List the media type names known to the validator.
 *
 * @return array Map in which every media type name is both key and value.
 */
function get_media_types(){
	$names=array('all', 'screen', 'print', 'projection', 'aural', 'braille', 'embossed', 'handheld', 'tty', 'tv');

	$types=array();
	foreach($names as $name){
		$types[$name]=$name;
	}

	return $types;
}

/**
 * Classify a tag as block-level or inline.
 *
 * Source: http://www.htmlhelp.com/reference/html40/block.html
 *
 * @param string $tagname       Tag name, compared case-insensitively.
 * @param bool   $just_standard When true only the standard block list is consulted.
 * @param bool   $ignore_divs   When true 'div' and 'table' are reported as inline.
 * @param bool   $ignore_forms  When true 'form' is reported as inline.
 * @return mixed PARSE_BLOCK_ELEMENT or PARSE_INLINE_ELEMENT (constants defined outside this function).
 */
function tagType($tagname, $just_standard=false, $ignore_divs=false, $ignore_forms=false){
	$tagname=strtolower($tagname);

	//standard block-level elements
	static $blocks_standard=null;
	if(is_null($blocks_standard)){
		$blocks_standard=array_flip(array(
			'address', 'blockquote', 'center', 'dir', 'div', 'dl', 'fieldset', 'form',
			'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'isindex', 'menu', 'noframes',
			'noscript', 'ol', 'p', 'pre', 'table', 'ul',
		));
	}

	//The following elements may also be considered block-level elements since they may contain block-level elements.
	//'th' was previously lost to a typo that stored it under the 'td' key.
	static $blocks_extended1=null;
	if(is_null($blocks_extended1)){
		$blocks_extended1=array_flip(array(
			'dd', 'dt', 'frameset', 'li', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
		));
	}

	//The following elements may be used as either block-level elements or inline elements. If used as inline elements
	//(e.g., within another inline element or a P), these elements should not contain any block-level elements.
	static $blocks_extended2=null;
	if(is_null($blocks_extended2)){
		$blocks_extended2=array_flip(array(
			'applet', 'button', 'del', 'iframe', 'ins', 'map', 'object', 'script',
		));
	}

	if(isset($blocks_standard[$tagname])){
		//the caller can ask for certain containers to be treated as inline
		if($ignore_divs && ($tagname=='div' || $tagname=='table'))
			return PARSE_INLINE_ELEMENT;

		if($ignore_forms && $tagname=='form')
			return PARSE_INLINE_ELEMENT;

		return PARSE_BLOCK_ELEMENT;
	}

	if($just_standard)
		return PARSE_INLINE_ELEMENT;

	if(isset($blocks_extended1[$tagname]) || isset($blocks_extended2[$tagname]))
		return PARSE_BLOCK_ELEMENT;

	return PARSE_INLINE_ELEMENT;
}

/**
 * Gatekeeper run before a URL is validated.
 *
 * An empty URL prints a message and terminates the script; the special URL
 * 'about:' redirects to the validator's about page and terminates the script.
 *
 * @param string $url
 * @return bool Always true when the function returns at all.
 */
function can_check($url){
	switch($url){
		case '':
			echo 'no web site specified... exiting';
			exit;

		case 'about:':
			header('Location:http://www.accessware.co.nz/validator/about.html');
			exit;
	}

	return true;
}

/**
 * Reduce a URL to scheme, host, optional port and path.
 *
 * Query string, fragment and credentials are dropped. A URL without a host
 * (or one that parse_url() cannot parse) yields ''. A URL that has a host but
 * no scheme is returned in protocol-relative form ('//host/path').
 *
 * @param string $url
 * @return string
 */
function clean_url($url){
	if($url=='')
		return '';

	$bits=parse_url($url);

	//parse_url() returns false for seriously malformed URLs
	if(!is_array($bits) || !isset($bits['host']))
		return '';

	$cleaned=isset($bits['scheme']) ? $bits['scheme'] . '://' : '//';
	$cleaned.=$bits['host'];

	if(isset($bits['port']))
		$cleaned.=':' . $bits['port'];

	//a bare "http://host" has no path component
	if(isset($bits['path']))
		$cleaned.=$bits['path'];

	return $cleaned;
}

/**
 * Store a new progress message in $_SESSION['messages'].
 *
 * For a rule message the message text doubles as the key and is displayed as
 * "Checking standard <msg>" with class 'loading'. For any other message the
 * key is the current number of stored messages and the class is 'notice'.
 * The session is opened and closed again inside this call.
 *
 * @param string $msg       Rule identifier, or the notice text when $is_rule is false.
 * @param string $rule_text Text stored alongside the message.
 * @param bool   $is_rule
 * @return void
 */
function create_message($msg, $rule_text='', $is_rule=true){
	session_start();

	if($is_rule){
		$key=$msg;
		$msg='Checking standard ' . $msg;
		$class='loading';
	} else {
		//counting an unset entry raises a TypeError on PHP 8, so the first notice gets key 0 explicitly
		if(isset($_SESSION['messages']) && is_array($_SESSION['messages']))
			$key=sizeof($_SESSION['messages']);
		else
			$key=0;

		$class='notice';
	}

	$_SESSION['messages'][$key]['rule']=$msg;
	$_SESSION['messages'][$key]['text']=$rule_text;
	$_SESSION['messages'][$key]['is_rule']=$is_rule;
	$_SESSION['messages'][$key]['class']=$class;
	$_SESSION['messages'][$key]['type']='new';
	$_SESSION['messages'][$key]['key']=$key;
	//dots in the key are replaced with underscores for the id
	$_SESSION['messages'][$key]['id']='nz_s_' . str_replace('.', '_', $key);

	session_write_close();
}

//function update_message($msg=-1, $rule_text='', $key=-1, $is_rule=true){
/**
 * Mark the message stored under $key as checked and bump the progress counter.
 *
 * The message text becomes "Checked standard <key>", its type becomes
 * 'update', and its class is set to the *_CLASS constant that corresponds to
 * $err_type (a value matching none of the RESULT_* constants is stored as
 * given). $_SESSION['count'] is incremented. The session is opened and closed
 * again inside this call.
 *
 * @param string $key      Key of the message, as used by create_message().
 * @param mixed  $err_type One of the RESULT_* constants.
 * @return void
 */
function update_message($key, $err_type){
	session_start();

	$_SESSION['messages'][$key]['rule']='Checked standard ' . $key;

	//translate the result constant into its class constant
	if($err_type==RESULT_PASS)
		$class=RESULT_PASS_CLASS;
	elseif($err_type==RESULT_USER)
		$class=RESULT_USER_CLASS;
	elseif($err_type==RESULT_NOT_CHECKED)
		$class=RESULT_NOT_CHECKED_CLASS;
	elseif($err_type==RESULT_WARNING)
		$class=RESULT_WARNING_CLASS;
	elseif($err_type==RESULT_FAIL)
		$class=RESULT_FAIL_CLASS;
	else
		$class=$err_type;

	$_SESSION['messages'][$key]['class']=$class;
	$_SESSION['messages'][$key]['type']='update';
	$_SESSION['messages'][$key]['id']='nz_s_' . str_replace('.', '_', $key);

	//an unset or empty counter starts from zero
	$count=isset($_SESSION['count']) ? $_SESSION['count'] : 0;
	if($count=='')
		$count=0;

	$count++;
	$_SESSION['count']=$count;

	session_write_close();
}

/**
 * Reduce a list of results to a single result type.
 *
 * Returns RESULT_FAIL as soon as any result contains it; otherwise the
 * largest type value seen, or -1 when there are no types at all.
 *
 * @param array $results Each entry has a 'type' key holding a list of result types.
 * @return mixed
 */
function get_error_type($results){
	$highest=-1;

	foreach($results as $result){
		foreach($result['type'] as $type){
			//a failure outranks everything else, so there is no need to look further
			if($type==RESULT_FAIL)
				return RESULT_FAIL;

			if($type>$highest)
				$highest=$type;
		}
	}

	return $highest;
}

/**
 * Extract the first number from a string such as "12.5px".
 *
 * @param mixed  $numString Text to search.
 * @param string $regexp    Pattern describing a number; the first match is used.
 * @return mixed The matched text when it is numeric, otherwise false.
 */
function getnum($numString=0, $regexp='/\d+\.?\d*/'){
	if($numString=='')
		return false;

	//only the first match is needed; bail out cleanly when there is none
	if(!preg_match($regexp, $numString, $match))
		return false;

	return is_numeric($match[0]) ? $match[0] : false;
}

/**
 * Report the measurement unit a value ends with.
 *
 * @param string $value For example "12px" or "50%".
 * @return string 'em', 'ex', 'px' or '%'; a blank string when none of these is found.
 */
function get_measurement_unit($value){
	$suffix=substr($value, -2);
	foreach(array('em', 'ex', 'px') as $unit){
		if($suffix==$unit)
			return $unit;
	}

	if(substr($value, -1)=='%')
		return '%';

	//if there is no unit at this point, we shall return a blank string
	return '';
}

/**
 * Base64-encode a value and serialize the encoded string.
 *
 * NOTE(review): this is not the inverse of __unserialize($s, true), which
 * base64-decodes first and unserializes second - confirm which order callers expect.
 *
 * @param string $object Value handed to base64_encode().
 * @return string
 */
function __serialize($object){
	$encoded=base64_encode($object);

	return serialize($encoded);
}

/**
 * Unserialize a string, optionally base64-decoding it first.
 *
 * NOTE(review): unserialize() must only be given trusted data - verify where
 * callers obtain $sObject.
 *
 * @param string $sObject  Serialized (and possibly base64-encoded) data.
 * @param bool   $isBase64 When true $sObject is base64-decoded before unserializing.
 * @return mixed
 */
function __unserialize($sObject, $isBase64=false) {
	$payload=$isBase64 ? base64_decode($sObject) : $sObject;

	return unserialize($payload);
}
?>
Return current item: OpenWolf Guidelines Validator