Location: PHPKode > scripts > Web Page Size Calculator > web-page-size-calculator/class.webpagesize.php
<?php
## v0.1	Beta ## 02 April 2005
## v0.2	Beta ## 05 April 2005
## v1.0	## 06 April 2005
##      { using curl to get filesize }
## v1.1 ## 07 April 2005
##      { supporting 'base url' }
##      ## 09 April 2005
##      { fixing bugs on parsing CSS }
##
######
## Webpage Size Calculator
##
## Is your website page too fat?
## If your website pages are too fat, there are many problems you face.
## Such as: (1) slow motion on displaying your webpage,
##          (2) bandwidth usage that is over-dose.
## But, how dou know the size of your webpage?
## It is not easy to measure your webpage weight, in particular a dynamic page.
##
## This class calculates size of webpage and all elements, such as image, js, swf, frame, etc.
## By knowing your page size, you can take an action,
## wether reducing the size or removing unimportant parts.
##
## Limitation
## Can not measure javascript`s pre-loading images
#######
## Author: Huda M Elmatsani
## Email: 	justhuda ## gmail ## com
##
## 04/02/2005
#######
## Copyright (c) 2005 Huda M Elmatsani All rights reserved.
## This program is freeware
## Please, tell me if you made improvements or just a little modification
## Please, tell me if you made online tool with this class
########
## USAGE
##
## $size = new WebpageSize;
## $size->setURL("http://www.php.net/");
## $size->printResult();
##
## see sample.webpagesize.php
##
## credits:
## Fauzan Aminuddin, Satya Agustan Dinata, Ciko Parera @phpug-at-yahoogroups
####

class WebpageSize {


	var $url 		= '';
    var $baseurl    = '';
    var $tailfile   = '';
	var $proxy		= '';
	var $proxyport	= 3128;
    var $pages      = array();
    var $freqpages  = array(); //frequency of page element to be loaded

    function setURL($url) {
         $this->url = $this->parseURL($url);
	}

    function parseURL($url) {

        $this->tailfile = substr($url, strrpos($url, '/')+1);
        $parsed = parse_url($url);
        if($this->tailfile == $parsed['host'])  $this->tailfile = '';
		if(substr($url, -1)=='/' or $this->tailfile)
            return $url;
  		else
            $url =  $url.'/';
        return $url;
    }

    /*
    * searching base href
    */
	function setBaseURL($str) {
		preg_match("/base.*[\s]*href[\040]*=[\040]*\"?([^\"' >]+)/ie", $str, $match);

         if($match[1]) {
            $url = $this->parseURL($match[1]);
            if(substr($url, -1)!='/') $url .= '/';
            $this->baseurl = $url;
        } else {
            $this->baseurl = $this->url;
        }

    }


    /*
    *   if you access via proxy, use this
    */
	function setURLviaProxy($url,$proxy,$port) {
		$this->setURL($url);
		$this->proxy     = $proxy;
		$this->proxyport = $port;
			
	}

    /*
    *  core function!
    *  page elements and the size
    */
	function getResult() {

		$paths      = $this->grabPageSources();
        array_unshift ($paths, $this->url);
        $pages    = array();

        //cURL
		
		if(function_exists('curl_init')) {
		
			$ch = curl_init();
			if($this->proxy) {
			   curl_setopt($ch, CURLOPT_PROXY, $this->proxy.':'.$this->proxyport);
			}
			curl_setopt($ch, CURLOPT_HEADER, 1);
			curl_setopt($ch, CURLOPT_NOBODY, 1);
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
	
			for($i=0; $i<count($paths); $i++){
	
				if(!array_key_exists($paths[$i],$pages) ){
					curl_setopt($ch, CURLOPT_URL, $paths[$i]);
					$headers = curl_exec ($ch);
					$filesize = curl_getinfo($ch,CURLINFO_CONTENT_LENGTH_DOWNLOAD);
					if(!$filesize) //try using fopen
						$filesize = strlen($this->getContent($paths[$i]));
					if(!$filesize)
                        continue;
					$this->freqpages[$paths[$i]] = 1;
					$pages[$paths[$i]] = $filesize;
					$this->totalsize  += $filesize;
				} else
					$this->freqpages[$paths[$i]] += 1;
			}
			curl_close ($ch);
		} else {
		
			for($i=0; $i<count($paths); $i++){
				if(!array_key_exists($paths[$i],$pages) ){
					$filesize = strlen($this->getContent($paths[$i]));
					$this->freqpages[$paths[$i]] = 1;
					$pages[$paths[$i]] = $filesize;
					$this->totalsize  += $filesize;
				} else
					$this->freqpages[$paths[$i]] += 1;
			}
		
		}
		
        natsort($pages);

        return $pages;
	}

	function totalPageSize() {
		return $this->totalsize;
	}

	/*
	* this one is usefull
	*/
	function readableSize($size) {
		return number_format($size/1024,2)." KB";
	}

    /*
    *  pre-formated output
    */
	function printResult() {
		$pages = $this->getResult();
		$strtable = '<table width=\"700\" border=\"1\">' .
                    '<tr bgcolor=#F3F3F3><td width=\"360\" colspan=2>' .
                    '<div align=\"center\">Webpage`s URL : '.$this->url.'</div></td>' .
                    '<td width=\"140\" colspan=2>Size : ' . $this->readableSize($this->totalPageSize()) . '</td></tr>' .
                    '<tr bgcolor=#F3F3F3><td width=\"24\"><div align=\"center\">#</div></td>' .
					'<td width=\"210\">URL of Elements of Webpage</td>' .
					'<td width=\"86\">Filesize</td>' .
					'<td width=\"32\">Freq</td>' .
				  	'</tr>';
		$n=0;
		while(list($url,$size) = each($pages)){
			$strtable .= '<tr><td width=20>'.++$n.'</td><td width=440>'. $url. '</td>' .
                         '<td width=100>'. $this->readableSize($size) . '</td>' .
                         '<td width=40>' . $this->freqpages[$url] .     '</td></tr>';
		}
		$strtable .= '<tr bgcolor=#F3F3F3><td>&nbsp;</td><td> Total Webpage Size</td><td colspan=2>'.
					 $this->readableSize($this->totalPageSize()) . '</td></tr>';
		$strtable .='</table>';
		echo $strtable;
	}
    /*
    *  from "../../images/some.jpg" for example to "http://www.domain.com/images/some.jpg"
    */
	function resolvePathSources($paths) {

		while(list(,$src) = each($paths))
			$arr_path[] = $this->makeAbsolutePath($src,$this->baseurl);
       return $arr_path;
	}

    /*
    *  taking webpage content
    *  fopen is lighter than cURL
    */
	function getContent($url){
       if($this->proxy) {
            return $this->getContentProxy($url);
       } else {
    		$file = @fopen($url, 'rb');
    		$buffer = '';
    		if(!$file) return '';
    		while(!feof($file)) {
    			   $buffer .= fread($file,1024);
    		}
    		fclose($file);
    		return $buffer;
           }
    }

    /*
    *  taking webpage content via proxy
    */
	function getContentProxy($url)
	{
	   $buffer = '';

	   $file = fsockopen($this->proxy, $this->proxyport);
	   if (!$file)    {return '';}
	   fputs($file, "GET $url HTTP/1.0\r\nHost: $proxy_name\r\n\r\n");
	   while(!feof($file)) {$buffer .= fread($file,4096);}
	   fclose($file);
	   $buffer = substr($buffer, strpos($buffer,"\r\n\r\n")+4);
	   return $buffer;
	} 

    /*
    * searching webpage elements
    */
	function grabPageSources() {

		$content  = $this->getContent($this->url);
        $this->setBaseURL($content);

		$arr_src1 = array();
		$arr_src2 = array(); 
		$arr_src3 = array();
        $arr_src4 = array();
		$arr_src5 = array();
		$arr_src6 = array();	
	
		$arr_src1 = $this->searchSources($content);
        //search CSS classes that applied on page
        $this->CSSclasses = $this->searchCSSClasses($content);

       // print_r( $arr_clss );
		$arr_src2 = $this->searchSourcesOnCSS($content);
		$arr_src3 = $this->searchCSSLinks($content);

        if(!empty($arr_src3))
		$arr_src4 = $this->searchSourcesOnCSSFiles($arr_src3);
        //search on frames if exists
        $arr_src5 = $this->searchFrames($content);
        if(!empty($arr_src5))
        $arr_src6 = $this->searchSourcesOnFrames($arr_src5);

		$arr_sources  = array_merge ($arr_src1, $arr_src2, $arr_src3,
                                 $arr_src4, $arr_src5, $arr_src6);

        return $this->resolvePathSources($arr_sources);

	}

    /*
    * searching image/js elements
    */
	function searchSources($str) {
		preg_match_all("/[img|input|embed|script]+.*[\s]*(src|background)[\040]*=[\040]*\"?([^\"' >]+)/ie", $str, $arr_source);
		return $arr_source[2];
	}

    /*
    * searching class elements
    */
	function searchCSSClasses($str) {
		preg_match_all("/class[\040]*=[\040]*\"?([^\"' >]+)/ie", $str, $arr_source);
		return $arr_source[1];
	}
    /*
    * searching frame elements
    */
	function searchFrames($str) {
		preg_match_all("/frame.*[\s]*src[\040]*=[\040]*\"?([^\"' >]+)/ie", $str, $arr_source);
		return $arr_source[1];
	}

    /*
    * searching css elements
    */
	function xsearchSourcesOnCSS($str) {
		preg_match_all("/(url\(\"?([^\")]+))/ie", $str, $arr_source);
		return $arr_source[2];
	}

     /*
    * searching css elements
    */
	function searchSourcesOnCSS($str) {

		preg_match_all("/(\.(.*)\s+\{[\s]+)*.*url\(\"?([^\")]+)/ie", $str, $arr_source);
        for($i=0; $i<count($arr_source);$i++) {
            if( in_array( $arr_source[2][$i], $this->CSSclasses )) {
                $arr_sources[] = $arr_source[3][$i];
            }
        }
		return $arr_sources;
	}

    /*
    * searching webpage elements on frames
    */
    function searchSourcesOnFrames($framefiles) {
        $arr_source  = array();
        $arr_sources = array();
        while(list(,$src)   = each($framefiles)) {

            $framepage        = $this->makeAbsolutePath($src,$this->baseurl);

            $page = new WebpageSize;
             if($this->proxy)
                $page->setURLviaProxy($framepage, $this->proxy, $this->proxyport);
            else
                $page->setURL($framepage);


            $arr_source  = $page->grabPageSources();
            $arr_sources = array_merge($arr_sources, $arr_source);
        }
        return $arr_sources;
    }

    /*
    * searching webpage elements on CSS files
    */
    function searchSourcesOnCSSFiles($cssfiles) {

        //search sources on CSS file
        $arr_CSSlinks = array();
        while(list(,$src)   = each($cssfiles)) {
            $numstepback    = substr_count($src, "../");
            $CSSpage        = $this->makeAbsolutePath($src,$this->baseurl);

            $CSScontent     = $this->getContent($CSSpage);
            $arr_sourcelink    = $this->searchSourcesOnCSS($CSScontent);
            if(empty( $arr_sourcelink )) continue;

            while(list(,$srclink)   = each($arr_sourcelink)) {
                $arr_CSSlink[]  =   str_repeat("../",$numstepback) . $srclink;
            }
            $arr_CSSlinks   = array_merge($arr_CSSlinks, $arr_CSSlink);
        }
        return $arr_CSSlinks;

    }

    /*
    * searching webpage elements on CSS
    */
	function searchCSSLinks($str) {
         preg_match_all("/<link[^>]+href[\040]*=[\040]*[\"|\'|\\\\]*([^\'|\"|>|\040]*(.*)\.css)[\"|\'|>|\040|\\\\]*/ie",$str, $arr_CSSlink);
        return $arr_CSSlink[1];
	}


	function makeAbsolutePath ($src,$url) {

        $addone = 1;
     
        if ($this->tailfile) {
            $url = substr($url, 0, -(strlen($this->tailfile)+1));
            $addone = 0;
        }
        
        if (strtolower(substr($src,0,4)) != 'http') {

            $numrel  = substr_count($src, "../");
            $src     = str_replace("../","",$src);

            for($i=0; $i < $numrel+$addone; $i++) {
                $lastslash  = strrpos($url,"/");
                $url       = substr($url, 0, $lastslash);
            }

          return $url.'/'.$src;
        }
        else return $src;
	}

}
?>
Return current item: Web Page Size Calculator