Location: PHPKode > projects > Web Statistics Montage > web-statistics-montage/stats_generator.php
<?php
	include('config.php');
	include('check-pagerank.php');

	/**
	 * Extract one robot's crawl statistics from an AWStats HTML page.
	 *
	 * Looks for "<label></td><td>hits<td>bandwidth<td>last visit</td></tr>".
	 * The pattern does not span newlines, so callers pass HTML that has had
	 * its newlines removed.
	 *
	 * @param string $string   Robot label, matched literally (e.g. "Googlebot").
	 * @param string $contents AWStats HTML to search.
	 * @return array Keys: hits, bandwidth, last_spidered (Y-m-d H:i:s) and
	 *               last_spidered_formatted (HTML). Every value is '' when the
	 *               robot has no row; the two date values are '' when the
	 *               last-visit cell is missing or cannot be parsed.
	 */
	function getSpiderInfo($string,$contents) {
		// Always return the same shape so callers can test ['hits'] safely.
		$spider = array(
			'hits' => '',
			'bandwidth' => '',
			'last_spidered' => '',
			'last_spidered_formatted' => '',
		);

		// The label is data, not a pattern: quote regex metacharacters and the delimiter.
		$pattern = '/' . preg_quote($string, '/') . '<\/td><td>(.*?)<\/td><\/tr>/';
		if (!preg_match($pattern, $contents, $match) || $match[1] === '') {
			return $spider;
		}

		$cells = explode('<td>',$match[1]);
		$spider['hits'] = $cells[0];
		$spider['bandwidth'] = isset($cells[1]) ? $cells[1] : '';

		if (isset($cells[2])) {
			$last_visit = strtotime($cells[2]);
			// strtotime() reports failure as false (or -1 on very old PHP).
			if ($last_visit !== false && $last_visit > 0) {
				$spider['last_spidered'] = date('Y-m-d H:i:s',$last_visit);
				$spider['last_spidered_formatted'] = date('M j, y',$last_visit) . '<br>' . date('g:i a',$last_visit);
			}
		}
		return $spider;
	}

	/**
	 * Render the three table cells (hits, bandwidth, last spidered) for one robot.
	 *
	 * Reads the robot's statistics from the global $info array for the site
	 * currently selected by the global $url_i.
	 *
	 * @param string $bot_name Key under $info[$url_i], e.g. "googlebot".
	 * @param string $style    CSS class applied to each cell.
	 * @return string Three <td> elements; each shows "(none)" when the robot
	 *                has no recorded hits.
	 */
	function spiderRow($bot_name,$style) {
		global $info;
		global $url_i;

		// The entry may be missing or null when nothing was scraped for this robot.
		$stats = array();
		if (isset($info[$url_i][$bot_name]) && is_array($info[$url_i][$bot_name])) {
			$stats = $info[$url_i][$bot_name];
		}

		if (isset($stats['hits']) && $stats['hits'] != '') {
			$bandwidth = isset($stats['bandwidth']) ? $stats['bandwidth'] : '';
			// Already HTML (contains a <br>), so it is emitted as-is.
			$last_spidered = isset($stats['last_spidered_formatted']) ? $stats['last_spidered_formatted'] : '';
			return '
				<td class="' . $style . '">' . $stats['hits'] .'</td> 
				<td class="' . $style . '">' . $bandwidth .'</td>
				<td class="' . $style . '">' . $last_spidered .'</td>';
		}

		return '
				<td class="' . $style . '">(none)</td>
				<td class="' . $style . '">(none)</td>
				<td class="' . $style . '">(none)</td>';
	}
	
	/**
	 * Toggle between two values: yields $item2 when $current loosely equals
	 * $item1, and $item1 in every other case. Used to alternate row styles.
	 *
	 * @param mixed $current Value returned by the previous call (or unset/empty).
	 * @param mixed $item1   First alternative, also the starting value.
	 * @param mixed $item2   Second alternative.
	 * @return mixed The alternative to use next.
	 */
	function ternary($current,$item1,$item2) {
		return ($current == $item1) ? $item2 : $item1;
	}

	// Scrape AWStats for every configured site: store daily unique visitors,
	// collect search-engine link counts and robot activity in $info, and
	// record this month's keyphrase totals (new or grown ones also go into
	// $info[...]['daily_keywords'] for the report).
	$url_i = 0;
	while ($url_i <= $highest_url_key)  {
		if (isset($urls[$url_i]['username']) && $urls[$url_i]['username'] != '') {
			// START VISITS
			// Fetch the AWStats main frame. The response is only parsed, so it
			// is read as text; include()ing it would execute any PHP it contained.
			$file = $awstats_file_location . '?config=' . $urls[$url_i]['url'] . '&framename=mainright&url_id=' . $url_i;
			$contents = file_get_contents($file);
			if ($contents === false) {
				$contents = '';
			}
			// The patterns below do not span newlines.
			$contents = str_replace("\n",'',$contents);

			// Get the time it was processed ('' when absent or unparseable)
			$date_time[$url_i] = '';
			if (preg_match('/<span style="font-size: 12px;">(.*?)<\/span>/', $contents, $match)) {
				$processed_at = strtotime($match[1]);
				if ($processed_at !== false && $processed_at > 0) {
					$date_time[$url_i] = date('g:i a',$processed_at);
				}
			}

			// Isolate the per-day rows of the "days of month" table
			$month_contents = '';
			if (preg_match('/<td valign="middle">Average<\/td>(.*?)<a name="daysofweek">/', $contents, $match)) {
				$month_contents = $match[1];
			}
			if (preg_match('/Bandwidth<\/td><\/tr>(.*?)<\/tr><tr bgcolor="#ECECEC"><td>Average/', $month_contents, $match)) {
				$month_contents = $match[1];
			} else {
				$month_contents = '';
			}

			// Get just today and before: cut the markup at tomorrow's date label.
			// 'tomorrow' is calendar-based, so DST changes cannot shift it.
			$tomorrow = date('d M Y',strtotime('tomorrow'));
			$month_contents_array = explode($tomorrow,$month_contents);
			$month_contents = $month_contents_array[0];

			// Deal with each day
			$month_array = explode('</tr>',$month_contents);
			$day_i = 0;
			while (isset($month_array[$day_i]) && $month_array[$day_i] != '') {
				$day_array = explode('</td>',$month_array[$day_i]);
				$day_timestamp = strtotime(strip_tags($day_array[0]));
				$visits = isset($day_array[1]) ? strip_tags($day_array[1]) : '';
				// Only store days that parsed and are not in the future. The
				// row fragment left by the cut above has no date and is skipped.
				if ($day_timestamp !== false && $day_timestamp > 0 && $day_timestamp <= time()) {
					$date = date('Y-m-d H:i:s',$day_timestamp);
					// Scraped text: escape it before embedding it in SQL.
					$visits_sql = mysql_real_escape_string($visits);
					// Check if entry exists
					$result = mysql_query('
						SELECT * 
						FROM stats_unique_visitors
						WHERE unique_visitors_day = "' . $date . '" and
							urls_id = "' . $url_i . '"');
					if (mysql_num_rows($result) < 1) {
						$insert = mysql_query('
							INSERT INTO stats_unique_visitors
							SET unique_visitors_day = "' . $date . '",
								urls_id = "' . $url_i . '",
								unique_visitors_number = "' . $visits_sql . '"');
					} else {
						$update = mysql_query('
							UPDATE stats_unique_visitors
							SET unique_visitors_number = "' . $visits_sql . '"
							WHERE unique_visitors_day = "' . $date . '" and
								urls_id = "' . $url_i . '"');
					}
				}
				$day_i++;
			}
			// END VISITS

			// START LINKS FROM SEARCH ENGINES
			$engine_labels = array(
				'google_links' => 'Google',
				'yahoo_links' => 'Yahoo',
				'msn_links' => 'MSN',
			);
			foreach ($engine_labels as $info_key => $engine_label) {
				$engine_pattern = '/<td class="aws">- ' . $engine_label . '<\/td><td>(.*?)<\/td>/';
				if (preg_match($engine_pattern, $contents, $match) && $match[1] != '') {
					$info[$url_i][$info_key] = $match[1];
				} else {
					$info[$url_i][$info_key] = '(none)';
				}
			}
			// END LINKS FROM SEARCH ENGINES

			// START ROBOTS
			$info[$url_i]['googlebot'] = getSpiderInfo('Googlebot',$contents);
			$info[$url_i]['msnbot'] = getSpiderInfo('MSNBot',$contents);
			$info[$url_i]['inktomibot'] = getSpiderInfo('Inktomi Slurp',$contents);
			// END ROBOTS

			// START KEYWORDS
			$file = $awstats_file_location . '?config=' . $urls[$url_i]['url'] . '&framename=mainright&output=keyphrases&url_id=' . $url_i;
			$contents = file_get_contents($file);
			if ($contents === false) {
				$contents = '';
			}
			$contents = str_replace("\n",'',$contents);

			$keyword_contents = '';
			if (preg_match('/Percent<\/th><\/tr><tr><td class="aws">(.*?)<\/table><\/td>/', $contents, $match)) {
				$keyword_contents = $match[1];
			}

			$keywords_array = explode('<td class="aws">',$keyword_contents);

			$this_month_year = date('Y-m');
			$keyword_i = 0;
			$daily_key_i = 0;
			while (isset($keywords_array[$keyword_i]) && $keywords_array[$keyword_i] != '') {
				$keyword_array = explode('</td>',$keywords_array[$keyword_i]);
				// $keyword stays unescaped for display; only the *_sql copies go into queries.
				$keyword = strip_tags($keyword_array[0]);
				$keyword_num = isset($keyword_array[1]) ? strip_tags($keyword_array[1]) : '';
				$keyword_sql = mysql_real_escape_string($keyword);
				$keyword_num_sql = mysql_real_escape_string($keyword_num);
				$sql = '
					SELECT * 
					FROM stats_keywords
					WHERE keywords_keyword = "' . $keyword_sql . '" and
						urls_id = "' . $url_i . '" and
						keywords_year_month = "' . $this_month_year . '"';
				$result = mysql_query($sql);
				if (mysql_num_rows($result) < 1) {
					// First sighting this month: the whole total counts as today's change.
					$insert = mysql_query('
						INSERT INTO stats_keywords
						SET keywords_keyword = "' . $keyword_sql . '",
							urls_id = "' . $url_i . '",
							keywords_total = "' . $keyword_num_sql . '",
							keywords_last_appeared = "' . date('Y-m-d H:i:s') . '",
							keywords_year_month = "' . $this_month_year . '"');
					$info[$url_i]['daily_keywords'][$daily_key_i]['keyword'] = $keyword;
					$info[$url_i]['daily_keywords'][$daily_key_i]['change'] = $keyword_num;
					$daily_key_i++;
				} else {
					$row = mysql_fetch_array($result);
					// Only a grown total counts as new activity.
					if ($keyword_num > $row['keywords_total']) {
						$update = mysql_query('
							UPDATE stats_keywords
							SET keywords_last_appeared = "' . date('Y-m-d H:i:s') . '",
								keywords_total = "' . $keyword_num_sql . '",
								keywords_year_month = "' . $this_month_year . '"
							WHERE keywords_id = "' . $row['keywords_id'] . '"');

						$info[$url_i]['daily_keywords'][$daily_key_i]['keyword'] = $keyword;
						$info[$url_i]['daily_keywords'][$daily_key_i]['change'] = $keyword_num - $row['keywords_total'];
						$daily_key_i++;
					}
				}
				$keyword_i++;
			}
			// END KEYWORDS
		}
		$url_i++;
	}
// END SCRAPING


// START OUTPUT
	
	// Create Visitor Table
	// One header cell per day, newest first, covering the last
	// $days_to_display days counted back from the current moment. time()
	// takes no arguments, so the window is anchored on "now"; the header
	// loop and the query below share that same anchor.
	$th_time = time();
	$start_time = $th_time-$days_to_display*60*60*24;
	$start_date = date('Y-m-d H:i:s',$start_time);

	$th_row = '';
	while ($th_time > $start_time) {
		$month = date('M',$th_time);
		$weekday = date('D',$th_time);
		$day = date('j',$th_time);
		$th_row .= '<th>' . $month . '<br />' . $day . '<br />' . $weekday . '</th>';
		$th_time = $th_time - 60*60*24;
	}
	$th_row = '<th>PR</th><th>Time</th><th>URL</th>' . $th_row;

	$visit_table = '';
	if (!isset($row_style)) {
		$row_style = '';
	}
	$url_i = 0;
	while ($url_i <= $highest_url_key)  {
		if (isset($urls[$url_i]['username']) && $urls[$url_i]['username'] != '') {

			// Get PR. GoogleCH() and strord() are not defined in this file
			// (presumably they come from check-pagerank.php).
			$pr_url = 'http://www.' . $urls[$url_i]['url'];
			$pr_url = 'info:'.$pr_url; 
			$ch = GoogleCH(strord($pr_url)); 
			// Read the response as text; include()ing a remote URL would
			// execute any PHP it returned.
			$page = file_get_contents('http://www.google.com/search?client=navclient-auto&ch=6' . $ch . '&features=Rank&q=' . $pr_url);
			if ($page === false) {
				$page = '';
			}
			// The rank is taken from the third colon-separated field.
			$rank_array = explode(':',$page);
			$pr = isset($rank_array[2]) ? trim($rank_array[2]) : '';

			$row_style = ternary($row_style,'main-row-1','main-row-2');
			$visit_row = '';
			$result = mysql_query('
				SELECT *
				FROM stats_unique_visitors
				WHERE urls_id = "' . $url_i . '" and
					unique_visitors_day >= "' . $start_date . '"
				ORDER BY unique_visitors_day DESC');
			// NOTE(review): cells are emitted in row order, so a day missing
			// from the table would shift later values under the wrong header.
			while ($result && ($row = mysql_fetch_array($result))) {
				$visit_row .= '<td class="' . $row_style . '">' . $row['unique_visitors_number'] . '</td>';
			}

			$processed_time = isset($date_time[$url_i]) ? $date_time[$url_i] : '';
			$stats_url = $awstats_file_location . '?config=' . $urls[$url_i]['url'] . '&url_id=' . $url_i;
			$visit_table .= '
				<tr>
					<td class="' . $row_style . '">' . $pr . '</td>
					<td class="' . $row_style . ' nowrap">' . $processed_time . '</td>
					<td class="' . $row_style . '"><a href="' . $stats_url . '" target="_blank">'. $urls[$url_i]['url'] . '</a></td>
					' . $visit_row . '
				</tr>';
		}
		$url_i++;
	}
	$visit_table = '<table class="main-table">' . $th_row . $visit_table . '</table>';
	//echo $visit_table;
	
	// Start Spider Table
	// One row per configured site: inbound search-engine link counts followed
	// by hits / bandwidth / last visit for Googlebot, Inktomi Slurp and MSNBot.
	$url_i = 0;
	$spider_table = '';
	$th_row = '
		<th>URL</th>
		<th>Google<br />Links</th>
		<th>Yahoo<br />Links</th>
		<th>MSN<br />Links</th>
		<th>Google Hits</th>
		<th>Bandwidth</th>
		<th>Last Spidered</th>
		<th>Inktomi Hits</th>
		<th>Bandwidth</th>
		<th>Last Spidered</th>
		<th>MSN Hits</th>
		<th>Bandwidth</th>
		<th>Last Spidered</th>';
	while ($url_i <= $highest_url_key)  {
		if (isset($urls[$url_i]['username']) && $urls[$url_i]['username'] != '') {
			$row_style = ternary($row_style,'main-row-1','main-row-2');
			// spiderRow() reads the site selected by the global $url_i.
			$spider_row  = '';
			$spider_row .= spiderRow('googlebot','google-td');
			$spider_row .= spiderRow('inktomibot','inktomi-td');
			$spider_row .= spiderRow('msnbot','msn-td');

			// Fall back to "(none)" if nothing was collected for this site.
			$google_links = isset($info[$url_i]['google_links']) ? $info[$url_i]['google_links'] : '(none)';
			$yahoo_links = isset($info[$url_i]['yahoo_links']) ? $info[$url_i]['yahoo_links'] : '(none)';
			$msn_links = isset($info[$url_i]['msn_links']) ? $info[$url_i]['msn_links'] : '(none)';

			$stats_url = $awstats_file_location . '?config=' . $urls[$url_i]['url'] . '&url_id=' . $url_i;
			$spider_table .= '
				<tr>
					<td class="' . $row_style . '"><a href="' . $stats_url . '" target="_blank">'. $urls[$url_i]['url'] . '</a></td>
					<td class="google-td">' . $google_links . '</td>
					<td class="inktomi-td">' . $yahoo_links . '</td>
					<td class="msn-td">' . $msn_links . '</td>
					' . $spider_row . '
				</tr>';
		}
		$url_i++;
	}
	$spider_table = '<table class="main-table">' . $th_row . $spider_table . '</table>';
	//echo $spider_table;
	
	// Start Daily Keywords
	// For each site, list the keyphrases that were new or gained hits during
	// this run, as collected in $info[...]['daily_keywords'].
	$daily_key_block = '';
	$url_i = 0;
	while ($url_i <= $highest_url_key)  {
		$daily_key_table = '';
		if (isset($urls[$url_i]['username']) && $urls[$url_i]['username'] != '') {
			$daily_key_i = 0;
			// Entries are read in order until the first missing or empty keyword.
			while (isset($info[$url_i]['daily_keywords'][$daily_key_i]['keyword'])
				&& $info[$url_i]['daily_keywords'][$daily_key_i]['keyword'] != '') {
				$daily_entry = $info[$url_i]['daily_keywords'][$daily_key_i];
				// Keyphrases originate from visitors' search queries: escape them
				// for HTML, without double-encoding entities already present.
				$daily_keyword = htmlspecialchars($daily_entry['keyword'], ENT_QUOTES, 'ISO-8859-1', false);
				$daily_change = isset($daily_entry['change'])
					? htmlspecialchars((string) $daily_entry['change'], ENT_QUOTES, 'ISO-8859-1', false)
					: '';
				$row_style = ternary(isset($row_style) ? $row_style : '','main-row-1','main-row-2');
				$daily_key_table .= '
						<tr><td class="' . $row_style . '">' . $daily_keyword . '</td>
						<td class="' . $row_style . '">' . $daily_change . '</td></tr>';
				$daily_key_i++;
			}
			// Sites with no changes get no section at all.
			if ($daily_key_table != '') {
				$daily_key_block .= '
					<div class="daily-key-title">' . $urls[$url_i]['url'] . '</div>
					<table class="main-table">
						<th>Keyword</th>
						<th>#</th>' . $daily_key_table . '
					</table><br />';
			}
		}
		$url_i++;
	}
	//echo $daily_key_block;
	
	// Start Popular Keywords
	// For each site, total every keyphrase seen within the last
	// $popular_keywords_days days and list those above $popular_keywords_min,
	// highest first, up to $popular_keywords_num entries.
	$popular_keys_block = '';
	$popular_keys_start_time = time() - 60*60*24*$popular_keywords_days;
	$popular_keys_start_date = date('Y-m-d H:i:s',$popular_keys_start_time);
	$url_i = 0;
	while ($url_i <= $highest_url_key)  {
		if (isset($urls[$url_i]['username']) && $urls[$url_i]['username'] != '') {
			$popular_keys_table = '';
			$popular_keys_array = array();

			// One grouped query yields each keyphrase with its summed total,
			// so no keyphrase text has to be embedded back into SQL.
			$result = mysql_query('
				SELECT keywords_keyword, SUM(keywords_total) AS keywords_sum
				FROM stats_keywords
				WHERE urls_id = "' . $url_i . '" and
					keywords_last_appeared > "' . $popular_keys_start_date . '"
				GROUP BY keywords_keyword');
			while ($result && ($row = mysql_fetch_array($result))) {
				// "+ 0" turns the numeric string from MySQL into a number for sorting.
				$popular_keys_array[$row['keywords_keyword']] = $row['keywords_sum'] + 0;
			}

			// Highest totals first. A site with a single qualifying keyphrase is listed too.
			arsort($popular_keys_array);
			$popular_i = 0;
			foreach ($popular_keys_array as $keyword => $num) {
				if ($num > $popular_keywords_min && $popular_i < $popular_keywords_num) {
					// Purely numeric keyphrases come back as integer array keys.
					$keyword = (string) $keyword;
					// Escape per context: decoded and URL-encoded for the query
					// string, HTML-escaped for the visible text.
					$keyword_param = urlencode(html_entity_decode($keyword, ENT_QUOTES, 'ISO-8859-1'));
					$keyword_text = htmlspecialchars($keyword, ENT_QUOTES, 'ISO-8859-1', false);
					$row_style = ternary(isset($row_style) ? $row_style : '','main-row-1','main-row-2');
					$popular_keys_table .= '<tr><td class="' . $row_style . '"><a href="' . $absolute_url . 'check-rank-for-keyword.php?url=' . $urls[$url_i]['url'] . '&keyword=' . $keyword_param . '" target="_blank">' . $keyword_text . '</a></td><td class="' . $row_style . '">' . $num . '</td></tr>';
					$popular_i++;
				}
			}

			if ($popular_keys_table != '') {
				$popular_keys_block .= '
					<div class="daily-key-title">' . $urls[$url_i]['url'] . '</div>
					<table class="main-table">' . $popular_keys_table . '</table>';
			}
		}
		$url_i++;
	}
	//echo $popular_keys_block;
// END OUTPUT

// START FILE OUTPUT
	// Assemble the HTML report, print it, archive it in stats_reports and
	// e-mail it to $report_email.

	// Check how long since last report (in hours)
	$last_report = '';
	$result = mysql_query('
		SELECT *
		FROM stats_reports
		ORDER BY stats_reports_id DESC
		LIMIT 1');
	if ($result && ($row = mysql_fetch_array($result))) {
		$since_last_report = time() - strtotime($row['stats_reports_date']);
		$since_last_report = round($since_last_report/60/60);
		$last_report = ' Last report was generated about <strong>' . $since_last_report . '</strong> hours ago.';
	}
	
	$output = '
		<html>
		<head>
		<title>Statistics Montage - ' . date('M j, Y') .' at ' . date('g:i A') . '</title>
		<link href="' . $css_file_location . '" rel="stylesheet" type="text/css"></link>
		<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
		</head>
		<body>
			This report was generated on <strong>' . date('M j, Y') . '</strong> at <strong>' . date('g:i A') . '</strong>. <a href="' . $reports_file_location . '">Click here to view all reports</a>. ' . $last_report . '<br />
			<h2>Unique Visitors</h2>
			' . $visit_table . '
			<h2>Search Engine Spiders</h2>
			' . $spider_table . '
			<h2>Daily Keywords</h2>
			' . $daily_key_block . '
			<h2>Popular Keywords</h2>
			' . $popular_keys_block . '
			<p><a href="' . $absolute_url . 'stats_generator.php">Click here to generate another report</a></p>
		</body>
		</html>';
	echo $output;
	
	// Escape the whole report for SQL in one step, so backslashes and both
	// quote styles are handled together.
	$mysql_output = mysql_real_escape_string($output);
	$insert = mysql_query('
		INSERT INTO stats_reports
		SET stats_reports_date = "' . date('Y-m-d H:i:s') . '",
			stats_reports_content = "' . $mysql_output . '"');
	echo mysql_error();
	
	// Send the report via mail
	// NOTE(review): the PHP manual asks for CRLF between extra headers, and
	// mail() already sets To: from its first argument; left unchanged here
	// because some MTAs are sensitive to both - confirm before changing.
	mail($report_email,'Stats Montage - ' . date('M j, Y'),$output,"To: " .$report_email . "\n" .
		"From: Your Stats <" . $report_email . ">\n" .
		"MIME-Version: 1.0\n" .
		"Content-type: text/html; charset=iso-8859-1");
// END FILE OUTPUT
?>
Return current item: Web Statistics Montage