Location: PHPKode > projects > Obsessive Website Statistics > ows/include/analysis.inc.php
<?php
/*
	$Id: analysis.inc.php 111 2007-09-27 19:18:22Z randomperson83 $

	Obsessive Web Statistics
    Copyright (C) 2007 Dustin Spicuzza <hide@address.com>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
	
	This file includes common analysis routines used by the upload scripts

*/

/* 
	This runs analysis over all dimensions on a per-line basis. It's really complicated and 
	possibly even efficient -- it has a decent caching mechanism. :)
	
	Cache system:
	
	The cache makes a few assumptions:
	
		- Most data won't be totally random, but will generally have correlations in the data
		- Database queries are more expensive than navigating arrays
	
	
	The cache tries to match dimension row id's to the primary node of the dimension.
	This requires analysis to be done, but finding something in this cache prevents a SQL query
	for the item.
	
	The cache has a configurable limiting mechanism that keeps the cache size down to a small number, 
	and then it removes 20% of the lowest-hit items and any items that haven't gotten hit again.
	
	The cache can probably be optimized some more. Right now, I've been getting around a 75-95% hit rate.
	
*/
class Analysis{
	
	// create an array of all the fields we support, so this can be merged back in later
	// REMEMBER: Case matters!
	var $merge = null;
	
	// list of plugins listed per dimension.. 
	var $plugins_pnode = array();
	var $plugins_attrs = array();
	var $plugins;
	
	var $reject_plugins;
	
	var $website = '';
	var $fact_table = '';
	var $s_fact_table = '';
	
	// each field and etc for each dimension
	var $dimensions;
	var $d_tables;		// SQL-safe version of the dimension tables
	
	var $d_names;		// SQL-safe version of the dimension name
	var $d_keys;		// SQL-safe version of the dimension keys
	
	var $d_ids;			// last id of that dimension
	var $d_sql;			// the beginning of the dimensional insert SQL statement
	var $fact_sql;		// the beginning of the fact insert SQL statement
	
	// first level cache for dimension id's
	var $cache;
	var $cache_hits;
	var $cache_misses;
	
	var $cache_sz = 25;
	var $cache_sz_lim = 5;
	
	var $initialized = false;
	
	/*
		Initialize the analysis engine.
		
		Resets every piece of per-run state, assigns the analysis plugins to the
		dimensions they serve, precomputes the dimension/fact SQL prefixes, looks up
		the next free id of each dimension table, initializes the analysis and
		rejection plugins and finally flags the website as "uploading".
		
		$website		Website being analyzed; '.' is replaced by '_' to form the fact table name
		$dimensions		Array of dimension name => attribute definitions
		$doupdate		When true the fact statement prefix is an UPDATE, otherwise an INSERT
		
		Returns true on success. Returns null when the id lookup query of a dimension
		fails, or the result of show_plugin_error()/show_error() when a plugin cannot
		be initialized.
	*/
	public function Initialize($website, $dimensions, $doupdate = false){
	
		global $cfg;
		echo ">>> Initializing analysis engine...";
	
		$this->dimensions = $dimensions;
		
		// define the merge array
		$this->merge = array(
			'Remote-Host' => '',
			'Remote-User' => '',
			'Date' => '',
			'Time' => '',
			'Method' => '',
			'Protocol' => '',
			'Status' => '',
			'Bytes-Sent' => '',
			'Referrer' => ''
		);
		
		// initialize default values
	
		// list of plugins listed per dimension.. 
		$this->plugins_pnode = array();
		$this->plugins_attrs = array();

		$this->website = '';
		$this->fact_table = '';
		$this->s_fact_table = '';

		// each field and etc for each dimension
		$this->d_tables = array();		// SQL-safe version of the dimension tables

		$this->d_names = array();		// SQL-safe version of the dimension name
		$this->d_keys = array();		// SQL-safe version of the dimension keys

		$this->d_ids = array();			// last id of that dimension
		$this->d_sql = array();			// the beginning of the dimensional insert SQL statement
		$this->fact_sql = '';			// the beginning of the fact insert SQL statement

		// first level cache for dimension id's
		$this->cache = array();
		$this->cache_hits = array();
		$this->cache_misses = array();

		$this->initialized = false;
		
		// the eviction step removes roughly a fifth of the configured cache size
		$this->cache_sz = intval($cfg['analysis_cache_sz']);
		$this->cache_sz_lim = intval($this->cache_sz / 5);
		
		$this->website = $website;
		$this->fact_table = str_replace('.','_',$website);
		$this->s_fact_table = db_escape_string($this->fact_table);
		
		// initialize the dimension plugin list
		$this->plugins = get_plugins('analysis');
		$this->reject_plugins = get_plugins('reject');
		
		// assign each plugin to a dimension
		foreach($this->plugins as $kn => $plugin){
		
			// this tells us whether or not a plugin needs to be loaded or not
			$involved = false;
		
			// get list of dimensions that the plugin supports
			$dimension = $plugin['plugin']->define_dimensions();
			
			// put it on the appropriate list
			foreach ($dimension as $k => $v){
				
				if (array_key_exists($k,$this->dimensions)){
				
					$involved = true;
					
					// a definition that contains the dimension's own name provides the primary node
					if (array_key_exists($k,$v)){
						$this->plugins_pnode[$k] = $plugin['plugin'];
						if (count($v) > 2)
							$this->plugins_attrs[$k][] = $plugin['plugin'];
					}else{
						$this->plugins_attrs[$k][] = $plugin['plugin'];
					}
				}
			}
			
			// plugins that serve none of the requested dimensions are dropped
			if (!$involved)
				unset ($this->plugins[$kn]);
			
		}
		
		echo ".";
		
		foreach ($this->dimensions as $dname => $attrs){
			
			$this->d_tables[$dname] = db_escape_string($this->fact_table . '_' . $dname);
			$this->d_names[$dname] = db_escape_string($dname);
			$this->d_keys[$dname] = db_escape_string($dname . '_id');
			
			// create the insert statements ahead of time
			$this->d_sql[$dname] = "INSERT INTO " . $this->d_tables[$dname] . " (" . $this->d_keys[$dname] . "," . $this->d_names[$dname];
			
			foreach ($attrs as $attr_name => $attr)
				if ($attr_name != $dname && $attr_name != 'pnode_is')
					$this->d_sql[$dname] .= "," . db_escape_string($attr_name);
			
			$this->d_sql[$dname] .= ') VALUES ';
			
			
			// use this to predict the upcoming id's to be inserted
			$this->d_ids[$dname] = 1;
			$result = db_query("SELECT MAX(" . $this->d_keys[$dname] . ") FROM " . $this->d_tables[$dname]);
			
			// a failed lookup aborts initialization; this used to be a brace-less
			// if/if/else whose else attached to the inner if, so the failure was ignored
			if (!db_is_valid_result($result))
				return null;
			
			// MAX() over an empty table yields NULL, which leaves the next id at 1
			if ($row = db_fetch_row($result))
				$this->d_ids[$dname] = $row[0] + 1;
			
			// per dimension: pnode => id map, pnode => hit count map, reorganization counter
			$this->cache[$dname] = array(array(),array(),0);			
			$this->cache_hits[$dname] = 0;
			$this->cache_misses[$dname] = 0;
			
			echo ".";
		}
		
		
		// create the primary insert statement ahead of time
		if ($doupdate)
			$this->fact_sql = "UPDATE $this->s_fact_table SET ";
		else
			$this->fact_sql = "INSERT INTO $this->s_fact_table (" . implode(',',$this->d_keys) . ') VALUES ';

		
		echo "done.\n";
		
		
		// initialize the analysis and rejection plugins
		echo ">>> Initializing analysis plugins...";
		foreach ($this->plugins as $plugin){
			if (is_array($plugin)){
				if (!$plugin['plugin']->InitializeAnalysis($this->website))
					return show_plugin_error($plugin,"could not be initialized!");
			}else{
				if (!$plugin->InitializeAnalysis($this->website))
					return show_error("Could not initialize analysis plugin!");
			}
			echo '.';
		}
		echo "done.\n";
		
		echo ">>> Initializing rejection plugins...";
		foreach ($this->reject_plugins as $plugin){
			if (!$plugin['plugin']->InitializeRejection($this->website))
				return show_plugin_error($plugin,"could not be initialized!");
			echo '.';
		}
		echo "done.\n";
		
		
		// finally, set a flag that an upload is in progress
		set_config_var($website,"uploading",'yes');
		
		// this boosts performance somewhat
		db_is_valid_result(db_query("SET UNIQUE_CHECKS=0"));
		
		$this->initialized = true;
		return true;
	}
	
	/*
		Does cleanup: re-enables the keys of every dimension table prepared by
		Initialize() and of the fact table, switches UNIQUE_CHECKS back on and, when
		initialization completed, clears the "uploading" flag of the website.
		
		Tables are only touched when Initialize() actually recorded them, so an
		object that was never (or only partially) initialized does not send
		statements with an empty table name.
	*/
	function __destruct(){
		
		echo ">>> Closing analysis engine...";
		
		// d_tables is null until Initialize() runs and only lists prepared tables
		if (is_array($this->d_tables)){
			foreach ($this->d_tables as $table){
				// this has no effect on InnoDB tables :(
				db_query("ALTER TABLE " . $table . " ENABLE KEYS");
				echo ".";
			}
		}
		
		// the fact table name stays '' until Initialize() sets it
		if ((string)$this->s_fact_table !== '')
			db_query("ALTER TABLE $this->s_fact_table ENABLE KEYS");
		
		// reset the flag
		db_is_valid_result(db_query("SET UNIQUE_CHECKS=1"));
		
		if ($this->initialized)
			set_config_var($this->website,"uploading",'no');
		
		echo "done\n";
		
	}

	
	/*
		Runs the pre-analysis hook of every given plugin before a round of analysis.
		
		$plugins	List whose items are either plugin objects or arrays holding the
					object under the 'plugin' key
		
		Each hook receives the website and the current next-id table. Returns false
		as soon as one hook reports failure, true when all of them succeeded.
	*/
	function preAnalysis($plugins){
		echo ">>> Running pre-analysis...";
	
		foreach ($plugins as $entry){
			
			// unwrap the array form so both shapes share one call
			$handler = is_array($entry) ? $entry['plugin'] : $entry;
			
			if (!$handler->preAnalysis($this->website,$this->d_ids))
				return false;
			
			echo '.';
		}
		
		echo "done.\n";
		return true;
	}
	
	/*
		Runs the post-analysis hook of every given plugin after a round of analysis.
		
		$plugins	List whose items are either plugin objects or arrays holding the
					object under the 'plugin' key
		
		Each hook receives the website and the current next-id table. Returns false
		as soon as one hook reports failure, true when all of them succeeded.
	*/
	function postAnalysis($plugins){
		echo ">>> Running post-analysis...";
		
		foreach ($plugins as $entry){
			
			// unwrap the array form so both shapes share one call
			$handler = is_array($entry) ? $entry['plugin'] : $entry;
			
			if (!$handler->postAnalysis($this->website,$this->d_ids))
				return false;
			
			echo '.';
		}
		
		echo "done.\n";
		return true;
	}
	
	/*
		Asks every rejection plugin whether the parsed line should be discarded.
		
		$line	Parsed log line
		
		Returns true when the first plugin votes to reject, false when none does.
	*/
	function RejectLine($line){
		
		foreach ($this->reject_plugins as $entry){
			
			$verdict = $entry['plugin']->RejectLine($this->website,$line);
			
			if ($verdict)
				return true;
		}
		
		return false;
	}
	
	/*
		Processes a set of parsed lines: resolves (or allocates) the dimension row id
		of every primary node, batch-inserts the new dimension rows and then writes
		the fact rows.
		
		$parsed_lines		Array of parsed lines (field name => value)
		$doupdate			When true every line must carry an 'id' and the existing fact
							row is updated instead of a new one being inserted
		$delete_rejected	When true a rejected line is deleted from the fact table by 'id'
		
		Returns true on success, false when a plugin or a fact query fails, or the
		result of show_error() for the reported error conditions.
		
		Fact ids are tracked by dimension name and emitted in the order of
		$this->d_keys, which is the column order used by the precomputed INSERT
		prefix. The plugin registration order is not guaranteed to match it.
		
		// TODO: need to optimize for NULL/blank values?
	*/
	function Process($parsed_lines, $doupdate = false, $delete_rejected = false){
		
		//f_keys is already defined!
		$f_values = array();
		$d_batch = array();
		
		// initialize
		foreach ($this->dimensions as $k => $v)
			$d_batch[$k] = array();
	
		foreach ($parsed_lines as $parsed_line){
			
			// ensure everything exists that we expect to exist
			$parsed_line = array_merge($this->merge,$parsed_line);
			
			// see if we should reject it
			if ($this->RejectLine($parsed_line)){
				
				// sometimes, it needs to be deleted
				if ($delete_rejected){
				
					if (!array_key_exists('id',$parsed_line))
						return show_error("Cannot delete rejected line: no id present!");
				
					if (!db_is_valid_result(db_query("DELETE FROM $this->s_fact_table WHERE id = $parsed_line[id]")))
						return show_error("Error deleting rejected line!");
				
				}
				
				continue;
			}
			
			// dimension name => dimension row id for this line
			$ids = array();
			
			// grab the primary node value
			foreach ($this->plugins_pnode as $dname => $pnode_plugin){
				
				// get primary node
				$pnode = $pnode_plugin->getPrimaryNode($this->website, $dname, $parsed_line);
				
				if ($pnode === false)
					return show_error("getPrimaryNode returned an error! Aborting...");
				
				// see if its cached
				$id = $this->cache_id($dname,$pnode);
				
				if ($id === false){
					
					// unknown primary node: claim the next id of this dimension
					$id = $this->d_ids[$dname]++;
				
					// not cached, so run analysis
					if (array_key_exists($dname,$this->plugins_attrs)){
					
						$pret = array();
						
						foreach ($this->plugins_attrs[$dname] as $attr_plugin){
							
							$ret = $attr_plugin->getAttributes($this->website, $dname, $pnode);
							
							if ($ret === false)
								return false;
							
							// the first plugin to supply an attribute wins
							$pret = $pret + $ret;
						}
					
						// add to batch
						if (count($pret) == 0)
							return show_error("Invalid result returned from plugin in dimension '$dname'",true);
					
						// escape all of the attributes
						// NOTE(review): values are emitted positionally against the columns
						// of d_sql; this assumes plugins return attributes in column order
						foreach ($pret as $k => $v)
							$pret[$k] = $this->e_str($v);
						
						$d_batch[$dname][] = "($id," . $this->e_str($pnode) . ',' . implode(',',$pret) . ')';
					}else{
						$d_batch[$dname][] = "($id," . $this->e_str($pnode) . ')';
					}
					
					$this->add_to_cache($dname,$id,$pnode);
				}
				
				$ids[$dname] = $id;
			}
			
			// line the ids up with the fact table columns
			$facts = array();
			foreach ($this->d_keys as $dname => $key)
				if (array_key_exists($dname,$ids))
					$facts[$dname] = $ids[$dname];
			
			if (!$doupdate)
				$f_values[] = '(' . implode(',',$facts) . ')';
			else
				$f_values[$parsed_line['id']] = $facts;
		}
		
		// ok, do a batch insert on each dimension
		foreach ($d_batch as $dname => $v)
			if (count($v) > 0)
				if (!db_is_valid_result(db_query($this->d_sql[$dname] . implode(',',$v))))
					return show_error("Error doing batch insert on dimension '$dname'");
		
		// adjust the cache
		$this->adjust_cache();
		
		// now, batch insert the fact table
		if (!$doupdate){
			
			if (count($f_values))
				return db_is_valid_result(db_query($this->fact_sql . implode(',',$f_values)));
			
			return true;
		}
			
		// unless its an update, then we need to do more annoying work
		foreach ($f_values as $id => $values){
			
			$vs = array();
			
			// pair every id with the key column of its own dimension
			foreach ($values as $dname => $value)
				$vs[] = $this->d_keys[$dname] . " = $value";
			
			if (!db_is_valid_result(db_query($this->fact_sql . implode(',',$vs) . " WHERE id = $id")))
				return false;
		}
		
		return true;
	}
	

	
	/*
		Resolves the row id of a primary node value within a dimension.
		
		$dname	Dimension name
		$pval	Primary node value
		
		The in-memory cache is consulted first; a hit bumps the hit statistics and the
		per-item hit count. On a miss the dimension table is queried and a found id
		is added to the cache. Returns the id, or false when the value is unknown.
	*/
	function cache_id($dname, $pval){
	
		$known = $this->cache[$dname][0];
	
		if (array_key_exists($pval,$known)){
			
			$this->cache_hits[$dname] += 1;
			$this->cache[$dname][1][$pval] += 1;
			
			return $known[$pval];
		}
		
		// statistics
		$this->cache_misses[$dname] += 1;
		
		// make a query to see if the data exists
		$lookup = "SELECT " . $this->d_keys[$dname]
			. " FROM " . $this->d_tables[$dname]
			. " WHERE " . $this->d_names[$dname] . " = " . $this->e_str($pval);
		
		$result = db_query($lookup);
		
		if (!db_has_rows($result))
			return false;
		
		$row = db_fetch_row($result);
		
		if (!$row)
			return false;
		
		$this->add_to_cache($dname,$row[0],$pval);
		return $row[0];
	}
	
	/*
		Registers a primary node value and its row id in the cache of a dimension.
		The id is stored as an integer and the item's hit count starts at zero.
	*/
	function add_to_cache($dname,$id,$pval){
	
		$row_id = (int) $id;
		
		$this->cache[$dname][0][$pval] = $row_id;	// value => id
		$this->cache[$dname][1][$pval] = 0;			// value => hits
	}
	
	/*
		This ensures that the cache doesnt get too big, and removes items when
		needed.
		
		A dimension cache is only reorganized once it holds more than cache_sz items.
		Items are then visited from the lowest hit count upwards and removed while
		the cache is still larger than (cache_sz - cache_sz_lim) or the item was never
		hit again after being added. The third slot of the dimension cache counts
		the reorganizations.
		
		The cache is modified through its dimension key: iterating $this->cache by
		value would only change a temporary copy and nothing would ever be evicted.
	*/
	function adjust_cache(){
	
		foreach (array_keys($this->cache) as $dname){
	
			if (count($this->cache[$dname][0]) <= $this->cache_sz)
				continue;
			
			$ids = $this->cache[$dname][0];
			$hits = $this->cache[$dname][1];
			
			// lowest hit counts first, keys preserved
			asort($hits);
			
			// this might help prevent cache reorganizations for low-cached data
			$target = $this->cache_sz - $this->cache_sz_lim;
			
			// TODO? Look at the rest and reduce their hit count to 'age' the non-used items?
			foreach ($hits as $pval => $count){
				
				// every later item has at least as many hits, so nothing else qualifies
				if (count($ids) <= $target && $count != 0)
					break;
				
				unset($ids[$pval]);
				unset($hits[$pval]);
			}
			
			$this->cache[$dname][0] = $ids;
			$this->cache[$dname][1] = $hits;
			$this->cache[$dname][2] += 1;
		}
	}
	
	/*
		Zeroes the per-dimension hit and miss counters. The cached ids themselves
		are left untouched.
	*/
	function reset_cache_stats(){
		
		foreach (array('cache_hits','cache_misses') as $counter)
			foreach ($this->$counter as $dname => $unused)
				$this->{$counter}[$dname] = 0;
	
	}
	
	
	/*
		Turns a value into a SQL literal for the statements built by this class.
		
		Numeric values are passed through as they are, booleans become TRUE/FALSE,
		a value equal to '-' becomes the empty string literal and everything else
		is escaped and wrapped in single quotes.
	*/
	function e_str($str){
	
		if (is_numeric($str))
			return $str;
		
		if (is_bool($str))
			return $str ? 'TRUE' : 'FALSE';
		
		if ($str == '-')
			return "''";
		
		return "'" . db_escape_string($str) . "'";
	}
	
	/*
		This reanalyzes data, when there is already preexisting data.
		
		Dimensions are handled in one of two ways:
		
			- Partial analysis: the attributes of every existing dimension row are
			  recomputed from its stored primary node and written back with an UPDATE.
			- Full analysis: for dimensions that define their own primary node and do
			  not set 'pnode_is', the dimension table is emptied, the logfile is
			  reconstructed from the database and every line is processed again in
			  update mode.
		
		Work is done in transactions of at least 100 rows ($cfg['upload_flush']).
		Every transaction that was opened is committed or rolled back before the
		function leaves it, including the last stage of the full analysis.
		
		Returns true on success, false on a database or plugin failure, or the
		result of show_error() for the reported error conditions.
		
		NOTE(review): TRUNCATE TABLE causes an implicit commit on MySQL, so emptied
		dimension tables are presumably not restored by a later rollback - confirm
		for the database in use.
		
		TODO: Examine common elements and see if refactoring is possible.
	*/
	function reAnalyze(){
	
		global $cfg;
		
		$flush_count = $cfg['upload_flush'] > 100 ? $cfg['upload_flush'] : 100;
	
		echo ">>> Running analysis on dimensions: \n";
		$full_dimensions = array();
		
		// first, pull out completely new dimensions, and run partial analysis
		// on dimensions that already exist
		foreach ($this->dimensions as $dname => $attrs){
		
			if (array_key_exists($dname,$attrs) && !(array_key_exists('pnode_is',$attrs) && $attrs['pnode_is'] !== null)){
				$full_dimensions[] = $dname;
				continue;
			}
						
			// show how many rows to go
			$max = 0;
			if (!db_is_valid_result($result = db_query("SELECT COUNT(*) FROM " . $this->d_tables[$dname])))
				return false;
			$row = db_fetch_row($result);
			if ($row != null)
				$max = $row[0];
			
			if ($max == 0)
				continue;
				
			// if it doesn't have attributes, continue on our way
			if (!array_key_exists($dname,$this->plugins_attrs)){
				echo "\n-> Dimension '$dname' does not have attributes... skipping partial analysis\n";
				continue;
			}
			
			echo "\n-> Partial analysis on $dname: $max rows\n";
			
			// run the partial analysis 
			$u_str = "UPDATE " . $this->d_tables[$dname]. " SET ";
			
			// number of rows handled so far, doubles as the paging offset
			$queries = 0;
			
			do{
				
				if (!db_is_valid_result(db_begin_transaction()))
					return show_error("Could not start transaction!");
			
				$result = db_query("SELECT " . $this->d_names[$dname]. " FROM " . $this->d_tables[$dname] . " LIMIT $queries,$flush_count");
				
				if (!db_is_valid_result($result)){
					db_rollback_transaction();
					return false;
				}
			
				if (!db_has_rows($result)){
					// nothing left to do: close the transaction that was just opened
					db_rollback_transaction();
					break;
				}
			
				if (!$this->preAnalysis($this->plugins_attrs[$dname])){
					db_rollback_transaction();
					return show_error("Error in preanalysis");
				}
			
				while ($row = db_fetch_row($result)){
				
					$queries += 1;
				
					if ($max > 10000)
						show_progress("\tPartial analysis",10000);
				
					$pnode = $row[0];
					$pret = array();
							
					foreach ($this->plugins_attrs[$dname] as $plugin){
						
						$ret = $plugin->getAttributes($this->website, $dname, $pnode);
						
						if ($ret === false){
							db_rollback_transaction();
							return false;
						}
						
						// the first plugin to supply an attribute wins
						$pret = $pret + $ret;
					}
				
					if (count($pret) == 0){
						db_rollback_transaction();
						return show_error("Invalid result returned from plugin in dimension '$dname'",true);
					}
				
					// create the query
					foreach ($pret as $k => $v)
						$pret[$k] = db_escape_string($k) . ' = ' . $this->e_str($v);
					
					// run it now, cant do updates in batches
					if (!db_is_valid_result(db_query($u_str . implode(',',$pret) . " WHERE " . $this->d_names[$dname]. " = " . $this->e_str($pnode)))){
						db_rollback_transaction();
						return false;
					}
				}
				
				// finish it all up
				if (!$this->postAnalysis($this->plugins_attrs[$dname])){
					db_rollback_transaction();
					return show_error("Error in postanalysis.");
				}
				
				// commit the transaction here
				if (!db_is_valid_result(db_commit_transaction()))
					return show_error("Could not commit transaction!");
				
			}while(true);
		}
		
		// run full analysis on the other dimensions. This means we just totally delete the dimension, and
		// run a full set of analysis on the dimension. We cannot do this for dimensions that define pnode_is,
		// because that implies the pnode is directly derived from the logfile, and that requires the 
		// original logfile.
		
		if (count($full_dimensions) > 0){
		
			echo "\n>>> Preparing full reanalysis for selected dimensions...";
		
			if (!db_is_valid_result(db_begin_transaction()))
				return show_error("Could not start transaction!");
		
			// reinitialize this object with only full dimensions for analysis, and delete that dimension
			foreach ($full_dimensions as $dimension){
				
				// the statement has to go through db_query(); validating the bare SQL
				// string never executed it
				if (!db_is_valid_result(db_query("TRUNCATE TABLE " . $this->d_tables[$dimension]))){
					db_rollback_transaction();
					return show_error("Could not delete rows in dimension '$dimension'");
				}
				echo " '$dimension'...";
			}
			
			foreach ($this->dimensions as $dname => $attrs){
				if (!in_array($dname,$full_dimensions))
					unset ($this->dimensions[$dname]);	
			}
		
			echo "done.\n";
		
			if (!$this->Initialize($this->website, $this->dimensions, true)){
				db_rollback_transaction();
				return show_error("Could not reinitialize the analysis object!");
			}

			// finally commit the transaction
			if (!db_is_valid_result(db_commit_transaction()))
				return show_error("Could not commit transaction");
				
			// ok, so the next step is use a query to reconstruct the logfile
			if (!($sql = reconstruct_logfile($this->website,true)))
				return false;
			
			
			// now, we keep running the queries over and over again.. 			
			$queries = 0;
			$stage = 0;
			$done = false;
			
			do{
				echo "\n>>> Starting processing stage $stage:\n";
				
				if (!db_is_valid_result(db_begin_transaction()))
					return show_error("Could not start transaction!");
			
				if (!$this->preAnalysis($this->plugins)){
					db_rollback_transaction();
					return show_error("Error in preanalysis");
				}
			
				$next_transaction = ($stage + 1) * $flush_count;
			
				do{
					// commit every once in awhile
					if ($next_transaction < $queries)
						break;
					
					$result = db_query("$sql LIMIT $queries,$cfg[upload_lines]");
					
					if (!db_is_valid_result($result)){
						db_rollback_transaction();
						return false;
					}
				
					if (!db_has_rows($result)){
						$done = true;
						break;
					}
				
					$rows = array();
				
					while ($row = db_fetch_assoc($result)){
					
						$rows[] = $row;
					
						$queries += 1;
						show_progress("\tLines processed so far",10000);
					
						if ($next_transaction < $queries)
							break;
					}
					
					
					// analyze the line and upload it
					if ($this->Process($rows, true, true) === false){
					
						db_rollback_transaction();
						return show_error("Error processing line in file.");
					}
					
				}while(true);
				
				// finish it all up
				if (!$this->postAnalysis($this->plugins)){
					db_rollback_transaction();
					return show_error("Error in postanalysis.");
				}
				
				// TODO: Should this be somewhere else?
				if (!$this->deleteStale()){
					db_rollback_transaction();
					return false;
				}
				
				// commit the transaction here; the last stage is committed as well,
				// otherwise its updates would stay in an open transaction
				if (!db_is_valid_result(db_commit_transaction()))
					return show_error("Could not commit transaction!");
				
				if ($done)
					break;
				
				$stage += 1;
				
			}while(true);
		}
		
		echo "OK.\n";
		
		return true;
	}
	
	/*
		This runs analysis on a file.
		
		$website			Current site
		$hfile				Handle to file to analyze (read with the gz* functions)
		$rows				Initial lines to be inserted
		$first_read_line 	First line of the file
		$apache_log			Apache Log Parsing object
		
		The file is processed in stages. A stage is one database transaction that
		ends once $cfg['upload_flush'] (at least 100) good lines were read or the file
		is exhausted. Within a stage the parsed lines are handed to Process() whenever
		more than $cfg['upload_lines'] of them have piled up. After each stage the
		first/last line and the file position of the last good line are stored in the
		configuration of the website.
		
		Returns nothing on success. On failure the open transaction is rolled back
		and false or the result of show_error() is returned.
	*/
	function analyzeFile($website,$hfile,$rows,$first_read_line,$apache_log){
	
		global $cfg;
	
		// keys are deliberately not disabled here: ALTER TABLE ... DISABLE KEYS
		// has no effect on InnoDB tables :(
	
		// dont set this too low
		$flush_count = $cfg['upload_flush'] > 100 ? $cfg['upload_flush'] : 100;

		echo "\n>>> Now parsing given logfile...\n";
		
		// number of stages we've been through so far
		$stage = 0;
		$linenum = 0;
		
		do{
			
			$stage += 1;
			echo "\n>>> Starting processing stage $stage:\n";
			
			// begin SQL transaction
			if (!db_is_valid_result(db_begin_transaction()))
				return show_error("Could not begin transaction.");

			// run preanalysis
			if (!$this->preAnalysis($this->plugins)){
				db_rollback_transaction();
				return show_error("Error in pre-analysis.");
			}
				
			// setup state variables
			$good_lines = 0;
			$bad_lines = 0;
				
			//$first_read_line = null;		// don't re-initialize this one
			$last_read_line = null;			// initialize this, however
			$last_read_tell = 0;

			// query counter at the start of the stage, for the debug statistics
			$q = $cfg['db_queries'];
			
			
			// main loop: read each line and parse it
			while (!gzeof($hfile)){
				
				$linenum += 1;
				$tell = gztell($hfile);
				
				// seriously, if the line is bigger than this, there are some other issues
				if (($line = gzgets($hfile,65535)) === false){
					
					// if the file isn't compressed, then the last line returns a false
					if (gzeof($hfile))
						break;
						
					db_rollback_transaction();
					return show_error("Error reading line from logfile for some reason\n");
				}
				
				$line = trim($line);
				
				// set this, doesn't matter whether its good or bad
				if ($first_read_line == null)
					$first_read_line = $line;

				if (($parsed_line = $apache_log->parse($line)) === null){
					
					if ($cfg['debug'])
						echo "Bad line found on $linenum: $line\n\n";
					$bad_lines += 1;
				}else{
				
					$good_lines += 1;
					show_progress('Lines processed so far',10000);
					
					// last line that wasn't an error.. 
					$last_read_tell = $tell;
					$last_read_line = $line;
					
					// queue the line before any flush decision, otherwise the line that
					// triggers the flush would be dropped
					$rows[] = $parsed_line;
					
					// flush to the database every once in awhile
					if ($good_lines % $flush_count == 0)
						break;
					
					// do analysis only every once in awhile, however
					if (count($rows) > $cfg['upload_lines']){
					
						// analyze the line and upload it
						if ($this->Process($rows) === false){
						
							db_rollback_transaction();
							return show_error("Error parsing line in file.");
						}
						
						$rows = array();
					}
				}
			}
			
			// finish these up
			if (count($rows) > 0){
				
				// analyze the line and upload it
				if ($this->Process($rows) === false){
				
					db_rollback_transaction();
					return show_error("Error parsing line in file.");
				}
			}
			
			echo $good_lines . " good lines\n";
			echo $bad_lines . " bad lines\n\n";

			if ($cfg['debug']){
			
				$h1 = array_sum($this->cache_hits);
				$m1 = array_sum($this->cache_misses);
				$x1 = ($h1 + $m1) ? number_format($h1/($h1 + $m1),2) : 0;
				
				echo "Cache stats:  " . $h1 . "h " .  $m1 . "m ($x1)\n";
				
				$this->reset_cache_stats();
				
				echo ($cfg['db_queries'] - $q) . " SQL queries/inserts\n";
			}
			
			// run postanalysis
			if (!$this->postAnalysis($this->plugins)){
				db_rollback_transaction();
				return show_error("Error in postanalysis.");
			}
			
			// store position information
			if ($last_read_tell > 0){
				
				if (!set_config_var($website,'uploadlog_first_line',$first_read_line) ||
				!set_config_var($website,'uploadlog_last_line',$last_read_line) ||
				!set_config_var($website,'uploadlog_filepos',$last_read_tell)){
					db_rollback_transaction();
					return show_error("Could not store information in configuration database!");
				}
			}
			
			// TODO: Should this be somewhere else?
			if (!$this->deleteStale()){
				db_rollback_transaction();
				return false;
			}
				
			
			// initialize this here
			$rows = array();
			
			// finally commit the transaction
			if (!db_is_valid_result(db_commit_transaction()))
				return show_error("Could not commit transaction");
		
		}while(!gzeof($hfile));
	
	}
	
	/*
		Removes dimension rows that no fact row refers to anymore.
		
		Nothing happens unless $cfg['cleanup_stale'] is set and at least one rejection
		plugin is loaded. Otherwise every dimension table is joined against the fact
		table on its key column and the rows without a matching fact row are deleted.
		
		Returns false as soon as one delete fails, true otherwise.
	*/
	function deleteStale(){
	
		global $cfg;
		
		if (!$cfg['cleanup_stale'] || count($this->reject_plugins) <= 0)
			return true;
		
		echo ">>> Deleting stale dimension records...";
		
		$fact = $this->s_fact_table;
		
		foreach ($this->d_tables as $dname => $table){
			
			$key = $this->d_keys[$dname];
			$sql = "DELETE $table FROM $table LEFT OUTER JOIN $fact ON $fact.$key = $table.$key WHERE $fact.id IS NULL";
			
			if (!db_is_valid_result(db_query($sql)))
				return false;
			
			echo ".";
		}
		
		echo "done.";
		
		return true;
	}
}

/*
	Guards against two uploads running at the same time for one website.
	
	Scripts create this object and call Locked() before creating an Analysis
	object. The lock is the "uploading" configuration variable of the website; it
	is only used when the website's configuration table exists.
*/
class AnalysisLock{

	var $locked = false;	// true once this object has set the flag itself
	var $website;

	function __construct($website){
		$this->website = $website;
	}
	
	// tells whether the configuration table of the website exists
	private function config_table_exists(){
		$table = db_escape_string(str_replace('.','_',$this->website));
		return db_has_rows(db_query("SHOW TABLES LIKE '" . $table . "_config'"));
	}
	
	/*
		Checks to see if analysis is already occurring and takes the lock otherwise.
		
		When an upload is flagged as running the result of show_error() is returned.
		In every other case the flag is set (if the table exists) and false is returned.
	*/
	function Locked(){
		
		if (!$this->config_table_exists())
			return false;
		
		if (get_config_var($this->website,"uploading") == "yes")
			return show_error("An upload is already in progress! Cannot continue. If this is not the case, use unlock.php to resolve this issue.",false,true);
		
		$this->locked = set_config_var($this->website,"uploading","yes");
		
		return false;
	}
	
	// forces an unlock, regardless of who set the flag
	function Unlock(){
		
		if (!$this->config_table_exists()){
			echo ">>> Lock table does not exist for $this->website\n";
			return;
		}
		
		if (set_config_var($this->website,"uploading","no"))
			echo ">>> Lock forced open for $this->website\n";
		else
			echo ">>> Lock could not be forced open for $this->website!\n";
	}

	// releases the lock, but only when this object took it
	function __destruct(){
		
		if (!$this->locked)
			return;
		
		if ($this->config_table_exists())
			set_config_var($this->website,"uploading","no");
	}

}

?>
Return current item: Obsessive Website Statistics