Location: PHPKode > scripts > csv2mysql database migration tool > csv2mysql-database-migration-tool/csv2mysql.inc
<?php

 
///////////////////////////////////////////////////////////////////////////
// Class to inport csv in alien formats into any datastructure
// written by mark williamson hide@address.com
// Date: 3/7/02 
///////////////////////////////////////////////////////////////////////////

$incPath="./";
//needed for some error messaging with certain debug settings 


//needed for some outputting and some debug functions. 
include_once("$incPath/htmltablecreation.inc");


class Import{

	var $csv_fields=array(); //fields in csv to care about...
	var $csv_file; //file to open
	var $csv_data; //data in file
	var $csv_array = array(); //array of data in file
	var $csv_all_fields = array(); //all the field names in the csv file
	var $csv_data_all = array(); //all the data
	var $csv_data_keyed = array(); //all the data
	var $tmp_array=array();
	var $mysql_array=array();
	var $table; //table to insert into
	var $mysql_link; //database
	var $instructions=array(); //sql and default settings
	var $instruct_functions = array(); //function calls
	var $debug=0; // show debug information - step by step messages
	var $return_debug=0; // return debug information instead of echoing it
	var	$vals_per_insert=100; //number of values to add per insert statement of SQL
	var $format="linux";
	var $linebreak_char="\n";
	var $delimit_char = ",";
	var $use_external_csv_header_file = 0;
	var $external_headers="";
	
	function Import(){
		echo "\n";
		return(TRUE);
	}

	var $resetProgress=0;
	function reset_class_state(){
		$this->dmsg("Resetting class state", 2);
		$this->csv_data_all = array();
		$this->csv_fields = array();
		$this->csv_data_keyed = array();
		$this->tmp_array = array();
		$this->mysql_array = array();
		$this->instructions = array();
		$this->instruct_functions = array();
		$this->error_messages = "";
		$this->output_sql = "";
		$this->resetProgress=1;
		$this->dmsg("Ready for new table and data definitions", 2);
	}

	function set_delimit_char($ch){
		$this->delimit_char = $ch;
		$this->dmsg("Set delimiter character to $ch", 2);
	}

	function debug($d=0,$r=0){
		$this->debug=$d;
		$this->return_debug=$r;
	}

	var $y=1;
	var $error_messages="";
	var $debug_level = 1;
	function set_debug_level($dl=1){
		$this->debug_level = $dl;
	}

	function dmsg($string, $dlvl=1){
		global $PHP_SELF;
		//debug level - 1-4 //1 is status messages only - 4 is full debugging
		if($dlvl <= $this->debug_level){
			if($this->debug){
				if(!$this->return_debug){
					if($PHP_SELF){
						echo "&bull; ".$string."<BR>\n";
					}else{
						echo "\n# $string ";
					}
				}else{
					$this->error_messages .="# $string <BR>\n";	
				}
				$this->y++;
			}
		}
	}

	var $terminal_width=100;
	var $queries_per_dot = 20;

	function progress_guess(){
		$calculations_to_perform = `wc -l $this->csv_file`;
		$funcs_per_row = count($this->instruct_functions);
		$total_function_calls = $calculations_to_perform * $funcs_per_row;
		$this->queries_per_dot = round($total_function_calls / 100);
		$this->dmsg("functions: $total_function_calls function calls to be made - dots at $this->queries_per_dot calls per dot", 3);
	}

	function set_terminal_width($w){
		$this->terminal_width=$w;
	}

	function set_resetProgress($on=1){
		$this->resetProgress=1;
	}
	function show_progress_meter(){
		static $count=0;
		static $first=1;	
		static $ticker=0;

		//reset class state triggers this action
		if($this->resetProgress){
			$count=0;
			$first=1;
			$ticker=0;
			$this->resetProgress=0;
		}

		if($this->debug){
			if($count == 0 && $first){
				echo "\n#";
				$first=0;
			}

			if($count > $this->terminal_width ){
				echo "\n#";
				$count=0;
			}
						//queries per dot
			if($ticker == $this->queries_per_dot){
				echo ".";
				$ticker=0;
				$count++;
			}else{
				$ticker++;
			}
		}
	}

	function blank_error_messages(){
		$this->error_messages = "";
	}

	function get_error_messages(){
		return($this->error_messages);
	}

	function add_field_alias($csv_field, $mysql_field){
		//add element to array
		$this->csv_fields[$csv_field] = $mysql_field;
		if(ereg("^TMP__", $mysql_field)){
			$this->dmsg("added temporary field alias $csv_field --> $mysql_field", 2);
		}else{	
			$this->dmsg("added field alias $csv_field --> $mysql_field", 2);
		}
	}

	//only call this after calling read_csv_file - otherwise array will be missing.
	//ALSO - ONLY CALL AFTER create_csv_data_array() has been called for any initial add_field_alias values. 
	//otherwise these fields are missing from the end array but no warnings are dipsplayed!! :( 
	function duplicate_field($csv_field, $mysql_field){
		
		if(!is_array($this->mysql_array)){
			echo "Run read_csv_file(\$file) first to create necessary arrays"; die();
		}

		reset($this->mysql_array);
		while(list($key, $this->field) = each($this->mysql_array)){
			$this->mysql_array[$key][$mysql_field] = $this->csv_data_all[$key][$csv_field];
		}
		reset($this->mysql_array);
		$this->dmsg("duplicated field $csv_field --> $mysql_field", 1);
	}

	function use_external_headers($headerfile){
				
			$this->dmsg("Reading in external csv headers from $headerfile.", 2);
			if(file_exists($headerfile)){
				$fd = fopen($headerfile, "r");
				$this->external_headers = fread($fd, filesize($headerfile));
				$this->external_headers = trim($this->external_headers);
				fclose($fd);
		 		$this->use_external_csv_header_file = 1;
			}else{
				$this->dmsg("Error - csv header file $headerfile does not exist!" );
				die();
			}

			$this->dmsg("Got headers:\n $this->external_headers", 2);
	}

	//clean file will replace commas quotes and apostrophies with safe alternatives - 
	//create_sql knows about the aliases and flips them back in the final sql output
	function clean_file($text){
		$rows = explode($this->linebreak_char,$text);
		$return = "";
		while(list($key,$val)=each($rows)){
		//replace commas apostrophies and quotes.... these are flipped back to propper values in the create_sql function	
			$val = preg_replace("/\,\"(.*)([\,])(.*)\"\,/", ',"\\1&comma;\\3",',  $val, -1);
			$val = str_replace("\"\"", "&quote;", $val);
			$val = str_replace("'", "&appos;", $val);
			$return .= $val."\n";
		}
		return($return);
	}

	var $pages = array();
	function find_xls_pages($file){
		$command = " xlhtml -dp -asc $file";
		$xls_detail = `$command`;
		
		$this->dmsg($xls_detail);

	}

	//function can read xls files and create the necessary arrays from that (by using xlhtml application to craete a csv in /tmp)
	function read_xls_file($file){
		//use xlhtml to convert...
		$this->dmsg("checking if xlhtml is installed...", 3);
		$xlhtml_check = `which xlhtml 2>/dev/null`;
		if(strlen($xlhtml_check)==0){
			$this->dmsg("You must install xlhtml to be able to import xls files directly into csv2mysql", 1);
			die("\n# Script Ended\n");
		}
		$this->dmsg("$xlhtml_check");
		$tmpfilename="/tmp/TMPCSV2MYSQL.CSV";

		$this->find_xls_pages($file);

		$command = "$xlhtml_check -csv -xp:0 $file > $tmpfilename";
		$this->dmsg("Running command $command");
		exec($command);
		$this->read_big_csv_file($tmpfilename);
	}

	var $fd;
	function read_csv_file($file){
		//call better function - reads in file line by line
		$this->read_big_csv_file($file);
	}

	var $buffer;
	var $csvfields;
	var $csv_all_possible_fields = array();
	var $csvFields = array();
	function  read_big_csv_file($file){
		$this->dmsg("Reading data from csv file: $file", 1);
		$this->csv_file = $file;
		
		if(!$this->fd=fopen($this->csv_file, "r")){
			echo "Unable to open csv file\n\n"; die();
		}

		/*
		$x=0;
		while(!feof($this->fd)){
			$this->buffer = fgets($this->fd, 4096);
			$this->csv_data .= $this->buffer;	
			$x++;	
		}
		*/

		//creates an array called data....
		while($data = fgetcsv($this->fd, 1000, $this->delimit_char)){
			$num = count($data);
			$count=1;
			foreach($data as $cell){
				$cell = str_replace("\"", "&quote;", $cell);		
				$cell = str_replace("'", "&appos;", $cell);		
				$cell = str_replace(",", "&comma;", $cell);
				$this->csv_data .= $cell;
				if($count < $num){	
					$this->csv_data .= $this->delimit_char;
					$count++;
				}
			}
			$this->csv_data .= $this->linebreak_char;
		}

		//die($this->csv_data);
		//clean file will replace commas quotes and apostrophies with safe alternatives - 
		//create_sql knows about the aliases and flips them back in the final sql output
		//$this->csv_data = $this->clean_file($this->csv_data);

		//if we are using an external header file then add the headers to the top of the read in data for simplicities sake...
		if($this->use_external_csv_header_file){
			$this->csv_data = $this->external_headers."\n".$this->csv_data;	
		}
				/* create array of the field names */
	
	
		$this->csvfields = explode($this->linebreak_char, $this->csv_data);
		$this->csv_all_possible_fields = $this->csvfields[0];
			//explode these on commas
		$this->csvfields = explode($this->delimit_char, $this->csv_all_possible_fields);
		foreach($this->csvfields as $field){
			$this->csvFields[] = addslashes($field);	
		}
		
			

		$this->dmsg("Finished reading data from csv file", 2);
	}

	var $tmp;
	function set_format($format){
		$this->format = $format;
		if($format == "win"){
			$this->linebreak_char = "\r\n";
			$this->tmp = "\\r\\n";
		}else{
			$this->linebreak_char = "\n";
			$this->tmp = "\\n";
		}
		//$this->dmsg("Line break format set to ".addslashes($this->linebreak_char));
		$this->dmsg("Line break format set to $this->tmp", 2);
	}

	var $lines;	
	var $stripQuotes = 1;
	function setStripQuotes($on=1){
		$this->stripQuotes = $on;
	}

	//function will create csv_data_all array - keyed on csv field names holding values.
	function create_csv_data_array(){


		if(empty($this->csv_data)){
			echo "No data in csv file $this->csv_file\n"; die();
		}

		$this->lines = array();
		$this->csv_data_all = array();
	
		$this->dmsg("Started to create data array", 1);
		$this->lines = explode($this->linebreak_char, $this->csv_data);
		foreach($this->lines as $line){
			if(!empty($line)){
				$values = explode($this->delimit_char, $line);

				//strip the " tags wrapping the strings...
				foreach($values as $k=>$v){
					if($this->stripQuotes){
						$len = strlen($v)-1;
						//are the first and last characters quotes? 
						$v = stripslashes($v);
						if(ereg("^\".+\"$", $v)){ 
							//if so replace them with spaces and then trim them off
							//$v[0] = " ";
							//$v[$len] = " ";
							//$v = trim($v);	
							//$v = str_replace("\"", "", $v);

						}	

					}
					$v = str_replace("\"", "", $v);
					//$values[$k] = addslashes($v);
					$values[$k] = $v;
				}
				$this->csv_array[] = $values;
			}
		}
		$this->csv_all_fields = $this->csv_array[0];

		//skip line 1 as this is the csv definition then run through the array - 
		//create a new array keyed on the field names of the csv and holding the values 
		$count=0; 
		reset($this->csv_array);
		while(list($foo,$el)=each($this->csv_array)){
			if($count>0){
				foreach($el as $key=>$val){
					$this->csv_data_keyed[$this->csv_all_fields[$key]]=$val;		
				}
				$this->csv_data_all[] = $this->csv_data_keyed;
			}
			$count++;
		}
		reset($this->csv_array);

		$this->dmsg("finished creating initial data array", 2);
		$this->convert_array_to_mysql_keys();
	}

	function convert_array_to_mysql_keys(){

		reset($this->csv_data_all);
		$this->dmsg("creating keyed data array", 2);
		//loop through all specified fields - create new array keyed on the specified aliases.
		reset($this->csv_data_all);

		while(list($foo,$data)=each($this->csv_data_all)){

			foreach($this->csv_fields as $key=>$field){
				$this->tmp_array[$field] = $data[$key];
			}
			$this->mysql_array[] = $this->tmp_array;

		}
		reset($this->csv_data_all);	
	}


	var $sql;
	var $result;	
	var $tbl_structure;
	var $mysqlFields = array();
	//specify database table to insert into.....
	function specify_table($table){
		$this->table = $table;	
		$this->dmsg("looking at database table $table", 2);
		$this->sql = "desc  $this->table";

		$this->result = mysql_query($this->sql, $this->mysql_link) or die(mysql_error());
		while($rows = mysql_fetch_array($this->result, MYSQL_ASSOC)){
			$this->tbl_structure[] = $rows;	
			$this->mysqlFields[] = $rows['Field'];
		}
		$this->dmsg("finished looking at table $table", 2);
	}

	//adds a static alias - if you always want field "foobar" to say "hello" then add_static_alias("foobar", "hello"); will do this for every row
	function add_static_alias($field, $value){
		$this->add_db_instruction($field, $value);
	}

	//add an sql instruction for a particular field
	function add_db_instruction($field, $sql, $key=""){
		$instruct['field'] = $field;
		$instruct['sql'] = $sql;
		$instruct['select_key'] = $key;
		$this->instructions[]=$instruct;
		$this->dmsg("added db instruction or static alias for $field", 2);
		$this->set_resetProgress();
	}


	function add_function_instruction($field, $functionname, $args){
		$instruct['field'] = $field;
		$instruct['function_name'] = $functionname;
		$instruct['args'] = $args;
		$this->instruct_functions[] = $instruct;
	
		$this->dmsg("added function instruction for $field", 2);
		$this->set_resetProgress();
	}

	//execute function instructions
	var $call;
	function get_function_instruction_values(){
		$this->progress_guess();
		$this->dmsg("calculating values for function instructions", 2);
		reset($this->mysql_array);
		while(list($key,$element)=each($this->mysql_array)){
			
			foreach($this->instruct_functions as $function){

				$this->call = "\$function_result = ".$function['function_name'];

				//split the arguments up to calculate values...
				$args = explode(",", $function['args']);
				$arg_call = "";

				$num_args = count($args)-1;
				$count=0;

				//get the arg values
				foreach($args as $arg){
					$arg = trim($arg);
					$arg_call.="\"".$element[$arg]."\"";
					if($count<$num_args){
						$arg_call .= ",";
					}
					$count++;
				}
		
				//add arg values to end of function
				$this->call.="(".$arg_call.");";

				//eval the created function call - variable function_result populated with return value
				//die($this->call);
				$this->dmsg("Run function instruction : $this->call", 4);
				$this->show_progress_meter();
				eval($this->call);

				//add the value to the mysql_array 
				$this->mysql_array[$key][$function['field']] = $function_result;
			}	
		}
		reset($this->mysql_array);
	}

	//execute db instructions 
	var $product_id;
	function get_instruction_values(){
	
		$this->dmsg("calculating values for db instructions", 2);
		
		reset($this->mysql_array);
		while(list($key,$element)=each($this->mysql_array)){

				foreach($this->instructions as $ins){
					if(eregi("SELECT", $ins['sql'])){
							$ins['sql'] = stripslashes($ins['sql']);
							$this->select_key = $ins['select_key'] ;
						
							//if we havent yet got a value for this...
							if(!isset($this->mysql_array[$key][$ins['field']])){
								$this->mysql_array[$key][$ins['field']] = $this->run_sql_query($ins['sql'], $element[$this->select_key]);
							}
					}else{
						$this->mysql_array[$key][$ins['field']] = $ins['sql'];
					}
				}
			$this->show_progress_meter();
		}
		reset($this->mysql_array);
	}

	var $runsql;
	var $dbrows;
	var $dbkey;
	//not what you'd think function -- this is for evaling code - for the instructions. 
	//TODO rename function to a more logical name reflecting what it does better.
	function run_sql_query($sql, $dbkey){

		$this->runsql = stripslashes($sql);
		$this->dbkey = $dbkey;
	
		//eval the code into this->runsql - 
		//this makes the dbkey get populated with a value instead of being a string	

		eval("\$this->runsql = \"$this->runsql\";");
		$this->dmsg("running sql query..$this->runsql", 4);

		//run query and return result
		$this->result = mysql_query($this->runsql, $this->mysql_link) or die(mysql_error());

		while($rows = mysql_fetch_array($this->result, MYSQL_ASSOC)){
			$this->dbrows = $rows['result'];	
			$this->dmsg("$this->runsql returns: $this->dbrows", 4);
		}

			$this->dmsg("$this->runsql returns: $this->dbrows", 4);
		
		return($this->dbrows);
	}

	//connect to the database
	function db_connect($host, $user, $pass, $db){
		$this->dmsg("connecting to database", 2);
		$this->mysql_link = mysql_pconnect("$host", "$user", "$pass") or die(mysql_error());
		if($this->mysql_link){
			mysql_select_db($db);
		}		
	}

	var $specified_fields=array();
	var $sql_error=0;
	var $error_fields = array();

	//check that all the fields required for the table are present and correct....
	function validate_sql(){
		$this->dmsg("validating array elements to generate sql from", 1);

		reset($this->mysql_array);
		//step through elements in array, foo is an unused variable.
		while(list($foo,$this->element)=each($this->mysql_array)){
			
			//dont mind a foreach here- tiny array to play with...
			foreach($this->element as $this->field=>$val){
				//make an array of the field names that have been specified...
				if(!in_array($this->field, $this->specified_fields)){
					$this->specified_fields[] = $this->field;
				}
			}

		}
		reset($this->mysql_array);

		//upon errors this will be true - causing die below to spring to life...
		$this->sql_error=0;	

		//loop through the tables fields - create array
		foreach	($this->tbl_structure as $this->field){

			//if the field is not in the array of specified fields and it has no default value 
			// and the field in question is not a primary key- moan
			if(!in_array($this->field['Field'], $this->specified_fields) 
					&& ($this->field['Default']=="") 
					&& ($this->field['Key'] != 'PRI') 
					&& ($this->field['Extra'] != 'auto_increment')
					&& ($this->field['NULL'] != "YES" ) ){

				$this->dmsg( "{$this->field['Field']} has no default and you have not specified a value for this field.", 1);
				$this->sql_error=1;	
				$this->error_fields[] = $this->field['Field'];

			}else{
				$this->dmsg( "{$this->field['Field']} has been verified.", 4);
			}
		}

		#IF THIS VAR IS SET THEN SOMETHING ERRORED - REPORT THIS WITH A MESSAGE.
		if($this->sql_error){
			echo $this->error_messages;
			$this->csv_data_all = array();

			if(function_exists("render_page_head")){
				echo render_page_head();
			}
			$erst="";
			foreach($this->error_fields as $f){
				$erst.="Field ".$f." has no default <BR>";
			}
			die("\n$erst Please specify values for the missing fields please add definitions for these fields.");
		}

	}
	
	var $element;
	var $output_sql;
	var $insert_into;
	var $values;
	var $all_values = array();
	var	$insert_line;

	function create_sql($show=0){
		//check all array elements are present and correct...

		$this->validate_sql();

		$this->dmsg("creating sql", 1);


#TODO: THIS IS NOT VERY CLEVER MEMORY MANAGEMENT - IT WOULD BE BETTER NOT TO CREATE
# EACH VALUE LINE AND SAVE IN AN ARRAY - IF THERE ARE 10,000 RECORDS THIS WILL KILL
# PHP AND THE LINUXBOX - LOCKUP COMPLETELY ALT+SYSREQ+S+U+B TO RECOVER MACHINE!!!! 
# OUTPUTTING EACH LINE AS IT WAS CREATED WOULD BE MUCH LESS PROCESSOR INTENSIVE. 
	
		$this->values = "";
		//loop through each row to be entered
		reset($this->mysql_array);
		$this->all_values = array();
		while(list($foobar,$this->element)=each($this->mysql_array)){

			//loop through each field name and value
			$num=count($this->element)-1;
			$x=0;
				//blank data
				$this->insert_into = "";
				$this->values = "(";

			foreach($this->element as $field=>$val){
				//do not include temporary fields - these can be used for the function fields
				if(!ereg("^TMP__", $field )){
					$val = str_replace("&quote;", "\"", $val);
					$val = str_replace("&comma;", ",", $val);
					$val = str_replace("&appos;", "\'", $val);
						$this->insert_into .= $field." ";
						$this->values .= "'$val' ";
						if($x<$num){	
							//capture field names
							$this->insert_into.=",";
							$this->values .= ", ";
						}
				}
				$x++;	
			}	
			$this->values .= ")";
			$this->all_values[] = $this->values;

	
		}
	
		//create insert line of the sql 
		$this->insert_line = "REPLACE INTO $this->table ( $this->insert_into ) VALUES\n ";
		$this->output_sql = $this->insert_line;

		$num=count($this->all_values)-1;
		$x=0;
		$i=1;

		//loop through the list of VALUES "(bla,bla,bla)" in array adding each to sql
		//when vals_per_insert is reached close sql statement and add another insert line

		reset($this->all_values);

		while(list($foo,$v)=each($this->all_values)){	

			//only add specified number of values per insert statement (vals_per_insert)
			if($i== $this->vals_per_insert){
				$this->output_sql .= $v.";\n\n".$this->insert_line;
				$i=1;	
			}	

			else{	
				$this->output_sql .= " $v ";

				if($x<$num){
					$this->output_sql .= ",\n";
				}
			}
			$x++;
			$i++;
		}
		reset($this->all_values);
		reset($this->mysql_array);

		//finish the sql statement
		$this->output_sql .= ";";
		if($show){
			echo "\n##########################################################\n";
			echo "# SQL Created by Csv2MySQL.inc \n";
			echo "# CSV LeGaCy ImPoRtAtIoN ToOl \n";
			echo "# ".date("D d-m-Y H:i:s", time())."\n";
			echo "# CSV File: $this->csv_file \n";
			echo "##########################################################\n";
			echo "\n\n".$this->output_sql."\n";
		}
		else{
			return($this->output_sql);
		}
	}

	var $keys;
	//just a little debugging function 
	function show_definitions($tbl=1, $return=0){
		if(!$tbl){
			ds($this->mysql_array);
	
			#BELOW ARE JUST SOME MORE OPTIONAL DUMPS FOR DEBUGGING
			//ds($this->csv_fields);
			//ds($this->csv_data_all);
			//ds($this->tbl_structure);
			//ds($this->instructions);
			//echo nl2br($this->output_sql);
		}else{
			if(!$return){
				#DEBUG JARGON - "FOR THOSE THAT KNOW" ;)
					echo "<pre>";
					$d=new display($this->csv_data_all);
					$d->make_layout(1);
					$d=new display($this->mysql_array);
					$d->make_layout(1);

					#OPTIONAL DEBUG DUMPS
					//$d=new display($this->instructions);
					//$d->make_layout(1);
					//$d=new display($this->instruct_functions);
					//$d->make_layout(1);
					//$d=new display($this->tbl_structure);
					//$d->make_layout(1);
					//echo nl2br($this->output_sql);
					echo "</pre>";

			}else{
				#RETURN DATA TO FUNCTION CALLER...	
				//get the array keys	
				$keys = array_keys($this->mysql_array[0]);
				$this->keys = $keys;	
				//go through keys - grab 5 rows from big array - this is to display
				//to the user what they are doing while adding functions etc. - without
				//taking up potentially 1000's of lines...
				foreach($keys as $key){
					$x=0;
					while($x<5){
						$sample[$x][$key] = $this->mysql_array[$x][$key];
						$x++;
					}
				}
				$d=new display($sample);
				$string = $d->make_layout(0);
				return("<pre>".$string."</pre>");
			}
		}
	}

	function return_csv_fields(){
		return($this->csvFields);
	}
	
	function return_mysql_fields(){
		return($this->mysqlFields);
	}

	function return_mysql_key_fields(){
		//return(array_keys($this->mysql_array[0]));
		return($this->keys);
	}
}



?>
Return current item: csv2mysql database migration tool