<?php
/*
OpenDataBag - Data Web Interface
Copyright (C) 2004 Nawara
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
// INIT ---------------------------------------------
// The daemon loops for as long as check_version() allows, so lift the execution time limit.
set_time_limit(0);

// Project helper scripts. NOTE(review): presumably these provide the helpers and
// constants used below (search/read/save/find_similar, CRLF, INFO) - confirm.
include(cfg_data_path.'/script/inc_functions.txt');
include(cfg_data_path.'/script/inc_data.txt');
include(cfg_data_path.'/script/sec_basic.txt');

// Tunables read by the main loop further down.
$test = 0;               // non-zero: process only one hard-coded bag file instead of scanning the data directory
$sleep = 1;              // non-zero: enable the sleep() pauses between units of work
$find = 1;               // non-zero: run the "finding rules" scan
$replace = 0;            // non-zero: run the "replacing by rules" scan
$check_range_days = 300; // only records whose sysValidFrom lies within the last N days are scanned for similarity
$similarity = 92;        // threshold value handed to find_similar()

// DEDUP ---------------------------------------------
// Identifier for this run: timestamp plus a random three-digit suffix.
$deamonid = date('Ymd His ').rand(100,999);
set_time_limit(0);

// Start-up banner listing the effective settings.
echo INFO, ' ', $deamonid, CRLF;
echo CRLF;
echo ' sleep=', $sleep, CRLF;
echo ' find=', $find, CRLF;
echo ' replace=', $replace, CRLF;
echo ' check range days=', $check_range_days, CRLF;
echo ' similarity=', $similarity, CRLF;
echo CRLF, 'working...';
// Main daemon loop. Each pass:
//   1. collects the bag data files (dat_*.txt) to work on,
//   2. if $replace is set, applies the active dedup rules to every currently valid record,
//   3. if $find is set, looks for similar records per configured variable list and stores
//      each newly discovered replacement as an inactive dedup rule.
// The loop ends when check_version() returns a false value.
while(check_version())
{
    // Rebuild the file list on every pass so that bag files removed since the
    // previous pass are not opened again.
    $file_array=array();
    if($test)
        $file_array[cfg_data_path.'/data/dat_chsaab212.txt']=1;
    else
    {
        $res=opendir(cfg_data_path.'/data');
        if($res!==false)
        {
            // Compare against false explicitly: an entry named "0" must not end the scan.
            while(($fl=readdir($res))!==false)
                if(substr($fl,0,4)=='dat_' and $fl!='dat_.txt')
                    $file_array[cfg_data_path.'/data/'.$fl]=1;
            closedir($res);
        }
    }

    // Variable lists to compare on. NOTE(review): $cfg_dedup_array is expected to come
    // from one of the included scripts - confirm; treated as empty when missing.
    $dedup_configs=(isset($cfg_dedup_array) and is_array($cfg_dedup_array)) ? $cfg_dedup_array : array();

    $replace_array=array();
    foreach(array_keys($file_array) as $dat_file)
    {
        // Bag name = file name without the "dat_" prefix and the 4-character extension.
        $curr_bag=substr(basename($dat_file),4,-4);

        //replacing by rules ---------------------------------------------------
        if($replace)
        {
            // Reload the active rules for every file (rules may have been added meanwhile)
            // and start from an empty list so no stale rule survives from a previous file.
            $replace_array=array();
            $dedup_active_rules_array=search('sysGroupBy=dedup Active=1 MyBag=dedup'); //.$curr_bag
            if(is_array($dedup_active_rules_array))
            {
                foreach($dedup_active_rules_array as $dedup_active_rule_line)
                {
                    if(read($dedup_active_rule_line,'Active'))
                    {
                        $replace_array[]=array(
                            'sysId'=>read($dedup_active_rule_line,'sysId'),
                            'search'=>read($dedup_active_rule_line,'Change Filter'),
                            'from'=>read($dedup_active_rule_line,'Change From'),
                            'to'=>read($dedup_active_rule_line,'Change To')
                        );
                    }
                }
            }

            $today=date('Ymd');
            $fp=fopen($dat_file,'rb');
            // feof() on a failed handle never becomes true, so only loop on a real handle.
            if($fp!==false)
            {
                while(!feof($fp))
                {
                    // NOTE(review): records longer than 2047 bytes are read in pieces - confirm this limit is safe.
                    $line=fgets($fp,2048);
                    if($line===false)
                        break;
                    $changed=0;
                    $changes='';
                    // Only records that are currently valid are touched.
                    if(read($line,'sysValidTo')=='99991231' and read($line,'sysValid')=='1' and read($line,'sysValidFrom')<=$today)
                    {
                        foreach($replace_array as $rule)
                        {
                            // The record must match the rule filter and contain the text to change.
                            if(!stristr($line,$rule['search']))
                                continue;
                            if(!stristr($line,$rule['from']))
                                continue;
                            // Never rewrite the dedup rule records themselves. A strict test is
                            // required: stristr() returns a string or false, never a number.
                            if(stristr($line,'sysGroupBy=dedup')!==false)
                                continue;
                            $line1=str_replace($rule['from'],$rule['to'],$line);
                            if($line1!=$line)
                            {
                                // Later rules see the already modified record.
                                $line=$line1;
                                $changes.=' '.$rule['sysId'];
                                $changed=1;
                            }
                        }
                        if($changed)
                        {
                            $variable_array=line2array($line);
                            $variable_array['sysComment']='Dedup fix:'.$changes;
                            $variable_array['sysModInfo']=date('Ymd H:i').' Dedup Deamon';
                            save($variable_array);
                            echo CRLF.CRLF.' FIX bag='.$curr_bag.' rule='.read($line,'sysId').' changes='.$changes;
                            if($sleep) sleep(1);
                        }
                        // Occasional random pause to throttle the scan.
                        if(rand(0,200)<1 and $sleep)
                            sleep(1);
                    }
                }
                fclose($fp);
            }
        }
        //replacing by rules ---------------------------------------------------

        foreach($dedup_configs as $config_line)
        {
            // A config line is a comma separated list of variable names to compare on.
            $pattern_variable_array=array();
            foreach(explode(',',$config_line) as $variable_name)
                if(strlen(trim($variable_name)))
                    $pattern_variable_array[trim($variable_name)]='';

            //finding rules ---------------------------------------------------
            if($find)
            {
                // Date window, computed once per scan instead of once per record.
                $today=date('Ymd');
                $oldest=date('Ymd',mktime(0,0,0,(int)date('m'),(int)date('d')-$check_range_days,(int)date('Y')));

                $fp=fopen($dat_file,'rb');
                if($fp!==false)
                {
                    while(!feof($fp))
                    {
                        $line=fgets($fp,2048);
                        if($line===false)
                            break;
                        // Currently valid records that became valid within the checked range.
                        if(read($line,'sysValidTo')=='99991231' and read($line,'sysValid')=='1' and read($line,'sysValidFrom')<=$today and read($line,'sysValidFrom')>$oldest)
                        {
                            $result_array=array();
                            $variable_array=array();
                            // Collect the pattern variables; a record missing any of them is skipped.
                            foreach(array_keys($pattern_variable_array) as $variable_name)
                            {
                                $variable_array[$variable_name]=read($line,$variable_name);
                                if(strlen($variable_array[$variable_name])==0)
                                {
                                    $variable_array=array();
                                    break;
                                }
                            }
                            if(count($variable_array)==count($pattern_variable_array))
                                $result_array=find_similar($variable_array,60,$similarity,1,$curr_bag,$sleep,read($line,'sysId'));

                            if(is_array($result_array) and count($result_array))
                            {
                                if($sleep) sleep(2);
                                foreach($result_array as $replacement=>$sim)
                                {
                                    // The key is consumed as space separated token pairs: equal pairs
                                    // form the filter, differing pairs form the from/to texts.
                                    $replacement_array=explode(' ',$replacement);
                                    $token_count=count($replacement_array);
                                    $replacement_left='';
                                    $replacement_right='';
                                    $filter='';
                                    for($ii=0;$ii<$token_count;$ii+=2)
                                    {
                                        $pattern=$replacement_array[$ii];
                                        // An odd token count leaves the last pattern without a partner.
                                        $word=isset($replacement_array[$ii+1]) ? $replacement_array[$ii+1] : '';
                                        // Loose comparison kept on purpose: numerically equal tokens count as unchanged.
                                        if($pattern==$word)
                                            $filter.=$pattern.' ';
                                        else
                                        {
                                            $replacement_left.=$pattern.' ';
                                            $replacement_right.=$word.' ';
                                        }
                                    }
                                    $replacement_variable_array=array();
                                    $replacement_variable_array['sysGroupBy']='dedup';
                                    $replacement_variable_array['sysId']='';
                                    $replacement_variable_array['Active']='0';
                                    $replacement_variable_array['Rule Type']=$config_line;
                                    $replacement_variable_array['Change Filter']=trim($filter);
                                    $replacement_variable_array['Change From']=trim($replacement_left);
                                    $replacement_variable_array['Change To']=trim($replacement_right);
                                    $replacement_variable_array['Similarity']=$sim.'%';
                                    $replacement_variable_array['MyBag']='dedup';//substr(basename($dat_file),4,-4);

                                    // Skip replacements whose text has a non-zero numeric value. The
                                    // explicit cast keeps the numeric-prefix meaning of the old "0+text"
                                    // test, which raises a TypeError for non-numeric text on PHP 8.
                                    $left_is_text=((float)trim($replacement_left)==0);
                                    $right_is_text=((float)trim($replacement_right)==0);
                                    // Store the rule only if an equivalent one is not already known.
                                    if($left_is_text and $right_is_text and count(search('dedup '.$filter.' '.$replacement_left.' '.$replacement_right.' MyBag=dedup '))==0)
                                    {
                                        save($replacement_variable_array);
                                        echo CRLF.CRLF.' NEW RULE bag='.$curr_bag.' rule=('.$config_line.')'.' sim='.$sim.'% filter='.$filter.' replace=('.$replacement_left.' '.$replacement_right.')';
                                    }
                                }
                                if($sleep) sleep(1);
                            }
                        }
                        // Push progress output out; ob_flush() raises a notice without an active buffer.
                        if(ob_get_level()>0)
                            ob_flush();
                    }
                    fclose($fp);
                }
            }
            //finding rules ---------------------------------------------------
        }//end foreach config_line
        if($sleep) sleep(5);
    }//end foreach file
}
?>