Location: PHPKode > projects > ObsceneClean > ObsceneCleanLib.php
<?PHP
/*
    ObsceneClean - a profanity filter. Copyright (C) 2009 Scott L. Moore

    This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
	Contact: hide@address.com 
	*/
/* ========================================================================================================== */
// Again, no var passed by ref. is altered in the called function!
/* ========================================================================================================== */
/**
 * Decrypt a payload produced by my_encrypt().
 *
 * The payload is the IV followed by the DES/CFB ciphertext. The cipher key is the md5 hex
 * digest of $key, truncated to the key size reported by the mcrypt module.
 *
 * @param string $string IV-prefixed ciphertext.
 * @param string $key    Pass phrase.
 * @return string|null   Decrypted data, or null when the cipher cannot be opened or initialised.
 */
function my_decrypt($string,$key) {
	$key = md5($key); //to improve variance
	/* Open module and split the IV off the front of the payload */
	$td = mcrypt_module_open('des', '', 'cfb', '');
	if ($td === false) {
		return null;
	}
	$key = substr($key, 0, mcrypt_enc_get_key_size($td));
	$iv_size = mcrypt_enc_get_iv_size($td);
	$iv = substr($string, 0, $iv_size);
	$string = substr($string, $iv_size);
	/* Initialize decryption handle. Failure is signalled by false or by ANY negative
	   value (-3 bad key length, -4 out of memory, ...), not only -1. */
	$init = mcrypt_generic_init($td, $key, $iv);
	if ($init === false || $init < 0) {
		mcrypt_module_close($td);	// do not leak the module handle on failure
		return null;
	}
	/* Decrypt data */
	$plain = mdecrypt_generic($td, $string);
	mcrypt_generic_deinit($td);
	mcrypt_module_close($td);
	return $plain;
}
/* ========================================================================================================== */
/**
 * Encrypt a string with DES in CFB mode and prepend the random IV to the result.
 *
 * The cipher key is the md5 hex digest of $key, truncated to the key size reported by the
 * mcrypt module. The output is laid out as IV . ciphertext, which is what my_decrypt() expects.
 *
 * @param string $string Plain text.
 * @param string $key    Pass phrase.
 * @return string|null   IV-prefixed ciphertext, or null when the cipher cannot be opened or initialised.
 */
function my_encrypt($string,$key) {
	srand((double) microtime() * 1000000); //for sake of MCRYPT_RAND
	$key = md5($key); //to improve variance
	/* Open module, and create IV */
	$td = mcrypt_module_open('des', '', 'cfb', '');
	if ($td === false) {
		return null;
	}
	$key = substr($key, 0, mcrypt_enc_get_key_size($td));
	$iv_size = mcrypt_enc_get_iv_size($td);
	$iv = mcrypt_create_iv($iv_size, MCRYPT_RAND);
	/* Initialize encryption handle. Failure is signalled by false or by ANY negative
	   value (-3 bad key length, -4 out of memory, ...), not only -1. */
	$init = mcrypt_generic_init($td, $key, $iv);
	if ($init === false || $init < 0) {
		mcrypt_module_close($td);	// do not leak the module handle on failure
		return null;
	}
	/* Encrypt data */
	$cipher = mcrypt_generic($td, $string);
	mcrypt_generic_deinit($td);
	mcrypt_module_close($td);
	return $iv . $cipher;
}

/* ========================================================================================================== */
/**
 * Load the master safe list and turn every entry into a PCRE pattern.
 *
 * Reads MasterSafeList.dat from $DataDir (defined by ObsceneClean.settings.php) or, when that
 * is empty, from the "dat" directory next to this file. The content is decrypted with
 * my_decrypt() and each non-empty line is wrapped as '/<line>/ix'. Lines are used verbatim,
 * so they must already be valid regex source with forward slashes escaped.
 *
 * @return array List of regex strings; empty when the data file cannot be read or decrypted.
 */
function GetMasterSafeList()
{
	// Plain include, not include_once: the settings variables must be created in THIS
	// function's scope on every call. include_once is a no-op after the first inclusion
	// anywhere in the request, which would leave $DataDir undefined here.
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	$TheFileToOpen = 'MasterSafeList.dat';
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . $TheFileToOpen;
	}
	else
	{
		$FTfile = $DataDir . '/' . $TheFileToOpen;
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$FileContents = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($FileContents === false || $FileContents === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllSafeWords = my_decrypt($FileContents, $key);
	if (!is_string($AllSafeWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$MasterSafeList = array();
	foreach (explode("\n", $AllSafeWords) as $line)
	{
		$line = rtrim($line);
		if ($line === '')
		{
			continue;	// an empty pattern ('//ix') would match every input
		}
		// forward slash encloses the pattern, so slashes IN the pattern must already be escaped
		$MasterSafeList[] = '/' . $line . '/ix';
	}
	return $MasterSafeList;
}

/* ========================================================================================================== */
/**
 * Load the local safe list and turn every entry into a case-insensitive PCRE pattern.
 *
 * Reads LocalSafeList.dat from $DataDir (defined by ObsceneClean.settings.php) or, when that
 * is empty, from the "dat" directory next to this file. Each non-empty decrypted line is
 * regex-quoted, wrapped as '/<line>/i', and every string listed in $WhitespaceArray (from the
 * settings file) is replaced by '\s{0,3}'.
 *
 * @return array List of regex strings; empty when the data file cannot be read or decrypted.
 */
function GetLocalSafeList()
{
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	$TheFileToOpen = 'LocalSafeList.dat';
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . $TheFileToOpen;
	}
	else
	{
		$FTfile = $DataDir . '/' . $TheFileToOpen;
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$FileContents = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($FileContents === false || $FileContents === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllSafeWords = my_decrypt($FileContents, $key);
	if (!is_string($AllSafeWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$Whitespaces = (isset($WhitespaceArray) && is_array($WhitespaceArray)) ? $WhitespaceArray : array();
	$LocalSafeList = array();
	foreach (explode("\n", $AllSafeWords) as $line)
	{
		$line = rtrim($line);
		if ($line === '')
		{
			continue;	// an empty pattern ('//i') would match every input
		}
		// Pass the delimiter so a '/' inside the phrase cannot terminate the pattern early.
		$pattern = '/' . preg_quote($line, '/') . '/i';
		// A single replacement string is applied to every entry of the search array.
		$LocalSafeList[] = str_replace($Whitespaces, '\s{0,3}', $pattern);
	}
	return $LocalSafeList;
}

/* ========================================================================================================== */
/**
 * Load the master "OE" list and turn every entry into a PCRE pattern.
 *
 * Reads MasterOEs.dat from $DataDir (defined by ObsceneClean.settings.php) or, when that is
 * empty, from the "dat" directory next to this file. The content is decrypted with
 * my_decrypt() and each non-empty line is wrapped as '/<line>/ix'. Lines are used verbatim,
 * so they must already be valid regex source with forward slashes escaped.
 *
 * @return array List of regex strings; empty when the data file cannot be read or decrypted.
 */
function GetMasterOEList()
{
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	$TheFileToOpen = 'MasterOEs.dat';
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . $TheFileToOpen;
	}
	else
	{
		$FTfile = $DataDir . '/' . $TheFileToOpen;
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$FileContents = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($FileContents === false || $FileContents === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllOEs = my_decrypt($FileContents, $key);
	if (!is_string($AllOEs))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$MasterOEList = array();
	foreach (explode("\n", $AllOEs) as $line)
	{
		$line = rtrim($line);
		if ($line === '')
		{
			continue;	// an empty pattern ('//ix') would match every input
		}
		// forward slash encloses the pattern, so slashes IN the pattern must already be escaped
		$MasterOEList[] = '/' . $line . '/ix';
	}
	return $MasterOEList;
}
/* ========================================================================================================== */
/**
 * Load the Shakespeare safe list and turn every entry into a case-insensitive PCRE pattern.
 *
 * Each non-empty decrypted line is regex-quoted, wrapped as '/<line>/i', and every string
 * listed in $WhitespaceArray (from ObsceneClean.settings.php) is replaced by '\s{0,3}'.
 *
 * NOTE(review): this reads KingJamesBibleSafeList.dat, the same file GetKJVBibleSafeList()
 * reads. That may be a copy-paste slip; confirm the intended file name before changing it.
 *
 * @return array List of regex strings; empty when the data file cannot be read or decrypted.
 */
function GetShakespeareSafeList()
{
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	$TheFileToOpen = 'KingJamesBibleSafeList.dat';
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . $TheFileToOpen;
	}
	else
	{
		$FTfile = $DataDir . '/' . $TheFileToOpen;
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$FileContents = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($FileContents === false || $FileContents === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllSafeWords = my_decrypt($FileContents, $key);
	if (!is_string($AllSafeWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$Whitespaces = (isset($WhitespaceArray) && is_array($WhitespaceArray)) ? $WhitespaceArray : array();
	$ShakespeareSafeList = array();
	foreach (explode("\n", $AllSafeWords) as $line)
	{
		$line = rtrim($line);
		if ($line === '')
		{
			continue;	// an empty pattern ('//i') would match every input
		}
		// Pass the delimiter so a '/' inside the phrase cannot terminate the pattern early.
		$pattern = '/' . preg_quote($line, '/') . '/i';
		// A single replacement string is applied to every entry of the search array.
		$ShakespeareSafeList[] = str_replace($Whitespaces, '\s{0,3}', $pattern);
	}
	return $ShakespeareSafeList;
}

/* ========================================================================================================== */
/**
 * Load the King James Bible safe list and turn every entry into a case-insensitive PCRE pattern.
 *
 * Reads KingJamesBibleSafeList.dat from $DataDir (defined by ObsceneClean.settings.php) or,
 * when that is empty, from the "dat" directory next to this file. Each non-empty decrypted
 * line is regex-quoted, wrapped as '/<line>/i', and every string listed in $WhitespaceArray
 * (from the settings file) is replaced by '\s{0,3}'.
 *
 * @return array List of regex strings; empty when the data file cannot be read or decrypted.
 */
function GetKJVBibleSafeList()
{
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	$TheFileToOpen = 'KingJamesBibleSafeList.dat';
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . $TheFileToOpen;
	}
	else
	{
		$FTfile = $DataDir . '/' . $TheFileToOpen;
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$FileContents = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($FileContents === false || $FileContents === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllSafeWords = my_decrypt($FileContents, $key);
	if (!is_string($AllSafeWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$Whitespaces = (isset($WhitespaceArray) && is_array($WhitespaceArray)) ? $WhitespaceArray : array();
	$KJVBibleSafeList = array();
	foreach (explode("\n", $AllSafeWords) as $line)
	{
		$line = rtrim($line);
		if ($line === '')
		{
			continue;	// an empty pattern ('//i') would match every input
		}
		// Pass the delimiter so a '/' inside the phrase cannot terminate the pattern early.
		$pattern = '/' . preg_quote($line, '/') . '/i';
		// A single replacement string is applied to every entry of the search array.
		$KJVBibleSafeList[] = str_replace($Whitespaces, '\s{0,3}', $pattern);
	}
	return $KJVBibleSafeList;
}
/* ========================================================================================================== */
/**
 * Load the master OW list and return the entries that pass the severity / sound-alike filter.
 *
 * Reads MasterOWs.dat from $DataDir (defined by ObsceneClean.settings.php) or, when that is
 * empty, from the "dat" directory next to this file. Each decrypted line is a comma-separated
 * record: OW, sev, cat, subcat, ambiguity, soundalike. A record is kept when its severity is
 * >= $LowestSevConsidered and either $UseSoundAlikes (settings) is truthy or its soundalike
 * field equals 0.
 *
 * @param mixed $LowestSevConsidered Minimum severity (compared loosely against field 1).
 * @return array List of arrays with keys OW, sev, cat, subcat, ambiguity, soundalike;
 *               empty when nothing matches or the data file cannot be read or decrypted.
 */
function GetOWsAndParms($LowestSevConsidered )
{
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . 'MasterOWs.dat';
	}
	else
	{
		$FTfile = $DataDir . '/' . 'MasterOWs.dat';
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$EncryptedWords = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($EncryptedWords === false || $EncryptedWords === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllWords = my_decrypt($EncryptedWords, $key);
	if (!is_string($AllWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$SoundAlikesEnabled = !empty($UseSoundAlikes);
	$OWArray = array();
	foreach (explode("\n", $AllWords) as $line)	// one iteration per bad word
	{
		// Pad short rows with null so missing fields behave like the unset indexes they
		// used to be (null), without raising undefined-offset notices.
		$fields = array_pad(explode(",", rtrim($line)), 6, null);
		if (!($fields[1] >= $LowestSevConsidered))
		{
			continue;
		}
		if (!$SoundAlikesEnabled && !($fields[5] == 0))
		{
			continue;
		}
		$OWArray[] = array(
			'OW' => $fields[0],
			'sev' => intval($fields[1]),
			'cat' => intval($fields[2]),
			'subcat' => intval($fields[3]),
			'ambiguity' => intval($fields[4]),
			'soundalike' => intval($fields[5]),
		);
	}
	return $OWArray;
}

/* ========================================================================================================== */
/**
 * Load the special-rules table.
 *
 * Reads SpecialRulesData.dat from $DataDir (defined by ObsceneClean.settings.php) or, when
 * that is empty, from the "dat" directory next to this file. Each decrypted line is a
 * comma-separated record: OW, rule, nearword, proximity, direction, weight.
 *
 * @return array List of arrays with keys OW, rule, nearword, proximity, direction, weight
 *               (one per line, in file order); empty when the file cannot be read or decrypted.
 */
function GetSpecialRules()
{
	// Plain include, not include_once: the settings variables must be created in THIS
	// function's scope on every call. include_once is a no-op after the first inclusion
	// anywhere in the request, which would leave $DataDir undefined here.
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . 'SpecialRulesData.dat';
	}
	else
	{
		$FTfile = $DataDir . '/' . 'SpecialRulesData.dat';
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$EncryptedWords = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($EncryptedWords === false || $EncryptedWords === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllWords = my_decrypt($EncryptedWords, $key);
	if (!is_string($AllWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$SpecialRules = array();
	foreach (explode("\n", $AllWords) as $line)	// one iteration per rule line
	{
		// Pad short rows with null so missing fields keep their old defaults without notices.
		$fields = array_pad(explode(",", rtrim($line)), 6, null);
		$SpecialRules[] = array(
			'OW' => $fields[0],
			'rule' => intval($fields[1]),
			'nearword' => ltrim((string) $fields[2]),
			'proximity' => intval($fields[3]),
			'direction' => intval($fields[4]),
			'weight' => intval($fields[5]),
		);
	}
	return $SpecialRules;
}

/* ========================================================================================================== */
/**
 * Load the quantity-words list.
 *
 * Reads QtyWords.dat from $DataDir (defined by ObsceneClean.settings.php) or, when that is
 * empty, from the "dat" directory next to this file, decrypts it with my_decrypt() and
 * returns one right-trimmed entry per line.
 *
 * @return array List of strings in file order; empty when the file cannot be read or decrypted.
 */
function GetQtyWords()
{
	// Plain include, not include_once: the settings variables must be created in THIS
	// function's scope on every call. include_once is a no-op after the first inclusion
	// anywhere in the request, which would leave $DataDir undefined here.
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . 'QtyWords.dat';
	}
	else
	{
		$FTfile = $DataDir . '/' . 'QtyWords.dat';
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$EncryptedWords = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($EncryptedWords === false || $EncryptedWords === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllWords = my_decrypt($EncryptedWords, $key);
	if (!is_string($AllWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$QtyArray = array();
	foreach (explode("\n", $AllWords) as $line)	// one iteration per word
	{
		$QtyArray[] = rtrim($line);
	}
	return $QtyArray;
}

/* ========================================================================================================== */
/**
 * Load the insulting-words table.
 *
 * Reads InsultingWords.dat from $DataDir (defined by ObsceneClean.settings.php) or, when that
 * is empty, from the "dat" directory next to this file. Each decrypted line is a
 * comma-separated record: insult, plural, ambiguity, weight.
 *
 * @return array List of arrays with keys insult, plural, ambiguity, weight (one per line, in
 *               file order); empty when the file cannot be read or decrypted.
 */
function GetInsultingWords()
{
	// Plain include, not include_once: the settings variables must be created in THIS
	// function's scope on every call. include_once is a no-op after the first inclusion
	// anywhere in the request, which would leave $DataDir undefined here.
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . 'InsultingWords.dat';
	}
	else
	{
		$FTfile = $DataDir . '/' . 'InsultingWords.dat';
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$EncryptedWords = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($EncryptedWords === false || $EncryptedWords === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllWords = my_decrypt($EncryptedWords, $key);
	if (!is_string($AllWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$InsultArray = array();
	foreach (explode("\n", $AllWords) as $line)	// one iteration per word
	{
		// Pad short rows with null so missing fields keep their old defaults without notices.
		$fields = array_pad(explode(",", rtrim($line)), 4, null);
		$InsultArray[] = array(
			'insult' => $fields[0],
			'plural' => $fields[1],
			'ambiguity' => intval($fields[2]),
			'weight' => intval($fields[3]),
		);
	}
	return $InsultArray;
}

/* ========================================================================================================== */
// The categories for the words in this file must be incremental without missing numbers, i.e. 1,2,3,4 is okay; 1,2,5,6 is not okay.
// The first category must start with zero, which is the most general category. These categories (the 2nd field) are based on the way racists, etc. USE OWs and not on any race or ethnic categorization.
/* ========================================================================================================== */
/**
 * Load the antagonistic-words table, sorted with cmp4usort() (i.e. by 'cat').
 *
 * Reads AntagaonisticWords.dat (file name spelled as shipped) from $DataDir (defined by
 * ObsceneClean.settings.php) or, when that is empty, from the "dat" directory next to this
 * file. Each decrypted line is a comma-separated record: theword, plural, cat.
 *
 * @return array List of arrays with keys theword, plural, cat; empty when the file cannot
 *               be read or decrypted.
 */
function GetAntagonisticWords()
{
	// Plain include, not include_once: the settings variables must be created in THIS
	// function's scope on every call. include_once is a no-op after the first inclusion
	// anywhere in the request, which would leave $DataDir undefined here.
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . 'AntagaonisticWords.dat';
	}
	else
	{
		$FTfile = $DataDir . '/' . 'AntagaonisticWords.dat';
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$EncryptedWords = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($EncryptedWords === false || $EncryptedWords === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllWords = my_decrypt($EncryptedWords, $key);
	if (!is_string($AllWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$AntagonisticWordsArray = array();
	foreach (explode("\n", $AllWords) as $line)	// one iteration per word
	{
		// Pad short rows with null so missing fields keep their old defaults without notices.
		$fields = array_pad(explode(",", rtrim($line)), 3, null);
		$AntagonisticWordsArray[] = array(
			'theword' => $fields[0],
			'plural' => $fields[1],
			'cat' => intval($fields[2]),
		);
	}
	usort($AntagonisticWordsArray, "cmp4usort");
	return $AntagonisticWordsArray;
}

/* ========================================================================================================== */
/**
 * Load the category / sub-category description lookup table.
 *
 * Reads OWCategoryLookup.dat from $DataDir (defined by ObsceneClean.settings.php) or, when
 * that is empty, from the "dat" directory next to this file. Each decrypted line is a
 * comma-separated record: cat, subcat, catdescription, subcatdescription. Underscores in the
 * two description fields are converted to spaces.
 *
 * @return array List of arrays with keys cat, subcat, catdescription, subcatdescription (one
 *               per line, in file order); empty when the file cannot be read or decrypted.
 */
function GetOWCategoryLookup()
{
	// Plain include, not include_once: the settings variables must be created in THIS
	// function's scope on every call. include_once is a no-op after the first inclusion
	// anywhere in the request, which would leave $DataDir undefined here.
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . 'OWCategoryLookup.dat';
	}
	else
	{
		$FTfile = $DataDir . '/' . 'OWCategoryLookup.dat';
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$EncryptedWords = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($EncryptedWords === false || $EncryptedWords === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllWords = my_decrypt($EncryptedWords, $key);
	if (!is_string($AllWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$OWLookupArray = array();
	foreach (explode("\n", $AllWords) as $line)	// one iteration per lookup row
	{
		// Pad short rows with null so missing fields keep their old defaults without notices.
		$fields = array_pad(explode(",", rtrim($line)), 4, null);
		$OWLookupArray[] = array(
			'cat' => intval($fields[0]),
			'subcat' => intval($fields[1]),
			'catdescription' => str_replace("_", " ", (string) $fields[2]),
			'subcatdescription' => str_replace("_", " ", (string) $fields[3]),
		);
	}
	return $OWLookupArray;
}


/* ========================================================================================================== */
/**
 * usort() callback ordering records by their 'cat' entry.
 *
 * Numeric categories are compared as numbers, so category 2 sorts before category 10;
 * a plain strcmp() would order them as the strings "10" < "2". Non-numeric values fall
 * back to strcmp().
 *
 * @param array $a Record with a 'cat' key.
 * @param array $b Record with a 'cat' key.
 * @return int Negative, zero or positive when $a sorts before, with, or after $b.
 */
function cmp4usort($a, $b)
{
	$left = $a['cat'];
	$right = $b['cat'];
	if (is_numeric($left) && is_numeric($right))
	{
		if ($left == $right)
		{
			return 0;
		}
		return ($left < $right) ? -1 : 1;
	}
	return strcmp($left, $right);
}

/* ========================================================================================================== */
/**
 * usort() callback ordering records by a byte-wise string comparison of their 'insult' entry.
 *
 * @param array $a Record with an 'insult' key.
 * @param array $b Record with an 'insult' key.
 * @return int Result of strcmp() on the two 'insult' values.
 */
function cmp4usort2($a, $b)
{
	$leftInsult = $a['insult'];
	$rightInsult = $b['insult'];
	$order = strcmp($leftInsult, $rightInsult);
	return $order;
}

/* ========================================================================================================== */
/**
 * Load the list stored in OWsInAspell.dat.
 *
 * Reads the file from $DataDir (defined by ObsceneClean.settings.php) or, when that is empty,
 * from the "dat" directory next to this file, decrypts it with my_decrypt() and returns one
 * right-trimmed entry per line.
 *
 * @return array List of strings in file order; empty when the file cannot be read or decrypted.
 */
function GetOWsInAspell()
{
	// Plain include, not include_once: the settings variables must be created in THIS
	// function's scope on every call. include_once is a no-op after the first inclusion
	// anywhere in the request, which would leave $DataDir undefined here.
	include "ObsceneClean.settings.php";
	/*-------------------------------------------------- Define data files location ------------------------------------ */
	if (!isset($DataDir) || $DataDir == "")
	{
		$FTfile = dirname(__FILE__) . '/' . 'dat' . '/' . 'OWsInAspell.dat';
	}
	else
	{
		$FTfile = $DataDir . '/' . 'OWsInAspell.dat';
	}
	putenv("ALPHADATA9=TRUE");	// NOTE(review): purpose of this variable is not visible in this file; kept as-is
	$EncryptedWords = is_readable($FTfile) ? file_get_contents($FTfile) : false;
	putenv("ALPHADATA9=");
	if ($EncryptedWords === false || $EncryptedWords === '')
	{
		trigger_error('ObsceneClean: unable to read data file ' . $FTfile, E_USER_WARNING);
		return array();
	}
	/* -----------------------------    decrypt all ----------------------------------- */
	$key = 'hfhkjkHU7765x21two47';
	$AllWords = my_decrypt($EncryptedWords, $key);
	if (!is_string($AllWords))
	{
		return array();
	}
	/* -----------------------------    put em in an array  ---------------------------- */
	$OWsInAspell = array();
	foreach (explode("\n", $AllWords) as $line)	// one iteration per word
	{
		$OWsInAspell[] = rtrim($line);
	}
	return $OWsInAspell;
}
// ===================================================================
/**
 * Build the regex fragment that is inserted between two consecutive letters of an OW.
 *
 * The fragment starts with ']' (closing the previous letter's character class) and ends with
 * '[' (opening the next one). In between it allows the previous letter to repeat up to
 * $MaxOWLetterRepeat times, followed by optional disguising characters.
 *
 * @param bool   $usethindisguise               Allow "good" disguising characters between letters.
 * @param bool   $usepoordisguise               Additionally allow "poor" disguising characters.
 * @param string $PoorDisguisingCharsOnly       Characters placed inside the "poor" character class.
 * @param string $GoodDisguisingCharsOnly       Characters placed inside the "good" character class.
 * @param int    $MaxGoodDisguisingChars        Upper repeat bound for good disguising characters.
 * @param int    $MaxPoorDisguisingChars        Upper repeat bound for poor disguising characters.
 * @param int    $DisguiseCharAlternationFactor How many good/poor pairs to chain.
 * @param int    $MaxOWLetterRepeat             Upper repeat bound for the preceding letter.
 * @return string|null The fragment, or null when $usethindisguise is false (no template is
 *                     defined for that combination).
 */
function FormREGEX_Template($usethindisguise, $usepoordisguise, &$PoorDisguisingCharsOnly, &$GoodDisguisingCharsOnly, $MaxGoodDisguisingChars, $MaxPoorDisguisingChars, $DisguiseCharAlternationFactor, $MaxOWLetterRepeat)
{
	$GoodDisguisingChars = '[' . $GoodDisguisingCharsOnly . ']{0,' . $MaxGoodDisguisingChars . '}';	// disguise chars. usually used to separate letters of OW, e.g. P_O-O P on you!!
	$PoorDisguisingChars = '[' . $PoorDisguisingCharsOnly . ']{0,' . $MaxPoorDisguisingChars . '}';
	$regexPattern = null;	// explicit: no template exists when thin disguise is off
	if ($usethindisguise and !$usepoordisguise)
	{
		// The group quantifier must be closed with '}'; without it PCRE reads "{0,N" as literal text.
		$regexPattern = ']{1,' . $MaxOWLetterRepeat . '} (' . $GoodDisguisingChars . '){0,' . $MaxGoodDisguisingChars . '}[';
	}
	else if ($usethindisguise and $usepoordisguise)
	{
		// Poor and good disguising chars may appear in alternating order.
		$regexPattern = ']{1,' . $MaxOWLetterRepeat . '}'
			. str_repeat($GoodDisguisingChars . $PoorDisguisingChars, max(0, (int) $DisguiseCharAlternationFactor))
			. '[';
	}
	return $regexPattern;
}
// ===================================================================
/**
 * Build the regex fragment that is inserted right after the FIRST letter of an OW.
 *
 * Same shape as FormREGEX_Template() (starts with ']', ends with '['), except that the first
 * letter must occur exactly once ('{1}'); $MaxOWLetterRepeat is accepted for signature
 * parity but not used.
 *
 * @param bool   $usethindisguise               Allow "good" disguising characters between letters.
 * @param bool   $usepoordisguise               Additionally allow "poor" disguising characters.
 * @param string $PoorDisguisingCharsOnly       Characters placed inside the "poor" character class.
 * @param string $GoodDisguisingCharsOnly       Characters placed inside the "good" character class.
 * @param int    $MaxGoodDisguisingChars        Upper repeat bound for good disguising characters.
 * @param int    $MaxPoorDisguisingChars        Upper repeat bound for poor disguising characters.
 * @param int    $DisguiseCharAlternationFactor How many good/poor pairs to chain.
 * @param int    $MaxOWLetterRepeat             Unused here.
 * @return string|null The fragment, or null when $usethindisguise is false (no template is
 *                     defined for that combination).
 */
function FormREGEX_Template_for_1st_letter($usethindisguise, $usepoordisguise, &$PoorDisguisingCharsOnly, &$GoodDisguisingCharsOnly, $MaxGoodDisguisingChars, $MaxPoorDisguisingChars, $DisguiseCharAlternationFactor, $MaxOWLetterRepeat)
{
	$GoodDisguisingChars = '[' . $GoodDisguisingCharsOnly . ']{0,' . $MaxGoodDisguisingChars . '}';	// disguise chars. usually used to separate letters of OW, e.g. P_O-O P on you!!
	$PoorDisguisingChars = '[' . $PoorDisguisingCharsOnly . ']{0,' . $MaxPoorDisguisingChars . '}';
	$regexPattern = null;	// explicit: no template exists when thin disguise is off
	if ($usethindisguise and !$usepoordisguise)
	{
		// The group quantifier must be closed with '}'; without it PCRE reads "{0,N" as literal text.
		$regexPattern = ']{1} (' . $GoodDisguisingChars . '){0,' . $MaxGoodDisguisingChars . '}[';
	}
	else if ($usethindisguise and $usepoordisguise)
	{
		$regexPattern = ']{1}'
			. str_repeat($GoodDisguisingChars . $PoorDisguisingChars, max(0, (int) $DisguiseCharAlternationFactor))
			. '[';
	}
	return $regexPattern;
}

//------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
//	Combine the template regex with each OW and produce an array of REGEXs to find OWs
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
/**
 * Combine the template fragments with each OW and return one regex per OW.
 *
 * For every word the main alternative is built as '[' letter (+ look-alike substitutes)
 * fragment letter ... ']', using $regexPattern_for_1st_letter after the first letter and
 * $regexPattern between the remaining letters. When $VowelSubstitutionRule >= 1 a second
 * alternative is appended in which each lowercase vowel may be replaced by
 * $VowelSubstitutionChars, guarded by a lookahead limiting how many substitution characters
 * may occur. Patterns use the 'iux' modifiers, plus 'U' when $UseGreedyREGEXs is truthy.
 *
 * NOTE(review): the PCRE 'U' modifier makes quantifiers UNgreedy, which reads as the opposite
 * of the flag's name; confirm the intended meaning with the settings documentation.
 *
 * None of the by-reference arguments are modified.
 *
 * @param array  $OWArray                      Records with an 'OW' key (see GetOWsAndParms()).
 * @param string $regexPattern                 Fragment placed between inner letters.
 * @param bool   $uselookalikes                Add look-alike substitutes to each letter class.
 * @param string $VowelSubstitutionChars       Regex source for the vowel substitution character(s).
 * @param array  $SubstitutableChars           Letters that have a look-alike.
 * @param array  $SubstitutionChars            Look-alikes, index-aligned with $SubstitutableChars.
 * @param string $PoorDisguisingCharsOnly      Unused here.
 * @param string $GoodDisguisingCharsOnly      Characters allowed between letters in the vowel alternative.
 * @param int    $VowelSubstitutionRule        0 = off; 1, 2, 3 select the lookahead variant.
 * @param int    $VowelSubstitutionLen         Minimum word length for the rule-3 extra substitution.
 * @param bool   $UseGreedyREGEXs              Appends the 'U' modifier when truthy.
 * @param int    $MaxGoodDisguisingChars       Upper repeat bound for good disguising characters.
 * @param string $regexPattern_for_1st_letter  Fragment placed after the first letter.
 * @return array Regex strings, index-aligned with $OWArray (empty array for empty input).
 */
function MakeArrayOfREGEXsToFindOWs($OWArray, $regexPattern, $uselookalikes, &$VowelSubstitutionChars, &$SubstitutableChars, &$SubstitutionChars, &$PoorDisguisingCharsOnly, &$GoodDisguisingCharsOnly, &$VowelSubstitutionRule, &$VowelSubstitutionLen, &$UseGreedyREGEXs, $MaxGoodDisguisingChars, &$regexPattern_for_1st_letter )
{
	$GoodDisguisingChars = '[' . $GoodDisguisingCharsOnly . ']{0,' . $MaxGoodDisguisingChars . '}';
	$array_elements = count($OWArray);		// counted once, outside the loop
	$SubstitutableCharsCount = is_array($SubstitutableChars) ? count($SubstitutableChars) : 0;
	$REGEXs_array = array();
	for ($i=0; $i<$array_elements; $i++)	// builds one regex per OW
	{
		$Word = $OWArray[$i]['OW'];
		$WordLength = strlen($Word);
		$REGEXwithVowelSubstitutionChars = '';
		$REGEXwithOW = '[';				// BEGINNING of the OW regex: open the first letter's class
		for ($i2=0; $i2<$WordLength; $i2++)  // sew the letters of the OW into the regex
		{
			$Letter = $Word[$i2];
			$REGEXwithOW .= $Letter;
			if ($uselookalikes)
			{
				// if the letter has look-alikes ($ for S, @ for a, etc.) add them to its class
				for ($i3=0; $i3<$SubstitutableCharsCount; $i3++)
				{
					if ($Letter == $SubstitutableChars[$i3])
					{
						$REGEXwithOW .= $SubstitutionChars[$i3];
					}
				}
			}
			// The fragments close this letter's class and open the next one, so none may
			// follow the last letter (a one-letter word would otherwise end in an empty '[]').
			if ($i2 < $WordLength-1)
			{
				$REGEXwithOW .= ($i2 == 0) ? $regexPattern_for_1st_letter : $regexPattern;
			}
			if ($VowelSubstitutionRule >= 1)
			{
				// Second alternative: the OW with any vowel replaceable by a substitution char.
				switch ($Letter)
				{
					case 'a':
					case 'e':
					case 'i':
					case 'o':
					case 'u':
						$REGEXwithVowelSubstitutionChars .= '[' . $Letter . $VowelSubstitutionChars . ']';
						break;
					default:
						$REGEXwithVowelSubstitutionChars .= $Letter;
				}
				if ($i2 < $WordLength-1)
				{
					$REGEXwithVowelSubstitutionChars .= $GoodDisguisingChars;
				}
			}
		}
		$REGEXwithOW .= ']';
		$REGEXwithOW = '/' . '(' . $REGEXwithOW . ')';
		if ($VowelSubstitutionRule >= 1)     // the positive lookahead limits how many substitution chars may occur in what follows
		{
			$REGEXwithOW .= ' | ((?=([^' . $VowelSubstitutionChars . ']*?' . $VowelSubstitutionChars . '[^' . $VowelSubstitutionChars . '\s]*?';
			if ($VowelSubstitutionRule == 2)
			{
				$REGEXwithOW .= '(' . $VowelSubstitutionChars . '[^' . $VowelSubstitutionChars . ']*?)?';
			}
			// Comparison, not assignment: '= 3' used to overwrite the caller's rule through
			// the reference and force rule 3 on every word.
			if (($VowelSubstitutionRule == 3) and ($WordLength >= $VowelSubstitutionLen))
			{
				$REGEXwithOW .= '(' . $VowelSubstitutionChars . '[^' . $VowelSubstitutionChars . ']*?)?';
			}
			$REGEXwithOW .= '))';
			$REGEXwithOW .= $REGEXwithVowelSubstitutionChars . ')';
		}
		$REGEXwithOW .= '/iux';
		if ($UseGreedyREGEXs)
		{
			$REGEXwithOW .= 'U';
		}
		$REGEXs_array[$i] = $REGEXwithOW;		// resulting array of regexes (1 per OW)
	}
	return $REGEXs_array;
}

/* ========================================================================================================== */
// Detect false positives like "hot water" and "hispanic until".  Because of disguising, word boundaries may be useless. 
//PSEUDO CODE:   get words surrounding the match and spell check'em     e.g. MatchedOW=twat so extract "hot water" from "get in hot water now"  (NO we don't care about word boundaries here).
// 	If dictionary words & NOT the OW being matched then  accidental 				e.g. hot & water are both words but not OWs
//   however
// if match is accidental then ignore it but if it is NOT accidental then continue testing of the match 
// $SafeAppliedStr not passed by ref cause its modified in this func
/* ========================================================================================================== */
/**
 * Decide whether a regex match of an OW is an accidental one (a false positive
 * formed by ordinary dictionary words, e.g. "twat" inside "hot water").
 *
 * Steps, as implemented below:
 *   1. Replace "-" and "." in the checked string with spaces.
 *   2. Find the nearest whitespace character after the end of the match and the
 *      nearest one before its start, and extract the text between them.
 *   3. Split that text into words and spell check each word with pspell.
 *
 * @param string $SafeAppliedStr  The text being checked (a local copy is modified).
 * @param string $matchedOW       The text that matched; only its length is used here.
 * @param int    $posINcheckstr   Offset of the match inside $SafeAppliedStr.
 * @param array  $OWsInAspell     List of OW strings compared (lower-case) against each
 *                                dictionary word -- presumably the OWs that the aspell
 *                                dictionary itself accepts; confirm against the caller.
 * @param mixed  $Severity        Not used in this function.
 * @param string $OWReferenced    The OW the regex was built from.
 * @param mixed  $Ambiguity       Not used in this function.
 *
 * @return bool  true when every surrounding word passed the checks (match is accidental),
 *               false as soon as one word indicates the match is NOT accidental.
 *
 * NOTE(review): $WhitespaceArray is not defined in this function; it is presumably
 * set by the included settings file -- confirm.
 */
function AccidentalOW($SafeAppliedStr, $matchedOW, $posINcheckstr, &$OWsInAspell, $Severity, $OWReferenced, $Ambiguity)  
{
	include "ObsceneClean.settings.php";
	echo '  <br> 1 $SafeAppliedStr=' . $SafeAppliedStr;
	$SafeAppliedStr = str_replace("-", " ", $SafeAppliedStr);		// cleanup user dashes & crap that will cause problems with spell checker  
	$SafeAppliedStr = str_replace(".", " ", $SafeAppliedStr);
	echo '  <br> 1 var_dump $SafeAppliedStr=';
	var_dump($SafeAppliedStr);
	if (is_array($WhitespaceArray)) { echo ' $WhitespaceArray  is an array '; }
		else { echo ' $WhitespaceArray is NOT an array '; }
	$NumOfOWs = count($OWsInAspell);
	echo '<br> down in AccidentalOW $posINcheckstr=' . $posINcheckstr;
	echo '<br> $OWsInAspell=';
	print_r($OWsInAspell);
	echo '<br> '; 
	$pspell_link = pspell_new("en"); //0. Get the dictionary
	$NumOfWhitespaces = count($WhitespaceArray);		// now lets get words surroundng match
	$len = strlen($matchedOW);
	echo '<br> down in $NumOfWhitespaces=' . $NumOfWhitespaces;
	$endpos = strlen($SafeAppliedStr) + 1;		/// assume this is end position for now
	echo 'just before loop $endpos=' . $endpos;
	// Try every whitespace character and keep the smallest position found at or after the end of the match.
	for ($x=0; $x < $NumOfWhitespaces; $x++) // find 1st whitespc after end of $matchedOW?  
		{
		$tmppos = strpos($SafeAppliedStr, $WhitespaceArray[$x], ($posINcheckstr+$len));
		if ($tmppos < $endpos and $tmppos !== false) { echo 'GOT ASSIGNED!!!';  $endpos = $tmppos; }
		echo '<br>';
		echo 'in loop $tmppos=' . $tmppos;
		echo '<br>';
		echo 'and the var dump $tmppos='; var_dump($tmppos);
		echo '<br>';
		echo 'in loop $endpos=' . $endpos;
		}
	echo '<br> hEY HEY HEY WWWWWWWWW';
	echo ' $posINcheckstr=' . $posINcheckstr;
	echo '<br>';
	echo ' $endpos=' . $endpos;
	echo '<br>';
	$startpos = 0;			// assume this starting position for now
	// Try every whitespace character and keep the largest position rstrpos() reports before the match.
	for ($x=0; $x < $NumOfWhitespaces; $x++) // find 1st whitespc before start of $matchedOW?  
		{
		echo '<br> start wspc=';
		var_dump($WhitespaceArray[$x]);
		$tmppos = rstrpos($SafeAppliedStr, $WhitespaceArray[$x], $posINcheckstr);  
		if ($tmppos > $startpos and $tmppos!== false) { $startpos = $tmppos; }
		echo '<br> wtf666 ';
		echo ' $startpos=' . $startpos;
		echo '<br>';
		echo 'uh uh uh var_dump of $WhitespaceArray[$x]=';
		var_dump($WhitespaceArray[$x]);
		echo ' $posINcheckstr=' . $posINcheckstr;
		}
	echo '<br> hEY HEY HEY WWWWWWWWW';
	echo ' $posINcheckstr=' . $posINcheckstr;
	echo '<br>';
	echo ' $startpos=' . $startpos;
	echo '<br>';
	echo 'before substr loop $endpos=' . $endpos;
	echo ' the len used will be=' . ($endpos - $startpos);
	$WordsAroundMatchedOW = substr($SafeAppliedStr, $startpos, ($endpos - $startpos)); // extract all chars form $startpos to $endpos into a  string, e.g. if twat is OW & $SafeAppliedStr is "you are in hot water man!" then you extract "hot water" . NOTE assigned var is 'Words...' plural!
	echo '<br>';
	echo ' here $WordsAroundMatchedOW=' . $WordsAroundMatchedOW;
	echo '<br>';
	var_dump($WordsAroundMatchedOW);
	echo '<br>'; 
	// Format 2 of str_word_count() keys each word by its character offset inside the extracted string.
	$AWordAroundMatchedOW = str_word_count($WordsAroundMatchedOW, 2);  //put each word in an element of array for spell check later
	echo '<br>';
	echo ' $AWordAroundMatchedOW =';
	var_dump($AWordAroundMatchedOW);
	echo '<br>';
	var_dump($AWordAroundMatchedOW);
	echo '<br>'; 
	$posInNumOfWords = ($posINcheckstr - $startpos);  // what is position of OW in new, extrated string?  OW started at 13, space before it was 11 so OW is in position 2 in of $WordsAroundMatchedOW
	$prevword = "";
	foreach ($AWordAroundMatchedOW as $x => $value) 	// spell check each word,  if any 1 word checked is a not a valid non-OW, dictionary word then NOT accidental. $x does not increment ++1. It is a named index AND an element's position in the table, so for "hot" $x = 0 and for "water" $x = 4. The str_word_count func sets the pointer like this --I think.
	{
		if	((strlen($AWordAroundMatchedOW[$x]) == 1)					// alphabet letters are considered words by pspell !
			and	(strtolower($AWordAroundMatchedOW[$x]) != 'a')
			and	(strtolower($AWordAroundMatchedOW[$x]) != 'i'))
			{
				echo '<BR>';
				echo 'FALSE POSITIVE CHECK: (A) Not accidental OW - 1 letter TESTED=' . $matchedOW;
				echo '<BR>';
				return false; 						
			}
			else
			{
				if (pspell_check($pspell_link, strtoupper($AWordAroundMatchedOW[$x])))		// compare upper because 'americas' is a misspell but 'AMERICAS' isn't -- we just have to account for some sloppy bloggers!
				{ 	
					echo '<br> pspell says its a word ' . $AWordAroundMatchedOW[$x];
					for ($y=0; $y<$NumOfOWs; $y++)			// OWs are in the dictionary so if spelled right still ask if it is the OW (yes a custom dictionary with no OWs would make this whole thing much simpler)
					{ 
						echo '<BR>';
						echo ' wtf satan time $AWordAroundMatchedOW[$x]=';
						var_dump($AWordAroundMatchedOW[$x]);
						echo ' ------- ';
						echo ' $OWsInAspell[$y]=';
						var_dump($OWsInAspell[$y]);
						echo '<BR>';
						echo '   $x=' . $x;
						echo '<BR>';
						echo '<br>';
						echo ' $posInNumOfWords=';
						var_dump($posInNumOfWords);
						if (strtolower($AWordAroundMatchedOW[$x]) == $OWsInAspell[$y] and $posInNumOfWords == $x)  // If $AWordAroundMatchedOW[$x] is in same starting position as match position & its the same OW we are trying to match THEN (then I say) NOT accidental !
						{ 
							echo '<BR> FALSE POSITIVE CHECK: (B) Not accidental OW - PSPELL says word but a recognized OW. TESTED=' . $AWordAroundMatchedOW[$x];
							echo '<BR>';
							return false; 
						}
						if ((($prevword . $AWordAroundMatchedOW[$x]) == $OWReferenced) and ($posInNumOfWords != $x))  // if 'god' and 'damn' are seprated by a space, psspell wil say they are both words and its therefore accidental which is not the case, so this says if prev word + current word in the for loop == the OW being matched then it is the OW but with a space and therefore NOT accidental. Also consider 'butt hole' or 'ass hole'.
						{ 
							echo '<BR> FALSE POSITIVE CHECK: (B) Not accidental OW - PSPELL says word but a recognized OW. TESTED=' . $OWReferenced;
							echo '<BR>';
							return false; 
						}
					}
				}
				else 														
				{	
					// Both branches below return false; they differ only in the diagnostic that is echoed.
					echo '<br> pspell says its is not a word'; 															// but is it a combo word? like asshead, assman, bitchslap, etc 
					if (((sizeof($AWordAroundMatchedOW)) == 1) and
						(IsComboWord($AWordAroundMatchedOW[$x], $OWReferenced)))
						{ 
							echo '<br> SUBRULE: FALSE POSITIVE CHECK: (F) Not accidental OW - Is a COMBO WORD - TESTED=' . $AWordAroundMatchedOW[$x];
							return false; 				
						}
					else
						{ 
							echo '<br> SUBRULE: FALSE POSITIVE CHECK: (G) Not accidental OW - Not a dictionary word - TESTED=' . $AWordAroundMatchedOW[$x];
							return false; 				
						}
				}
			}
		// Remember this word so the next iteration can test "previous word + current word" against $OWReferenced.
		$prevword = $AWordAroundMatchedOW[$x];
	}
	// ASSIGN PROBABILITY TO ACCIDENTAL OW???? 10% ???
	echo '<br> FALSE POSITIVE CHECK: (D) Accidental OW - PSPELL says all were words return true. TESTED=' . $WordsAroundMatchedOW;
	echo '<br>';
	return true;;
}
			
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
// although the REGEX tries not to match unrecognizable OWs, some inevitably slip thru. This func looks at reasonable recognizability of an OW.
// BTW, $SafeAppliedStr not passed by ref here cause it gets changed in this func
// To determine recognizability we ask: How randomized are chars before and after match? 
//  If a lot then match is UNRECOGNIZABLE, 
//  e.g. 'gasdfgsasdbitchsddsfjhadh' is NOT so recognizable but iiiiiiiibitchiiiiiiiiiiii is much more recognizable!
// Pseudo code:
//	1 so long can't even be recognized? 
//	2 if severity is high (like 'F' word) and match contains no spaces (spaces indicate it could be part of two words)
//	3 Bounded by spaces and thus recognizable? 
// 4 Not bounded by spaces and chars before and after are so scrambled the OW is not recognizable
//     future look more at spaces in during  check 4
//     future combine rules?
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
/**
 * Decide whether a matched OW is reasonably recognizable to a human reader.
 *
 * Rules are applied in this order:
 *   1. The match is bounded by whitespace and contains no whitespace itself  -> recognizable.
 *   2. Severity is at least $SeverityOfMoreRecognizableOWs, the match contains no
 *      whitespace and $Ambiguity == 0                                         -> recognizable.
 *   3. Otherwise up to $NumCharsUsedForRecognition characters before and after the
 *      match are examined. For each side the ratio (length / number of distinct
 *      bytes) is computed; a high ratio means repetitive filler (e.g. "iiiiii"),
 *      a low ratio means scrambled text. If either side is too short, or either
 *      ratio exceeds $NonRepetitiveFactor, the OW is recognizable.
 *
 * FIX: the "after" part used to be cut from an undefined variable ($checkstr) and
 * counted with count_chars() mode 2 (which returns an array), so the text after the
 * match was never analysed. It now reads $SafeAppliedStr and uses mode 3 (a string
 * of the distinct bytes), exactly as the "before" part does.
 *
 * @param string $SafeAppliedStr  The text being checked.
 * @param string $word            The matched text.
 * @param int    $pos             Offset of the match inside $SafeAppliedStr.
 * @param mixed  $Severity        Severity of the OW, compared with the settings threshold.
 * @param string $OWReferenced    The OW the regex was built from (only echoed here).
 * @param mixed  $Ambiguity       0 is treated as unambiguous.
 *
 * @return bool true when recognizable, false when not.
 *
 * NOTE(review): $NumCharsUsedForRecognition, $NumCharsRecogDivisor, $NonRepetitiveFactor
 * and $SeverityOfMoreRecognizableOWs are presumably defined by the included settings file.
 */
function OWrecognizable($SafeAppliedStr, $word, $pos, $Severity, $OWReferenced, $Ambiguity)
{
	include "ObsceneClean.settings.php";
	$WordLen = strlen($word);
	$WhiteSpcs = CountWhiteSpcs($word);
	echo '  <br> this whitespace working?';
	echo '  <br> $WhiteSpcs=' . $WhiteSpcs;
	echo '  <br> strlen( $OWReferenced)=' . strlen( $OWReferenced);
	echo ' <br> $WordLen=' . $WordLen; 
	echo ' <br> $word=' . $word; 
	if ((BoundedByWhiteSpaces($SafeAppliedStr, $word, $pos)) and $WhiteSpcs == 0)
		{
			echo ' <br> SUBRULE MATCH: Recognizable - Bounded by spaces <br> ';
			return true;
		}
	echo '  <br> $Severity=' . $Severity;
	echo '  <br> $NumCharsUsedForRecognition=' . $NumCharsUsedForRecognition;
	echo '  <br> $NonRepetitiveFactor=' . $NonRepetitiveFactor;
	echo '  <br> $SeverityOfMoreRecognizableOWs=' . $SeverityOfMoreRecognizableOWs;
	if 	($Severity >= $SeverityOfMoreRecognizableOWs and $WhiteSpcs == 0 and $Ambiguity == 0)
		{
			echo ' <br> Recognizable - Severity high, unambiguous and has no embedded whiite spaces <br> ';
			return true;
		}
	// The match is sandwiched between other characters: measure how random the
	// characters directly before and after it are.
	$BeforLen = $NumCharsUsedForRecognition;
	$BefrStartPos = $pos - $NumCharsUsedForRecognition;  					// how far in front of match to look
	echo '  <br> 1st $BefrStartPos=' . $BefrStartPos;
	if ($BefrStartPos < 0)
		{
			// Fewer characters than requested exist before the match: shrink the window.
			$BeforLen = ($NumCharsUsedForRecognition + $BefrStartPos);
			$BefrStartPos = 0;
		}
	$PartBefrStr = substr($SafeAppliedStr, $BefrStartPos, $BeforLen);				// chars just before match
	$UniqueBefrChars = count_chars($PartBefrStr, 3);								// string of the distinct bytes
	echo '  <br> 	$BefrStartPos=' . $BefrStartPos;
	echo '  <br> 	$BeforLen=   ' . $BeforLen;		
	echo '  <br> 	$PartBefrStr='; var_dump($PartBefrStr); 
	echo ' <br> $UniqueBefrChars=' . $UniqueBefrChars;
	$PartBefrStrLen = strlen($PartBefrStr);							// how many chars before match?
	$UniqueBefrCharsLen = strlen($UniqueBefrChars);					// how many distinct chars before match?
	if ($PartBefrStrLen <= ($NumCharsUsedForRecognition / $NumCharsRecogDivisor))
		{ echo ' <br> SUBRULE OW recognizable -- part before OW too short. TESTED=' . $word;  return true; }
	if ($PartBefrStrLen > 0)
	{ $Befrfinalfactor = $PartBefrStrLen / $UniqueBefrCharsLen; }	// repetitive text -> high number, scrambled text -> low number
	else
	{ $Befrfinalfactor = $NonRepetitiveFactor + 1; }				// nothing before match: recognizable, so force a high number
	// Same measurement for the characters after the match.
	$AftrStartPos = $pos + $WordLen;
	$PartAftrStr = substr($SafeAppliedStr, $AftrStartPos, $NumCharsUsedForRecognition);
	if ($PartAftrStr === false) { $PartAftrStr = ''; }				// older PHP returns false when the start is past the end
	$UniqueAftrChars = count_chars($PartAftrStr, 3);
	echo ' <br> $UniqueAftrChars=' . $UniqueAftrChars;
	echo '  <br> 	$AftrStartPos=' . $AftrStartPos;
	echo '  <br> 	$PartAftrStr=';  var_dump($PartAftrStr); 
	echo '  <br> 	$NonRepetitiveFactor=';  var_dump($NonRepetitiveFactor);
	$PartAftrStrLen = strlen($PartAftrStr);
	$UniqueAftrCharsLen = strlen($UniqueAftrChars);
	if ($PartAftrStrLen <= ($NumCharsUsedForRecognition / $NumCharsRecogDivisor))
		{ echo ' <br> SUBRULE OW recognizable -- part after OW too short. TESTED=' . $word;  return true; }
	if ($PartAftrStrLen > 0)
	{ $Aftrfinalfactor = $PartAftrStrLen / $UniqueAftrCharsLen; }
	else
	{ $Aftrfinalfactor = $NonRepetitiveFactor + 1; }
	echo ' <BR>------------------------------------------------------------';
	echo ' <BR> if low num then unrecog, if high num recog';
	echo '<br>  $Aftrfinalfactor=';   var_dump($Aftrfinalfactor);
	echo '<br> $Befrfinalfactor=';   var_dump($Befrfinalfactor);
	// bccomp() returns 1 when the left operand is larger (compared to 2 decimals).
	if (bccomp($Befrfinalfactor, $NonRepetitiveFactor, 2) == 1 or  bccomp($Aftrfinalfactor, $NonRepetitiveFactor, 2) == 1)
		{ echo ' <br> SUBRULE OW recognizable - characters before and after OW not randomised sufficiently   TESTED=' . $word;  return true; }
	else
		{ echo ' <br> SUBRULE OW unrecognizable - characters before and after OW are randomised  sufficiently TESTED=' . $word;  return false; }
}

//==============================================================
/**
 * Count how many whitespace characters $text contains.
 *
 * The set of whitespace characters is $WhitespaceArray, presumably defined by the
 * included settings file. Each entry is identified by the byte value of its first
 * character (ord()).
 *
 * FIX: the loop used "<=" and therefore read one element past the end of
 * $WhitespaceArray; the missing element evaluated to byte 0, so NUL bytes in $text
 * were counted as whitespace. Whitespace characters that do not occur in $text are
 * now skipped instead of reading an undefined count_chars() key.
 *
 * @param string $text  Text to inspect.
 * @return int          Number of whitespace characters found (0 when the whitespace
 *                      list is not available).
 */
function CountWhiteSpcs($text)	
{
	include "ObsceneClean.settings.php";
	echo ' <BR>';
	if (!isset($WhitespaceArray) || !is_array($WhitespaceArray))
		{
			echo 'ERROR $WhitespaceArray is an NOT array in CountWhiteSpcs';
			return 0;
		}
	$Total = 0;
	$CharCnt = count_chars($text, 1);		// mode 1: only bytes that occur, keyed by byte value
	$NumOfWhitespaces = count($WhitespaceArray);
	echo ' $NumOfWhitespaces=' . $NumOfWhitespaces;
	foreach ($WhitespaceArray as $Whitespace)
		{
			$key = ord($Whitespace);
			if (isset($CharCnt[$key]))
				{ $Total += $CharCnt[$key]; }
		}
	return $Total; 
}

/**
 * Tell whether the word found at $pos in $checkstr has whitespace (or the string
 * boundary) directly on both sides.
 *
 * The start of the string counts as a left boundary and the end of the string as a
 * right boundary. Whitespace characters come from $WhitespaceArray, presumably
 * defined by the included settings file.
 *
 * FIX: both loops used "<=" and read one element past the end of $WhitespaceArray.
 *
 * @param string $checkstr  Text that contains the word.
 * @param string $word      The word; only its length is used.
 * @param int    $pos       Offset of the word inside $checkstr.
 * @return bool
 */
function BoundedByWhiteSpaces($checkstr, $word, $pos)
{
	include "ObsceneClean.settings.php";
	$Whitespaces = (isset($WhitespaceArray) && is_array($WhitespaceArray)) ? $WhitespaceArray : array();

	// Left side: the character just before the word.
	$LeftBounded = 0;
	if ($pos == 0)
	{
		$LeftBounded = 1; 
	}
	else
	{
		$CharBefore = $checkstr[$pos - 1];
		foreach ($Whitespaces as $Whitespace)
			{
				if ($CharBefore == $Whitespace)
					{  $LeftBounded = 1; break; }
			}
	} 

	// Right side: the character just after the word.
	$RightBounded = 0;
	$PosAfterWord = $pos + strlen($word);
	if ($PosAfterWord > (strlen($checkstr) - 1)) 
		{ 
			$RightBounded = 1; 
		}
	else
		{
			$CharAfter = $checkstr[$PosAfterWord];
			foreach ($Whitespaces as $Whitespace)
				{
					if ($CharAfter == $Whitespace)
						{  $RightBounded = 1; break; }
				}
		}

	if (($LeftBounded) and ($RightBounded))  { return true; }
	return false;
}

 
/* ========================================================================================================== */
// Is a combo word like 'assclown' or 'clownass' ? If so then not a an accident
/* ========================================================================================================== */
/**
 * Tell whether $TestWord is a "combo word": the referenced OW glued to the front or
 * the back of a dictionary word (e.g. 'assclown' or 'clownass').
 *
 * The prefix is tested first; the suffix is only considered when the prefix does not
 * equal the OW.
 *
 * Changes from the previous version:
 *  - every path now returns an explicit bool (several paths used to fall off the end
 *    and return null);
 *  - the pspell dictionary is opened only when a spell check is actually needed;
 *  - a dictionary that cannot be opened yields false instead of an error in pspell_check().
 *
 * @param string $TestWord      Word to test.
 * @param string $OWReferenced  The OW being matched.
 * @return bool  true when the remainder of $TestWord is accepted by pspell.
 */
function IsComboWord($TestWord, $OWReferenced)
{
echo '<br> $TestWord=' . $TestWord;
echo '<br> $OWReferenced=' . $OWReferenced;
if ($TestWord == $OWReferenced) 
		{
			echo '<br> match >IS< OW referenced';
			return false;
		}	
$len = strlen($OWReferenced);
$BeforePart = substr($TestWord,0,$len);		// first $len chars
$AfterPart = substr($TestWord,-$len);		// last $len chars
echo '<br> $BeforePart=' . $BeforePart;
echo '<br> $AfterPart=' . $AfterPart;
if ($BeforePart == $OWReferenced)
	{ 
		$Position = 'before';
		$spellChk = substr($TestWord,$len);								// what follows the OW
	}
elseif ($AfterPart == $OWReferenced)
	{ 
		$Position = 'after';
		$spellChk = substr($TestWord,0,(strlen($TestWord) - $len));		// what precedes the OW
	}
else
	{
		echo '<br>SUBRULE: was not a combo word';
		return false;
	}
$pspell_link = pspell_new("en"); //0. Get the dictionary
if ($pspell_link === false) { return false; }	// no dictionary: cannot confirm a combo word
if (pspell_check($pspell_link, strtoupper($spellChk))) 
	{
		echo '<br> in ' . $Position . ' $spellChk=' . $spellChk;
		return true; 
	}
echo '<br> ' . $Position . ' spell chk false';
return false;
}
 
/* ========================================================================================================== */
// search backwards for needle in haystack, and return its position, Note: supports full strings as needle
/* ========================================================================================================== */
/**
 * Search backwards in $haystack for the last occurrence of $needle that lies
 * entirely before $offset.
 *
 * The value returned is the index of the character directly AFTER the needle
 * (for a one-character needle found at index i the result is i + 1).
 *
 * FIX: the haystack was reversed but the needle was not, so needles longer than one
 * character were searched for back to front. The needle is now reversed as well.
 * Offsets outside the string and an empty needle no longer reach strpos() with an
 * invalid start position.
 *
 * @param string $haystack
 * @param string $needle
 * @param int    $offset    Position in $haystack; only text before it is searched.
 * @return int|false        Index just past the needle, or false when not found.
 */
function rstrpos ($haystack, $needle, $offset)
{
    $size = strlen ($haystack);
    if ($needle === '' || $needle === null)
        return false;
    $start = $size - $offset;       // where to start in the reversed string
    if ($start < 0)
        $start = 0;                 // offset beyond the end: search the whole string
    if ($start > $size)
        return false;               // negative offset: nothing lies before it
    $pos = strpos (strrev($haystack), strrev($needle), $start);
    if ($pos === false)
        return false;
    return $size - $pos;
}

/* ========================================================================================================== */
// This func has limited usefulness. The REGEX should make the primary determination
// whether the OW is too long to be recognized like: shfla___F____----____________________________________________________a___________________________________________________________________________________g========================
/* ========================================================================================================== */
/**
 * Tell whether the referenced OW makes up too small a share of the matched text.
 *
 * The share is strlen($OWReferenced) / strlen($match) as a rounded percentage and is
 * compared with $AllowableOWLenAsPercentOfMatch (presumably defined by the included
 * settings file).
 *
 * @param string $match         The text that matched.
 * @param string $OWReferenced  The OW the regex was built from.
 * @return bool  true when the share is below the allowed percentage (unrecognizable).
 */
function OWsPercentOfMatchTooSmall($match,  $OWReferenced)
{
	include "ObsceneClean.settings.php";
	$MatchLen = strlen($match);
	$OWRefLen = strlen($OWReferenced);
	echo '<br> $match=' . $match
		. '<br> $OWReferenced=' . $OWReferenced
		. '<br> $MatchLen=' . $MatchLen
		. '<br> $OWRefLen=' . $OWRefLen;

	$RawPercent = ($OWRefLen / $MatchLen) * 100;
	$RoundedPercent = round(($OWRefLen / $MatchLen) * 100);
	echo '<br> the NONrounded dividend is=' . $RawPercent;
	echo '<br> the rounded dividend is=' . $RoundedPercent;

	if ($RoundedPercent >= $AllowableOWLenAsPercentOfMatch)
		{
			echo ' <br> SUBRULE MATCH:  Recognizable - The referenced OW is NOT too small a percent of the match found. TESTED=' . $match;
			return false;
		}
	echo ' <br> SUBRULE MATCH:  Unrecognizable - The referenced OW is too small a percent of the match found. TESTED=' . $match;
	return true;
}

/* ========================================================================================================== */
//  the preg_replace_callback has issues returning spaces
/* ========================================================================================================== */
/**
 * preg_replace_callback() callback: replace the whole match with dashes, one dash
 * per byte of the matched text.
 *
 * @param array $input  Match array; element 0 is the full match.
 * @return string
 */
function ReplaceWithStrOfEqualSize($input)
{
	$MatchedText = $input[0];
	return str_pad('', strlen($MatchedText), "-");
}

/**
 * preg_replace_callback() callback: replace the whole match with asterisks, one
 * asterisk per byte of the matched text.
 *
 * @param array $input  Match array; element 0 is the full match.
 * @return string
 */
function ReplaceWithAsterisks($input)
{
	$MatchedText = $input[0];
	return str_pad('', strlen($MatchedText), "*");
}

/* ========================================================================================================== */
//  Does the detected OW exist within a previously detected OW? If so we don't care. We don't want to re-match the 'damn' in goddamn.
/* ========================================================================================================== */
/**
 * Tell whether a new match starts inside a match that was recorded earlier
 * (e.g. the 'damn' inside an already recorded 'goddamn').
 *
 * Each recorded match covers the positions from its 'pos' up to
 * 'pos' + strlen('OW') - 1.
 *
 * @param array  $matches          Recorded matches; each entry has 'pos' and 'OW'.
 * @param string $matchedOWtmp     Text of the new match (only echoed).
 * @param int    $matchedOWPostmp  Start position of the new match.
 * @return bool  TRUE when the start position falls inside a recorded match.
 */
function AlreadyDetected(&$matches, $matchedOWtmp, $matchedOWPostmp)
{
	echo ' <br> down in Alreadydetected hello ';
	$TotalMatchesSoFar = count($matches);
	echo ' <br> $TotalMatchesSoFar=' . $TotalMatchesSoFar;
	echo ' <br> $matchedOWtmp=' . $matchedOWtmp;
	echo ' <br> $matchedOWPostmp=' . $matchedOWPostmp;

	$Index = 0;
	while ($Index < $TotalMatchesSoFar)
	{
		echo ' <br> wawa2 $matches[$x][pos]=' . $matches[$Index]['pos'] . '<br>';
		$StartsBefore = $matchedOWPostmp < $matches[$Index]['pos'];
		if (!$StartsBefore)
		{
			$LastCovered = $matches[$Index]['pos'] + strlen($matches[$Index]['OW']) - 1;
			if ($matchedOWPostmp <= $LastCovered)
			{
				echo '<BR>' . 'FALSE POSITIVE CHECK: Already detected. TESTED=' . $matchedOWtmp . '<BR>';
				return TRUE;
			}
		}
		$Index++;
	}
	return false;
}

/* ========================================================================================================== */
//  Exact  match of UNambiguous OW w/o disguise? then busted. fuck == fuck!   $OWArray[$i][ambiguity] is a flag from masterOW file. 0=UNambiguous, 'n' word is UNambiguous, ass is ambiguous, its in bible!
/* ========================================================================================================== */
/**
 * Tell whether the matched text is exactly the OW at index $i and that OW is
 * flagged as unambiguous ('ambiguity' == 0).
 *
 * @param string $matchedOWtmp  The text that matched.
 * @param array  $OWArray       OW table; each entry has 'OW' and 'ambiguity'.
 * @param int    $i             Index of the OW being tested.
 * @return bool
 */
function ExactMatchOfUnambiguousOW(&$matchedOWtmp, &$OWArray, &$i)
{
	// An ambiguous OW can never qualify, whatever was matched.
	if ($OWArray[$i]['ambiguity'] != 0)
	{
		return false;
	}
	// Unambiguous, but the match is a disguised or longer form.
	if ($matchedOWtmp != $OWArray[$i]['OW'])
	{
		return false;
	}
	echo '<BR>' . 'RULE MATCHED MATCH: Exact match of unambiguous OW. TESTED=' . $matchedOWtmp . '<BR>';
	return TRUE;
}

/* ========================================================================================================== */
//  // if not exact match but bounded by non-letters AND it is unambiguous then busted. !
/* ========================================================================================================== */
/**
 * Tell whether the match is bounded on both sides by something that is not an
 * ASCII letter (A-Z, a-z). The start and the end of the string count as boundaries.
 *
 * FIX: for a match at position 0 the code read $checkstr[-1]. Since PHP 7.1 a
 * negative string offset addresses the END of the string, so the last character of
 * the text was tested instead of "nothing". For a match at the very end the code read
 * past the string. Both sides are now read with explicit bounds checks and a missing
 * neighbour is treated as a non-letter.
 *
 * @param string $checkstr         Text being checked.
 * @param string $matchedOWtmp     The text that matched.
 * @param int    $matchedOWPostmp  Start position of the match in $checkstr.
 * @return bool  true when neither neighbouring character is an ASCII letter.
 */
function WordBoundedMatch(&$checkstr, $matchedOWtmp, $matchedOWPostmp)
{   			
	$OWStartPos = $matchedOWPostmp;
	$OWEndPos = ($OWStartPos + strlen($matchedOWtmp)) - 1;
	$StrLen = strlen($checkstr);
	$PosBefore = $OWStartPos - 1;
	$PosAfter = $OWEndPos + 1;
	// '' stands for "no neighbour"; ord('') is 0, which is not a letter.
	$CharBefore = ($PosBefore >= 0 && $PosBefore < $StrLen) ? $checkstr[$PosBefore] : '';
	$CharAfter = ($PosAfter >= 0 && $PosAfter < $StrLen) ? $checkstr[$PosAfter] : '';
	$OrdBefore = ($CharBefore === '') ? 0 : ord($CharBefore);
	$OrdAfter = ($CharAfter === '') ? 0 : ord($CharAfter);
	// DEBUG CRAP
	echo '<BR> $OWStartPos - 1=' . $PosBefore . '<BR>';
	echo '<BR> $OWEndPos - 1=' . $PosAfter . '<BR>';
	echo '<BR> $checkstr[$OWStartPos - 1]=';
	var_dump($CharBefore);
	echo '<BR> $checkstr[$OWEndPos + 1]='; 
	var_dump($CharAfter); 
	echo '<BR> baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
	echo '<BR> baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
	echo '<BR> baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
	echo '<BR> NotExactMatchButBoundedByNonLetters';
	echo '<BR> ord($checkstr[$OWStartPos - 1]=' . $OrdBefore;
	echo '<BR> ord("a")=' . ord("a");
	echo '<BR> ord("A")=' . ord("A");
	echo '<BR> ord("z")=' . ord("z");
	echo '<BR> ord("Z")=' . ord("Z");
	echo '<BR> ord($checkstr[$OWEndPos + 1]=' . $OrdAfter;
	// END DEBUG CRAP 
	$BeforeIsLetter = ($OrdBefore >= ord("A") && $OrdBefore <= ord("Z"))
		|| ($OrdBefore >= ord("a") && $OrdBefore <= ord("z"));
	$AfterIsLetter = ($OrdAfter >= ord("A") && $OrdAfter <= ord("Z"))
		|| ($OrdAfter >= ord("a") && $OrdAfter <= ord("z"));
	if (!$BeforeIsLetter && !$AfterIsLetter)
		{
			echo '<BR>';
			echo 'RULE SUB-MATCH: A bounded word.';
			echo '<br>';
			return true;
		}
	echo '<BR>';
	echo 'RULE SUB-MATCH: Not A bounded word.';
	echo '<br>';
	return false;
}

/* ========================================================================================================== */
//  if say, 3 different OWs are found in one string regardless if they are ambiguous or not, user is busted!
/* ========================================================================================================== */
/**
 * Tell whether the matches recorded so far (indexes 0..$i2) contain at least
 * $TooManyUniqueOWs different OWs.
 *
 * FIX: the temporary list is now initialised, so array_unique() no longer receives
 * an undefined variable when the collecting loop does not run.
 *
 * @param int   $i2                Index of the most recent entry in $matches.
 * @param array $matches           Recorded matches; each entry has an 'OW' element.
 * @param int   $TooManyUniqueOWs  Threshold.
 * @return bool
 */
function TooManyUniqueOws(&$i2, &$matches, &$TooManyUniqueOWs)
{ 
	// Fewer recorded matches than the threshold: the unique count cannot reach it.
	if ($i2 < ($TooManyUniqueOWs - 1)) { return FALSE; }
	$OWsOnly = array();
	for ($x=0; $x<=$i2; $x++) // move OWs found so far to temporary array for unique check
	{
		$OWsOnly[$x] = $matches[$x]['OW'];
	}
	$OWsOnlyUnique = array_unique($OWsOnly);			// condense OWs into new array of unique OWs only
	echo '<BR> count($OWsOnlyUnique=' . count($OWsOnlyUnique) . ' $TooManyUniqueOWs=' . $TooManyUniqueOWs . '<BR>';
	if (count($OWsOnlyUnique) >= $TooManyUniqueOWs)	// How many OWs in new array are unique?
	{	 
		echo '<BR>';
		echo 'RULE MATCHED MATCH: TooManyUniqueOWs. TESTED=';
		print_r($OWsOnlyUnique);
		echo '<BR>';
		return TRUE;	
	}
	return FALSE;
}

/* ================================================================================================================= */
/**
 * Tell whether the recorded OWs take up more than the allowed percentage of the
 * checked string.
 *
 * FIX: the summing loop was bounded by $i2, a variable that does not exist in this
 * function, so at most the first match was counted. All entries of $matches are now
 * summed. An empty $checkstr returns false instead of dividing by zero.
 *
 * @param array  $matches                         Recorded matches; each entry has an 'OW' element.
 * @param string $checkstr                        The text being checked.
 * @param mixed  $AllowablePercentOfOWcharsInStr  Threshold percentage (0-100).
 * @return bool  true when the percentage is above the threshold.
 */
function PercentOfOWsInStrTooHigh(&$matches, &$checkstr, &$AllowablePercentOfOWcharsInStr)
{
	$StrLen = strlen($checkstr);
	if ($StrLen == 0 || !is_array($matches)) { return false; }
	$TotalLenOfOWs = 0;
	foreach ($matches as $Match)
	{
		if (isset($Match['OW'])) { $TotalLenOfOWs += strlen($Match['OW']); }
	}
	if ((($TotalLenOfOWs / $StrLen) * 100) > $AllowablePercentOfOWcharsInStr)
	{ 
		echo '<BR>';
		echo 'RULE MATCHED MATCH: Percent of OWs in str too high. TESTED TotalLenOfOWs=' . $TotalLenOfOWs;
		echo '<BR>';
		return true;
	}
	return false;
}

/* ========================================================================================================== */
/**
 * Tell whether $teststr contains any disguising character.
 *
 * The candidate characters are those of $DisguiseChars with every backslash and
 * every letter "t" removed (un-escaping the list), plus tab, space and asterisk.
 *
 * @param string $teststr        Text to inspect.
 * @param string $DisguiseChars  Escaped list of disguising characters.
 * @return bool
 */
function ContainsDisguiseChars(&$teststr, &$DisguiseChars)
{	
	echo ' I SAID HELLO!!!!!!!!  ';
	// Strip the escapes first, then append the three characters that are always checked.
	$Candidates = str_replace(array("\\", "t"), "", $DisguiseChars) . "\x09\x20\x2A";
	echo '<br>' . ' $DisguiseChars=' . $DisguiseChars . '<br>' . ' $newstr=';
	var_dump($Candidates);
	$Total = strlen($Candidates);
	echo '<br>' . '<br>' . ' $num=' . $Total;

	foreach (str_split($Candidates) as $Candidate)
	{
		echo '<br>' . ' $newstr[$x]=' . $Candidate . '<br>';
		if (strpos($teststr, $Candidate) === false)
		{
			continue;
		}
		echo '<br>' . 'RULE SUB-MATCH: Contains Disguise Chars' . '<br>';
		return true;
	}

	echo '<br>' . 'RULE SUB-MATCH: DOES NOT Contain Disguise Chars' . '<br>';
	return false;
}

/* ========================================================================================================== */
/**
 * Tell whether the matched text is longer than the referenced OW multiplied by
 * $BoundedOWLenLimit (used to prevent false positives).
 *
 * @param string $matchedOWtmp       The text that matched.
 * @param array  $OWArray            OW table; each entry has an 'OW' element.
 * @param int    $i                  Index of the OW being tested.
 * @param mixed  $BoundedOWLenLimit  Length multiplier.
 * @return bool
 */
function RidiculouslyLong($matchedOWtmp, $OWArray, $i, $BoundedOWLenLimit)  // prevent false positives
{
	$LongestAllowed = strlen($OWArray[$i]['OW']) * $BoundedOWLenLimit;
	$TooLong = strlen($matchedOWtmp) > $LongestAllowed;

	echo '<br>';
	echo $TooLong ? 'RULE SUB-MATCH: Ridiculously Long' : 'RULE SUB-MATCH: Not Ridiculously Long';
	echo '<br>';

	return $TooLong;
}

/* ========================================================================================================== */
// accompanying words make it an obvious OW, like "  'you' fag "  or  "  'bunch of' fags "  - percent always returned!
// GET RID OF THIS FUNC - IT'S OBSOLETE
/* ========================================================================================================== */
/**
 * Count how many of the quantity words pass ProxChk() for the matched OW.
 * (The header comment above marks this function as obsolete.)
 *
 * @param string $checkstr            Text being checked.
 * @param string $MatchedOW           The matched OW.
 * @param int    $matchedOWPostmp     Position of the match in $checkstr.
 * @param array  $QtyWordsArray       List of quantity words, indexed from 0.
 * @param mixed  $QtyRuleWordsLenChk  Passed through to ProxChk().
 * @param mixed  $QtyRuleDirection    Passed through to ProxChk().
 * @return int   Number of quantity words for which ProxChk() returned a true value.
 */
function QtyRule(&$checkstr, $MatchedOW, $matchedOWPostmp, $QtyWordsArray, $QtyRuleWordsLenChk, $QtyRuleDirection)
{
	echo '<BR> in lib $QtyRuleDirection=' . $QtyRuleDirection . '<BR> ';
	$Hits = 0;
	$WordTotal = count($QtyWordsArray);
	$Index = 0;
	while ($Index < $WordTotal)		// one pass per Qty word
	{
		$Found = ProxChk($checkstr, $QtyWordsArray[$Index], $MatchedOW, $matchedOWPostmp, $QtyRuleWordsLenChk, $QtyRuleDirection);
		if ($Found)
		{
			$Hits = $Hits + 1;
		}
		$Index++;
	}

	return $Hits;
}

/* ========================================================================================================== */
// accompanying insulting words make it an obvious OW, like "  'stupid' fag "  or  "  'retard' fags "  
// Get ALL Insults  and position
/* ========================================================================================================== */
/**
 * Find every insulting word in $checkstr and report it together with its position.
 *
 * Words are compared in lower case against the 'insult' and 'plural' elements of each
 * entry of $InsultingWordsArray. The word is stored as written in the text (original
 * case); 'pos' is the character offset reported by str_word_count() format 2.
 * A word that matches several list entries is reported once per matching entry.
 *
 * FIX: the result array is now initialised, so an empty array (not an undefined
 * variable) is returned when no insult is found. The dead strtolower() assignment
 * that was immediately overwritten has been removed.
 *
 * @param string $checkstr             Text being checked.
 * @param array  $InsultingWordsArray  List (indexed from 0) of entries with 'insult' and 'plural'.
 * @return array List of array('insult' => word, 'pos' => offset).
 */
function GetPos4InsultsInArray(&$checkstr, $InsultingWordsArray)
{
	$InsultsAndPos = array();
	$CheckstrArray = str_word_count($checkstr, 2);		// word offset => word
	$Count = count($InsultingWordsArray);
	foreach ($CheckstrArray as $key=>$value)
	{
		$LowerValue = strtolower($value);
		for ($y=0; $y<$Count; $y++)	
		{
			if (($LowerValue ==  $InsultingWordsArray[$y]['insult']) or
				($LowerValue ==  $InsultingWordsArray[$y]['plural']))
				{
					$InsultsAndPos[] = array('insult' => $value, 'pos' => $key);
				}
		}
	}
	return $InsultsAndPos;
}

/* ========================================================================================================== */
// Count ambiguous insults only once! This func counts ambiguous insults only once! The ladies dog show blog has the ambigous OW 'bitch' and the insult 'dog' 32 times.  Adding up each instance of 'dog' tells you nothing.
//  Each insulting word has a weight. 
/* ========================================================================================================== */
/**
 * Add up the weights of all insults found in the text.
 *
 * For every entry of $InsultingWordsArray each occurrence in $InsultsAndPos adds the
 * entry's 'weight'; an entry flagged 'ambiguity' == 1 is counted only once, however
 * often it occurs.
 *
 * FIX: occurrences are stored as written in the text, while the insult list is
 * matched in lower case when the occurrences are collected. The comparison here is
 * now done in lower case too, so a capitalised insult is no longer ignored.
 * A non-array or empty $InsultsAndPos yields 0 instead of reaching usort().
 *
 * @param array $InsultsAndPos        List of array('insult' => word, 'pos' => offset).
 * @param array $InsultingWordsArray  List of entries with 'insult', 'plural', 'weight', 'ambiguity'.
 * @return int|float Total weight.
 */
function WeighAllInsults($InsultsAndPos, $InsultingWordsArray)
{
	echo '<br> here we are';
	if (!is_array($InsultsAndPos) || count($InsultsAndPos) == 0) { return 0; }
	usort($InsultsAndPos, "cmp4usort2");
	$InsultingWordsCount = count($InsultingWordsArray);
	$InsultsNPosCount = count($InsultsAndPos);
	$TotalWeight = 0;
	for ($x=0; $x < $InsultingWordsCount; $x++) 		
		{
			for ($y=0; $y < $InsultsNPosCount; $y++) 
				{
				$Found = strtolower($InsultsAndPos[$y]['insult']);
				if (($InsultingWordsArray[$x]['insult'] == $Found) or
				    ($InsultingWordsArray[$x]['plural'] == $Found))
						{
							$TotalWeight += $InsultingWordsArray[$x]['weight'];
							if ($InsultingWordsArray[$x]['ambiguity'] == 1)
							{  break;  }		// ambiguous insults count once
						}
				}
		}
	return $TotalWeight;
}
		
/* ========================================================================================================== */
/**
 * Count the insults found in the text.
 *
 * For every entry of $InsultingWordsArray each occurrence in $InsultsAndPos counts as
 * one; an entry flagged 'ambiguity' == 1 is counted only once, however often it occurs.
 *
 * FIX: occurrences are stored as written in the text, while the insult list is
 * matched in lower case when the occurrences are collected. The comparison here is
 * now done in lower case too, so a capitalised insult is no longer ignored.
 * A non-array or empty $InsultsAndPos yields 0 instead of reaching usort().
 *
 * @param array $InsultsAndPos        List of array('insult' => word, 'pos' => offset).
 * @param array $InsultingWordsArray  List of entries with 'insult', 'plural', 'ambiguity'.
 * @return int Number of insults counted.
 */
function CountAllInsults($InsultsAndPos, $InsultingWordsArray)
{
	if (!is_array($InsultsAndPos) || count($InsultsAndPos) == 0) { return 0; }
	usort($InsultsAndPos, "cmp4usort2");
	$InsultingWordsCount = count($InsultingWordsArray);
	$InsultsNPosCount = count($InsultsAndPos);
	$TotalCount = 0;
	for ($x=0; $x < $InsultingWordsCount; $x++) 		
		{
			for ($y=0; $y < $InsultsNPosCount; $y++) 
				{
				$Found = strtolower($InsultsAndPos[$y]['insult']);
				if (($InsultingWordsArray[$x]['insult'] == $Found) or
				    ($InsultingWordsArray[$x]['plural'] == $Found))
						{
							echo '<br> tot=' . $TotalCount . '   $InsultsAndPos[$y][insult]=' . $InsultsAndPos[$y]['insult'];
							$TotalCount++;
							if ($InsultingWordsArray[$x]['ambiguity'] == 1)
							{  break;  }		// ambiguous insults count once
						}
				}
		}
	return $TotalCount;
}

/* ========================================================================================================== */
//   
/* ========================================================================================================== */
/**
 * Return how many different insult words occur in $InsultsAndPos.
 *
 * FIX: words are compared in lower case, so "Dog" and "dog" count as one insult,
 * in line with the case-insensitive way insults are detected. A non-array argument
 * yields 0 instead of an error in count().
 *
 * @param array $InsultsAndPos  List of entries with an 'insult' element.
 * @return int  Number of distinct insult words.
 */
function ReturnUniqueInsults($InsultsAndPos)
{
	if (!is_array($InsultsAndPos)) { return 0; }
	$Seen = array();
	foreach ($InsultsAndPos as $Entry)
		{
		$Seen[strtolower($Entry['insult'])] = true;
		}
	return count($Seen);
}

/* ========================================================================================================== */
//	How many antagonistic words exist for each category?  cat = category   
//  The Antagonistic word categories, 3rd field, are based on the way rascists, etc. USE OWS and not on any racial, ethnic categorization
//  REALLY MAY HAVE TO COME BACK AND ASSESS ONLY THE USE OF UNIQUE ANTAG WORDS!  so black, monkey, african, black would only be a score of3!
/* ========================================================================================================== */
// Scores the text against the antagonistic word list, one point per list entry that
// occurs in the text (as 'theword' or its 'plural'), accumulated per category.
//   $checkstr                text to examine (read only here)
//   $AntagonisticWordsArray  zero-based list of rows with 'theword', 'plural' and 'cat'
//   $AntagonismThreshold     not used by this function
// Returns the de-duplicated category list, each row holding 'cat' and 'score'.
// NOTE(review): the score is stored at index == category value, which presumes the
// de-duplicated list ends up ordered so that position N holds category N -- confirm
// against the word list. Debug output is echoed throughout.
function AssessAntagonism(&$checkstr, &$AntagonisticWordsArray, $AntagonismThreshold)
{
	// Split into words, keep each distinct word once (a word repeated six times must
	// still count only once, to stay conservative), then renumber from zero.
	$EachWord = reIndex(0, arrayUnique(str_word_count($checkstr, 1)));
	$EachWord_cnt = count($EachWord);
	for ($w = 0; $w < $EachWord_cnt; $w++)
	{
		// Drop a trailing possessive 's. This runs before lower-casing, so only a
		// lower-case s is recognised.
		if (substr($EachWord[$w], -2) === '\'s')
		{
			$EachWord[$w] = substr($EachWord[$w], 0, strlen($EachWord[$w]) - 2);
		}
		echo '<br> after  possesive strip =' . $EachWord[$w];
		$EachWord[$w] = strtolower($EachWord[$w]);
	}
	echo '<br><br> yo yo yo here we go $EachWord ';
	print_r($EachWord);

	// One row per word-list entry, holding just its category and a zero score ...
	$Antag_cnt = count($AntagonisticWordsArray);
	for ($a = 0; $a < $Antag_cnt; $a++)
	{
		$cats[$a] = array('cat' => $AntagonisticWordsArray[$a]['cat'], 'score' => 0);
	}
	echo '<br><br> yo yo yo here we go $cats ';
	print_r($cats);
	// ... collapsed to one row per distinct category.
	$Uniquecats = reIndex(0, arrayUnique($cats));
	echo '<br><br> yo yo yo here we go 2 $Uniquecats <br><br>';
	print_r($Uniquecats); 

	// Score: each antagonistic word counts once, however often it occurs in the text.
	for ($a = 0; $a < $Antag_cnt; $a++)
	{
		for ($w = 0; $w < $EachWord_cnt; $w++)
		{
			$candidate = strtolower($EachWord[$w]);
			if ($candidate != $AntagonisticWordsArray[$a]['theword'] and $candidate != $AntagonisticWordsArray[$a]['plural'])
			{
				continue;
			}
			echo '<br> antag match  eachwords =' . $EachWord[$w] . '  antagarray= ' . $AntagonisticWordsArray[$a]['theword'] . '  plural= ' . $AntagonisticWordsArray[$a]['plural'];
			// the category value doubles as the index of that category's score row
			$Uniquecats[$AntagonisticWordsArray[$a]['cat']]['score'] += 1;
			break;
		}
	}
	echo '<br><br><br> yo yo yo here we go after scoring  $Uniquecats ';
	print_r($Uniquecats);
	echo '<br><br><br>';
	return ($Uniquecats);
}
/* ========================================================================================================== */
//  what antagonistic category has the highest score? Highest  antagonism  is added to overall probability of OWs in input
// The antagonistic word categories (2nd field) are based on the way racists, etc. USE OWs and not on any racial or ethnic categorization.
/* ========================================================================================================== */
// Finds the category with the highest combined score.
//   $Uniquecats  zero-based list of rows with 'cat' and 'score'; row 0 is the general
//                category, whose score is added to every other category's score.
// Returns array(0 => highest combined score, 1 => 'cat' value of that row).
// Row 0 itself is never a candidate, so the result is array(0, 0) when no other
// category has a combined score above zero. On a tie the earlier row wins.
function EvaluateHighestAntagonism($Uniquecats)
{
	$best = array(0, 0);
	$rows = count($Uniquecats);
	for ($i = 1; $i < $rows; $i++)
	{
		$combined = $Uniquecats[$i]['score'] + $Uniquecats[0]['score'];
		if ($combined > $best[0])
		{
			$best[0] = $combined;
			$best[1] = $Uniquecats[$i]['cat'];
		}
	}
	return $best;
}

/* ========================================================================================================== */
// Find proximity of one word to another. Search direction (2=BEFORE, 1=AFTER, 0=BOTH). No REGEXs please. OBSOLETE?
/* ========================================================================================================== */
// Reports whether $FindWord occurs as a whole word within a window of characters
// around a matched word.
//   $checkstr   text being examined (read only here)
//   $FindWord   word to look for; compared against the lower-cased words of the window
//   $MatchedOW  the matched word; only its length is used
//   $pos        offset of $MatchedOW inside $checkstr
//   $LenChk     window size in characters (one is subtracted before use)
//   $Direction  2 = window before the match, 1 = after it, 0 = both sides
// Returns TRUE when the word is found in the window, FALSE otherwise (including for an
// unrecognised $Direction). Echoes debug output.
function ProxChk(&$checkstr, $FindWord, $MatchedOW, $pos, $LenChk, $Direction) 
{    
	echo '<BR> $Direction=' . $Direction;
	echo '<BR> $FindWord=' . $FindWord;
	echo '<BR> ';
	$LenChk = $LenChk - 1;						// substr's 'start' parm starts at zero, so '8' is the 9th position
	if ($LenChk < 0) { $LenChk = 0; }			// a negative length would make substr() count from the end of the text
	$matchLen = strlen($MatchedOW);
	if ($Direction == 2)
	{
		$SearchStart = max(0, $pos - $LenChk);
		// The window always ends where the match begins, even when its start was
		// clamped to 0; otherwise it would spill over the match and the text after it.
		$SearchStr = substr($checkstr, $SearchStart, $pos - $SearchStart);
	}
	elseif ($Direction == 1)
	{
		$SearchStart = ($pos + $matchLen);
		if ($SearchStart > strlen($checkstr)) { echo 'error 98768867'; }
		$SearchStr = substr($checkstr, $SearchStart, $LenChk);
	}
	elseif ($Direction == 0)
	{
		$SearchStart = max(0, $pos - $LenChk);
		// Same clamping concern: the window ends $LenChk characters after the match.
		$SearchStr = substr($checkstr, $SearchStart, ($pos + $matchLen + $LenChk) - $SearchStart);
	}
	else
	{
		return FALSE;							// unknown direction: there is no window to search
	}
	echo ' <br> now strpos -- $SearchStart=' . $SearchStart;
	echo ' <br> $SearchStr=' . $SearchStr;
	$WordsArray = str_word_count((string) $SearchStr, 1);  // one array element per word of the window
	echo ' <br> $WordsArray=';
	print_r($WordsArray);
	foreach ($WordsArray as $word)
	{
		if ($FindWord == strtolower($word))
		{
			return TRUE;
		}
	}
	return FALSE;
}

/* ========================================================================================================== */
// Removes duplicate elements from an array whose elements may themselves be arrays
// (adapted from the php.net array_unique notes). Elements are compared through their
// serialized form; original keys are kept, the first occurrence of a value wins.
// A non-array argument is returned unchanged.
function arrayUnique($myArray)
{
    if (!is_array($myArray)) {
        return $myArray;
    }

    $encoded  = array_map('serialize', $myArray);	// keys are preserved for a single input array
    $distinct = array_unique($encoded);

    return array_map('unserialize', $distinct);
} 

/* ========================================================================================================== */
// Function to re-index an array beginning at N   @param array $array    @param int $start   @return @array
// is this necessary ???????
/* ========================================================================================================== */
// Renumbers the keys of $array consecutively, beginning at $start; values keep their order.
//   $start  first key to use
//   $array  array to renumber
// Returns the renumbered array. An empty (or null/false) input yields an empty array:
// without this guard range($start, $start - 1) produces two keys for zero values and
// array_combine() fails on the count mismatch.
function reIndex($start, $array)
{
  if (empty($array)) { return array(); }
  $end = ($start+count($array))-1;			// last key to assign
  $keys = range($start, $end);				// the range of numbers to use as keys
  return array_combine($keys, $array);		// combine the arrays with the new keys and values
}


/* =========================================================================== */
/* The English language and human languages in general are far too complex
/* to allow for one simple, elegant algorithmic loop that 'does it all' --thus the special rules.
/* Special rules generally point to other functions for the sake of modularity
/*
/* =========================================================================== */

// Special rule 1: matches when $FindWord occurs near the matched word.
// Looks up every position of $FindWord in $checkstr (GetPos4WordsInArray) and counts
// those lying within $LenChk of the match at $pos, in the given $Direction
// (CountWordsInArrayNearMatch). Returns true when at least one is found, false
// otherwise. Echoes debug output.
function SpecialRule1(&$checkstr, $FindWord, $MatchedOW, $pos, $LenChk, $Direction)
{
	$FindWordArray = array($FindWord); 
	echo ' <BR><BR><BR>  in SpecialRule1 wtf? <BR><BR><BR> '
		. ' <BR> $FindWord=' . $FindWord
		. ' <BR>  $MatchedOW=' . $MatchedOW
		. ' <BR>   $pos=' . $pos
		. ' <BR> $LenChk=' . $LenChk
		. ' <BR> $Direction=' . $Direction
		. ' <BR> $FindWord=';
	var_dump($FindWord);
	echo ' <BR> $FindWordArray=';
	var_dump($FindWordArray);

	$FindWordPos = GetPos4WordsInArray($checkstr, $FindWordArray, false); 
	echo ' <BR> $FindWordPos=';
	var_dump($FindWordPos);

	$nearCount = CountWordsInArrayNearMatch($pos, strlen($MatchedOW), $FindWordPos, $LenChk, $Direction);
	if (!$nearCount)
	{
		return false;
	}
	echo '<BR> RULE MATCH: Special Rule 1 matched. TESTED=' . $MatchedOW . ' NEAR WORD=' . $FindWord;
	return true;
}

// Special rule 6 is not implemented: it only prints a marker and returns nothing.
function SpecialRule6(&$checkstr, $FindWord, $MatchedOW, $pos, $LenChk, $Direction)
{
	print ' <BR><BR><BR>  special rule 6 placeholder <BR><BR><BR> ';
}

// Special rule 16 is not implemented: it only prints a marker and returns nothing.
function SpecialRule16(&$checkstr, $FindWord, $MatchedOW, $pos, $LenChk, $Direction)
{
	print ' <BR><BR><BR>  special rule 16 placeholder <BR><BR><BR> ';
}

// Special rule 20 is not implemented: it only echoes a marker and returns nothing.
// The marker now names rule 20 (it used to say "rule 21", copied from SpecialRule21),
// so the output shows which placeholder was actually reached.
function SpecialRule20(&$checkstr, $FindWord, $MatchedOW, $pos, $LenChk, $Direction)
{
	echo ' <BR><BR><BR>  special rule 20 placeholder <BR><BR><BR> ';		
}

// Special rule 21 is not implemented: it only prints a marker and returns nothing.
function SpecialRule21(&$checkstr, $FindWord, $MatchedOW, $pos, $LenChk, $Direction)
{
	print ' <BR><BR><BR>  special rule 21 placeholder <BR><BR><BR> ';
}

// Special rule 22: matches when Detect_CJK_charset() finds CJK characters in $checkstr.
// The remaining parameters are accepted for signature parity with the other rules
// and are not used. Returns TRUE or FALSE.
function SpecialRule22(&$checkstr, $FindWord, $MatchedOW, $pos, $LenChk, $Direction)
{
	return Detect_CJK_charset($checkstr) ? TRUE : FALSE;
}

// Special rule 25 is not implemented: it only echoes a marker and returns nothing.
// The marker now names rule 25 (it used to say "rule 21", copied from SpecialRule21),
// so the output shows which placeholder was actually reached.
function SpecialRule25(&$checkstr, $FindWord, $MatchedOW, $pos, $LenChk, $Direction)
{
	echo ' <BR><BR><BR>  special rule 25 placeholder <BR><BR><BR> ';		
}

/* ========================================================================================================== */
// MAIN QUESTION: How many antagonistic words are related to the recently matched OW?	
//  The antagonistic word categories (2nd field in the array) are based on the way racists, etc. USE OWs and not on any racial, ethnic or religious categorization; hence the need for a translation table from one system of categorization to the other. The translation from usage to accepted categorization may not be perfect!
// e.g. racists tend to combine slurs used against multiple groups, such as Jews and blacks, so anti-Semitic and anti-black terms are in the same antagonistic group.
/* ========================================================================================================== */
// Returns the antagonism score that relates to one matched offensive word.
//   $Uniquecats  per-category score rows (each with a 'score' key), indexed by
//                antagonism category; row 0 is the general category
//   $OWArray     offensive-word rows carrying 'cat' and 'subcat'
//   $i           index into $OWArray of the word just matched
// The word's (cat, subcat) pair is translated to an antagonism category and that
// category's score plus the general score is returned; with no translation only the
// general score is returned.
// The array keys are quoted: the bare words cat / subcat used previously are undefined
// constants, which PHP 8 rejects with an Error.
// NOTE(review): antagonism category 2 is tied to (6, 2) here while
// GetCatSubcatFromAntagCat() maps category 2 to (6, 1) -- one of the two is likely
// wrong; left unchanged pending confirmation.
function CorrelateAntagonism(&$Uniquecats, &$OWArray, $i)
{										
	// each row: array(cat, subcat, antagonism category)
	$translation = array(
		array(4, 1, 1),
		array(6, 2, 2),
		array(4, 2, 3),
		array(5, 2, 4),
		array(4, 3, 5),
		array(4, 4, 6),
		array(5, 4, 7),
	);
	$matched = $OWArray[$i];
	foreach ($translation as $row)
	{
		if ($matched['cat'] == $row[0] and $matched['subcat'] == $row[1])
		{
			return ($Uniquecats[$row[2]]['score'] + $Uniquecats[0]['score']);
		}
	}
	return ($Uniquecats[0]['score']);
}

// Echoes an HTML summary of the antagonism score of every category.
//   $Uniquecats     zero-based list of rows with 'cat' and 'score'; anything that is
//                   not an array is reported as "no terms found"
//   $OWLookupArray  lookup rows handed on to GetCatSubcatDescription()
// Only categories with a score above zero are listed. A category that
// GetCatSubcatFromAntagCat() cannot translate is reported as an error and skipped;
// carrying on with the FALSE result would look up null/null, which compares equal to
// category 0 / subcategory 0 and would print the wrong description.
function ReportAntagonism($Uniquecats, $OWLookupArray)
{
	echo '<br><br> <b>ANTAGONISM TYPES AND COUNT</b>';
	if (!is_array($Uniquecats))
	{
		echo '<br> No antagonistic terms found.  ';
		return;
	}
	// counted only after the type check: count() on a non-array is a TypeError in PHP 8
	$UniqueCatsCount = count($Uniquecats);
	for ($x=0; $x<$UniqueCatsCount; $x++)	
	{
		$CatAndSubcat  = GetCatSubcatFromAntagCat($Uniquecats[$x]['cat']);
		if (!$CatAndSubcat)
		{
			echo '<BR> ERROR GetCatSubcatFromAntagCat no cat and subcat for antagcat=' . $Uniquecats[$x]['cat'];
			continue;
		}
		if ($Uniquecats[$x]['score'] > 0)
		{
			$CatSubcatDesc = GetCatSubcatDescription($CatAndSubcat['cat'], $CatAndSubcat['subcat'], $OWLookupArray);
			echo '<br>  There are ' . $Uniquecats[$x]['score'] . ' words <i>related</i> to the category <b>' . $CatSubcatDesc['cat'] . '</b>, subcategory <b>'. $CatSubcatDesc['subcat'] . '</b>.';
		}
	}
}	
			
// Translates an antagonism category number (0-7) into the offensive-word
// category / subcategory pair it stands for.
// Returns array('cat' => ..., 'subcat' => ...), or FALSE for an unknown category.
// NOTE(review): category 2 maps to (6, 1) here, whereas CorrelateAntagonism() ties
// (6, 2) to category 2 -- confirm which one is intended.
function GetCatSubcatFromAntagCat($antagcat)
{
	// antagonism category => array(cat, subcat); checked in this order
	$pairs = array(
		0 => array(0, 0),
		1 => array(4, 1),
		2 => array(6, 1),
		3 => array(4, 2),
		4 => array(5, 2),
		5 => array(4, 3),
		6 => array(4, 4),
		7 => array(5, 4),
	);
	foreach ($pairs as $code => $pair)
	{
		if ($antagcat == $code)
		{
			return array('cat' => $pair[0], 'subcat' => $pair[1]);
		}
	}
	return FALSE;
}

// Looks up the textual descriptions of a category / subcategory pair.
//   $cat, $subcat    numbers to look for
//   $OWLookupArray   rows with 'cat', 'subcat', 'catdescription', 'subcatdescription'
// Returns array('cat' => category description, 'subcat' => subcategory description)
// for the first matching row, or null when no row matches.
// The description keys are quoted: the bare words used previously are undefined
// constants, which PHP 8 rejects with an Error.
function GetCatSubcatDescription($cat, $subcat, $OWLookupArray)
{
	foreach ($OWLookupArray as $row)
	{
		if ( $cat == $row['cat'] and $subcat == $row['subcat'] ) 
		{ 
			return array(
				'cat'    => $row['catdescription'],
				'subcat' => $row['subcatdescription'],
			);
		}	
	}
	return null;
}

// Echoes an HTML list of the insulting terms found and their positions.
//   $InsultsAndPos  list of rows with 'word' and 'pos'
// An empty list, or a value that is not an array, prints "No insulting terms found."
// The type is checked before the rows are touched: count() on a non-array is a
// TypeError in PHP 8, which made the "not found" branch unreachable.
function ReportInsults($InsultsAndPos)
{
	echo '<BR><BR>';
	echo '   <b>INSULTING TERMS</b>';
	if (is_array($InsultsAndPos) and count($InsultsAndPos) > 0)
	{
		foreach ($InsultsAndPos as $entry)
		{
			echo '<BR> ' . $entry['word'] . '     Position=' . $entry['pos'];
		}
	}
	else { echo '<br> No insulting terms found.'; }
	echo '<BR><BR>';
}

// Finds the words of $checkstr that appear in $WordsArray, with their positions.
//   $checkstr           text to scan (read only here)
//   $WordsArray         lower-case words to look for
//   $FirstInstanceOnly  when true, only the first occurrence of each word in the text
//                       is considered
// Returns a zero-based list of array('word' => word as written in the text,
// 'pos' => its character offset in $checkstr).
// Matching ignores case, so the first-instance filter ignores case too: "Damn ... damn"
// counts as one word. (The earlier lower-casing of the text was overwritten on the
// next line and never took effect.)
function GetPos4WordsInArray(&$checkstr, $WordsArray, $FirstInstanceOnly)
{
	$WordsAndPos = array();
	$CheckstrArray = str_word_count($checkstr, 2);		// offset => word
	if ($FirstInstanceOnly)
	{
		// De-duplicate on the lower-cased words, but keep the original spelling and
		// offsets of the survivors.
		$firstSeen = array_unique(array_map('strtolower', $CheckstrArray));
		$CheckstrArray = array_intersect_key($CheckstrArray, $firstSeen);
	}
	foreach ($CheckstrArray as $key=>$value)
	{
		$lowered = strtolower($value);
		foreach ($WordsArray as $wanted)
		{
			if ($lowered == $wanted)
			{
				$WordsAndPos[] = array('word' => $value, 'pos' => $key);
			}
		}
	}
	return $WordsAndPos;
}

// Counts the words lying within $NearMatchLen characters of a matched word.
//   $matchedOWPostmp  offset of the matched word
//   $matchedOWLen     length of the matched word
//   $WordsAndPos      zero-based list of rows with a 'pos' key, as produced by
//                     GetPos4WordsInArray()
//   $NearMatchLen     how far from the match a word may lie
//   $Direction        2 = before the match, 1 = after it, 0 = both sides
// All bounds are exclusive. Any other $Direction counts nothing.
function CountWordsInArrayNearMatch($matchedOWPostmp, $matchedOWLen, $WordsAndPos, $NearMatchLen, $Direction)
{
	$windowStart = $matchedOWPostmp - $NearMatchLen;
	$windowEnd   = $matchedOWPostmp + $matchedOWLen + $NearMatchLen;
	$NearMatchCount = 0;
	$total = count($WordsAndPos);
	for ($n = 0; $n < $total; $n++)
	{
		$wordPos = $WordsAndPos[$n]['pos'];
		if ($Direction == 0)			// anywhere inside the window, on either side
		{
			$isNear = ($wordPos < $windowEnd and $wordPos > $windowStart);
		}
		elseif ($Direction == 1)		// after the start of the match
		{
			$isNear = ($wordPos > $matchedOWPostmp and $wordPos < $windowEnd);
		}
		elseif ($Direction == 2)		// before the start of the match
		{
			$isNear = ($wordPos < $matchedOWPostmp and $wordPos > $windowStart);
		}
		else
		{
			$isNear = false;
		}
		if ($isNear)
		{
			$NearMatchCount++;
		}
	}
	return $NearMatchCount;
}

//-----------------------------------------------------------------------------------------------------------------------
// Checks whether the word immediately before a given position in $checkstr is one of
// the known insults.
//   $checkstr             text being examined (read only here)
//   $InsultingWordsArray  zero-based list of rows carrying an 'insult' key
//   $posINcheckstr        position in $checkstr to look backwards from
// Returns true when the preceding word equals an 'insult' entry, false otherwise --
// including when no whitespace is found before the position.
// $WhitespaceArray is not defined in this function; it is presumably supplied by the
// included settings file -- TODO confirm. The include runs in this function's scope,
// so any variable that file sets can overwrite a local of the same name.
// rstrpos() is defined outside this section; from its use here it looks like a
// reverse strpos returning a position or false -- verify.
// Echoes debug output throughout.
function PreviousWordAnInsult(&$checkstr, &$InsultingWordsArray, $posINcheckstr)  
{
	include "ObsceneClean.settings.php";
	if (is_array($WhitespaceArray)) { echo ' $WhitespaceArray  is an array '; }
		else { echo ' $WhitespaceArray is NOT an array '; }
	echo '<br> in PreviousWordAnInsult';
	echo '<br> '; 
	echo ' $posINcheckstr=' . $posINcheckstr;
	$NumOfWhitespaces = count($WhitespaceArray);
	// Find the boundaries of the word preceding the position: $endpos and $startpos.
	$endpos = 0;			// assume this for now
	for ($x=0; $x < $NumOfWhitespaces; $x++)     // keep the largest position rstrpos() reports over all whitespace characters - this will be $endpos
		{
		$tmppos = rstrpos($checkstr, $WhitespaceArray[$x], $posINcheckstr);  
		if ($tmppos > $endpos and $tmppos!== false) { $endpos = $tmppos; }
		echo '<br>';
		echo ' $endpos=' . $endpos;
		}
	if ($endpos > 0)
		{
			$startpos = 0;			// assume this for now
			for ($x=0; $x < $NumOfWhitespaces; $x++) // same search again, this time starting one character before $endpos - this will be $startpos
				{
				$tmppos = rstrpos($checkstr, $WhitespaceArray[$x], ($endpos - 1));  
				if ($tmppos > $startpos and $tmppos!== false) { $startpos = $tmppos; }
				echo '<br> ';
				echo ' $startpos=' . $startpos;
				echo '<br>';
				echo 'uh uh uh var_dump of $WhitespaceArray[$x]=';
				var_dump($WhitespaceArray[$x]);
				echo ' $posINcheckstr=' . $posINcheckstr;
				}
		}
	else { return false; }		// $endpos stayed 0: nothing to treat as a previous word
	// The slice starts on the whitespace character itself; trim() removes it.
	$testword = trim(substr($checkstr, $startpos, ($endpos - $startpos)));
	echo '<br>';
	echo '<br>----------------------------------------------';
	echo '<br>';
	echo ' $testword=';
	var_dump($testword);
	echo '<br>';
	$insults = count($InsultingWordsArray);
	for ($x=0; $x < $insults; $x++) 
		{
		echo '<br> insult=';  var_dump($InsultingWordsArray[$x]['insult']);
		// compared as-is: $testword is not lower-cased before the comparison
		if ($testword == $InsultingWordsArray[$x]['insult'])
			{ return true; }
		}
	return false;
}

//-----------------------------------------------------
// This function folds diacritics by replacing the unicode letter with a 
// non-accented roman letter that looks most like the unicode letter replaced. 
// e.g. Even though a Greek Capital Lunate Sigma Symbol looks much like a C it is not the
// equivalent of a C, but because it looks like a C it is folded to a C. 
// This function is meant to handle ASCII accented letters (western encoding) and unicode letters.
// Non-letter, disguising chars, like '@' and '$' aren't dealt with elsewhere -- see settings file.
// ------------------------------------------------------
 function FoldDiacritics($changestr)				// also called accent removal
{
	// Replaces accented and look-alike letters in $changestr with plain Roman letters
	// and returns the resulting string. Echoes debug output.
	// $folddiacritics and $foldUNICODEdiacritics are not defined in this function; they
	// are presumably set by the included settings file -- TODO confirm. The include runs
	// in this function's scope, so variables it sets can overwrite locals of the same name.
	include "ObsceneClean.settings.php";
	$encode = mb_detect_encoding($changestr, "auto");
echo '<br>--------->IN fold diacritic  $changestr=';
var_dump($changestr);  
echo ' <br> $folddiacritics=' . $folddiacritics;
echo ' <br> $foldUNICODEdiacritics=' .  $foldUNICODEdiacritics;
echo ' <br> $encode=' . $encode;
 
	// Single-byte branch: strtr() maps each byte of its 2nd argument to the byte at the
	// same position of its 3rd argument.
	// NOTE(review): confirm mb_detect_encoding() can ever report "ANSI"; a string detected
	// as ASCII would normally contain no bytes >= 0x80, leaving these maps nothing to match.
 	if ($encode == "ANSI" or $encode == "ASCII") 
	{
		$changestr1 = strtr($changestr, "\xDF",	"b");			
		$changestr2 = strtr($changestr1, "\x83", "f");
		$changestr3 = strtr($changestr2, "\xE0\xE1\xE2\xE5\xE3\xE4\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF2\xF3\xF4\xF5\xF6\xF8\x9A\xE7\xF9\xFA\xFB\xFC\xF1",	"aaaaaaeeeeiiiiooooooscuuuun");  
		echo ' <BR> right after lower $changestr3=' . $changestr3 . '<BR>';
		$changestr4 = strtr($changestr3, "\xC0\xC1\xC2\xC5\xC3\xC4\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD2\xD3\xD4\xD5\xD6\xC7\xD9\xDA\xDB\xDC\xD1",	"AAAAAAEEEEIIIIOOOOOCUUUUN");   
		echo ' <BR> right after upper $changestr4=' . $changestr4 . '<BR>';
		$changestr = $changestr4;
	}

	// UTF-8 branch, only when the $foldUNICODEdiacritics setting is truthy. Each block
	// below pairs a $search list with a $replace list by position, so the two lists of a
	// block must stay the same length and in the same order.
	if ($encode == "UTF-8" and ($foldUNICODEdiacritics)) 
	{
		// Fold Latin-1 Supplement unicode block
		$search  = array('À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Ç', 'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×', 'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß', 'à', 'á', 'â', 'ã', 'ä', 'å', 'ç', 'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï', 'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', 'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ');
        $replace = array('A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'Y', 'b', 'B', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 'o', 'n', 'o', 'o', 'o', 'o', 'o', 'o', 'u', 'u', 'u', 'u', 'y', 'b', 'y');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		// Fold Latin Extended-A unicode block
		$search  = array('Ā', 'ā', 'Ă', 'ă', 'Ą', 'ą', 'Ć', 'ć', 'Ĉ', 'ĉ', 'Ċ', 'ċ', 'Č', 'č', 'Ď', 'ď', 'Đ', 'đ', 'Ē', 'ē', 'Ĕ', 'ĕ', 'Ė', 'ė', 'Ę', 'ę', 'Ě', 'ě',  'Ĝ', 'ĝ', 'Ğ', 'ğ', 'Ġ', 'ġ', 'Ģ', 'ģ', 'Ĥ', 'ĥ', 'Ħ', 'ħ', 'Ĩ', 'ĩ', 'Ī', 'ī', 'Ĭ', 'ĭ', 'Į', 'į', 'İ', 'ı', 'Ĵ', 'ĵ', 'ķ', 'ĸ', 'Ĺ', 'ĺ', 'Ļ', 'ļ', 'Ľ', 'ľ', 'Ŀ', 'ŀ', 'Ł', 'ł', 'Ń', 'ń', 'Ņ', 'ņ', 'Ň', 'ň', 'ʼn', 'Ŋ', 'ŋ', 'Ō', 'ō', 'Ŏ', 'ŏ', 'Ő', 'ő', 'Ŕ', 'ŕ', 'Ŗ', 'ŗ', 'Ř', 'ř', 'Ś', 'ś', 'Ŝ', 'ŝ', 'Ş', 'ş', 'Š', 'š', 'Ţ', 'ţ', 'Ť', 'ť', 'Ŧ', 'ŧ', 'Ũ', 'ũ', 'Ū', 'ū', 'Ŭ', 'ŭ', 'Ů', 'ů', 'Ű', 'ű', 'Ų', 'ų', 'Ŵ', 'ŵ', 'Ŷ', 'ŷ', 'Ÿ', 'Ź', 'ź', 'Ż', 'ż', 'Ž', 'ž', 'ſ');
        $replace = array('A', 'a', 'A', 'a', 'A', 'a', 'C', 'c', 'C', 'c', 'C', 'c', 'C', 'c', 'D', 'd', 'D', 'd', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e',  'G', 'g', 'G', 'g', 'G', 'g', 'G', 'g', 'H', 'h', 'H', 'h', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'I', 'i', 'J', 'j', 'K', 'k', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'L', 'l', 'N', 'n', 'N', 'n', 'N', 'n', 'n', 'N', 'n', 'O', 'o', 'O', 'o', 'O', 'o', 'R', 'r', 'R', 'r', 'R', 'r', 'S', 's', 'S', 's', 'S', 's', 'S', 's', 'T', 't', 'T', 't', 'T', 't', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'W', 'w', 'Y', 'y', 'Y', 'Z', 'z', 'Z', 'z', 'Z', 'z', 'f');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		// Fold Latin Extended-B unicode block (except those characters not supported by Arial Unicode MS)
		$search  = array('ƀ', 'Ƃ', 'ƃ', 'Ƅ', 'ƅ', 'Ƈ', 'ƈ', 'Ƌ', 'ƌ', 'ƍ', 'Ƒ', 'ƒ', 'ƕ', 'Ƙ', 'ƙ', 'ƚ', 'ƛ', 'ƞ', 'Ɵ', 'Ơ', 'ơ', 'Ƣ', 'ƣ', 'Ƥ', 'ƥ', 'Ʀ', 'Ƨ', 'ƨ', 'ƪ', 'ƫ', 'Ƭ', 'ƭ', 'Ư', 'ư', 'Ƴ', 'ƴ', 'Ƶ', 'ƶ', 'Ƹ', 'ƹ', 'ƺ',   'ƾ', 'ƿ', 'ǀ', 'ǁ', 'ǂ',  'ǃ', 'Ǎ', 'ǎ', 'Ǐ', 'ǐ', 'Ǒ', 'ǒ', 'Ǔ', 'ǔ', 'Ǖ', 'ǖ', 'Ǘ', 'ǘ', 'Ǚ', 'ǚ', 'Ǜ', 'ǜ', 'ǝ', 'Ǟ', 'ǟ', 'Ǡ', 'ǡ', 'Ǥ', 'ǥ', 'Ǧ', 'ǧ', 'Ǩ', 'ǩ', 'Ǫ', 'ǫ', 'Ǭ', 'ǭ', 'Ǯ', 'ǯ', 'ǰ', 'Ǵ', 'ǵ', 'Ǻ', 'ǻ', 'Ǿ', 'ǿ', 'Ȁ', 'ȁ', 'Ȃ', 'ȃ', 'Ȅ', 'ȅ', 'Ȇ', 'ȇ', 'Ȉ', 'ȉ', 'Ȋ', 'ȋ', 'Ȍ', 'ȍ', 'Ȏ', 'ȏ', 'Ȑ', 'ȑ', 'Ȓ', 'ȓ', 'Ȕ', 'ȕ', 'Ȗ', 'ȗ', 'Ɓ', 'Ɔ', 'Ɖ', 'Ɗ', 'Ǝ', 'Ə', 'Ɛ', 'Ɠ', 'Ɣ', 'Ɨ', 'Ɩ', 'Ɯ', 'Ɲ', 'Ʃ', 'Ʈ', 'Ʊ', 'Ʋ', 'Ʒ');
        $replace = array('b', 'b', 'b', 'b', 'b', 'C', 'c', 'a', 'a', 'q', 'F',  'f', 'h',  'K', 'k', 't', 'A', 'n', 'O', 'O', 'o', 'O', 'o',  'P', 'p', 'R', 'S', 's',  'i', 't', 'T', 'f', 'U', 'u', 'Y', 'y', 'Z', 'z', 'E', 'e', 's',   'j',  'p',  'I', 'll', 'T', 'i', 'A', 'a', 'I', 'i', 'O', 'o', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'U', 'u', 'e', 'A', 'a', 'A', 'a', 'G', 'g', 'G', 'g', 'K', 'k', 'Q', 'q', 'Q', 'q', 'e', 'e', 'j', 'G', 'g', 'A', 'a', 'O', 'o', 'A', 'a', 'A', 'a', 'E', 'e', 'E', 'e', 'I', 'i', 'I', 'i', 'O', 'o', 'O', 'o', 'R', 'r', 'R', 'r', 'U', 'u', 'U', 'u', 'B', 'C', 'D', 'D', 'E', 'e',  'e', 'G', 'Y', 't', 'l', 'w', 'N', 'e',  'T', 'U', 'V', 'e');
		$changestr1 = str_replace($search, $replace, $changestr);  
		$changestr = $changestr1; 
		// Fold IPA Extensions unicode block
		$search  = array('ɐ', 'ɑ', 'ɒ', 'ɓ', 'ɔ', 'ɕ', 'ɖ', 'ɗ', 'ɘ', 'ə', 'ɚ', 'ɛ',  'ɜ', 'ɝ', 'ɞ',  'ɟ', 'ɠ', 'ɡ', 'ɢ',  'ɣ', 'ɤ', 'ɥ',  'ɦ', 'ɧ', 'ɨ', 'ɩ', 'ɪ', 'ɫ', 'ɬ', 'ɭ',  'ɮ', 'ɯ', 'ɰ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɵ', 'ɷ', 'ɸ', 'ɹ', 'ɺ', 'ɻ', 'ɼ', 'ɽ', 'ɾ', 'ɿ', 'ʀ', 'ʁ',   'ʂ',  'ʃ', 'ʄ', 'ʅ', 'ʆ', 'ʇ', 'ʈ', 'ʉ', 'ʊ', 'ʋ', 'ʌ', 'ʍ', 'ʎ', 'ʏ', 'ʐ', 'ʑ', 'ʒ', 'ʓ',  'ʗ', 'ʘ', 'ʙ', 'ʚ',  'ʛ',  'ʜ',  'ʝ', 'ʞ', 'ʟ', 'ʠ'   );
        $replace = array('a', 'a', 'a', 'b', 'c', 'c', 'd', 'd', 'e', 'e', 'e',  'e', 'e', 'e', 'B',  'f', 'g', 'g', 'G', 'y', 'y', 'u',  'h', 'h', 'i',  'i', 'I', 't',  't', 'L', 'B', 'w', 'w', 'm', 'n', 'n', 'n', 'e',  'o', 'o', 'r', 'l', 'r', 'r', 'r', 'r', 'r', 'R', 'R', 's',  'f', 'f', 'f', 'f', 't', 't', 'u', 'u', 'u', 'A', 'm', 'y', 'y', 'z', 'z', '3', '3', 'C', 'o', 'B', 'e', 'G', 'H',  'j',  'k', 'l', 'q'   );
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		// Fold Spacing Modifier Letters (only those that can be substituted for a Roman letter)
		$search  = array('ʰ', 'ʱ',  'ʲ', 'ʳ', 'ʴ',  'ʶ',  'ʷ', 'ʸ', '˄',     '˅', 'ˠ',  'ˡ',  'ˢ',  'ˣ');
        $replace = array('h', 'h', 'j', 'r', 'r', 'r', 'w', 'y', 'A', 'V', 'y', 'l', 's', 'x');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		// Fold Greek letters (only those that can be substituted for a Roman letter)
		$search  = array('Ά', 'Έ', 'Ή', 'Ί', 'Ό', 'Ύ', 'Ώ', 'ΐ', 'Α', 'Β', 'Δ', 'Ε',  'Ζ', 'Η', 'Θ', 'Ι', 'Κ',  'Λ', 'Μ', 'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', 'Τ', 'Υ',   'Φ', 'Χ', 'Ψ', 'Ω', 'Ϊ', 'Ϋ',  'ά', 'έ', 'ή', 'ί', 'ΰ', 'α', 'β', 'γ', 'δ', 'ε', 'ζ',   'η', 'θ', 'ι', 'κ', 'λ',   'μ', 'ν', 'ξ', 'ο',  'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω', 'ϊ', 'ϋ',    'ό', 'ύ', 'ώ', 'ϐ', 'ϒ', 'ϓ', 'ϔ', 'ϕ', 'ϖ',   'Ϛ', 'Ϝ', 'ϝ', 'Ϡ', 'Ϣ', 'ϣ', 'Ϥ', 'ϥ',  'Ϧ', 'ϧ', 'Ϩ', 'ϩ', 'Ϫ', 'ϫ', 'Ϭ', 'ϭ', 'Ϯ', 'ϯ', 'ϰ', 'ϱ', 'ϲ', 'ϳ', 'Ϻ', 'ϻ', 'Ϲ', 'Ͼ');
        $replace = array('A', 'E', 'H', 'I', 'O', 'Y', 'O', 'i', 'A', 'B', 'A', 'E',   'Z', 'H', 'O', 'I', 'K',  'A', 'M', 'N', 'E', 'O', 'M', 'P', 'E', 'T', 'Y',   'O', 'x', 'Y', 'O', 'I', 'Y',  'a', 'e', 'n', 'i', 'u', 'a', 'B', 'y', 'o', 'e', 'S',   'n', 'B', 'i', 'k', 'A',  'u', 'v', 'E', 'o',   'T', 'p', 'c', 'o', 't', 'u', 'o', 'x', 'Y', 'w', 'i', 'u',    'o', 'u', 'w', 'B', 'Y', 'Y', 'Y', 'o', 'w',   'C', 'F', 'F', 'C', 'W', 'w', 'Y', 'y',   'b', 'c', 'S', 's', 'X', 'x', 'A', 'a', 'T', 't', 'x', 'a', 'c', 'j', 'M', 'M', 'C', 'C');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
	    // Fold Cyrillic  (only those that can be substituted for a Roman letter)
		$search  = array('Ё', 'Ђ', 'Є', 'Ѕ', 'І', 'Ї',    'Ј', 'Ћ', 'Ќ', 'Ў', 'Џ', 'А', 'Б', 'В', 'Д', 'Е', 'Ж', 'З', 'И', 'Й',  'Ҋ', 'К', 'Л', 'М', 'Н', 'О',    'П',   'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц',       'Ч', 'Ш', 'Щ', 'Ъ', 'Ь', 'Э', 'Я', 'а', 'б', 'в', 'г', 'д', 'е', 'ж',    'з', 'и', 'й', 'к', 'л',  'м', 'н', 'о', 'п', 'р', 'с', 'т', 'у',    'ф', 'х', 'ц', 'ч', 'ш', 'щ', 'ъ', 'ь',      'э', 'я', 'ё', 'ђ', 'ѓ', 'є',  'ѕ', 'і', 'ї',  'ј', 'ћ', 'ќ', 'ў', 'џ', 'Ѡ', 'ѡ', 'Ѣ',  'ѣ',      'Ѥ', 'ѥ', 'Ѧ', 'ѧ', 'Ѩ', 'ѩ',    'Ѯ', 'ѯ',  'Ѱ', 'ѱ', 'Ѳ', 'ѳ', 'Ѵ', 'ѵ', 'Ѷ', 'ѷ',   'Ѻ', 'ѻ', 'Ѽ', 'ѽ', 'Ѿ', 'ѿ', 'Ҁ', 'ҁ', 'Ғ', 'ғ', 'Ҕ', 'ҕ',    'җ', 'Ҙ', 'ҙ', 'Қ', 'қ', 'Ҝ', 'ҝ', 'Ҟ', 'ҟ', 'Ҡ', 'ҡ',     'Ң', 'ң', 'Ҥ', 'ҥ',     'Ҧ', 'ҧ', 'Ҩ', 'ҩ', 'Ҫ', 'ҫ',  'Ҭ', 'ҭ', 'Ү', 'ү', 'Ұ', 'ұ', 'Ҳ', 'ҳ',   'Ҵ', 'ҵ',  'Ҷ', 'ҷ', 'Ҹ', 'ҹ', 'Һ', 'һ', 'Ҽ',  'ҽ', 'Ҿ', 'ҿ', 'Ӏ', 'Ӄ', 'ӄ', 'Ӈ', 'ӈ', 'Ӌ', 'ӌ',     'Ӎ', 'ӎ', 'ӏ', 'Ӑ', 'ӑ', 'Ӓ', 'ӓ',  'Ӗ', 'ӗ', 'Ә', 'ә', 'Ӛ', 'ӛ', 'Ӟ', 'ӟ', 'Ӡ', 'ӡ',  'Ӣ', 'ӣ', 'Ӥ', 'ӥ', 'Ӧ', 'ӧ', 'Ө', 'ө', 'Ӫ', 'ӫ', 'ӯ', 'Ӱ', 'ӱ', 'Ӳ', 'ӳ', 'Ӵ', 'ӵ', 'Ӻ', 'ӻ');
        $replace = array('E', 'h', 'E', 'S',  'I', 'I',   'J', 'h', 'K', 'Y', 'U',  'A', 'b', 'B', 'A', 'E', 'K',  'E', 'N', 'N',   'N', 'K', 'N', 'M', 'H', 'O',    'n',   'P', 'C', 'T', 'Y', 'o', 'X', 'U',       'y', 'W', 'W',  'B', 'b', 'E', 'R', 'a', 'b', 'B', 'r', 'A', 'e', 'k',     'e', 'n', 'n', 'k', 'n', 'm', 'h', 'o', 'n', 'p', 'c', 't', 'y',   'p', 'x', 'u', 'y', 'w', 'w', 'b', 'b',      'e', 'R', 'e', 'h', 'r', 'e',  's', 'i', 'i',  'j', 'h', 'k', 'y', 'u', 'W', 'w', 'b',  'b',      'C', 'c', 'A', 'a', 'A', 'a',      'E', 'e', 'Y', 'y',  'O', 'o', 'V', 'v', 'V', 'v',   'O', 'o', 'W', 'w', 'W', 'w', 'C', 'c', 'F', 'f', 'h', 'h',    'k',  'E', 'e', 'K', 'k', 'K', 'k', 'K', 'k', 'K',  'k',      'H', 'h', 'H', 'h',     'N', 'n', 'Q', 'q', 'C', 'c',  'T', 't', 'Y', 'y', 'Y', 'y', 'X', 'x',   'U', 'u',  'Y', 'y', 'Y', 'y', 'h', 'h',  'E', 'e', 'E', 'E', 'I', 'K', 'k', 'H', 'h', 'Y', 'y',          'M', 'M', 'I', 'A', 'a', 'A', 'a',  'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e', 'E', 'e',    'N', 'n', 'N', 'n', 'O', 'o', 'O', 'o', 'O', 'O', 'y', 'y', 'y', 'y', 'y', 'u', 'u',  'Y', 'y');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		   // Fold  Cyrillic Supplement (only those that can be substituted for a Roman letter)
		$search  = array('Ԁ', 'ԁ', 'Ԃ', 'ԃ', 'Ԋ', 'ԋ', 'Ԍ', 'ԍ', 'Ԏ', 'ԏ', 'Ԑ', 'ԑ', 'Ԕ', 'ԕ', 'Ԗ', 'ԗ', 'Ԛ', 'ԛ', 'Ԝ', 'ԝ', 'Ԟ', 'ԟ', 'Ԣ', 'ԣ');
        $replace = array('d', 'd', 'd', 'd', 'H', 'H', 'G', 'G', 'T', 'T', 'E', 'E', 'X', 'x', 'P', 'p', 'Q', 'q', 'W', 'w', 'K', 'k', 'H', 'h');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		   // Fold  Armenian (only those that can be substituted for a Roman letter)
		$search  = array('Ա', 'Բ', 'Գ', 'Դ', 'Ե', 'Զ', 'Է', 'Թ', 'Ժ', 'Ի', 'Լ',  'Ծ', 'Հ', 'Մ', 'Յ', 'Ն', 'Շ', 'Ո', 'Ռ', 'Ս', 'Վ', 'Տ',  'Ր', 'Ց', 'Ւ', 'Փ', 'Ք', 'Օ', 'Ֆ', 'ա', 'բ',  'գ', 'դ', 'ե', 'զ', 'է', 'ը', 'թ', 'ժ', 'ի', 'լ', 'կ', 'հ', 'ձ', 'ղ', 'ճ', 'մ',   'յ', 'ն', 'ո', 'պ', 'ռ', 'ս', 'վ', 'ր', 'ց', 'ւ', 'ք', 'օ', 'ֆ', 'և');
        $replace = array('U', 'F', 'P',   'f', 't', 'S',   't', 'P', 'd', 'r', 'L',   'Q', 'Z', 'U', 'E',   'i', 'C', 'N', 'N',  'U', 'Y', 'S',  'r', '8', 'r', 'o', 'p', 'o', 's', 'w', 'p',    'q', 'n', 't', 'q', 't', 'n', 'p', 'd', 'h',  'i', 'U', 'h', 'a', 'n', 'a', 'u',  'j', 'u', 'n', 'w', 'n', 'u', 'u', 'n', 'g', 'L', 'p', 'o', 's', 'u');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		   // Fold  Hebrew (only those that can be substituted for a Roman letter)
		$search  = array('׀', '׆', 'א', 'ב', 'ג', 'ה', 'ו', 'ז', 'ח', 'ט', 'כ', 'ל', 'ם', 'מ', 'ן', 'נ', 'ס', 'ע', 'ץ', 'צ', 'ש', 'ת');
        $replace = array('I', 'C', 'K', 'l', 'I', 'N', 'I', 'I', 'N', 'U', 'J', 'j', 'O', 'O', 'I', 'l', 'U', 'U', 'Y', 'Y', 'W', 'N');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		   // Fold  Latin Extended-C (only those that can be substituted for a Roman letter)
		$search  = array('Ⱡ', 'ⱡ', 'Ɫ', 'Ᵽ', 'Ɽ', 'ⱥ', 'ⱦ', 'Ⱨ', 'ⱨ', 'Ⱪ', 'ⱪ', 'Ⱬ', 'ⱬ', 'ⱴ', 'Ⱶ', 'ⱶ');
        $replace = array('L', 'l', 'L', 'P', 'R', 'a', 't', 'H', 'h', 'K', 'k', 'Z', 'z', 'v', 't', 't');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		   // Fold  some Dingbats (only those that can be substituted for a Roman letter)
		$search  = array('✕', '✖', '✗', '✘', '❍', '❘', '❙', '❚');
        $replace = array('x', 'x', 'x', 'x', 'O', 'I', 'I', 'I');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1; 
		   // Fold Miscellaneous Mathematical Symbols-A (only those that can be substituted for a Roman letter)
		$search  = array('⟑', '⟒', '⟙', '⟟');
        $replace = array('A', 'W', 'T', 'i');
		$changestr1 = str_replace($search, $replace, $changestr);
		$changestr = $changestr1;
	}
echo '<br>'; 
echo '<br>after  folds   $changestr=';
var_dump($changestr); 
echo '<br>'; 
	echo '--------->END OF fold diacritic ';
	return $changestr; 
}

/*============================================================ */
// Reports whether $checkstr contains at least one character in the code point range
// U+4E00 through U+9FA5. Returns TRUE or FALSE; also FALSE when the pattern match
// itself fails. Echoes one debug line.
function Detect_CJK_charset(&$checkstr)
{
	echo '<BR> test for  CJK <BR>';
	$ideographs = array();
	return (preg_match_all('/[\x{4E00}-\x{9FA5}]/iux', $checkstr, $ideographs) > 0);
}

?>
Return current item: ObsceneClean