Location: PHPKode > projects > ObsceneClean > DetectOWs2.php
<?PHP
/*
    ObsceneClean - a profanity filter. Copyright (C) 2009 Scott L. Moore

    This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
Contact: hide@address.com	
*/	
/*  OW = Offensive Word
parm1: string - the string to check
parm2: int - Default 1. Lowest severity to check for; use 1 through 10.
parm3: Boolean - Default True. Do a quick check, which exits as soon as the first unambiguous OW is found; otherwise all possible matches are analyzed and reported.
*/ 
// Debug driver: dump the three posted form fields, run the detector once, then stop.
// Each field is read once through isset() so a missing field yields NULL (the same value PHP
// produces for an undefined index) without raising an undefined-index notice/warning.
$postedLowestSev  = isset($_POST['lowestsev'])  ? $_POST['lowestsev']  : NULL;
$postedQuickCheck = isset($_POST['quickcheck']) ? $_POST['quickcheck'] : NULL;
$postedCheckStr   = isset($_POST['checkstr'])   ? $_POST['checkstr']   : NULL;
echo ' super begin -------  well? lowestsev=';
var_dump($postedLowestSev);
echo 'well? quickcheck=';
var_dump($postedQuickCheck);
echo 'well? checkstr=';
var_dump($postedCheckStr);
$quickcheckB = 0;			// overide value from HTML form
// NOTE(review): DetectOWs() contains no return statement, so this condition is always
// falsy as written and the error line below is never echoed - confirm the intended contract.
if (DetectOWs($postedCheckStr, $postedLowestSev, $quickcheckB))
			{
			echo '<B>' . "ERROR 66 BW - Wait for redirect." . '</b>';
			}   
echo 'jus afore func';
exit();

/**
 * Scan a string for offensive words (OWs) and echo a verbose HTML debug/report trace.
 *
 * Processing order, as implemented below:
 *   1. include the library and settings files, strip slashes from the input;
 *   2. reject input that is too long or too short (script exits);
 *   3. blank out "safe" strings (master, local, Shakespeare, KJV lists);
 *   4. load the OW list and the auxiliary word lists;
 *   5. optionally fold diacritics, then weigh insults / quantifiers / antagonism;
 *   6. build one REGEX per OW and test every match against a chain of rules that assign
 *      a probability to the match;
 *   7. optionally blank out offensive expressions, then echo the final report.
 *
 * Configuration variables such as $MaxInputStringSize, $MinCheckstrSize, $DataDir,
 * $UseSpecialRules, the *Threshold values and the Default*Probability values are not assigned
 * in this function - presumably they come from ObsceneClean.settings.php (TODO confirm).
 *
 * NOTE(review): the function has no return statement and calls exit() on several paths
 * (input too long / too short, invalid safe list or OW list, very high antagonism with
 * $AntagonismExitOnVeryHigh set, no OWs found). Callers that test the result get NULL.
 * NOTE(review): the settings file is pulled in with include_once inside the function body, so
 * a second call in the same request would not re-create those local variables - confirm.
 *
 * @param string $checkstr   The string to check.
 * @param mixed  $lowestsev  Lowest severity to show in the final report (compared with each match's 'sev').
 * @param mixed  $quickcheck When truthy, matching stops at the first match that a high-confidence rule fires on.
 */
function DetectOWs($checkstr = " ", $lowestsev = "1", $quickcheck = "1") 	// no parm, passed by ref., is altered in called function!
{   
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
//	Housekeeping 
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
	$real_usage= TRUE;
	echo '<BR> BEGIN <BR> BEGIN MEM=' . memory_get_usage() . '<BR>';
	echo 'MEM PEAK=' . memory_get_peak_usage() . '<BR>';
	include_once "ObsceneCleanLib.php";
	include_once "ObsceneClean.settings.php";
	$pspell_link = pspell_new("en"); //0. Get the dictionary
	echo '<br> fore strip checkstr=';
	var_dump($checkstr);
	$checkstr = stripslashes($checkstr);
	echo 'well??? checkstr=';
	var_dump($checkstr);
	// $checkstr = "\x20" . $checkstr . "\x20";			// this is easier than getting function AccidentalOW to detect EOL and  BOL, factor this in when doing replacement
	// $checkstr = str_replace("-", " ", $checkstr);   // some words commonly seperated with these chars. 
	// $checkstr = str_replace("_", " ", $checkstr);   // get rid of them so we spell check easier
	// $checkstr = str_replace("/", " ", $checkstr);
	// $checkstr = str_replace("<", " ", $checkstr);
	// $checkstr = str_replace(">", " ", $checkstr);
	// $checkstr = str_replace(".", " ", $checkstr);
	$SafeAppliedStr = $checkstr;  	// working copy: safe strings and OEs get blanked out in this copy, $checkstr stays intact
	$encode = mb_detect_encoding($SafeAppliedStr, "auto");
	echo '<br> $encode=' . $encode;
	// Running totals used with += further down; start them at zero so they are never read undefined.
	$OverallProbability = 0;
	$TotalInsultingCount = 0;
	$UniqueInsults = 0;
	// Defaults for the case where antagonism is not evaluated; both are still read later on.
	$Uniquecats = NULL;
	$HighestAntagonismArray = array(0);
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
//	Error checking is checkstr == 2 or less chars? Y=return error, lowest sev must be 10 or less!  
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
	$checkstrSize = strlen($checkstr);
	echo '<BR> $checkstrSize=' . $checkstrSize;
	// Compare the length itself with the limit (the original compared the number of digits in the length).
	if ($checkstrSize > $MaxInputStringSize)
		{ echo '<BR> <b>ERROR</b> Input string is larger than allowed Maximum. Max=' . $MaxInputStringSize; exit(); }
	// Poor disguise detection implies thin disguise detection.
	if ((!$usethindisguise ) and ($usepoordisguise)) {$usethindisguise = TRUE; }
	if (strlen($checkstr) <= $MinCheckstrSize) {echo "<BR>String to small to be checked"; exit(); }
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
	$TotalWords = str_word_count($checkstr, 0);
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
//	Blank out completely safe strings in the string to be tested 
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
	$MasterSafeList = GetMasterSafeList();
	$TheFileToOpen = 'LocalSafeList.dat';
	if ($DataDir == "")
	{	
		$app_root = dirname(__FILE__);
		$FTfile = $app_root . '/' . 'dat' . '/' . $TheFileToOpen;  
	}
	else
	{	
		$FTfile = $DataDir . '/' . $TheFileToOpen;  
	}
	// Only merge the local list when the file exists and is non-empty; is_file() avoids the
	// warning filesize() raises for a missing file.
	if (is_file($FTfile) && (filesize($FTfile) > 0))
	{
		$LocalSafeList = GetLocalSafeList();
		$MasterSafeList = array_merge($MasterSafeList, $LocalSafeList);
	}
	$ShakespeareSafeList = NULL;	// dumped below even when the option is off
	if ($ShakespeareSafe)
	{
		$ShakespeareSafeList = GetShakespeareSafeList();
		$MasterSafeList = array_merge($MasterSafeList, $ShakespeareSafeList);
	}
	echo '<br> blank here! $ShakespeareSafeList=';
	var_dump($ShakespeareSafeList);
	echo '<br>';
	echo '<br>';
	$KJVBibleSafeList = NULL;		// dumped below even when the option is off
	if ($KingJamesBibleSafe)
	{
		$KJVBibleSafeList = GetKJVBibleSafeList();
		$MasterSafeList = array_merge($MasterSafeList, $KJVBibleSafeList);
	}
	echo '<br> blank here! $KJVBibleSafeList=';
	var_dump($KJVBibleSafeList);
	echo '<br>';
	echo '<br>';
	$MasterSafeList = array_unique($MasterSafeList);
	// Loose == NULL also catches an empty array.
	if (($MasterSafeList == NULL) || (!is_array($MasterSafeList)))
	{
		echo '<BR><BR> ERROR $MasterSafeList is NULL or IS not an array <BR><BR>'; 
		exit();
	}
	// The safe list entries are used directly as REGEX patterns; every hit is handed to the callback.
	$SafeAppliedStr = preg_replace_callback($MasterSafeList, 'ReplaceWithStrOfEqualSize', $SafeAppliedStr);
	echo '<BR> after preg_replace_callback ';
	echo '<BR> BEGIN MEM=' . memory_get_usage() . '<BR>';
	echo '<BR> MEM PEAK=' . memory_get_peak_usage() . '<BR>';
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// Put list of decrypted OWs & parms into array, 1 per element
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
	// NOTE(review): $LowestSevConsidered is not assigned in this function (presumably a setting);
	// the $lowestsev parameter is only applied when the final report is printed - confirm this is intended.
	$OWArray = GetOWsAndParms($LowestSevConsidered);
	if (($OWArray == NULL) || (!is_array($OWArray)))
	{
		echo '<BR><BR> ERROR $OWArray is NULL or not an array <BR><BR>'; 
		exit();
	}
	echo  '<BR> before from template     ';
	print_r($OWArray);
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// Get 'other' word lists 
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
	echo '<BR> before get lists ';
	echo '<BR> BEGIN MEM=' . memory_get_usage() . '<BR>';
	echo '<BR> MEM PEAK=' . memory_get_peak_usage() . '<BR>';
	$QtyWordsArray = GetQtyWords();
	$InsultingWordsArray = GetInsultingWords();					// Insulting words, e.g. stupid, dumb, not the same as offensive words. They are qualitatively different
	$AntagonisticWordsArray = GetAntagonisticWords();		// Each antagonistic Word is not offensive in any way but when combined these words are offensive
	$OWsInAspell = GetOWsInAspell();
	$OWLookupArray = GetOWCategoryLookup();
	echo ' <br> we got the lookup ';
	var_dump($OWLookupArray);
	if ($UseSpecialRules)
	{
		$SpecialRules = GetSpecialRules(); 
		echo '<BR> debug only follows  SpecialRules=';
		var_dump($SpecialRules);
		echo '<BR> <BR><BR>  special rules start here';
		$SpecialRulesCount = count($SpecialRules);
		for ($x=0; $x<$SpecialRulesCount; $x++)	
		{
			echo '<BR> ow=' . $SpecialRules[$x]['OW'];
			echo ' rule=' . $SpecialRules[$x]['rule'];
			echo ' nearword=' . $SpecialRules[$x]['nearword'];
			echo ' proximity=' . $SpecialRules[$x]['proximity'];
			echo ' direction=' . $SpecialRules[$x]['direction'];
			echo ' weight=' . $SpecialRules[$x]['weight'];
			echo '<BR> <br>';
		}
	}
	echo '<BR> ----------- $QtyWordsArray=';
	var_dump($QtyWordsArray);
	echo '<BR> ----------- $InsultingWordsArray=';
	var_dump($InsultingWordsArray);
	echo '<BR> ----------- $AntagonisticWordsArray=';
	var_dump($AntagonisticWordsArray);
	$array_elements = count($AntagonisticWordsArray); 
/*	for ($i=0; $i<$array_elements; $i++)	
	{ echo '<br> val=' . $AntagonisticWordsArray[$i]['theword']; 
	  echo ' val=' . $AntagonisticWordsArray[$i]['cat'];
	}   */
	echo '<BR> ----------- $OWsInAspell=';
	var_dump($OWsInAspell);
	echo '<BR> after GET ALL LISTS ';
	echo '<BR> BEGIN MEM=' . memory_get_usage() . '<BR>';
	echo '<BR> MEM PEAK=' . memory_get_peak_usage() . '<BR>';
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// do diacritical folding e.g. accent removal 
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
	echo '<br> <br> just before fold diacritic  $SafeAppliedStr=';
	echo '<BR> BEGIN MEM=' . memory_get_usage() . '<BR>';
	echo '<BR> MEM PEAK=' . memory_get_peak_usage() . '<BR>';
	var_dump($SafeAppliedStr);
	if ($folddiacritics)
	{
		$SafeAppliedStr = FoldDiacritics($SafeAppliedStr);
	}
	echo '<br> <br> After fold diacritic  $SafeAppliedStr=' . $SafeAppliedStr;
	echo '<BR> after DIACRIT FOLD ';
	echo '<BR> BEGIN MEM=' . memory_get_usage() . '<BR>';
	echo '<BR> MEM PEAK=' . memory_get_peak_usage() . '<BR>';
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// Put all insulting & Quantitative terms in array with their position for later analysis
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */		
	// Call-time pass-by-reference (&$checkstr at the call site) is a fatal error since PHP 5.4.
	// If GetPos4InsultsInArray() declares its first parameter by reference it still receives one.
	$InsultsAndPos = GetPos4InsultsInArray($checkstr, $InsultingWordsArray);
	echo '<br> yo yo dyne $InsultsAndPos=';
	print_r($InsultsAndPos); 
	$QtyAndPos = GetPos4WordsInArray($checkstr, $QtyWordsArray, false);
	echo '<br> $QtyAndPos=';
	print_r($QtyAndPos); 
	$TotalInsultsWeight = 0;
	if (is_array($InsultsAndPos))
	{
		$TotalInsultingCount += CountAllInsults($InsultsAndPos, $InsultingWordsArray);
		echo '<br><br><br> $TotalInsultingCount=';
		print_r($TotalInsultingCount); 
		$TotalInsultsWeight = WeighAllInsults($InsultsAndPos, $InsultingWordsArray);
		echo '<br><br><br> $TotalInsultsWeight=';
		print_r($TotalInsultsWeight); 
		$UniqueInsults = ReturnUniqueInsults($InsultsAndPos);
		echo '<br> <br> ';
	}
	echo '<BR> after insulting & Quantitative ';
	echo '<BR> BEGIN MEM=' . memory_get_usage() . '<BR>';
	echo '<BR> MEM PEAK=' . memory_get_peak_usage() . '<BR>';
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// Evaluate Antagonism 				$Uniquecats is the de-duplicated count of antagonistic words per 'politically correct' category
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
	if ($EvaluateAntagonism)
	{
		$Uniquecats = AssessAntagonism($checkstr, $AntagonisticWordsArray, $AntagonismThreshold);
		echo ' <br> yo yo yo (count($InsultsAndPos)=' . count($InsultsAndPos);
		echo ' <br> $Uniquecats=';
		print_r($Uniquecats);
		$HighestAntagonismArray = EvaluateHighestAntagonism($Uniquecats);
		// NOTE(review): no branch covers values from $AntagonismHigh up to (but excluding) $AntagonismVeryHigh - confirm this gap is intended.
		if ($HighestAntagonismArray[0] > 1 and $HighestAntagonismArray[0] < $AntagonismLow)
		{ 
			echo '<br> <span style="color: red; font-weight: bold;">RULE MATCH:</span> Antagonistic language is present but low.';
			$OverallProbability += 20;
			$Antagonism = FALSE; 
		}
		if ($HighestAntagonismArray[0] >= $AntagonismLow and $HighestAntagonismArray[0] < $AntagonismHigh)
		{ 
			echo '<br> <span style="color: red; font-weight: bold;">RULE MATCH:</span> Antagonistic language is moderate';
			$OverallProbability += 50; 
			$Antagonism = TRUE;  
		}
		if ($HighestAntagonismArray[0] >= $AntagonismVeryHigh)   // antagonism alone is not good indicator of bad talk (wikipedia 'Holocaust'), so insulting terms must also be present to signal exit
		{
			$OverallProbability += 70;
			if (($TotalInsultsWeight > $InsultingWeightThreshold) or ($UniqueInsults >= $InsultingUniqueThreshold))
			{
				echo '<br> <span style="color: red; font-weight: bold;"><span style="color: red; font-weight: bold;">RULE MATCH:</span></span> Antagonistic and insulting language is very high or too many unique insults. Unique Insults=' . $UniqueInsults . ' Insults weight=' . $TotalInsultsWeight . '<br>';
				if ($AntagonismExitOnVeryHigh)
				{ 
					if ($ReportAntagonism) 
						{	ReportAntagonism($Uniquecats, $OWLookupArray); }
					if ($ReportInsults) 
						{ 	ReportInsults($InsultsAndPos); }
					exit(); 
				}
			}
		}
	}
	else { echo '<br>  Antagonism not evaluated'; }
	echo '<BR> after antag eval ';
	echo '<BR> BEGIN MEM=' . memory_get_usage() . '<BR>';
	echo '<BR> MEM PEAK=' . memory_get_peak_usage() . '<BR>';
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// Use disguise chars., lookalikes to form a template REGEX for finding OWs 
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
	$regexPattern 							= FormREGEX_Template($usethindisguise, $usepoordisguise, $PoorDisguisingCharsOnly, $GoodDisguisingCharsOnly, $MaxGoodDisguisingChars, $MaxPoorDisguisingChars, $DisguiseCharAlternationFactor, $MaxOWLetterRepeat);
	$regexPattern_for_1st_letter 	= FormREGEX_Template_for_1st_letter($usethindisguise, $usepoordisguise, $PoorDisguisingCharsOnly, $GoodDisguisingCharsOnly, $MaxGoodDisguisingChars, $MaxPoorDisguisingChars, $DisguiseCharAlternationFactor, $MaxOWLetterRepeat);
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
//	time to programatically sew letters of OW with regex template to form a new regex that can find all OWs
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
	echo  '<BR> BEFORE MakeArrayOfREGEXsToFindOWs';
	var_dump($SafeAppliedStr);
	echo  '<BR>';
	$REGEXs_array = MakeArrayOfREGEXsToFindOWs($OWArray, $regexPattern, $uselookalikes, $VowelSubstitutionChars,   $SubstitutableChars, $SubstitutionChars, $PoorDisguisingCharsOnly, $GoodDisguisingCharsOnly,  $VowelSubstitutionRule, $VowelSubstitutionLen, $UseGreedyREGEXs, $MaxGoodDisguisingChars, $regexPattern_for_1st_letter );
	echo  '<BR>';
	if (is_array($REGEXs_array)) { echo ' IS array'; }
		else { echo 'not ARRAY!'; }
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// Now use array of REGEXs to find OWs - After each REGEX check if any unambiguous OWs
 /* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
	$TempArray  = array(); 
	$matches = array();					// accepted matches so far (the original initialised a differently-cased $Matches, leaving $matches undefined)
	$iOffset = 0; 
	$i2 = 0;								// increment for each OW matched by REGEX     
	$array_elements = count($REGEXs_array);	
	for ($i=0; $i<$array_elements; $i++)	// This loop one per OW REGEX - increment $i. (recall there is now one REGEX for each OW from the masterOW file)
	{
		echo  '<BR> this regex is for ' . $OWArray[$i]['OW'];
		// echo   '<BR> $SafeAppliedStr=' . $SafeAppliedStr;
		echo '<BR> before preg_match_all ' ;
		echo '<BR> MEM IS=' . memory_get_usage() . '<BR>';
		echo '<BR> PEAK MEM IS=' . memory_get_peak_usage() . '<BR>';
		// echo '<BR> afore if preg - i$=' . $i . ' $REGEXs_array[$i]=' . $REGEXs_array[$i] . '<BR>';		
		if (preg_match_all($REGEXs_array[$i], $SafeAppliedStr, $TempArray, PREG_OFFSET_CAPTURE, $iOffset) > 0)  // preg_match might return more than 1 instance of an OW
		{
			echo '<br> something matched! OW=' . $OWArray[$i]['OW']; 
			$NumOfMatches = count($TempArray[0]);	// $TempArray simply holds result of preg_match_all while $matches holds result of all OWs so far
			for ($i3=0; $i3<$NumOfMatches; $i3++)	// One loop for each matched OW found by previous  preg_match_all, i.e. if F word found 3x by previous preg_match_all then this loops thrice
			{
				echo '<BR> THIS ALL MATCHES $TempArray[0]=';
				var_dump($TempArray[0]);
				echo '<BR> $TempArray[1]=';
				var_dump($TempArray[1]); 
				$matchedOWtmp = $TempArray[0][$i3][0]; 
				$matchedOWPostmp = $TempArray[0][$i3][1];
				echo '<BR> $matchedOWtmp=';
				var_dump($matchedOWtmp); 
				// A match is only kept when it is new, not accidental, recognizable, and the OW makes up enough of it.
				if ( (!AlreadyDetected($matches, $matchedOWtmp, $matchedOWPostmp)) 
				and (!AccidentalOW($SafeAppliedStr,  $matchedOWtmp, $matchedOWPostmp, $OWsInAspell, $OWArray[$i]['sev'], $OWArray[$i]['OW'], $OWArray[$i]['ambiguity']))
				and (OWrecognizable($SafeAppliedStr, $matchedOWtmp, $matchedOWPostmp, $OWArray[$i]['sev'], $OWArray[$i]['OW'], $OWArray[$i]['ambiguity']))
				and (!OWsPercentOfMatchTooSmall($matchedOWtmp,  $OWArray[$i]['OW'])) )		
				{
					$matches[$i2]['OW'] = $TempArray[0][$i3][0];	// Store  matched pattern. $TempArray[0] contains full matches. So,  $TempArray[0][0][0] is the first match string. Why store at this point? Becuae at this point we have assumed probability os greater than zero.
					$matches[$i2]['pos'] = $TempArray[0][$i3][1];	// Store matched start postion of OW. $TempArray[1]  and up is all the partial matches that we don't care about. So $TempArray[1][0][0]  contains the first partial match that we don't care about.
					$matches[$i2]['OWidx'] = $i;					// Store the index (in $OWArray) of the OW that was matched, to refer back to it later							
					$matches[$i2]['sev'] = $OWArray[$i]['sev'];
					echo ' <BR><BR>  BEGIN TESTING OF MATCHED OW=' . $matches[$i2]['OW'] . ' IN POSITION=' . $matches[$i2]['pos'] . ' MATCHED AGAINST OW=' . $OWArray[$i]['OW'] . '<BR> ';
					if (ExactMatchOfUnambiguousOW($matchedOWtmp, $OWArray, $i))
					{							
						echo ' <BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> EXACT MATCH OF UNAMBIGUOUS WORD TESTED=' . $matches[$i2]['OW'] . '<br>'; 
						$matches[$i2]['prob'] = 100;					//   $matches[$i2][prob] is the probability factor. OW
						if ($quickcheck) { $i2 = $i2 + 1; break 2; }  // increment $i2 when REGEX matches & puts match in $matches
					}
					elseif (($OWArray[$i]['ambiguity'] == 0) 	// 0 = UNambiguous OW, like 'f' word and 'n' word. IF word bounded & match not too long & UNambiguous & its disguised THEN busted!
						and
						(WordBoundedMatch($checkstr, $matchedOWtmp, $matchedOWPostmp))
						and
						(ContainsDisguiseChars($matchedOWtmp, $GoodDisguisingCharsOnly))
						and
						(!RidiculouslyLong($matchedOWtmp, $OWArray, $i, $BoundedOWLenLimit)))    // prevent false positives
					{
						echo ' <BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> REGEX MATCH HAS WORD BOUNDARIES, IS NOT RIDICOUSLY LONG, IS UNAMBIGUOUS and IS DISGUISED. TESTED=' . $matches[$i2]['OW']; 
						$matches[$i2]['prob'] = 100;	
						if ($quickcheck) { $i2 = $i2 + 1; break 2; }
					}
					else	// no easy and evident match so this next IF structure now looks at other less obvious factors indicating an OW
					{
						// Per-match flags: all three are reset for every match so that a TRUE from an
						// earlier match cannot leak into this one ($Quantifiers was never reset before).
						$Ambiguous = FALSE;
						$Antagonism = FALSE;
						$Quantifiers = FALSE;
						$QuantifiersCount = 0;
						if (is_array($QtyAndPos))
						{
							$QuantifiersCount = CountWordsInArrayNearMatch($matchedOWPostmp, strlen($matchedOWtmp), $QtyAndPos, $QtyRuleWordsLenChk, $QtyRuleDirection); 
						}
						$CorrelatedAntagonisticWordsCnt = CorrelateAntagonism($Uniquecats, $OWArray, $i);  // look for antagonistic words that specifically correlate with current match
						if ($CorrelatedAntagonisticWordsCnt >= $CorrelatedAntagonisticWordsThreshold) { $Antagonism = true; }
						if ($OWArray[$i]['ambiguity'] == 1)				  { $Ambiguous = TRUE; }	// 1= ambiguous OW
						if ($QuantifiersCount >= $QtyRuleThreshold)		  { $Quantifiers = TRUE; }
						echo '<br><br> LOGIC BLOCK ';
						if ($Ambiguous) { ECHO '<BR> ITS AMBIGUOUS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!  $CorrelatedAntagonisticWordsCnt=' . $CorrelatedAntagonisticWordsCnt . '  $CorrelatedAntagonisticWordsThreshold=' . $CorrelatedAntagonisticWordsThreshold; }
						if ($Quantifiers) { ECHO '<BR> ITS Quantifiers!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'; }
						if ($Antagonism) { ECHO '<BR> ITS Antagonism!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'; }
						if ($TotalInsultsWeight >= $InsultingWeightThreshold) { ECHO '<BR> ITS INSULT WEIGHT OVER THRESH!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'; }
						if (($Ambiguous) and (($Quantifiers) or ($TotalInsultsWeight >= $InsultingWeightThreshold) or ($Antagonism)))
						{
							$matches[$i2]['prob'] = 80;						
							echo ' <BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> REGEX MATCH IS AMBIGUOUS AND ANTAGONISTIC WORDS ARE PRESENT OR MATCH IS IN PROXIMITY TO INSULTING OR QUANTIFYING WORDS.  TESTED=' . $matches[$i2]['OW'] . ' $CorrelatedAntagonisticWordsCnt=' . $CorrelatedAntagonisticWordsCnt . ' $TotalInsultsWeight=' . $TotalInsultsWeight . ' $QuantifiersCount=' . $QuantifiersCount;
							if ($quickcheck) { $i2 = $i2 + 1; break 2; }
						}
						elseif (($Ambiguous) and ((($TotalInsultsWeight >= $InsultingWeightThresholdLow) and ($TotalInsultsWeight < $InsultingWeightThreshold))
								and  (($QuantifiersCount >= $QtyRuleThresholdLow) and ($QuantifiersCount < $QtyRuleThreshold))))
						{
							echo ' <BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> REGEX MATCH IS AMBIGUOUS AND IS IN PROXIMITY TO A SMALL NUMBER OF INSULTING AND QUANTIFYING WORDS.  TESTED=' . $matches[$i2]['OW'] . ' $CorrelatedAntagonisticWordsCnt=' . $CorrelatedAntagonisticWordsCnt . ' $TotalInsultsWeight=' . $TotalInsultsWeight . ' $QuantifiersCount=' . $QuantifiersCount;
							$matches[$i2]['prob'] = 60;						
							if ($quickcheck) { break 2; }
						}
						elseif (($Ambiguous) and PreviousWordAnInsult($checkstr, $InsultingWordsArray, $matchedOWPostmp))
						{
							$matches[$i2]['prob'] = $DefaultAmbiguousOWwithInsultProbability;	
							echo ' <BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> REGEX MATCH IS AMBIGUOUS AND IS IMMEDIATELY PRECEDED BY AN INSULTING WORD.  TESTED=' . $matches[$i2]['OW'];
						}
						elseif (!$Ambiguous) 		 // if no other rules apply to UNambiguous OW
						{
							if (ContainsDisguiseChars($matchedOWtmp, $GoodDisguisingCharsOnly))
							{
								$matches[$i2]['prob'] = $DefaultDisguisedUnAmbiguousOWProbability;	
								echo ' <BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> UNAMBIGUOUS OW MATCHED. CONTAINS SPECIAL CHARACTERS. TESTED=' . $matches[$i2]['OW'];
							}
							else 
							{ 
								echo '<BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> UNAMBIGUOUS OW MATCHED. TESTED=' . $matches[$i2]['OW'];
								$matches[$i2]['prob'] = $DefaultUnAmbiguousOWProbability; 
							}
						}
						elseif (($UseSpecialRules) and ($Ambiguous)) 	// if text not characteristic of OW USAGE then use special rules, e.g. look at context for nearby mitigating or aggravating  words	
						{
							echo '<br> BEGIN SPECIAL RULES';
							$matches[$i2]['prob'] = $DefaultAmbiguousOWProbability;	
							$NumSpcRulesApplied = 0;
							for ($x=0; $x<$SpecialRulesCount; $x++)
							{
								echo '<br> $SpecialRules[$x][OW]=' . $SpecialRules[$x]['OW'];
								echo '<br>  $OWArray[$i][OW]=' . $OWArray[$i]['OW'];
								if ($SpecialRules[$x]['OW'] == $OWArray[$i]['OW'])
								{
									// The rule name from the rules data selects a function called SpecialRule<name>.
									$Func = 'SpecialRule' . $SpecialRules[$x]['rule'];
									echo '<br> This special rule will now be tested=' . $Func;
									if ($Func($checkstr, $SpecialRules[$x]['nearword'], $matches[$i2]['OW'], $matches[$i2]['pos'], $SpecialRules[$x]['proximity'], $SpecialRules[$x]['direction']))
									{
										echo '<br> SPECIAL RULE USED: the match=' . $matches[$i2]['OW'];
										echo '<br> current prob=' . $matches[$i2]['prob'];
										echo '<br> weight applied=' . $SpecialRules[$x]['weight'];
										$matches[$i2]['prob'] += $SpecialRules[$x]['weight'];
										$NumSpcRulesApplied += 1;  
										echo '<br> after special 1 applied prob=' . $matches[$i2]['prob'];
									}
									else { echo '<br> SUBRULE special rule not applied=' . $Func;  }  
								}
							}
							// Comparison, not assignment: the original "= 0" zeroed the counter and never printed this line.
							if ($NumSpcRulesApplied == 0) { echo '<br> SUBRULE: OW recognized but no special rules applied.'; }
						}
						else  
						{ 
							echo '<br> NOT SURE HOW NO RULE AT ALL COULD HAVE APPLIED. THIS SHOULD ONLY HAPPEN IF SPECIAL RULES TURNED OFF, NO OTHER RULE APPLIED & ITS AMBIGUOUS<BR>';
							$matches[$i2]['prob'] = $DefaultAmbiguousOWProbability;	
						}
					}
					//Analyze overall matches so far
					if ((TooManyUniqueOws($i2, $matches, $TooManyUniqueOWs))			// if more than $TooManyUniqueOWs so far (regardless whether ambiguous) then exit & report. 
					or (PercentOfOWsInStrTooHigh($matches, $checkstr, $AllowablePercentOfOWcharsInStr)))
					{
						echo '<BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> Too many unique OWs or too much of text contains OWs';
						for ($x=0; $x<=$i2; $x++) { $matches[$x]['prob'] = 100; } // assume all 100%, RULE MATCHED in func
						if ($quickcheck) { break 2; }
					}
					echo '<br><br><br> TEST PROB is=' . $matches[$i2]['prob'] . ' pos=' . $matches[$i2]['pos'];
					$i2 = $i2 + 1;		// increment $i2 when REGEX matches & puts match in $matches
				}
				else
				{
					echo '<br> REGEX <b> NO MATCH</b>, beta ACCIDENTAL OR ALREADY DETECTED OW <BR>';
				}
			}
		}
		else { echo '<br> DID NOT MATCH  <br> ';  }
	}
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
// FINAL CHECK RULES
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// Search for offensive expressions (OEs)OEs do not have so much emphais on disguising.
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
	if ($SearchForOEs)
	{
		$MasterOEList =	GetMasterOEList();
		echo  '<BR>';
		echo  '<BR>';
		echo  '<BR> OEs=';
		var_dump($MasterOEList);
		echo  '<BR>';
		echo  '<BR>';
		// $OEcount receives the number of replacements made; only the replacement callback differs.
		if ($ReplaceOEsChar == 1)
		{
			$SafeAppliedStr = preg_replace_callback($MasterOEList, 'ReplaceWithAsterisks', $SafeAppliedStr, -1, $OEcount);
		}
		else
		{
			$SafeAppliedStr = preg_replace_callback($MasterOEList, 'ReplaceWithStrOfEqualSize', $SafeAppliedStr, -1, $OEcount);
		}
	}
	echo 'just after OE search   $SafeAppliedStr=';
	var_dump($SafeAppliedStr);
	//
	if ($ReportAntagonism and $EvaluateAntagonism )	{ ReportAntagonism($Uniquecats, $OWLookupArray); }
	if ($ReportInsults) 		{ ReportInsults($InsultsAndPos); }
	//
	// $matches is now always an array, so "nothing found" is detected by an empty array
	// (equivalent to the original is_array() test on a variable that was only created on the first match).
	if (count($matches) > 0) { echo '<br><br> $matches is an array='; var_dump($matches); }
	else
	{
		echo '<br><br> <BR> <span style="color: red; font-weight: bold;">No OWs were found. </span> ($matches is NOT an array.) ';  
		exit(); 
	}
	echo '<br>';
	echo '<br>';
	$num = count($matches);	
	$OWsOnly = array();
	// Strictly below $num: the original "<=" read one element past the end and added a NULL entry,
	// so the "only 1 unique OW" rule below could never fire.
	for ($x=0; $x<$num; $x++) 
	{
		$OWsOnly[$x] = $matches[$x]['OW'];
	}
	$OWsOnlyUnique = array_unique($OWsOnly);			// condense OWs into new array to unique OWs only
	echo '<BR> count($OWsOnlyUnique=' . count($OWsOnlyUnique) . ' $TooManyUniqueOWs=' . $TooManyUniqueOWs . '<BR>';
	if ((count($OWsOnlyUnique) == 1) and (strlen($checkstr) > $InputStringIsBig)
	and ($TotalInsultingCount < 1) and ($HighestAntagonismArray[0] <= $AntagonismLow))
	{
		for ($i=0; $i<$num; $i++)	
		{
			$matches[$i]['prob'] = ($matches[$i]['prob'] - 20);
			echo '<BR> <span style="color: red; font-weight: bold;">RULE MATCH:</span> Only 1 OW found when OWs reduced, substantial text, no insults and little or no antagonism. Probabilities were lowered.';
		}
	}

/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
//	FINAL REPORT
/* --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */  
	echo '<BR> num=' . $num;			
	$TotalProbs = 0;
	$AverageProb = 0;
	if ($num != 0)
	{
		for ($i=0; $i<$num; $i++)	
		{
			// Clamp every probability into the range 5..100 before averaging.
			if ($matches[$i]['prob'] > 100)  { $matches[$i]['prob'] = 100; }
			if ($matches[$i]['prob'] < 5)  { $matches[$i]['prob'] = 5; }
			$TotalProbs += $matches[$i]['prob'];
		}
		echo '<BR> $TotalProbs=' . $TotalProbs;
		$AverageProb = $TotalProbs / $num;
	}
	echo '<BR> $AverageProb=' . $AverageProb;
	$OverallProbability += $AverageProb;
	if ($OverallProbability > 100) { $OverallProbability = 100; }	
	// REPORT !!!!
	echo '<BR><BR>';
	echo '   <b> REPORT  </b><BR>';
	if ($SearchForOEs)
	{
		echo '<BR>';
		echo '   <b>Offensive Expressions found: ' . $OEcount . '</b>';	
		echo '<BR><BR>';	
	}
	echo '   <b>OFFENSIVE WORDS</b>';		
	for ($i=0; $i<$num; $i++)	
	{
		// Only matches at or above the caller's lowest severity are listed.
		if ($matches[$i]['sev'] >= $lowestsev)
		{
			echo "<BR> $i OWfound=" . '<span style="color: red; font-weight: bold;">' . $matches[$i]['OW'] . '</span>' . ' position=' . $matches[$i]['pos'] . ' OW index num=' . $matches[$i]['OWidx'] . '  PROBABILITY=' . $matches[$i]['prob'] . ' SEVERITY=' . $matches[$i]['sev']; 
		}
	}
	echo '<BR> $OverallProbability=' . $OverallProbability;
	echo '<BR> func detectOWs chill. Yeah you. <BR>';
}
?>
 
Return current item: ObsceneClean