<?php
/*
* ChatGrammarCorrector v1.2
* Copyright 2006-2008 sk89q
* Written by sk89q
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/**
* @package com.therisenrealm.text
*/
/**
* Attempts to correct faulty grammar/chatspeak in chat situations.
*
* @package com.therisenrealm.text
* @version 1.2
* @author sk89q
* @copyright Copyright (c) 2006-2008, sk89q
*/
class ChatGrammarCorrector
{
    /**
     * Chatspeak replacement table.
     *
     * Each key is a regular-expression fragment that correct() matches
     * case-insensitively as a whole word; keys are interpolated into a pattern
     * delimited by "#", so they must not contain an unescaped "#". Each value
     * is a preg_replace() replacement string, so "$1" refers to the first
     * capture group of the key. Entries are applied in declaration order.
     *
     * Replacement values that use a backreference are written with "$1"
     * (single-quoted or with an escaped dollar sign) because "\1" inside a
     * double-quoted PHP string is an octal escape for the byte 0x01, not a
     * backreference.
     *
     * @var array
     */
    public static $changes = array(
        "omg" => "OMG",
        "wtf" => "wtfreak",
        "ur" => "you're",
        "youre" => "you're",
        "itz" => "it's",
        "its" => "it's",
        "im" => "I'm",
        "ive" => "I've",
        "cant" => "can't",
        "isnt" => "isn't",
        "arent" => "aren't",
        "wasnt" => "wasn't",
        "werent" => "weren't",
        "havent" => "haven't",
        "hasnt" => "hasn't",
        "hadnt" => "hadn't",
        "wont" => "won't",
        "wouldnt" => "wouldn't",
        "dont" => "don't",
        "doesnt" => "doesn't",
        "didnt" => "didn't",
        "couldnt" => "couldn't",
        "shouldnt" => "shouldn't",
        "mightnt" => "mightn't",
        "mustnt" => "mustn't",
        "thats" => "that is",
        "b4" => "before",
        "btw" => "by the way",
        "co(s|z)" => "'cause",
        "r" => "are",
        "u" => "you",
        "2nite" => "tonight",
        "innit" => "isn't",
        "awsum" => "awesome",
        "rite" => "right",
        "i" => "I",
        "dun" => "don't",
        "thnk" => "think",
        "gud" => "good",
        "dat" => "that",
        "pl(z|s)" => "please",
        "som" => "some",
        "avi?" => "avatar",
        "sig" => "signature",
        "ty" => "thank you",
        "oic" => "oh I see",
        "srry" => "Sorry",
        "ya" => "you're",
        "k?noe" => "know",
        "lyke?" => "like",
        "pic(s?)" => 'picture$1',
        "diz" => "this",
        "d8" => "date",
        "duz" => "does",
        "eva" => "ever",
        "eve?ry1" => "everyone",
        "ne1" => "anyone",
        "some?1" => "someone",
        "sum1" => "someone",
        "sup" => "what's up",
        "hafta" => "have to",
        "hav" => "have",
        "idk" => "I don't know",
        "wryte" => "write",
        "b" => "be",
        "tlk" => "talk",
        "giv" => "give",
        "dunno" => "don't know",
        "w/o" => "without",
        "luv" => "love",
        "wernt" => "weren't",
        "l8a" => "later",
        "oso" => "also",
        "n" => "and",
        "den" => "then",
        "wuz" => "was",
        "ne" => "any",
        "neway(s?)" => 'anyway$1',
        "newayz" => "anyways",
        "thx" => "thanks",
        "nvmnd" => "nevermind",
        "nvm" => "nevermind",
        "lyke" => "like",
        "srsly" => "seriously",
        "wrk" => "work",
        "liek" => "like",
        "dis" => "this",
        "lemme" => "let me",
        "amirite\?*" => "am I right?",
        "y" => "why",
        "wut" => "what",
        "gonna" => "going to",
        "any1" => "anyone",
        "thurr?" => "there",
        "wud" => "would",
        "im?ma" => "I'm going to",
        "laterz" => "later",
        // Matches "mr." and "mrs." with a literal period.
        "mr(s?)\\." => 'Mr$1.',
        // The captured "s" is re-emitted so its original case is preserved.
        "(s)hes" => "\${1}he's",
        "cu(z|s)" => "'cause",
        "gr8" => "great",
    );

    /**
     * Collapses runs of repeated punctuation.
     *
     * Runs of "!" become "!", runs of "?" become "?", any remaining mix of
     * "!" and "?" becomes "!?", and two or more periods become "...".
     *
     * @param string $m The text
     * @return string Corrected text string
     */
    private static function replace_repeated_punctuation($m)
    {
        // Order matters: same-character runs are collapsed first so that the
        // mixed "!"/"?" rules only see what is left over.
        $m = preg_replace("#!!+#", "!", $m);
        $m = preg_replace("#\?\?+#", "?", $m);
        $m = preg_replace("#![\?!]+#", "!?", $m);
        $m = preg_replace("#\?[\?!]+#", "!?", $m);
        $m = preg_replace("#\.\.+#", "...", $m);
        return $m;
    }

    /**
     * Corrects text.
     *
     * Applies every entry of self::$changes as a whole-word, case-insensitive
     * replacement, collapses repeated punctuation, and then capitalises the
     * first letter of all-lowercase words that start a sentence. Words that
     * already contain an uppercase letter, or contain no lowercase letter at
     * all, are left untouched. Only single spaces are treated as word
     * separators.
     *
     * @param string $m The text
     * @return string Corrected text string
     */
    public static function correct($m)
    {
        foreach (self::$changes as $k => $v) {
            // Lookarounds instead of \b: identical for keys that begin/end
            // with a word character, but they also work for keys that end in
            // punctuation (e.g. "mr." followed by a space).
            $replaced = preg_replace("#(?<!\\w)(?:$k)(?!\\w)#i", $v, $m);
            // preg_replace() returns null on a PCRE error; keep the text
            // as it was rather than discarding it.
            if ($replaced !== null) {
                $m = $replaced;
            }
        }

        $m = self::replace_repeated_punctuation($m);

        $new = array();
        $start = true;
        foreach (explode(" ", $m) as $w) {
            // Consecutive spaces yield empty tokens; keep them so spacing is
            // preserved, but do not let them reset the sentence-start flag.
            if ($w === "") {
                $new[] = $w;
                continue;
            }

            // Only touch words that are entirely lowercase and contain at
            // least one letter.
            if (strtolower($w) === $w && strtoupper($w) !== $w) {
                if ($w === "i") {
                    $w = "I";
                } else if (preg_match("#o\.o|o\-o#i", $w)) {
                    // Emoticons such as "o.o" / "o-o" stay as typed.
                } else if ($start || preg_match("#^i'#", $w)) {
                    // Sentence start, or a contraction of "I" (i'm, i've).
                    $w = ucfirst($w);
                }
            }

            // The next word starts a sentence if this one ends in . ! or ?
            $start = (bool) preg_match("#(\.|!|\?)$#", trim($w));
            $new[] = $w;
        }

        return implode(" ", $new);
    }
}