Location: PHPKode > scripts > Chat Grammar Corrector > chat-grammar-corrector/ChatGrammarCorrector.php
<?php
/*
 * ChatGrammarCorrector v1.2
 * Copyright 2006-2008 sk89q
 * Written by sk89q
 * 
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

/**
 * @package com.therisenrealm.text
 */

/**
 * Attempts to correct faulty grammar/chatspeak in chat situations.
 *
 * @package com.therisenrealm.text
 * @version 1.2
 * @author sk89q
 * @copyright Copyright (c) 2006-2008, sk89q
 */
class ChatGrammarCorrector
{
    /**
     * Chatspeak corrections, applied one after another in array order by
     * correct().
     *
     * Each key is a PCRE fragment (without delimiters) that is matched
     * case-insensitively as a whole token; each value is a preg_replace()
     * replacement string. Capture groups are referenced as "\\1": the
     * backslash must be doubled because a bare "\1" inside a double-quoted
     * PHP string is an octal escape (byte 0x01), not a backreference.
     *
     * Because the entries are applied sequentially, the output of an earlier
     * entry can be matched by a later one, so order matters.
     *
     * @var array
     */
    public static $changes = array(
        "omg" => "OMG",
        "wtf" => "wtfreak",
        "ur" => "you're",
        "youre" => "you're",
        "itz" => "it's",
        "its" => "it's",
        "im" => "I'm",
        "ive" => "I've",
        "cant" => "can't",
        "isnt" => "isn't",
        "arent" => "aren't",
        "wasnt" => "wasn't",
        "werent" => "weren't",
        "havent" => "haven't",
        "hasnt" => "hasn't",
        "hadnt" => "hadn't",
        "wont" => "won't",
        "wouldnt" => "wouldn't",
        "dont" => "don't",
        "doesnt" => "doesn't",
        "didnt" => "didn't",
        "couldnt" => "couldn't",
        "shouldnt" => "shouldn't",
        "mightnt" => "mightn't",
        "mustnt" => "mustn't",
        "thats" => "that is",
        "b4" => "before",
        "btw" => "by the way",
        "co(s|z)" => "'cause",
        "r" => "are",
        "u" => "you",
        "2nite" => "tonight",
        "innit" => "isn't",
        "awsum" => "awesome",
        "rite" => "right",
        "i" => "I",
        "dun" => "don't",
        "thnk" => "think",
        "gud" => "good",
        "dat" => "that",
        "pl(z|s)" => "please",
        "som" => "some",
        "avi?" => "avatar",
        "sig" => "signature",
        "ty" => "thank you",
        "oic" => "oh I see",
        "srry" => "Sorry",
        "ya" => "you're",
        "k?noe" => "know",
        "lyke?" => "like",
        "pic(s?)" => "picture\\1",
        "diz" => "this",
        "d8" => "date",
        "duz" => "does",
        "eva" => "ever",
        "eve?ry1" => "everyone",
        "ne1" => "anyone",
        "some?1" => "someone",
        "sum1" => "someone",
        "sup" => "what's up",
        "hafta" => "have to",
        "hav" => "have",
        "idk" => "I don't know",
        "wryte" => "write",
        "b" => "be",
        "tlk" => "talk",
        "giv" => "give",
        "dunno" => "don't know",
        "w/o" => "without",
        "luv" => "love",
        "wernt" => "weren't",
        "l8a" => "later",
        "oso" => "also",
        "n" => "and",
        "den" => "then",
        "wuz" => "was",
        "ne" => "any",
        "neway(s?)" => "anyway\\1",
        "newayz" => "anyways",
        "thx" => "thanks",
        "nvmnd" => "nevermind",
        "nvm" => "nevermind",
        "lyke" => "like",
        "srsly" => "seriously",
        "wrk" => "work",
        "liek" => "like",
        "dis" => "this",
        "lemme" => "let me",
        "amirite\\?*" => "am I right?",
        "y" => "why",
        "wut" => "what",
        "gonna" => "going to",
        "any1" => "anyone",
        "thurr?" => "there",
        "wud" => "would",
        "im?ma" => "I'm going to",
        "laterz" => "later",
        "mr(s)\\." => "Mr\\1.",
        "(s)hes" => "\\1he's",
        "cu(z|s)" => "'cause",
        "gr8" => "great",
    );
    
    /**
     * Collapses runs of repeated punctuation.
     *
     * The rules run in order: "!!..." becomes "!", "??..." becomes "?",
     * any remaining mix of "!" and "?" becomes "!?", and two or more dots
     * become an ellipsis ("...").
     *
     * @param string $m The text
     * @return string Corrected text string
     */
    private static function replace_repeated_punctuation($m)
    {
        // preg_replace() applies the pattern/replacement pairs sequentially,
        // each one working on the result of the previous one.
        $patterns = array(
            '#!!+#',
            '#\?\?+#',
            '#![\?!]+#',
            '#\?[\?!]+#',
            '#\.\.+#',
        );
        $replacements = array("!", "?", "!?", "!?", "...");
        
        $collapsed = preg_replace($patterns, $replacements, $m);
        
        // A null result signals a PCRE failure; fall back to the input.
        return $collapsed === null ? $m : $collapsed;
    }
    
    /**
     * Capitalizes the first word of each sentence and a lone "i".
     *
     * The text is split on single spaces. A token is only touched when it is
     * entirely lowercase and contains at least one letter; tokens that
     * already contain an uppercase letter, tokens without letters, and the
     * "o.o" / "o-o" emoticons are left alone. A token ending in ".", "!" or
     * "?" marks the next non-empty token as the start of a sentence.
     *
     * @param string $m The text
     * @return string Text with sentence starts capitalized
     */
    private static function capitalize_sentences($m)
    {
        $result = array();
        $sentenceStart = true;
        
        foreach (explode(" ", $m) as $word) {
            // Consecutive spaces produce empty tokens. Keep them so the
            // spacing survives, but do not let them clear the
            // start-of-sentence flag.
            if (trim($word) === "") {
                $result[] = $word;
                continue;
            }
            
            $isLowercaseWord = strtolower($word) === $word
                && strtoupper($word) !== $word;
            
            if ($isLowercaseWord && !preg_match('#o\.o|o\-o#i', $word)) {
                if ($sentenceStart || $word === "i" || strncmp($word, "i'", 2) === 0) {
                    $word = ucfirst($word);
                }
            }
            
            $sentenceStart = preg_match('#[.!?]$#', trim($word)) === 1;
            $result[] = $word;
        }
        
        return implode(" ", $result);
    }
    
    /**
     * Corrects text
     *
     * Applies every entry of self::$changes in order, collapses repeated
     * punctuation, then capitalizes sentence starts.
     *
     * @param string $m The text
     * @return string Corrected text string
     */
    public static function correct($m)
    {
        $m = (string) $m;
        
        foreach (self::$changes as $pattern => $replacement) {
            // Lookarounds are used instead of \b so that a pattern ending in
            // punctuation (e.g. "mrs.") can still match in front of a space.
            // For patterns that begin and end with a word character this is
            // equivalent to \b...\b.
            $replaced = preg_replace(
                '#(?<!\w)(?:' . $pattern . ')(?!\w)#i',
                $replacement,
                $m
            );
            
            // Skip an entry whose pattern failed instead of wiping the text.
            if ($replaced !== null) {
                $m = $replaced;
            }
        }
        
        $m = self::replace_repeated_punctuation($m);
        
        return self::capitalize_sentences($m);
    }
}
Return current item: Chat Grammar Corrector