Location: PHPKode > scripts > ArWordTag > arwordtag/ArWordTag.class.php
<?php
// ----------------------------------------------------------------------
// Copyright (C) 2007 by Khaled Al-Shamaa.
// http://www.al-shamaa.com/php/arabic
// ----------------------------------------------------------------------
// LICENSE

// This program is an open source product; you can redistribute it and/or
// modify it under the terms of the GNU General Public License (GPL)
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// To read the license please visit http://www.gnu.org/copyleft/gpl.html
// ----------------------------------------------------------------------
// Class Name: Tagging Arabic Word Class
// Filename: ArWordTag.class.php
// Original  Author(s): Khaled Al-Sham'aa <hide@address.com>
// Purpose:  Arabic grammarians describe Arabic as being derived from
//           three main categories: noun, verb and particle. This class
//           is built to recognize the class of a given Arabic word.
// ----------------------------------------------------------------------

// Heuristic tagger that decides whether a word is a noun, based on the
// word that precedes it, on digits, on trailing diacritics and on a fixed
// list of word-shape patterns.
//
// NOTE(review): none of the regular expressions use the /u modifier, so
// \S consumes a single byte. The literals look like text in a single-byte
// Arabic code page; confirm the file/input encoding before converting
// this file to another encoding, because the \S{n} counts depend on it.
class ArWordTag {
      // Words after which the following word is always reported as a noun.
      public $particle_pre_nouns = array();

      // Fills the particle list. The list is assigned (not appended), so
      // running the constructor twice does not duplicate entries.
      public function __construct(){
          $this->particle_pre_nouns = array('Úä', 'Ýí', 'ãÐ', 'ãäÐ', 'ãä',
                                            'Çáì', 'Úáì', 'ÍÊì', 'ÇáÇ', 'ÛíÑ',
                                            'Óæì', 'ÎáÇ', 'ÚÏÇ', 'ÍÇÔÇ', 'áíÓ');
      }

      // Old-style constructor name, kept so existing code that calls it
      // explicitly (e.g. parent::ArWordTag()) keeps working. PHP 8 no longer
      // invokes a method named after the class on "new", which is why the
      // real initialisation lives in __construct().
      public function ArWordTag(){
          $this->__construct();
      }

      // Tells whether $word is considered a noun.
      //
      // $word        the word to classify
      // $word_befor  the word that precedes it in the text ('' if none)
      //
      // Returns true as soon as one of the rules below matches, false when
      // none of them does.
      public function isNoun($word, $word_befor){
          $word       = trim($word);
          $word_befor = trim($word_befor);

          // Normalise the two alternative letter forms to the plain one so
          // the particle lookup and the shape patterns see a single spelling.
          $word       = preg_replace('/Ã|Å/', 'Ç', $word);
          $word_befor = preg_replace('/Ã|Å/', 'Ç', $word_befor);

          // Rule: the previous word is one of the listed particles.
          if(in_array($word_befor, $this->particle_pre_nouns, true)){
              return true;
          }

          // Rule: the word itself contains a digit.
          if(preg_match('/\d+/', $word)){
              return true;
          }

          // Rule: the previous word contains a digit.
          if(preg_match('/\d+/', $word_befor)){
              return true;
          }

          // Rule: the word ends with one of these three marks. This must be
          // tested before the marks are stripped just below.
          if(preg_match('/(ð|ò|ñ)$/', $word)){
              return true;
          }

          // Strip the listed marks so the shape patterns only see letters.
          $word = preg_replace('/ó|ð|õ|ñ|ö|ò|ú/', '', $word);

          // Rule: fixed two-letter prefix followed by at least three more
          // non-space units.
          if(preg_match('/^Çá\S{3,}$/', $word)){
              return true;
          }

          // Rule: at least three non-space units followed by one of the
          // listed endings.
          if(preg_match('/\S{3,}(É|Á|ì|ÇÊ)$/', $word)){
              return true;
          }

          // Rule: the whole word matches one of the fixed word shapes.
          if(preg_match('/^ã\S{3}$/', $word) ||
             preg_match('/^ã\S{2}Ç\S$/', $word) ||
             preg_match('/^ã\S{3}É$/', $word) ||
             preg_match('/^\S{2}Ç\S$/', $word) ||
             preg_match('/^\SÇ\Sæ\S$/', $word) ||
             preg_match('/^\S{2}æ\S$/', $word) ||
             preg_match('/^\S{2}í\S$/', $word) ||
             preg_match('/^ã\S{2}æ\S$/', $word) ||

             preg_match('/^ã\S{2}í\S$/', $word) ||
             preg_match('/^\S{3}É$/', $word) ||
             preg_match('/^\S{2}Ç\SÉ$/', $word) ||
             preg_match('/^\SÇ\S{2}É$/', $word) ||
             preg_match('/^\SÇ\Sæ\SÉ$/', $word) ||
             preg_match('/^Ç\S{2}æ\SÉ$/', $word) ||
             preg_match('/^Ç\S{2}í\S$/', $word) ||
             preg_match('/^Ç\S{3}$/', $word) ||
             preg_match('/^\S{3}ì$/', $word) ||
             preg_match('/^\S{3}ÇÁ$/', $word) ||
             preg_match('/^\S{3}Çä$/', $word) ||
             preg_match('/^ã\SÇ\S{2}$/', $word) ||
             preg_match('/^ãä\S{3}$/', $word) ||
             preg_match('/^ãÊ\S{3}$/', $word) ||
             preg_match('/^ãÓÊ\S{3}$/', $word) ||
             preg_match('/^ã\SÊ\S{2}$/', $word) ||
             preg_match('/^ãÊ\SÇ\S{2}$/', $word) ||

             preg_match('/^\SÇ\S{2}$/', $word)){
              return true;
          }

          return false;
      }

      // Splits $str on single spaces and tags every non-empty word.
      //
      // Returns a list of array(word, tag) pairs in input order, where tag
      // is 1 when isNoun() accepted the word and 0 otherwise. Empty pieces
      // (produced by consecutive spaces) are skipped and do not count as
      // the "previous word".
      public function tagText($str){
          $text     = array();
          // explode() replaces split(), which was removed in PHP 7; for a
          // literal single-space separator both produce the same pieces.
          $words    = explode(' ', $str);
          $prevWord = '';

          foreach($words as $word){
              if($word == '') continue;

              $text[]   = array($word, $this->isNoun($word, $prevWord) ? 1 : 0);
              $prevWord = $word;
          }

          return $text;
      }

      // Returns $str re-assembled with every run of noun-tagged words
      // wrapped in a highlighting <span>.
      //
      // A listed particle that directly follows a highlighted word is held
      // back and emitted together with the next word, so it does not break
      // the current run. Held-back particles are never dropped: several in
      // a row are all emitted, and any still pending at the end of the text
      // are appended after the closing </span>.
      //
      // The words are concatenated into the markup as-is; callers that pass
      // untrusted text must escape it themselves.
      public function highlightText($str){
          $html    = '';
          $prevTag = 0;
          $pending = array();

          foreach($this->tagText($str) as $wordTag){
              list($word, $tag) = $wordTag;

              // A highlighted run starts here.
              if($prevTag == 0 && $tag == 1){
                  $html .= " \n<span style=\"background-color: #EEEE80\">";
              }

              // Inside a run: postpone the particle until the next word
              // decides whether the run goes on. $prevTag is left untouched
              // on purpose so the run stays open.
              if($prevTag == 1 && in_array($word, $this->particle_pre_nouns, true)){
                  $pending[] = $word;
                  continue;
              }

              // The run ends before this word.
              if($prevTag == 1 && $tag == 0){
                  $html .= "</span> \n";
              }

              $html   .= ' ' . implode(' ', $pending) . ' ' . $word;
              $pending = array();
              $prevTag = $tag;
          }

          if($prevTag == 1){
              $html .= "</span> \n";
          }

          // Particles that ended the text were held back inside the loop;
          // emit them now, outside the highlighted run.
          if(count($pending) > 0){
              $html .= ' ' . implode(' ', $pending);
          }

          return $html;
      }
}
?>
Return current item: ArWordTag