<?php
// ----------------------------------------------------------------------
// Copyright (C) 2007 by Khaled Al-Shamaa.
// http://www.al-shamaa.com/php/arabic
// ----------------------------------------------------------------------
// LICENSE
// This program is open source product; you can redistribute it and/or
// modify it under the terms of the GNU General Public License (GPL)
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// To read the license please visit http://www.gnu.org/copyleft/gpl.html
// ----------------------------------------------------------------------
// Class Name: Tagging Arabic Word Class
// Filename: ArWordTag.class.php
// Original Author(s): Khaled Al-Sham'aa <hide@address.com>
// Purpose: Arabic grammarians describe Arabic as being derived from
// three main categories: noun, verb and particle. This class is
// built to recognize the class of a given Arabic word.
// ----------------------------------------------------------------------
/**
 * Heuristic tagger that marks each word of a text as "noun" (1) or
 * "not a noun" (0) and can wrap runs of nouns in highlighting HTML.
 *
 * NOTE(review): the string and regex literals in this class look like
 * single-byte Arabic text (presumably Windows-1256) and none of the regular
 * expressions use the "u" modifier, so matching is byte-wise. Every `\S{n}`
 * therefore counts n bytes, which equals n letters only when the input uses
 * the same single-byte encoding as this file. Confirm the file encoding
 * before re-saving it.
 */
class ArWordTag {
    /**
     * Words that, when they immediately precede another word, cause that
     * word to be tagged as a noun (see isNoun()).
     * Presumably Arabic prepositions / exception particles - TODO confirm.
     */
    public $particle_pre_nouns = array();

    /**
     * Fills the particle list.
     *
     * A real __construct() is required: on PHP 8 a method that merely shares
     * the class name is no longer called by `new`, which would leave the
     * particle list empty.
     */
    public function __construct(){
        $this->particle_pre_nouns = array(
            'Úä', 'Ýí', 'ãÐ', 'ãäÐ', 'ãä',
            'Çáì', 'Úáì', 'ÍÊì', 'ÇáÇ', 'ÛíÑ',
            'Óæì', 'ÎáÇ', 'ÚÏÇ', 'ÍÇÔÇ', 'áíÓ'
        );
    }

    /**
     * Legacy PHP 4-style constructor name, kept so that existing code calling
     * `parent::ArWordTag()` or `$obj->ArWordTag()` keeps working.
     */
    public function ArWordTag(){
        $this->__construct();
    }

    /**
     * Decides whether a word should be tagged as a noun.
     *
     * The checks run in this order and the first hit wins:
     *  1. the previous word is one of $particle_pre_nouns;
     *  2. the word or the previous word contains a digit;
     *  3. the word ends with one of three specific marks (checked before
     *     those marks are stripped);
     *  4. after stripping seven specific marks, the word matches one of the
     *     prefix / suffix / template patterns below.
     *
     * @param string $word       the word to classify
     * @param string $word_befor the word preceding it ('' when there is none)
     * @return bool true when the word is considered a noun, false otherwise
     */
    public function isNoun($word, $word_befor){
        // Fold two letter variants into one base letter so that the particle
        // lookup and the patterns below see a single spelling.
        $word       = preg_replace('/Ã|Å/', 'Ç', trim($word));
        $word_befor = preg_replace('/Ã|Å/', 'Ç', trim($word_befor));

        if (in_array($word_befor, $this->particle_pre_nouns, true)) {
            return true;
        }

        // A number, or a word that directly follows a number.
        if (preg_match('/\d+/', $word) || preg_match('/\d+/', $word_befor)) {
            return true;
        }

        // Must be tested before the marks are removed just below.
        // Presumably these are the tanween (nunation) diacritics - TODO confirm.
        if (preg_match('/(ð|ò|ñ)$/', $word)) {
            return true;
        }

        // Strip the marks (presumably short-vowel diacritics) so the
        // templates below only have to describe the remaining letters.
        $word = preg_replace('/ó|ð|õ|ñ|ö|ò|ú/', '', $word);

        // Evaluated in the original order; the first matching pattern
        // classifies the word as a noun.
        $patterns = array(
            '/^Çá\S{3,}$/',               // fixed two-letter prefix + 3 or more
            '/\S{3,}(É|Á|ì|ÇÊ)$/',        // 3 or more + one of four endings
            '/^ã\S{3}$/',
            '/^ã\S{2}Ç\S$/',
            '/^ã\S{3}É$/',
            '/^\S{2}Ç\S$/',
            '/^\SÇ\Sæ\S$/',
            '/^\S{2}æ\S$/',
            '/^\S{2}í\S$/',
            '/^ã\S{2}æ\S$/',
            '/^ã\S{2}í\S$/',
            '/^\S{3}É$/',
            '/^\S{2}Ç\SÉ$/',
            '/^\SÇ\S{2}É$/',
            '/^\SÇ\Sæ\SÉ$/',
            '/^Ç\S{2}æ\SÉ$/',
            '/^Ç\S{2}í\S$/',
            '/^Ç\S{3}$/',
            '/^\S{3}ì$/',
            '/^\S{3}ÇÁ$/',
            '/^\S{3}Çä$/',
            '/^ã\SÇ\S{2}$/',
            '/^ãä\S{3}$/',
            '/^ãÊ\S{3}$/',
            '/^ãÓÊ\S{3}$/',
            '/^ã\SÊ\S{2}$/',
            '/^ãÊ\SÇ\S{2}$/',
            '/^\SÇ\S{2}$/'
        );
        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $word)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Splits a text on single spaces and tags every non-empty word.
     *
     * @param string $str text to tag
     * @return array list of array(word, tag) pairs in input order, where tag
     *               is 1 when isNoun() accepted the word and 0 otherwise
     */
    public function tagText($str){
        $text     = array();
        $prevWord = '';

        // explode() replaces split(), which was removed in PHP 7.0; for a
        // literal single-space delimiter the two produce the same pieces.
        foreach (explode(' ', $str) as $word) {
            // Consecutive spaces yield empty pieces; skip them.
            if ($word === '') {
                continue;
            }
            $text[]   = array($word, $this->isNoun($word, $prevWord) ? 1 : 0);
            $prevWord = $word;
        }

        return $text;
    }

    /**
     * Rebuilds the text with every run of noun-tagged words wrapped in a
     * <span> that sets a yellow background colour.
     *
     * A particle from $particle_pre_nouns that follows a noun is held back
     * and emitted together with the next word, so that it does not close
     * the current span.
     *
     * NOTE(review): words are inserted into the markup unescaped. If $str can
     * contain untrusted input, the caller must escape it; escaping is not
     * done here because it would change the output for existing callers.
     *
     * @param string $str text to highlight
     * @return string HTML fragment
     */
    public function highlightText($str){
        $html    = '';
        $prevTag = 0;
        $pending = '';   // particle(s) held back until the next word is seen

        foreach ($this->tagText($str) as $wordTag) {
            list($word, $tag) = $wordTag;

            if ($prevTag == 0 && $tag == 1) {
                $html .= " \n<span style=\"background-color: #EEEE80\">";
            }

            if ($prevTag == 1 && in_array($word, $this->particle_pre_nouns, true)) {
                // Accumulate instead of overwrite, so that two consecutive
                // particles are both kept.
                $pending = ($pending === '') ? $word : $pending . ' ' . $word;
                continue;
            }

            if ($prevTag == 1 && $tag == 0) {
                $html .= "</span> \n";
            }

            $html   .= ' ' . $pending . ' ' . $word;
            $pending = '';
            $prevTag = $tag;
        }

        if ($prevTag == 1) {
            $html .= "</span> \n";
        }

        // The text ended on held-back particle(s): emit them after the span
        // instead of silently dropping them.
        if ($pending !== '') {
            $html .= ' ' . $pending;
        }

        return $html;
    }
}
?>