<?php
/* o------------------------------------------------------------------------------o
*
* This script was originally written in Perl by Ljiljana Dolamic and Jacques Savoy
*
* Improvements, PHP5 implementation and adapted for Sphider-plus application
* by Rolf Kellner [Tec] Feb. 2010
*
* o------------------------------------------------------------------------------o */
/*
 * Suffix patterns used by bg_stemmer. All patterns are anchored to the end of
 * the word and are matched byte-wise; this is safe for UTF-8 input because
 * every alternative is a complete multi-byte sequence.
 *
 * NOTE(review): the Cyrillic letters in the р..я range were restored after
 * encoding damage (they had degraded to a bare "Ñ", and in one case to a line
 * break inside a string literal). Please verify them against the upstream
 * stemmer before release.
 */
define('bg_article', '/(ът|то|те|та)$/');
define('bg_plural', '/(ища|ище|ове|та)$/');
define('bg_normal', '/(еи|ии|аой)$/');

/**
 * Light stemmer for Bulgarian words encoded in UTF-8.
 *
 * The stemmer performs no case folding and all patterns are lowercase, so
 * suffixes written in upper case are not recognised.
 */
class bg_stemmer{
    /**
     * Reduces a word to its stem by running the four passes in a fixed order:
     * article removal, plural removal, normalization, palatalization.
     *
     * @param string $word Word to stem.
     * @return string The stemmed word.
     */
    public function stem($word) {
        $word = (string) $word;
        // self:: is kept (instead of $this->) so the call chain resolves
        // exactly as it did before, however stem() itself is invoked.
        $word = self::Remove_Article($word);
        $word = self::Remove_Plural($word);
        $word = self::Normalize($word);
        $word = self::Palatalization($word);
        return $word;
    }

    /**
     * Handles a trailing $suffix whose treatment depends on the preceding letter:
     * after a vowel (а, е, и, о, у, ъ) the suffix is replaced by 'й',
     * otherwise it is simply removed. Words not ending in $suffix are
     * returned unchanged.
     *
     * @param string $word   Word to process.
     * @param string $suffix Literal suffix (not a regular expression).
     * @return string
     */
    private function Soften_Or_Strip($word, $suffix) {
        $quoted = preg_quote($suffix, '/');
        // Vowel + suffix: keep the vowel and put 'й' in place of the suffix.
        $word = preg_replace('/(а|е|и|о|у|ъ)' . $quoted . '$/', '$1й', $word);
        // Anything still ending in the suffix follows a non-vowel: drop it.
        return preg_replace('/' . $quoted . '$/', '', $word);
    }

    /**
     * Strips the definite article (bg_article suffixes, then 'ят').
     */
    private function Remove_Article($word) {
        $word = preg_replace(bg_article, '', $word);
        return self::Soften_Or_Strip($word, 'ят');
    }

    /**
     * Strips or rewrites plural endings.
     */
    private function Remove_Plural($word) {
        $word = preg_replace(bg_plural, '', $word);
        $word = preg_replace('/(овци)$/', 'о', $word);
        $word = preg_replace('/(евци)$/', 'е', $word);
        // NOTE(review): the dots are escaped, so this only fires on words that
        // contain literal '.' characters. It looks like a rule template that
        // was transcribed verbatim; left as-is pending confirmation.
        $word = preg_replace('/(\.\.е\.и)$/', '.я.', $word);
        return self::Soften_Or_Strip($word, 'еве');
    }

    /**
     * Normalizes remaining endings (bg_normal suffixes, 'йн', trailing 'я').
     */
    private function Normalize($word) {
        $word = preg_replace(bg_normal, '', $word);
        $word = preg_replace('/(йн)$/', 'н', $word);
        // NOTE(review): 'L' and 'C' below are literal Latin letters, so these
        // two rules only fire on words that actually end in them. They look
        // like placeholders from a rule template; left as-is pending
        // confirmation.
        $word = preg_replace('/(LеC)$/', 'LC', $word);
        $word = preg_replace('/(LъL)$/', 'LL', $word);
        return self::Soften_Or_Strip($word, 'я');
    }

    /**
     * Maps a palatalized final consonant to its base consonant:
     * ц/ч to к, з/ж to г, с/ш to х.
     */
    private function Palatalization($word) {
        $word = preg_replace('/(ц|ч)$/', 'к', $word);
        $word = preg_replace('/(з|ж)$/', 'г', $word);
        // The replacement must be the single letter 'х'; the damaged copy had
        // a line break here, which would have been injected into the stem.
        $word = preg_replace('/(с|ш)$/', 'х', $word);
        return $word;
    }
}
?>