<?php
/***********************************************************************
Class: Gender Guesser
Version: v0.02.0
Date: 11/12/2005
Author: Wudi <hide@address.com>
License: GPL (GNU General Public License)
PHP Classes: http://www.phpclasses.org/browse/package/2701.html
Description: This class can guess the gender by name.
***********************************************************************/
/***********************************************************************
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
The GNU General Public License can be found at
http://www.gnu.org/copyleft/gpl.html
***********************************************************************/
/**
 * Guesses a gender tendency for a name from a per-character lexicon.
 *
 * A lexicon file maps single characters to numeric offsets. A name is split
 * into its three-byte UTF-8 characters, up to two of them are looked up in the
 * lexicon, and their weighted offsets are summed and normalised against the
 * largest offset in the lexicon.
 *
 * NOTE(review): how the sign/magnitude of the result maps to a gender is not
 * defined in this class; it depends on the lexicon that is loaded.
 */
class GenderGuesser {
    /**
     * Single-letter option flags (letter => bool).
     *
     * 's' (default TRUE): the leading character(s) of the name are skipped
     * before lookup - presumably because the name starts with a family name.
     *
     * @var array
     */
    public $options = array('s' => TRUE);

    /** @var string Name taken from the header of the last loaded lexicon. */
    public $lexicon_name = 'Unknown';

    /** @var int|float Largest offset in the lexicon; 0 while nothing is loaded. */
    public $lexicon_argv = 0;

    /** @var array Lexicon table: character => numeric offset. */
    public $lexicon_chars = array();

    /**
     * Switches option flags on or off.
     *
     * Each option is identified by its first letter; a leading '-' switches
     * the flag off ("-s"), anything else switches it on ("s").
     *
     * @param string|array $options Space-separated string or list of options.
     * @return bool FALSE when $options is neither a string nor an array,
     *              TRUE otherwise.
     */
    public function setOptions($options) {
        if (is_string($options)) {
            $options = explode(' ', $options);
        }
        if (!is_array($options)) {
            return FALSE;
        }
        foreach ($options as $option) {
            // Empty tokens (e.g. from double spaces) carry no flag letter.
            if (!is_string($option) || $option === '') {
                continue;
            }
            if ($option[0] === '-') {
                // A lone "-" names no flag, so there is nothing to switch off.
                if (strlen($option) > 1) {
                    $this->options[$option[1]] = FALSE;
                }
            } else {
                $this->options[$option[0]] = TRUE;
            }
        }
        return TRUE;
    }

    /**
     * Loads a lexicon file into the character table.
     *
     * Expected layout: a header line "LEXICON_CHAR_CS<TAB>001<TAB>name"
     * followed by one "character<TAB>offset" entry per line. Entries are
     * merged into whatever is already loaded.
     *
     * @param string $lexicon_file Path of the lexicon file.
     * @return bool TRUE on success; FALSE when the file is missing,
     *              unreadable, or its header is not recognised.
     */
    public function loadLexicon($lexicon_file) {
        if (!file_exists($lexicon_file) || !is_readable($lexicon_file)) {
            return FALSE;
        }
        $fp = fopen($lexicon_file, 'r');
        if ($fp === FALSE) {
            return FALSE;
        }
        // Parsing lives in a helper so the handle is closed on every path.
        $loaded = $this->readLexiconStream($fp);
        fclose($fp);
        return $loaded;
    }

    /**
     * Parses the header and the entries of an already opened lexicon.
     *
     * @param resource $fp Readable stream positioned at the header line.
     * @return bool FALSE when the header is missing or not recognised.
     */
    private function readLexiconStream($fp) {
        $header = fgets($fp);
        if ($header === FALSE || strpos($header, "\t") === FALSE) {
            return FALSE;
        }
        $fields = explode("\t", trim($header));
        // The version is compared loosely on purpose: numeric strings compare
        // by value, so a header version written as "1" stays accepted.
        if (count($fields) < 2
            || $fields[0] !== 'LEXICON_CHAR_CS'
            || $fields[1] != '001') {
            return FALSE;
        }
        $this->lexicon_name = isset($fields[2]) ? $fields[2] : 'Unknown';

        while (($line = fgets($fp)) !== FALSE) {
            $fields = explode("\t", rtrim($line));
            if (count($fields) < 2) {
                continue; // blank line or entry without an offset column
            }
            $char = $fields[0];
            $offset = trim($fields[1]);
            if ($char === '' || !is_numeric($offset)) {
                continue; // malformed entry
            }
            // "+ 0" turns the numeric string into an int or float.
            $this->lexicon_chars[$char] = $offset + 0;
        }

        // max() rejects an empty array, so keep 0 ("nothing loaded") instead.
        $this->lexicon_argv = count($this->lexicon_chars) > 0
            ? max($this->lexicon_chars)
            : 0;
        return TRUE;
    }

    /**
     * Returns the name of the loaded lexicon.
     *
     * @return string 'Unknown' until a lexicon with a named header is loaded.
     */
    public function getLexiconName() {
        return $this->lexicon_name;
    }

    /**
     * Computes the weighted lexicon offset of a name.
     *
     * Only three-byte UTF-8 characters (lead byte 0xE0-0xEF) are considered.
     * With option 's' on, the leading character(s) are dropped first: one of
     * two, two of four, otherwise the first one; at most two characters are
     * kept. With 's' off, the first two characters are used as they are.
     * A single remaining character is weighted 6; two characters are weighted
     * 4 and 5. Characters missing from the lexicon contribute nothing.
     *
     * @param string $name UTF-8 encoded name.
     * @return int|float|false Weighted offset (0 when no usable character is
     *                         left), or FALSE when no lexicon is loaded.
     */
    public function getOffset($name) {
        if (count($this->lexicon_chars) == 0) {
            return FALSE;
        }

        $chars = $this->extractChars((string) $name);
        $total = count($chars);

        if (!empty($this->options['s'])) {
            if ($total < 2) {
                $picked = array(); // nothing is left once the lead is dropped
            } elseif ($total == 2) {
                $picked = array($chars[1]);
            } elseif ($total == 4) {
                $picked = array($chars[2], $chars[3]);
            } else {
                $picked = array($chars[1], $chars[2]);
            }
        } else {
            $picked = array_slice($chars, 0, 2);
        }

        $offset = 0;
        if (count($picked) == 1) {
            if (array_key_exists($picked[0], $this->lexicon_chars)) {
                $offset += $this->lexicon_chars[$picked[0]] * 6;
            }
        } elseif (count($picked) == 2) {
            if (array_key_exists($picked[0], $this->lexicon_chars)) {
                $offset += $this->lexicon_chars[$picked[0]] * 4;
            }
            if (array_key_exists($picked[1], $this->lexicon_chars)) {
                $offset += $this->lexicon_chars[$picked[1]] * 5;
            }
        }
        return $offset;
    }

    /**
     * Splits a string into its three-byte UTF-8 characters.
     *
     * Bytes outside a three-byte sequence are ignored, and so is a sequence
     * that is cut off by the end of the string.
     *
     * @param string $name Raw UTF-8 bytes.
     * @return array List of three-byte character strings, in order.
     */
    private function extractChars($name) {
        $chars = array();
        $length = strlen($name);
        for ($i = 0; $i < $length; $i++) {
            $byte = ord($name[$i]);
            if ($byte < 224 || $byte > 239) {
                continue;
            }
            if ($i + 2 >= $length) {
                break; // truncated sequence at the end of the string
            }
            $chars[] = substr($name, $i, 3);
            $i += 2;
        }
        return $chars;
    }

    /**
     * Normalises an offset against the largest offset in the lexicon.
     *
     * The divisor is lexicon_argv * 9, where 9 is the combined weight (4 + 5)
     * that getOffset() applies to a two-character name.
     *
     * @param int|float|false $offset Value returned by getOffset().
     * @return float|int|false FALSE when $offset is FALSE or no lexicon
     *                         maximum is available.
     */
    public function getTendencyByOffset($offset) {
        if ($offset === FALSE || $this->lexicon_argv == 0) {
            return FALSE;
        }
        return $offset / ($this->lexicon_argv * 9);
    }

    /**
     * Computes the normalised tendency of a name.
     *
     * @param string $name UTF-8 encoded name.
     * @return float|int|false See getTendencyByOffset().
     */
    public function getTendency($name) {
        return $this->getTendencyByOffset($this->getOffset($name));
    }
}
?>