<?
/**
* @author Eric Schabell <hide@address.com>
* @copyright Copyright 2005, GPL
* @package PMS
*/
/**
* Include once - the Manager class.
*/
include_once( 'Manager.php' );
/**
* BiBTeXManager class
*
* Parses and manipulates BiBTeX entries.
* @access public
*
* @package PMS
* @subpackage Manager
*/
class BiBTeXManager extends Manager
{
    /**
     * Constructor - initialize the BiBTeXManager.
     * @access public
     *
     * @param string Type of manager, handed on unchanged to the Manager constructor.
     */
    public function __construct( $manager="BiBTeX Manager" )
    {
        parent::__construct( $manager );
    }

    /**
     * bibtexify - escapes a string so that it can be used inside a bibtex entry.
     *
     * Every byte listed in the map below is replaced by its LaTeX spelling.
     * The map keys are single bytes, so the input is processed byte-wise;
     * the codes above 127 correspond to ISO-8859-1 characters.
     * NOTE(review): multi-byte (e.g. UTF-8) input would have its individual
     * bytes replaced - confirm that callers only pass single-byte text.
     *
     * The replacement is done in one pass with strtr(), so text produced by
     * one replacement is never escaped a second time (e.g. the '$' signs in
     * the replacement for a backslash stay as they are).
     * @access public
     *
     * @param string The string to be checked.
     * @return string The string completely bibtexified!
     */
    public function bibtexify( $string )
    {
        $map = array(
            chr(92)  => "{\$\\backslash\$}",      // backslash
            chr(35)  => "{\\#}",                  // hash
            chr(36)  => "{\\$}",                  // dollar
            chr(37)  => "{\\%}",                  // percent
            chr(38)  => "{\\&}",                  // ampersand
            chr(45)  => "--",                     // dash
            chr(60)  => "{\$<\$}",                // less than
            chr(62)  => "{\$>\$}",                // greater than
            chr(95)  => "{\\_}",                  // underscore
            chr(126) => "{\\~{}}",                // tilde
            chr(161) => "{\\textexclamdown}",     // inverted exclamation mark
            chr(163) => "{\\pounds}",             // pound sign
            chr(165) => "{\\yen}",                // yen sign
            chr(167) => "{\\S}",                  // section sign
            chr(168) => "{\\\"~}",                // diaeresis
            // NOTE(review): code 170 is the feminine ordinal indicator, yet the
            // replacement is a superscript 2 - kept as found, please confirm.
            chr(170) => "\\textsuperscript{2}",
            chr(169) => "{\\copyright}",          // copyright sign
            chr(171) => "{\$\\guillemotleft\$}",  // left guillemet
            chr(172) => "{\$\\lnot\$}",           // not sign
            chr(174) => "{\\textregistered}",     // registered sign
            chr(181) => "{\$\\mu\$}",             // micro sign
            chr(182) => "{\\P}",                  // pilcrow
            chr(187) => "{\$\\guillemotright\$}", // right guillemet
            chr(191) => "{\\textquestiondown}",   // inverted question mark
            chr(192) => "{\\`A}",                 // A grave
            chr(193) => "{\\'A}",                 // A acute
            chr(194) => "{\\^A}",                 // A circumflex
            chr(195) => "{\\~A}",                 // A tilde
            chr(196) => "{\\\"A}",                // A diaeresis
            chr(197) => "{\\AA}",                 // A ring
            chr(198) => "{\\AE}",                 // AE ligature
            chr(199) => "{\\c{C}}",               // C cedilla
            chr(200) => "{\\`E}",                 // E grave
            chr(201) => "{\\'E}",                 // E acute
            chr(202) => "{\\^E}",                 // E circumflex
            chr(203) => "{\\\"E}",                // E diaeresis
            chr(204) => "{\\`I}",                 // I grave
            chr(205) => "{\\'I}",                 // I acute
            chr(206) => "{\\^I}",                 // I circumflex
            chr(207) => "{\\\"I}",                // I diaeresis
            chr(209) => "{\\~N}",                 // N tilde
            chr(210) => "{\\`O}",                 // O grave
            chr(211) => "{\\'O}",                 // O acute
            chr(212) => "{\\^O}",                 // O circumflex
            chr(213) => "{\\~O}",                 // O tilde
            chr(214) => "{\\\"O}",                // O diaeresis
            chr(215) => "{\$\\times\$}",          // multiplication sign
            chr(216) => "{\\O}",                  // O slash
            chr(217) => "{\\`U}",                 // U grave
            chr(218) => "{\\'U}",                 // U acute
            chr(219) => "{\\^U}",                 // U circumflex
            chr(220) => "{\\\"U}",                // U diaeresis
            chr(221) => "{\\'Y}",                 // Y acute
            chr(223) => "{\\ss}",                 // sharp s
            chr(224) => "{\\`a}",                 // a grave
            chr(225) => "{\\'a}",                 // a acute
            chr(226) => "{\\^a}",                 // a circumflex
            chr(227) => "{\\~a}",                 // a tilde
            chr(228) => "{\\\"a}",                // a diaeresis
            chr(229) => "{\\aa}",                 // a ring
            chr(230) => "{\\ae}",                 // ae ligature
            chr(231) => "{\\c{c}}",               // c cedilla
            chr(232) => "{\\`e}",                 // e grave
            chr(233) => "{\\'e}",                 // e acute
            chr(234) => "{\\^e}",                 // e circumflex
            chr(235) => "{\\\"e}",                // e diaeresis
            chr(236) => "{\\`\\i}",               // i grave
            chr(237) => "{\\'\\i}",               // i acute
            chr(238) => "{\\^\\i}",               // i circumflex
            chr(239) => "{\\\"\\i}",              // i diaeresis
            chr(241) => "{\\~n}",                 // n tilde
            chr(242) => "{\\`o}",                 // o grave
            chr(243) => "{\\'o}",                 // o acute
            chr(244) => "{\\^o}",                 // o circumflex
            chr(245) => "{\\~o}",                 // o tilde
            chr(246) => "{\\\"o}",                // o diaeresis
            chr(248) => "{\\o}",                  // o slash
            chr(249) => "{\\`u}",                 // u grave
            chr(250) => "{\\'u}",                 // u acute
            chr(251) => "{\\^u}",                 // u circumflex
            chr(252) => "{\\\"u}",                // u diaeresis
            chr(253) => "{\\'y}",                 // y acute
            chr(255) => "{\\\"y}"                 // y diaeresis
        );

        return strtr( $string, $map );
    }

    /**
     * cleanEntry - normalises a raw bibtex entry for the parser: single and
     * double quotes both become '`', line feeds are dropped and the result
     * is passed through stripslashes().
     * @access public
     *
     * @param string The bibtex entry to be parsed.
     * @return string The cleaned up bibtex entry.
     */
    public function cleanEntry( $bibtex )
    {
        // Both quote styles become the backtick the parser uses as delimiter.
        $bibtex = str_replace( array( "'", "\"" ), "`", $bibtex );
        // The entry is handled as one single line from here on.
        $bibtex = str_replace( "\n", "", $bibtex );

        return stripslashes( $bibtex );
    }

    /**
     * cutOutUrl - returns the given line with the url key and its text cut out.
     *
     * The url field is located by the text " url "; its value is expected to
     * be enclosed in backticks. When the url is the last field (no "`," after
     * its value) only the part in front of the url field is returned.
     * @access private
     *
     * @param string Partial bibtex string.
     * @return string The url free line.
     */
    private function cutOutUrl( $line )
    {
        $pos_url = strpos( $line, " url " );
        if ( $pos_url === false )
        {
            return $line; // no url field, nothing to cut.
        }

        $before   = trim( substr( $line, 0, $pos_url ) );
        $from_url = trim( substr( $line, $pos_url ) );

        // skip up to and including the opening quote of the url text.
        $pos_open   = strpos( $from_url, "`" );
        $start      = ( $pos_open === false ) ? 1 : ( $pos_open + 1 );
        $after_open = trim( (string) substr( $from_url, $start ) );

        // the closing quote plus comma marks the end of the url field.
        $pos_close = strpos( $after_open, "`," );
        if ( $pos_close === false )
        {
            return $before; // url was the last field, nothing follows it.
        }

        $url_free = trim( (string) substr( $after_open, $pos_close + 2 ) );

        return $before . " $url_free";
    }

    /**
     * extractUrlText - returns the url text from the given bibtex string.
     *
     * The text is taken between the first backtick after " url " and the
     * next "`," - or, when the url is the last field, the next backtick.
     * An empty string is returned when no such quoted text is found.
     * @access private
     *
     * @param string Partial bibtex string.
     * @return string The url text.
     */
    private function extractUrlText( $line )
    {
        $pos_url = strpos( $line, " url " );
        if ( $pos_url === false )
        {
            return "";
        }

        $from_url = trim( substr( $line, $pos_url ) );
        $pos_open = strpos( $from_url, "`" );
        if ( $pos_open === false )
        {
            return "";
        }

        $urlrest   = trim( (string) substr( $from_url, $pos_open + 1 ) );
        $pos_close = strpos( $urlrest, "`," );
        if ( $pos_close === false )
        {
            // last field of the entry: closing quote without a comma.
            $pos_close = strpos( $urlrest, "`" );
        }
        if ( $pos_close === false )
        {
            return "";
        }

        return trim( substr( $urlrest, 0, $pos_close ) );
    }

    /**
     * parseSingleBiBTeX - parses a single bibtex entry.
     *
     * The entry is cleaned with cleanEntry() first. Its last character decides
     * whether the body is enclosed in '(' ')' or '{' '}'. The result holds the
     * keys "type" and "id", "url" when a url field is present, and everything
     * delivered by remainingAttributes(). Entries are merged with '+', so a
     * field that is itself called type, id or url does not overwrite them.
     *
     * When the brackets can not be determined a debug message is printed and
     * an empty array is returned.
     * @access public
     *
     * @param string The bibtex data in string format to be parsed.
     * @return array Array containing the parsed bibtex elements.
     */
    public function parseSingleBiBTeX( $bibtex )
    {
        $bibtex_array = array(); // array we will be returning.
        $line = $this->cleanEntry( trim( $bibtex ) );

        // the closing character tells which kind of bracket opens the entry.
        $last_char         = substr( $line, -1 );
        $pos_first_bracket = false;
        if ( $last_char === ')' )
        {
            $pos_first_bracket = strpos( $line, '(' );
        }
        elseif ( $last_char === '}' )
        {
            $pos_first_bracket = strpos( $line, '{' );
        }

        if ( $pos_first_bracket === false )
        {
            // empty entry, unknown closing character or no opening bracket.
            print "\nDEBUG[parseSingleBiBTeX]: Something wrong with brackets....\n";
            print "Entry : \n" . $bibtex;
            return $bibtex_array; // return empty array, not valid!
        }

        // Type attribute: text in front of the bracket minus its first
        // character (the '@' of a bibtex entry).
        //
        $type = trim( substr( $line, 0, $pos_first_bracket ) );
        $type = $this->removedWhiteSpaces( (string) substr( $type, 1 ) );
        $line = trim( (string) substr( $line, $pos_first_bracket + 1 ) );
        $bibtex_array = $bibtex_array + array( "type" => $type );

        // Id attribute: everything up to the first comma.
        //
        $pos_next_comma = strpos( $line, "," );
        if ( $pos_next_comma === false )
        {
            // entry consists of an id only; drop the closing bracket.
            $id   = trim( (string) substr( $line, 0, -1 ) );
            $line = "";
        }
        else
        {
            $id   = trim( substr( $line, 0, $pos_next_comma ) );
            $line = trim( (string) substr( $line, $pos_next_comma + 1 ) );
        }
        $bibtex_array = $bibtex_array + array( "id" => $this->removedWhiteSpaces( $id ) );

        // url attribute (doing apart as very unpredictable).
        //
        if ( strpos( $line, " url " ) !== false )
        {
            $bibtex_array = $bibtex_array + array( "url" => $this->extractUrlText( $line ) );
            $line = $this->cutOutUrl( $line );
        }

        // Rest of attributes.
        //
        return $bibtex_array + $this->remainingAttributes( $line );
    }

    /**
     * remainingAttributes - parse out the rest of the bibtex entry attributes.
     *
     * Each attribute is expected as 'field = `text`,'. Fields are split off
     * at "`,"; the final field is everything that is left and its text ends
     * at the first backtick (the text is empty when there is none). Field
     * names are lower cased, braces are removed from the text, and runs of
     * white space and dashes are collapsed. The first occurrence of a field
     * name wins.
     *
     * Keys from the fixed list below that did not occur are added with a
     * default: "none" for class, title, source, abstract and content, the
     * literal string 'NULL' for all others.
     * @access private
     *
     * @param string The remaining line of the bibtex entry that contains attributes.
     * @return array The array containing the parsed attributes.
     */
    private function remainingAttributes( $line )
    {
        $attributes = array(); // initialize our array.

        // Re-count the '=' signs on every round instead of trusting the
        // initial count, so a '=' inside a value can not desynchronise us.
        while ( ( $fields_left = substr_count( $line, "=" ) ) > 0 )
        {
            $pos_next_item = strpos( $line, "`," ); // find item limit.
            $is_last = ( $fields_left == 1 || $pos_next_item === false );

            if ( $is_last )
            {
                $item = $line;
                $line = "";
            }
            else
            {
                $item = trim( substr( $line, 0, $pos_next_item ) );
                $line = trim( (string) substr( $line, $pos_next_item + 2 ) );
            }

            $pos_equals = strpos( $item, "=" ); // find equals.
            if ( $pos_equals === false )
            {
                continue; // not a 'field = text' item, skip it.
            }

            $field = strtolower( trim( substr( $item, 0, $pos_equals ) ) );
            $field = $this->removedWhiteSpaces( $field );

            $text = trim( (string) substr( $item, $pos_equals + 1 ) );
            $text = preg_replace( '/^`/', '', $text ); // drop opening quote.
            $text = $this->removeBrackets( $text );
            if ( $is_last )
            {
                // have to trim everything after last text field's data.
                $pos_last_quote = strpos( $text, "`" );
                $text = ( $pos_last_quote === false )
                    ? ""
                    : trim( substr( $text, 0, $pos_last_quote ) );
            }
            $text = $this->removedWhiteSpaces( $text );
            $text = $this->removedDashes( $text );

            $attributes = $attributes + array( $field => $text );
        }

        // valid defaults for keys that were not in the entry; '+' only adds
        // the missing ones and keeps this order.
        $defaults = array(
            "class"           => "none",
            "isbn"            => 'NULL',
            "annotate"        => 'NULL',
            "month"           => 'NULL',
            "year"            => 'NULL',
            "note"            => 'NULL',
            "howpublished"    => 'NULL',
            "key"             => 'NULL',
            "title"           => "none",
            "research_group"  => 'NULL',
            "source"          => "none",
            "abstract"        => "none",
            "content"         => "none",
            "translated_from" => 'NULL'
        );

        return $attributes + $defaults;
    }

    /**
     * removeBrackets - removes all curly brackets found in string.
     * @access private
     *
     * @param string The string to be cleaned of brackets.
     * @return string The bracketless string.
     */
    private function removeBrackets( $text )
    {
        return str_replace( array( "{", "}" ), "", $text );
    }

    /**
     * removedDashes - will clean all multiple dashes out of a string,
     * leaving single dash behind. Line feeds are removed as well.
     * @access private
     *
     * @param string The string to be checked.
     * @return string The string with no more multiple dashes.
     */
    private function removedDashes( $string )
    {
        $string = str_replace( "\n", "", $string );

        return preg_replace( '/--+/', '-', $string );
    }

    /**
     * removedWhiteSpaces - will clean all multiple white spaces out of a
     * string, leaving single space behind. Line feeds are removed (not
     * turned into a space) before the remaining white space is collapsed.
     * @access private
     *
     * @param string The string to be checked.
     * @return string The string with no more multiple spaces.
     */
    private function removedWhiteSpaces( $string )
    {
        $string = str_replace( "\n", "", $string );

        return preg_replace( '/\s+/', ' ', $string );
    }
}
?>