Location: PHPKode > scripts > BiBTeX Manager > bibtex-manager/BiBTeXManager.php
<?php

/**
* @author Eric Schabell <hide@address.com>
* @copyright Copyright 2005, GPL
* @package PMS
*/

/**
* Include once - the Manager class.
*/ 
include_once( 'Manager.php' );

/**
* BiBTeXManager class
*
* Parses and manipulates BiBTeX entries.
* @access public
*
* @package PMS
* @subpackage Manager
*/
class BiBTeXManager extends Manager 
{
	/**
	* Constructor - hands the manager label straight to the parent Manager
	* constructor; no further initialisation is done here.
	* @access public
	*
	* @param string Label identifying this manager (defaults to "BiBTeX Manager").
	*/
	public function __construct( $manager = "BiBTeX Manager" )
	{
		parent::__construct( $manager );
	}


	/**
	* bibtexify - escapes a string for use inside a BiBTeX/LaTeX field.
	*
	* Every byte listed in the map below is replaced by its LaTeX equivalent.
	* The translation is done in a single pass (strtr), so text produced by
	* one replacement is never re-scanned by another: a backslash becomes
	* {$\backslash$} and the dollar signs inside that output are left alone.
	*
	* Matching is byte-wise and the keys above 127 are single bytes, so the
	* input is expected to be a single-byte (ISO-8859-1 style) string.
	* NOTE(review): a UTF-8 string would have the individual bytes of its
	* multi-byte characters replaced - convert before calling if needed.
	*
	* Note that every hyphen is doubled ("-" becomes "--").
	* @access public
	*
	* @param string The string to be escaped.
	* @return string The escaped string.
	*/
	public function bibtexify( $string )
	{
		$map = array(
			chr(92)  => "{\$\\backslash\$}",       //	\
			chr(35)  => "{\\#}",                   //	#
			chr(36)  => "{\\$}",                   //	$
			chr(37)  => "{\\%}",                   //	%
			chr(38)  => "{\\&}",                   //	&
			chr(45)  => "--",                      //	-
			chr(60)  => "{\$<\$}",                 //	<
			chr(62)  => "{\$>\$}",                 //	>
			chr(95)  => "{\\_}",                   //	_
			chr(126) => "{\\~{}}",                 //	~
			chr(161) => "{\\textexclamdown}",      //	¡
			chr(163) => "{\\pounds}",              //	£
			chr(165) => "{\\yen}",                 //	¥
			chr(167) => "{\\S}",                   //	§
			chr(168) => "{\\\"~}",                 //	¨
			chr(169) => "{\\copyright}",           //	©
			chr(170) => "{\\textordfeminine}",     //	ª
			chr(171) => "{\$\\guillemotleft\$}",   //	«
			chr(172) => "{\$\\lnot\$}",            //	¬
			chr(174) => "{\\textregistered}",      //	®
			chr(181) => "{\$\\mu\$}",              //	µ
			chr(182) => "{\\P}",                   //	¶
			chr(187) => "{\$\\guillemotright\$}",  //	»
			chr(191) => "{\\textquestiondown}",    //	¿
			chr(192) => "{\\`A}",                  //	À
			chr(193) => "{\\'A}",                  //	Á
			chr(194) => "{\\^A}",                  //	Â
			chr(195) => "{\\~A}",                  //	Ã
			chr(196) => "{\\\"A}",                 //	Ä
			chr(197) => "{\\AA}",                  //	Å
			chr(198) => "{\\AE}",                  //	Æ
			chr(199) => "{\\c{C}}",                //	Ç
			chr(200) => "{\\`E}",                  //	È
			chr(201) => "{\\'E}",                  //	É
			chr(202) => "{\\^E}",                  //	Ê
			chr(203) => "{\\\"E}",                 //	Ë
			chr(204) => "{\\`I}",                  //	Ì
			chr(205) => "{\\'I}",                  //	Í
			chr(206) => "{\\^I}",                  //	Î
			chr(207) => "{\\\"I}",                 //	Ï
			chr(209) => "{\\~N}",                  //	Ñ
			chr(210) => "{\\`O}",                  //	Ò
			chr(211) => "{\\'O}",                  //	Ó
			chr(212) => "{\\^O}",                  //	Ô
			chr(213) => "{\\~O}",                  //	Õ
			chr(214) => "{\\\"O}",                 //	Ö
			chr(215) => "{\$\\times\$}",           //	×
			chr(216) => "{\\O}",                   //	Ø
			chr(217) => "{\\`U}",                  //	Ù
			chr(218) => "{\\'U}",                  //	Ú
			chr(219) => "{\\^U}",                  //	Û
			chr(220) => "{\\\"U}",                 //	Ü
			chr(221) => "{\\'Y}",                  //	Ý
			chr(223) => "{\\ss}",                  //	ß
			chr(224) => "{\\`a}",                  //	à
			chr(225) => "{\\'a}",                  //	á
			chr(226) => "{\\^a}",                  //	â
			chr(227) => "{\\~a}",                  //	ã
			chr(228) => "{\\\"a}",                 //	ä
			chr(229) => "{\\aa}",                  //	å
			chr(230) => "{\\ae}",                  //	æ
			chr(231) => "{\\c{c}}",                //	ç
			chr(232) => "{\\`e}",                  //	è
			chr(233) => "{\\'e}",                  //	é
			chr(234) => "{\\^e}",                  //	ê
			chr(235) => "{\\\"e}",                 //	ë
			chr(236) => "{\\`\\i}",                //	ì
			chr(237) => "{\\'\\i}",                //	í
			chr(238) => "{\\^\\i}",                //	î
			chr(239) => "{\\\"\\i}",               //	ï
			chr(241) => "{\\~n}",                  //	ñ
			chr(242) => "{\\`o}",                  //	ò
			chr(243) => "{\\'o}",                  //	ó
			chr(244) => "{\\^o}",                  //	ô
			chr(245) => "{\\~o}",                  //	õ
			chr(246) => "{\\\"o}",                 //	ö
			chr(248) => "{\\o}",                   //	ø
			chr(249) => "{\\`u}",                  //	ù
			chr(250) => "{\\'u}",                  //	ú
			chr(251) => "{\\^u}",                  //	û
			chr(252) => "{\\\"u}",                 //	ü
			chr(253) => "{\\'y}",                  //	ý
			chr(255) => "{\\\"y}"                  //	ÿ
		);

		// strtr with a map never rescans text it has already substituted,
		// unlike str_replace with parallel arrays, which works entry by entry.
		return strtr( $string, $map );
	}

	
	/**
	* cleanEntry - normalises a raw bibtex entry before parsing: single and
	* double quotes both become a backtick, line feeds are dropped (not
	* replaced by a space), and finally stripslashes() is applied.
	* @access public
	*
	* @param string The bibtex entry to be cleaned.
	* @return string The cleaned up bibtex entry.
	*/
	public function cleanEntry( $bibtex )
	{
		// Same three substitutions, applied in the same order, in one call.
		$from = array( "'", "\"", "\n" );
		$to   = array( "`", "`", "" );

		return stripslashes( str_replace( $from, $to, $bibtex ) );
	}
	
	
	/**
	* cutOutUrl - returns the line with the url attribute (key and quoted
	* text) removed.
	*
	* The url attribute is located by the literal " url " and its value is
	* expected to be enclosed in backticks (as produced by cleanEntry). The
	* value normally ends at "`,"; when the url is the last attribute there
	* is no trailing comma, so the next backtick is used instead.
	* If the key or the opening backtick cannot be found the line is
	* returned untouched.
	* @access private
	*
	* @param string Partial bibtex string.
	* @return string The url free line.
	*/
	private function cutOutUrl( $line )
	{
		$pos_url = strpos( $line, " url " );
		if ( $pos_url === false )
		{
			return $line;  // nothing to cut out.
		}

		$before   = trim( substr( $line, 0, $pos_url ) );
		$from_url = trim( substr( $line, $pos_url ) );

		$pos_open = strpos( $from_url, "`" );
		if ( $pos_open === false )
		{
			return $line;  // value is not backtick quoted, cannot delimit it.
		}

		$value_on  = substr( $from_url, $pos_open + 1 );
		$pos_close = strpos( $value_on, "`," );
		if ( $pos_close !== false )
		{
			// skip the closing backtick and the comma behind it.
			$after = substr( $value_on, $pos_close + 2 );
		}
		else
		{
			// url is the last attribute: only a closing backtick, no comma.
			$pos_close = strpos( $value_on, "`" );
			$after = ( $pos_close === false ) ? "" : substr( $value_on, $pos_close + 1 );
		}

		return $before . " " . trim( $after );
	}
	
	
	/**
	* extractUrlText - returns the url text from the given bibtex string.
	*
	* The url attribute is located by the literal " url " and its value is
	* expected to be enclosed in backticks (as produced by cleanEntry). The
	* value normally ends at "`,"; when the url is the last attribute there
	* is no trailing comma, so the next backtick is used instead.
	* An empty string is returned when no delimited url value can be found.
	* @access private
	*
	* @param string Partial bibtex string.
	* @return string The url text.
	*/
	private function extractUrlText( $line )
	{
		$pos_url = strpos( $line, " url " );
		if ( $pos_url === false )
		{
			return "";
		}

		$from_url = trim( substr( $line, $pos_url ) );
		$pos_open = strpos( $from_url, "`" );
		if ( $pos_open === false )
		{
			return "";  // value is not backtick quoted.
		}

		$value_on  = trim( substr( $from_url, $pos_open + 1 ) );
		$pos_close = strpos( $value_on, "`," );
		if ( $pos_close === false )
		{
			// url is the last attribute: only a closing backtick, no comma.
			$pos_close = strpos( $value_on, "`" );
		}
		if ( $pos_close === false )
		{
			return "";  // unterminated value.
		}

		return trim( substr( $value_on, 0, $pos_close ) );
	}

		
	/**
	* parseSingleBiBTeX - parses a single bibtex entry.
	*
	* The entry is trimmed and passed through cleanEntry, then split into:
	*   "type" - the text before the opening bracket, minus its first
	*            character (the leading '@'),
	*   "id"   - the text between the opening bracket and the first comma
	*            (or the closing bracket when the entry has no attributes),
	*   "url"  - only when a " url " attribute is present,
	*   plus everything remainingAttributes() returns for the rest.
	*
	* The last character of the entry decides which bracket style is in use:
	* ')' pairs with '(' and '}' pairs with '{'. When the entry is empty,
	* ends in anything else, or has no matching opening bracket, a debug
	* message is printed and an empty array is returned.
	* @access public
	*
	* @param string The bibtex data in string format to be parsed.
	* @return array Array containing the parsed bibtex elements, empty when invalid.
	*/
	public function parseSingleBiBTeX( $bibtex )
	{
		$bibtex_array = array();  // array we will be returning.

		$line = $this->cleanEntry( trim( $bibtex ) );

		// the closing delimiter tells us which opening delimiter to look for.
		// substr() copes with an empty entry, unlike indexing into the string.
		$last_char = substr( $line, -1 );
		if ( $last_char === ')' )
		{
			$pos_first_bracket = strpos( $line, '(' );
		}
		elseif ( $last_char === '}' )
		{
			$pos_first_bracket = strpos( $line, '{' );
		}
		else
		{
			$pos_first_bracket = false;
		}

		if ( $pos_first_bracket === false )
		{
			print "\nDEBUG[parseSingleBiBTeX]: Something wrong with brackets....\n";
			print "Entry : \n" . $bibtex;
			return $bibtex_array; // return empty array, not valid!
		}

		// Type attribute (drop the leading '@').
		//
		$type = trim( substr( $line, 0, $pos_first_bracket ) );
		$type = substr( $type, 1 );
		$type = $this->removedWhiteSpaces( $type );
		$line = trim( substr( $line, ( $pos_first_bracket + 1 ) ) );
		$bibtex_array = $bibtex_array + array( "type" => $type );

		// Id attribute.
		//
		$pos_next_comma = strpos( $line, "," );
		if ( $pos_next_comma === false )
		{
			// no attributes at all: the id runs up to the closing bracket.
			$id = trim( substr( $line, 0, -1 ) );
			$line = "";
		}
		else
		{
			$id = trim( substr( $line, 0, $pos_next_comma ) );
			$line = trim( substr( $line, ( $pos_next_comma + 1 ) ) );
		}
		$id = $this->removedWhiteSpaces( $id );
		$bibtex_array = $bibtex_array + array( "id" => $id );

		// url attribute (doing apart as very unpredictable).
		//
		if ( strpos( $line, " url " ) !== false )
		{
			$bibtex_array = $bibtex_array + array( "url" => $this->extractUrlText( $line ) );
			$line = $this->cutOutUrl( $line );
		}

		// Rest of attributes.
		//
		$bibtex_array = $bibtex_array + $this->remainingAttributes( $line );
		return $bibtex_array;
	}
	
	
	/**
	* remainingAttributes - parse out the rest of the bibtex entry attributes.
	*
	* Each attribute is expected in the form 'field = `text`,' (backtick
	* quoted, as produced by cleanEntry). The number of attributes is taken
	* from the number of "=" characters in the line. All but the last one
	* are cut off at the next "`,"; the last one is cut at its closing
	* backtick so the trailing entry bracket is dropped.
	*
	* Field names are lower-cased. Values have curly brackets removed,
	* whitespace runs collapsed and multiple dashes reduced to one. When a
	* field occurs twice the first value wins.
	*
	* Afterwards every key from the fixed list below that was not found is
	* added with a default: "none" for class, title, source, abstract and
	* content, and the string 'NULL' for the others.
	* @access private
	*
	* @param string The remaining line of the bibtex entry that contains attributes.
	* @return array The array containing the parsed attributes.
	*/
	private function remainingAttributes( $line )
	{
		$attributes = array();  // initialize our array.
		$number_fields = substr_count( $line, "=" );

		for ( $i = 1; $i <= $number_fields; $i++ )
		{
			$is_last = ( $i == $number_fields );

			if ( $is_last )
			{
				// whatever is left belongs to the last attribute.
				$item = $line;
			}
			else
			{
				$pos_next_item = strpos( $line, "`," );         // find item limit.
				$item = trim( substr( $line, 0, $pos_next_item ) );
				$line = trim( substr( $line, ( $pos_next_item + 2 ) ) );
			}

			$pos_equals = strpos( $item, "=" );                 // find equals.
			$field = strtolower( trim( substr( $item, 0, $pos_equals ) ) );

			// value is everything after the equals sign, minus one opening
			// backtick (preg_replace stands in for the removed ereg_replace).
			$text = preg_replace( '/^`/', '', trim( substr( $item, ( $pos_equals + 1 ) ) ) );
			$text = $this->removeBrackets( $text );

			if ( $is_last )
			{
				// have to trim everything after last text field's data.
				$pos_last_quote = strpos( $text, "`" );
				$text = trim( substr( $text, 0, $pos_last_quote ) );
			}

			$text  = $this->removedWhiteSpaces( $text );
			$text  = $this->removedDashes( $text );
			$field = $this->removedWhiteSpaces( $field );

			// array union keeps an already present field untouched.
			$attributes = $attributes + array( $field => $text );
		}

		// fill out array with missing keys.
		$bibtexkeys = array( "class",
		                     "isbn",
		                     "annotate",
		                     "month",
		                     "year",
		                     "note",
		                     "howpublished",
		                     "key",
		                     "title",
		                     "research_group",
		                     "source",
		                     "abstract",
		                     "content",
		                     "translated_from"
		);

		// keys that default to "none"; every other missing key gets 'NULL'.
		$none_defaults = array( "class", "title", "source", "abstract", "content" );

		foreach ( $bibtexkeys as $key )
		{
			if ( array_key_exists( $key, $attributes ) )
			{
				continue;
			}
			$attributes[$key] = in_array( $key, $none_defaults, true ) ? "none" : 'NULL';
		}

		return $attributes;
	}


	/**
	* removeBrackets - strips every curly bracket, opening and closing,
	* out of the given string.
	* @access private
	*
	* @param string The string to be cleaned of brackets.
	* @return string The bracketless string.
	*/
	private function removeBrackets( $text )
	{
		return str_replace( array( "{", "}" ), "", $text );
	}
	
	
	/**
	* removedDashes - drops line feeds and then shrinks every run of two
	* or more dashes down to a single dash.
	* @access private
	*
	* @param string The string to be checked.
	* @return string The string with no more multiple dashes.
	*/
	private function removedDashes( $string )
	{
		// line feeds go first, so dashes split across lines end up adjacent.
		$joined = str_replace( "\n", "", $string );

		return preg_replace( '/--+/', '-', $joined );
	}


	/**
	* removedWhiteSpaces - drops line feeds (without inserting a space) and
	* then collapses every run of whitespace into one single space.
	* @access private
	*
	* @param string The string to be checked.
	* @return string The string with no more multiple spaces.
	*/
	private function removedWhiteSpaces( $string )
	{
		// line feeds are removed outright before the remaining runs collapse.
		$joined = str_replace( "\n", "", $string );

		return preg_replace( '/\s+/', ' ', $joined );
	}

}

?>
Return current item: BiBTeX Manager