Location: PHPKode > projects > Aukyla Document Management System > base/MIME.php
<?php
/*
     MIME.php, provides functions for determining MIME types and getting info
               about MIME types
     Copyright (C) 2003-2004 Arend van Beelen, Auton Rijnsburg

     This program is free software; you can redistribute it and/or modify it
     under the terms of the GNU General Public License as published by the Free
     Software Foundation; either version 2 of the License, or (at your option)
     any later version.

     This program is distributed in the hope that it will be useful, but WITHOUT
     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
     more details.

     You should have received a copy of the GNU General Public License along
     with this program; if not, write to the Free Software Foundation, Inc.,
     59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

     For any questions, comments or whatever, you may mail me at: hide@address.com
*/

require_once('Locale.php');
require_once('Messages.php');
require_once('URI.php');

/**
 * @brief Provides functions for working with MIME types of files.
 *
 * The public API consists of the static methods type(), description() and
 * extension(). They all operate on one shared instance of this class which
 * they reach through the global variable @p $MIME; that instance caches the
 * glob and magic databases found in the XDG data directories.
 */
class MIME
{
	/**
	 * Determines the list of XDG data directories to search.
	 *
	 * The value of XDG_DATA_DIRS is taken from @p $_ENV or, when @p $_ENV is
	 * not populated, from getenv(). If the variable is unset or empty the
	 * default '/usr/local/share/:/usr/share/' is used.
	 */
	public function __construct()
	{
		$dirs = (isset($_ENV['XDG_DATA_DIRS']) ? $_ENV['XDG_DATA_DIRS'] : getenv('XDG_DATA_DIRS'));
		if(!is_string($dirs) || $dirs === '')
		{
			$dirs = '/usr/local/share/:/usr/share/';
		}

		$this->XDG_DATA_DIRS = explode(':', $dirs);
	}

	/**
	 * Tries to determine the MIME type of the given file.
	 * If @p openFile is @p false, the file won't be opened and magic
	 * checking will be skipped.
	 *
	 * @param uri         URI to the file whose mime-type should be determined.
	 * @param openFile    If @p true, @p uri may be opened to do magic checking
	 *                    on the contents of the file in case the filename
	 *                    itself isn't enough to determine the MIME type. If
	 *                    @p false, the file itself won't be touched and only
	 *                    the filename is used to guess the MIME type.
	 * @param useMetaData If @p true, the file's "mime-type" meta-data key is
	 *                    examined first and if set, its value is used. This
	 *                    parameter has no effect when @p openFile is @p false.
	 *                    This parameter was added in Aukyla 1.1.
	 * @return The MIME type of the file. If @p openFile is @p false or the
	 *         file can't be opened, this may be an empty string.
	 */
	public static function type($uri, $openFile = true, $useMetaData = true)
	{
		global $MIME;

		if($useMetaData == true && $openFile == true)
		{
			$mimetype = URI::metaData($uri, 'mime-type');

			if($mimetype !== false && $mimetype !== '')
			{
				return $mimetype;
			}
		}

		// first try to recognize the file by its name only
		$mimetype = $MIME->typeFromGlobs(basename($uri));
		if($mimetype != '')
		{
			return $mimetype;
		}

		// if globbing didn't return any results we're going to do some magic
		// quit now if we may not or cannot open the file
		if($openFile == false || ($fp = URI::fopen($uri, 'r')) == false)
		{
			return '';
		}

		$MIME->loadMagicRules();

		// the rules are sorted by priority, so the first hit wins
		foreach($MIME->magicRules as $mime => $rules)
		{
			foreach($rules as $rule)
			{
				if($MIME->applyRecursiveMagic($rule, $fp) == true)
				{
					URI::fclose($fp);

					// keys look like "priority:mimetype"
					$mime = (string) $mime;
					$separator = strpos($mime, ':');
					return ($separator === false ? $mime : (string) substr($mime, $separator + 1));
				}
			}
		}

		// nothing worked, only determine whether the file is binary or text
		// by looking at (at most) the first 50 bytes
		URI::fseek($fp, 0);
		$size = URI::fileSize($uri);
		$length = ($size > 50 ? 50 : $size);
		// never ask for zero bytes; an empty file is simply treated as text
		$data = ($length > 0 ? URI::fread($fp, $length) : '');
		URI::fclose($fp);

		// any control character other than tab, line feed or carriage return
		// marks the file as binary
		if(is_string($data) && preg_match('/[\x00-\x08\x0b\x0c\x0e-\x1f]/', $data))
		{
			return 'application/octet-stream';
		}
		return 'text/plain';
	}

	/**
	 * @internal
	 *
	 * Looks up @p basename in the loaded glob files.
	 *
	 * The globs are checked twice, first case sensitive and then case
	 * insensitive. A glob that is literally equal to the basename wins
	 * immediately, otherwise the longest matching glob wins.
	 *
	 * @param basename File name without directory part.
	 * @return The matching MIME type or an empty string if nothing matched.
	 */
	private function typeFromGlobs($basename)
	{
		$this->loadGlobFile();

		$mimetype = '';
		$matchlen = 0;

		for($pass = 0; $pass < 2; $pass++)
		{
			$flags = ($pass > 0 ? FNM_CASEFOLD : 0);

			foreach($this->globFileLines as $line)
			{
				// strip the line ending only (the last line of a file may
				// not have one), but leave any spaces
				$line = rtrim($line, "\r\n");

				// skip empty lines, comments and invalid lines
				if($line === '' || $line[0] == '#' || strpos($line, ':') === false)
				{
					continue;
				}

				list($mime, $glob) = explode(':', $line, 2);

				// check for a possible direct match
				if($basename === $glob)
				{
					return $mime;
				}

				// match the globs, longest glob wins
				if(strlen($glob) > $matchlen && fnmatch($glob, $basename, $flags) == true)
				{
					$mimetype = $mime;
					$matchlen = strlen($glob);
				}
			}
		}

		return $mimetype;
	}

	/**
	 * @internal
	 *
	 * Loads the magic files from all data directories, if that wasn't done
	 * yet, and sorts the resulting rules by descending priority.
	 */
	private function loadMagicRules()
	{
		if(isset($this->magicRules))
		{
			return;
		}

		$this->magicRules = array();

		// go through the data dirs in reverse order, so entries of directories
		// listed first overwrite entries of directories listed later
		foreach(array_reverse($this->XDG_DATA_DIRS) as $dir)
		{
			$filename = "$dir/mime/magic";

			// read the file
			if(!file_exists($filename) ||
			   ($buffer = file_get_contents($filename)) === false)
			{
				continue;
			}

			// check the file type
			if(substr($buffer, 0, 12) !== "MIME-Magic\0\n")
			{
				continue;
			}

			$this->parseMagicData($buffer);
		}

		// sort the array so items with high priority will get on top
		uksort($this->magicRules, array('MIME', 'compareMagicPriority'));
	}

	/**
	 * @internal
	 *
	 * Parses the contents of one magic file into @p magicRules.
	 *
	 * The data consists of sections "[priority:mimetype]\n", each followed by
	 * rule lines of the form
	 * "[indent]>start-offset=value[&mask][~word-size][+range-length]\n",
	 * where value is a two byte big endian length followed by that many raw
	 * bytes and mask, if present, has the same length as the value.
	 * Parsing stops silently at the first malformed entry.
	 *
	 * @param buffer Complete contents of the magic file, including the
	 *               12 byte header.
	 */
	private function parseMagicData($buffer)
	{
		$length = strlen($buffer);
		$pos = 12; // skip the "MIME-Magic\0\n" header

		// $parents[n] references the array that receives rules of indent n
		$parents = array();

		while($pos < $length)
		{
			$char = $buffer[$pos];

			// create an entry for a new mimetype
			if($char == '[')
			{
				$end = strpos($buffer, ']', $pos);
				if($end === false)
				{
					break;
				}

				$mime = substr($buffer, $pos + 1, $end - $pos - 1);
				$this->magicRules[$mime] = array();
				$parents[0] =& $this->magicRules[$mime];
				$pos = $end + 2; // skip "]\n"
				continue;
			}

			// everything else must be a rule, starting with '>' or an indent
			if($char != '>' && ($char < '0' || $char > '9'))
			{
				break;
			}

			$gt = strpos($buffer, '>', $pos);
			$eq = ($gt === false ? false : strpos($buffer, '=', $gt));
			// the two length bytes of the value must be present as well
			if($eq === false || $eq + 3 > $length)
			{
				break;
			}

			$indent = ($char == '>' ? 0 : intval(substr($buffer, $pos, $gt - $pos)));

			$rule = new MIME_MagicRule;
			$rule->start_offset = intval(substr($buffer, $gt + 1, $eq - $gt - 1));

			// the value is binary data, so it has to be skipped by length
			$valueLength = 256 * ord($buffer[$eq + 1]) + ord($buffer[$eq + 2]);
			$pos = $eq + 3;
			$rule->value = (string) substr($buffer, $pos, $valueLength);
			$pos += $valueLength;

			if($pos < $length && $buffer[$pos] == '&')
			{
				$rule->mask = (string) substr($buffer, $pos + 1, $valueLength);
				$pos += $valueLength + 1;
			}
			else
			{
				$rule->mask = str_repeat("\xff", $valueLength);
			}

			// the remainder of the line is plain text: [~word-size][+range-length]
			$eol = ($pos < $length ? strpos($buffer, "\n", $pos) : false);
			if($eol === false)
			{
				$eol = $length;
			}
			$tail = ($eol > $pos ? substr($buffer, $pos, $eol - $pos) : '');
			$pos = $eol + 1;

			$rule->word_size = (($tail !== '' && $tail[0] == '~') ? intval(substr($tail, 1)) : 1);
			$plus = strpos($tail, '+');
			$rule->range_length = ($plus === false ? 1 : intval(substr($tail, $plus)));
			$rule->children = array();

			// hook the rule into the tree and make it the parent for rules
			// of the next indent level
			$parents[$indent][] = $rule;
			$parents[$indent + 1] =& $rule->children;
		}
	}

	/**
	 * @internal
	 *
	 * Comparison callback which orders "priority:mimetype" keys by
	 * descending numeric priority; keys of equal priority are ordered by
	 * descending string value.
	 */
	private static function compareMagicPriority($a, $b)
	{
		$priorityA = intval($a);
		$priorityB = intval($b);

		if($priorityA != $priorityB)
		{
			return ($priorityA < $priorityB ? 1 : -1);
		}
		return strcmp((string) $b, (string) $a);
	}

	/**
	 * @internal
	 *
	 * Apply the magic rules recursively.
	 *
	 * A rule matches if its (masked) value is found at one of the
	 * @p range_length positions starting at @p start_offset and, if the rule
	 * has children, at least one of the children matches as well.
	 * The @p word_size of a rule is not taken into account.
	 *
	 * @param rule The rule to check.
	 * @param fp   File handle as returned by URI::fopen().
	 * @return @p true if the rule matches the file, @p false otherwise.
	 */
	private function applyRecursiveMagic(MIME_MagicRule $rule, $fp)
	{
		$valueLength = strlen($rule->value);
		if($valueLength == 0)
		{
			return false;
		}

		$rangeLength = max(1, (int) $rule->range_length);

		// read exactly the bytes covered by all allowed start positions
		URI::fseek($fp, $rule->start_offset);
		$data = URI::fread($fp, $valueLength + $rangeLength - 1);
		if(!is_string($data) || strlen($data) < $valueLength)
		{
			return false;
		}

		$mask = ((is_string($rule->mask) && $rule->mask !== '') ? $rule->mask : null);

		if($mask === null || $mask === str_repeat("\xff", $valueLength))
		{
			// no effective mask, a plain substring search will do
			$matched = (strpos($data, $rule->value) !== false);
		}
		else
		{
			// compare masked bytes at every allowed position; the & operator
			// works byte-wise when both operands are strings
			$matched = false;
			$expected = $rule->value & $mask;
			$last = strlen($data) - $valueLength;
			for($i = 0; $i <= $last; $i++)
			{
				if((substr($data, $i, $valueLength) & $mask) === $expected)
				{
					$matched = true;
					break;
				}
			}
		}

		if($matched == false)
		{
			return false;
		}

		if(empty($rule->children))
		{
			return true;
		}

		foreach($rule->children as $child)
		{
			if($this->applyRecursiveMagic($child, $fp) == true)
			{
				return true;
			}
		}
		return false;
	}

	/**
	 * Gets the textual description of @p mimetype, optionally in the
	 * specified @p language.
	 *
	 * The data directories are processed in reverse order, so a description
	 * found in a directory listed first takes precedence, just like it is
	 * done for the magic files.
	 *
	 * @param mimetype MIME type to get a description about.
	 * @param language The language in which the description should be, if
	 *                 no description can be found in the given language,
	 *                 it will fall back to English. As of Aukyla 1.1, if
	 *                 this parameter is omitted, the currently active
	 *                 language is used.
	 * @return The textual description of the given MIME type or
	 *         i18n('Unknown filetype') if it cannot find a description.
	 */
	public static function description($mimetype, $language = '')
	{
		global $MIME;

		$MIME->description = i18n('Unknown filetype');
		$MIME->lang = ($language == '' ? Locale::language() : $language);
		$MIME->read = false;
		$MIME->descriptionText = '';

		// go through the data dirs to search for the XML file for the specified mime type
		foreach(array_reverse($MIME->XDG_DATA_DIRS) as $dir)
		{
			$filename = "$dir/mime/$mimetype.xml";

			// read the XML file
			if(!file_exists($filename) ||
			   ($xml = file_get_contents($filename)) === false)
			{
				continue;
			}

			// initialize XML parser
			$xml_parser = xml_parser_create();
			xml_set_element_handler($xml_parser, array($MIME, 'description_StartElement'), array($MIME, 'description_EndElement'));
			xml_set_character_data_handler($xml_parser, array($MIME, 'description_Data'));

			// parse the document in one go
			if(!xml_parse($xml_parser, $xml, true))
			{
				error_log("ERROR: Couldn't parse $filename: ".
				          xml_error_string(xml_get_error_code($xml_parser)));
			}

			// don't let a half-read element leak into the next file
			$MIME->read = false;
			$MIME->descriptionText = '';
		}

		return $MIME->description;
	}

	/**
	 * @internal
	 *
	 * Start tag handler: starts collecting text for comment elements which
	 * either have no language or are in the requested language.
	 */
	private function description_StartElement($parser, $name, $attrs)
	{
		$this->description_Flush();

		if($name == 'COMMENT')
		{
			if(!isset($attrs['XML:LANG']) || $attrs['XML:LANG'] == $this->lang)
			{
				$this->read = true;
			}
		}
	}

	/**
	 * @internal
	 *
	 * End tag handler: stores the text collected for the element.
	 */
	private function description_EndElement($parser, $name)
	{
		$this->description_Flush();
	}

	/**
	 * @internal
	 *
	 * Character data handler. The parser may deliver the text of a single
	 * element in several pieces, so the pieces are collected here.
	 */
	private function description_Data($parser, $data)
	{
		if($this->read == true)
		{
			$this->descriptionText .= $data;
		}
	}

	/**
	 * @internal
	 *
	 * Stores the collected text, with whitespace normalized, as the current
	 * description and stops collecting.
	 */
	private function description_Flush()
	{
		if($this->read == true)
		{
			$text = trim((string) preg_replace('/\s+/', ' ', $this->descriptionText));
			if($text !== '')
			{
				$this->description = $text;
			}
		}

		$this->read = false;
		$this->descriptionText = '';
	}

	/**
	 * Returns a common extension for @p mimetype.
	 *
	 * @param mimetype MIME type to get an extension for.
	 * @return An extension for the given MIME type, starting with a dot. If
	 *         no extension is found, an empty string is returned.
	 */
	public static function extension($mimetype)
	{
		global $MIME;

		$MIME->loadGlobFile();

		foreach($MIME->globFileLines as $line)
		{
			$line = trim($line);

			// skip empty lines, comments and invalid lines
			if($line === '' || $line[0] == '#' || strpos($line, ':') === false)
			{
				continue;
			}

			list($mime, $glob) = explode(':', $line, 2);

			// only globs of the form "*.ext" provide an extension
			if($mime === (string) $mimetype && substr($glob, 0, 2) == '*.')
			{
				return substr($glob, 1);
			}
		}

		return '';
	}

	/**
	 * @internal
	 *
	 * Loads the lines of the glob files of all data directories, if they
	 * haven't been loaded already.
	 */
	private function loadGlobFile()
	{
		if(isset($this->globFileLines))
		{
			return;
		}

		$this->globFileLines = array();

		// go through the data dirs to search for the globbing files
		foreach($this->XDG_DATA_DIRS as $dir)
		{
			// read the file
			if(file_exists("$dir/mime/globs") &&
			   ($lines = file("$dir/mime/globs")) !== false)
			{
				$this->globFileLines = array_merge($this->globFileLines, $lines);
			}
		}
	}

	private $XDG_DATA_DIRS;   // list of data directories to search
	private $globFileLines;   // cached lines of all mime/globs files
	private $magicRules;      // cached magic rules, keyed by "priority:mimetype"
	private $description;     // result of the running description() call
	private $lang;            // language requested from description()
	private $read;            // true while inside a matching comment element
	private $descriptionText; // text collected for the current element
}

/**
 * @internal
 *
 * Plain data holder used by MIME::type(): one instance represents a single
 * rule line read from a mime/magic file. All members start out as @p null
 * and are filled in by the code that parses the magic file.
 */
class MIME_MagicRule
{
	// where in the file to look (start_offset), the bytes to look for
	// (value) and the mask that was parsed along with the value
	public $start_offset, $value, $mask;

	// word size as given in the rule and the length of the range, starting
	// at start_offset, in which the value is searched
	public $word_size, $range_length;

	// array of nested MIME_MagicRule objects
	public $children;
}

// instantiate the single shared MIME object; the static methods of the
// class reach it through "global $MIME"
global $MIME;
$MIME = new MIME();

?>
Return current item: Aukyla Document Management System