<?php
/*
MIME.php, provides functions for determining MIME types and getting info
about MIME types
Copyright (C) 2003-2004 Arend van Beelen, Auton Rijnsburg
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
For any questions, comments or whatever, you may mail me at: hide@address.com
*/
require_once('Locale.php');
require_once('Messages.php');
require_once('URI.php');
/**
 * @brief Provides functions for working with MIME types of files.
 *
 * The public functions are static; they operate on one shared instance of
 * this class (kept in the global variable @c $MIME) which caches the glob and
 * magic databases found in the XDG data directories.
 */
class MIME
{
    /**
     * Determines the XDG data directories that will be searched.
     *
     * The XDG_DATA_DIRS environment variable is used when it is set and not
     * empty, otherwise the default '/usr/local/share/:/usr/share/' applies.
     */
    function __construct()
    {
        // $_ENV is only populated when variables_order contains 'E', so fall
        // back to getenv() to still honour the variable in that case
        if(isset($_ENV['XDG_DATA_DIRS']))
        {
            $value = $_ENV['XDG_DATA_DIRS'];
        }
        else
        {
            $value = getenv('XDG_DATA_DIRS');
        }
        if(!is_string($value) || $value === '')
        {
            $value = '/usr/local/share/:/usr/share/';
        }

        $this->XDG_DATA_DIRS = array();
        foreach(explode(':', $value) as $dir)
        {
            // an empty entry would make us look in "/mime/..."
            if($dir !== '')
            {
                $this->XDG_DATA_DIRS[] = $dir;
            }
        }
    }

    /**
     * @internal
     *
     * Returns the shared instance, creating it when the global @c $MIME does
     * not hold one yet.
     */
    private static function instance()
    {
        global $MIME;

        if(!($MIME instanceof MIME))
        {
            $MIME = new MIME;
        }

        return $MIME;
    }

    /**
     * Tries to determine the MIME type of the given file.
     * If @p openFile is @p false, the file won't be opened and magic
     * checking will be skipped.
     *
     * @param uri         URI to the file whose mime-type should be determined.
     * @param openFile    If @p true, @p uri may be opened to do magic checking
     *                    on the contents of the file in case the filename
     *                    itself isn't enough to determine the MIME type. If
     *                    @p false, the file itself won't be touched and only
     *                    the filename is used to guess the MIME type.
     * @param useMetaData If @p true, the file's "mime-type" meta-data key is
     *                    examined first and if set, its value is used. This
     *                    parameter has no effect when @p openFile is @p false.
     *                    This parameter was added in Aukyla 1.1.
     * @return The MIME type of the file. If @p openFile is @p false or the
     *         file can't be opened, this may be an empty string.
     */
    public static function type($uri, $openFile = true, $useMetaData = true)
    {
        $self = self::instance();

        if($useMetaData == true && $openFile == true)
        {
            $mimetype = URI::metaData($uri, 'mime-type');
            if($mimetype !== false && $mimetype !== '')
            {
                return $mimetype;
            }
        }

        // first try the filename only
        $mimetype = $self->typeFromGlobs(basename($uri));
        if($mimetype != '')
        {
            return $mimetype;
        }

        // if globbing didn't return any results we're going to do some magic
        // quit now if we may not or cannot open the file
        if($openFile == false || ($fp = URI::fopen($uri, 'r')) == false)
        {
            return '';
        }

        $self->loadMagicRules();

        // the rules are ordered by priority, the first hit wins
        foreach($self->magicRules as $mime => $rules)
        {
            foreach($rules as $rule)
            {
                if($self->applyRecursiveMagic($rule, $fp) == true)
                {
                    URI::fclose($fp);

                    // the key has the form "priority:mimetype"
                    $parts = explode(':', $mime, 2);
                    return (isset($parts[1]) ? $parts[1] : $parts[0]);
                }
            }
        }

        // nothing worked, I will now only determine whether the file is binary or text
        URI::fseek($fp, 0);
        $size = URI::fileSize($uri);
        $length = ($size > 50 ? 50 : $size);
        $data = ($length > 0 ? URI::fread($fp, $length) : '');
        URI::fclose($fp);

        $count = (is_string($data) ? strlen($data) : 0);
        for($i = 0; $i < $count; $i++)
        {
            // any control character other than tab, LF or CR means binary
            $byte = ord($data[$i]);
            if($byte < 0x20 && $byte != 0x09 && $byte != 0x0a && $byte != 0x0d)
            {
                return 'application/octet-stream';
            }
        }

        return 'text/plain';
    }

    /**
     * @internal
     *
     * Matches @p basename against the glob database.
     *
     * The globs are checked twice, first case sensitive and then case
     * insensitive. A glob that is literally equal to @p basename wins
     * immediately, otherwise the longest matching glob wins.
     *
     * @param basename Filename without directory part.
     * @return The MIME type of the best match, or an empty string.
     */
    private function typeFromGlobs($basename)
    {
        $this->loadGlobFile();

        $mimetype = '';
        $matchlen = 0;

        for($pass = 0; $pass < 2; $pass++)
        {
            $flag = ($pass > 0 ? FNM_CASEFOLD : 0);

            foreach($this->globEntries as $entry)
            {
                list($mime, $glob) = $entry;

                // check for a possible direct match
                if($basename === $glob)
                {
                    return $mime;
                }

                // match the globs
                if(strlen($glob) > $matchlen && fnmatch($glob, $basename, $flag) == true)
                {
                    $mimetype = $mime;
                    $matchlen = strlen($glob);
                }
            }
        }

        return $mimetype;
    }

    /**
     * @internal
     *
     * Loads and parses the magic files of all data directories, unless that
     * has already been done, and orders the rules by descending priority.
     */
    private function loadMagicRules()
    {
        if(isset($this->magicRules))
        {
            return;
        }

        $this->magicRules = array();

        // the least important directory is read first, so that entries of
        // more important directories replace entries with the same key
        foreach(array_reverse($this->XDG_DATA_DIRS) as $dir)
        {
            $filename = "$dir/mime/magic";
            if(!file_exists($filename) ||
               ($buffer = file_get_contents($filename)) === false)
            {
                continue;
            }

            // check the file type
            if(substr($buffer, 0, 12) !== "MIME-Magic\0\n")
            {
                continue;
            }

            $this->parseMagicBuffer($buffer);
        }

        // sort the array so items with high priority will get on top
        uksort($this->magicRules, array('MIME', 'compareMagicKeys'));
    }

    /**
     * @internal
     *
     * Orders two "priority:mimetype" keys by descending numeric priority;
     * equal priorities are ordered by name to keep the result deterministic.
     */
    private static function compareMagicKeys($a, $b)
    {
        $priorityA = intval($a);
        $priorityB = intval($b);

        if($priorityA != $priorityB)
        {
            return ($priorityA > $priorityB ? -1 : 1);
        }

        return strcmp((string) $a, (string) $b);
    }

    /**
     * @internal
     *
     * Parses the contents of one magic file into $this->magicRules.
     *
     * The file consists of "[priority:mimetype]" section headers followed by
     * rule lines of the form
     * "[indent]>start-offset=value[&mask][~word-size][+range-length]\n",
     * where value is prefixed by its length as a 2-byte big endian number.
     * Parsing stops silently at the first malformed entry.
     *
     * @param buffer Complete file contents, including the 12-byte header.
     */
    private function parseMagicBuffer($buffer)
    {
        $length = strlen($buffer);
        $pos = 12; // skip "MIME-Magic\0\n"
        // $parents[n] references the list that rules with indent n belong to
        $parents = array();

        while($pos < $length)
        {
            $char = $buffer[$pos];

            // create an entry for a new mimetype
            if($char === '[')
            {
                $end = strpos($buffer, ']', $pos);
                if($end === false)
                {
                    break;
                }
                $mime = substr($buffer, $pos + 1, $end - $pos - 1);
                $this->magicRules[$mime] = array();
                $parents = array();
                $parents[0] =& $this->magicRules[$mime];
                $pos = $end + 2; // skip "]\n"
                continue;
            }

            // add a new rule to the current mimetype
            $indent = 0;
            if($char !== '>')
            {
                $digits = strspn($buffer, '0123456789', $pos);
                if($digits == 0)
                {
                    break;
                }
                $indent = intval(substr($buffer, $pos, $digits));
                $pos += $digits;
            }
            if($pos >= $length || $buffer[$pos] !== '>' || !isset($parents[$indent]))
            {
                break;
            }
            $pos++;

            $digits = strspn($buffer, '0123456789', $pos);
            $startOffset = intval(substr($buffer, $pos, $digits));
            $pos += $digits;

            // '=' and the two length bytes must be present
            if($pos + 3 > $length || $buffer[$pos] !== '=')
            {
                break;
            }
            $valueLength = (ord($buffer[$pos + 1]) << 8) | ord($buffer[$pos + 2]);
            $pos += 3;
            if($pos + $valueLength > $length)
            {
                break;
            }
            $value = (string) substr($buffer, $pos, $valueLength);
            $pos += $valueLength;

            if($pos < $length && $buffer[$pos] === '&')
            {
                if($pos + 1 + $valueLength > $length)
                {
                    break;
                }
                $mask = (string) substr($buffer, $pos + 1, $valueLength);
                $pos += $valueLength + 1;
            }
            else
            {
                $mask = str_repeat("\xff", $valueLength);
            }

            $wordSize = 1;
            if($pos < $length && $buffer[$pos] === '~')
            {
                $digits = strspn($buffer, '0123456789', $pos + 1);
                $wordSize = intval(substr($buffer, $pos + 1, $digits));
                $pos += $digits + 1;
            }

            // skip anything unknown up to the range length or end of line
            $pos += strcspn($buffer, "+\n", $pos);

            $rangeLength = 1;
            if($pos < $length && $buffer[$pos] === '+')
            {
                $digits = strspn($buffer, '0123456789', $pos + 1);
                $rangeLength = intval(substr($buffer, $pos + 1, $digits));
                $pos += $digits + 1;
            }

            // continue after the end of this line
            $newline = ($pos < $length ? strpos($buffer, "\n", $pos) : false);
            $pos = ($newline === false ? $length : $newline + 1);

            $rule = new MIME_MagicRule;
            $rule->start_offset = $startOffset;
            $rule->value = $value;
            $rule->mask = $mask;
            $rule->word_size = $wordSize;
            $rule->range_length = $rangeLength;
            $rule->children = array();

            $parents[$indent][] = $rule;
            // rules with a deeper indent become children of this rule
            $parents[$indent + 1] =& $rule->children;
        }
    }

    /**
     * @internal
     *
     * Applies the magic rules recursively.
     *
     * A rule matches when its value is found (after applying the mask to
     * both the value and the file contents) at one of the @c range_length
     * offsets starting at @c start_offset, and, when the rule has children,
     * at least one child matches as well. The @c word_size of a rule is not
     * taken into account.
     *
     * @param rule Rule to apply.
     * @param fp   File handle as returned by URI::fopen().
     * @return @p true if the rule matches, @p false otherwise.
     */
    private function applyRecursiveMagic(MIME_MagicRule $rule, $fp)
    {
        $valueLength = strlen($rule->value);
        if($valueLength == 0)
        {
            return false;
        }
        $rangeLength = max(1, intval($rule->range_length));

        // the last candidate starts at start_offset + range_length - 1
        URI::fseek($fp, $rule->start_offset);
        $data = URI::fread($fp, $valueLength + $rangeLength - 1);
        if(!is_string($data) || strlen($data) < $valueLength)
        {
            return false;
        }

        $mask = $rule->mask;
        if(!is_string($mask) || strlen($mask) != $valueLength ||
           $mask === str_repeat("\xff", $valueLength))
        {
            // no effective mask, a plain search will do
            $matched = (strpos($data, $rule->value) !== false);
        }
        else
        {
            $matched = false;
            $wanted = $rule->value & $mask;
            $last = strlen($data) - $valueLength;
            for($i = 0; $i <= $last; $i++)
            {
                if((substr($data, $i, $valueLength) & $mask) === $wanted)
                {
                    $matched = true;
                    break;
                }
            }
        }

        if(!$matched)
        {
            return false;
        }
        if(empty($rule->children))
        {
            return true;
        }

        foreach($rule->children as $child)
        {
            if($this->applyRecursiveMagic($child, $fp) == true)
            {
                return true;
            }
        }

        return false;
    }

    /**
     * Gets the textual description of @p mimetype, optionally in the
     * specified @p language.
     *
     * @param mimetype MIME type to get a description about.
     * @param language The language in which the description should be, if
     *                 no description can be found in the given language,
     *                 it will fall back to English. As of Aukyla 1.1, if
     *                 this parameter is omitted, the currently active
     *                 language is used.
     * @return The textual description of the given MIME type or
     *         i18n('Unknown filetype') if it cannot find a description.
     */
    public static function description($mimetype, $language = '')
    {
        $self = self::instance();

        $self->description = i18n('Unknown filetype');
        $self->descriptionRank = 0;
        $self->lang = ($language == '' ? Locale::language() : $language);
        $self->read = false;

        // a MIME type never contains "..", don't let it escape the mime directory
        if(strpos((string) $mimetype, '..') !== false)
        {
            return $self->description;
        }

        // go through the data dirs to search for the XML file for the specified mime type
        foreach($self->XDG_DATA_DIRS as $dir)
        {
            $filename = "$dir/mime/$mimetype.xml";

            // open the XML file
            if(!file_exists($filename) ||
               ($fp = fopen($filename, 'r')) == false)
            {
                continue;
            }

            // initialize XML parser
            $xml_parser = xml_parser_create();
            xml_set_element_handler($xml_parser, array($self, 'description_StartElement'), array($self, 'description_EndElement'));
            xml_set_character_data_handler($xml_parser, array($self, 'description_Data'));

            // read the file and parse
            while(!feof($fp))
            {
                $chunk = fread($fp, 4096);
                if($chunk === false || $chunk === '')
                {
                    break;
                }
                if(!xml_parse($xml_parser, $chunk, feof($fp)))
                {
                    error_log("ERROR: Couldn't parse $filename: ".
                              xml_error_string(xml_get_error_code($xml_parser)));
                    break;
                }
            }

            fclose($fp);
            $self->read = false;

            // a description in the requested language cannot be improved upon
            if($self->descriptionRank == 2)
            {
                break;
            }
        }

        return $self->description;
    }

    /**
     * @internal
     *
     * Starts collecting text when a comment element is opened that is better
     * than what has been found so far: a comment without language (rank 1)
     * beats nothing, a comment in the requested language (rank 2) beats both.
     */
    private function description_StartElement($parser, $name, $attrs)
    {
        $this->read = false;

        if($name != 'COMMENT')
        {
            return;
        }

        if(!isset($attrs['XML:LANG']))
        {
            $rank = 1;
        }
        else if($attrs['XML:LANG'] == $this->lang)
        {
            $rank = 2;
        }
        else
        {
            return;
        }

        if($rank > $this->descriptionRank)
        {
            $this->read = true;
            $this->pendingRank = $rank;
            $this->pendingText = '';
        }
    }

    /**
     * @internal
     *
     * Stores the collected text as the description when an element closes.
     */
    private function description_EndElement($parser, $name)
    {
        if($this->read == true)
        {
            $text = str_replace("\n", '', $this->pendingText);
            if($text !== '')
            {
                $this->description = $text;
                $this->descriptionRank = $this->pendingRank;
            }
        }

        $this->read = false;
    }

    /**
     * @internal
     *
     * Collects character data; the parser may deliver the text of one
     * element in several pieces.
     */
    private function description_Data($parser, $data)
    {
        if($this->read == true)
        {
            $this->pendingText .= $data;
        }
    }

    /**
     * Returns a common extension for @p mimetype.
     *
     * @param mimetype MIME type to get an extension for.
     * @return An extension for the given MIME type, starting with a dot. If
     *         no extension is found, an empty string is returned.
     */
    public static function extension($mimetype)
    {
        $self = self::instance();
        $self->loadGlobFile();

        $wanted = (string) $mimetype;
        foreach($self->globEntries as $entry)
        {
            list($mime, $glob) = $entry;

            // only globs of the form "*.ext" yield an extension
            if(trim($mime) === $wanted && substr($glob, 0, 2) === '*.')
            {
                return rtrim(substr($glob, 1));
            }
        }

        return '';
    }

    /**
     * @internal
     *
     * Loads the glob files of all data directories, unless that has already
     * been done, and parses them into (mimetype, glob) pairs.
     */
    private function loadGlobFile()
    {
        // load the glob files if they haven't been loaded already
        if(isset($this->globFileLines))
        {
            return;
        }

        $this->globFileLines = array();
        $this->globEntries = array();

        // go through the data dirs to search for the globbing files
        foreach($this->XDG_DATA_DIRS as $dir)
        {
            // read the file
            if(file_exists("$dir/mime/globs") &&
               ($lines = file("$dir/mime/globs")) !== false)
            {
                $this->globFileLines = array_merge($this->globFileLines, $lines);
            }
        }

        foreach($this->globFileLines as $line)
        {
            // strip the line ending only (the last line may have none),
            // but leave any spaces
            $line = rtrim($line, "\r\n");

            // skip empty lines, comments and invalid lines
            if($line === '' || $line[0] === '#' || strpos($line, ':') === false)
            {
                continue;
            }

            $this->globEntries[] = explode(':', $line, 2);
        }
    }

    // directories searched for the MIME database, most important first
    private $XDG_DATA_DIRS;
    // raw lines of all glob files, unset until loadGlobFile() has run
    private $globFileLines;
    // parsed glob lines as array(mimetype, glob) pairs
    private $globEntries;
    // magic rules per "priority:mimetype" key, unset until loaded
    private $magicRules;
    // state used while description() parses an XML file
    private $description;
    private $descriptionRank;
    private $pendingText;
    private $pendingRank;
    private $lang;
    private $read;
}
/**
 * @internal
 *
 * Plain record describing one rule from a magic file; instances are created
 * and filled in by MIME::type() while it parses the magic database.
 */
class MIME_MagicRule
{
    // what to look for and where: the offset at which matching starts, the
    // bytes to find, and the mask ("\xff" bytes when the file gives none)
    public $start_offset, $value, $mask;

    // optional modifiers from the rule line, both 1 when not given
    public $word_size, $range_length;

    // list of nested MIME_MagicRule objects
    public $children;
}
// Set up the single shared MIME object. The explicit "global" makes sure the
// instance ends up in the global scope even if this file is included from
// within a function.
global $MIME;
$MIME = new MIME();
?>