<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 encoding=utf-8: */
// +----------------------------------------------------------------------+
// | Eventum - Issue Tracking System |
// +----------------------------------------------------------------------+
// | Copyright (c) 2003 - 2008 MySQL AB |
// | Copyright (c) 2008 - 2009 Sun Microsystem Inc. |
// | |
// | This program is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU General Public License as published by |
// | the Free Software Foundation; either version 2 of the License, or |
// | (at your option) any later version. |
// | |
// | This program is distributed in the hope that it will be useful, |
// | but WITHOUT ANY WARRANTY; without even the implied warranty of |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
// | GNU General Public License for more details. |
// | |
// | You should have received a copy of the GNU General Public License |
// | along with this program; if not, write to: |
// | |
// | Free Software Foundation, Inc. |
// | 59 Temple Place - Suite 330 |
// | Boston, MA 02111-1307, USA. |
// +----------------------------------------------------------------------+
// | Authors: João Prado Maia <hide@address.com> |
// +----------------------------------------------------------------------+
//
/**
* The MIME:: class provides methods for dealing with MIME standards.
*
* $Horde: horde/lib/MIME.php,v 1.121 2003/11/06 15:26:17 chuck Exp $
*
* Copyright 1999-2003 Chuck Hagenbuch <hide@address.com>
*
* See the enclosed file COPYING for license information (LGPL). If you
* did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
*
*/
require_once(APP_PEAR_PATH . "Mail/mimeDecode.php");
require_once(APP_INC_PATH . "class.error_handler.php");
/**
* Class to handle the business logic related to the MIME email
* processing. The is8bit(), encode() and _encode() functions come from
* the excellent Horde package at http://www.horde.org. These functions are
* licensed under the LGPL, and Horde's copyright notice is available
* above.
*
* @version 1.0
* @author João Prado Maia <hide@address.com>
*/
class Mime_Helper
{
/**
 * Extracts the character set declared in the Content-Type header of an
 * email.
 *
 * For a multipart/mixed message whose first part is a text part, the
 * Content-Type header of that first part is inspected; in every other case
 * the top-level Content-Type header is used.
 *
 * @access  public
 * @param   mixed $input The full raw message, or an already decoded message structure (object)
 * @return  mixed The charset name (string), or false when none could be determined
 */
function getCharacterSet($input)
{
    if (!is_object($input)) {
        $structure = Mime_Helper::decode($input, false, false);
    } else {
        $structure = $input;
    }
    if (empty($structure)) {
        return false;
    }

    $primary = isset($structure->ctype_primary) ? $structure->ctype_primary : '';
    $secondary = isset($structure->ctype_secondary) ? $structure->ctype_secondary : '';

    $use_first_part = ($primary == 'multipart') && ($secondary == 'mixed')
        && !empty($structure->parts) && is_array($structure->parts)
        && isset($structure->parts[0]->ctype_primary)
        && ($structure->parts[0]->ctype_primary == 'text');

    if ($use_first_part) {
        $headers = isset($structure->parts[0]->headers) ? $structure->parts[0]->headers : null;
    } else {
        $headers = isset($structure->headers) ? $structure->headers : null;
    }

    // a missing (or repeated, hence array-valued) header cannot be matched;
    // bail out explicitly instead of feeding a non-string to preg_match()
    if (!is_array($headers) || !isset($headers['content-type']) || !is_string($headers['content-type'])) {
        return false;
    }

    if (preg_match('/charset\s*=\s*(["\'])?([-\w\d]+)(\1)?;?/i', $headers['content-type'], $matches)) {
        return $matches[2];
    }
    return false;
}
/**
 * Builds the textual body of a decoded MIME message.
 *
 * The structure is flattened with Mime_Helper::parse_output(). Plain text
 * parts win when present; otherwise the HTML parts are used, with a few
 * line-break tags turned into newlines before all markup is stripped.
 * Parts are glued together with a blank line.
 *
 * @access  public
 * @param   object $output The parsed message structure
 * @return  string The message body (empty string when no text or HTML part exists)
 * @see     Mime_Helper::decode()
 */
function getMessageBody(&$output)
{
    $parts = array();
    Mime_Helper::parse_output($output, $parts);
    if (empty($parts)) {
        // only logged: processing carries on until it is clear this is the right check
        Error_Handler::logError(array("Mime_Helper::parse_output failed. Corrupted MIME in email?", $output), __FILE__, __LINE__);
    }

    if (isset($parts["text"])) {
        return join("\n\n", $parts["text"]);
    }
    if (!isset($parts["html"])) {
        return '';
    }

    $html = join("\n\n", $parts["html"]);
    // hack for inotes to prevent content from being displayed all on one line.
    $html = str_replace("</DIV><DIV>", "\n", $html);
    $html = str_replace(array("<br>", "<br />", "<BR>", "<BR />"), "\n", $html);
    // XXX: do we also need to do something here about base64 encoding?
    return strip_tags($html);
}
/**
 * Decodes the RFC 2047 encoded-words ("=?charset?q?...?=" and
 * "=?charset?b?...?=") found in a string.
 *
 * Whitespace separating two adjacent encoded-words is dropped. Every
 * encoded-word is decoded exactly once, in a single pass, so decoded
 * output is never re-interpreted as encoded input. The declared charset is
 * not used: the decoded bytes are returned as they are.
 *
 * @access  public
 * @param   string $input The string to be fixed
 * @return  string The fixed string
 */
function fixEncoding($input)
{
    // Remove white space between encoded-words. The following word is only
    // looked ahead at (not consumed) so that it can itself be followed by
    // yet another encoded-word.
    $input = preg_replace('/(=\?[^?]+\?[qb]\?[^?]*\?=)\s+(?==\?)/i', '\1', $input);

    return preg_replace_callback(
        '/=\?([^?]+)\?(q|b)\?([^?]*)\?=/i',
        function ($word) {
            $text = $word[3];
            if (strtolower($word[2]) == 'b') {
                return base64_decode($text);
            }
            // "Q" encoding: underscore stands for a space, =XX for a byte
            $text = str_replace('_', ' ', $text);
            return preg_replace_callback(
                '/=([a-f0-9]{2})/i',
                function ($hex) {
                    return chr(hexdec($hex[1]));
                },
                $text
            );
        },
        $input
    );
}
/**
 * Wraps the display name of an email address in double quotes.
 *
 * Only addresses of the form "Name <email>" are touched; backslashes are
 * removed first and any double quote inside the name is escaped. The name
 * is taken as everything before the last "<", whatever amount of
 * whitespace (including none) separates the two. An address without a
 * display name is returned as just its "<email>" part.
 *
 * @access  public
 * @param   string $address The full email address
 * @return  string The properly quoted email address
 */
function quoteSender($address)
{
    if (!strstr($address, '<')) {
        return $address;
    }
    $address = stripslashes($address);
    $bracket = strrpos($address, '<');
    $email = substr($address, $bracket);
    // do not assume exactly one space before "<": trim instead of cutting a fixed byte
    $name = trim(substr($address, 0, $bracket));
    if ($name === '') {
        return $email;
    }
    return '"' . str_replace('"', '\"', $name) . '" ' . $email;
}
/**
 * Removes the double quotes surrounding the display name of an email
 * address.
 *
 * For "Name <email>" style addresses backslashes are stripped and the name
 * is everything before the last "<", regardless of how much whitespace
 * (if any) separates the two. For any other string the quotes are removed
 * from the start of the whole value.
 *
 * @access  public
 * @param   string $address The full email address
 * @return  string The email address without quotes
 */
function removeQuotes($address)
{
    $email = '';
    if (strstr($address, '<')) {
        $address = stripslashes($address);
        $bracket = strrpos($address, '<');
        $email = substr($address, $bracket);
        // do not assume exactly one space before "<": trim instead of cutting a fixed byte
        $address = trim(substr($address, 0, $bracket));
    }
    if (preg_match('/^".*"/', $address)) {
        $address = preg_replace('/^"(.*)"/', '\\1', $address);
    }
    if ($email === '') {
        return $address;
    }
    if ($address === '') {
        return $email;
    }
    return $address . ' ' . $email;
}
/**
 * Encodes an email address so that it is safe to use in a mail header.
 *
 * Quotes are removed first. When the address contains 8-bit characters and
 * has the form "Name <email>", the name is emitted as an RFC 2047
 * "Q" encoded-word in the APP_CHARSET character set. An 8-bit address
 * without a "<email>" part is returned unchanged since there is no display
 * name to encode. Pure 7-bit addresses just get their name quoted.
 *
 * @access  public
 * @param   string $address The full email address
 * @return  string The properly encoded email address
 */
function encodeAddress($address)
{
    $address = Mime_Helper::removeQuotes($address);
    if (!Mime_Helper::is8bit($address)) {
        return Mime_Helper::quoteSender($address);
    }

    // split into name and address section
    if (!preg_match("/(.*)<(.*)>/", $address, $matches)) {
        return $address;
    }

    // 8-bit bytes, the punctuation range 0x21-0x2F and the square brackets
    // are hex-escaped, as are "=", "?" and "_" which carry a special
    // meaning inside a "Q" encoded-word. A callback is used because the
    // /e modifier is no longer available in current PHP versions.
    $name = preg_replace_callback(
        '/[\x80-\xFF\x21-\x2F\[\]=?_]/',
        function ($char) {
            return '=' . strtoupper(dechex(ord($char[0])));
        },
        $matches[1]
    );

    return "=?" . APP_CHARSET . "?Q?" . str_replace(' ', '_', trim($name)) . "?= <" . $matches[2] . ">";
}
/**
 * Decodes a quoted printable ("Q") encoded address and returns the string.
 *
 * Addresses that are not of the form "=?charset?Q?name?= <email>" are
 * simply passed through Mime_Helper::removeQuotes(). The declared charset
 * is not used: the decoded bytes of the name are returned as they are.
 *
 * @param   string $address The address to decode
 * @return  string The decoded address
 */
function decodeAddress($address)
{
    if (preg_match("/=\?.+\?Q\?(.+)\?= <(.+)>/i", $address, $matches)) {
        // an underscore means "space" only in the encoded form, so it has
        // to be translated before the =XX sequences are decoded; otherwise
        // a literal underscore (=5F) would end up as a space as well
        $name = quoted_printable_decode(str_replace('_', ' ', $matches[1]));
        return $name . " <" . $matches[2] . ">";
    }
    return Mime_Helper::removeQuotes($address);
}
/**
 * Tells whether a string looks like "=?charset?Q?name?= <email>", i.e. an
 * address whose name part is a "Q" encoded-word.
 *
 * @param   string $address The address
 * @return  boolean True when the pattern is found, false otherwise
 */
function isQuotedPrintable($address)
{
    $found = preg_match("/=\?.+\?Q\?.+\?= <.+>/i", $address);
    return $found ? true : false;
}
/**
 * Checks whether a value is a string holding at least one byte in the
 * 0x80-0xFF range.
 *
 * @access  public
 * @param   string $string The string to check.
 * @return  boolean True if it does, false if it doesn't (or if the value is not a string).
 */
function is8bit($string)
{
    if (!is_string($string)) {
        return false;
    }
    return preg_match('/[\x80-\xff]+/', $string) ? true : false;
}
/**
 * Runs every header value through Mime_Helper::encode() (using its default
 * charset) and returns the resulting list, keyed like the input.
 *
 * @access  public
 * @param   array $headers The header values, indexed by header name
 * @return  array The same headers with their values encoded
 */
function encodeHeaders($headers)
{
    $encoded = $headers;
    foreach ($headers as $header_name => $raw_value) {
        $encoded[$header_name] = Mime_Helper::encode($raw_value);
    }
    return $encoded;
}
/**
 * Encode a string containing non-ASCII characters according to RFC 2047.
 *
 * The text is processed word by word: only words holding 8-bit bytes are
 * turned into encoded-words. When two such words follow each other, the
 * whitespace between them is encoded together with the first one.
 * Trailing whitespace is removed from the result.
 *
 * @access  public
 * @param   string $text The text to encode.
 * @param   string $charset (optional) The character set of the text.
 * @return  string The text, encoded only if it contains non-ASCII
 *                 characters.
 */
function encode($text, $charset = APP_CHARSET)
{
    /* Nothing to do for pure 7-bit text. */
    if (!MIME_Helper::is8bit($text)) {
        return $text;
    }

    $charset = strtolower($charset);

    /* Split into (word, trailing whitespace) pairs. */
    $total = preg_match_all("/([^\s]+)([\s]*)/", $text, $tokens, PREG_SET_ORDER);

    $pieces = array();
    for ($idx = 0; $idx < $total; $idx++) {
        $word = $tokens[$idx][1];
        $space = $tokens[$idx][2];

        if (!MIME_Helper::is8bit($word)) {
            $pieces[] = $word . $space;
            continue;
        }

        $next_is_8bit = (($idx + 1) < $total) && MIME_Helper::is8bit($tokens[$idx + 1][1]);
        if ($next_is_8bit) {
            $pieces[] = MIME_Helper::_encode($word . $space, $charset) . ' ';
        } else {
            $pieces[] = MIME_Helper::_encode($word, $charset) . $space;
        }
    }

    return rtrim(implode('', $pieces));
}
/**
 * Internal function to RFC 2047 ("B" / base64) encode a string.
 *
 * RFC 2047 [2] states that no encoded-word can be more than 75 characters
 * long, so longer text is emitted as several encoded-words separated by a
 * single space. The length check uses twice the byte length of the text as
 * a (pessimistic) estimate of its encoded size. For UTF-8 text a split
 * never happens in the middle of a multi-byte character, as each
 * encoded-word has to be decodable on its own.
 *
 * @access  private
 * @param   string $text The text to encode.
 * @param   string $charset The character set of the text.
 * @return  string One or more space separated encoded-words.
 */
function _encode($text, $charset)
{
    $char_len = strlen($charset);
    // number of raw bytes per encoded-word; never below 1 so that the loop
    // always makes progress, even with an unusually long charset name
    $chunk_len = max(1, intval((68 - $char_len) / 2));
    $is_utf8 = (strcasecmp($charset, 'utf-8') == 0) || (strcasecmp($charset, 'utf8') == 0);

    $words = array();
    while (((strlen($text) * 2 + $char_len + 7) > 75) && (strlen($text) > $chunk_len)) {
        $pos = $chunk_len;
        if ($is_utf8) {
            // step back over continuation bytes (10xxxxxx) so the cut lands
            // on a character boundary
            while (($pos > 1) && ((ord($text[$pos]) & 0xC0) == 0x80)) {
                $pos--;
            }
        }
        $words[] = '=?' . $charset . '?b?' . trim(base64_encode(substr($text, 0, $pos))) . '?=';
        $text = substr($text, $pos);
    }
    $words[] = '=?' . $charset . '?b?' . trim(base64_encode($text)) . '?=';

    return join(' ', $words);
}
/**
 * Method used to encode a given string in the quoted-printable ("Q")
 * encoded-word format.
 *
 * Every run of word characters holding at least one 8-bit byte becomes its
 * own "=?charset?Q?...?=" encoded-word in which the 8-bit bytes are written
 * as =XX; the rest of the value is left untouched. The value is rewritten
 * in a single pass so that text which has already been encoded is never
 * matched again.
 *
 * @access  public
 * @param   string $hdr_value The string to be encoded
 * @param   string $charset The charset of the string
 * @return  string The encoded string
 */
function encodeValue($hdr_value, $charset = 'iso-8859-1')
{
    // callbacks are used because the /e modifier is no longer available
    // in current PHP versions
    return preg_replace_callback(
        '/\w*[\x80-\xFF]+\w*/',
        function ($word) use ($charset) {
            $escaped = preg_replace_callback(
                '/[\x80-\xFF]/',
                function ($byte) {
                    return '=' . strtoupper(dechex(ord($byte[0])));
                },
                $word[0]
            );
            return '=?' . $charset . '?Q?' . $escaped . '?=';
        },
        $hdr_value
    );
}
/**
 * Splits a string holding a header and a body section at the first blank
 * line.
 *
 * @access  public
 * @param   string $input Input to split apart
 * @return  mixed array(header section, body section), or null when the
 *                input contains no blank line
 */
function splitBodyHeader($input)
{
    $found = preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $input, $match);
    if (!$found) {
        return null;
    }
    return array($match[1], $match[2]);
}
/**
 * Collects the names of the headers found in $input.
 *
 * Folded header lines are unfolded first. Lines that do not hold a
 * "name:" prefix are ignored.
 *
 * @access  public
 * @param   string $input Headers to parse
 * @return  array The header names as written in the input, indexed by
 *                their lowercased form
 */
function getHeaderNames($input)
{
    $return = array();
    if ((string) $input === '') {
        return $return;
    }

    // Unfold the input
    $input = preg_replace("/\r?\n/", "\r\n", $input);
    $input = preg_replace("/\r\n(\t| )+/", ' ', $input);

    foreach (explode("\r\n", trim($input)) as $value) {
        $pos = strpos($value, ':');
        // no colon (or nothing in front of it): not a header line
        if (($pos === false) || ($pos === 0)) {
            continue;
        }
        $hdr_name = substr($value, 0, $pos);
        $return[strtolower($hdr_name)] = $hdr_name;
    }
    return $return;
}
/**
 * Method used to get an unique attachment name for a given
 * filename. This is specially useful for the emails that Microsoft
 * Outlook sends out with several attachments with the same name
 * when you embed several inline screenshots in the message.
 *
 * While the name is already one of the values of $list, a counter is
 * added in front of the extension ("image.bmp" becomes "image-1.bmp") or,
 * when the name already ends in "-<number>", that number is incremented
 * ("image-1.bmp" becomes "image-2.bmp"). A dash followed by anything other
 * than digits is treated as part of the name.
 *
 * @access  public
 * @param   array $list The names that are already taken (only the values are looked at)
 * @param   string $filename The filename to search for
 * @return  string The unique attachment name
 */
function getAttachmentName(&$list, $filename)
{
    $taken = is_array($list) ? array_values($list) : array();

    while (in_array($filename, $taken)) {
        // check if the filename even has an extension...
        $dot = strrpos($filename, '.');
        if ($dot === false) {
            $base = $filename;
            $extension = '';
        } else {
            $base = substr($filename, 0, $dot);
            $extension = substr($filename, $dot);
        }

        // check if this is already named Outlook-2.bmp (or similar)
        $counter = 1;
        $dash = strrpos($base, '-');
        if ($dash !== false) {
            $suffix = substr($base, $dash + 1);
            // only a purely numeric suffix is a counter; dropping any other
            // suffix would rebuild the very same name and never terminate
            if (preg_match("/^[0-9]+$/", $suffix)) {
                $counter = intval($suffix) + 1;
                $base = substr($base, 0, $dash);
            }
        }

        $filename = $base . "-" . $counter . $extension;
    }

    return $filename;
}
/**
 * Tells whether an email message carries at least one attachment, as
 * reported by Mime_Helper::_getAttachmentDetails().
 *
 * @access  public
 * @param   mixed $message The full body of the message or parsed message structure.
 * @return  boolean
 */
function hasAttachments($message)
{
    $structure = is_object($message) ? $message : Mime_Helper::decode($message, true);
    $found = Mime_Helper::_getAttachmentDetails($structure, true);
    return count($found) > 0;
}
/**
 * Returns the detail records of all the attachments of a message, bodies
 * included. A raw message is decoded first; an already parsed structure is
 * used as it is.
 *
 * @access  public
 * @param   mixed $message The full body of the message or parsed message structure.
 * @return  array The list of attachments, if any
 */
function getAttachments($message)
{
    $structure = is_object($message) ? $message : Mime_Helper::decode($message, true);
    return Mime_Helper::_getAttachmentDetails($structure, true);
}
/**
 * Returns the attachment detail records of a message so that callers can
 * read the content-id of each attachment.
 *
 * The call made here is the same one getAttachments() makes: the result is
 * the full list of records produced by _getAttachmentDetails() (each of
 * which holds a 'cid' entry), not a plain list of content-ids.
 *
 * @access  public
 * @param   mixed $message The full body of the message or parsed message structure.
 * @return  array The list of attachment records, if any
 */
function getAttachmentCIDs($message)
{
    if (is_object($message)) {
        $structure = $message;
    } else {
        $structure = Mime_Helper::decode($message, true);
    }
    return Mime_Helper::_getAttachmentDetails($structure, true);
}
/**
* Walks a decoded MIME part (and, recursively, its sub parts) and returns
* one record per part that qualifies as an attachment.
*
* A part qualifies when its primary content type is 'image', or when its
* content type is not listed by _getInvalidContentTypes(), its disposition
* is listed by _getValidDispositions() and it has a filename. The filename
* is taken from the 'name' content-type parameter, falling back to the
* 'filename' disposition parameter.
*
* Each record holds the keys 'filename', 'cid' (the content-id header, if
* any) and 'filetype' (lowercased "primary/secondary" content type), plus
* 'blob' (a reference to the part body) when $return_body is true.
*
* @access private
* @param object $mime_part The decoded MIME part to inspect
* @param boolean $return_body Whether to include the body of each attachment
* @param mixed $return_filename If not false, only parts with this filename are reported
* @param mixed $return_cid If not false, only image parts with this content-id are reported
* @return array The list of attachment records
*/
function _getAttachmentDetails(&$mime_part, $return_body = false, $return_filename = false, $return_cid = false)
{
$attachments = array();
if (isset($mime_part->parts)) {
for ($i = 0; $i < count($mime_part->parts); $i++) {
$t = Mime_Helper::_getAttachmentDetails($mime_part->parts[$i], $return_body, $return_filename, $return_cid);
// results of the current sub part are placed in front of those gathered so far
$attachments = array_merge($t, $attachments);
}
}
// FIXME: content-type is always lowered by PEAR class (CHECKME) and why not $mime_part->content_type?
$content_type = strtolower(@$mime_part->ctype_primary . '/' . @$mime_part->ctype_secondary);
// neither half of the content type is known
if ($content_type == '/') {
$content_type = '';
}
$found = 0;
// get the proper filename
$mime_part_filename = @$mime_part->ctype_parameters['name'];
if (empty($mime_part_filename)) {
$mime_part_filename = @$mime_part->d_parameters['filename'];
}
// hack in order to treat inline images as normal attachments
// (since Eventum does not display those embedded within the message)
if (@$mime_part->ctype_primary == 'image') {
// if requested, return only the details of a particular filename
// NOTE: this early return also drops anything collected from sub parts above
if (($return_filename != false) && ($mime_part_filename != $return_filename)) {
return array();
}
// if requested, return only the details of
// a particular attachment CID. Only really needed
// as hack for inline images
if (($return_cid != false) && (@$mime_part->headers['content-id'] != $return_cid)) {
return array();
}
$found = 1;
} else {
if ((!in_array($content_type, Mime_Helper::_getInvalidContentTypes())) &&
(in_array(@strtolower($mime_part->disposition), Mime_Helper::_getValidDispositions())) &&
(!empty($mime_part_filename))) {
// if requested, return only the details of a particular filename
if (($return_filename != false) && ($mime_part_filename != $return_filename)) {
return array();
}
$found = 1;
}
}
if ($found) {
$t = array(
'filename' => $mime_part_filename,
'cid' => @$mime_part->headers['content-id'],
'filetype' => $content_type
);
// only include the body of the attachment when
// requested to save some memory
if ($return_body == true) {
// stored by reference, not copied
$t['blob'] = &$mime_part->body;
}
$attachments[] = $t;
}
return $attachments;
}
/**
 * Looks up a single attachment of a message by filename (and, optionally,
 * by content-id) and returns its content type and content.
 *
 * @access  public
 * @param   mixed $message The full content of the message or parsed message structure.
 * @param   string $filename The filename to look for
 * @param   string $cid The content-id to look for, if any
 * @return  array array(content type, attachment body) when exactly one
 *                attachment matches, an empty array otherwise
 */
function getAttachment($message, $filename, $cid = false)
{
    if (!is_object($message)) {
        $message = Mime_Helper::decode($message, true);
    }

    $matching = Mime_Helper::_getAttachmentDetails($message, true, $filename, $cid);
    if (count($matching) != 1) {
        return array();
    }

    return array(
        $matching[0]['filetype'],
        $matching[0]['blob']
    );
}
/**
 * Method used to decode the content of a MIME encoded message.
 *
 * The message is parsed with Mail_mimeDecode and every string header is
 * then MIME-decoded into the APP_CHARSET character set. When bodies are
 * requested, the top-level body is replaced by the text returned by
 * Mime_Helper::getMessageBody().
 *
 * Note that $message is taken by reference and may be rewritten: a
 * "boundary=" parameter found on its own line is moved back onto the
 * Content-Type line before parsing.
 *
 * @access  public
 * @param   string $message The full body of the message
 * @param   boolean $include_bodies Whether to include the bodies in the return value or not
 * @param   boolean $decode_bodies Whether Mail_mimeDecode should decode the bodies
 * @return  mixed The decoded content of the message
 */
function decode(&$message, $include_bodies = false, $decode_bodies = true)
{
    // need to fix a pretty annoying bug where if the 'boundary' part of a
    // content-type header is split into another line, the PEAR library would
    // not work correctly. this fix will make the boundary part go to the
    // same line as the content-type one
    if (preg_match("/^(boundary=).*/m", $message)) {
        $pattern = "/(Content-Type: multipart\/)(.+); ?\r?\n(boundary=)(.*)$/im";
        $replacement = '$1$2; $3$4';
        $message = preg_replace($pattern, $replacement, $message);
    }

    $params = array(
        'crlf'           => "\r\n",
        'include_bodies' => $include_bodies,
        'decode_headers' => false,
        'decode_bodies'  => $decode_bodies
    );
    $decode = new Mail_mimeDecode($message);
    $email = $decode->decode($params);

    if (isset($email->headers) && is_array($email->headers)) {
        foreach ($email->headers as $name => $value) {
            if (!is_string($value)) {
                continue;
            }
            $value = trim($value);
            $decoded = iconv_mime_decode($value, ICONV_MIME_DECODE_CONTINUE_ON_ERROR, APP_CHARSET);
            // iconv_mime_decode() returns false on failure: keep the raw
            // header in that case instead of losing its value
            $email->headers[$name] = ($decoded === false) ? $value : $decoded;
        }
    }

    if ($include_bodies) {
        $email->body = Mime_Helper::getMessageBody($email);
    }
    return $email;
}
/**
 * Converts a string from the given charset to the application charset
 * (APP_CHARSET) using iconv().
 *
 * The string is handed back untouched when no source charset is given,
 * when it already is the application charset, or when iconv() fails.
 *
 * @param   string $string
 * @param   string $source_charset
 * @return  string The converted string
 */
function convertString($string, $source_charset)
{
    $needs_conversion = ($source_charset != false) && ($source_charset != APP_CHARSET);
    if (!$needs_conversion) {
        return $string;
    }

    $converted = iconv($source_charset, APP_CHARSET, $string);
    if ($converted === false) {
        return $string;
    }
    return $converted;
}
/**
 * Flattens the decoded object structure of a MIME message into buckets.
 *
 * Parts that have sub parts are only walked through. The body of every
 * leaf part is appended to one of $parts['text'], $parts['html'] or
 * $parts['attachments'], depending on its content type and disposition.
 * Text and HTML bodies are converted to the application charset first, and
 * "format=flowed" plain text is unflowed.
 *
 * @access  public
 * @param   object $obj The decoded object structure of the MIME message
 * @param   array $parts The parsed parts of the MIME message
 * @return  void
 */
function parse_output($obj, &$parts)
{
    if (!empty($obj->parts)) {
        $child_count = count($obj->parts);
        for ($idx = 0; $idx < $child_count; $idx++) {
            Mime_Helper::parse_output($obj->parts[$idx], $parts);
        }
        return;
    }

    $ctype = @strtolower($obj->ctype_primary.'/'.$obj->ctype_secondary);
    $disposition = empty($obj->disposition) ? '' : strtolower($obj->disposition);
    $is_attachment = ($disposition == 'attachment');

    if ($ctype == 'text/plain') {
        if ($is_attachment || (!empty($obj->d_parameters['filename']))) {
            $bucket = 'attachments';
            $content = @$obj->body;
        } else {
            $content = Mime_Helper::convertString($obj->body, @$obj->ctype_parameters['charset']);
            if (@$obj->ctype_parameters['format'] == 'flowed') {
                $content = Mime_Helper::decodeFlowedBodies($content, @$obj->ctype_parameters['delsp']);
            }
            $bucket = 'text';
        }
    } elseif (($ctype == 'text/html') || ($ctype == 'text/enriched')) {
        // text/enriched is a special case for Apple Mail
        if ($is_attachment) {
            $bucket = 'attachments';
            $content = @$obj->body;
        } else {
            $bucket = 'html';
            $content = Mime_Helper::convertString($obj->body, @$obj->ctype_parameters['charset']);
        }
    } else {
        // inline parts count as attachments, except for forwarded messages;
        // so do images (inline images) and anything marked as attachment
        $inline_file = ($disposition == 'inline') && ($ctype != 'message/rfc822');
        if ($inline_file || stristr($ctype, 'image') || $is_attachment) {
            $bucket = 'attachments';
        } else {
            $bucket = 'text';
        }
        $content = @$obj->body;
    }

    @$parts[$bucket][] = $content;
}
/**
 * Given a quoted-printable string, this
 * function will decode and return it.
 *
 * Soft line breaks ("=" at the very end of a line) are removed and every
 * =XX sequence is replaced by the byte it stands for.
 *
 * @access  private
 * @param   string $input Input body to decode
 * @return  string Decoded body
 */
function _quotedPrintableDecode($input)
{
    // Remove soft line breaks
    $input = preg_replace("/=\r?\n/", '', $input);

    // Replace encoded characters; a callback is used because the /e
    // modifier is no longer available in current PHP versions
    return preg_replace_callback(
        '/=([a-f0-9]{2})/i',
        function ($hex) {
            return chr(hexdec($hex[1]));
        },
        $input
    );
}
/**
 * Lists the content types that are never accepted as attachments.
 *
 * @access  private
 * @return  array The list of content types
 */
function _getInvalidContentTypes()
{
    $rejected = array();
    $rejected[] = 'message/rfc822';
    $rejected[] = 'application/pgp-signature';
    $rejected[] = 'application/ms-tnef';
    return $rejected;
}
/**
 * Lists the content dispositions that are accepted for attachments.
 *
 * @access  private
 * @return  array The list of valid dispositions
 */
function _getValidDispositions()
{
    $accepted = array();
    $accepted[] = 'attachment';
    $accepted[] = 'inline';
    return $accepted;
}
/**
 * Splits a full email at its first blank line into headers and body.
 *
 * @access  public
 * @param   string $message The full email message
 * @param   boolean $unfold Whether the header section is passed through Mail_API::unfold()
 * @return  array array(headers, body), or an empty array when the message
 *                contains no blank line
 */
function splitHeaderBody($message, $unfold = true)
{
    if (!preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $message, $match)) {
        return array();
    }

    $headers = $match[1];
    if ($unfold) {
        $headers = Mail_API::unfold($headers);
    }
    return array($headers, $match[2]);
}
/**
 * Initial implementation of flowed body handling per RFC 3676. This is
 * probably not complete but is a start.
 *
 * The body is processed line by line (split on "\n"). A line other than
 * the "-- " separator whose cleaned up form (Misc::removeNewLines()) ends
 * with a space is joined with the line that follows it, its last
 * character being dropped when $delsp is 'yes'. Any other line is copied
 * followed by a newline.
 *
 * @see     http://www.faqs.org/rfcs/rfc3676.html
 * @param   string $body The text to "unflow"
 * @param   string $delsp If spaces should be deleted ('yes' to enable)
 * @return  string The decoded body
 */
function decodeFlowedBodies($body, $delsp)
{
    $delete_space = ($delsp == 'yes');

    $text = '';
    foreach (explode("\n", $body) as $line) {
        $is_flowed = ($line != '-- ') && (substr(Misc::removeNewLines($line, true), -1) == ' ');
        if (!$is_flowed) {
            $text .= $line . "\n";
            continue;
        }

        $cleaned = Misc::removeNewLines($line, true);
        $text .= $delete_space ? substr($cleaned, 0, -1) : $cleaned;
    }
    return $text;
}
}
// benchmarking the included file (aka setup time): when the APP_BENCHMARK
// constant is enabled, a marker is set on the global 'bench' object to
// record that this file has been included
if (APP_BENCHMARK) {
$GLOBALS['bench']->setMarker('Included Mime_Helper Class');
}