Location: PHPKode > scripts > Native Excel Reader > CompoundDocument.inc.php
<?php
/**
 * Microsoft compound document reader
 * Based on http://sc.openoffice.org/compdocfileformat.pdf
 *
 * @version 0.5.1
 * @author Ruslan V. Uss <unclerus at gmail.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Error raised by CompoundDocument when the input is not a valid
 * compound document.
 */
class CompoundDocumentException extends Exception
{
}

/**
 * Reader for Microsoft compound documents (OLE2 structured storage).
 *
 * Usage: create an instance with the desired output charset, call parse()
 * with the raw file contents, then read the public $directory array.
 *
 * NOTE: signed 32-bit fields are decoded with unpack()'s 'l' code, which uses
 * machine byte order, so the reader assumes a little-endian host.
 */
class CompoundDocument
{
	/**
	 * Output character set. Can be any of iconv encodings.
	 * @var string
	 */
	public $charset;

	/**
	 * Low-level compound document header
	 * @var array
	 */
	public $header = array ();

	/**
	 * Master sector allocation table. This is an array of the SecIDs of all sectors used by
	 * SAT (sector allocation table)
	 * @var array
	 */
	public $msat = array ();

	/**
	 * Sector allocation table. Contains SecIDs of all sectors used by user streams
	 * @var array
	 */
	public $sat = array ();

	/**
	 * Short sector allocation table. Contains SecIDs of all short streams
	 * @var array
	 */
	public $ssat = array ();

	/**
	 * Parsed document.
	 * Each entry refers to a storage or stream.
	 * Data can be found only in streams.
	 * Empty directory entries (type 0) are not stored, so DirIDs may have gaps.
	 *
	 * Structure of the array:
	 * array (
	 *    DirID => array (
	 *         'desc' => array (
	 *             'name' => <name of the entry>,
	 *             'type' => <type of the entry, 0 = empty, 1 = storage, 2 = stream, 5 = root storage>,
	 *             'color' => <node color of the entry, for red-black tree>,
	 *             'leftDirId' => <DirID of the left child node, -1 if there is no child>,
	 *             'rightDirId' => <DirID of the right child node, -1 if there is no child>,
	 *             'rootDirId' => <DirID of the root node, if this entry is a storage, -1 otherwise>,
	 *             ... see http://sc.openoffice.org/compdocfileformat.pdf 7.2
	 *         ),
	 *         'data' => <binary stream data, returned in whole (short) sectors>
	 *    )
	 *    ...
	 * )
	 * @var array
	 */
	public $directory = array ();

	/**
	 * Contents of the short-stream container (the root entry's stream).
	 * Only populated while parse() is running.
	 * @var string|null
	 */
	private $_shortData;

	/**
	 * Raw document bytes. Only populated while parse() is running.
	 * @var string|null
	 */
	private $_data;

	/**
	 * Constructor
	 * @param string $charset Output character set
	 */
	public function __construct ($charset)
	{
		$this->charset = $charset;
	}

	/**
	 * Offset of the given sector in file
	 * @param int $secId Sector ID
	 * @return int sector offset in file
	 */
	public function sectorOffset ($secId)
	{
		// Sectors start right after the 0x200-byte header
		return $this->header ['secSize'] * $secId + 0x200;
	}

	/**
	 * Decode a single value from the raw document data.
	 * @param int $offset Byte offset in the document
	 * @param int $size Number of bytes to read
	 * @param string $format unpack() format code
	 * @return mixed Decoded value, or null when the data ends before $offset + $size
	 */
	private function _value ($offset, $size = 4, $format = 'l')
	{
		$result = @unpack ($format, (string) substr ($this->_data, $offset, $size));
		return is_array ($result) ? $result [1] : null;
	}

	/**
	 * Resolve a sector chain into the ordered list of its sector IDs.
	 *
	 * Every link is validated, so a damaged allocation table (reference to a
	 * sector the table does not describe, or a loop) ends in an exception
	 * instead of an endless walk.
	 *
	 * @param int $first First sector ID of the chain; a negative value means an empty chain
	 * @param array $table Allocation table to follow (SAT or SSAT)
	 * @return array Sector IDs in chain order
	 * @throws CompoundDocumentException If the chain is broken or cyclic
	 */
	private function _chain ($first, array $table)
	{
		$chain = array ();
		$seen = array ();
		for ($sector = $first; $sector >= 0; $sector = $table [$sector])
		{
			// isset() is also false for null entries, i.e. table slots that lie beyond the end of the data
			if (isset ($seen [$sector]) || !isset ($table [$sector]))
				throw new CompoundDocumentException ('Corrupt sector chain');
			$seen [$sector] = true;
			$chain [] = $sector;
		}
		return $chain;
	}

	/**
	 * Append all SecIDs stored in the given SAT sector to $this->sat.
	 * @param int $secId ID of a sector that holds a part of the SAT
	 */
	private function _appendSat ($secId)
	{
		$sectorOffset = $this->sectorOffset ($secId);
		for ($i = 0; $i < $this->header ['secSize']; $i += 4)
			$this->sat [] = $this->_value ($sectorOffset + $i);
	}

	/**
	 * Build the MSAT and, along the way, the SAT.
	 *
	 * @param int $offset Byte offset of the MSAT part to start with
	 * @param int $size Size in bytes of that part
	 * @param bool $first True when the part is the one embedded in the header
	 * @throws CompoundDocumentException If the chain of MSAT sectors loops
	 */
	private function _buildMsat ($offset, $size, $first = false)
	{
		$visited = array ();
		while (true)
		{
			// The header part consists of SecIDs only (the next MSAT sector is named by the
			// header field msatSecId); a regular MSAT sector keeps the link in its last 4 bytes.
			$idBytes = $first ? $size : $size - 4;
			for ($i = 0; $i < $idBytes; $i += 4)
			{
				$value = $this->_value ($offset + $i);
				if ($value === null || $value < 0) break;
				$this->msat [] = $value;
				$this->_appendSat ($value);
			}

			$next = $first ? $this->header ['msatSecId'] : $this->_value ($offset + $size - 4);
			// Any negative (or unreadable) link terminates the chain
			if ($next === null || $next < 0) return;
			if (isset ($visited [$next]))
				throw new CompoundDocumentException ('Corrupt master sector allocation table');
			$visited [$next] = true;

			$offset = $this->sectorOffset ($next);
			$size = $this->header ['secSize'];
			$first = false;
		}
	}

	/**
	 * Build the SSAT by reading every sector of the chain that starts at header ssatSecId.
	 * @throws CompoundDocumentException If the chain is broken or cyclic
	 */
	private function _buildSsat ()
	{
		foreach ($this->_chain ($this->header ['ssatSecId'], $this->sat) as $sector)
		{
			$sectorOffset = $this->sectorOffset ($sector);
			for ($i = 0; $i < $this->header ['secSize']; $i += 4)
				$this->ssat [] = $this->_value ($sectorOffset + $i);
		}
	}

	/**
	 * Read a stream stored in short sectors.
	 * @param int $ssecId ID of the first short sector
	 * @return string Concatenated short sectors of the stream
	 * @throws CompoundDocumentException If the chain is broken or cyclic
	 */
	private function _getShortStream ($ssecId)
	{
		$result = '';
		$ssecSize = $this->header ['ssecSize'];
		foreach ($this->_chain ($ssecId, $this->ssat) as $sector)
			$result .= substr ($this->_shortData, $ssecSize * $sector, $ssecSize);
		return $result;
	}

	/**
	 * Read a stream stored in regular sectors.
	 * @param int $secId ID of the first sector
	 * @return string Concatenated sectors of the stream
	 * @throws CompoundDocumentException If the chain is broken or cyclic
	 */
	private function _getNormalStream ($secId)
	{
		$result = '';
		$secSize = $this->header ['secSize'];
		foreach ($this->_chain ($secId, $this->sat) as $sector)
			$result .= substr ($this->_data, $this->sectorOffset ($sector), $secSize);
		return $result;
	}

	/**
	 * Read the data of a stream entry that is already present in $this->directory.
	 * Streams smaller than header minStreamSize are read from short sectors.
	 * @param int $id DirID of the stream
	 * @return string Stream data
	 */
	private function _getStream ($id)
	{
		$desc = $this->directory [$id]['desc'];
		return $desc ['size'] < $this->header ['minStreamSize']
			? $this->_getShortStream ($desc ['secId'])
			: $this->_getNormalStream ($desc ['secId']);
	}

	/**
	 * Read all directory entries and the data of every stream into $this->directory.
	 * @throws CompoundDocumentException If a sector chain is broken or cyclic
	 */
	private function _readDirectory ()
	{
		// Each directory entry occupies 0x80 bytes
		$dirCount = (int) ($this->header ['secSize'] / 0x80);
		$id = 0;
		foreach ($this->_chain ($this->header ['dirSecId'], $this->sat) as $sector)
		{
			$sectorOffset = $this->sectorOffset ($sector);
			for ($i = 0; $i < $dirCount; $i ++, $id ++)
			{
				$raw = (string) substr ($this->_data, $sectorOffset + $i * 0x80, 0x80);
				$desc = @unpack (
					'A64name/vnameLength/Ctype/Ccolor/lleftDirId/lrightDirId/lrootDirId/A16uid/VuserFlags/A8createTime/A8modifTime/lsecId/lsize',
					$raw
				);
				// Skip empty (or truncated) entries but keep counting, so that the DirIDs of
				// the following entries still match leftDirId/rightDirId/rootDirId references.
				if (!is_array ($desc) || $desc ['type'] == 0) continue;
				if ($desc ['type'] == 5)
				{
					$this->header ['sstreamSecId'] = $desc ['secId'];
					$this->_shortData = $this->_getNormalStream ($desc ['secId']);
				}
				// Cut the name from the raw bytes: unpack's 'A' code strips trailing NUL bytes
				// (PHP >= 5.5), which would drop the high byte of a final ASCII character.
				// nameLength includes the 2-byte terminator.
				$nameBytes = max (0, min ($desc ['nameLength'], 64) - 2);
				$desc ['name'] = iconv ('utf-16le', $this->charset, (string) substr ($raw, 0, $nameBytes));
				$this->directory [$id]['desc'] = $desc;
				if ($desc ['type'] == 2) $this->directory [$id]['data'] = $this->_getStream ($id);
			}
		}
	}

	/**
	 * Parse compound document
	 *
	 * Fills $header, $msat, $sat, $ssat and $directory; results of a previous
	 * call are discarded.
	 *
	 * @param string $data Compound document data
	 * @throws CompoundDocumentException If the data is not a valid compound document
	 */
	public function parse ($data)
	{
		$this->directory = $this->msat = $this->sat = $this->ssat = array ();
		$this->_shortData = '';

		$header = @unpack (
			'A8ident/h32uid/vrevision/vversion/vbyteOrder/vssz/vsssz/x10/VsatSize/VdirSecId/x4/VminStreamSize/lssatSecId/VssatSize/lmsatSecId/VmsatSize',
			(string) substr ($data, 0, 0x200)
		);
		$this->header = is_array ($header) ? $header : array ();
		if (!is_array ($header) || $header ['ident'] !== "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")
			throw new CompoundDocumentException ('Invalid file format');
		if ($header ['byteOrder'] !== 0xfffe)
			throw new CompoundDocumentException ('Invalid byte order');
		// A sector must hold at least one 0x80-byte directory entry; the upper bound keeps the shifts sane
		if ($header ['ssz'] < 7 || $header ['ssz'] > 30 || $header ['sssz'] > 30)
			throw new CompoundDocumentException ('Invalid sector size');
		$this->header ['secSize'] = 1 << $header ['ssz'];
		$this->header ['ssecSize'] = 1 << $header ['sssz'];

		$this->_data = $data;
		try
		{
			// 0x4c is the offset of the MSAT part inside the header, 0x1b4 its size
			$this->_buildMsat (0x4c, 0x1b4, true);
			$this->_buildSsat ();
			$this->_readDirectory ();
		}
		catch (Exception $e)
		{
			// Do not keep the raw buffers alive when parsing fails
			$this->_data = $this->_shortData = null;
			throw $e;
		}

		$this->_data = $this->_shortData = null;
	}
}
?>
Return current item: Native Excel Reader