<?php
/**
* Moc10 Library
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.TXT.
* It is also available through the world-wide-web at this URL:
* http://www.moc10phplibrary.com/LICENSE.TXT
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to hide@address.com so we can send you a copy immediately.
*
* @category Moc10
* @package Moc10_Pdf
* @author Nick Sagona, III <hide@address.com>
* @copyright Copyright (c) 2009-2011 Moc 10 Media, LLC. (http://www.moc10media.com)
* @license http://www.moc10phplibrary.com/LICENSE.TXT New BSD License
*/
/**
* Moc10_Pdf_Import
*
* @category Moc10
* @package Moc10_Pdf
* @author Nick Sagona, III <hide@address.com>
* @copyright Copyright (c) 2009-2011 Moc 10 Media, LLC. (http://www.moc10media.com)
* @license http://www.moc10phplibrary.com/LICENSE.TXT New BSD License
* @version 1.9.7
*/
class Moc10_Pdf_Import
{

    /**
     * PDF imported objects, keyed by object index. Each entry is an array
     * with the keys 'type' (root, info, parent, page or content), 'data'
     * (the raw object text) and 'refs' (indices referenced by the object).
     * @var array
     */
    public $objects = array();

    /**
     * PDF imported page objects (the object indices of the imported pages)
     * @var array
     */
    public $pages = array();

    /**
     * PDF imported data, with the XREF tables stripped out
     * @var string
     */
    protected $_data = null;

    /**
     * PDF imported kids indices, as listed in the /Kids array of the parent object
     * @var array
     */
    protected $_kids = array();

    /**
     * PDF imported thumb objects (indices of thumbnail objects to discard)
     * @var array
     */
    protected $_thumbs = array();

    /**
     * Altered PDF object indices, as an array of old index => new index
     * @var array
     */
    protected $_changed = array();

    /**
     * Constructor
     *
     * Instantiate a PDF import object. Reads the file, strips its XREF
     * tables, parses its objects and, if $pgs is passed, discards every
     * page (and that page's content objects) that was not requested.
     *
     * @param  string           $pdf  Path of the PDF file to import
     * @param  int|string|array $pgs  1-based page number, or array of page numbers, to keep
     * @return void
     */
    public function __construct($pdf, $pgs = null)
    {
        // Read the file data from the imported PDF.
        $import_file = new Moc10_File($pdf);

        // Strip any and all XREF tables, as the structure of the PDF will change.
        $this->_data = $this->_stripXrefTables($import_file->read());

        // Get the PDF objects.
        $this->_getObjects($this->_data);
        $this->pages = $this->_kids;

        // If the page argument was passed, parse out the desired page(s),
        // removing any unwanted pages and their content.
        if (!is_null($pgs)) {
            // Collect the object indices of the pages to keep. Page numbers
            // that do not exist in the document are ignored.
            $pAry = array();
            foreach ((array)$pgs as $value) {
                if (isset($this->pages[$value - 1])) {
                    $pAry[] = $this->pages[$value - 1];
                }
            }

            // $this->pages is a copy of $this->_kids, so it is safe to
            // remove entries from $this->_kids while iterating over it.
            foreach ($this->pages as $value) {
                if (in_array($value, $pAry)) {
                    continue;
                }

                // Find the page's content objects before the page itself is dropped.
                $content_objs = array();
                if (isset($this->objects[$value])) {
                    $content_objs = $this->_getContentIndices($this->objects[$value]['data']);
                }

                // Remove the unwanted page and its content from the imported data.
                unset($this->objects[$value]);
                $k = array_search($value, $this->_kids);
                if ($k !== false) {
                    unset($this->_kids[$k]);
                }
                foreach ($content_objs as $val) {
                    unset($this->objects[$val]);
                }
            }

            $this->pages = $this->_kids;
        }
    }

    /**
     * Method to shift the objects' indices based on the array of indices passed to the method, to prevent duplication.
     *
     * Every listed object that exists is moved to a new index, starting at
     * the greater of $si and one past the current highest index. References
     * to a moved object found in the remaining objects are then rewritten.
     *
     * @param  array $ind  Object indices to move
     * @param  int   $si   Lowest index that may be assigned
     * @return void
     */
    public function shiftObjects($ind, $si)
    {
        $ii = $this->_lastIndex($this->objects) + 1;
        $start_index = ($ii > $si) ? $ii : $si;

        // Adjust the direct object indices, keeping track of the ones that change.
        foreach ($ind as $value) {
            if (!array_key_exists($value, $this->objects)) {
                continue;
            }

            $moved = $this->objects[$value];
            $moved['data'] = str_replace($value . ' 0 obj', $start_index . ' 0 obj', $moved['data']);

            $this->objects[$start_index] = $moved;
            $this->_changed[$value] = $start_index;
            unset($this->objects[$value]);

            $k = array_search($value, $this->_kids);
            if ($k !== false) {
                $this->_kids[$k] = $start_index;
            }

            $start_index++;
        }

        // Adjust the reference object indices, based on the object indices that have changed.
        foreach ($this->objects as $key => $value) {
            if (count($value['refs']) == 0) {
                continue;
            }

            // Build the old => new map that applies to this object only.
            $map = array();
            foreach ($value['refs'] as $k => $v) {
                if (array_key_exists($v, $this->_changed)) {
                    $map[$v] = $this->_changed[$v];
                    $value['refs'][$k] = $this->_changed[$v];
                }
            }

            if (count($map) != 0) {
                $this->objects[$key]['refs'] = $value['refs'];
                // All references are rewritten in one pass on whole numbers,
                // so that '1 0 R' is never replaced inside '11 0 R' and an
                // already rewritten reference is never rewritten again.
                $this->objects[$key]['data'] = $this->_replaceRefs($this->objects[$key]['data'], $map);
            }
        }

        $this->pages = $this->_kids;
    }

    /**
     * Method to return the desired imported objects to the main PDF object.
     *
     * The root, parent and info objects are skipped. Each page object has
     * its /Parent reference pointed at $par before it is returned; the
     * change is also stored in this object's own copy of the page.
     *
     * @param  int $par  Index of the parent object in the main PDF
     * @return array
     */
    public function returnObjects($par)
    {
        $objs = array();
        $skipped = array('root', 'parent', 'info');

        foreach (array_keys($this->objects) as $key) {
            // Skip the root, parent and info objects, returning only page and content objects.
            if (in_array($this->objects[$key]['type'], $skipped, true)) {
                continue;
            }

            if ($this->objects[$key]['type'] == 'page') {
                // Only the /Parent entry is rewritten, wherever it sits in
                // the dictionary; other references are left untouched.
                $updated = preg_replace(
                    '/\/Parent\s+\d+\s+\d+\s+R/',
                    '/Parent ' . (int)$par . ' 0 R',
                    $this->objects[$key]['data'],
                    1
                );
                if (!is_null($updated)) {
                    $this->objects[$key]['data'] = $updated;
                }
            }

            $objs[$key] = $this->objects[$key];
        }

        return $objs;
    }

    /**
     * Method to search and return the objects within in the imported data.
     *
     * Fills $this->objects, $this->_kids and $this->_thumbs. Linearized and
     * metadata objects are left out, as are any thumbnail objects. Page
     * objects are stored last, in the order given by the /Kids array; a
     * page object that is not listed in /Kids is not stored at all.
     *
     * @param  string $data
     * @return void
     */
    protected function _getObjects($data)
    {
        // Grab object start points; the first group is the object index.
        $matches = array();
        preg_match_all('/(\d+)\s\d+\sobj/', $data, $matches, PREG_OFFSET_CAPTURE);
        $obj_start = $matches[0];
        $total = count($obj_start);
        $last_end = strrpos($data, 'endobj');

        // Start parsing through the object data.
        for ($i = 0; $i < $total; $i++) {
            $type = '';
            $index = $matches[1][$i][0];
            $offset = $obj_start[$i][1];

            // An object runs up to the start of the next one; the last
            // object runs up to and including the final 'endobj'.
            if (($i + 1) < $total) {
                $obj_data = substr($data, $offset, ($obj_start[$i + 1][1] - $offset));
            } else if (($last_end !== false) && ($last_end >= $offset)) {
                $obj_data = substr($data, $offset, ($last_end - $offset + 6));
            } else {
                $obj_data = substr($data, $offset);
            }

            // Add all relevant objects, stripping away any linearized code, hint codes or metadata,
            // as the order and size of the PDF and its objects may change.
            if ((strpos($obj_data, '/Linearized') !== false) || (strpos($obj_data, '/Type/Metadata') !== false)) {
                continue;
            }

            if ((strpos($obj_data, '/Catalog') !== false) && (strpos($obj_data, '/Pages') !== false)) {
                // Strip away any metadata references.
                $stripped = preg_replace('/\/Metadata\s+\d+\s+\d+\s+R\s*/', '', $obj_data);
                if (!is_null($stripped)) {
                    $obj_data = $stripped;
                }
                $type = 'root';
            } else if ((strpos($obj_data, '/Creator') !== false) || (strpos($obj_data, '/Producer') !== false)) {
                $type = 'info';
            } else if ((strpos($obj_data, '/Count') !== false) && (strpos($obj_data, '/Kids') !== false)) {
                // Read the page indices out of the /Kids array.
                $kids = array();
                if (preg_match('/\/Kids\s*\[([^\]]*)\]/', $obj_data, $kids)) {
                    $kid_refs = array();
                    preg_match_all('/(\d+)\s+\d+\s+R/', $kids[1], $kid_refs);
                    $this->_kids = $kid_refs[1];
                }
                $type = 'parent';
            } else if ((strpos($obj_data, '/MediaBox') !== false) || (strpos($obj_data, '/Contents') !== false)) {
                // Strip away any thumbnail references, remembering the thumbnail object.
                $thumb = array();
                if (preg_match('/\/Thumb\s+(\d+)\s+\d+\s+R\s*/', $obj_data, $thumb)) {
                    $this->_thumbs[] = $thumb[1];
                    $obj_data = str_replace($thumb[0], '', $obj_data);
                }
                $type = 'page';
            } else {
                $type = 'content';
            }

            $this->objects[$index] = array(
                'type' => $type,
                'data' => $obj_data,
                'refs' => $this->_getRefs($obj_data)
            );
        }

        // Order the page objects correctly.
        $pageOrder = array();
        foreach ($this->objects as $key => $value) {
            if ($value['type'] == 'page') {
                $pageOrder[$key] = $value;
                unset($this->objects[$key]);
            }
        }
        foreach ($this->_kids as $value) {
            // A kid that was not parsed as a page is left alone, rather
            // than overwriting its entry with an empty value.
            if (isset($pageOrder[$value])) {
                $this->objects[$value] = $pageOrder[$value];
            }
        }

        // Remove any thumbnail objects.
        foreach ($this->_thumbs as $value) {
            unset($this->objects[$value]);
        }
    }

    /**
     * Method to search and return the object references within in the data.
     *
     * Only references with a generation number of 0 ('N 0 R') are returned.
     *
     * @param  string $data
     * @return array  Referenced object indices as strings, sorted in ascending order
     */
    protected function _getRefs($data)
    {
        $r = array();

        // The trailing \b keeps operators such as 'RG' from being read as a reference.
        preg_match_all('/(?<!\d)(\d+)\s0\sR\b/', $data, $r);

        $refs = $r[1];
        sort($refs);

        return $refs;
    }

    /**
     * Method to return the last object index.
     *
     * @param  array $arr
     * @throws Exception
     * @return int  The highest key of the array, or 0 if the array is empty
     */
    protected function _lastIndex($arr)
    {
        if (!is_array($arr)) {
            $lang = new Moc10_Language();
            throw new Exception($lang->__('Error: The argument passed must be an array.'));
        }

        if (count($arr) == 0) {
            return 0;
        }

        return max(array_keys($arr));
    }

    /**
     * Method to strip the XREF tables out of the data.
     *
     * Removes every span that runs from an 'xref' keyword through the next
     * '%%EOF' marker. An 'xref' with no '%%EOF' after it is left in place,
     * so that no other data is removed.
     *
     * @param  string $data
     * @return string
     */
    protected function _stripXrefTables($data)
    {
        while (($start = strpos($data, 'xref')) !== false) {
            $end = strpos($data, '%%EOF', $start);
            if ($end === false) {
                break;
            }
            $data = substr_replace($data, '', $start, ($end + 5) - $start);
        }

        return $data;
    }

    /**
     * Method to return the indices of the objects referenced by the /Contents entry of a page object.
     *
     * Handles both a single reference and an array of references.
     *
     * @param  string $data  Raw data of a page object
     * @return array
     */
    protected function _getContentIndices($data)
    {
        $contents = array();
        if (!preg_match('/\/Contents\s*(\[[^\]]*\]|\d+\s+\d+\s+R)/', $data, $contents)) {
            return array();
        }

        $indices = array();
        preg_match_all('/(\d+)\s+\d+\s+R/', $contents[1], $indices);

        return $indices[1];
    }

    /**
     * Method to rewrite the object references within the data in a single pass.
     *
     * @param  string $data
     * @param  array  $map   Array of old index => new index
     * @return string
     */
    protected function _replaceRefs($data, $map)
    {
        // Split on the object number of every 'N 0 R' reference; with the
        // delimiters captured, the numbers land on the odd positions.
        $parts = preg_split('/(?<!\d)(\d+)(?=\s0\sR\b)/', $data, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $data;
        }

        $total = count($parts);
        for ($i = 1; $i < $total; $i += 2) {
            if (isset($map[$parts[$i]])) {
                $parts[$i] = $map[$parts[$i]];
            }
        }

        return implode('', $parts);
    }

}