Location: PHPKode > projects > Moc10 PHP Library > library/Moc10/Pdf/Import.php
<?php
/**
 * Moc10 Library
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.TXT.
 * It is also available through the world-wide-web at this URL:
 * http://www.moc10phplibrary.com/LICENSE.TXT
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to hide@address.com so we can send you a copy immediately.
 *
 * @category   Moc10
 * @package    Moc10_Pdf
 * @author     Nick Sagona, III <hide@address.com>
 * @copyright  Copyright (c) 2009-2011 Moc 10 Media, LLC. (http://www.moc10media.com)
 * @license    http://www.moc10phplibrary.com/LICENSE.TXT     New BSD License
 */

/**
 * Moc10_Pdf_Import
 *
 * @category   Moc10
 * @package    Moc10_Pdf
 * @author     Nick Sagona, III <hide@address.com>
 * @copyright  Copyright (c) 2009-2011 Moc 10 Media, LLC. (http://www.moc10media.com)
 * @license    http://www.moc10phplibrary.com/LICENSE.TXT     New BSD License
 * @version    1.9.7
 */

class Moc10_Pdf_Import
{

    /**
     * PDF imported objects, keyed by object index. Each entry is an array
     * with the keys 'type' (root, info, parent, page or content), 'data'
     * (the raw object text) and 'refs' (referenced object indices).
     * @var array
     */
    public $objects = array();

    /**
     * PDF imported page object indices (a copy of the kids indices)
     * @var array
     */
    public $pages = array();

    /**
     * PDF imported data
     * @var string
     */
    protected $_data = null;

    /**
     * PDF imported kids indices
     * @var array
     */
    protected $_kids = array();

    /**
     * PDF imported thumb objects
     * @var array
     */
    protected $_thumbs = array();

    /**
     * Altered PDF object indices, as an old index => new index map
     * @var array
     */
    protected $_changed = array();

    /**
     * Constructor
     *
     * Instantiate a PDF import object. The file is read, its XREF sections
     * are stripped, its objects are parsed and, if page numbers are passed,
     * every other page (and the content objects it points to) is removed.
     *
     * @param  string $pdf
     * @param  int|string|array $pgs  1-based page number(s) to keep
     * @return void
     */
    public function __construct($pdf, $pgs = null)
    {

        // Read the file data from the imported PDF.
        $import_file = new Moc10_File($pdf);

        // Strip any and all XREF tables, as the structure of the PDF will change.
        $this->_data = $this->_stripXrefs($import_file->read());

        // Get the PDF objects.
        $this->_getObjects($this->_data);
        $this->pages = $this->_kids;

        if (is_null($pgs)) {
            return;
        }

        // Resolve the requested page numbers to page object indices.
        // Page numbers that do not exist are ignored instead of raising
        // undefined-index notices.
        $requested = (is_array($pgs)) ? $pgs : array($pgs);
        $keep = array();
        foreach ($requested as $pageNum) {
            $pos = (int)$pageNum - 1;
            if (array_key_exists($pos, $this->pages)) {
                $keep[] = $this->pages[$pos];
            }
        }

        // Remove unwanted pages and their content from the imported data.
        foreach ($this->pages as $pageIndex) {
            if (!in_array($pageIndex, $keep)) {
                $this->_removePage($pageIndex);
            }
        }

        // Note: the surviving entries keep their original array keys.
        $this->pages = $this->_kids;

    }

    /**
     * Method to shift the objects' indices based on the array of indices passed to the method, to prevent duplication.
     *
     * Each listed object that exists is moved to a fresh index, starting at
     * the greater of $si and one past the highest current index. All
     * references to moved objects are then rewritten in every object.
     *
     * @param  array $ind
     * @param  int $si
     * @return void
     */
    public function shiftObjects($ind, $si)
    {

        $ii = $this->_lastIndex($this->objects) + 1;
        $start_index = ($ii > $si) ? $ii : $si;

        // Adjust the direct object indices, keeping track of the ones that change.
        foreach ($ind as $value) {
            if (array_key_exists($value, $this->objects)) {
                $object = $this->objects[$value];

                // Rewrite only the object header at the very start of the data,
                // falling back to a plain replace if the data has no such header.
                $count = 0;
                $data = preg_replace('/^\d+(\s+\d+\s+obj)/', $start_index . '${1}', $object['data'], 1, $count);
                if (($data === null) || ($count == 0)) {
                    $data = str_replace($value . ' 0 obj', $start_index . ' 0 obj', $object['data']);
                }
                $object['data'] = $data;

                $this->_changed[$value] = $start_index;
                unset($this->objects[$value]);
                $this->objects[$start_index] = $object;

                $k = array_search($value, $this->_kids);
                if ($k !== false) {
                    $this->_kids[$k] = $start_index;
                }
                $start_index++;
            }
        }

        // Adjust the reference object indices, based on the object indices that have changed.
        foreach ($this->objects as $key => $value) {
            if (empty($value['refs'])) {
                continue;
            }

            $touched = false;
            foreach ($value['refs'] as $k => $v) {
                if (array_key_exists($v, $this->_changed)) {
                    $value['refs'][$k] = $this->_changed[$v];
                    $touched = true;
                }
            }

            if ($touched) {
                $this->objects[$key]['refs'] = $value['refs'];

                // A single pass over whole reference tokens: '5 0 R' can no longer
                // match inside '15 0 R', and a rewritten reference is never
                // rewritten a second time.
                $data = preg_replace_callback('/(\d+)(\s+0\s+R\b)/', array($this, '_remapRef'), $this->objects[$key]['data']);
                if ($data !== null) {
                    $this->objects[$key]['data'] = $data;
                }
            }
        }

        $this->pages = $this->_kids;

    }

    /**
     * Method to return the desired imported objects to the main PDF object.
     *
     * Root, parent and info objects are skipped. Page objects have their
     * references to their old parent re-pointed at $par; this change is also
     * stored back into the objects property.
     *
     * @param  int $par
     * @return array
     */
    public function returnObjects($par)
    {

        $objs = array();
        $skipped = array('root', 'parent', 'info');

        foreach (array_keys($this->objects) as $key) {
            $type = $this->objects[$key]['type'];

            // Skip the root, parent and info objects, returning only page and content objects.
            if (in_array($type, $skipped, true)) {
                continue;
            }

            if ($type == 'page') {
                $data = $this->objects[$key]['data'];
                $found = array();

                // Locate the page's own /Parent entry, then rewrite references to
                // that object only as whole tokens, so that '/Parent 3 0 R' does
                // not also alter '13 0 R'.
                if (preg_match('/\/Parent\s+(\d+)\s+0\s+R\b/', $data, $found)) {
                    $data = preg_replace('/(?<!\d)' . $found[1] . '(\s+0\s+R\b)/', $par . '${1}', $data);
                    if ($data !== null) {
                        $this->objects[$key]['data'] = $data;
                    }
                }
            }

            $objs[$key] = $this->objects[$key];
        }

        return $objs;

    }

    /**
     * Method to search and return the objects within in the imported data.
     *
     * Populates the objects, kids and thumbs properties. Linearization and
     * metadata objects are dropped, page objects are re-added in kids order
     * and thumbnail objects are removed. Only the kids of the last page tree
     * node found in the data are retained.
     *
     * @param  string $data
     * @return void
     */
    protected function _getObjects($data)
    {

        $matches = array();

        // Grab object start points, capturing the object index of each header.
        preg_match_all('/(\d+)\s+\d+\s+obj/', $data, $matches, PREG_OFFSET_CAPTURE);
        $obj_start = $matches[0];
        $obj_count = count($obj_start);

        // Start parsing through the object data.
        for ($i = 0; $i < $obj_count; $i++) {
            $type = '';
            $index = $matches[1][$i][0];
            $offset = $obj_start[$i][1];

            if (($i + 1) < $obj_count) {
                $obj_data = substr($data, $offset, ($obj_start[$i + 1][1] - $offset));
            } else {
                // The last object runs to the final 'endobj', or to the end of
                // the data if that keyword is missing.
                $end = strrpos($data, 'endobj');
                if (($end !== false) && ($end > $offset)) {
                    $obj_data = substr($data, $offset, ($end - $offset + 6));
                } else {
                    $obj_data = substr($data, $offset);
                }
            }

            // Add all relevant objects, stripping away any linearized code, hint codes or metadata, as the order and size of the PDF and its objects may change.
            if ((strpos($obj_data, '/Linearized') !== false) || (strpos($obj_data, '/Type/Metadata') !== false)) {
                continue;
            }

            if ((strpos($obj_data, '/Catalog') !== false) && (strpos($obj_data, '/Pages') !== false)) {
                // Strip away any metadata references. Matching the whole entry keeps
                // this safe when /Metadata is the last key of the dictionary.
                $stripped = preg_replace('/\/Metadata\s+\d+\s+\d+\s+R\s*/', '', $obj_data);
                if ($stripped !== null) {
                    $obj_data = $stripped;
                }
                $type = 'root';
            } else if ((strpos($obj_data, '/Creator') !== false) || (strpos($obj_data, '/Producer') !== false)) {
                $type = 'info';
            } else if ((strpos($obj_data, '/Count') !== false) && (strpos($obj_data, '/Kids') !== false)) {
                $kids = array();
                if (preg_match('/\/Kids\s*\[([^\]]*)\]/', $obj_data, $kids)) {
                    $this->_kids = $this->_parseIndices($kids[1]);
                } else {
                    $this->_kids = array();
                }
                $type = 'parent';
            } else if ((strpos($obj_data, '/MediaBox') !== false) || (strpos($obj_data, '/Contents') !== false)) {
                // Strip away any thumbnail references, remembering the thumbnail object index.
                $thumb = array();
                if (preg_match('/\/Thumb\s+(\d+)\s+\d+\s+R\s*/', $obj_data, $thumb)) {
                    $this->_thumbs[] = $thumb[1];
                    $obj_data = str_replace($thumb[0], '', $obj_data);
                }
                $type = 'page';
            } else {
                $type = 'content';
            }

            $this->objects[$index] = array('type' => $type, 'data' => $obj_data, 'refs' => $this->_getRefs($obj_data));
        }

        // Order the page objects correctly: pull them out, then re-add them in kids order.
        $pageOrder = array();

        foreach ($this->objects as $key => $value) {
            if ($value['type'] == 'page') {
                $pageOrder[$key] = $value;
                unset($this->objects[$key]);
            }
        }

        foreach ($this->_kids as $value) {
            // Guard against kids that are not page objects, so that no null
            // entry is written over an existing object.
            if (isset($pageOrder[$value])) {
                $this->objects[$value] = $pageOrder[$value];
            }
        }

        // Remove any thumbnail objects.
        foreach ($this->_thumbs as $value) {
            unset($this->objects[$value]);
        }

    }

    /**
     * Method to search and return the object references within in the data.
     *
     * Only generation 0 references ('N 0 R') are collected. Duplicates are
     * kept and the result is sorted.
     *
     * @param  string $data
     * @return array
     */
    protected function _getRefs($data)
    {

        $r = array();

        // Grab the object numbers of whole reference tokens only; the trailing
        // word boundary keeps operators such as 'RG' from being read as 'R'.
        preg_match_all('/(\d+)\s+0\s+R\b/', $data, $r);
        $refs = $r[1];

        sort($refs);
        return $refs;

    }

    /**
     * Method to return the last object index.
     *
     * @param  array $arr
     * @throws Exception
     * @return int  the highest key of the array, or 0 if the array is empty
     */
    protected function _lastIndex($arr)
    {

        if (!is_array($arr)) {
            $lang = new Moc10_Language();
            throw new Exception($lang->__('Error: The argument passed must be an array.'));
        }

        if (count($arr) == 0) {
            return 0;
        }

        return max(array_keys($arr));

    }

    /**
     * Method to remove every XREF section (from 'xref' through the next '%%EOF') from the data.
     *
     * @param  string $data
     * @return string
     */
    protected function _stripXrefs($data)
    {

        $start = strpos($data, 'xref');
        while ($start !== false) {
            // Look for the end marker after the keyword only; if there is none,
            // cut to the end of the data rather than removing unrelated content.
            $eof = strpos($data, '%%EOF', $start);
            $end = ($eof !== false) ? ($eof + 5) : strlen($data);
            $data = substr_replace($data, '', $start, ($end - $start));
            $start = strpos($data, 'xref');
        }

        return $data;

    }

    /**
     * Method to remove a page object, its kids entry and the content objects it points to.
     *
     * @param  int|string $pageIndex
     * @return void
     */
    protected function _removePage($pageIndex)
    {

        $content_objs = array();

        if (isset($this->objects[$pageIndex]['data'])) {
            // /Contents is either a single reference or an array of references.
            $found = array();
            if (preg_match('/\/Contents\s*(\[[^\]]*\]|\d+\s+\d+\s+R)/', $this->objects[$pageIndex]['data'], $found)) {
                $content_objs = $this->_parseIndices($found[1]);
            }
        }

        unset($this->objects[$pageIndex]);

        $k = array_search($pageIndex, $this->_kids);
        if ($k !== false) {
            unset($this->_kids[$k]);
        }

        foreach ($content_objs as $val) {
            unset($this->objects[$val]);
        }

    }

    /**
     * Method to pull the object numbers out of a string of indirect references.
     *
     * @param  string $str
     * @return array  the object numbers, as strings, in order of appearance
     */
    protected function _parseIndices($str)
    {

        $found = array();
        preg_match_all('/(\d+)\s+\d+\s+R/', $str, $found);

        return $found[1];

    }

    /**
     * Callback for shiftObjects() to swap one reference token's object number for its new index, if it has changed.
     *
     * @param  array $match  the full token, the object number and the rest of the token
     * @return string
     */
    protected function _remapRef($match)
    {

        if (array_key_exists($match[1], $this->_changed)) {
            return $this->_changed[$match[1]] . $match[2];
        }

        return $match[0];

    }

}
// Return current item: Moc10 PHP Library