Location: PHPKode > projects > phlyMail Lite > phlymail/handlers/bookmarks/bookmarksparser.php
<?php
/*
////////////////////////////////////////////////////////////////

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
////////////////////////////////////////////////////////////////
/**
 * This class parses IE, Netscape and Opera bookmark files and returns arrays with the bookmark / folder information.
 *
 * @author Lennart Groetzbach <hide@address.com>
 * @copyright Lennart Groetzbach <hide@address.com> - distributed under the LGPL
 * @version 0.61 2003/07/07
 *
 * @author Matthias Sommerfeld <hide@address.com>
 * @version 0.6.2 2009-08-15
 */

class bookmarkParser {

    /**
     * The generated error messages, line feed separated
     *
     * @access   public
     * @var     String
     */
    public $error_message = '';

    /**
     * The number of folders parsed after a function call
     *
     * @access   protected
     * @var     Integer
     */
    protected $foldersParsed = 0;

    /**
     * The number of bookmarks parsed after a function call
     *
     * @access   protected
     * @var     Integer
     */
    protected $urlsParsed = 0;

    /**
     * Holds the structure as parsed: array('folders' => array(...), 'items' => array(...)),
     * both lists keyed by a running number starting at 1.
     *
     * @access   protected
     * @var array
     */
    protected $structure = array();

    /**
     * ID of the root folder handed to the first parseInternetExplorer() call.
     * Needed during recursion so that folder IDs are computed the same way as
     * in the other parsers (root ID + running folder number).
     *
     * @access   private
     * @var     Integer
     */
    private $ieRootID = 0;


    public function __construct()
    {
        $this->resetStructure();
    }

    /**
     * Parses a bookmark source in the given format.
     *
     * Problems inside a format parser (unreadable file, wrong header) are only
     * reported through $error_message; the (possibly empty) structure is still
     * returned in that case.
     *
     * @param String $url     path of the bookmark file (or the favourites directory for IE)
     * @param String $format  one of mozilla|moz|netscape|ns, msie|ie|internetexplorer|explorer, opera|op
     * @return array|false    array('folders' => ..., 'items' => ...) or false on unknown format
     */
    public function parse($url, $format = 'mozilla')
    {
        // The parsers restart their counters at 0, so entries of a previous run
        // would be partly overwritten and partly left behind - start clean.
        $this->resetStructure();
        switch ($format) {
            case 'mozilla': case 'moz': case 'netscape': case 'ns':
                $this->parseNetscape($url, 0);
                break;
            case 'msie': case 'ie': case 'internetexplorer': case 'explorer':
                $this->parseInternetExplorer($url, 0);
                break;
            case 'opera': case 'op':
                $this->parseOpera($url, 0);
                break;
            default:
                $this->error_message = 'Unknown format';
                return false;
        }
        return $this->structure;
    }

    /**
     * Parses an Opera bookmark file
     *
     * Parses the file, default name for bookmark file is "Opera6.adr"
     * Tested with Opera 6.
     *
     * The values are read by position: the lines directly following a #FOLDER
     * marker are taken as name, creation date, visit date; those following a
     * #URL marker as name, url, creation date, visit date.
     * NOTE(review): files that put other lines first (e.g. an ID line) would
     * need key based parsing - confirm against the Opera versions to support.
     *
     * @param String $url   url to the bookmark file
     * @param int  $folderID  id of the root folder
     * @return mixed  true on success, -1 if error occurs
     */
    protected function parseOpera($url, $folderID)
    {
        $this->foldersParsed = 0;
        $this->urlsParsed = 0;
        $fp = $this->openFile($url, 'parseOpera');
        if ($fp === false) {
            return -1;
        }
        // is it an Opera bookmark file?
        if (!preg_match('/Opera Hotlist version 2\.0/', $this->readLine($fp))) {
            fclose($fp);
            $this->error_message .= 'parseOpera(): Wrong header'.LF;
            return -1;
        }
        // stack of folder IDs, the last element is the currently open folder
        $parents = array($folderID);
        while (!feof($fp)) {
            $line = $this->readLine($fp);
            if (preg_match('/^\s*#folder/i', $line)) { // folder found
                $name = $this->readValue($fp);
                $created = $this->readValue($fp);
                $visited = $this->readValue($fp);
                $this->foldersParsed++;
                $this->structure['folders'][$this->foldersParsed] = array
                        ('name' => $name
                        ,'descr' => ''
                        ,'created' => $created
                        ,'parent' => end($parents)
                        ,'added' => $created
                        ,'visited' => $visited
                        );
                // everything up to the matching "-" line belongs to this folder
                $parents[] = $folderID + $this->foldersParsed;
            } elseif (preg_match('/^\s*#url/i', $line)) { // bookmark found
                // the name line comes first, it is stored as 'descr'
                $descr = $this->readValue($fp);
                $href = $this->readValue($fp);
                $created = $this->readValue($fp);
                $visited = $this->readValue($fp);
                $this->urlsParsed++;
                $this->structure['items'][$this->urlsParsed] = array
                        ('url' => $href
                        ,'descr' => $descr
                        ,'parent' => end($parents)
                        ,'added' => $created
                        ,'visited' => $visited
                        );
            } elseif (preg_match('/^\s*-/', $line)) { // folder closed
                // never drop the root folder, even if the file has a stray closer
                if (count($parents) > 1) {
                    array_pop($parents);
                }
            }
        }
        fclose($fp);
        return true;
    }

    /**
     * Parses a Netscape bookmark file
     *
     * Parses the file, default name is "bookmarks.html".
     * Tested with Netscape 4.x and 6.x.
     *
     * @param    String      $url   url to the bookmark file
     * @param    int         $folderID  id of the root folder
     * @return   mixed       true on success, -1 if error occurs
     */
    protected function parseNetscape($url, $folderID)
    {
        $this->foldersParsed = 0;
        $this->urlsParsed = 0;
        $fp = $this->openFile($url, 'parseNetscape');
        if ($fp === false) {
            return -1;
        }
        // is it a Netscape bookmark file?
        if (!preg_match('/<!DOCTYPE NETSCAPE-Bookmark-file-1>/i', $this->readLine($fp))) {
            fclose($fp);
            $this->error_message .= 'parseNetscape(): Wrong header'.LF;
            return -1;
        }
        // stack of folder IDs, the last element is the currently open folder
        $parents = array($folderID);
        while (!feof($fp)) {
            $line = $this->readLine($fp);
            if (preg_match('/<H3[^>]*>(.*)<\/H3>/i', $line, $match)) { // folder found
                $this->foldersParsed++;
                $this->structure['folders'][$this->foldersParsed] = array
                        ('name' => $match[1]
                        ,'parent' => end($parents)
                        ,'added' => $this->extractAttribute($line, 'ADD_DATE')
                        ,'descr' => ''
                        );
                $parents[] = $folderID + $this->foldersParsed;
            } elseif (preg_match('/<A HREF="([^"]*)[^>]*>(.*)<\/A>/i', $line, $match)) { // bookmark found
                $this->urlsParsed++;
                $this->structure['items'][$this->urlsParsed] = array
                        ('url' => $match[1]
                        ,'name' => $match[2]
                        ,'parent' => end($parents)
                        ,'added' => $this->extractAttribute($line, 'ADD_DATE')
                        ,'modified' => $this->extractAttribute($line, 'LAST_MODIFIED')
                        ,'visited' => $this->extractAttribute($line, 'LAST_VISIT')
                        );
            } elseif (preg_match('/<\/DL>/i', $line)) { // folder closed
                // the outermost list is closed by a </DL> as well - keep the root on the stack
                if (count($parents) > 1) {
                    array_pop($parents);
                }
            }
        }
        fclose($fp);
        return true;
    }

    /**
     * Parses an IE bookmarks folder.
     *
     * Walks the directory recursively: every sub directory becomes a folder,
     * every "*.url" file a bookmark whose name is the file name without extension.
     *
     * @param String  $url   path of the favourites directory
     * @param int  $folderID  id of the folder the directory's content belongs to
     * @param boolean  $firstCall  only true, upon the first call
     * @return   mixed  true on success, -1 if error occurs
     */
    protected function parseInternetExplorer($url, $folderID, $firstCall = true)
    {
        if ($firstCall) {
            $this->foldersParsed = 0;
            $this->urlsParsed = 0;
            $this->ieRootID = $folderID;
        }
        // open directory
        $d = @dir($url);
        if (!is_object($d)) {
            $this->error_message .= 'parseInternetExplorer(): file error: '.$url.LF;
            return -1;
        }
        $status = true;
        // strict comparison: an entry named "0" must not end the loop
        while (false !== ($entry = $d->read())) {
            if ($entry == '.' || $entry == '..') {
                continue;
            }
            $path = $url.'/'.$entry;
            if (is_dir($path)) {
                // Assign the number before descending, so sub folders get their own keys
                $this->foldersParsed++;
                $this->structure['folders'][$this->foldersParsed] = array('name' => $entry, 'descr' => '', 'parent' => $folderID);
                // visit it; a broken sub folder does not stop its siblings from being read
                if ($this->parseInternetExplorer($path, $this->ieRootID + $this->foldersParsed, false) === -1) {
                    $status = -1;
                }
            } elseif (preg_match('/\.url$/i', $entry)) { // IE internet shortcut
                $fp = @fopen($path, 'r');
                if (!is_resource($fp)) {
                    $this->error_message .= 'parseInternetExplorer(): file error: '.$path.LF;
                    $d->close();
                    return -1;
                }
                // reset per file, a shortcut without these lines must not inherit older values
                $href = '';
                $modified = '';
                while (!feof($fp)) {
                    $line = $this->readLine($fp);
                    if (preg_match('/^url=/i', $line)) {
                        $href = trim(substr($line, 4));
                    } elseif (preg_match('/^modified=/i', $line)) {
                        $modified = trim(substr($line, 9));
                    }
                }
                fclose($fp);
                $this->urlsParsed++;
                $this->structure['items'][$this->urlsParsed] = array
                        ('url' => $href
                        ,'descr' => substr($entry, 0, -4) // strip ".url"
                        ,'parent' => $folderID
                        ,'modified' => $modified
                        );
            }
        }
        $d->close();
        return $status;
    }

    /**
     * Puts the result structure back into its empty state.
     */
    private function resetStructure()
    {
        $this->structure = array('folders' => array(), 'items' => array());
    }

    /**
     * Opens a bookmark file for reading; on failure the error is appended to $error_message.
     *
     * @param String $url     path of the file
     * @param String $caller  name of the calling parser, used as prefix of the error message
     * @return resource|false
     */
    private function openFile($url, $caller)
    {
        $fp = is_file($url) ? @fopen($url, 'r') : false;
        if (!is_resource($fp)) {
            $this->error_message .= $caller.'(): File error'.LF;
            return false;
        }
        return $fp;
    }

    /**
     * Reads one complete line, without its line ending (LF as well as CRLF).
     *
     * No length limit is passed to fgets(), so long lines are never cut into pieces.
     *
     * @param resource $fp
     * @return String  the line, empty string at end of file
     */
    private function readLine($fp)
    {
        $line = fgets($fp);
        return ($line === false) ? '' : rtrim($line, "\r\n");
    }

    /**
     * Reads the next line and returns the part behind the first "=".
     *
     * @param resource $fp
     * @return String|null  null, if the line holds no "="
     */
    private function readValue($fp)
    {
        $parts = explode('=', $this->readLine($fp), 2);
        return isset($parts[1]) ? $parts[1] : null;
    }

    /**
     * Returns the content of a double quoted HTML attribute found in $line.
     *
     * @param String $line
     * @param String $attribute  attribute name, matched case insensitively
     * @return String|null  null, if the attribute is missing
     */
    private function extractAttribute($line, $attribute)
    {
        if (preg_match('/'.preg_quote($attribute, '/').'="([^"]*)/i', $line, $match)) {
            return $match[1];
        }
        return null;
    }
}
?>
Return current item: phlyMail Lite