Location: PHPKode > projects > Entier Studio > entierstudio/framework/httpclient.php
<?php
/*************************************************
Snoopy - the PHP net client
Author: Monte Ohrt <hide@address.com>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 0.93
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
You may contact the author of Snoopy by e-mail at:
hide@address.com
Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510
The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.com
*************************************************/
/**
 * Minimal HTTP/1.0 client (a renamed copy of Snoopy 0.93).
 *
 * Fetches pages or submits forms over plain "http", optionally through a
 * proxy, follows Location/meta-refresh redirects and <frame> sources up to
 * configurable depths, and can reduce a page to its links or its text.
 *
 * Results are left in $this->results (a string, or an array of strings once
 * framed content has been fetched); failures are reported by a false return
 * value together with $this->error or $this->timed_out.
 *
 * The class deliberately keeps PHP 4 compatible syntax ("var", array()) but
 * no longer relies on each(), which was removed in PHP 8.
 */
class HttpClient {
    /**** Public variables ****/
    /* user definable vars */
    var $host = "www.php.net"; // host name we are connecting to
    var $port = 80; // port we are connecting to
    // NOTE: only overwritten when a URI carries an
    // explicit port, so a port taken from one URI
    // stays in effect for later requests.
    var $proxy_host = ""; // proxy host to use
    var $proxy_port = ""; // proxy port to use
    var $agent = "Snoopy v0.93"; // agent we masquerade as
    var $referer = ""; // referer info to pass
    var $cookies = array(); // array of cookies to pass
    // $cookies["username"]="joe";
    var $rawheaders = array(); // array of raw headers to send
    // $rawheaders["Content-type"]="text/html";
    var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
    var $lastredirectaddr = ""; // contains address of last redirected address
    var $offsiteok = true; // allows redirection off-site
    var $maxframes = 0; // frame content depth maximum. 0 = disallow
    var $expandlinks = true; // expand links to fully qualified URLs.
    // this only applies to fetchlinks()
    // or submitlinks()
    var $passcookies = true; // pass set cookies back through redirects
    // NOTE: this currently does not respect
    // dates, domains or paths.
    var $user = ""; // user for http authentication
    var $pass = ""; // password for http authentication
    // http accept types
    var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
    var $results = ""; // where the content is put
    var $error = ""; // error messages sent here
    var $response_code = ""; // response code returned from server
    var $headers = array(); // headers returned from server sent here
    var $maxlength = 500000; // max return data length (body)
    var $read_timeout = 0; // timeout on read operations, in seconds
    // set to 0 to disallow timeouts
    var $timed_out = false; // if a read operation timed out
    /**** Private variables ****/
    var $_maxlinelen = 4096; // max line length (headers)
    var $_httpmethod = "GET"; // default http request method
    var $_httpversion = "HTTP/1.0"; // default http request version
    var $_submit_method = "POST"; // default submit method
    var $_submittype = "application/x-www-form-urlencoded"; // default submit type
    var $_redirectaddr = false; // will be set if page fetched is a redirect
    var $_redirectdepth = 0; // increments on an http redirect (never reset)
    var $_frameurls = array(); // frame src urls
    var $_framedepth = 0; // increments on frame depth
    var $_isproxy = false; // set if using a proxy server
    var $_fp_timeout = 30; // timeout for socket connection

    /*======================================================================*\
    Function:	fetch
    Purpose:	fetch the contents of a web page (http only)
    Input:		$URI	the absolute location of the page to fetch
    Output:		$this->results	the output text from the fetch
    Returns:	false when the URI is unusable or the connection fails
    			($this->error is set), true otherwise - including when a
    			followed redirect or frame fetch fails.
    \*======================================================================*/
    function fetch($URI) {
        $URI_PARTS = $this->_preparetarget($URI);
        if ($URI_PARTS === false) {
            return false;
        }
        $fp = null;
        if (!$this->_connect($fp)) {
            return false;
        }
        // a proxy needs the entire URI, an origin server only the path
        $target = $this->_isproxy ? $URI : $this->_requestpath($URI_PARTS);
        $this->_httprequest($target, $fp, $URI, $this->_httpmethod);
        $this->_disconnect($fp);
        /* url was redirected, check if we've hit the max depth */
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
            // only follow redirect if it's on this site, or offsiteok is true
            if ($this->offsiteok || $this->_issamesite($this->_redirectaddr)) {
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                $this->fetch($this->_redirectaddr);
            }
        }
        $this->_fetchframes();
        return true;
    }

    /*======================================================================*\
    Function:	submit
    Purpose:	submit an http form
    Input:		$URI		the absolute location to post the data
    			$formvars	the formvars to use.
    						format: $formvars["var"] = "val";
    						an array value is sent as repeated var[]=val
    Output:		$this->results	the text output from the post
    Returns:	same contract as fetch()
    \*======================================================================*/
    function submit($URI, $formvars = "") {
        $postdata = $this->_encodeformvars($formvars);
        $URI_PARTS = $this->_preparetarget($URI);
        if ($URI_PARTS === false) {
            return false;
        }
        $fp = null;
        if (!$this->_connect($fp)) {
            return false;
        }
        // a proxy needs the entire URI, an origin server only the path
        $target = $this->_isproxy ? $URI : $this->_requestpath($URI_PARTS);
        $this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submittype, $postdata);
        $this->_disconnect($fp);
        /* url was redirected, check if we've hit the max depth */
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
            // only follow redirect if it's on this site, or offsiteok is true
            if ($this->offsiteok || $this->_issamesite($this->_redirectaddr)) {
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                // the form data is posted again to the redirect target
                $this->submit($this->_redirectaddr, $formvars);
            }
        }
        $this->_fetchframes();
        return true;
    }

    /*======================================================================*\
    Function:	fetchlinks
    Purpose:	fetch the links from a web page
    Input:		$URI	where you are fetching from
    Output:		$this->results	an array of the URLs (one array per
    			document when framed content was fetched)
    \*======================================================================*/
    function fetchlinks($URI) {
        if (!$this->fetch($URI)) {
            return false;
        }
        if (is_array($this->results)) {
            // expand per document: each entry becomes its own array of links
            for ($x = 0; $x < count($this->results); $x++) {
                $this->results[$x] = $this->_striplinks($this->results[$x]);
                if ($this->expandlinks) $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        } else {
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks) $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }

    /*======================================================================*\
    Function:	fetchtext
    Purpose:	fetch the text from a web page, stripping the markup
    Input:		$URI	where you are fetching from
    Output:		$this->results	the text from the web page
    \*======================================================================*/
    function fetchtext($URI) {
        if (!$this->fetch($URI)) {
            return false;
        }
        if (is_array($this->results)) {
            for ($x = 0; $x < count($this->results); $x++) {
                $this->results[$x] = $this->_striptext($this->results[$x]);
            }
        } else {
            $this->results = $this->_striptext($this->results);
        }
        return true;
    }

    /*======================================================================*\
    Function:	submitlinks
    Purpose:	grab links from a form submission
    Input:		$URI		where you are submitting to
    			$formvars	the formvars to use, see submit()
    Output:		$this->results	an array of the links from the post
    \*======================================================================*/
    function submitlinks($URI, $formvars) {
        if (!$this->submit($URI, $formvars)) {
            return false;
        }
        if (is_array($this->results)) {
            for ($x = 0; $x < count($this->results); $x++) {
                $this->results[$x] = $this->_striplinks($this->results[$x]);
                if ($this->expandlinks) $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        } else {
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks) $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }

    /*======================================================================*\
    Function:	submittext
    Purpose:	grab text from a form submission
    Input:		$URI		where you are submitting to
    			$formvars	the formvars to use, see submit()
    Output:		$this->results	the text from the web page
    Note:		link expansion is intentionally not applied here: the
    			result is plain text, and expanding it used to prefix the
    			text with the base URL.
    \*======================================================================*/
    function submittext($URI, $formvars) {
        if (!$this->submit($URI, $formvars)) {
            return false;
        }
        if (is_array($this->results)) {
            for ($x = 0; $x < count($this->results); $x++) {
                $this->results[$x] = $this->_striptext($this->results[$x]);
            }
        } else {
            $this->results = $this->_striptext($this->results);
        }
        return true;
    }

    /*======================================================================*\
    Function:	_preparetarget
    Purpose:	validate a URI and copy its user, password, host and port
    			onto the object before a request is made
    Input:		$URI	the absolute URI about to be requested
    Output:		the parse_url() parts on success; false (with
    			$this->error set) for a non-http scheme or a missing host
    \*======================================================================*/
    function _preparetarget($URI) {
        $URI_PARTS = parse_url($URI);
        if (!is_array($URI_PARTS)) {
            // parse_url() gives false for seriously malformed input
            $URI_PARTS = array();
        }
        if (!empty($URI_PARTS["user"])) $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"])) $this->pass = $URI_PARTS["pass"];
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
        if ($scheme !== "http") {
            // not a valid protocol
            $this->error = 'Invalid protocol "' . $scheme . '"' . "\n";
            return false;
        }
        if (empty($URI_PARTS["host"])) {
            $this->error = 'Invalid URI "' . $URI . '": no host' . "\n";
            return false;
        }
        $this->host = $URI_PARTS["host"];
        if (!empty($URI_PARTS["port"])) $this->port = $URI_PARTS["port"];
        return $URI_PARTS;
    }

    /*======================================================================*\
    Function:	_requestpath
    Purpose:	build the request target (path plus query string) that is
    			sent to an origin server
    Input:		$URI_PARTS	parts as returned by parse_url()
    Output:		the path, "/" when the URI has none, followed by
    			"?query" when a query string is present
    \*======================================================================*/
    function _requestpath($URI_PARTS) {
        $path = (isset($URI_PARTS["path"]) && $URI_PARTS["path"] !== "") ? $URI_PARTS["path"] : "/";
        if (isset($URI_PARTS["query"]) && $URI_PARTS["query"] !== "") {
            $path .= "?" . $URI_PARTS["query"];
        }
        return $path;
    }

    /*======================================================================*\
    Function:	_encodeformvars
    Purpose:	turn form variables into a urlencoded request body
    Input:		$formvars	array of name => value; a value may itself be
    						an array. "" or null means "no variables".
    						Any other scalar is treated as a one-element
    						array, as settype() always did here.
    Output:		"name=value&name2=value2" (no trailing "&")
    \*======================================================================*/
    function _encodeformvars($formvars) {
        if ($formvars === "" || $formvars === null) {
            return "";
        }
        settype($formvars, "array");
        $pairs = array();
        foreach ($formvars as $key => $val) {
            if (is_array($val)) {
                foreach ($val as $item) {
                    $pairs[] = urlencode((string)$key) . "[]=" . urlencode((string)$item);
                }
            } else {
                $pairs[] = urlencode((string)$key) . "=" . urlencode((string)$val);
            }
        }
        return implode("&", $pairs);
    }

    /*======================================================================*\
    Function:	_issamesite
    Purpose:	tell whether a URL points at the host of the current request
    Input:		$url	an absolute URL
    Output:		true when the URL's host equals $this->host, compared
    			case-insensitively. The whole host is compared, so
    			"host.evil.org" does not pass for "host".
    \*======================================================================*/
    function _issamesite($url) {
        $parts = parse_url($url);
        if (!is_array($parts) || !isset($parts["host"])) {
            return false;
        }
        return strcasecmp($parts["host"], $this->host) == 0;
    }

    /*======================================================================*\
    Function:	_fetchframes
    Purpose:	fetch the frame sources collected by the last request while
    			the frame depth limit has not been reached
    Output:		appends to $this->results through fetch()
    \*======================================================================*/
    function _fetchframes() {
        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
            // work on a copy: the nested fetch() calls collect new frame urls
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();
            foreach ($frameurls as $frameurl) {
                if ($this->_framedepth >= $this->maxframes) {
                    break;
                }
                $this->fetch($frameurl);
                $this->_framedepth++;
            }
        }
    }

    /*======================================================================*\
    Function:	_striplinks
    Purpose:	strip the hyperlinks from an html document
    Input:		$document	document to strip.
    Output:		$match		an array of the links (empty when there are
    						none): quoted href values first, then unquoted
    \*======================================================================*/
    function _striplinks($document) {
        preg_match_all("'<a\s+href\s*=\s*					# find <a href=
						([\"\'])?							# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
															# quote, otherwise match up to next space
						'isx", $document, $links);
        $match = array();
        // catenate the non-empty matches from the conditional subpattern:
        // group 2 holds quoted values, group 3 unquoted ones
        for ($group = 2; $group <= 3; $group++) {
            if (!isset($links[$group]) || !is_array($links[$group])) {
                continue;
            }
            foreach ($links[$group] as $val) {
                if ($val !== "") $match[] = $val;
            }
        }
        // return the links
        return $match;
    }

    /*======================================================================*\
    Function:	_striptext
    Purpose:	strip the markup from an html document
    Input:		$document	document to strip.
    Output:		$text		the resulting text
    Note:		only a handful of common entities are decoded; the
    			characters above 127 are emitted as single bytes (chr()).
    \*======================================================================*/
    function _striptext($document) {
        // the replacements run in order, so &amp; is decoded last: otherwise
        // "&amp;lt;" would first become "&lt;" and then, wrongly, "<"
        $search = array(
            "'<script[^>]*?>.*?</script>'si", // strip out javascript
            "'<[\/\!]*?[^<>]*?>'si", // strip out html tags
            "'([\r\n])[\s]+'", // strip out white space
            "'&(quot|#34);'i", // replace html entities
            "'&(lt|#60);'i",
            "'&(gt|#62);'i",
            "'&(nbsp|#160);'i",
            "'&(iexcl|#161);'i",
            "'&(cent|#162);'i",
            "'&(pound|#163);'i",
            "'&(copy|#169);'i",
            "'&(amp|#38);'i"
        );
        $replace = array(
            "",
            "",
            "\\1",
            "\"",
            "<",
            ">",
            " ",
            chr(161),
            chr(162),
            chr(163),
            chr(169),
            "&"
        );
        $text = preg_replace($search, $replace, $document);
        return $text;
    }

    /*======================================================================*\
    Function:	_expandlinks
    Purpose:	expand each link into a fully qualified URL
    Input:		$links			the link (string) or links (array) to qualify
    			$URI			the full URI to get the base from
    Output:		$expandedLinks	the expanded link(s), same shape as $links
    Rules:		links that already carry a scheme (http:, https:, mailto:,
    			...) are kept; "//host/x" gets the base scheme; "/x" is
    			resolved against the site root; anything else against the
    			directory of $URI. A last path segment of $URI that
    			contains a dot is taken to be a file name and dropped.
    \*======================================================================*/
    function _expandlinks($links, $URI) {
        if (preg_match('~^([a-z][a-z0-9+.\-]*)://([^/?#]+)([^?#]*)~i', (string)$URI, $parts)) {
            $scheme = $parts[1];
            $root = $scheme . "://" . $parts[2];
            $path = $parts[3];
        } else {
            // no usable base URI: fall back to the current host
            $scheme = "http";
            $root = "http://" . $this->host;
            $path = "";
        }
        // only the path is inspected, so a host name is never mistaken for a file
        $dir = rtrim(preg_replace('~/[^/]*\.[^/]+$~', "", $path), "/");
        $base = $root . $dir;
        if (is_array($links)) {
            $expandedLinks = array();
            foreach ($links as $key => $link) {
                $expandedLinks[$key] = $this->_expandlink($link, $scheme, $root, $base);
            }
            return $expandedLinks;
        }
        return $this->_expandlink($links, $scheme, $root, $base);
    }

    /*======================================================================*\
    Function:	_expandlink
    Purpose:	qualify a single link, helper for _expandlinks
    Input:		$link	the link to qualify
    			$scheme	scheme of the base URI, e.g. "http"
    			$root	scheme://authority of the base URI
    			$base	$root plus the base directory, no trailing slash
    Output:		the qualified link
    \*======================================================================*/
    function _expandlink($link, $scheme, $root, $base) {
        $link = (string)$link;
        if (substr($link, 0, 2) == "//") {
            $link = $scheme . ":" . $link;
        } elseif (substr($link, 0, 1) == "/") {
            $link = $root . $link;
        } elseif (!preg_match('~^[a-z][a-z0-9+.\-]*:~i', $link)) {
            $link = $base . "/" . $link;
        }
        // collapse "/./" and "/dir/../", leaving scheme://authority alone so
        // the host name can never be swallowed as if it were a directory
        if (preg_match('~^([a-z][a-z0-9+.\-]*://[^/?#]*)(.*)$~is', $link, $split)) {
            $prefix = $split[1];
            $rest = $split[2];
        } else {
            $prefix = "";
            $rest = $link;
        }
        $rest = preg_replace(array("|/\./|", "|/[^\/]+/\.\./|"), "/", $rest);
        return $prefix . $rest;
    }

    /*======================================================================*\
    Function:	_httprequest
    Purpose:	go get the http data from the server
    Input:		$url			the request target (path, or full URI for a proxy)
    			$fp				the current open file pointer
    			$URI			the full URI
    			$http_method	request method, e.g. "GET" or "POST"
    			$content_type	Content-type to send, if any
    			$body			body contents to send if any (POST)
    Output:		false when a read timed out, true otherwise. Fills
    			$this->headers, $this->response_code, $this->results,
    			$this->_redirectaddr and $this->_frameurls.
    \*======================================================================*/
    function _httprequest($url, $fp, $URI, $http_method, $content_type = "", $body = "") {
        // must run first: it reads the headers of the previous response
        if ($this->passcookies && $this->_redirectaddr) $this->setcookies();
        $URI_PARTS = parse_url($URI);
        $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "http";
        if (empty($url)) $url = "/";
        $body = (string)$body;
        // every line of the request head ends in CRLF, the request line included
        $headers = $http_method . " " . $url . " " . $this->_httpversion . "\r\n";
        if (!empty($this->agent)) $headers .= "User-Agent: " . $this->agent . "\r\n";
        if (!empty($this->host)) {
            $headers .= "Host: " . $this->host;
            // a non-default port is part of the Host header
            if (!empty($this->port) && $this->port != 80) $headers .= ":" . $this->port;
            $headers .= "\r\n";
        }
        if (!empty($this->accept)) $headers .= "Accept: " . $this->accept . "\r\n";
        if (!empty($this->referer)) $headers .= "Referer: " . $this->referer . "\r\n";
        if (!empty($this->cookies)) {
            if (!is_array($this->cookies)) $this->cookies = (array)$this->cookies;
            // all cookies travel in one Cookie header, separated by "; "
            $pairs = array();
            foreach ($this->cookies as $cookieKey => $cookieVal) {
                $pairs[] = $cookieKey . "=" . $cookieVal;
            }
            $headers .= "Cookie: " . implode("; ", $pairs) . "\r\n";
        }
        if (!empty($this->rawheaders)) {
            if (!is_array($this->rawheaders)) $this->rawheaders = (array)$this->rawheaders;
            foreach ($this->rawheaders as $headerKey => $headerVal) {
                $headers .= $headerKey . ": " . $headerVal . "\r\n";
            }
        }
        if (!empty($content_type)) $headers .= "Content-type: $content_type\r\n";
        // a POST announces its length even when the body is empty; the strict
        // test keeps a body of "0" from being treated as no body
        if ($body !== "" || strtoupper($http_method) == "POST") {
            $headers .= "Content-length: " . strlen($body) . "\r\n";
        }
        if (!empty($this->user) || !empty($this->pass)) {
            $headers .= "Authorization: Basic " . base64_encode($this->user . ":" . $this->pass) . "\r\n";
        }
        $headers .= "Connection: close\r\n";
        $headers .= "\r\n";
        // set the read timeout if needed
        if ($this->read_timeout > 0) stream_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;
        fwrite($fp, $headers . $body);
        $this->_redirectaddr = false;
        // reset rather than unset, so the property always stays countable
        $this->headers = array();
        while (($currentHeader = fgets($fp, $this->_maxlinelen)) !== false) {
            if ($this->_check_timeout($fp)) return false;
            // a blank line (CRLF or bare LF) ends the header block
            if (rtrim($currentHeader, "\r\n") === "") break;
            // if a header begins with Location: or URI:, set the redirect
            if (preg_match('/^(Location|URI):\s*(.*)$/i', chop($currentHeader), $matches)) {
                $location = $matches[2];
                if ($location !== "") {
                    if (preg_match('~^[a-z][a-z0-9+.\-]*://~i', $location)) {
                        // the header already carries scheme and host
                        $this->_redirectaddr = $location;
                    } else {
                        // no host in the path, so prepend it, without
                        // producing a double slash
                        $this->_redirectaddr = $scheme . "://" . $this->host . ":" . $this->port;
                        if (substr($location, 0, 1) != "/") $this->_redirectaddr .= "/";
                        $this->_redirectaddr .= $location;
                    }
                }
            }
            if (preg_match("|^HTTP/|", $currentHeader)) $this->response_code = $currentHeader;
            $this->headers[] = $currentHeader;
        }
        // a single fread() on a socket returns at most one chunk, so keep
        // reading until the server closes or maxlength bytes have arrived
        $results = "";
        while (!feof($fp) && strlen($results) < $this->maxlength) {
            $chunk = fread($fp, min(8192, $this->maxlength - strlen($results)));
            if ($chunk === false || $chunk === "") {
                break;
            }
            $results .= $chunk;
        }
        if ($this->_check_timeout($fp)) return false;
        // check if there is a redirect meta tag
        if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
            $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
        }
        // have we hit our frame depth and is there frame src to fetch?
        if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame[\s]*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
            $this->results[] = $results;
            // frame sources are resolved against the document that names them
            for ($x = 0; $x < count($match[1]); $x++) {
                $this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI);
            }
        }
        // have we already fetched framed content?
        elseif (is_array($this->results)) $this->results[] = $results;
        // no framed content
        else $this->results = $results;
        return true;
    }

    /*======================================================================*\
    Function:	setcookies()
    Purpose:	set cookies for a redirection: copies every Set-Cookie
    			name/value found in $this->headers into $this->cookies
    			(attributes such as path or expires are ignored)
    \*======================================================================*/
    function setcookies() {
        if (!is_array($this->headers)) {
            return;
        }
        foreach ($this->headers as $header) {
            // the stored headers still end in CRLF; strip it so it cannot
            // end up inside a cookie value and break the next request
            if (preg_match("/^set-cookie:[\s]*([^=]+)=([^;]+)/i", rtrim($header, "\r\n"), $match)) {
                $this->cookies[$match[1]] = $match[2];
            }
        }
    }

    /*======================================================================*\
    Function:	_check_timeout
    Purpose:	checks whether timeout has occurred
    Input:		$fp	file pointer
    Output:		true (and $this->timed_out set) when read timeouts are
    			enabled and the stream reports one, false otherwise
    \*======================================================================*/
    function _check_timeout($fp) {
        if ($this->read_timeout > 0) {
            $fp_status = stream_get_meta_data($fp);
            if (!empty($fp_status["timed_out"])) {
                $this->timed_out = true;
                return true;
            }
        }
        return false;
    }

    /*======================================================================*\
    Function:	_connect
    Purpose:	make a socket connection to the proxy, when both
    			proxy_host and proxy_port are set, or to host:port
    Input:		$fp	file pointer, set by reference
    Output:		true on success; false with $this->error set on failure
    \*======================================================================*/
    function _connect(&$fp) {
        if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
            $this->_isproxy = true;
            $host = $this->proxy_host;
            $port = $this->proxy_port;
        } else {
            // reset, in case the proxy settings were cleared after an earlier request
            $this->_isproxy = false;
            $host = $this->host;
            $port = $this->port;
        }
        $fp = fsockopen($host, (int)$port, $errno, $errstr, $this->_fp_timeout);
        if ($fp) {
            // socket connection succeeded
            return true;
        }
        // socket connection failed; each case must break, or the specific
        // message is overwritten by the ones below it
        switch ($errno) {
            case -3:
                $this->error = "socket creation failed (-3)";
                break;
            case -4:
                $this->error = "dns lookup failure (-4)";
                break;
            case -5:
                $this->error = "connection refused or timed out (-5)";
                break;
            default:
                $this->error = "connection failed (" . $errno . ")";
        }
        return false;
    }

    /*======================================================================*\
    Function:	_disconnect
    Purpose:	disconnect a socket connection
    Input:		$fp	file pointer
    Output:		the result of fclose()
    \*======================================================================*/
    function _disconnect($fp) {
        return (fclose($fp));
    }
}
?>
Return current item: Entier Studio