Location: PHPKode > scripts > Caterpillar > cballou-caterpillar-fc0f22f/inc/RollingCurl.php
<?php
class RollingCurl {

    // number of simultaneous connections currently allowed; derived from
    // max_window_size and the number of queued requests
    protected $window_size = 1;
    // the maximum allowable window size
    protected $max_window_size = 5;
    // holds the callback (a method name on this object, or any PHP callable)
    protected $callback;
    // the request queue (numerically indexed, in insertion order)
    protected $requests = array();
    // lookup map (url => true) of every url currently in the queue;
    // lets requestExists() answer without scanning $requests
    protected $request_urls = array();
    // number of requests in the queue
    protected $num_requests = 0;
    // extra HTTP headers sent with every request
    protected $headers = array();
    // number of transfers cURL reported as still running on the last poll
    protected $running = 0;

    // base options that are used with EVERY request
    // NOTE(review): CURLOPT_SSL_VERIFYPEER is disabled, so TLS certificates
    // are not verified. Left as-is because callers may depend on it.
    protected $options = array(CURLOPT_USERAGENT      => 'Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)',
                               CURLOPT_SSL_VERIFYPEER => 0,
                               CURLOPT_RETURNTRANSFER => 1,
                               CURLOPT_FOLLOWLOCATION => 1,
                               CURLOPT_MAXREDIRS      => 5,
                               CURLOPT_CONNECTTIMEOUT => 15,
                               CURLOPT_TIMEOUT        => 15);

    /**
     * Default constructor.
     *
     * @access	public
     * @param	string|array	$callback	name of a method on this object, or any PHP callable
     * @return	void
     */
    public function __construct($callback = null) {
        $this->callback = $callback;
    }

    /**
     * Magic getter method. Returns null for unknown or null properties.
     *
     * @access	public
     * @param	string	$name
     * @return	mixed
     */
    public function __get($name) {
        return (isset($this->{$name})) ? $this->{$name} : null;
    }

    /**
     * Magic setter method.
     *
     * "options" and "headers" are merged into the existing values instead of
     * replacing them: new options override existing ones with the same key,
     * new headers are appended to the header list.
     *
     * @access	public
     * @param	string	$name
     * @param	mixed	$value
     * @return	bool
     */
    public function __set($name, $value) {
        if ($name == "options") {
            // left operand wins in an array union, so the new values go first
            $this->options = (array) $value + $this->options;
        } else if ($name == "headers") {
            // headers are a numerically indexed list; a union would silently
            // drop entries whose index already exists, so merge instead
            $this->headers = array_merge($this->headers, (array) $value);
        } else {
            $this->{$name} = $value;
        }
        return true;
    }

    /**
     * Add a request to the requests queue unless one with the same url
     * is already queued.
     *
     * @access	public
     * @param	RollingCurl_Request	$request
     * @return	bool	always true
     */
    public function add(RollingCurl_Request $request) {
        if (!$this->requestExists($request->url)) {
            $this->rc_queue_request($request, $request->url);
        }
        return true;
    }

    /**
     * Verify a request hasn't already been added for the url specified.
     *
     * @access	public
     * @param	string	$url
     * @return	bool	true when the url is already in the queue
     */
    public function requestExists($url) {
        return isset($this->request_urls[(string) $url]);
    }

    /**
     * Create a new request and add it to the queue unless the url is
     * already queued.
     *
     * @access	public
     * @param	string	$url
     * @param	string	$method
     * @param	string	$post_data
     * @param	string	$headers
     * @param	array	$options
     * @return	bool	always true
     */
    public function addRequest($url, $method = "GET", $post_data = null, $headers = null, $options = null) {
        if (!$this->requestExists($url)) {
            $request = new RollingCurl_Request($url, $method, $post_data, $headers, $options);
            $this->rc_queue_request($request, $url);
        }
        return true;
    }

    /**
     * Shortcut to create a new GET request and add
     * it to the queue.
     *
     * @access	public
     * @param	string	$url
     * @param	string	$headers
     * @param	array	$options
     * @return	bool
     */
    public function addGetRequest($url, $headers = null, $options = null) {
        return $this->addRequest($url, "GET", null, $headers, $options);
    }

    /**
     * Shortcut to create a new POST request and add
     * it to the queue.
     *
     * @access	public
     * @param	string	$url
     * @param	string	$post_data
     * @param	string	$headers
     * @param	array	$options
     * @return	bool
     */
    public function addPostRequest($url, $post_data = null, $headers = null, $options = null) {
        return $this->addRequest($url, "POST", $post_data, $headers, $options);
    }

    /**
     * Execute the queued cURL request(s).
     *
     * A single queued request is fetched directly; two or more are fetched
     * in parallel through the rolling window.
     *
     * @access	public
     * @param	int		$max_window_size	optional override for the maximum window size
     * @return	mixed	see single_curl() / rolling_curl()
     * @throws	Exception	when the queue is empty
     */
    public function execute($max_window_size = null) {
        if ($this->num_requests < 1) {
            throw new Exception('You must have at least one request before executing rolling cURL.');
        }
        if ($this->num_requests == 1) {
            return $this->single_curl();
        }
        // start the rolling curl
        return $this->rolling_curl($max_window_size);
    }

    /**
     * Perform a single cURL request and hand the result to the callback
     * if one is configured.
     *
     * @access	private
     * @return	mixed	true when a callback was invoked, otherwise the
     *					response text starting at the <body tag
     * @throws	Exception	when the configured callback cannot be called
     */
    private function single_curl() {
        $ch = curl_init();
        curl_setopt_array($ch, $this->get_options($this->requests[0]));

        $output    = curl_exec($ch);
        $url       = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        // clear all requests and reset counts before the callback runs, so
        // anything the callback queues survives for the next execute()
        $this->resetRequests();

        $response = $this->rc_digest_response($output);

        if ($this->callback) {
            $this->rc_dispatch_callback($url, $response['body'], $http_code, $response['title'], $response['filesize']);
            return true;
        }
        return $response['body'];
    }

    /**
     * Perform parallel cURL requests and hand each result to the callback
     * (if one is configured) as soon as that request completes.
     *
     * The callback may queue further requests while this method runs; they
     * are picked up, and the window grows up to max_window_size.
     *
     * @access	private
     * @param	int		$max_window_size	optional override for the maximum window size
     * @return	bool	true once every queued request has been processed
     * @throws	Exception	on a window size below 2, a cURL multi error,
     *						or a callback that cannot be called
     */
    private function rolling_curl($max_window_size = null) {
        if ($max_window_size) {
            $this->max_window_size = $max_window_size;
        }

        // the rolling window can't be larger than the number of requests
        $this->window_size = min($this->num_requests, $this->max_window_size);

        // window size must be greater than 1
        if ($this->window_size < 2) {
            throw new Exception('Window size must be greater than 1 for parallel requests to occur.');
        }

        $master    = curl_multi_init();
        $next      = 0; // index of the next queued request to start
        $in_flight = 0; // handles currently attached to $master

        do {
            // grow the window if the callback queued more requests
            $target = min($this->num_requests, $this->max_window_size);
            if ($target > $this->window_size) {
                $this->window_size = $target;
            }

            // top the window up with queued requests
            while ($in_flight < $this->window_size && isset($this->requests[$next])) {
                $ch = curl_init();
                curl_setopt_array($ch, $this->get_options($this->requests[$next++]));
                $status = curl_multi_add_handle($master, $ch);
                if ($status !== CURLM_OK) {
                    throw new Exception('An error (' . $status . ') occurred queueing the request.');
                }
                ++$in_flight;
            }

            while (($execrun = curl_multi_exec($master, $this->running)) == CURLM_CALL_MULTI_PERFORM);
            if ($execrun != CURLM_OK) {
                throw new Exception('An error (' . $execrun . ') occurred retrieving the page.');
            }

            // collect every request that completed during this pass
            $completed = 0;
            while ($done = curl_multi_info_read($master)) {
                $handle    = $done['handle'];
                $url       = curl_getinfo($handle, CURLINFO_EFFECTIVE_URL);
                $http_code = curl_getinfo($handle, CURLINFO_HTTP_CODE);
                $output    = curl_multi_getcontent($handle);

                // remove the curl handle that just completed
                curl_multi_remove_handle($master, $handle);
                --$in_flight;
                ++$completed;

                $response = $this->rc_digest_response($output);

                // send the return values to the callback function
                if ($this->callback) {
                    $this->rc_dispatch_callback($url, $response['body'], $http_code, $response['title'], $response['filesize']);
                }
            }

            if ($completed === 0) {
                if ($this->running > 0) {
                    // nothing finished yet: block until there is socket
                    // activity instead of spinning the CPU
                    if (curl_multi_select($master, 1.0) === -1) {
                        usleep(1000);
                    }
                } else if ($in_flight > 0) {
                    // should be unreachable; guards against looping forever
                    throw new Exception('cURL reported no active transfers while requests were still outstanding.');
                }
            }

            // keep going while transfers are attached OR requests are still
            // waiting to start; cURL's running count alone is not enough,
            // because it is 0 right after the last in-flight batch finishes
        } while ($in_flight > 0 || isset($this->requests[$next]));

        curl_multi_close($master);

        // clear all requests and reset counts
        $this->resetRequests();
        return true;
    }

    /**
     * Clears all requests and resets counts.
     */
    private function resetRequests()
    {
        $this->requests = array();
        $this->request_urls = array();
        $this->window_size = 1;
        $this->num_requests = 0;
    }

    /**
     * Helper function to build the cURL option array for a request:
     * base options, overridden by the request's own options, plus the
     * url, POST data and shared headers.
     *
     * @access	private
     * @param	RollingCurl_Request	$request
     * @return	array
     */
    private function get_options(RollingCurl_Request $request) {
        // options for this entire curl object
        $options = (array) $this->options;
        $headers = $this->headers;

        // request-specific options take precedence over the base options
        // (the left operand wins in an array union)
        if ($request->options) {
            $options = $request->options + $options;
        }

        // set the request URL
        $options[CURLOPT_URL] = $request->url;

        // posting data w/ this request?
        if ($request->post_data) {
            $options[CURLOPT_POST] = 1;
            $options[CURLOPT_POSTFIELDS] = $request->post_data;
        }
        if ($headers) {
            $options[CURLOPT_HEADER] = 0;
            $options[CURLOPT_HTTPHEADER] = $headers;
        }

        return $options;
    }

    /**
     * Append a request to the queue and record its url for duplicate checks.
     *
     * @access	private
     * @param	RollingCurl_Request	$request
     * @param	string				$url	key the request is registered under
     * @return	void
     */
    private function rc_queue_request(RollingCurl_Request $request, $url) {
        $this->requests[] = $request;
        $this->request_urls[(string) $url] = true;
        ++$this->num_requests;
    }

    /**
     * Extract the page title and reduce the raw response to the part
     * starting at the <body tag.
     *
     * @access	private
     * @param	mixed	$output	raw response; anything that is not a string
     *							(a failed transfer) is treated as empty
     * @return	array	keys: 'body' (string), 'title' (string),
     *					'filesize' (int, byte length of the full response)
     */
    private function rc_digest_response($output) {
        if (!is_string($output)) {
            $output = '';
        }
        $filesize = strlen($output);

        // case-insensitive, non-greedy, may span lines and carry attributes
        $title = '';
        $matches = array();
        if (preg_match('#<title\b[^>]*>(.*?)</title>#is', $output, $matches)) {
            $title = $matches[1];
        }

        // without a body tag the whole response is kept
        $body_start = stripos($output, '<body');
        if ($body_start !== false) {
            $output = substr($output, $body_start);
        }

        return array('body' => $output, 'title' => $title, 'filesize' => $filesize);
    }

    /**
     * Invoke the configured callback with the result of one request.
     *
     * A string naming a method on this object is called as a method;
     * any other valid PHP callable is called directly.
     *
     * @access	private
     * @param	string	$url		effective url of the request
     * @param	string	$output		response text starting at the <body tag
     * @param	int		$http_code
     * @param	string	$title
     * @param	int		$filesize
     * @return	void
     * @throws	Exception	when the callback is neither a method nor callable
     */
    private function rc_dispatch_callback($url, $output, $http_code, $title, $filesize) {
        $callback = $this->callback;

        if (is_string($callback) && method_exists($this, $callback)) {
            $this->{$callback}($url, $output, $http_code, $title, $filesize);
        } else if (is_callable($callback)) {
            call_user_func($callback, $url, $output, $http_code, $title, $filesize);
        } else {
            $label = is_string($callback) ? $callback : gettype($callback);
            throw new Exception('The callback method ' . $label . ' doesnt exist.');
        }
    }

    /**
     * Derive an integer from the CRC32 checksum of a string.
     *
     * Takes the absolute value of crc32() and, when bit 31 of that value is
     * set, replaces it with its 32-bit two's complement.
     * NOTE(review): this does not look like the plain unsigned CRC32 and may
     * differ between 32-bit and 64-bit PHP. The arithmetic is left untouched
     * because stored values may depend on it -- confirm before changing.
     *
     * @access	protected
     * @param	string		$s
     * @return	int|float
     */
    protected function _crc32($s) {
        $s = abs(crc32($s));
        if ($s & 0x80000000) {
            $s ^= 0xffffffff;
            $s += 1;
        }
        return $s;
    }

    /**
     * Destructor. Releases the queue and configuration held by the object.
     */
    public function __destruct() {
        unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests, $this->request_urls);
    }
}

?>
Return current item: Caterpillar