<?php
/*
php httpRetriever for PubMed, jStor, Amazon and Google
Copyright (C) 2010 Pierre-Luc Germain (plger)
hide@address.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License (version 3) as
published by the Free Software Foundation.
Excluded from the restrictions of this license is non-original code
that is already published under another license (BSD, for instance).
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
See http://www.gnu.org/licenses/ for the complete license.
*/
/**
 * Factory returning the retriever object that matches a platform name.
 *
 * Recognised names are 'amazon', 'jstor', 'pubmed', 'google' and 'geocode';
 * the file(s) declaring the matching class are loaded on demand. Any other
 * value (including the default false) yields a plain httpRetriever.
 *
 * @param mixed $type Platform name, or false for the generic retriever.
 * @return object The instantiated retriever.
 */
function createHttpRequest($type=false) {
    // Platform name => (files to load, in order; class to instantiate).
    $platforms = array(
        'amazon'  => array(array('class.amazonRetriever.php'), 'amazonRetriever'),
        'jstor'   => array(array('class.jstorRetriever.php'), 'jstorRetriever'),
        'pubmed'  => array(array('class.pubmedRetriever.php'), 'pubmedRetriever'),
        'google'  => array(array('JSON.phps', 'class.googleRetriever.php'), 'googleRetriever'),
        'geocode' => array(array('JSON.phps', 'class.geocodeRetriever.php'), 'geocodeRetriever'),
    );

    foreach($platforms as $platformName => $definition){
        // Loose comparison on purpose: it matches names the same way a
        // switch statement would, first hit wins.
        if($type == $platformName){
            foreach($definition[0] as $dependency){
                require_once($dependency);
            }
            $className = $definition[1];
            return new $className();
        }
    }

    // No platform matched: fall back to the generic retriever.
    return new httpRetriever();
}
/**
 * Generic HTTP page retriever.
 *
 * Fetches a URL through cURL or file_get_contents(), keeps a log of debug
 * and error entries, and stores a set of query parameters that can be
 * restricted to a whitelist ($allowed_params).
 */
class httpRetriever {
    /** Log entries, each array($error, $title, $details, $file, $line). */
    private $debug_log = array();
    /** True once at least one entry was logged with $error set. */
    private $has_errors = false;
    /** 'get' tries file_get_contents() first; any other value goes straight to cURL. */
    public $preferential_get_method = 'curl';
    /** Referer sent with cURL requests, or false for none. */
    public $referer = false;
    /** Value returned by get_result_count(); false until something sets it. */
    public $resultCount = false;
    /** Parameters stored through setParam()/setParams(). */
    public $params = array();
    /** Array of accepted parameter names, or false to accept any name. */
    public $allowed_params = false;

    /**
     * Prints a short usage sample wrapped in a <pre> element.
     */
    public function help(){
        echo '<pre>Sample usage:
$retriever = createHttpRequest();
$pageContent = $retriever->fetch_results($url);
For platform-specific retrievers, you can also use the standardized functions:
$retriever->setQueryParam($value);
$results = $retriever->fetch_results();
$results = $retriever->standardize($results);
</pre>';
    }

    /**
     * @return mixed The current value of $resultCount.
     */
    public function get_result_count(){
        return $this->resultCount;
    }

    /**
     * Fetches the page at $url.
     *
     * @param string $url Address to fetch.
     * @return mixed Page content, or false when it could not be fetched.
     */
    public function fetch_results($url){
        return $this->retrieve_page($url);
    }

    /**
     * Returns (and optionally prints) the parameter whitelist.
     *
     * @param bool $print When true, echoes the comma-separated list.
     * @return mixed The whitelist array, or false when unrestricted.
     */
    public function getAllowedParams($print = true){
        if($print){
            // $allowed_params is false when unrestricted; implode() only accepts arrays.
            $list = is_array($this->allowed_params) ? implode(', ', $this->allowed_params) : '';
            echo 'Allowed params: '.$list;
        }
        return $this->allowed_params;
    }

    /**
     * Stores several parameters at once through setParam().
     *
     * @param array $params Map of parameter name => value.
     */
    public function setParams($params){
        foreach($params as $key=>$value) $this->setParam($key, $value);
    }

    /**
     * Stores one parameter if it is allowed.
     *
     * @param string $param Parameter name.
     * @param mixed $value Parameter value.
     * @return bool True when stored; false (with a non-error log entry)
     *              when the name is not in the whitelist.
     */
    public function setParam($param, $value){
        if(!$this->allowed_params || in_array($param, $this->allowed_params)){
            $this->params[$param] = $value;
            return true;
        }
        $this->doDebug(false,'Invalid parameter.', 'Parameter "'.$param.'" is not registered', __FILE__,__LINE__);
        return false;
    }

    /**
     * Returns (and optionally prints) the stored parameters.
     *
     * @param bool $print When true, dumps the parameters inside a <pre> element.
     * @return array The stored parameters.
     */
    public function getParams($print = true){
        if($print){
            echo '<pre>Params: ';
            print_r($this->params);
            echo '</pre>';
        }
        return $this->params;
    }

    /**
     * Appends an entry to the debug log.
     *
     * @param bool $error Whether the entry is an error (sets the error flag).
     * @param string $title Short title.
     * @param mixed $details Optional details, or false.
     * @param mixed $file Optional source file path (only its basename is kept), or false.
     * @param mixed $line Optional source line, or false.
     */
    public function doDebug($error, $title, $details=false, $file=false, $line=false){
        $this->debug_log[] = array($error, $title, $details, $file?basename($file):false, $line);
        if($error) $this->has_errors = true;
    }

    /**
     * @return bool True when at least one error entry was logged.
     */
    public function hasErrors(){
        return $this->has_errors;
    }

    /**
     * @return array Raw log entries, in insertion order.
     */
    public function getLogArray(){
        return $this->debug_log;
    }

    /**
     * Renders the log as HTML, one <div> per entry (red for errors).
     *
     * Titles and details are inserted as-is, without HTML escaping.
     *
     * @return string The HTML fragment.
     */
    public function getLog(){
        $output = '';
        foreach($this->debug_log as $entry){
            $output .= '<div '.($entry[0]?'style="color: red;"':'').'>';
            if($entry[3]) $output .= '<span style="float: right;">'.$entry[3].($entry[4]?', line '.$entry[4]:'').'</span>';
            $output .= '<b>'.$entry[1].'</b>';
            if($entry[2]) $output .= '<br/>'.$entry[2];
            $output .= "</div>\n";
        }
        return $output;
    }

    /**
     * Appends a query string built from $params to $base.
     *
     * Values are decoded then re-encoded so that already-encoded values are
     * not encoded twice; keys are used as-is. $base is concatenated
     * unchanged (no "?" is inserted).
     *
     * @param string $base Beginning of the URL.
     * @param array $params Map of parameter name => value.
     * @return string The assembled URL.
     */
    public function get_url_from_params($base, $params){
        $url = '';
        foreach($params as $key=>$value){
            $url .= ($url==''?'':'&').$key.'='.urlencode(urldecode($value));
        }
        // Fixed: the original concatenated the bare constant `url` instead of $url.
        return $base.$url;
    }

    /**
     * Fetches a page with cURL (15 s total timeout, 10 s connect timeout,
     * redirects followed). Logs an error entry on failure.
     *
     * @param string $url Address to fetch.
     * @return mixed Page content, or a falsy value on failure.
     */
    private function retrieve_page_curl($url){
        $curl = curl_init();
        if($curl === false){
            $this->doDebug(true,'Could not fetch page.', 'Could not fetch page ('.$url.') using cURL', __FILE__,__LINE__);
            return false;
        }
        // Fixed: the original set this option on an undefined $ch handle.
        curl_setopt ($curl, CURLOPT_HEADER, 0);
        if($this->referer) curl_setopt ($curl, CURLOPT_REFERER, $this->referer);
        curl_setopt ($curl, CURLOPT_URL, $url);
        curl_setopt ($curl, CURLOPT_TIMEOUT, 15);
        curl_setopt ($curl, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt ($curl, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt ($curl, CURLOPT_FORBID_REUSE, 1);
        curl_setopt ($curl, CURLOPT_RETURNTRANSFER, 1);
        $html = curl_exec ($curl);
        curl_close ($curl);
        if(!$html) $this->doDebug(true,'Could not fetch page.', 'Could not fetch page ('.$url.') using cURL', __FILE__,__LINE__);
        return $html;
    }

    /**
     * Fetches a page with file_get_contents(). Logs an error entry on failure.
     *
     * @param string $url Address to fetch.
     * @return mixed Page content, or a falsy value on failure.
     */
    public function retrieve_page_get($url){
        $res = file_get_contents($url);
        if(!$res) $this->doDebug(true,'Could not fetch page.', 'Could not fetch page ('.$url.') using file_get_contents', __FILE__,__LINE__);
        return $res;
    }

    /**
     * Fetches a page using the preferred method, then the other one.
     *
     * When $preferential_get_method is 'get', file_get_contents() is tried
     * first. cURL is then used if available; if the cURL functions are
     * missing, an error is logged and file_get_contents() is used as a
     * fallback unless it was already attempted.
     *
     * @param string $url Address to fetch.
     * @return mixed Page content, or false when every attempt failed.
     */
    public function retrieve_page($url){
        // Tracks whether file_get_contents() was already attempted, so the
        // fallback below does not repeat a request that just failed.
        $triedGet = false;
        if($this->preferential_get_method == 'get'){
            $triedGet = true;
            $res = $this->retrieve_page_get($url);
            if($res) return $res;
        }
        if(function_exists('curl_init')){
            $res = $this->retrieve_page_curl($url);
            if($res) return $res;
        }else{
            $this->doDebug(true,'Could not fetch page.', 'Could not fetch page ('.$url.'). cURL functions not found', __FILE__,__LINE__);
            if(!$triedGet) return $this->retrieve_page_get($url);
        }
        return false;
    }

    /**
     * Outputs or returns a list of items depending on $return_type.
     *
     * - 'xml': sends an XML content type (if headers can still be sent) and
     *   echoes the items as <item>/<field> elements; returns nothing.
     * - 'display': echoes a print_r() dump inside a <pre> element; returns nothing.
     * - anything else: returns $itemlist untouched.
     *
     * @param array $itemlist List of items; for 'xml' each item is a map of field => value.
     * @param string $return_type 'xml', 'display', or any other value.
     * @return mixed $itemlist for the default mode, otherwise null.
     */
    public function manage_return($itemlist, $return_type){
        if($return_type == 'xml'){
            if(!headers_sent()) header('Content-Type: application/xml');
            echo '<root>';
            foreach($itemlist as $item){
                echo "\n<item>";
                foreach($item as $field=>$value){
                    $name = htmlspecialchars((string)$field, ENT_QUOTES);
                    // A literal "]]>" would terminate the CDATA section early; split it.
                    $text = str_replace(']]>', ']]]]><![CDATA[>', (string)$value);
                    echo '<field name="'.$name.'"><![CDATA['.$text.']]></field>'."\n";
                }
                echo "\n</item>";
            }
            echo "\n</root>";
        }elseif($return_type == 'display'){
            // Fixed: the original guarded this dump with an undefined $debug
            // variable, so 'display' never displayed anything.
            echo '<pre>';
            print_r($itemlist);
            echo '</pre>';
        }else{
            return $itemlist;
        }
    }

    /**
     * Placeholder; the generic retriever has no query parameter.
     *
     * @param mixed $value Ignored here.
     * @return bool Always false.
     */
    public function setQueryParam($value){
        return false;
    }

    /**
     * Placeholder; the generic retriever returns results untouched.
     *
     * @param array $array Raw results.
     * @return array The same results.
     */
    public function standardize($array){
        // Fixed: the original invoked $array as a function.
        return $array;
    }
}