<?php
/**
* $Id: Query.php 1631 2007-05-12 22:40:28Z matthieu $
*/
if (!class_exists('Google_Query')) {
// Library root = parent of this file's directory. Only defined here when the
// including script has not already provided its own value.
defined('__CLASS_PATH__') || define('__CLASS_PATH__', realpath(dirname(__FILE__) . '/../'));
require_once __CLASS_PATH__ . '/Cache/class_cache.php';
/**
 * Builds Google advanced-search URLs from a set of criteria and extracts the
 * number of results reported by the returned page, optionally caching it.
 *
 * @author Matthieu MARY <hide@address.com>
 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
 * @package google
 */
class Google_Query {
    /**
     * @var string language restriction as sent in "lr" ('lang_xx'), or '' for
     * no restriction
     * @access private
     */
    private $_lang = '';
    /**
     * @var int number of results requested per page
     * @access private
     */
    private $_resultsInPage = 10;
    /**
     * @var string when not empty, the search is restricted to this site
     * @access private
     */
    private $_site = '';
    /**
     * @var string file type restriction (sent as "as_filetype")
     * @access private
     */
    private $_fileType = '';
    /**
     * @var string where the words are searched (sent as "as_occt")
     * @access private
     */
    private $_position = 'any';
    /**
     * @var string period restriction as sent in "as_qdr": 'all', 'y' or
     * 'm<months>'
     * @access private
     */
    private $_period = 'all';
    /**
     * @var array words that must NOT appear in the results
     * @access private
     */
    private $_noWords = array();
    /**
     * @var string exact sentence that must appear in the results
     * @access private
     */
    private $_sentence = '';
    /**
     * @var array words of which at least one must appear in the results
     * @access private
     */
    private $_oneWords = array();
    /**
     * @var string the last query URL built from the criteria
     * @access private
     */
    private $_link = '';
    /**
     * @var array words that must all appear in the results
     * @access private
     */
    private $_keywords = array();
    /**
     * @var bool true when a usable cache object was given to the constructor
     * @access private
     */
    private $_bEnableCaching = false;
    /**
     * @var mixed the cache object, or false when caching is disabled
     * @access private
     */
    private $_oCache = false;
    /**
     * @var string regular expression body (without delimiters) used to find
     * the result counters in the page; shared by every instance
     */
    private static $_detectionPattern = "<b>([0-9, ]+)<\/b> - <b>([0-9, ]+)<\/b>[^<]+<b>([0-9, ]+)<\/b>";

    /**
     * Constructor.
     *
     * Caching is enabled only when $cache is a Cache_Lite instance; any other
     * value leaves caching disabled.
     *
     * @param mixed $cache the cache object to use, or null for no caching
     * @access public
     */
    public function __construct($cache = null) {
        $this->_bEnableCaching = ($cache instanceof Cache_Lite);
        if ($this->_bEnableCaching) {
            $this->_oCache = $cache;
        }
    }

    /**
     * Forward new cache settings to the cache object.
     *
     * Does nothing when caching is disabled (previously this was a fatal
     * error because the cache property is false in that case).
     *
     * @param int $cacheDuration the cache validity period (in seconds)
     * @param string $cacheFolder the cache folder
     * @return void
     * @access public
     */
    public function updateCache($cacheDuration = 86400, $cacheFolder = './cache/class_google') {
        if (!$this->_bEnableCaching) {
            return;
        }
        $this->_oCache->updateCache($cacheDuration, $cacheFolder);
    }

    /**
     * Restrict the search to pages written in one language.
     *
     * @param string $lang language code (ex: fr, ru, ...); 'all' or an empty
     * value removes the restriction
     * @return void
     * @access public
     */
    public function setLang($lang = 'all') {
        $code = strtolower((string) $lang);
        $this->_lang = ($code === '' || $code === 'all') ? '' : 'lang_' . $code;
    }

    /**
     * Set the number of results requested per page.
     *
     * @param int $resultsInPage number of results per page
     * @return void
     * @access public
     */
    public function setNumInPage($resultsInPage) {
        $this->_resultsInPage = intval($resultsInPage);
    }

    /**
     * Restrict the search to one site.
     *
     * @param string $site the site in which the results are searched
     * @return void
     * @access public
     */
    public function setSite($site) {
        $this->_site = $site;
    }

    /**
     * Restrict the search to one file type.
     *
     * @param string $fileType file type (documented values: pdf, ps, xls,
     * doc, ppt, rtf)
     * @return void
     * @access public
     */
    public function setFiletype($fileType) {
        $this->_fileType = $fileType;
    }

    /**
     * Set where in the page the words are searched.
     *
     * @param string $position documented values: title, body, url, links
     * @return void
     * @access public
     */
    public function setPosition($position) {
        $this->_position = $position;
    }

    /**
     * Restrict the search to pages updated within a period.
     *
     * @param int $period duration in months; 12 or more means "one year",
     * anything below 1 (including non numeric input) removes the restriction
     * @return void
     * @access public
     */
    public function setPeriod($period) {
        $months = intval($period);
        if ($months >= 12) {
            $this->_period = 'y';
        } elseif ($months < 1) {
            $this->_period = 'all';
        } else {
            $this->_period = 'm' . $months;
        }
    }

    /**
     * Set the words that must be excluded from the results.
     *
     * Non-array input is ignored, as in setKeywords().
     *
     * @param array $noWords words to exclude
     * @return void
     * @access public
     */
    public function setExclude($noWords = array()) {
        if (is_array($noWords)) {
            $this->_noWords = $noWords;
        }
    }

    /**
     * Set the exact sentence that must be found.
     *
     * @param string $sentence the exact sentence
     * @return void
     * @access public
     */
    public function setSentence($sentence) {
        $this->_sentence = $sentence;
    }

    /**
     * Set the words of which at least one must be found.
     *
     * Non-array input is ignored, as in setKeywords().
     *
     * @param array $words candidate words
     * @return void
     * @access public
     */
    public function setWords($words = array()) {
        if (is_array($words)) {
            $this->_oneWords = $words;
        }
    }

    /**
     * Replace the list of words that must all be found (in any order).
     *
     * Non-array input is ignored.
     *
     * @param array $words required words
     * @return void
     * @access public
     */
    public function setKeywords($words = array()) {
        if (is_array($words)) {
            $this->_keywords = $words;
        }
    }

    /**
     * Append one required word to the keyword list.
     *
     * @param string $keywords the keyword to add
     * @return void
     * @access public
     */
    public function addKeywords($keywords) {
        $this->_keywords[] = $keywords;
    }

    /**
     * Build the query URL from the current criteria and store it.
     *
     * Every value is URL-encoded. The "hl" parameter is only sent when a
     * language has been selected.
     *
     * @param string $separator string placed between two parameters ('&' for
     * a real request, '&amp;' for a link printed in HTML)
     * @return void
     * @access private
     */
    private function _setLink($separator = '&') {
        $params = array(
            'as_q=' . urlencode(implode(' ', $this->_keywords)),
            'num=' . $this->_resultsInPage,
        );
        if ($this->_lang !== '') {
            // _lang is stored as 'lang_xx'; "hl" only wants the 'xx' part.
            $params[] = 'hl=' . urlencode(substr($this->_lang, 5));
        }
        $params[] = 'ie=UTF-8';
        $params[] = 'oe=UTF-8';
        $params[] = 'btnG=Search';
        $params[] = 'as_epq=' . urlencode((string) $this->_sentence);
        $params[] = 'as_oq=' . urlencode(implode(' ', $this->_oneWords));
        $params[] = 'as_eq=' . urlencode(implode(' ', $this->_noWords));
        $params[] = 'lr=' . urlencode($this->_lang);
        $params[] = 'as_ft=i';
        $params[] = 'as_filetype=' . urlencode((string) $this->_fileType);
        $params[] = 'as_qdr=' . urlencode((string) $this->_period);
        $params[] = 'as_occt=' . urlencode((string) $this->_position);
        $params[] = 'as_dt=i';
        $params[] = 'as_sitesearch=' . urlencode((string) $this->_site);

        $this->_link = 'http://www.google.com/search?' . implode($separator, $params);
    }

    /**
     * Build and return the query URL.
     *
     * @param string $separator string placed between two parameters
     * @return string
     * @access public
     */
    public function getLink($separator = '&') {
        $this->_setLink($separator);
        return $this->_link;
    }

    /**
     * Download the search page for the current criteria.
     *
     * @return string|false the page content, or false when file_get_contents()
     * fails
     * @access public
     */
    public function getContents() {
        return file_get_contents($this->getLink('&'));
    }

    /**
     * Replace the detection pattern used by getResults().
     *
     * The pattern is static, so the change applies to every instance.
     *
     * @param string $pattern the new pattern (without delimiters)
     * @return bool true when the pattern was accepted, false when it is not a
     * non-empty string
     * @access protected
     */
    protected function _updatePattern($pattern) {
        if (!is_string($pattern) || empty($pattern)) {
            return false;
        }
        self::$_detectionPattern = $pattern;
        return true;
    }

    /**
     * Get the number of results found for the current criteria.
     *
     * A value found in the cache is returned as the cache object provides it.
     * Otherwise the page is downloaded and parsed; the parsed count is stored
     * in the cache only when the download succeeded, so a network failure is
     * not remembered as "0 results".
     *
     * @return int number of results, 0 when none could be detected
     * @access public
     */
    public function getResults() {
        $this->_setLink();
        $link = $this->_link;

        if ($this->_bEnableCaching && $this->_oCache->inCache($link)) {
            return $this->_oCache->readCache();
        }

        $contents = $this->getContents();
        if (!is_string($contents) || $contents === '') {
            return 0;
        }

        $results = 0;
        $matches = array();
        // Counters may be grouped with non-breaking spaces (entity or raw
        // UTF-8 bytes); turn them into plain spaces so the pattern matches.
        $contents = str_replace(array('&nbsp;', "\xC2\xA0"), ' ', $contents);
        if (preg_match("/" . self::$_detectionPattern . "/", $contents, $matches)) {
            // Third group is the total; drop thousands separators.
            $results = (int) str_replace(array(',', '.', ' '), '', $matches[3]);
        }

        if ($this->_bEnableCaching) {
            $this->_oCache->saveInCache($results, $link);
        }
        return $results;
    }
}
}