<?php
/*
SEO_Assistant for OSC 2.2 MS2 v2.0 08.03.2004
Originally Created by: Jack York
Released under the GNU General Public License
osCommerce, Open Source E-Commerce Solutions
Copyright (c) 2004 osCommerce
*/
//http://www.google.com/search?hl=en&lr=&q=link%3Awww.open-sef.org
/**
 * Ask Google how many pages link to a URL.
 *
 * Sends a raw HTTP/1.0 GET for a "link:" search to www.google.com and scans the
 * response line by line for the first line containing "of about". Everything up
 * to "of about <b>" and everything from the next "<" onwards is stripped from
 * that line, commas are removed and the remainder is trimmed.
 *
 * @param string $link_url URL to look up; it is appended to the query string exactly as given.
 * @return string|false|null The extracted count, false when the connection could not be
 *                           opened (the socket error is echoed), or null when no line of
 *                           the response contains "of about".
 */
function getLinkPopularity($link_url) {
    $host = "www.google.com";
    $path = "/search?hl=en&lr=&q=link:" . $link_url;
    $fp = fsockopen($host, 80, $errno, $errstr);
    if (! $fp) {
        echo "$errstr ($errno)<br />\n";
        return false;
    }
    fputs($fp, "GET ".$path." HTTP/1.0\r\nHost: ".$host."\r\n\r\n");
    $total_sites = null;
    while (!feof($fp)) {
        $line = fgets($fp, 4096);
        if ($line === false) {
            // Read error or end of stream: stop instead of looping on a dead socket.
            break;
        }
        if (strpos($line, "of about") === false) {
            continue;
        }
        $total_sites = preg_replace("/^.*of about <b>/", "", $line);
        $total_sites = preg_replace("/<.*$/", "", $total_sites);
        $total_sites = str_replace(",", "", $total_sites);
        $total_sites = trim($total_sites);
        break;
    }
    // Always release the socket, whether or not a count was found.
    fclose($fp);
    return $total_sites;
}
//http://search.yahoo.com/search?p=link%3Ahttp%3A%2F%2Fwww.cre8asiteforums.com%2Findex.php&ei=UTF-8&fr=FP-tab-web-t&n=20&fl=0&x=wrt
/**
 * Ask Yahoo how many pages link to a domain.
 *
 * Sends a raw HTTP/1.0 GET for a "link:http://<domain>" search to search.yahoo.com
 * and scans the response for the first line starting with "1 - ". From that line
 * the text after the last "of " up to the next space is kept, with commas removed.
 *
 * @param string $domain Domain (without scheme); appended to the query string as given.
 * @return string|null The extracted count, or null when the connection failed
 *                     ("Can't connect to host... " is echoed) or no matching line was found.
 */
function get_yahoo_links($domain) {
    $host = "search.yahoo.com";
    // The request target must start with "/" to form a valid HTTP request line.
    $path = "/search?p=link%3Ahttp%3A%2F%2F" . $domain;
    $fp = fsockopen($host, 80);
    if (! $fp) {
        echo "Can't connect to host... ";
        return null;
    }
    fputs($fp, "GET ".$path." HTTP/1.0\r\nHost: ".$host."\r\n\r\n");
    $total_sites = null;
    while (!feof($fp)) {
        $line = fgets($fp, 4096);
        if ($line === false) {
            // Read error or end of stream.
            break;
        }
        if (!preg_match("/^1 \- /", $line)) {
            continue;
        }
        $total_sites = preg_replace("/^.*of /", "", $line);
        $total_sites = preg_replace("/ .*$/", "", $total_sites);
        $total_sites = str_replace(",", "", $total_sites);
        $total_sites = trim($total_sites);
        break;
    }
    // Always release the socket, whether or not a count was found.
    fclose($fp);
    return $total_sites;
}
/**
 * List the regular entries of the current working directory.
 *
 * "." and "..", and every entry for which is_dir() is true, are skipped.
 *
 * @return array File names sorted with sort(); an empty array when the
 *               directory cannot be opened.
 */
function ListFiles()
{
    $files = array();
    $dir = opendir('.');
    if ($dir === false) {
        // Without this guard readdir()/closedir() would be called on false.
        return $files;
    }
    while (($file = readdir($dir)) !== false) {
        if ($file === '.' || $file === '..' || is_dir($file)) {
            continue;
        }
        $files[] = $file;
    }
    closedir($dir);
    sort($files);
    return $files;
}
/**
 * Open a page and every "http:" link found on it, recording the ones that fail.
 *
 * The page is read line by line. On each line the text from the first
 * case-insensitive "http:" up to the next double quote (or the end of the line)
 * is treated as a link and opened with fopen(). Only the first link on a line
 * is examined.
 *
 * Results are reported through globals:
 *  - $badLinks[$idx] receives each URL that could not be opened, with $idx
 *    incremented locally after each entry ($idx is passed by value, so the
 *    caller's variable is not changed);
 *  - $totalLinks is incremented once for $url and once per link tried.
 *
 * @param string $url Page to scan (anything fopen() accepts).
 * @param int    $idx First index to use in $badLinks.
 * @return void
 */
function checkLinks($url, $idx) {
    global $badLinks, $totalLinks;
    $file = @fopen($url, 'r');
    $totalLinks++;
    if (! $file) {
        $badLinks[$idx] = $url;
        return;
    }
    while (!feof($file)) {
        $page_line = trim((string) fgets($file, 4096));
        // stristr() is the case-insensitive "contains http:" test and also
        // yields the tail of the line starting at the link.
        $link = stristr($page_line, 'http:');
        if ($link === false) {
            continue;
        }
        $pos = strpos($link, '"');
        if ($pos !== false) {
            $link = substr($link, 0, $pos);
        }
        $actual_link = @fopen($link, 'r');
        $totalLinks++;
        if (! $actual_link) {
            $badLinks[$idx] = $link;
            $idx++;
        } else {
            fclose($actual_link);
        }
    }
    fclose($file);
}
/*
SEO_Assistant for OSC 2.2 MS2 v2.0 08.03.2004
Originally Created by: Jack York
Released under the GNU General Public License
osCommerce, Open Source E-Commerce Solutions
Copyright (c) 2004 osCommerce
*/
/**
 * Fetch a search-result page and record the result count it reports.
 *
 * The page is lower-cased and stripped of tags. The count is the text between
 * the first "of about " (or the first "of " when $engine contains "msn") and
 * the following space; if no space follows, the rest of the text is used.
 *
 * The outcome is stored in the global $results[$engine] as array(value, $path):
 *  - 'n/a' when $path exists as a local file or the page cannot be read;
 *  - '0'   when the marker is missing, the extracted text is empty, or it
 *          contains a letter;
 *  - otherwise the extracted text, which (commas removed) is also added to the
 *    global $total when it is numeric.
 *
 * @param string $path   URL of the result page.
 * @param string $engine Key under which the result is stored.
 * @return void
 */
function linkcheck($path, $engine) {
    global $results;
    global $total;
    if (file_exists($path)) {
        $results[$engine] = array('n/a', $path);
        return;
    }
    $lines = @file($path);
    if ($lines === false) {
        // Page could not be fetched; implode() on false is fatal on PHP 8.
        $results[$engine] = array('n/a', $path);
        return;
    }
    $data = strtolower(strip_tags(implode("", $lines)));
    $marker = (strpos($engine, 'msn') === false) ? "of about" : "of";
    $count = '';
    $start = strpos($data, $marker);
    if ($start !== false) {
        // Skip the marker and the single space that follows it.
        $count = (string) substr($data, $start + strlen($marker) + 1);
        $end = strpos($count, " ");
        if ($end !== false) {
            $count = substr($count, 0, $end);
        }
    }
    if ($count === '' || preg_match('/[[:alpha:]]/', $count)) {
        $results[$engine] = array('0', $path);
        return;
    }
    $results[$engine] = array($count, $path);
    $number = str_replace(',', '', $count);
    if (is_numeric($number)) {
        // "+ 0" converts the numeric string without risking a non-numeric operand.
        $total += $number + 0;
    }
}
/**
 * Fetch a page as a single string.
 *
 * @param string $path URL to read.
 * @return string|false Page contents, or false when the page cannot be read.
 */
function seo_lp_fetch($path) {
    $lines = @file($path);
    return ($lines === false) ? false : implode("", $lines);
}

/**
 * Return the part of $data that starts $skip characters past the first $marker.
 * When the marker is absent the cut is made $skip characters from the start.
 */
function seo_lp_after($data, $marker, $skip) {
    return (string) substr($data, (int) strpos($data, $marker) + $skip);
}

/**
 * Return the part of $data before the first $marker ('' when the marker is absent).
 */
function seo_lp_before($data, $marker) {
    return (string) substr($data, 0, (int) strpos($data, $marker));
}

/**
 * Store an extracted count in the global $results and optionally add it to $total.
 *
 * An empty value or one containing a letter is stored as '0' and not added.
 */
function seo_lp_store_count($engine, $count, $path, $add_to_total) {
    global $results;
    global $total;
    $count = (string) $count;
    if ($count === '' || preg_match('/[[:alpha:]]/', $count)) {
        $results[$engine] = array('0', $path);
        return;
    }
    $results[$engine] = array($count, $path);
    $number = str_replace(',', '', $count);
    if ($add_to_total && is_numeric($number)) {
        $total += $number + 0;
    }
}

/**
 * Collect popularity figures for a site from several search engines and directories.
 *
 * Google and MSN are queried through linkcheck(). Yahoo, AlltheWeb, HotBot and
 * AltaVista result pages are fetched here and the reported count is cut out of
 * the page text; those counts are added to the global $total. DMOZ and Zeal are
 * reported as 'Yes'/'No' depending on whether the page contains the engine's
 * "no results" phrase. The Alexa value is extracted but not added to $total.
 *
 * Every entry of the global $results is array(value, url-queried); the value is
 * 'n/a' when the page could not be fetched.
 *
 * @param string $link_url Host name of the site; inserted into each query URL as given.
 * @return array|false The global $results array, or false when $link_url is empty.
 */
function get_link_popularity($link_url) {
    global $results;
    global $total;
    if (!$link_url) {
        return false;
    }

    // The results from Google and MSN can be extracted the same way, so a shared function is used.
    linkcheck("http://www.google.com/search?hl=en&lr=&ie=UTF-8&q=site%3A".$link_url, 'google');
    linkcheck("http://search.msn.com/results.aspx?FORM=MSNH&q=site%3A".$link_url, 'msn');

    // check Yahoo!
    $path ="http://search.yahoo.com/search?p=linkdomain%3A".$link_url."&ei=UTF-8&fr=fp-tab-web-t&cop=mss&tab=";
    $page = seo_lp_fetch($path);
    if ($page === false) {
        $results['yahoo'] = array('n/a', $path);
    } else {
        $data = seo_lp_after(strtolower($page), "of about", 9);
        $data = strip_tags(seo_lp_before($data, " "));
        seo_lp_store_count('yahoo', $data, $path, true);
    }

    // check AlltheWeb
    $path ="http://www.alltheweb.com/search?cat=web&cs=utf8&q=site%3A".$link_url."&rys=0&_sb_lang=pref";
    $page = seo_lp_fetch($path);
    if ($page === false) {
        $results['alltheweb'] = array('n/a', $path);
    } else {
        $data = seo_lp_after(strtolower(strip_tags($page)), "1 -", 5);
        $data = seo_lp_before($data, "results");
        $data = trim(seo_lp_after($data, "of", 3));
        seo_lp_store_count('alltheweb', $data, $path, true);
    }

    // check HotBot
    $path = "http://www.hotbot.com/default.asp?query=".$link_url."&ps=&loc=searchbox&tab=web&provKey=Ask+Jeeves&prov=Ask+Jeeves";
    $page = seo_lp_fetch($path);
    if ($page === false) {
        $results['hotbot'] = array('n/a', $path);
    } else {
        $data = seo_lp_after(strtolower(strip_tags($page)), "results 1 - ", 11);
        $data = seo_lp_before($data, ")");
        $data = trim(seo_lp_after($data, "of", 3));
        seo_lp_store_count('hotbot', $data, $path, true);
    }

    // check AltaVista
    $path ="http://www.altavista.com/web/results?q=site%3A".$link_url."&kgs=0&kls=0&stq=10";
    $page = seo_lp_fetch($path);
    if ($page === false) {
        $results['altavista'] = array('n/a', $path);
    } else {
        $data = seo_lp_after(strtolower(strip_tags($page)), "altavista found", 15);
        $data = trim(seo_lp_before($data, "results"));
        seo_lp_store_count('altavista', $data, $path, true);
    }

    // check for listing in DMOZ
    $path ="http://search.dmoz.org/cgi-bin/search?search=".str_replace("www.", "", $link_url);
    $page = seo_lp_fetch($path);
    if ($page === false) {
        $results['dmoz'] = array('n/a', $path);
    } else {
        // Compare against false so a phrase at offset 0 still counts as found.
        $missing = strpos(strip_tags($page), "No Open Directory Project results found") !== false;
        $results['dmoz'] = array($missing ? 'No' : 'Yes', $path);
    }

    // check for listing in Zeal
    $path ="http://www.zeal.com/search/results.jhtml?keyword=".$link_url."&scope=directory";
    $page = seo_lp_fetch($path);
    if ($page === false) {
        $results['zeal'] = array('n/a', $path);
    } else {
        $missing = strpos($page, "found no results") !== false;
        $results['zeal'] = array($missing ? 'No' : 'Yes', $path);
    }

    // get Alexa Traffic Rank
    $path ="http://www.alexa.com/data/details/main?q=&url=http://".$link_url;
    $page = seo_lp_fetch($path);
    if ($page === false) {
        $results['alexa'] = array('n/a', $path);
    } else {
        $data = seo_lp_after(strtolower(strip_tags($page)), "traffic rank for ", 17);
        $data = str_replace(str_replace('www.', '', $link_url), '', $data);
        $data = str_replace(': ', '', $data);
        // Cut one character before the first space of the trimmed text, as the original extraction did.
        $end = (int) strpos(trim($data), ' ') - 1;
        $data = trim((string) substr($data, 0, $end));
        // The rank is reported but is not part of the running total.
        seo_lp_store_count('alexa', $data, $path, false);
    }

    return $results;
}
/**
 * Find the position of a site in Google's results for a search term.
 *
 * Result pages are requested ten hits at a time until $searchtotal positions
 * have been covered or the site is found. A hit counts as a match when the
 * lower-cased result URL contains the lower-cased $searchurl as a plain substring.
 *
 * @param string $searchquery Search term; spaces become "+" and "%26" becomes "&".
 * @param string $searchurl   URL fragment to look for in the result links.
 * @param int    $searchtotal Number of result positions to examine.
 * @param int    $showlinks   When true, every non-matching hit is listed too.
 * @return array HTML fragments (strings) describing progress, hits and the final verdict.
 */
function seo_google_position($searchquery,$searchurl,$searchtotal=10,$showlinks=1) {
    $result_google = array();
    if (!$searchquery || !$searchurl) {
        $result_google[] = "The Search Criteria is Invalid";
        return $result_google;
    }

    // Prepare Query
    $query = str_replace(" ", "+", $searchquery);
    $query = str_replace("%26", "&", $query);
    $searchurl = strtolower($searchurl);

    $siteName = 'Google';
    $hits_per_page = 10;
    $timeout = 3;
    $position = 0;
    $position_match = 0;

    // Stop paging as soon as a match is found so a later page cannot overwrite it.
    for ($page_start = 0; $page_start < $searchtotal && !$position_match; $page_start += $hits_per_page) {
        // The last page may need fewer than a full page of hits.
        $num = min($hits_per_page, $searchtotal - $page_start);

        // Search Query
        $filename = "http://www.google.com/search?as_q=$query".
            "&num={$num}&hl=en&ie=UTF-8&btnG=Google+Search".
            "&as_epq=&as_oq=&as_eq=&lr=&as_ft=i&as_filetype=".
            "&as_qdr=all&as_nlo=&as_nhi=&as_occt=any&as_dt=i".
            "&as_sitesearch=&safe=images&start=$page_start";
        $result_google[] = "Checking $siteName positions $page_start .. ".($page_start+$num).' <a href="'.$filename.'" target="_blank">View</a>';

        // Set Timeout and Make Request
        $old = ini_set('default_socket_timeout', (string) $timeout);
        $file = fopen($filename, "r");
        if ($old !== false) {
            ini_set('default_socket_timeout', $old);
        }
        if (!$file) {
            $result_google[] = "<b>Unable to open remote file $filename.</b>";
            continue;
        }

        // Blocking reads with a timeout: fgets() returns false on timeout or
        // end of stream, which ends the loop instead of spinning.
        stream_set_timeout($file, $timeout);
        $pageData = '';
        while (!feof($file)) {
            $chunk = fgets($file, 1024);
            if ($chunk === false) {
                break;
            }
            $pageData .= $chunk;
        }
        fclose($file);

        // <a href="{site_url}" class=l onmousedown="return clk(this.href,'','','res','2','')">
        if (!preg_match_all('/\<a href=\"(.*?)\" class=l.*?\>(.*?)\<\/a\>/', $pageData, $matches, PREG_SET_ORDER)) {
            continue;
        }
        foreach ($matches as $match) {
            $position++;
            $foundUrl = strtolower(str_replace('"', '', $match[1]));
            $foundTitle = $match[2];
            // Plain substring test: URLs contain "/" and ".", which are not safe inside a regex.
            if (strpos($foundUrl, $searchurl) !== false) {
                $position_match = $position;
                $result_google[] = $position.') <a href="'.$foundUrl.'" target="_blank">'.$foundTitle.'</a> - Match Found';
                break;
            }
            if ($showlinks) {
                $result_google[] = $position.') <a href="'.$foundUrl.'" target="_blank">'.$foundTitle.'</a>';
            }
        }
    }

    if ($position_match) {
        $result_google[] = "The site $searchurl is at position $position_match for the term <b>$searchquery</b> on $siteName";
    } else {
        $result_google[] = "The site $searchurl is not in the top $position for the term <b>$searchquery</b> on $siteName";
    }
    return $result_google;
}
/**
 * Find the position of a site in Yahoo's results for a search term.
 *
 * Result pages are always requested in sets of ten until $searchtotal positions
 * have been covered or the site is found. A hit counts as a match when the
 * lower-cased, entity-decoded result URL contains the lower-cased $searchurl as
 * a plain substring.
 *
 * @param string $searchquery Search term; spaces become "+" and "%26" becomes "&".
 * @param string $searchurl   URL fragment to look for in the result links.
 * @param int    $searchtotal Number of result positions to examine.
 * @param int    $showlinks   When true, every non-matching hit is listed too.
 * @return array HTML fragments (strings) describing progress, hits and the final verdict.
 */
function seo_yahoo_position($searchquery,$searchurl,$searchtotal=10,$showlinks=1) {
    $result_yahoo = array();
    if (!$searchquery || !$searchurl) {
        $result_yahoo[] = "The Search Criteria is Invalid";
        return $result_yahoo;
    }

    // Prepare Query
    $query = str_replace(" ", "+", $searchquery);
    $query = str_replace("%26", "&", $query);
    $searchurl = strtolower($searchurl);

    $siteName = 'Yahoo';
    $hits_per_page = 10;
    $timeout = 3;
    $position = 0;
    $position_match = 0;

    // Stop paging as soon as a match is found so a later page cannot overwrite it.
    for ($page_start = 0; $page_start < $searchtotal && !$position_match; $page_start += $hits_per_page) {
        // Yahoo requires sets of 10, so the page size is never reduced.
        $num = $hits_per_page;

        // Search Query
        $filename = "http://search.yahoo.com/search?p=$query&n={$num}&ei=UTF-8&va_vt=any&vo_vt=any"
            . "&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&pstart=5&b=".($page_start+1);
        $result_yahoo[] = "Checking $siteName positions $page_start .. ".($page_start+$num).' <a href="'.$filename.'" target="_blank">View</a>';

        // Set Timeout and Make Request
        $old = ini_set('default_socket_timeout', (string) $timeout);
        $file = fopen($filename, "r");
        if ($old !== false) {
            ini_set('default_socket_timeout', $old);
        }
        if (!$file) {
            $result_yahoo[] = "<b>Unable to open remote file $filename.</b>";
            continue;
        }

        // Blocking reads with a timeout: fgets() returns false on timeout or
        // end of stream, which ends the loop instead of spinning.
        stream_set_timeout($file, $timeout);
        $pageData = '';
        while (!feof($file)) {
            $chunk = fgets($file, 1024);
            if ($chunk === false) {
                break;
            }
            $pageData .= $chunk;
        }
        fclose($file);

        if (!preg_match_all('/\<a class=\"yschttl\" href=\"(.*?)\".*?\>(.*?)\<\/a\>/', $pageData, $matches, PREG_SET_ORDER)) {
            continue;
        }
        foreach ($matches as $match) {
            $position++;
            $foundUrl = strtolower(preg_replace('/^.?\*\*/', '', $match[1]));
            $foundUrl = html_entity_decode($foundUrl);
            $foundTitle = $match[2];
            // Plain substring test: URLs contain "/" and ".", which are not safe inside a regex.
            if (strpos($foundUrl, $searchurl) !== false) {
                $position_match = $position;
                $result_yahoo[] = $position.') <a href="'.$foundUrl.'" target="_blank">'.$foundTitle.'</a> - Match Found';
                break;
            }
            if ($showlinks) {
                $result_yahoo[] = $position.') <a href="'.$foundUrl.'" target="_blank">'.$foundTitle.'</a>';
            }
        }
    }

    if ($position_match) {
        $result_yahoo[] = "The site $searchurl is at position $position_match for the term <b>$searchquery</b> on $siteName";
    } else {
        $result_yahoo[] = "The site $searchurl is not in the top $position for the term <b>$searchquery</b> on $siteName";
    }
    return $result_yahoo;
}
/**
 * Keyword density analysis of a web page.
 *
 * The page is downloaded once, entities are decoded, script blocks and tags are
 * removed, optional meta description/keywords are prepended, stop words are
 * dropped and the text is cleaned. Single words and two- and three-word phrases
 * are then counted; only entries seen more than once and longer than two
 * characters are reported.
 *
 * @param string $url               Page address; "http://" is prepended when missing.
 * @param int    $total             Receives the number of space-separated tokens in the cleaned text.
 * @param bool   $use_meta_tags     Include the page's meta description and keywords in the text.
 * @param bool   $use_partial_total Compute density against the summed counts of reported
 *                                  phrases of the same length instead of against $total.
 * @return array|false False when the page cannot be fetched or is empty. Otherwise an array with
 *                     [1], [2], [3] => phrase => density in percent (sorted descending, present
 *                     only when such phrases exist) and phrase => raw count for every reported phrase.
 */
function kda($url, &$total, $use_meta_tags, $use_partial_total) {
    $ws = array();
    $occurances = array();
    $dens = array();

    if (!stristr($url, 'http://')) {
        $url = 'http://'.$url;
    }
    $html = @file_get_contents($url);
    if (!$html) {
        return false;
    }
    // Reuse the body already downloaded instead of fetching the page a second time.
    $html = html_entity_decode($html);

    $meta_tags = $use_meta_tags ? get_meta_tags($url) : array();
    if (!is_array($meta_tags)) {
        $meta_tags = array();
    }
    $description = isset($meta_tags['description']) ? $meta_tags['description'] : '';
    $keywords = isset($meta_tags['keywords']) ? $meta_tags['keywords'] : '';

    $no_html = strip_tags(kda_strip_tag_script($html));
    $text = $description." ".$keywords." ".$no_html;
    $text = kda_clean(kda_stopWords($text));

    $words = array_map('trim', explode(' ', $text));
    $total = count($words);

    // Count every word plus the 2- and 3-word phrases starting at it; a phrase
    // ends at the first empty token.
    for ($x = 0; $x < $total; $x++) {
        if ($words[$x] == '') {
            continue;
        }
        $phrase = $words[$x];
        $ws[$phrase] = isset($ws[$phrase]) ? $ws[$phrase] + 1 : 1;
        for ($ahead = 1; $ahead <= 2; $ahead++) {
            if (!isset($words[$x + $ahead]) || $words[$x + $ahead] == '') {
                break;
            }
            $phrase .= " ".$words[$x + $ahead];
            $ws[$phrase] = isset($ws[$phrase]) ? $ws[$phrase] + 1 : 1;
        }
    }

    // First pass: total occurrences per phrase length, for the partial-total mode.
    foreach ($ws as $word => $count) {
        $word = (string) $word;
        if ($count > 1 && strlen($word) > 2) {
            $phrase_size = count(explode(' ', $word));
            if (!isset($occurances[$phrase_size])) {
                $occurances[$phrase_size] = 0;
            }
            $occurances[$phrase_size] += $count;
        }
    }

    // Second pass: density per phrase.
    foreach ($ws as $word => $count) {
        $word = (string) $word;
        if ($count > 1 && strlen($word) > 2) {
            $phrase_size = count(explode(' ', $word));
            $ttlWords = ($use_partial_total) ? $occurances[$phrase_size] : $total;
            $dens[$phrase_size][$word] = round(($count / $ttlWords) * 100, 2);
            $dens[$word] = $count;
        }
    }

    for ($size = 1; $size <= 3; $size++) {
        if (isset($dens[$size]) && is_array($dens[$size])) {
            arsort($dens[$size]);
        }
    }
    return $dens;
}
/**
 * Remove every <script ...> ... </script> block from an HTML string.
 *
 * Tag names are matched case-insensitively. After each removal the search
 * restarts from the beginning of the string. An opening tag with no closing
 * tag after it is left in place, as is a closing tag with no opening tag
 * before it.
 *
 * @param string $html Markup to clean.
 * @return string The markup without script blocks.
 */
function kda_strip_tag_script($html) {
    $html = (string) $html;
    $close_tag = '/<\/script>/i';
    while (preg_match("/<script[^>]*>/i", $html, $open, PREG_OFFSET_CAPTURE)) {
        $start = $open[0][1];
        $body_start = $start + strlen($open[0][0]);
        // Look for the closing tag only after the opening tag, so a stray
        // earlier </script> cannot produce an inverted range.
        if (!preg_match($close_tag, $html, $close, PREG_OFFSET_CAPTURE, $body_start)) {
            break;
        }
        $end = $close[0][1] + strlen($close[0][0]);
        $html = substr($html, 0, $start) . substr($html, $end);
    }
    return $html;
}
/**
 * Normalise text for keyword counting.
 *
 * Line breaks and tabs become spaces so that words on adjacent lines stay
 * separate. The characters . , ( ) _ * " - ! ? / | are removed, the
 * typographic apostrophe is replaced by a plain one, and the result is
 * lower-cased and trimmed.
 *
 * @param string $text Raw text.
 * @return string Cleaned, lower-case text.
 */
function kda_clean($text) {
    // Whitespace other than a space still separates words.
    $text = str_replace(array("\r", "\n", "\t"), ' ', $text);
    $punctuation = array('.', ',', '(', ')', '_', '*', '"', '-', '!', '?', '/', '|');
    $text = str_replace($punctuation, '', $text);
    $text = str_replace('’', "'", $text);
    return trim(strtolower($text));
}
/**
 * Remove common ("stop") words from a space-separated text.
 *
 * The word list is read from
 * <$mosConfig_absolute_path>/administrator/components/com_sef/includes/seo_words/seo_words_<language>.txt
 * (one word per line, compared case-insensitively), where <language> is
 * $sefConfig->backend_language. When the file is missing or unreadable no
 * words are removed. The global $sw_count is incremented for every word removed.
 *
 * @param string $term Text to filter.
 * @return string The remaining words, each preceded by a single space.
 */
function kda_stopWords($term) {
    global $mosConfig_absolute_path;
    global $sw_count, $sefConfig;

    $language = (is_object($sefConfig) && isset($sefConfig->backend_language)) ? $sefConfig->backend_language : '';
    $list_file = $mosConfig_absolute_path.'/administrator/components/com_sef/includes/seo_words/seo_words_'.$language.'.txt';

    // Load the list of common words as array keys for constant-time lookup.
    $common = array();
    if (is_file($list_file) && is_readable($list_file)) {
        $entries = file($list_file);
        if ($entries !== false) {
            foreach ($entries as $entry) {
                $common[trim(strtolower($entry))] = true;
            }
        }
    }

    $clean_term = "";
    foreach (explode(" ", $term) as $line) {
        if (isset($common[strtolower(trim($line))])) {
            $sw_count++;
        } else {
            $clean_term .= " ".$line;
        }
    }
    return $clean_term;
}