Location: PHPKode > projects > Joomla SEF / SEO - extending OpenSEF > includes/sef.seo.php
<?php
/*
SEO_Assistant for OSC 2.2 MS2 v2.0  08.03.2004
Originally Created by: Jack York
Released under the GNU General Public License
osCommerce, Open Source E-Commerce Solutions
Copyright (c) 2004 osCommerce
*/

//http://www.google.com/search?hl=en&lr=&q=link%3Awww.open-sef.org
/**
 * Scrape Google for the number of pages that link to a URL.
 *
 * Sends a raw HTTP/1.0 "link:" search request to www.google.com and reads the
 * response line by line. The first line containing "of about" is reduced to
 * the figure that follows "of about <b>", with commas removed.
 *
 * @param string $link_url Host or URL appended to the "link:" operator as-is
 *                         (it is not URL-encoded here).
 * @return string|false|null The scraped figure, false when the connection
 *                           fails (the socket error is echoed as well), or
 *                           null when no line contains "of about".
 */
function getLinkPopularity($link_url) {
  $host = "www.google.com";
  $path = "/search?hl=en&lr=&q=link:" . $link_url;
  $fp = fsockopen($host, 80, $errno, $errstr);
  if (! $fp) {
    echo "$errstr ($errno)<br />\n";
    return false;
  }

  $total_sites = null;
  fputs($fp, "GET ".$path." HTTP/1.0\r\nHost: ".$host."\r\n\r\n");
  while (!feof($fp)) {
    $line = fgets($fp, 4096);
    if ($line === false) {
      // EOF or read error: stop instead of spinning on a dead socket.
      break;
    }
    if (strpos($line, "of about") !== false) {
      $total_sites = preg_replace("/^.*of about <b>/", "", $line);
      $total_sites = preg_replace("/<.*$/", "", $total_sites);
      $total_sites = str_replace(",", "", $total_sites);
      $total_sites = trim($total_sites);
      break;
    }
  }
  // The original returned from inside the loop and never released the socket.
  fclose($fp);

  return $total_sites;
}

//http://search.yahoo.com/search?p=link%3Ahttp%3A%2F%2Fwww.cre8asiteforums.com%2Findex.php&ei=UTF-8&fr=FP-tab-web-t&n=20&fl=0&x=wrt
/**
 * Scrape Yahoo for the number of pages that link to a domain.
 *
 * Sends a raw HTTP/1.0 "link:http://<domain>" search request to
 * search.yahoo.com and looks for the first response line starting with
 * "1 - ". From that line the token following the last "of " is returned with
 * commas removed.
 *
 * @param string $domain Domain appended to the query as-is (not URL-encoded).
 * @return string|null The scraped figure, or null when the connection fails
 *                     ("Can't connect to host... " is echoed) or no matching
 *                     line is found.
 */
function get_yahoo_links($domain) {
  $host = "search.yahoo.com";
  // The request-target must start with "/"; the original sent "search?p=...".
  $path = "/search?p=link%3Ahttp%3A%2F%2F" . $domain;
  $fp = fsockopen($host, 80);
  if (! $fp) {
    echo "Can't connect to host... ";
    return null;
  }

  $total_sites = null;
  fputs($fp, "GET ".$path." HTTP/1.0\r\nHost: ".$host."\r\n\r\n");
  while (!feof($fp)) {
    $line = fgets($fp, 4096);
    if ($line === false) {
      // EOF or read error.
      break;
    }
    if (preg_match("/^1 \- /", $line)) {
      $total_sites = preg_replace("/^.*of /", "", $line);
      $total_sites = preg_replace("/ .*$/", "", $total_sites);
      $total_sites = str_replace(",", "", $total_sites);
      $total_sites = trim($total_sites);
      break;
    }
  }
  // Release the socket on every path (the original leaked it).
  fclose($fp);

  return $total_sites;
}

/**
 * List the regular entries of the current working directory.
 *
 * "." and "..", and anything is_dir() reports as a directory, are skipped.
 *
 * @return array File names sorted with sort()'s default ordering.
 */
function ListFiles()
{
  $names = array();
  $handle = opendir('.');
  for ($entry = readdir($handle); $entry !== false; $entry = readdir($handle))
  {
    if ($entry === '.' || $entry === '..' || is_dir($entry))
    {
      continue;
    }
    array_push($names, $entry);
  }
  closedir($handle);
  sort($names);
  return $names;
}

/**
 * Open a page and try to open the first "http:" link found on each line.
 *
 * Results are reported through two globals:
 *  - $totalLinks is incremented once for $url and once per link tried;
 *  - $badLinks[$idx] receives every target that could not be opened.
 *
 * A link is the text from the first case-insensitive "http:" on a line up to
 * the next double quote, or to the end of the trimmed line when there is none.
 *
 * @param string $url Page (or local file) to scan.
 * @param int    $idx First key to use in $badLinks. It is passed by value, so
 *                    the caller's own counter is not advanced.
 * @return void
 */
function checkLinks($url, $idx) {
  global $badLinks, $totalLinks;

  $totalLinks++;
  $file = @fopen($url,'r');
  if (! $file) {
    $badLinks[$idx] = $url;
    return;
  }

  while (!feof($file)) {
    $raw = fgets($file, 4096);
    if ($raw === false) {
      // EOF or read error.
      break;
    }

    // stristr() does the case-insensitive search on its own; the removed
    // eregi() pre-check was redundant and is fatal on PHP 7+.
    $link = stristr(trim($raw), 'http:');
    if ($link === false) {
      continue;
    }

    $pos = strpos($link, '"');
    if ($pos !== false) {
      $link = substr($link, 0, $pos);
    }

    $totalLinks++;
    $actual_link = @fopen($link,'r');
    if (! $actual_link) {
      $badLinks[$idx] = $link;
      $idx++;
    } else {
      fclose($actual_link);
    }
  }
  fclose($file);
}
/*
SEO_Assistant for OSC 2.2 MS2 v2.0  08.03.2004
Originally Created by: Jack York
Released under the GNU General Public License
osCommerce, Open Source E-Commerce Solutions
Copyright (c) 2004 osCommerce
*/
/**
 * Scrape a result count from a search results page and record it.
 *
 * The page is tag-stripped and lower-cased. The figure is the token (up to
 * the next space) that follows "of about ", or that follows "of " when
 * $engine contains "msn". The outcome is stored in the global $results:
 *  - array('n/a', $path)    the page could not be read, or a local file named
 *                           $path exists (guard kept from the original code);
 *  - array('0', $path)      the token contains a letter;
 *  - array($figure, $path)  otherwise; the figure, commas removed, is also
 *                           added to the global $total.
 *
 * @param string $path   URL of the results page.
 * @param string $engine Key to store the outcome under in $results.
 * @return void
 */
function linkcheck($path, $engine) {
  global $results;
  global $total;

  $lines = file_exists($path) ? false : @file($path);
  if ($lines === false) {
    // A failed read used to reach implode(), which throws on PHP 8.
    $results[$engine] = array('n/a', $path);
    return;
  }

  $data = strtolower(strip_tags(implode("", $lines)));
  if (FALSE === strpos($engine, 'msn')) {
    $data = (string) substr($data, (int) strpos($data, "of about") + 9);
  } else {
    $data = (string) substr($data, (int) strpos($data, "of") + 3);
  }
  $data = (string) substr($data, 0, (int) strpos($data, " "));

  // preg_match() replaces eregi(), which was removed in PHP 7.
  if (preg_match('/[[:alpha:]]/', $data)) {
    $results[$engine] = array('0', $path);
  } else {
    $results[$engine] = array($data, $path);
    // The cast keeps an empty token from raising a TypeError on PHP 8.
    $total += (int) str_replace(',', '', $data);
  }
}

/**
 * Read a remote results page into one string.
 *
 * @param string $path URL to read.
 * @return string|null The page, or null when the read fails or a local file
 *                     named $path exists (guard kept from the original code).
 */
function _seo_fetch_page($path) {
  if (file_exists($path)) {
    return null;
  }
  $lines = @file($path);
  return ($lines === false) ? null : implode('', $lines);
}

/**
 * Return the part of $data that starts $skip bytes after the position of
 * $marker (position 0 is used when the marker is absent).
 */
function _seo_text_after($data, $marker, $skip) {
  return (string) substr($data, (int) strpos($data, $marker) + $skip);
}

/**
 * Return the part of $data that precedes $marker ('' when it is absent).
 */
function _seo_text_before($data, $marker) {
  return (string) substr($data, 0, (int) strpos($data, $marker));
}

/**
 * Store one engine's scraped figure in the global $results.
 *
 * null becomes 'n/a', a figure containing a letter becomes '0', anything else
 * is stored as-is and, when $add_to_total is true, added to the global $total
 * with commas removed.
 */
function _seo_record_count($engine, $data, $path, $add_to_total) {
  global $results;
  global $total;

  if ($data === null) {
    $results[$engine] = array('n/a', $path);
  } elseif (preg_match('/[[:alpha:]]/', $data)) {
    $results[$engine] = array('0', $path);
  } else {
    $results[$engine] = array($data, $path);
    if ($add_to_total) {
      // The cast keeps an empty figure from raising a TypeError on PHP 8.
      $total += (int) str_replace(',', '', $data);
    }
  }
}

/**
 * Collect popularity figures for a site from several search engines and
 * directories by scraping their result pages.
 *
 * Fills the global $results with one array(value, url) entry per source
 * (google, msn, yahoo, alltheweb, hotbot, altavista, dmoz, zeal, alexa) and
 * adds every numeric count except Alexa's to the global $total. A source
 * whose page cannot be read is reported as 'n/a'.
 *
 * @param string $link_url Host name, inserted into the query URLs as-is.
 * @return array|false The global $results array, or false when $link_url is
 *                     empty.
 */
function get_link_popularity($link_url) {
  global $results;
  global $total;

  if (!$link_url) {
    return false;
  }

  // the results from Google and MSN can be extracted the same way so a function is used to simplify the code
  linkcheck("http://www.google.com/search?hl=en&lr=&ie=UTF-8&q=site%3A".$link_url, 'google');
  linkcheck("http://search.msn.com/results.aspx?FORM=MSNH&q=site%3A".$link_url, 'msn');

  // check Yahoo!
  $path ="http://search.yahoo.com/search?p=linkdomain%3A".$link_url."&ei=UTF-8&fr=fp-tab-web-t&cop=mss&tab=";
  $page = _seo_fetch_page($path);
  $data = null;
  if ($page !== null) {
    $data = _seo_text_after(strtolower($page), "of about", 9);
    $data = strip_tags(_seo_text_before($data, " "));
  }
  _seo_record_count('yahoo', $data, $path, true);

  // check AlltheWeb
  $path ="http://www.alltheweb.com/search?cat=web&cs=utf8&q=site%3A".$link_url."&rys=0&_sb_lang=pref";
  $page = _seo_fetch_page($path);
  $data = null;
  if ($page !== null) {
    $data = _seo_text_after(strtolower(strip_tags($page)), "1 -", 5);
    $data = _seo_text_before($data, "results");
    $data = trim(_seo_text_after($data, "of", 3));
  }
  _seo_record_count('alltheweb', $data, $path, true);

  // check HotBot
  $path = "http://www.hotbot.com/default.asp?query=".$link_url."&ps=&loc=searchbox&tab=web&provKey=Ask+Jeeves&prov=Ask+Jeeves";
  $page = _seo_fetch_page($path);
  $data = null;
  if ($page !== null) {
    $data = _seo_text_after(strtolower(strip_tags($page)), "results 1 - ", 11);
    $data = _seo_text_before($data, ")");
    $data = trim(_seo_text_after($data, "of", 3));
  }
  _seo_record_count('hotbot', $data, $path, true);

  // check AltaVista
  $path ="http://www.altavista.com/web/results?q=site%3A".$link_url."&kgs=0&kls=0&stq=10";
  $page = _seo_fetch_page($path);
  $data = null;
  if ($page !== null) {
    $data = _seo_text_after(strtolower(strip_tags($page)), "altavista found", 15);
    $data = trim(_seo_text_before($data, "results"));
  }
  _seo_record_count('altavista', $data, $path, true);

  // check for listing in DMOZ
  $path ="http://search.dmoz.org/cgi-bin/search?search=".str_replace("www.", "", $link_url);
  $page = _seo_fetch_page($path);
  if ($page === null) {
    // A failed fetch used to be reported as "Yes".
    $results['dmoz'] = array('n/a', $path);
  } elseif (strpos(strip_tags($page), "No Open Directory Project results found") !== false) {
    $results['dmoz'] = array('No', $path);
  } else {
    $results['dmoz'] = array('Yes', $path);
  }

  // check for listing in Zeal
  $path ="http://www.zeal.com/search/results.jhtml?keyword=".$link_url."&scope=directory";
  $page = _seo_fetch_page($path);
  if ($page === null) {
    $results['zeal'] = array('n/a', $path);
  } elseif (strpos($page, "found no results") !== false) {
    $results['zeal'] = array('No', $path);
  } else {
    $results['zeal'] = array('Yes', $path);
  }

  // get Alexa Traffic Rank (a rank, so it is not added to $total)
  $path ="http://www.alexa.com/data/details/main?q=&url=http://".$link_url;
  $page = _seo_fetch_page($path);
  $data = null;
  if ($page !== null) {
    $data = _seo_text_after(strtolower(strip_tags($page)), "traffic rank for ", 17);
    $data = str_replace(str_replace('www.', '', $link_url), '', $data);
    $data = str_replace(':&nbsp;', '', $data);
    $data = trim((string) substr($data, 0, (int) strpos(trim($data), ' ') - 1));
  }
  _seo_record_count('alexa', $data, $path, false);

  return $results;
}

/**
 * Find the rank of a site in Google's results for a search phrase.
 *
 * Result pages are fetched ten hits at a time until $searchtotal positions
 * have been covered or a hit whose URL contains $searchurl is found. Every
 * step is reported as an HTML fragment in the returned list, ending with a
 * summary line.
 *
 * @param string $searchquery Search phrase; spaces become "+" and "%26"
 *                            becomes "&" before it is put in the query URL.
 * @param string $searchurl   Text to look for in result URLs; compared
 *                            lower-cased as a plain substring.
 * @param int    $searchtotal Number of positions to inspect.
 * @param mixed  $showlinks   When truthy, non-matching hits are listed too.
 * @return array List of HTML message strings.
 */
function seo_google_position($searchquery,$searchurl,$searchtotal=10,$showlinks=1) {

  $result_google = array();

  if( !$searchquery || !$searchurl ) {
    $result_google[] = "The Search Criteria is Invalid";
    return $result_google;
  }

  // Prepare Query
  $query = str_replace(" ","+",$searchquery);
  $query = str_replace("%26","&",$query);
  $searchurl = strtolower($searchurl);

  $siteName       = 'Google';
  $hits_per_page  = 10;
  $position       = 0;
  $position_match = 0;
  $timeout        = 3;

  for($page_start = 0; $page_start < $searchtotal; $page_start += $hits_per_page) {

    // The last page may need fewer than a full page of hits.
    $num = $hits_per_page;
    if( ($page_start+$hits_per_page) > $searchtotal )
      $num -= ($page_start+$hits_per_page) - $searchtotal;

    // Search Query
    $filename = "http://www.google.com/search?as_q=$query".
    "&num={$num}&hl=en&ie=UTF-8&btnG=Google+Search".
    "&as_epq=&as_oq=&as_eq=&lr=&as_ft=i&as_filetype=".
    "&as_qdr=all&as_nlo=&as_nhi=&as_occt=any&as_dt=i".
    "&as_sitesearch=&safe=images&start=$page_start";

    $result_google[] = "Checking $siteName positions $page_start .. ".($page_start+$num).' <a href="'.$filename.'" target="_blank">View</a>';

    // Use a short connect timeout for this request only.
    $old = ini_set('default_socket_timeout', (string) $timeout);
    $file = fopen($filename, "r");
    if ($old !== false) {
      ini_set('default_socket_timeout', $old);
    }

    if (!$file) {
      $result_google[] = "<b>Unable to open remote file $filename.</b>";
      continue;
    }

    // Read in blocking mode so the read timeout applies; the original
    // switched to non-blocking mode and busy-looped until the peer closed.
    stream_set_timeout($file, $timeout);
    $pageData = '';
    while (!feof($file)) {
      $chunk = fgets($file, 1024);
      if ($chunk === false) {
        // EOF, read error or timeout.
        break;
      }
      $pageData .= $chunk;
    }
    fclose($file);

    // <a href="{site_url}" class=l onmousedown="return clk(this.href,'','','res','2','')">
    if(preg_match_all('/\<a href=\"(.*?)\" class=l.*?\>(.*?)\<\/a\>/',$pageData,$matches, PREG_SET_ORDER)) {
      foreach( $matches AS $match ){
        $position++;
        $foundUrl   = strtolower(preg_replace('/\"/','',$match[1]));
        $foundTitle = $match[2];
        // Plain substring test: building a regex with addslashes() broke as
        // soon as $searchurl contained "/" or another regex metacharacter.
        if(strpos($foundUrl, $searchurl) !== false){
          $position_match = $position;
          $result_google[] = $position.') <a href="'.$foundUrl.'" target="_blank">'.$foundTitle.'</a> - Match Found';
          // Stop paging as well; later pages used to be fetched regardless.
          break 2;
        } else if($showlinks)
          $result_google[] = $position.') <a href="'.$foundUrl.'" target="_blank">'.$foundTitle.'</a>';
      }
    }

  }

  if($position_match) {
    $result_google[] = "The site $searchurl is at position $position_match for the term <b>$searchquery</b> on $siteName";
  } else  {
    $result_google[] = "The site $searchurl is not in the top $position for the term <b>$searchquery</b> on $siteName";
  }

  return $result_google;
}

/**
 * Find the rank of a site in Yahoo's results for a search phrase.
 *
 * Result pages are fetched ten hits at a time (always full sets of ten) until
 * $searchtotal positions have been covered or a hit whose URL contains
 * $searchurl is found. Every step is reported as an HTML fragment in the
 * returned list, ending with a summary line.
 *
 * @param string $searchquery Search phrase; spaces become "+" and "%26"
 *                            becomes "&" before it is put in the query URL.
 * @param string $searchurl   Text to look for in result URLs; compared
 *                            lower-cased as a plain substring.
 * @param int    $searchtotal Number of positions to inspect.
 * @param mixed  $showlinks   When truthy, non-matching hits are listed too.
 * @return array List of HTML message strings.
 */
function seo_yahoo_position($searchquery,$searchurl,$searchtotal=10,$showlinks=1) {

  $result_yahoo = array();

  if( !$searchquery || !$searchurl ) {
    $result_yahoo[] = "The Search Criteria is Invalid";
    return $result_yahoo;
  }

  // Prepare Query
  $query = str_replace(" ","+",$searchquery);
  $query = str_replace("%26","&",$query);
  $searchurl = strtolower($searchurl);

  $siteName       = 'Yahoo';
  $hits_per_page  = 10;
  $position       = 0;
  $position_match = 0;
  $timeout        = 3;

  for($page_start = 0; $page_start < $searchtotal; $page_start += $hits_per_page) {

    // Yahoo Requires Sets of 10, so the last page is not shortened.
    $num = $hits_per_page;

    // Search Query
    $filename = "http://search.yahoo.com/search?p=$query&n={$num}&ei=UTF-8&va_vt=any&vo_vt=any"
      . "&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&pstart=5&b=".($page_start+1);

    $result_yahoo[] = "Checking $siteName positions $page_start .. ".($page_start+$num).' <a href="'.$filename.'" target="_blank">View</a>';

    // Use a short connect timeout for this request only.
    $old = ini_set('default_socket_timeout', (string) $timeout);
    $file = fopen($filename, "r");
    if ($old !== false) {
      ini_set('default_socket_timeout', $old);
    }

    if (!$file) {
      $result_yahoo[] = "<b>Unable to open remote file $filename.</b>";
      continue;
    }

    // Read in blocking mode so the read timeout applies; the original
    // switched to non-blocking mode and busy-looped until the peer closed.
    stream_set_timeout($file, $timeout);
    $pageData = '';
    while (!feof($file)) {
      $chunk = fgets($file, 1024);
      if ($chunk === false) {
        // EOF, read error or timeout.
        break;
      }
      $pageData .= $chunk;
    }
    fclose($file);

    // Result links look like <a class="yschttl" href="{site_url}" ...>title</a>
    if(preg_match_all('/\<a class=\"yschttl\" href=\"(.*?)\".*?\>(.*?)\<\/a\>/',$pageData,$matches, PREG_SET_ORDER)) {
      foreach( $matches AS $match ){
        $position++;
        $foundUrl   = strtolower(preg_replace('/^.?\*\*/','',$match[1]));
        $foundUrl   = html_entity_decode($foundUrl);
        $foundTitle = $match[2];
        // Plain substring test: building a regex with addslashes() broke as
        // soon as $searchurl contained "/" or another regex metacharacter.
        if(strpos($foundUrl, $searchurl) !== false){
          $position_match = $position;
          $result_yahoo[] = $position.') <a href="'.$foundUrl.'" target="_blank">'.$foundTitle.'</a> - Match Found';
          // Stop paging as well; later pages used to be fetched regardless.
          break 2;
        } else if($showlinks)
          $result_yahoo[] = $position.') <a href="'.$foundUrl.'" target="_blank">'.$foundTitle.'</a>';
      }
    }

  }

  if($position_match) {
    $result_yahoo[] = "The site $searchurl is at position $position_match for the term <b>$searchquery</b> on $siteName";
  } else  {
    $result_yahoo[] = "The site $searchurl is not in the top $position for the term <b>$searchquery</b> on $siteName";
  }

  return $result_yahoo;
}

/**
 * Keyword density analysis of a web page.
 *
 * Downloads $url, removes script blocks and tags, optionally prepends the
 * page's description and keywords meta tags, drops stop words
 * (kda_stopWords) and punctuation (kda_clean), then counts every single
 * word, two-word phrase and three-word phrase. Only entries seen more than
 * once and longer than two characters are reported.
 *
 * The returned array mixes two kinds of keys:
 *  - 1, 2 and 3: maps of phrase => density in percent, sorted descending
 *    (key 1 is always present; 2 and 3 only when such phrases qualify);
 *  - each qualifying phrase itself => its raw occurrence count.
 *
 * @param string $url               Page address; "http://" is prepended when
 *                                  it does not start with http:// or https://.
 * @param int    $total             Set to the number of space-separated
 *                                  tokens left after cleaning.
 * @param mixed  $use_meta_tags     Truthy to include the meta tags (this
 *                                  makes get_meta_tags() fetch the page again).
 * @param mixed  $use_partial_total Truthy to compute density against the
 *                                  occurrences of same-length phrases instead
 *                                  of against $total.
 * @return array|false The density data, or false when the page cannot be
 *                     fetched or is empty.
 */
function kda($url, &$total, $use_meta_tags, $use_partial_total) {
  $ws = array();
  $occurances = array();
  // Key 1 always exists so callers can iterate it even when nothing repeats.
  $dens = array(1 => array());

  // Anchored and https-aware: the old substring test turned https:// URLs
  // into "http://https://...".
  if (!preg_match('#^https?://#i', $url)) {
    $url = 'http://'.$url;
  }

  // Fetch once; the original downloaded the page a second time right away.
  $html = @file_get_contents($url);
  if (!$html) {
    return false;
  }
  $html = html_entity_decode($html);

  $meta_tags = ($use_meta_tags) ? get_meta_tags($url) : array();
  if (!is_array($meta_tags)) {
    $meta_tags = array();
  }
  $description = isset($meta_tags['description']) ? $meta_tags['description'] : '';
  $keywords    = isset($meta_tags['keywords']) ? $meta_tags['keywords'] : '';

  $no_html = strip_tags(kda_strip_tag_script($html));
  $text = $description." ".$keywords." ".$no_html;
  $text = kda_clean(kda_stopWords($text));

  $words = array_map('trim', explode(' ', $text));
  $total = count($words);

  // Count each word plus the two- and three-word phrases that start at it.
  // A phrase ends at the first empty token.
  for ($x = 0; $x < $total; $x++) {
    $phrase = '';
    for ($len = 0; $len < 3; $len++) {
      $word = isset($words[$x + $len]) ? $words[$x + $len] : '';
      if ($word === '') {
        break;
      }
      $phrase = ($len === 0) ? $word : $phrase." ".$word;
      $ws[$phrase] = isset($ws[$phrase]) ? $ws[$phrase] + 1 : 1;
    }
  }

  // Total occurrences per phrase length, counting only reportable entries.
  foreach ($ws as $word => $count) {
    $word = (string) $word;
    if ($count > 1 && strlen($word) > 2) {
      $phrase_size = count(explode(' ', $word));
      $occurances[$phrase_size] = (isset($occurances[$phrase_size]) ? $occurances[$phrase_size] : 0) + $count;
    }
  }

  foreach ($ws as $word => $count) {
    $word = (string) $word;
    if ($count > 1 && strlen($word) > 2) {
      $phrase_size = count(explode(' ', $word));
      $ttlWords = ($use_partial_total) ? $occurances[$phrase_size] : $total;
      $dens[$phrase_size][$word] = round(($count / $ttlWords) * 100, 2);
      $dens[$word] = $count;
    }
  }

  // Sort only the groups that exist; arsort() on a missing key used to fail.
  for ($size = 1; $size <= 3; $size++) {
    if (!empty($dens[$size])) {
      arsort($dens[$size]);
    }
  }

  return $dens;
}

/**
 * Remove every <script ...> ... </script> block from an HTML string.
 *
 * Tag names are matched case-insensitively. An opening tag with no closing
 * tag after it is left in place, as is a closing tag with no opening tag
 * before it.
 *
 * @param string $html Markup to filter.
 * @return string The markup without complete script blocks.
 */
function kda_strip_tag_script($html) {
  $closeTag = '</script>';
  $kept = '';
  $offset = 0;

  // Walk forward through the string once. The original searched from the
  // start on every pass and, on PHP 8, never terminated when no script tag
  // was present because strpos() with an empty needle returns 0.
  while (preg_match("/<script[^>]*>/i", $html, $open, PREG_OFFSET_CAPTURE, $offset)) {
    $openPos = $open[0][1];
    $closePos = stripos($html, $closeTag, $openPos + strlen($open[0][0]));
    if ($closePos === false) {
      // Unterminated block: keep the remainder untouched.
      break;
    }
    $kept .= substr($html, $offset, $openPos - $offset);
    $offset = $closePos + strlen($closeTag);
  }

  // Everything after the last removed block, including a single trailing
  // character, which the original dropped.
  return $kept . substr($html, $offset);
}

/**
 * Normalise text for keyword counting.
 *
 * Deletes the characters . , ( ) _ * " - ! ? / | and newlines, turns the
 * entity &#8217; into an apostrophe, then lower-cases and trims the result.
 *
 * @param string $text Raw text.
 * @return string Cleaned, lower-cased text.
 */
function kda_clean($text) {
  global $stopwords_file; // declared by the original code; not read here

  // str_replace() applies the list in order, like the original call chain.
  $strip = array('.', ',', '(', ')', '_', '*', '"', '-', "!", "?", "\n", '/', '|');
  $stripped = str_replace($strip, '', $text);
  $stripped = str_replace('&#8217;', "'", $stripped);

  return trim(strtolower($stripped));
}

/**
 * Remove stop words from a space-separated string.
 *
 * The stop-word list is read, one word per line, from
 * <$mosConfig_absolute_path>/administrator/components/com_sef/includes/seo_words/seo_words_<language>.txt
 * where <language> is $sefConfig->backend_language. Comparison is
 * case-insensitive on the trimmed token. The global $sw_count is incremented
 * for every token removed. When the list cannot be read nothing is removed.
 *
 * @param string $term Text to filter.
 * @return string Every kept token preceded by a single space, so a non-empty
 *                result starts with a space.
 */
function kda_stopWords($term) {
  global $mosConfig_absolute_path;
  global $sw_count, $sefConfig;

  //load list of common words, keyed by word for constant-time lookups
  $word_file = $mosConfig_absolute_path.'/administrator/components/com_sef/includes/seo_words/seo_words_'.$sefConfig->backend_language.'.txt';
  $common = array();
  if (is_readable($word_file)) {
    $lines = file($word_file);
    if ($lines !== false) {
      foreach ($lines as $entry) {
        $common[trim(strtolower($entry))] = true;
      }
    }
  }

  //keep every token that is not a common word
  $clean_term = "";
  foreach (explode(" ", $term) as $word) {
    if (isset($common[strtolower(trim($word))])) {
      $sw_count++;
    } else {
      $clean_term .= " ".$word;
    }
  }
  return $clean_term;
}
// Return current item: Joomla SEF / SEO - extending OpenSEF