tool : pagerank per url from a sitemap

I wired a google pagerank toolbar-query snippet to a simplexml sitemap readout, and put it on a page. You can fill in a sitemap url and get the google pageranks of all ‘mapped’ urls.

It works, I stripped it down and you can download it here or on the sample page.

I mainly wanted the snippet wired to a sitemap to compare the results of my pagerank spider tool with an actual google readout. Running a sitemap through a toolbar query snippet is the fastest way.

I already had a spider result of siteometrics (calc pr), so now I can compare it to google’s toolbar query on http://www.siteometrics.com/sitemap.xml :

google pr calc pr URL
2 http://www.siteometrics.com/
2 0.80 /index.php
0 0.32 /advertise.html
0.77 /recommend.php
0.75 /search-engine-saturation.php
0 0.75 /link-popularity.php
0 0.75 /pagerank.php
0 0.75 /bulk-pagerank.php
0 0.75 /pagerank-mult-pages.php
0 0.75 /link-pop-pagerank.php
0.75 /link-search-pagerank.php
0 0.75 /alexa.php
0 0.75 /bulk-alexa.php
0 0.75 /serpcheck.php
0 0.75 /keyword-research.php
0 0.67 /visitor-info.php
0.24 /useful-links.html
0 0.24 /contact-us.html
0.24 /sitemap.html
0.24 /privacy-policy.html

Weird result, the sitemap they issue is part old site, part new site. If you check the pageranks on the newer .php files it’s the same, though.

A quarter of the urls link into the archived site; that might cause the drop in pagerank (links to /feed and google.com on every page, see the other article on siteometrics).


for the freaks : here’s the php code (assume url is a valid sitemap-url).

  1.  
  // Entry point: read the sitemap url from the request and print one
  // "rank;url;" line for every <url><loc> entry in that sitemap.
  $myurl = (isset($_REQUEST['url']) && is_string($_REQUEST['url'])) ? trim($_REQUEST['url']) : '';

  // The url is untrusted input: only fetch http(s) sitemaps, so local paths
  // and other stream wrappers (file://, php://, ...) are never opened.
  if (!preg_match('#^https?://#i', $myurl)) {
      echo 'Please supply a valid http(s) sitemap url.';
      exit;
  }

  $xml = simplexml_load_file($myurl);
  if ($xml === false) {
      // simplexml_load_file() returns false when the document cannot be loaded or parsed.
      echo 'Could not load or parse the sitemap.';
      exit;
  }

  foreach ($xml->url as $u) {
      // <loc> values come from a remote document, so escape them before echoing into HTML.
      echo htmlspecialchars(pagerank((string) $u->loc), ENT_QUOTES, 'UTF-8') . "<br />";
  }
  exit;
  6.  
  /**
   * Look up the toolbar PageRank of one url and format the result.
   *
   * @param string $url Page url; "http://" is prepended when it carries no scheme.
   * @return string "<rank>;<url>;" where <rank> is whatever curl_getpr() returned
   *                and <url> is the url that was actually queried.
   */
  function pagerank($url) {
      // The previous pattern made the "http://" group optional, so it matched any
      // host-like string and the prefix was never added for scheme-less input.
      // Only skip the prefix when a real "scheme://" is present.
      if (!preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
          $url = 'http://' . $url;
      }
      $pr = curl_getpr($url);
      return $pr . ';' . $url . ';';
  }
  12.  
  /**
   * Build the "ch" checksum parameter for a url: hash the url with HashURL()
   * and pass that hash through CheckHash().
   *
   * @param string $url Url to build the checksum for.
   * @return string Value returned by CheckHash().
   */
  function getch($url) {
      $hash = HashURL($url);
      return CheckHash($hash);
  }
  14.  
  /**
   * Query toolbarqueries.google.com for the rank of a url.
   *
   * @param string $url Url to look up.
   * @return string The trimmed text found 9 characters after the start of "Rank_"
   *                in the response, or '-' when the request fails, the response
   *                is empty, or it contains no "Rank_" marker.
   */
  function curl_getpr($url) {
      $googleua = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5';

      $ch = getch($url);

      // Encode the looked-up url so that its own '&', '#', '?' or spaces cannot
      // terminate or alter the q= parameter of the toolbar query.
      $form = 'http://toolbarqueries.google.com/search?client=navclient-auto&ch=' . $ch
            . '&features=Rank&q=info:' . urlencode($url);

      $cr = curl_init($form);
      if ($cr === false) {
          return '-';
      }
      curl_setopt($cr, CURLOPT_FAILONERROR, true);
      curl_setopt($cr, CURLOPT_HEADER, 0);
      curl_setopt($cr, CURLOPT_USERAGENT, $googleua); // Spoof the user-agent
      curl_setopt($cr, CURLOPT_RETURNTRANSFER, true);
      $data = curl_exec($cr);

      // Single cleanup point: the handle is not needed once the body is in $data.
      curl_close($cr);
      unset($cr);

      if (!$data) {
          return '-';
      }

      $pos = strpos($data, "Rank_");
      if ($pos === false) {
          return '-';
      }

      // Skip 9 characters from the start of "Rank_" (presumably a "Rank_x:y:"
      // prefix - confirm against a live response) and keep the remainder.
      $pr = substr($data, $pos + 9);
      $pr = trim($pr);
      $pr = str_replace("\n", '', $pr);
      return $pr;
  }
  51.  
  52. //PageRank Lookup v1.1 by HM2K (update: 31/01/07)
  53. //based on an algorithm found at: http://pagerank.gamesaga.net/
  54. //live demo: http://www.highrankforum.com/pagerank.php
  55.  
  /**
   * Convert a string to a 32-bit integer hash.
   *
   * For every byte of $Str the running value is multiplied by $Magic, reduced
   * modulo 2^32 once it reaches that bound, and then the byte value is added.
   *
   * @param string    $Str   Input string (processed byte by byte).
   * @param int|float $Check Initial value of the running hash.
   * @param int       $Magic Multiplier applied before each byte is added.
   * @return int|float The final running value.
   */
  function StrToNum($Str, $Check, $Magic) {
      $Int32Unit = 4294967296;  // 2^32

      $length = strlen($Str);
      for ($i = 0; $i < $length; $i++) {
          $Check *= $Magic;
          if ($Check >= $Int32Unit) {
              // Subtract the whole multiples of 2^32 (the '-' operator had been
              // lost from this line, leaving a syntax error).
              $Check = ($Check - $Int32Unit * (int) ($Check / $Int32Unit));
              //if the check less than -2^31
              $Check = ($Check < -2147483648) ? ($Check + $Int32Unit) : $Check;
          }
          // Square-bracket offsets: the {$i} form was removed in PHP 8.0.
          $Check += ord($Str[$i]);
      }
      return $Check;
  }
  72.  
  /**
   * Generate a hash for a url.
   *
   * Two StrToNum() hashes of the string (different seeds and multipliers) are
   * computed; the first is bit-shuffled and the two are interleaved by masks
   * into one integer.
   *
   * @param string $String Url to hash.
   * @return int Combined hash ($T1 | $T2).
   */
  function HashURL($String) {
      $Check1 = StrToNum($String, 0x1505, 0x21);
      $Check2 = StrToNum($String, 0, 0x1003F);

      $Check1 >>= 2;
      $Check1 = (($Check1 >> 4) & 0x3FFFFC0 ) | ($Check1 & 0x3F);
      $Check1 = (($Check1 >> 4) & 0x3FFC00 ) | ($Check1 & 0x3FF);
      $Check1 = (($Check1 >> 4) & 0x3C000 ) | ($Check1 & 0x3FFF);

      // Both shifts on the $T1 line are left shifts; the operators had been
      // mangled by HTML escaping ("< <" and "<&lt;") and no longer parsed.
      $T1 = (((($Check1 & 0x3C0) << 4) | ($Check1 & 0x3C)) << 2 ) | ($Check2 & 0xF0F );
      $T2 = (((($Check1 & 0xFFFFC000) << 4) | ($Check1 & 0x3C00)) << 0xA) | ($Check2 & 0xF0F0000 );

      return ($T1 | $T2);
  }
  86.  
  /**
   * Generate a checksum for the hash string.
   *
   * The hash is rendered as an unsigned decimal string; its digits are summed
   * from right to left, with every second digit doubled and reduced to the sum
   * of its own digits. A check digit is derived from that sum and the result is
   * returned as '7' . checkDigit . hashString.
   *
   * @param int $Hashnum Hash value, formatted with sprintf('%u').
   * @return string Checksum-prefixed hash string.
   */
  function CheckHash($Hashnum) {
      $CheckByte = 0;
      $Flag = 0;

      $HashStr = sprintf('%u', $Hashnum);
      $length = strlen($HashStr);

      // Walk the digits from last to first (the "- 1" and "--" had been lost
      // from this loop header, leaving a syntax error).
      for ($i = $length - 1; $i >= 0; $i--) {
          // Square-bracket offsets: the {$i} form was removed in PHP 8.0.
          $Re = (int) $HashStr[$i];
          if (1 === ($Flag % 2)) {
              // Every second digit is doubled, then folded back to one digit.
              $Re += $Re;
              $Re = (int)($Re / 10) + ($Re % 10);
          }
          $CheckByte += $Re;
          $Flag++;
      }

      $CheckByte %= 10;
      if (0 !== $CheckByte) {
          // Complement to 10 (the '-' operator had been lost here as well).
          $CheckByte = 10 - $CheckByte;
          if (1 === ($Flag % 2)) {
              if (1 === ($CheckByte % 2)) {
                  $CheckByte += 9;
              }
              $CheckByte >>= 1;
          }
      }

      return '7' . $CheckByte . $HashStr;
  }
Posted in pagerank, php, tool and tagged , , .

Leave a Reply

Your email address will not be published. Required fields are marked *