I wired a Google PageRank toolbar-query snippet to a SimpleXML sitemap reader and put it on a page. You can fill in a sitemap URL and get the Google PageRank of every URL listed in that sitemap.
It works. I stripped it down, and you can download it here or on the sample page.
I mainly wanted the snippet wired to a sitemap to compare the results of my pagerank spider tool with an actual google readout. Running a sitemap through a toolbar query snippet is the fastest way.
I already had a spider result for siteometrics (calc pr), so now I can compare it to Google’s toolbar query on http://www.siteometrics.com/sitemap.xml :
google pr | calc pr | URL |
2 | | http://www.siteometrics.com/ |
2 | 0.80 | /index.php |
0 | 0.32 | /advertise.html |
– | 0.77 | /recommend.php |
– | 0.75 | /search-engine-saturation.php |
0 | 0.75 | /link-popularity.php |
0 | 0.75 | /pagerank.php |
0 | 0.75 | /bulk-pagerank.php |
0 | 0.75 | /pagerank-mult-pages.php |
0 | 0.75 | /link-pop-pagerank.php |
– | 0.75 | /link-search-pagerank.php |
0 | 0.75 | /alexa.php |
0 | 0.75 | /bulk-alexa.php |
0 | 0.75 | /serpcheck.php |
0 | 0.75 | /keyword-research.php |
0 | 0.67 | /visitor-info.php |
– | 0.24 | /useful-links.html |
0 | 0.24 | /contact-us.html |
– | 0.24 | /sitemap.html |
– | 0.24 | /privacy-policy.html |
Weird result: the sitemap they publish is part old site, part new site. If you check the pageranks of the newer .php files, they are all the same, though.
A quarter of the URLs link into the archived site, which might cause the drop in PageRank (links to /feed and google.com on every page; see the other article on siteometrics).
For the freaks: here’s the PHP code (it assumes url is a valid sitemap URL).
-
-
// Entry point: read a sitemap URL from the request and print one
// "rank;url;" line (followed by <br />) per <url><loc> entry in that sitemap.
$myurl = (isset($_REQUEST['url']) && is_string($_REQUEST['url'])) ? trim($_REQUEST['url']) : '';

// Only remote http(s) documents are accepted: simplexml_load_file() opens any
// path or stream wrapper it is given, so a visitor-supplied value must not be
// allowed to name a local file.
if (!preg_match('/^https?:\/\//i', $myurl)) {
    exit('Please supply a sitemap URL starting with http:// or https://');
}

$xml = simplexml_load_file($myurl);

// simplexml_load_file() returns false when the document cannot be fetched or parsed.
if ($xml === false) {
    exit('Could not load sitemap: ' . htmlspecialchars($myurl, ENT_QUOTES, 'UTF-8'));
}

foreach ($xml->url as $u) {
    // <loc> text comes from a third-party document; escape it before it reaches the HTML output.
    echo htmlspecialchars(pagerank((string) $u->loc), ENT_QUOTES, 'UTF-8') . "<br />";
}

exit;
-
-
/**
 * Look up the toolbar PageRank for one URL and format it as a result line.
 *
 * @param string $url Page URL. Surrounding whitespace is removed and "http://"
 *                    is prepended when the value has no http(s) scheme.
 * @return string "<rank>;<url>;" where <rank> is the value returned by
 *                curl_getpr() and <url> is the normalised URL that was queried.
 */
function pagerank($url) {
    $url = trim($url);

    // The scheme has to be mandatory in this pattern: with an optional scheme
    // group the pattern also matches bare host names, and the prefix below
    // would never be added.
    if (!preg_match('/^https?:\/\//i', $url)) {
        $url = 'http://' . $url;
    }

    $pr = curl_getpr($url);

    return $pr . ';' . $url . ';';
}
-
-
/**
 * Build the "ch" request parameter for a URL.
 *
 * The URL is first hashed with HashURL(); the resulting number is then
 * passed through CheckHash(), whose return value is handed back unchanged.
 *
 * @param string $url URL to compute the parameter for.
 * @return string Value returned by CheckHash().
 */
function getch($url) {
    $hashed = HashURL($url);

    return CheckHash($hashed);
}
-
-
/**
 * Query toolbarqueries.google.com for the rank of a URL.
 *
 * @param string $url Absolute URL to look up.
 * @return string The text that follows the "Rank_" marker in the response
 *                (trimmed, newlines removed), or '-' when the request fails,
 *                the body is empty, or the marker is absent.
 */
function curl_getpr($url) {
    $googleua = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5';

    // The checksum is computed over the raw URL; only the copy placed in the
    // query string is percent-encoded, so characters such as "&", "#" or
    // spaces inside $url cannot break or truncate the request.
    $ch = getch($url);
    $form = 'http://toolbarqueries.google.com/search?client=navclient-auto&ch=' . $ch
          . '&features=Rank&q=info:' . urlencode($url);

    $cr = curl_init($form);
    if ($cr === false) {
        return '-';
    }

    curl_setopt($cr, CURLOPT_FAILONERROR, true);      // treat HTTP >= 400 as a failed transfer
    curl_setopt($cr, CURLOPT_HEADER, 0);              // body only
    curl_setopt($cr, CURLOPT_USERAGENT, $googleua);   // Spoof the user-agent
    curl_setopt($cr, CURLOPT_RETURNTRANSFER, true);   // return the body instead of printing it

    $data = curl_exec($cr);

    // The handle is no longer needed whatever the outcome; release it once here.
    curl_close($cr);
    unset($cr);

    // false on transport/HTTP error, or an empty body.
    if (!$data) {
        return '-';
    }

    $pos = strpos($data, "Rank_");
    if ($pos === false) {
        return '-';
    }

    // Skip 9 characters from the start of the marker; presumably the response
    // has the form "Rank_1:1:<value>" - TODO confirm against a live response.
    $pr = trim(substr($data, $pos + 9));

    return str_replace("\n", '', $pr);
}
-
-
//PageRank Lookup v1.1 by HM2K (update: 31/01/07)
-
//based on an algorithm found at: http://pagerank.gamesaga.net/
-
//live demo: http://www.highrankforum.com/pagerank.php
-
-
//convert a string to a 32-bit integer
/**
 * Fold the bytes of a string into a running checksum.
 *
 * For every byte the checksum is multiplied by $Magic, reduced modulo 2^32
 * when it has reached or exceeded 2^32, and then the byte value is added.
 *
 * @param string    $Str   Input string, processed byte by byte.
 * @param int|float $Check Initial checksum value.
 * @param int       $Magic Multiplier applied before each byte is added.
 * @return int|float Final checksum; returns $Check unchanged for an empty string.
 */
function StrToNum($Str, $Check, $Magic) {
    $Int32Unit = 4294967296; // 2^32

    $length = strlen($Str);
    for ($i = 0; $i < $length; $i++) {
        $Check *= $Magic;
        if ($Check >= $Int32Unit) {
            // Drop every whole multiple of 2^32 (i.e. $Check mod 2^32).
            $Check = ($Check - $Int32Unit * (int) ($Check / $Int32Unit));
            //if the check less than -2^31
            $Check = ($Check < -2147483648) ? ($Check + $Int32Unit) : $Check;
        }
        // Square-bracket offset: the curly-brace form no longer parses on PHP 8.
        $Check += ord($Str[$i]);
    }

    return $Check;
}
-
-
//generate a hash for a url
/**
 * Combine two StrToNum() checksums of a string into a single hash number.
 *
 * @param string $String Text to hash (getch() passes the page URL).
 * @return int Bitwise OR of the two intermediate values $T1 and $T2.
 */
function HashURL($String) {
    // Two independent checksums of the same string, with different seeds and multipliers.
    $Check1 = StrToNum($String, 0x1505, 0x21);
    $Check2 = StrToNum($String, 0, 0x1003F);

    // Successive shift-and-mask steps that compact $Check1.
    $Check1 >>= 2;
    $Check1 = (($Check1 >> 4) & 0x3FFFFC0) | ($Check1 & 0x3F);
    $Check1 = (($Check1 >> 4) & 0x3FFC00) | ($Check1 & 0x3FF);
    $Check1 = (($Check1 >> 4) & 0x3C000) | ($Check1 & 0x3FFF);

    // Interleave selected bit groups of $Check1 with masked bits of $Check2.
    $T1 = (((($Check1 & 0x3C0) << 4) | ($Check1 & 0x3C)) << 2) | ($Check2 & 0xF0F);
    $T2 = (((($Check1 & 0xFFFFC000) << 4) | ($Check1 & 0x3C00)) << 0xA) | ($Check2 & 0xF0F0000);

    return ($T1 | $T2);
}
-
-
//generate a checksum for the hash string
/**
 * Prefix a hash number with '7' and a single check digit.
 *
 * The hash is rendered as an unsigned decimal string. Its digits are summed
 * from right to left, with every second digit doubled and reduced to the sum
 * of its own digits, and the check digit is derived from that sum.
 *
 * @param int $Hashnum Hash value (as produced by HashURL()).
 * @return string '7' . <check digit> . <unsigned decimal form of $Hashnum>
 */
function CheckHash($Hashnum) {
    $CheckByte = 0;
    $Flag = 0;

    $HashStr = sprintf('%u', $Hashnum);
    $length = strlen($HashStr);

    for ($i = $length - 1; $i >= 0; $i--) {
        // Work on the numeric value of the digit, not on the one-character string.
        $Re = (int) $HashStr[$i];
        if (1 === ($Flag % 2)) {
            // Every second digit (counting from the right) is doubled and the
            // two digits of the result are added together.
            $Re += $Re;
            $Re = (int) ($Re / 10) + ($Re % 10);
        }
        $CheckByte += $Re;
        $Flag++;
    }

    $CheckByte %= 10;
    if (0 !== $CheckByte) {
        $CheckByte = 10 - $CheckByte;
        // After the loop $Flag equals the digit count; odd-length strings get
        // the extra halving step below.
        if (1 === ($Flag % 2)) {
            if (1 === ($CheckByte % 2)) {
                $CheckByte += 9;
            }
            $CheckByte >>= 1;
        }
    }

    return '7' . $CheckByte . $HashStr;
}