PHP判断来访是搜索引擎蜘蛛还是普通用户的代码小结
1、推荐的一种方法:用 PHP 判断来访者是搜索引擎蜘蛛(爬虫)还是真人访问,代码摘自 Discuz! X3.2
<?php
/**
 * Decide whether a request looks like it comes from a search-engine crawler.
 *
 * The user agent is lower-cased and checked in two steps:
 *   1. If it contains no "http://" and contains a browser keyword,
 *      it is treated as a human visitor.
 *   2. Otherwise it is a crawler when it contains one of the spider keywords.
 *
 * @param string $useragent User-agent string to classify; when empty, the
 *                          HTTP_USER_AGENT of the current request is used.
 * @return bool true for a crawler, false otherwise (including a missing UA).
 */
function checkrobot($useragent = '')
{
    static $kw_spiders = array('bot', 'crawl', 'spider', 'slurp', 'sohu-search', 'lycos', 'robozilla');
    static $kw_browsers = array('msie', 'netscape', 'opera', 'konqueror', 'mozilla');

    if (empty($useragent)) {
        // The header is optional (CLI, some HTTP clients), so do not assume the key exists.
        $useragent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    }
    $useragent = strtolower((string) $useragent);

    if (strpos($useragent, 'http://') === false && dstrpos($useragent, $kw_browsers)) {
        return false;
    }
    if (dstrpos($useragent, $kw_spiders)) {
        return true;
    }

    return false;
}

/**
 * Check whether $string contains any of the given needles.
 *
 * @param string       $string      Haystack to search.
 * @param array|string $arr         One needle or a list of needles.
 * @param bool         $returnvalue When true, return the matching needle instead of true.
 * @return bool|string false when nothing matches (or the haystack is empty),
 *                     otherwise true or the first matching needle.
 */
function dstrpos($string, $arr, $returnvalue = false)
{
    if (empty($string)) {
        return false;
    }

    foreach ((array) $arr as $v) {
        $v = (string) $v;
        // Skip empty needles: strpos() warns on them before PHP 8 and matches everything from PHP 8 on.
        if ($v === '') {
            continue;
        }
        if (strpos($string, $v) !== false) {
            return $returnvalue ? $v : true;
        }
    }

    return false;
}

if (checkrobot()) {
    echo '机器人爬虫';
} else {
    echo '人';
}
?>
实际应用中可以这样判断:只有当来访者不是搜索引擎时才执行相应操作
<?php
// Run the guarded work only for visitors that checkrobot() does not classify as a crawler.
if (!checkrobot()) {
    // do something
}
?>
2、第二种方法:
使用PHP实现蜘蛛访问日志统计
// Spider access log: identify the crawler from the user agent and append one
// tab-separated line (time, client IP, crawler label, requested URL) to bot.txt.

// The User-Agent header is optional, so fall back to an empty string.
$useragent = addslashes(strtolower(isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : ''));

// Signature => label, checked in order; the first match wins.
// All signatures are lower-case because the user agent is lower-cased above.
// Specific signatures come before the generic ones they contain
// ('msnbot' before 'msn', everything before the catch-all 'bot').
// NOTE(review): the Sogou signatures are written with spaces ("sogou web spider"
// style), as that crawler is understood to announce itself - confirm against Sogou's docs.
$botSignatures = array(
    'googlebot'            => 'Google',
    'mediapartners-google' => 'GoogleAdsense',
    'baiduspider'          => 'Baidu',
    'sogou spider'         => 'Sogou',
    'sogou web'            => 'Sogouweb',
    'sosospider'           => 'SOSO',
    '360spider'            => '360Spider',
    'yahoo'                => 'Yahoo',
    'msnbot'               => 'msnbot',
    'msn'                  => 'MSN',
    'sohu'                 => 'Sohu',
    'yodaobot'             => 'Yodao',
    'twiceler'             => 'Twiceler',
    'ia_archiver'          => 'Alexa_',
    'iaarchiver'           => 'Alexa',
    'slurp'                => '雅虎',
    'bot'                  => '其它蜘蛛',
);

foreach ($botSignatures as $signature => $label) {
    if (strpos($useragent, (string) $signature) !== false) {
        $bot = $label;
        break;
    }
}

if (isset($bot)) {
    $fp = @fopen('bot.txt', 'a');
    // Logging is best-effort: if the file cannot be opened, skip the write
    // instead of calling fwrite()/fclose() on false.
    if ($fp !== false) {
        fwrite($fp, date('Y-m-d H:i:s') . "\t" . $_SERVER["REMOTE_ADDR"] . "\t" . $bot . "\t" . 'http://' . $_SERVER['SERVER_NAME'] . $_SERVER["REQUEST_URI"] . "\r\n");
        fclose($fp);
    }
}
3、第三种方法:
我们可以通过 HTTP_USER_AGENT 来判断来访者是否是蜘蛛。搜索引擎的蜘蛛都有自己的独特标志,下面列举了一部分。
/**
 * Tell whether the current request's user agent contains a known crawler keyword.
 *
 * Matching is a case-insensitive substring test against a short keyword list.
 *
 * @return bool true when a crawler keyword is found, false otherwise
 *              (including when no User-Agent header was sent).
 */
function is_crawler()
{
    // The User-Agent header is optional, so fall back to an empty string.
    $userAgent = strtolower(isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');

    $spiders = array(
        'Googlebot',    // Google crawler
        'Baiduspider',  // Baidu crawler
        'Yahoo! Slurp', // Yahoo crawler
        'YodaoBot',     // Youdao crawler
        'msnbot',       // Bing crawler
        // add more crawler keywords here
    );

    foreach ($spiders as $spider) {
        if (strpos($userAgent, strtolower($spider)) !== false) {
            return true;
        }
    }

    return false;
}
下面的php代码附带了更多的蜘蛛标识
/**
 * Tell whether the current request's user agent contains one of a longer
 * list of crawler / tool keywords (case-insensitive substring match).
 *
 * @return bool true when a keyword is found; false when none matches or
 *              when the request carries no (or an empty) User-Agent header.
 */
function isCrawler()
{
    // The User-Agent header is optional, so fall back to an empty string.
    $agent = strtolower(isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');
    if (empty($agent)) {
        return false;
    }

    // NOTE(review): "Sogou web spider" and "Yahoo! Slurp" are written with spaces,
    // as those crawlers are understood to announce themselves; the other
    // entries are kept exactly as listed - confirm against current crawler docs.
    $spiderSite = array(
        "TencentTraveler",
        "Baiduspider+",
        "BaiduGame",
        "Googlebot",
        "msnbot",
        "Sosospider+",
        "Sogou web spider",
        "ia_archiver",
        "Yahoo! Slurp",
        "YoudaoBot",
        "YahooSlurp",
        "MSNBot",
        "Java(Oftenspambot)",
        "BaiDuSpider",
        "Voila",
        "Yandexbot",
        "BSpider",
        "twiceler",
        "SogouSpider",
        "SpeedySpider",
        "GoogleAdSense",
        "Heritrix",
        "Python-urllib",
        "Alexa(IAArchiver)",
        "Ask",
        "Exabot",
        "Custo",
        "OutfoxBot/YodaoBot",
        "yacy",
        "SurveyBot",
        "legs",
        "lwp-trivial",
        "Nutch",
        "StackRambler",
        "Thewebarchive(IAArchiver)",
        "Perltool",
        "MJ12bot",
        "Netcraft",
        "MSIECrawler",
        "WGettools",
        "larbin",
        "Fishsearch",
    );

    foreach ($spiderSite as $val) {
        if (strpos($agent, strtolower($val)) !== false) {
            return true;
        }
    }

    // Always return a real boolean when nothing matched.
    return false;
}

if (isCrawler()) {
    echo "你好蜘蛛精!";
} else {
    echo "你不是蜘蛛精啊!";
}
4、第四种方法:
<?php
// Redirect every visitor whose user agent matches none of the crawler
// signatures below to the same path on https://www.nhooo.com.

$flag = false;
// The User-Agent header is optional, so fall back to an empty string.
$tmp = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

// NOTE(review): the multi-word signatures ("Yahoo! Slurp", "Sogou web spider",
// "Sogou Orion spider", "Inktomi slurp") are written with spaces, as those
// crawlers are understood to announce themselves - confirm against crawler docs.
$crawlerMarks = array(
    'Googlebot',
    'Baiduspider',
    'Yahoo! Slurp',
    'msnbot',
    'Sosospider',
    'YodaoBot',
    'OutfoxBot',
    'Sogou web spider',
    'Sogou Orion spider',
    'fast-webcrawler',
    'Gaisbot',
    'ia_archiver',
    'altavista',
    'lycos_spider',
    'Inktomi slurp',
);

foreach ($crawlerMarks as $mark) {
    // Case-insensitive match; "!== false" so a match at offset 0 also counts.
    if (stripos($tmp, $mark) !== false) {
        $flag = true;
        break;
    }
}

if ($flag == false) {
    // $_SERVER['REQUEST_URI'] is the path (and query string) after the domain,
    // so the visitor lands on the corresponding page of https://www.nhooo.com.
    // Alternatively, redirect to a fixed page:
    //   header("Location:https://www.nhooo.com/abc/d.php");
    header("Location:https://www.nhooo.com" . (isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : ''));
    exit();
}
?>