PHP判断来访是搜索引擎蜘蛛还是普通用户的代码小结
1、推荐的一种方法:php判断搜索引擎蜘蛛爬虫还是人为访问代码,摘自Discuzx3.2
<?php
/**
 * Decide whether a request comes from a search-engine spider rather than a regular browser.
 *
 * A user agent that contains a browser keyword and no "http://" substring is treated as a
 * human visitor; otherwise it counts as a spider only if it contains a spider keyword.
 *
 * @param string $useragent User-agent string to inspect; when empty, the current request's
 *                          HTTP_USER_AGENT header is used instead.
 * @return bool True for a spider/crawler, false otherwise.
 */
function checkrobot($useragent = '') {
    static $kw_spiders = array('bot', 'crawl', 'spider', 'slurp', 'sohu-search', 'lycos', 'robozilla');
    static $kw_browsers = array('msie', 'netscape', 'opera', 'konqueror', 'mozilla');

    if (empty($useragent)) {
        // The header may be absent (CLI, stripped by a proxy); use '' instead of raising a notice.
        $useragent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    }
    $useragent = strtolower($useragent);

    // Browser keyword present and no "http://" substring: classify as a regular browser.
    if (strpos($useragent, 'http://') === false && dstrpos($useragent, $kw_browsers)) {
        return false;
    }
    if (dstrpos($useragent, $kw_spiders)) {
        return true;
    }
    return false;
}
/**
 * Check whether a string contains any of the given substrings.
 *
 * @param string       $string      Haystack to search; an empty() value always yields false.
 * @param array|string $arr         One needle or a list of needles (cast to array).
 * @param bool         $returnvalue When true, return the first needle that matched instead of true.
 * @return bool|string True (or the matched needle) on the first hit, false when nothing matches.
 */
function dstrpos($string, $arr, $returnvalue = false) {
    if (empty($string)) {
        return false;
    }
    foreach ((array) $arr as $v) {
        // strpos() may legitimately return 0, so compare against false strictly.
        if (strpos($string, $v) !== false) {
            return $returnvalue ? $v : true;
        }
    }
    return false;
}
// Print the visitor type: spider/crawler when checkrobot() is truthy, human otherwise.
echo checkrobot() ? '机器人爬虫' : '人';
?>
实际应用中可以这样判断:只有在访问者不是搜索引擎时才执行操作
<?php
// Guard: the body runs only when checkrobot() does not report a search-engine spider.
if(!checkrobot()){
// do something (placeholder for the visitor-only logic)
}
?>
2、第二种方法:
使用PHP实现蜘蛛访问日志统计
/**
 * Map a user-agent string to the label of the spider it belongs to.
 *
 * Signatures are tested in order and the first match wins, so a specific
 * signature must come before any shorter signature it contains
 * (e.g. "msnbot" before "msn", everything before the catch-all "bot").
 *
 * @param string $useragent Raw user-agent header value (any letter case).
 * @return string|null Spider label, or null when no signature matches.
 */
function detect_spider_name($useragent) {
    // All signatures are lowercase because the user agent is lowercased before matching.
    $signatures = array(
        'googlebot' => 'Google',
        'mediapartners-google' => 'GoogleAdsense',
        'baiduspider' => 'Baidu',
        // Sogou's agent strings contain spaces ("Sogou web spider/4.0", "Sogou spider2").
        'sogou spider' => 'Sogou',
        'sogou web' => 'Sogouweb',
        'sosospider' => 'SOSO',
        '360spider' => '360Spider',
        'yahoo' => 'Yahoo',
        'msnbot' => 'msnbot',
        'msn' => 'MSN',
        'sohu' => 'Sohu',
        'yodaobot' => 'Yodao',
        'twiceler' => 'Twiceler',
        'ia_archiver' => 'Alexa_',
        'iaarchiver' => 'Alexa',
        'slurp' => '雅虎',
        'bot' => '其它蜘蛛',
    );

    $useragent = strtolower((string) $useragent);
    if ($useragent === '') {
        return null;
    }
    foreach ($signatures as $needle => $label) {
        if (strpos($useragent, (string) $needle) !== false) {
            return $label;
        }
    }
    return null;
}

// Identify the spider (if any) behind the current request.
$useragent = strtolower(isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '');
$bot = detect_spider_name($useragent);

// Append one tab-separated line per spider hit: time, client IP, spider label, requested URL.
if (isset($bot)) {
    // Logging is best-effort: when bot.txt cannot be opened the request continues without a log entry.
    $fp = @fopen('bot.txt', 'a');
    if ($fp !== false) {
        fwrite($fp, date('Y-m-d H:i:s') . "\t" . $_SERVER["REMOTE_ADDR"] . "\t" . $bot . "\t" . 'http://' . $_SERVER['SERVER_NAME'] . $_SERVER["REQUEST_URI"] . "\r\n");
        fclose($fp);
    }
}
3、第三种方法:
我们可以通过HTTP_USER_AGENT来判断是否是蜘蛛,搜索引擎的蜘蛛都有自己的独特标志,下面列举了其中一部分。
/**
 * Tell whether the current request's user agent belongs to a known search-engine crawler.
 *
 * Matching is case-insensitive: both the header and each signature are lowercased.
 *
 * @return bool True when HTTP_USER_AGENT contains one of the crawler signatures,
 *              false otherwise (including when the header is missing or empty).
 */
function is_crawler() {
    // A missing header cannot match anything; return early instead of raising a notice.
    if (empty($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }
    $userAgent = strtolower($_SERVER['HTTP_USER_AGENT']);
    $spiders = array(
        'Googlebot',    // Google crawler
        'Baiduspider',  // Baidu crawler
        'Yahoo! Slurp', // Yahoo crawler (its agent string has a space after "!")
        'YodaoBot',     // Youdao crawler
        'msnbot',       // Bing crawler
        // add more crawler keywords here
    );
    foreach ($spiders as $spider) {
        if (strpos($userAgent, strtolower($spider)) !== false) {
            return true;
        }
    }
    return false;
}
下面的php代码附带了更多的蜘蛛标识
/**
 * Tell whether the current request's user agent matches an extended list of spider signatures.
 *
 * Matching is case-insensitive: the header and every signature are lowercased first.
 * The function produces no output; it only returns the verdict.
 *
 * @return bool True when HTTP_USER_AGENT contains any listed signature, false otherwise
 *              (including when the header is missing or empty).
 */
function isCrawler() {
    // A missing or empty header cannot match; return early instead of raising a notice.
    if (empty($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }
    $agent = strtolower($_SERVER['HTTP_USER_AGENT']);

    $spiderSite = array(
        "TencentTraveler",
        "Baiduspider+",
        "BaiduGame",
        "Googlebot",
        "msnbot",
        "Sosospider+",
        "Sogou web spider", // the real agent string contains spaces
        "ia_archiver",
        "Yahoo! Slurp",     // the real agent string has a space after "!"
        "YoudaoBot",
        "YahooSlurp",
        "MSNBot",
        "Java(Oftenspambot)",
        "BaiDuSpider",
        "Voila",
        "Yandexbot",
        "BSpider",
        "twiceler",
        "SogouSpider",
        "SpeedySpider",
        "GoogleAdSense",
        "Heritrix",
        "Python-urllib",
        "Alexa(IAArchiver)",
        "Ask",
        "Exabot",
        "Custo",
        "OutfoxBot/YodaoBot",
        "yacy",
        "SurveyBot",
        "legs",
        "lwp-trivial",
        "Nutch",
        "StackRambler",
        "Thewebarchive(IAArchiver)",
        "Perltool",
        "MJ12bot",
        "Netcraft",
        "MSIECrawler",
        "WGettools",
        "larbin",
        "Fishsearch",
    );
    // NOTE(review): several remaining entries look like display names with their spaces
    // removed; confirm them against real agent strings before relying on them.

    foreach ($spiderSite as $val) {
        if (strpos($agent, strtolower($val)) !== false) {
            return true;
        }
    }
    // No signature matched: return an explicit false rather than falling off the end (null).
    return false;
}
// Greet the visitor according to whether isCrawler() reports a spider.
echo isCrawler() ? "你好蜘蛛精!" : "你不是蜘蛛精啊!";
4、第四种方法:
<?php
// Let known search-engine spiders through and redirect every other visitor to the main site.
$flag = false;
// The header may be absent; an empty string matches no signature, so the visitor is redirected.
$tmp = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';

// Substrings that identify a spider's user agent.
$spiderSignatures = array(
    'Googlebot',
    'Baiduspider',
    'Yahoo! Slurp',
    'msnbot',
    'Sosospider',
    'YodaoBot',
    'OutfoxBot',
    'Sogou web spider',
    'Sogou Orion spider',
    'fast-webcrawler',
    'Gaisbot',
    'ia_archiver',
    'altavista',
    'lycos_spider',
    // NOTE(review): probably "Inktomi Slurp" with its space removed; confirm before changing.
    'Inktomislurp',
);
foreach ($spiderSignatures as $signature) {
    // Compare against false strictly: a match at offset 0 (agent starting with the
    // signature, e.g. "Baiduspider+(...)") is still a match. stripos() ignores case.
    if (stripos($tmp, $signature) !== false) {
        $flag = true;
        break;
    }
}

if (!$flag) {
    // Redirect to the same path on https://www.nhooo.com;
    // $_SERVER['REQUEST_URI'] is the path (and query) that follows the domain.
    // Alternatively use a fixed target: header("Location:https://www.nhooo.com/abc/d.php");
    header("Location:https://www.nhooo.com" . $_SERVER['REQUEST_URI']);
    exit();
}
?>