PHP curl模拟登录带验证码的网站
需求是这样的:需要登录一个带验证码的网站并获取数据,但不可能一直靠人工去记录数据,因此想通过自动采集的方式来完成。下面是试验得到的代码,有需要的可以参考!
<?php
namespace Home\Controller;

use Think\Controller;

/**
 * Logs in to a remote, captcha-protected site through cURL and keeps the
 * remote session in a cookie-jar file on this server.
 *
 * Flow: the browser loads the captcha via getVerifyCode(), the user types it
 * into the form, and xxxLogin() posts it together with the credentials using
 * the same cookie jar.
 *
 * NOTE(review): the jar file name does not depend on the visitor, so every
 * visitor shares one remote session — confirm this is only used single-user.
 */
class LoginController extends Controller
{
    /** Browser cookie names, indexed by the $cookieNum argument of _cookieRequest(). */
    protected $cookieName = array('cookie_verify', 'cookie_verify');

    /** Directory (relative to ROOT_PATH) that holds the cookie-jar files. */
    protected $cookiePath = '/cookie/';

    /** Cookie-jar file paths, filled by _initialize(), same indexes as $cookieName. */
    protected $cookiePathFile = array();

    /**
     * Renders the login form template.
     */
    public function index()
    {
        $this->display();
    }

    /**
     * Builds one cookie-jar path per entry of $cookieName.
     */
    public function _initialize()
    {
        foreach ($this->cookieName as $name) {
            $this->cookiePathFile[] = ROOT_PATH . $this->cookiePath . $name . '_xxx.txt';
        }
    }

    /**
     * Posts the login form to the remote site and reports the outcome.
     *
     * The remote site answers with a redirect; _cookieRequest() returns the
     * final URL, whose "Warn" query parameter carries the failure reason.
     */
    public function xxxLogin()
    {
        $username = I('username');
        $password = I('password');
        $verifyCode = I('verify');

        // The ASP.NET hidden fields must be sent too, otherwise the request may be ignored.
        $loginData = array(
            '__VIEWSTATE' => '/wEPDwUKMTU0MzAzOTU4NmQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFDExvZ2luX1N1Ym1pdL/yae69NsY163G3yuP0lxjz8oXu',
            '__VIEWSTATEGENERATOR' => 'DC42DE27',
            'txt_UserName' => $username,
            'txt_PWD' => $password,
            'txt_VerifyCode' => $verifyCode,
            'SMONEY' => 'ABC',
            'Login_Submit.x' => '52',
            'Login_Submit.y' => '19',
        );

        $getBack = $this->_cookieRequest('http://xxx.com/noLogin.aspx', $loginData);
        if ($getBack === false) {
            // Transport failure: there is no response to inspect.
            $this->error('登录请求失败,请稍后重试', '', 5);
            return;
        }

        // The old pattern used [^\<div], a character class that rejected any
        // 'd', 'i' or 'v' before the id attribute; [^>] stays inside the tag.
        if (preg_match('/<div\b[^>]*?\bid\s*=\s*[\'"]div_msg[\'"][^>]*>(.*?)<\/div>/s', $getBack, $match)) {
            // Double quotes so that \r\n is a real line break.
            echo "matched\r\n";
            print_r($match);
            return;
        }

        echo $getBack, '<br/>';

        // The URL may have no query string at all; parse only when present.
        $query = parse_url($getBack, PHP_URL_QUERY);
        $parsedQuery = array();
        if (is_string($query)) {
            parse_str($query, $parsedQuery);
        }

        if (empty($parsedQuery['Warn'])) {
            $_SESSION['user_id'] = '123456'; // mark the local session as logged in
            $this->success('登录P网站成功', U('Index/index'), 5);
            return;
        }

        // Failure reasons keyed by the remote site's "Warn" code.
        $warnMessages = array(
            '2' => '您输入的验证码错误,请重试',
            '3' => '该帐号不存在,还没帐号?',
            '5' => '账户已注销',
            '6' => '密码错误,如果连续错误3次半小时内不能登录!',
            '20' => '今日密码错误3次及以上,请于半小时后再来登录!',
            '21' => '今日您所在IP的所有帐号密码错误9次以上,请于半小时后再来登录!',
            '22' => '登录失败,您所在IP今日登录的帐号过多!',
            '23' => '登录失败,验证码失效!',
            '32' => '该帐号已经绑定其他xx帐号!',
            '33' => '一台电脑一天只能注册一个帐号!',
        );

        $warn = $parsedQuery['Warn'];
        $msg = '您好,欢迎来P,请先登录。';
        // "Warn[]=x" would make $warn an array, which cannot be used as a key.
        if (is_scalar($warn) && isset($warnMessages[(string) $warn])) {
            $msg = $warnMessages[(string) $warn];
        }
        $this->error($msg, '', 5);
    }

    /**
     * Fetches the remote captcha image with the cookie jar and echoes the raw bytes.
     */
    public function getVerifyCode()
    {
        // The random id only makes the URL unique per request.
        $img = $this->_cookieRequest(
            'http://xxx.com/VerifyCode_Login.aspx?id=' . rand(10000, 999999),
            null,
            true,
            1
        );
        echo $img;
    }

    /**
     * Expires the browser cookies listed in $cookieName.
     *
     * The cookie-jar files on disk are not deleted here.
     */
    public function clearCookie()
    {
        foreach ($this->cookieName as $name) {
            setcookie($name, '', time() - 3600);
        }
        $this->success('清除cookie成功!');
    }

    /**
     * Performs a cURL request that carries the stored remote-site cookies.
     *
     * @param string     $url       Address to request.
     * @param array|null $data      Form fields; when non-empty the request is a POST.
     * @param bool       $redirect  When false (default) and the request ended on a
     *                              different URL than $url, that final URL is returned
     *                              instead of the body. When true the body is always returned.
     * @param int        $cookieNum Index into $cookieName / $cookiePathFile.
     * @return string|false Final URL or response body; false when the transfer failed.
     */
    public function _cookieRequest($url, $data = null, $redirect = false, $cookieNum = 0)
    {
        $cookieName = $this->cookieName[$cookieNum];
        $cookieFile = $this->cookiePathFile[$cookieNum];

        $params = array(
            CURLOPT_URL => $url,
            CURLOPT_HEADER => false,          // do not include response headers in the body
            CURLOPT_RETURNTRANSFER => true,   // return the body instead of printing it
            CURLOPT_FOLLOWLOCATION => true,   // follow redirects
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1',
            // Always write cookies back, so cookies set by the login response
            // are kept for later requests.
            CURLOPT_COOKIEJAR => $cookieFile,
        );

        if ($data) {
            $params[CURLOPT_POST] = true;
            $params[CURLOPT_POSTFIELDS] = http_build_query($data);
        }

        if (!empty($_COOKIE[$cookieName]) && is_file($cookieFile)) {
            // The browser still holds the marker cookie: continue the stored session.
            $params[CURLOPT_COOKIEFILE] = $cookieFile;
        } else {
            // Start a fresh remote session and mark it in the browser for 120 seconds.
            // Only a flag is stored; the server-side file path is not exposed.
            setcookie($cookieName, '1', time() + 120);
        }

        $ch = curl_init();
        curl_setopt_array($ch, $params);
        $content = curl_exec($ch);
        $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        curl_close($ch);

        if ($content === false) {
            return false;
        }
        if (!$redirect && $url != $effectiveUrl) {
            return $effectiveUrl;
        }
        return $content;
    }
}
登录成功以后,就可以带着保存下来的 cookie 去访问该网站的其他页面了!
PS:PHP curl 登录淘宝
提交后页面仍提示需要填写验证码,无法登录。
填写验证码提交:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=gb2312" />
<title></title>
</head>
<body>
<!-- b.php is loaded in the frame; tb.php receives the textarea content as POST field "vv". -->
<iframe id="img" src="b.php" width="950" height="300" scrolling="No" frameborder="0"></iframe>
<form action="tb.php" method="POST">
<!--
  Every "&" is written as "&amp;": a bare "&not" is decoded by browsers to the
  "¬" character, which corrupted the "not_duplite_str" parameter.
  NOTE(review): the space inside the tid token looks like a line-wrap artifact - confirm.
-->
<textarea name="vv" cols="50" rows="10">umto=&amp;action=Authenticator&amp;event_submit_do_login=anything&amp;from=tb&amp;fc=default&amp;style=default&amp;css_style=&amp;tid=XOR_1_000000000000000000000000000000_635045544 70A7C717F750278&amp;support=000001&amp;CtrlVersion=1,0,0,7&amp;loginType=3&amp;minititle=&amp;minipara=&amp;pstrong=&amp;llnick=&amp;sign=&amp;need_sign=&amp;isIgnore=&amp;full_redirect=&amp;popid=&amp;callback=&amp;guf=&amp;not_duplite_str=&amp;need_user_id=&amp;poy=XOR_1_000000000000000000000000000000_625A424A45137C6F7A7F0B786D08&amp;gvfdcname=&amp;gvfdcre=&amp;from_encoding=&amp;TPL_redirect_url=http:www.taobao.com&amp;TPL_username=xxx&amp;TPL_password=xxxx&amp;need_check_code=&amp;&amp;TPL_checkcode=</textarea>
<input type="submit" />
</form>
</body>
</html>
<?php
// Submits the login form body (POST field "vv") to the Taobao login endpoint,
// reusing the cookie-jar file whose path an earlier request stored in the session,
// and prints the response.

session_start();

// Without a cookie jar from the earlier step there is no session to continue.
if (empty($_SESSION['cookie_jar'])) {
    exit();
}
$cookie_jar = $_SESSION['cookie_jar'];

if (!isset($_POST['vv']) || !is_string($_POST['vv']) || $_POST['vv'] === '') {
    exit('Missing form field "vv".');
}
$post_fields = $_POST['vv'];

$user_agent = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; QQWubi 133; '
    . 'Embedded Web Browser from: http://bsalsa.com/; SLCC2; .NET CLR 2.0.50727; '
    . '.NET CLR 3.5.30729; .NET CLR 3.0.30729; Tablet PC 2.0; .NET4.0C; .NET4.0E; '
    . 'InfoPath.3; Media Center PC 6.0)';

$ch = curl_init('https://login.taobao.com/member/login.jhtml');
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
// Credentials are posted here, so the server certificate is verified.
// VERIFYHOST must be 2 (or 0); the value 1 is not a valid setting in current libcurl.
// This requires a CA bundle to be configured for PHP's cURL extension.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
// Send the cookies collected when the captcha was requested; without them the
// login request starts a new session and the typed captcha cannot match.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
// Persist cookies set by the login response for later requests.
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);

$data = curl_exec($ch);
if ($data === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit('Login request failed: ' . $error);
}
curl_close($ch);

// Show the login response. Follow-up requests can reuse the session by passing
// $cookie_jar as CURLOPT_COOKIEFILE.
echo $data;
exit;
提取验证码(b.php,即上面页面中 iframe 加载的脚本):
<?php
// Prepares a Taobao login attempt: creates a cookie-jar file, reads the "umto"
// token from the login page, prints the complete POST body in a textarea, then
// posts it once (with an empty captcha) and prints the captcha image found in
// the response.

session_start();

$cookie_jar = tempnam('./temp/', 'cookie');
if ($cookie_jar === false) {
    exit('Unable to create the cookie jar file.');
}
// Remember the jar path so the next script of this session can reuse it.
$_SESSION['cookie_jar'] = $cookie_jar;

// NOTE(review): the space inside the tid token looks like a line-wrap artifact - confirm.
$post_fields = 'action=Authenticator&event_submit_do_login=anything&from=tb&fc=default&style=default&css_style='
    . '&tid=XOR_1_000000000000000000000000000000_635045544 70A7C717F750278'
    . '&support=000001&CtrlVersion=1,0,0,7&loginType=3&minititle=&minipara=&pstrong=&llnick=&sign=&need_sign='
    . '&isIgnore=&full_redirect=&popid=&callback=&guf=&not_duplite_str=&need_user_id='
    . '&poy=XOR_1_000000000000000000000000000000_625A424A45137C6F7A7F0B786D08'
    . '&gvfdcname=&gvfdcre=&from_encoding=&TPL_redirect_url=http:www.taobao.com&TPL_username=xxx&TPL_password=xxx';

$login_url = 'https://login.taobao.com/member/login.jhtml';
$user_agent = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; QQWubi 133; '
    . 'Embedded Web Browser from: http://bsalsa.com/; SLCC2; .NET CLR 2.0.50727; '
    . '.NET CLR 3.5.30729; .NET CLR 3.0.30729; Tablet PC 2.0; .NET4.0C; .NET4.0E; '
    . 'InfoPath.3; Media Center PC 6.0)';

// Step 1: fetch the login page to read the hidden "umto" field.
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// Verify the server certificate; requires a CA bundle configured for PHP's cURL extension.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
$data = curl_exec($ch);
if ($data === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit('Unable to load the login page: ' . $error);
}
curl_close($ch);

// \s* tolerates the whitespace between attributes; an absent field yields an empty token.
$umto = '';
if (preg_match('/id="um_to"\s*name="umto"\s*value="(.*?)"\s*\/>/', $data, $arr)) {
    $umto = $arr[1];
}
$post_fields = 'umto=' . urlencode($umto) . '&' . $post_fields . '&TPL_checkcode=';

// Escape the body for HTML: an unescaped "&not" is rendered by browsers as "¬".
echo '<textarea cols="50" rows="10">' . htmlspecialchars($post_fields, ENT_QUOTES) . '</textarea><br/>';

// Step 2: post the form with an empty captcha so the response carries the captcha image.
$ch = curl_init($login_url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
$data = curl_exec($ch);
if ($data === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit('Login request failed: ' . $error);
}
curl_close($ch);

if (preg_match('/img\s*id="J_StandardCode_m"\s*src="(.*?)"\s*data-src=/', $data, $arr1)) {
    // The value is copied from an HTML attribute and cannot contain a double
    // quote (the pattern stops at the first one), so quoting it is sufficient.
    echo '<img src="' . $arr1[1] . '"/>';
} else {
    echo 'No captcha image found in the login response.';
}
exit;