php正则替换处理HTML页面的方法
本文实例讲述了php正则替换处理HTML页面的方法。分享给大家供大家参考。具体如下:
<?php if (!defined('BASEPATH')) exit('No direct script access allowed');

/**
 * HTML rewriting helper (CodeIgniter library).
 *
 * Scans a chunk of HTML and rewrites relative resource references so that
 * they point at the application's "cdms" file-serving URLs. The following
 * reference kinds are handled, in this order:
 *
 *   1. <img ... src="...">
 *   2. <a ... href="...">
 *   3. <iframe ... src="...">
 *   4. <frame ... src="...">
 *   5. JavaScript  window.open('...')
 *   6. CSS         background...: url('...')
 *
 * Every pattern captures exactly four groups after the full match:
 *   [1] the text that precedes the value (tag/attribute or call prefix),
 *   [2] the opening quote (may be empty for unquoted values),
 *   [3] the URL itself,
 *   [4] the closing quote (may be empty).
 * Only the URL is substituted; the remainder of the tag or statement is left
 * exactly as it appeared in the input.
 */
class Myreplace
{
    /**
     * Accepted module identifiers mapped to the model loaded for each.
     */
    private $moudle_models = array(
        'udata'   => 'mupload_data',
        'tdata'   => 'taskdata',
        'tresult' => 'taskresult',
        'dresult' => 'dmsresult',
    );

    /** Framework instance returned by get_instance(). */
    private $CI;

    /** HTML passed to the most recent my_replace() call (stored, not read back here). */
    private $content;

    /** Directory that relative URLs in the HTML are resolved against. */
    private $relative_dirname;

    /** Project id forwarded to the model lookup and appended as "?pid=". */
    private $projectid;

    /** Lower-cased module identifier of the current run. */
    private $moudle;

    function __construct()
    {
        $this->CI =& get_instance();
    }

    /**
     * Rewrite the resource references found in $content.
     *
     * @param string $content   HTML content
     * @param string $relative  directory the HTML file lives in; relative
     *                          URLs are resolved against it
     * @param int    $projectid project id
     * @param string $moudle    module identifier: udata, tdata, tresult or
     *                          dresult (case-insensitive)
     * @return string the HTML with every resolvable reference rewritten
     *
     * As in the original implementation, an unknown module identifier
     * terminates the script via exit.
     */
    public function my_replace($content, $relative, $projectid, $moudle)
    {
        // Normalise once so validation, model selection and the generated
        // URLs all agree on the same spelling.
        $moudle = strtolower((string) $moudle);
        if (!isset($this->moudle_models[$moudle])) {
            exit;
        }

        $this->content          = $content;
        $this->relative_dirname = $relative;
        $this->projectid        = $projectid;
        $this->moudle           = $moudle;

        // NOTE(review): every module is loaded under the same alias "model".
        // Confirm that the framework replaces an already loaded alias when
        // my_replace() is called again with a different module.
        $this->CI->load->model($this->moudle_models[$moudle], 'model');

        // Sub-patterns for the value. "(?|...)" is a branch-reset group, so
        // each alternative fills the same three capture groups:
        // opening quote, URL, closing quote.
        $double     = '(")([^"\r\n<>]*)(")';
        $single     = '(\')([^\'\r\n<>]*)(\')';
        $attr_value = '(?|' . $double . '|' . $single . '|()([^\s"\'<>]+)())';
        $js_value   = '(?|' . $double . '|' . $single . ')';
        $css_value  = '(?|' . $double . '|' . $single . '|()([^\s"\'()<>]+)())';

        // callback name => pattern; array order is the processing order.
        $rules = array(
            'image_replace'  => $this->tag_pattern('img', 'src', $attr_value),
            'html_replace'   => $this->tag_pattern('a', 'href', $attr_value),
            'iframe_replace' => $this->tag_pattern('iframe', 'src', $attr_value),
            'frame_replace'  => $this->tag_pattern('frame', 'src', $attr_value),
            'js_replace'     => '/(window\.open\s*\(\s*)' . $js_value . '/i',
            'css_replace'    => '/(background[^;{}]*?url\(\s*)' . $css_value . '/i',
        );

        foreach ($rules as $callback => $pattern) {
            $rewritten = preg_replace_callback($pattern, array($this, $callback), $content);
            // preg_replace_callback() returns null on a PCRE error; keep the
            // previous content instead of discarding the whole document.
            if ($rewritten !== null) {
                $content = $rewritten;
            }
        }

        return $content;
    }

    /**
     * Build the pattern matching `<tag ... attribute=VALUE`.
     *
     * The tag name must be followed by whitespace (so "a" does not match
     * "<area"/"<abbr"), the attribute must be preceded by whitespace (so
     * "src" does not match "data-src"), and the scan never crosses ">".
     */
    private function tag_pattern($tag, $attribute, $value_pattern)
    {
        return '/(<' . $tag . '\s(?:[^>]*?\s)?' . $attribute . '\s*=\s*)' . $value_pattern . '/i';
    }

    /** Callback for <img src>: rewrites to the picture route. */
    private function image_replace($matches)
    {
        return $this->rewrite_match($matches, 'pic', false);
    }

    /** Callback for <a href>: rewrites to the text-file route. */
    private function html_replace($matches)
    {
        return $this->rewrite_match($matches, 'file', true);
    }

    /** Callback for <iframe src>: rewrites to the text-file route. */
    private function iframe_replace($matches)
    {
        return $this->rewrite_match($matches, 'file', true);
    }

    /** Callback for <frame src>: rewrites to the text-file route. */
    private function frame_replace($matches)
    {
        return $this->rewrite_match($matches, 'file', true);
    }

    /**
     * Callback for window.open('...'): rewrites to the text-file route.
     *
     * Only the first argument (the URL) is part of the match, so any further
     * arguments of the call are left untouched in the source text.
     */
    private function js_replace($matches)
    {
        // URLs carrying an anchor are deliberately left alone, as before.
        if (isset($matches[3]) && strpos($matches[3], '#') !== false) {
            return $matches[0];
        }

        return $this->rewrite_match($matches, 'file', false);
    }

    /** Callback for CSS background url(...): rewrites to the picture route. */
    private function css_replace($matches)
    {
        return $this->rewrite_match($matches, 'pic', false);
    }

    /**
     * Shared implementation of all callbacks.
     *
     * @param array  $matches        groups as described in the class comment
     * @param string $kind           'pic' for images, 'file' for pages
     * @param bool   $strip_fragment when true, a trailing "#fragment" is
     *                               removed before the lookup and re-appended
     *                               to the rewritten URL
     * @return string replacement text; the untouched match whenever the URL
     *                cannot or should not be rewritten
     */
    private function rewrite_match($matches, $kind, $strip_fragment)
    {
        $original = $matches[0];
        if (!isset($matches[3])) {
            return $original;
        }
        $closing = isset($matches[4]) ? $matches[4] : '';

        $path     = rtrim($matches[3], '/');
        $fragment = '';
        if ($strip_fragment) {
            $hash = strrpos($path, '#');
            if ($hash !== false) {
                $fragment = substr($path, $hash);
                $path     = (string) substr($path, 0, $hash);
            }
        }

        // Nothing to look up for in-page anchors ("#top") or for absolute
        // references such as "http:", "mailto:", "javascript:" or "//host".
        if ($path === '' || preg_match('#^(?:[a-z][a-z0-9+.\-]*:|//)#i', $path)) {
            return $original;
        }

        $id = $this->CI->model->get_id_by_path_and_project(
            $this->resolve_path($path),
            $this->projectid
        );
        if (empty($id)) {
            return $original;
        }

        return $matches[1] . $matches[2] . $this->build_url($kind, $id) . $fragment . $closing;
    }

    /**
     * Resolve a relative URL against the base directory.
     *
     * One trailing directory is removed from the base for every "../" in the
     * URL; leading "." and "/" characters of the URL are then dropped.
     */
    private function resolve_path($path)
    {
        // Trim first so a trailing "/" on the base does not consume one of
        // the "../" steps.
        $dir    = rtrim((string) $this->relative_dirname, '/');
        $levels = substr_count($path, '../');

        for ($i = 0; $i < $levels; $i++) {
            $slash = strrpos($dir, '/');
            $dir   = ($slash === false) ? '' : substr($dir, 0, $slash);
        }

        return rtrim($dir, '/') . '/' . ltrim($path, './');
    }

    /**
     * Build the serving URL for a resolved id.
     *
     * The "dresult" module uses the readpic/readfile actions, every other
     * module uses picfile/txtfile.
     */
    private function build_url($kind, $id)
    {
        if ($this->moudle == 'dresult') {
            $action = ($kind === 'pic') ? 'readpic' : 'readfile';
        } else {
            $action = ($kind === 'pic') ? 'picfile' : 'txtfile';
        }

        return $this->CI->config->item("base_url") . "cdms/" . $this->moudle
            . "/" . $action . "/" . $id . "?pid=" . $this->projectid;
    }
}

/* End of file Myreplace.php */
/* Location: ./application/libraries/Myreplace.php */
PS:这里再为大家提供2款非常方便的正则表达式工具供大家参考使用:
JavaScript正则表达式在线测试工具:
http://tools.jb51.net/regex/javascript
正则表达式在线生成工具:
http://tools.jb51.net/regex/create_reg
希望本文所述对大家的php程序设计有所帮助。