php解析mht文件转换成html的实例
用 PHP 解析 mht 文件：用编辑器打开 mht 文件可以看到其中的内容是 base64 编码的，所以 mht 是可以转换成 html 的。
set_file($filename);
$o_mhtml->extract();
return$o_mhtml->get_part_to_file(0);
}
returnnull;
}
function mhtmlParseAll($filename){
if(file_exists($filename)){
if(is_dir($filename)) return false;
$filename=strtolower($filename);
if(strpos($filename,'.mht',1)==FALSE) return false;
$o_mhtml=new mhtml();
$o_mhtml->set_file($filename);
$o_mhtml->extract();
return $o_mhtml->get_all_part_file();
}
return null;
}
*/
/**
 * Minimal splitter/decoder for MHT (MIME HTML archive) files.
 *
 * Typical use:
 *   $p = new mhtparse();
 *   $p->set_file('page.mht');
 *   $p->extract();
 *   $html = $p->get_part_to_file(0);
 *
 * The class reads the whole file into memory, locates the multipart boundary
 * in the first 8192 bytes, splits the file on that boundary and decodes single
 * parts on request.
 */
class mhtparse
{
    /** @var string Path of the MHT file to read (set through set_file()). */
    public $file = '';

    /** @var string Part delimiter including the leading "--", or '' when none was found. */
    public $boundary = '';

    /**
     * @var string|array Raw file content after read_filedata(); list of raw
     *                   part strings (headers + body) after file_parts().
     */
    public $filedata = '';

    /** @var int Number of parts produced by the last boundary split. */
    public $countparts = 1;

    /** @var string Diagnostic messages collected while reading the file. */
    public $log = '';

    /**
     * Reads the configured file and splits it into parts.
     *
     * @return int Always 1; inspect get_log() for read problems.
     */
    public function extract()
    {
        $this->read_filedata();
        $this->file_parts();

        return 1;
    }

    /**
     * Sets the path of the MHT file to process.
     *
     * @param string $p File path.
     */
    public function set_file($p)
    {
        $this->file = $p;
    }

    /**
     * Returns the diagnostic log collected so far.
     *
     * @return string
     */
    public function get_log()
    {
        return $this->log;
    }

    /**
     * Detects the multipart boundary and splits $filedata into raw parts.
     *
     * Only the first 8192 bytes are scanned for the boundary parameter. Both
     * the quoted form (boundary="abc") and the unquoted token form
     * (boundary=abc) are accepted, and the parameter may share its line with
     * other parameters. When no boundary is found the whole content becomes
     * the single element of $filedata.
     */
    public function file_parts()
    {
        if (!is_string($this->filedata)) {
            // Already split (or never read): nothing sensible to scan.
            return;
        }

        $head = substr($this->filedata, 0, 8192);
        foreach (explode("\n", $head) as $line) {
            $line = trim($line);
            if ($line === '' || stripos($line, 'boundary') === false) {
                continue;
            }
            // Group 1: quoted value, group 2: bare token.
            if (!preg_match('/boundary\s*=\s*(?:"([^"]*)"|([^\s;"]+))/i', $line, $m)) {
                continue;
            }
            $value = ($m[1] !== '') ? $m[1] : (isset($m[2]) ? $m[2] : '');
            if ($value === '') {
                continue;
            }

            $this->boundary = '--' . $value;
            // Drop the header line that declared the boundary.
            $this->filedata = str_replace($line, '', $this->filedata);
            break;
        }

        if ($this->boundary != '') {
            $parts = explode($this->boundary, $this->filedata);
            // Everything before the first delimiter is the archive header.
            array_shift($parts);
            $this->filedata = $parts;
            $this->countparts = count($parts);
        } else {
            $this->filedata = array($this->filedata);
        }
    }

    /**
     * Returns the raw parts (headers and still-encoded body) as split by
     * file_parts().
     *
     * @return string|array
     */
    public function get_all_part_file()
    {
        return $this->filedata;
    }

    /**
     * Decodes the body of part $i.
     *
     * The part headers end at the first blank line. The value of
     * Content-Transfer-Encoding is matched case-insensitively: "base64" and
     * "quoted-printable" bodies are decoded, anything else is returned as is.
     * Line breaks of non-base64 bodies are preserved, including the one that
     * precedes the next boundary line.
     *
     * @param int $i Zero-based part index.
     *
     * @return string|int Decoded body; '' when the part does not exist; the
     *                    integer 1 when the part is the closing "--" marker.
     */
    public function get_part_to_file($i)
    {
        if (!is_array($this->filedata) || !isset($this->filedata[$i])) {
            return '';
        }

        $lines = explode("\n", ltrim($this->filedata[$i]));
        $bodyStart = 0;
        $encoding = '';

        foreach ($lines as $idx => $line) {
            $line = trim($line);
            if ($line === '') {
                // Remainder of the closing delimiter ("--boundary--").
                if (trim($lines[0]) === '--') {
                    return 1;
                }
                $bodyStart = $idx;
                break;
            }

            $pos = strpos($line, ':');
            if ($pos !== false) {
                $name = strtolower(trim(substr($line, 0, $pos)));
                if ($name === 'content-transfer-encoding') {
                    $encoding = strtolower(trim(substr($line, $pos + 1)));
                }
            }
        }

        // When no blank line was seen $bodyStart stays 0 and only the first
        // line is treated as header.
        $body = array_slice($lines, $bodyStart + 1);

        if ($encoding === 'base64') {
            // Line breaks carry no meaning in base64 data.
            return base64_decode(implode('', $body));
        }

        // Re-join with the separator used for splitting so the text is intact.
        $text = implode("\n", $body);
        if ($encoding === 'quoted-printable') {
            return quoted_printable_decode($text);
        }

        return $text;
    }

    /**
     * Loads the whole file named by $file into $filedata.
     *
     * On failure $filedata is set to '' and a message is appended to $log.
     */
    public function read_filedata()
    {
        if (!is_file($this->file) || !is_readable($this->file)) {
            $this->log .= 'Unable to read file: ' . $this->file . "\n";
            $this->filedata = '';
            return;
        }

        $data = file_get_contents($this->file);
        if ($data === false) {
            $this->log .= 'Unable to read file: ' . $this->file . "\n";
            $data = '';
        }
        $this->filedata = $data;
    }

    /**
     * Extracts the text enclosed by a balanced pair of marks.
     *
     * Example:
     *   $str = "sssss{x{xx}{xx{xx}x}x}sssss";
     *   $r = $this->getrange($str, '{', '}', 0);
     *   // $r['range'] === "x{xx}{xx{xx}x}x", $r['begin'] === 6, $r['end'] === 22
     *
     * When both marks are the same string (e.g. '"') the text between the
     * first two occurrences is returned.
     *
     * @param string $subject       Text to search (taken by reference, not modified).
     * @param string $Beginmark_str Opening mark; values that are empty() fall back to '{'.
     * @param string $Endmark_str   Closing mark; values that are empty() fall back to '}'.
     * @param int    $Start_pos     Offset at which the search starts.
     *
     * @return array|false array('range' => enclosed text,
     *                           'begin' => offset just past the opening mark,
     *                           'end'   => offset just past the closing mark),
     *                     or false when no balanced range exists.
     */
    public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0)
    {
        if (empty($Beginmark_str)) {
            $Beginmark_str = '{';
        }
        if (empty($Endmark_str)) {
            $Endmark_str = '}';
        }
        $beginLen = strlen($Beginmark_str);
        $endLen = strlen($Endmark_str);

        $Start_pos = (int) $Start_pos;
        if (abs($Start_pos) > strlen($subject)) {
            // strpos() rejects such offsets; treat it as "nothing found".
            return false;
        }

        $firstBegin = null;     // offset just past the first opening mark
        $scanFrom = $Start_pos; // where the next opening-mark search starts
        $endPos = null;         // offset of the closing mark matched last
        $afterEnd = null;       // offset just past that closing mark
        $rangeLen = null;       // length of the enclosed text so far
        $done = false;

        do {
            $hit = strpos($subject, $Beginmark_str, $scanFrom);
            if ($hit === false) {
                if ($firstBegin === null) {
                    return false; // no opening mark at all
                }
                $done = true;     // no further opening mark: range is complete
            } else {
                $scanFrom = $hit + $beginLen;
                if ($firstBegin === null) {
                    $firstBegin = $scanFrom;
                } elseif ($scanFrom > $endPos) {
                    // The next opening mark lies behind the current closing
                    // mark, so everything nested has been balanced.
                    $done = true;
                }
                // Otherwise the mark is nested and needs one more closing mark.
            }

            if (!$done) {
                if ($afterEnd === null) {
                    $afterEnd = $firstBegin;
                }
                $endHit = strpos($subject, $Endmark_str, $afterEnd);
                if ($endHit === false) {
                    return false; // unbalanced: closing mark missing
                }
                $rangeLen = $endHit - $firstBegin;
                $afterEnd = $endHit + $endLen;
                $endPos = $endHit;
            }
        } while (!$done);

        if (!is_int($firstBegin) || !is_int($rangeLen)) {
            return false;
        }

        return array(
            'range' => substr($subject, $firstBegin, $rangeLen),
            'begin' => $firstBegin,
            'end' => $afterEnd,
        );
    } // end getrange()
} // class
?>
以上这篇php解析mht文件转换成html的实例就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持毛票票。