Python实现批量将word转html并将html内容发布至网站的方法
本文实例讲述了使用Python批量将Word文档转换为HTML文件,并将转换后的HTML内容发布至网站的方法,分享给大家供大家参考。代码需在Windows下运行,并依赖金山WPS、win32com(pywin32)和MySQLdb。具体实现方法如下:
# coding=utf-8
"""Batch-convert Word documents to HTML and publish the pages to a web site.

``wordsToHtml`` drives Kingsoft WPS through COM to save every ``.doc`` /
``.docx`` file found under a directory as an HTML file next to it.
``html_add_to_db`` then copies every HTML file under a directory (plus the
local images it references) into the web server's static directories and
stores an ``<iframe>`` wrapper for each page in the ``common_article`` table.

Only syntax that is valid on both Python 2.7 and Python 3 is used.
"""
__author__ = 'zhm'

import os
import random
import re
import shutil
import time

import MySQLdb
from win32com import client as wc

# COM ProgID of the WPS word processor.  Original author's note: the
# early-access edition is addressed as "KWPS", the official release as "WPS".
WPS_PROG_ID = 'KWPS.Application'

# File-format code passed to ``Document.SaveAs`` for HTML output
# (``wdFormatHTML`` in Word's ``WdSaveFormat`` enumeration).
WD_FORMAT_HTML = 8

# Encoding used to decode byte-string paths and the image references found
# inside the generated HTML.
FS_ENCODING = 'gbk'

WORD_SUFFIXES = ('doc', 'docx')
HTML_SUFFIXES = ('htm', 'html')

# D:/files is the static directory configured on the web server; published
# URLs are these paths with the drive letter removed.
HTML_TARGET_DIR = 'D:/files/htmls/'
IMAGE_TARGET_DIR = 'D:/files/images/whzx/'

# NOTE(review): connection settings (including root/root credentials) are
# hard-coded exactly as in the original script; move them to configuration
# before using this anywhere but a local test database.
DB_CONFIG = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'passwd': 'root',
    'db': 'test',
    'charset': 'utf8',
}

INSERT_ARTICLE_SQL = "insert into common_article(title,content) values(%s,%s)"

# Captures the ``src`` value of every <img> tag.  It is a bytes pattern
# because the HTML is read and rewritten as raw bytes.
IMG_SRC_RE = re.compile(
    br"""<img\s.*?\s?src\s*=\s*['|"]?([^\s'"]+).*?>""", re.I)

# Markup stored in the database: a script that resizes the iframe to its
# content, followed by the iframe itself.  The page URL goes between the
# two halves.
IFRAME_HEAD = '''
<script type="text/javascript" language="javascript">
function iFrameHeight() {
var ifm = document.getElementById("iframepage");
var subWeb = document.frames ? document.frames["iframepage"].document : ifm.contentDocument;
if (ifm != null && subWeb != null) {
ifm.height = subWeb.body.scrollHeight;
}
}
</script>
<iframe src='''
IFRAME_TAIL = ''' marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()"></iframe>
'''


def _to_text(value, encoding=FS_ENCODING):
    """Return *value* as text, decoding it with *encoding* if it is bytes."""
    if isinstance(value, bytes):
        return value.decode(encoding)
    return value


def _suffix(file_name):
    """Return the lower-cased extension of *file_name* (no dot), or None."""
    dot_index = file_name.rfind('.')
    if dot_index == -1:
        return None
    return file_name[dot_index + 1:].lower()


def _web_path(local_path):
    """Return *local_path* without its drive letter (``D:/a/b`` -> ``/a/b``)."""
    return local_path.split(':', 1)[-1]


def _unique_path(directory, suffix):
    """Return a not-yet-existing path in *directory*, creating it if needed.

    The file name is the current time in milliseconds followed by a random
    number and *suffix*.  Names are re-drawn until one is free so that two
    files published in the same millisecond cannot overwrite each other.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)
    while True:
        name = '%d%d%s' % (
            int(time.time() * 1000), random.randint(100, 10000), suffix)
        candidate = os.path.join(directory, name)
        if not os.path.exists(candidate):
            return candidate


def _publish_images(html_bytes, source_dir):
    """Copy the local images referenced by *html_bytes* to the image directory.

    Each ``src`` that resolves to a file relative to *source_dir* is copied
    under a unique name and every occurrence of that ``src`` in the page is
    replaced by the published path.  Returns the rewritten page bytes.
    """
    seen = set()
    for raw_src in IMG_SRC_RE.findall(html_bytes):
        if raw_src in seen:
            # replace() below already rewrote every occurrence of this src.
            continue
        seen.add(raw_src)
        image_source = os.path.join(source_dir, _to_text(raw_src))
        if not os.path.isfile(image_source):
            # Not a local file (for instance a remote URL): leave it as is.
            continue
        # Keep the real extension so the web server reports the right type.
        extension = os.path.splitext(image_source)[1] or '.png'
        image_target = _unique_path(IMAGE_TARGET_DIR, extension)
        shutil.copyfile(image_source, image_target)
        html_bytes = html_bytes.replace(
            raw_src, _web_path(image_target).encode(FS_ENCODING))
    return html_bytes


def _publish_html(html_full_name):
    """Publish one HTML file and return the iframe markup that embeds it."""
    with open(html_full_name, 'rb') as source:
        content = source.read()
    content = _publish_images(content, os.path.dirname(html_full_name))
    target = _unique_path(HTML_TARGET_DIR, '.html')
    with open(target, 'wb') as published:
        published.write(content)
    return IFRAME_HEAD + _web_path(target) + IFRAME_TAIL


def wordsToHtml(dir):
    """Convert every Word document under *dir* to an HTML file beside it.

    Args:
        dir: Root directory to walk recursively (text or GBK-encoded bytes).
            The name shadows the builtin but is kept for existing callers.

    Files without an extension are reported and skipped; files that are not
    ``.doc`` / ``.docx`` are ignored without being opened.
    """
    # Hand WPS absolute paths: it is a separate process, so a relative path
    # would not necessarily resolve against this script's working directory.
    root = os.path.abspath(_to_text(dir))
    word = wc.Dispatch(WPS_PROG_ID)
    try:
        for path, _subdirs, files in os.walk(root):
            for word_file in files:
                suffix = _suffix(word_file)
                if suffix is None:
                    print('********************ERROR:未取得后缀名!')
                    continue
                if suffix not in WORD_SUFFIXES:
                    continue
                base_name = word_file[:word_file.rfind('.')]
                html_full_name = os.path.join(path, base_name + '.html')
                doc = word.Documents.Open(os.path.join(path, word_file))
                try:
                    doc.SaveAs(html_full_name, WD_FORMAT_HTML)
                finally:
                    # Always release the document, even if saving failed.
                    doc.Close()
                print(u'生成了html文件:' + html_full_name)
    finally:
        word.Quit()
    print('')
    print('Finished!')


def html_add_to_db(dir):
    """Publish every HTML file under *dir* and insert it into the database.

    For each ``.htm`` / ``.html`` file the page and its local images are
    copied to the static directories and one ``common_article`` row is
    inserted: the title is the file name without extension, the content is
    the iframe markup pointing at the published page.

    Args:
        dir: Root directory to walk recursively (text or GBK-encoded bytes).
            The name shadows the builtin but is kept for existing callers.

    A failed insert is reported and the remaining files are still processed.
    """
    conn = MySQLdb.connect(**DB_CONFIG)
    try:
        cur = conn.cursor()
        try:
            for path, _subdirs, files in os.walk(_to_text(dir)):
                for html_file in files:
                    suffix = _suffix(html_file)
                    if suffix is None:
                        print('********************ERROR:未取得后缀名!')
                        continue
                    if suffix not in HTML_SUFFIXES:
                        continue
                    iframe_html = _publish_html(os.path.join(path, html_file))
                    title = os.path.splitext(html_file)[0]
                    try:
                        cur.execute(INSERT_ARTICLE_SQL, (title, iframe_html))
                    except MySQLdb.Error as exc:
                        print('Error: unable to insert data (%s)' % (exc,))
        finally:
            cur.close()
        conn.commit()
    finally:
        # Close the database connection.
        conn.close()


if __name__ == '__main__':
    wordsToHtml('d:/word')
    html_add_to_db('d:/word')
希望本文所述对大家的Python程序设计有所帮助。