Java使用poi将word转换为html
使用 Apache POI 将 Word 文档转换为 HTML,支持 doc 和 docx 两种格式,转换后可以保留原文档中的图片和样式。
1.导入Maven包
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.14</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.3</version>
</dependency>
2.转换代码
importorg.apache.poi.hwpf.HWPFDocument; importorg.apache.poi.hwpf.converter.WordToHtmlConverter; importorg.apache.poi.xwpf.converter.core.BasicURIResolver; importorg.apache.poi.xwpf.converter.core.FileImageExtractor; importorg.apache.poi.xwpf.converter.xhtml.XHTMLConverter; importorg.apache.poi.xwpf.converter.xhtml.XHTMLOptions; importorg.apache.poi.xwpf.usermodel.XWPFDocument; importorg.w3c.dom.Document; importjavax.xml.parsers.DocumentBuilderFactory; importjavax.xml.transform.OutputKeys; importjavax.xml.transform.Transformer; importjavax.xml.transform.TransformerFactory; importjavax.xml.transform.dom.DOMSource; importjavax.xml.transform.stream.StreamResult; importjava.io.File; importjava.io.FileInputStream; importjava.io.FileOutputStream; importjava.io.OutputStreamWriter; publicclassTest{ //doc转换为html voiddocToHtml()throwsException{ StringsourceFileName="C:\\doc\\test.doc"; StringtargetFileName="C:\\html\\test.html"; StringimagePathStr="C:\\html\\image\\"; HWPFDocumentwordDocument=newHWPFDocument(newFileInputStream(sourceFileName)); Documentdocument=DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); WordToHtmlConverterwordToHtmlConverter=newWordToHtmlConverter(document); //保存图片,并返回图片的相对路径 wordToHtmlConverter.setPicturesManager((content,pictureType,name,width,height)->{ try(FileOutputStreamout=newFileOutputStream(imagePathStr+name)){ out.write(content); }catch(Exceptione){ e.printStackTrace(); } return"image/"+name; }); wordToHtmlConverter.processDocument(wordDocument); DocumenthtmlDocument=wordToHtmlConverter.getDocument(); DOMSourcedomSource=newDOMSource(htmlDocument); StreamResultstreamResult=newStreamResult(newFile(targetFileName)); TransformerFactorytf=TransformerFactory.newInstance(); Transformerserializer=tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING,"utf-8"); serializer.setOutputProperty(OutputKeys.INDENT,"yes"); serializer.setOutputProperty(OutputKeys.METHOD,"html"); 
serializer.transform(domSource,streamResult); } //docx转换为html publicvoiddocxToHtml()throwsException{ StringsourceFileName="D:\\ac\\00.docx"; StringtargetFileName="D:\\ac\\test.html"; StringimagePathStr="D:\\ac\\image\\"; OutputStreamWriteroutputStreamWriter=null; try{ XWPFDocumentdocument=newXWPFDocument(newFileInputStream(sourceFileName)); XHTMLOptionsoptions=XHTMLOptions.create(); //存放图片的文件夹 options.setExtractor(newFileImageExtractor(newFile(imagePathStr))); //html中图片的路径 options.URIResolver(newBasicURIResolver("image")); outputStreamWriter=newOutputStreamWriter(newFileOutputStream(targetFileName),"utf-8"); XHTMLConverterxhtmlConverter=(XHTMLConverter)XHTMLConverter.getInstance(); xhtmlConverter.convert(document,outputStreamWriter,options); }finally{ if(outputStreamWriter!=null){ outputStreamWriter.close(); } } }
演示地址:https://www.xiaoyun.studio/app/preview.html
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。