Java使用poi将word转换为html
使用 Apache POI 将 Word 文档转换为 HTML,支持 doc 和 docx 两种格式,转换后可以保留图片和样式。
1.导入Maven包
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>fr.opensagres.xdocreport</groupId> <artifactId>xdocreport</artifactId> <version>1.0.6</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.14</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>ooxml-schemas</artifactId> <version>1.3</version> </dependency>
2.转换代码
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
publicclassTest{
//doc转换为html
voiddocToHtml()throwsException{
StringsourceFileName="C:\\doc\\test.doc";
StringtargetFileName="C:\\html\\test.html";
StringimagePathStr="C:\\html\\image\\";
HWPFDocumentwordDocument=newHWPFDocument(newFileInputStream(sourceFileName));
Documentdocument=DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
WordToHtmlConverterwordToHtmlConverter=newWordToHtmlConverter(document);
//保存图片,并返回图片的相对路径
wordToHtmlConverter.setPicturesManager((content,pictureType,name,width,height)->{
try(FileOutputStreamout=newFileOutputStream(imagePathStr+name)){
out.write(content);
}catch(Exceptione){
e.printStackTrace();
}
return"image/"+name;
});
wordToHtmlConverter.processDocument(wordDocument);
DocumenthtmlDocument=wordToHtmlConverter.getDocument();
DOMSourcedomSource=newDOMSource(htmlDocument);
StreamResultstreamResult=newStreamResult(newFile(targetFileName));
TransformerFactorytf=TransformerFactory.newInstance();
Transformerserializer=tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING,"utf-8");
serializer.setOutputProperty(OutputKeys.INDENT,"yes");
serializer.setOutputProperty(OutputKeys.METHOD,"html");
serializer.transform(domSource,streamResult);
}
//docx转换为html
publicvoiddocxToHtml()throwsException{
StringsourceFileName="D:\\ac\\00.docx";
StringtargetFileName="D:\\ac\\test.html";
StringimagePathStr="D:\\ac\\image\\";
OutputStreamWriteroutputStreamWriter=null;
try{
XWPFDocumentdocument=newXWPFDocument(newFileInputStream(sourceFileName));
XHTMLOptionsoptions=XHTMLOptions.create();
//存放图片的文件夹
options.setExtractor(newFileImageExtractor(newFile(imagePathStr)));
//html中图片的路径
options.URIResolver(newBasicURIResolver("image"));
outputStreamWriter=newOutputStreamWriter(newFileOutputStream(targetFileName),"utf-8");
XHTMLConverterxhtmlConverter=(XHTMLConverter)XHTMLConverter.getInstance();
xhtmlConverter.convert(document,outputStreamWriter,options);
}finally{
if(outputStreamWriter!=null){
outputStreamWriter.close();
}
}
}
演示地址:https://www.xiaoyun.studio/app/preview.html
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。