Java实现Word/Pdf/TXT转html的示例

2023-07-12 16:28:03 423

引言:

最近公司在做一个教育培训学习及在线考试的项目，本人主要负责网络课程模块，包括课程分类、课程、课件的创建，以及在线学习和统计功能。课件涉及多种类型，例如视频、音频、图文、外部链接及文档。其中就涉及到一个问题：文档型课件如何在网页上展示和学习。因为要在线统计学习的课程、学习的人员和学习的时长，所以不能像传统做法那样将文档下载到本地学习（那样就不受系统控制了）。最终的方案是：在上传文档型课件时，将文件转换成对应的HTML文件，以便在网页上浏览学习。

下边主要针对word,pdf和txt文本文件进行转换

一:Java实现将word转换为html

1:引入依赖


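<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.12</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.12</version>
</dependency>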

2:代码demo

package com.svse.controller;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.core.IURIResolver;
import org.apache.poi.xwpf.converter.core.IXWPFConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
/**
*word转换成html
*/
publicclassTestWordToHtml{

publicstaticfinalStringSTORAGEPATH="C://works//files//";
publicstaticfinalStringIP="192.168.30.222";
publicstaticfinalStringPORT="8010";
publicstaticvoidmain(String[]args)throwsIOException,TransformerException,ParserConfigurationException{
TestWordToHtmlwt=newTestWordToHtml();
//wt.Word2003ToHtml("甲骨文考证.doc");
wt.Word2007ToHtml("甲骨文考证.docx");

}

/**
*2003版本word转换成html
*@throwsIOException
*@throwsTransformerException
*@throwsParserConfigurationException
*/
publicvoidWord2003ToHtml(StringfileName)throwsIOException,TransformerException,ParserConfigurationException{

finalStringimagepath=STORAGEPATH+"fileImage/";//解析时候如果doc文件中有图片图片会保存在此路径
finalStringstrRanString=getRandomNum();
Stringfilepath=STORAGEPATH;
StringhtmlName=fileName.substring(0,fileName.indexOf("."))+"2003.html";
finalStringfile=filepath+fileName;
InputStreaminput=newFileInputStream(newFile(file));
HWPFDocumentwordDocument=newHWPFDocument(input);
WordToHtmlConverterwordToHtmlConverter=newWordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
//设置图片存放的位置
wordToHtmlConverter.setPicturesManager(newPicturesManager(){
publicStringsavePicture(byte[]content,PictureTypepictureType,StringsuggestedName,floatwidthInches,floatheightInches){
FileimgPath=newFile(imagepath);
if(!imgPath.exists()){//图片目录不存在则创建
imgPath.mkdirs();
}

Filefile=newFile(imagepath+strRanString+suggestedName);
try{
OutputStreamos=newFileOutputStream(file);
os.write(content);
os.close();
}catch(FileNotFoundExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}

return"http://"+IP+":"+PORT+"//uploadFile/fileImage/"+strRanString+suggestedName;
//returnimagepath+strRanString+suggestedName;
}
});

//解析word文档
wordToHtmlConverter.processDocument(wordDocument);
DocumenthtmlDocument=wordToHtmlConverter.getDocument();

FilehtmlFile=newFile(filepath+strRanString+htmlName);
OutputStreamoutStream=newFileOutputStream(htmlFile);


DOMSourcedomSource=newDOMSource(htmlDocument);
StreamResultstreamResult=newStreamResult(outStream);

TransformerFactoryfactory=TransformerFactory.newInstance();
Transformerserializer=factory.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING,"utf-8");
serializer.setOutputProperty(OutputKeys.INDENT,"yes");
serializer.setOutputProperty(OutputKeys.METHOD,"html");

serializer.transform(domSource,streamResult);
outStream.close();

System.out.println("生成html文件路径:"+"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);
}

/**
*2007版本word转换成html
*@throwsIOException
*/
publicvoidWord2007ToHtml(StringfileName)throwsIOException{

finalStringstrRanString=getRandomNum();

Stringfilepath=STORAGEPATH+strRanString;
StringhtmlName=fileName.substring(0,fileName.indexOf("."))+"2007.html";
Filef=newFile(STORAGEPATH+fileName);
if(!f.exists()){
System.out.println("SorryFiledoesnotExists!");
}else{
if(f.getName().endsWith(".docx")||f.getName().endsWith(".DOCX")){
try{
//1)加载word文档生成XWPFDocument对象
InputStreamin=newFileInputStream(f);
XWPFDocumentdocument=newXWPFDocument(in);

//2)解析XHTML配置(这里设置IURIResolver来设置图片存放的目录)
FileimageFolderFile=newFile(filepath);
XHTMLOptionsoptions=XHTMLOptions.create().URIResolver(newFileURIResolver(imageFolderFile));
options.setExtractor(newFileImageExtractor(imageFolderFile));
options.URIResolver(newIURIResolver(){
publicStringresolve(Stringuri){
//http://192.168.30.222:8010//uploadFile/....
return"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+"/"+uri;
}
});

options.setIgnoreStylesIfUnused(false);
options.setFragment(true);

//3)将XWPFDocument转换成XHTML
OutputStreamout=newFileOutputStream(newFile(filepath+htmlName));
IXWPFConverterconverter=XHTMLConverter.getInstance();
converter.convert(document,out,options);
//XHTMLConverter.getInstance().convert(document,out,options);
System.out.println("html路径:"+"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);
}catch(Exceptione){
e.printStackTrace();
}

}else{
System.out.println("EnteronlyMSOffice2007+files");
}
}
}

/**
*功能说明:生成时间戳
*创建人:zsq
*创建时间:2019年12月7日下午2:37:09
*
*/
publicstaticStringgetRandomNum(){
Datedt=newDate();
SimpleDateFormatsdf=newSimpleDateFormat("yyyyMMddHHmmss");
Stringstr=sdf.format(dt);
returnstr;
}

}

二:Java实现将Pdf转换为html

1:引入依赖


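<dependency>
    <groupId>net.sf.cssbox</groupId>
    <artifactId>pdf2dom</artifactId>
    <version>1.7</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.12</version>
</dependency>
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-tools</artifactId>
    <version>2.0.12</version>
</dependency>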

2:代码Demo

publicclassPdfToHtml{

/*
pdf转换html
*/
publicvoidpdfToHtmlTest(StringinPdfPath,StringoutputHtmlPath){
//StringoutputPath="C:\\works\\files\\ZSQ保密知识测试题库.html";
//try()写在()里面会自动关闭流
try{
BufferedWriterout=newBufferedWriter(newOutputStreamWriter(newFileOutputStream(newFile(outputHtmlPath)),"utf-8"));
//加载PDF文档
//PDDocumentdocument=PDDocument.load(bytes);
PDDocumentdocument=PDDocument.load(newFile(inPdfPath));
PDFDomTreepdfDomTree=newPDFDomTree();
pdfDomTree.writeText(document,out);
}catch(Exceptione){
e.printStackTrace();
}
}

publicstaticvoidmain(String[]args)throwsIOException{
PdfToHtmlph=newPdfToHtml();
StringpdfPath="C:\\works\\files\\武研中心行政考勤制度.pdf";
StringoutputPath="C:\\works\\files\\武研中心行政考勤制度.html";
ph.pdfToHtmlTest(pdfPath,outputPath);
}

}

三:Java实现将TXT转换为html

/*
*txt文档转html
filePath:txt原文件路径
htmlPosition:转化后生成的html路径
*/
publicstaticvoidtxtToHtml(StringfilePath,StringhtmlPosition){
try{
//Stringencoding="GBK";
Filefile=newFile(filePath);
if(file.isFile()&&file.exists()){//判断文件是否存在
InputStreamReaderread=newInputStreamReader(newFileInputStream(file),"GBK");
//考虑到编码格式
BufferedReaderbufferedReader=newBufferedReader(read);
//写文件
FileOutputStreamfos=newFileOutputStream(newFile(htmlPosition));
OutputStreamWriterosw=newOutputStreamWriter(fos,"GBK");
BufferedWriterbw=newBufferedWriter(osw);
StringlineTxt=null;
while((lineTxt=bufferedReader.readLine())!=null){
bw.write("   "+lineTxt+"
");
}
bw.close();
osw.close();
fos.close();
read.close();
}else{
System.out.println("找不到指定的文件");
}
}catch(Exceptione){
System.out.println("读取文件内容出错");
e.printStackTrace();
}
}

以上就是Java实现Word/Pdf/TXT转html的示例的详细内容，更多关于JavaWord/Pdf/TXT转html的资料请关注毛票票其它相关文章！

Java实现Word/Pdf/TXT转html的示例

热门推荐

随机推荐