使用NPOI读取Word、Excel文档内容
使用NPOI读取Excel的例子很多,读取Word的例子不多。
Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。
Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。
Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)
也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Text;
using NPOI.POIFS.FileSystem;
using NPOI.SS.UserModel;
using NPOI.XSSF.UserModel;
using NPOI.XWPF.UserModel;

namespace eyuan
{
    /// <summary>
    /// Extracts text from Excel workbooks (opened with <c>XSSFWorkbook</c>) and
    /// Word documents (opened with <c>XWPFDocument</c>) using NPOI.
    /// Separators and capture switches are read from the application's appSettings.
    /// </summary>
    public static class NOPIHandler
    {
        /// <summary>
        /// Reads every sheet of an Excel workbook into nested lists:
        /// workbook -> sheets -> rows -> cell texts.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// One entry per sheet, each holding one entry per row, each holding the
        /// text of that row's cells. If the file cannot be opened the failure is
        /// logged and an empty list is returned.
        /// </returns>
        public static List<List<List<string>>> ReadExcel(string fileName)
        {
            List<List<List<string>>> workBookContent = new List<List<List<string>>>();

            // Open the workbook; the stream is only needed while constructing it.
            XSSFWorkbook workbook;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    workbook = new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                // Without a workbook there is nothing to iterate; returning here
                // avoids dereferencing a null workbook after the failure is logged.
                LogOpenFailure(fileName, e);
                return workBookContent;
            }

            int sheetsCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetsCount; i++)
            {
                workBookContent.Add(ReadSheet(workbook.GetSheetAt(i)));
            }

            return workBookContent;
        }

        /// <summary>
        /// Reads an Excel workbook and flattens it into a single string.
        /// </summary>
        /// <param name="fileName">Path of the workbook file to open.</param>
        /// <returns>
        /// Cell texts joined with the <c>ExcelCellSeparator</c> setting, each row
        /// followed by <c>ExcelRowSeparator</c> and each sheet followed by
        /// <c>ExcelSheetSeparator</c>.
        /// </returns>
        public static string ReadExcelText(string fileName)
        {
            string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
            string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
            string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

            StringBuilder text = new StringBuilder();
            foreach (List<List<string>> sheet in ReadExcel(fileName))
            {
                foreach (List<string> row in sheet)
                {
                    text.Append(string.Join(cellSeparator, row.ToArray()));
                    text.Append(rowSeparator);
                }
                text.Append(sheetSeparator);
            }

            return text.ToString();
        }

        /// <summary>
        /// Reads the content of a Word document into a single string.
        /// </summary>
        /// <remarks>
        /// Headers, footers, tables and images are each emitted only when the
        /// matching appSetting (<c>CaptureWordHeader</c>, <c>CaptureWordFooter</c>,
        /// <c>CaptureWordTable</c>, <c>CaptureWordImage</c>) equals "true".
        /// Body paragraphs are always emitted. Every section is wrapped in
        /// "Capture...Begin" / "Capture...End" marker lines.
        /// </remarks>
        /// <param name="fileName">Path of the document file to open.</param>
        /// <returns>
        /// The extracted text. If the file cannot be opened the failure is logged
        /// and an empty string is returned.
        /// </returns>
        public static string ReadWordText(string fileName)
        {
            // Open the document; the stream is only needed while constructing it.
            XWPFDocument document;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    document = new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                // Returning here avoids dereferencing a null document after the
                // failure is logged.
                LogOpenFailure(fileName, e);
                return string.Empty;
            }

            StringBuilder text = new StringBuilder();

            if (IsEnabled("CaptureWordHeader"))
            {
                text.AppendLine("CaptureHeaderBegin");
                foreach (XWPFHeader header in document.HeaderList)
                {
                    text.AppendLine(header.Text);
                }
                text.AppendLine("CaptureHeaderEnd");
            }

            if (IsEnabled("CaptureWordFooter"))
            {
                text.AppendLine("CaptureFooterBegin");
                foreach (XWPFFooter footer in document.FooterList)
                {
                    text.AppendLine(footer.Text);
                }
                text.AppendLine("CaptureFooterEnd");
            }

            if (IsEnabled("CaptureWordTable"))
            {
                AppendTables(document, text);
            }

            if (IsEnabled("CaptureWordImage"))
            {
                AppendImages(document, text);
            }

            // Body paragraphs are always captured.
            text.AppendLine("CaptureParagraphBegin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                text.AppendLine(paragraph.ParagraphText);
            }
            text.AppendLine("CaptureParagraphEnd");

            return text.ToString();
        }

        /// <summary>Converts one sheet into a list of rows of cell texts.</summary>
        private static List<List<string>> ReadSheet(ISheet sheet)
        {
            List<List<string>> sheetContent = new List<List<string>>();
            if (sheet.PhysicalNumberOfRows == 0)
            {
                return sheetContent;
            }

            // NOTE(review): the published listing lost its loop bodies during
            // extraction; the row/cell traversal below is a reconstruction and
            // should be confirmed against the author's original code.
            // Rows are addressed by index up to LastRowNum so that a sheet with
            // undefined (null) rows neither throws nor drops trailing rows; an
            // undefined row yields an empty entry to keep row positions aligned.
            for (int j = 0; j <= sheet.LastRowNum; j++)
            {
                IRow row = sheet.GetRow(j);
                List<string> rowContent = new List<string>();
                if (row != null)
                {
                    foreach (ICell cell in row.Cells)
                    {
                        rowContent.Add(cell == null ? string.Empty : cell.ToString());
                    }
                }
                sheetContent.Add(rowContent);
            }

            return sheetContent;
        }

        /// <summary>Appends the text of every table, using the configured Word table separators.</summary>
        private static void AppendTables(XWPFDocument document, StringBuilder text)
        {
            string cellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
            string rowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
            string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

            text.AppendLine("CaptureTableBegin");
            foreach (XWPFTable table in document.Tables)
            {
                foreach (XWPFTableRow row in table.Rows)
                {
                    foreach (XWPFTableCell cell in row.GetTableCells())
                    {
                        text.Append(cell.GetText());
                        text.Append(cellSeparator);
                    }
                    text.Append(rowSeparator);
                }
                text.Append(tableSeparator);
            }
            text.AppendLine("CaptureTableEnd");
        }

        /// <summary>Writes every embedded picture to disk and appends the written file paths.</summary>
        private static void AppendImages(XWPFDocument document, StringBuilder text)
        {
            // Used as a string.Format template with a single {0} argument
            // (the generated picture file name).
            string fileNameTemplate = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

            text.AppendLine("CaptureImageBegin");
            foreach (XWPFPictureData pictureData in document.AllPictures)
            {
                // NOTE(review): member spellings follow the original listing;
                // other NPOI releases may spell them differently - confirm
                // against the referenced NPOI version.
                string picExtName = pictureData.suggestFileExtension();
                string picFileName = pictureData.GetFileName();
                byte[] picFileContent = pictureData.GetData();

                // A GUID prefix keeps names unique across documents and runs.
                string picTempName = string.Format(
                    fileNameTemplate,
                    Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                File.WriteAllBytes(picTempName, picFileContent);
                text.AppendLine(picTempName);
            }
            text.AppendLine("CaptureImageEnd");
        }

        /// <summary>Returns true when the named appSetting is exactly "true".</summary>
        private static bool IsEnabled(string settingName)
        {
            return ConfigurationManager.AppSettings[settingName] == "true";
        }

        /// <summary>Logs that a file could not be opened, including the exception details.</summary>
        private static void LogOpenFailure(string fileName, Exception e)
        {
            LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
        }
    }
}
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。