使用NPOI读取Word、Excel文档内容
使用NPOI读取Excel的例子很多,读取Word的例子却不多。
Excel的解析方式有多种:可以使用ODBC查询,把Excel作为一个数据集对待;也可以使用文档结构模型的方式进行解析,即依次解析Workbook(工作簿)、Sheet、Row、Column。
Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。
Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)
也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。
usingNPOI.POIFS.FileSystem;
usingNPOI.SS.UserModel;
usingNPOI.XSSF.UserModel;
usingNPOI.XWPF.UserModel;
usingSystem;
usingSystem.Collections.Generic;
usingSystem.Configuration;
usingSystem.IO;
usingSystem.Text;
namespace eyuan
{
    /// <summary>
    /// Helpers that extract text from Excel workbooks and Word documents through NPOI
    /// (<c>XSSFWorkbook</c> and <c>XWPFDocument</c>).
    /// </summary>
    public static class NOPIHandler
    {
        /// <summary>
        /// Reads every sheet of an Excel workbook into nested lists of cell text.
        /// </summary>
        /// <param name="fileName">Path of the workbook to open.</param>
        /// <returns>
        /// One list per sheet; each sheet holds one list per defined row; each row holds the
        /// text of its cells. An empty list is returned when the workbook cannot be opened
        /// (the failure is logged through <c>LogHandler.LogWrite</c>).
        /// </returns>
        public static List<List<List<string>>> ReadExcel(string fileName)
        {
            List<List<List<string>>> workBookContent = new List<List<List<string>>>();

            // Open the workbook.
            XSSFWorkbook workbook = null;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    workbook = new XSSFWorkbook(file);
                }
            }
            catch (Exception e)
            {
                LogOpenFailure(fileName, e);
                // Without a workbook there is nothing to read; returning the empty result
                // avoids dereferencing the null workbook below.
                return workBookContent;
            }

            // Walk the sheets.
            int sheetsCount = workbook.NumberOfSheets;
            for (int i = 0; i < sheetsCount; i++)
            {
                ISheet sheet = workbook.GetSheetAt(i);
                List<List<string>> sheetContent = new List<List<string>>();

                // Bound by LastRowNum rather than PhysicalNumberOfRows: the physical count
                // only counts defined rows, so using it as an index bound would drop trailing
                // rows whenever the sheet has gaps.
                int lastRowIndex = sheet.LastRowNum;
                for (int j = 0; j <= lastRowIndex; j++)
                {
                    IRow row = sheet.GetRow(j);
                    if (row == null)
                    {
                        // Row was never defined in the file; skip it.
                        continue;
                    }

                    List<string> rowContent = new List<string>();

                    // LastCellNum is the index of the last cell plus one (-1 for an empty row).
                    int cellBound = row.LastCellNum;
                    for (int k = 0; k < cellBound; k++)
                    {
                        ICell cell = row.GetCell(k);
                        // Undefined cells become empty strings so later columns keep their position.
                        // NOTE(review): cell text is rendered with ToString(); confirm this is the
                        // desired formatting for numeric and date cells.
                        rowContent.Add(cell == null ? string.Empty : cell.ToString());
                    }

                    sheetContent.Add(rowContent);
                }

                workBookContent.Add(sheetContent);
            }

            return workBookContent;
        }

        /// <summary>
        /// Reads an Excel workbook and flattens it into a single string.
        /// </summary>
        /// <remarks>
        /// Cells are joined with the <c>ExcelCellSeparator</c> app setting; the
        /// <c>ExcelRowSeparator</c> setting is appended after every row and the
        /// <c>ExcelSheetSeparator</c> setting after every sheet.
        /// </remarks>
        /// <param name="fileName">Path of the workbook to open.</param>
        /// <returns>The flattened workbook text.</returns>
        public static string ReadExcelText(string fileName)
        {
            string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
            string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
            string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

            StringBuilder text = new StringBuilder();

            // Sheets of the workbook.
            foreach (List<List<string>> sheet in ReadExcel(fileName))
            {
                // Rows of the current sheet.
                foreach (List<string> row in sheet)
                {
                    text.Append(string.Join(cellSeparator, row.ToArray()));
                    text.Append(rowSeparator);
                }

                text.Append(sheetSeparator);
            }

            return text.ToString();
        }

        /// <summary>
        /// Reads the content of a Word document into a single string.
        /// </summary>
        /// <remarks>
        /// Sections are written in this order, each wrapped in <c>Capture…Begin</c> /
        /// <c>Capture…End</c> marker lines: headers, footers, tables, images, paragraphs.
        /// The first four are emitted only when the matching app setting
        /// (<c>CaptureWordHeader</c>, <c>CaptureWordFooter</c>, <c>CaptureWordTable</c>,
        /// <c>CaptureWordImage</c>) equals <c>"true"</c>; paragraphs are always emitted.
        /// When images are captured, each picture is written to disk and its path is added
        /// to the output; the path comes from <c>string.Format</c> applied to the
        /// <c>CaptureWordImageFileName</c> setting, which therefore has to be a format
        /// string containing <c>{0}</c>.
        /// </remarks>
        /// <param name="fileName">Path of the document to open.</param>
        /// <returns>
        /// The extracted text, or an empty string when the document cannot be opened
        /// (the failure is logged through <c>LogHandler.LogWrite</c>).
        /// </returns>
        public static string ReadWordText(string fileName)
        {
            string tableCellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
            string tableRowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
            string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

            bool captureHeader = IsEnabled(ConfigurationManager.AppSettings["CaptureWordHeader"]);
            bool captureFooter = IsEnabled(ConfigurationManager.AppSettings["CaptureWordFooter"]);
            bool captureTable = IsEnabled(ConfigurationManager.AppSettings["CaptureWordTable"]);
            bool captureImage = IsEnabled(ConfigurationManager.AppSettings["CaptureWordImage"]);

            string imageFileNamePattern = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

            StringBuilder text = new StringBuilder();

            #region Open the document
            XWPFDocument document = null;
            try
            {
                using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
                {
                    document = new XWPFDocument(file);
                }
            }
            catch (Exception e)
            {
                LogOpenFailure(fileName, e);
                // Every section below dereferences the document, so stop here.
                return string.Empty;
            }
            #endregion

            #region Headers and footers
            if (captureHeader)
            {
                text.AppendLine("CaptureHeaderBegin");
                foreach (XWPFHeader header in document.HeaderList)
                {
                    text.AppendLine(header.Text);
                }
                text.AppendLine("CaptureHeaderEnd");
            }

            if (captureFooter)
            {
                text.AppendLine("CaptureFooterBegin");
                foreach (XWPFFooter footer in document.FooterList)
                {
                    text.AppendLine(footer.Text);
                }
                text.AppendLine("CaptureFooterEnd");
            }
            #endregion

            #region Tables
            if (captureTable)
            {
                text.AppendLine("CaptureTableBegin");
                foreach (XWPFTable table in document.Tables)
                {
                    // Rows of the current table.
                    foreach (XWPFTableRow row in table.Rows)
                    {
                        foreach (XWPFTableCell cell in row.GetTableCells())
                        {
                            text.Append(cell.GetText());
                            text.Append(tableCellSeparator);
                        }
                        text.Append(tableRowSeparator);
                    }
                    text.Append(tableSeparator);
                }
                text.AppendLine("CaptureTableEnd");
            }
            #endregion

            #region Images
            if (captureImage)
            {
                text.AppendLine("CaptureImageBegin");
                foreach (XWPFPictureData pictureData in document.AllPictures)
                {
                    string picExtName = pictureData.suggestFileExtension();
                    string picFileName = pictureData.GetFileName();
                    byte[] picFileContent = pictureData.GetData();

                    // A fresh GUID prefix keeps pictures from different documents apart.
                    string picTempName = string.Format(
                        imageFileNamePattern,
                        Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

                    // Creates or overwrites the file and closes it again.
                    File.WriteAllBytes(picTempName, picFileContent);

                    text.AppendLine(picTempName);
                }
                text.AppendLine("CaptureImageEnd");
            }
            #endregion

            // Body paragraphs.
            text.AppendLine("CaptureParagraphBegin");
            foreach (XWPFParagraph paragraph in document.Paragraphs)
            {
                text.AppendLine(paragraph.ParagraphText);
            }
            text.AppendLine("CaptureParagraphEnd");

            return text.ToString();
        }

        /// <summary>Returns true when a capture switch read from app settings is exactly "true".</summary>
        private static bool IsEnabled(string settingValue)
        {
            return settingValue == "true";
        }

        /// <summary>Logs that <paramref name="fileName"/> could not be opened, including the exception text.</summary>
        private static void LogOpenFailure(string fileName, Exception e)
        {
            LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
        }
    }
}
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。