Java实现excel大数据量导入
本文实例为大家分享了Java实现excel大数据量导入的具体代码,供大家参考,具体内容如下
情景分析:
通常我们通过poi读取excel文件时,若在用户模式下,由于数据量较大、Sheet较多,很容易出现内存溢出的情况
用户模式读取excel的典型代码如下:
FileInputStreamfile=newFileInputStream("c:\\test.xlsx");
Workbookwb=newXSSFWorkbook(file);
而03版(xls)excel文件每个sheet最大长度为65536行,07版(xlsx)excel文件每个sheet最大长度为1048576行,因此我们主要解决07版(xlsx)excel大数据量导入出现内存溢出的问题
由于读取cvs文件效率高且占用内存少,所以我们采用xlsx转换为cvs的方式来解决
读取xlsx格式数据:
importjava.io.File;
importjava.io.FileOutputStream;
importjava.io.IOException;
importjava.io.InputStream;
importjava.io.OutputStream;
importjava.io.PrintStream;
importjava.text.SimpleDateFormat;
importjava.util.ArrayList;
importjava.util.Date;
importjava.util.HashMap;
importjava.util.List;
importjava.util.Map;
importjavax.xml.parsers.ParserConfigurationException;
importjavax.xml.parsers.SAXParser;
importjavax.xml.parsers.SAXParserFactory;
importorg.apache.poi.hssf.usermodel.HSSFDateUtil;
importorg.apache.poi.openxml4j.exceptions.OpenXML4JException;
importorg.apache.poi.openxml4j.opc.OPCPackage;
importorg.apache.poi.openxml4j.opc.PackageAccess;
importorg.apache.poi.ss.usermodel.BuiltinFormats;
importorg.apache.poi.ss.usermodel.DataFormatter;
importorg.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
importorg.apache.poi.xssf.eventusermodel.XSSFReader;
importorg.apache.poi.xssf.model.StylesTable;
importorg.apache.poi.xssf.usermodel.XSSFCellStyle;
importorg.apache.poi.xssf.usermodel.XSSFRichTextString;
importorg.xml.sax.Attributes;
importorg.xml.sax.InputSource;
importorg.xml.sax.SAXException;
importorg.xml.sax.XMLReader;
importorg.xml.sax.helpers.DefaultHandler;
publicclassXLSX2CSV{
enumxssfDataType{
BOOL,ERROR,FORMULA,INLINESTR,SSTINDEX,NUMBER,
}
/**
*使用xssf_sax_API处理Excel,请参考:http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
*/
classMyXSSFSheetHandlerextendsDefaultHandler{
privateStylesTablestylesTable;
privateReadOnlySharedStringsTablesharedStringsTable;
privatefinalPrintStreamoutput;
privatefinalintminColumnCount;
privatebooleanvIsOpen;
privatexssfDataTypenextDataType;
privateshortformatIndex;
privateStringformatString;
privatefinalDataFormatterformatter;
privateintthisColumn=-1;
privateintlastColumnNumber=-1;
privateStringBuffervalue;
privateString[]record;
privateListrows=newArrayList();
privatebooleanisCellNull=false;
publicMyXSSFSheetHandler(StylesTablestyles,ReadOnlySharedStringsTablestrings,intcols,PrintStreamtarget){
this.stylesTable=styles;
this.sharedStringsTable=strings;
this.minColumnCount=cols;
this.output=target;
this.value=newStringBuffer();
this.nextDataType=xssfDataType.NUMBER;
this.formatter=newDataFormatter();
record=newString[this.minColumnCount];
rows.clear();//每次读取都清空行集合
}
publicvoidstartElement(Stringuri,StringlocalName,Stringname,Attributesattributes)throwsSAXException{
if("inlineStr".equals(name)||"v".equals(name)){
vIsOpen=true;
//Clearcontentscache
value.setLength(0);
}
//c=>cell
elseif("c".equals(name)){
//Getthecellreference
Stringr=attributes.getValue("r");
intfirstDigit=-1;
for(intc=0;ccontentsofacell
if("v".equals(name)){
//Processthevaluecontentsasrequired.
//Donow,ascharacters()maybecalledmorethanonce
switch(nextDataType){
caseBOOL:
charfirst=value.charAt(0);
thisStr=first=='0'?"FALSE":"TRUE";
break;
caseERROR:
thisStr="\"ERROR:"+value.toString()+'"';
break;
caseFORMULA:
//Aformulacouldresultinastringvalue,
//soalwaysadddouble-quotecharacters.
thisStr='"'+value.toString()+'"';
break;
caseINLINESTR:
//TODO:haveseenanexampleofthis,soit'suntested.
XSSFRichTextStringrtsi=newXSSFRichTextString(value.toString());
thisStr='"'+rtsi.toString()+'"';
break;
caseSSTINDEX:
StringsstIndex=value.toString();
try{
intidx=Integer.parseInt(sstIndex);
XSSFRichTextStringrtss=newXSSFRichTextString(sharedStringsTable.getEntryAt(idx));
thisStr=rtss.toString();
}catch(NumberFormatExceptionex){
output.println("FailedtoparseSSTindex'"+sstIndex
+"':"+ex.toString());
}
break;
caseNUMBER:
Stringn=value.toString();
//判断是否是日期格式
if(HSSFDateUtil.isADateFormat(this.formatIndex,n)){
Doubled=Double.parseDouble(n);
Datedate=HSSFDateUtil.getJavaDate(d);
thisStr=formateDateToString(date);
}elseif(this.formatString!=null)
thisStr=formatter.formatRawCellContents(
Double.parseDouble(n),this.formatIndex,
this.formatString);
else
thisStr=n;
break;
default:
thisStr="(TODO:Unexpectedtype:"+nextDataType+")";
break;
}
//Outputafterwe'veseenthestringcontents
//Emitcommasforanyfieldsthatweremissingonthisrow
if(lastColumnNumber==-1){
lastColumnNumber=0;
}
//判断单元格的值是否为空
if(thisStr==null||"".equals(isCellNull)){
isCellNull=true;//设置单元格是否为空值
}
record[thisColumn]=thisStr;
//Updatecolumn
if(thisColumn>-1)
lastColumnNumber=thisColumn;
}elseif("row".equals(name)){
//Printoutanymissingcommasifneeded
if(minColumns>0){
//Columnsare0based
if(lastColumnNumber==-1){
lastColumnNumber=0;
}
if(isCellNull==false&&record[0]!=null
&&record[1]!=null)//判断是否空行
{
rows.add(record.clone());
isCellNull=false;
for(inti=0;igetRows(){
returnrows;
}
publicvoidsetRows(Listrows){
this.rows=rows;
}
/**
*Capturescharactersonlyifasuitableelementisopen.Originally
*wasjust"v";extendedforinlineStralso.
*/
publicvoidcharacters(char[]ch,intstart,intlength)throwsSAXException{
if(vIsOpen)
value.append(ch,start,length);
}
/**
*ConvertsanExcelcolumnnamelike"C"toazero-basedindex.
*@paramname
*@returnIndexcorrespondingtothespecifiedname
*/
privateintnameToColumn(Stringname){
intcolumn=-1;
for(inti=0;iCSVconverter
*
*@parampkg
*TheXLSXpackagetoprocess
*@paramoutput
*ThePrintStreamtooutputtheCSVto
*@paramminColumns
*Theminimumnumberofcolumnstooutput,or-1fornominimum
*/
publicXLSX2CSV(OPCPackagepkg,PrintStreamoutput,intminColumns){
this.xlsxPackage=pkg;
this.output=output;
this.minColumns=minColumns;
}
/**
*Parsesandshowsthecontentofonesheetusingthespecifiedstylesand
*shared-stringstables.
*@paramstyles
*@paramstrings
*@paramsheetInputStream
*/
publicListprocessSheet(StylesTablestyles,
ReadOnlySharedStringsTablestrings,InputStreamsheetInputStream)
throwsIOException,ParserConfigurationException,SAXException{
InputSourcesheetSource=newInputSource(sheetInputStream);
SAXParserFactorysaxFactory=SAXParserFactory.newInstance();
SAXParsersaxParser=saxFactory.newSAXParser();
XMLReadersheetParser=saxParser.getXMLReader();
MyXSSFSheetHandlerhandler=newMyXSSFSheetHandler(styles,strings,this.minColumns,this.output);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
returnhandler.getRows();
}
/**
*初始化这个处理程序
*@throwsIOException
*@throwsOpenXML4JException
*@throwsParserConfigurationException
*@throwsSAXException
*/
publicListprocess()throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{
ReadOnlySharedStringsTablestrings=newReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReaderxssfReader=newXSSFReader(this.xlsxPackage);
Listlist=null;
StylesTablestyles=xssfReader.getStylesTable();
XSSFReader.SheetIteratoriter=(XSSFReader.SheetIterator)xssfReader.getSheetsData();
intindex=0;
while(iter.hasNext()){
InputStreamstream=iter.next();
index++;
//默认取第一个工作薄
if(index==1){
list=processSheet(styles,strings,stream);
stream.close();
}
}
returnlist;
}
/**
*读取Excel
*
*@parampath
*文件路径
*@paramsheetName
*sheet名称
*@paramminColumns
*列总数
*@return
*@throwsSAXException
*@throwsParserConfigurationException
*@throwsOpenXML4JException
*@throwsIOException
*/
privatestaticListreaderExcel(Stringpath,intminColumns)throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{
OPCPackagep=OPCPackage.open(path,PackageAccess.READ);
XLSX2CSVxlsx2csv=newXLSX2CSV(p,System.out,minColumns);
Listlist=xlsx2csv.process();
p.close();
returnlist;
}
/**
*读取Excel
*
*@paramfile
*File
*@paramsheetName
*sheet名称
*@paramminColumns
*列总数
*@return
*@throwsSAXException
*@throwsParserConfigurationException
*@throwsOpenXML4JException
*@throwsIOException
*/
privatestaticListreaderExcelInputStream(Filefile,intminColumns)throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{
OPCPackagep=OPCPackage.openOrCreate(file);
XLSX2CSVxlsx2csv=newXLSX2CSV(p,System.out,minColumns);
Listlist=xlsx2csv.process();
p.close();
returnlist;
}
//获取表头及数据内容,可在业务层调用
publicstaticMapgetTableDate(InputStreamin)throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{
Fileout=newFile("C:\\date.xlsx");//默认在C盘生成date.xlsx文件
OutputStreamoutput=null;
try{
output=newFileOutputStream(out);
byte[]buf=newbyte[1024];
intbytesRead;
while((bytesRead=in.read(buf))>0){
output.write(buf,0,bytesRead);
}
}finally{
in.close();
output.close();
}
Mapmap=newHashMap();
Listheadresult=newArrayList();//表头集合
List>dataresult=newArrayList>();//表数据集合
Listlist=readerExcelInputStream(out,30);//默认读取30列
for(inti=0;idataList=newArrayList();
for(Stringcell:list.get(i)){
dataList.add(cell==null?"":cell);
}
dataresult.add(dataList);
}
}
map.put("headresult",headresult);
map.put("dataresult",dataresult);
returnmap;
}
publicstaticvoidmain(String[]args)throwsException{
Filefile=newFile("C:/Users/Administrator/Desktop/测试.xlsx");
System.out.println("开始读取...");
Listlist=XLSX2CSV.readerExcelInputStream(file,30);//默认读取30列
System.out.println("数据量:"+list.size());
for(inti=0;i0){
for(Stringcell:list.get(i)){
System.out.print(cell+",");
}
System.out.println();
}
}
}
}
说明:
代码中的加载excel的核心方法:
OPCPackagepkg=OPCPackage.open(path);//文件路径 OPCPackagepkg=OPCPackage.openOrCreate(file);//文件 OPCPackagepkg=OPCPackage.open(InputStream);//文件流
文件流方式对内存依赖极大,所以实际应用时,如果只能获取文件流的话,可以先将文件通过流拷贝到本地再解析代码中的
若上传文件大小超过限制,可在配置文件中设置,SpringBoot2项目application.properties中设置:
spring.servlet.multipart.maxFileSize=50MB spring.servlet.multipart.maxRequestSize=100MB
实际业务数据入库的话也可采用异步任务@Async的方式来提高入库效率:
importjava.util.Map;
importorg.springframework.beans.factory.annotation.Autowired;
importorg.springframework.scheduling.annotation.Async;
importorg.springframework.stereotype.Service;
importcn.com.app.dao.JkconfigDao;
@Service
publicclassJkrzService{
@Autowired
privateJkconfigDaojkconfigDao;
@Async("myTaskAsyncPool")
publicvoidtransJkrz(Mapm){
jkconfigDao.insert(m);
}
}
线程池配置:
importjava.util.concurrent.Executor;
importjava.util.concurrent.ThreadPoolExecutor;
importorg.springframework.beans.factory.annotation.Autowired;
importorg.springframework.context.annotation.Bean;
importorg.springframework.context.annotation.Configuration;
importorg.springframework.scheduling.annotation.EnableAsync;
importorg.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
@Configuration
@EnableAsync
publicclassAsyncTaskConfig{
@Autowired
privateTaskThreadPoolConfigconfig;
//ThredPoolTaskExcutor的处理流程
//当池子大小小于corePoolSize,就新建线程,并处理请求
//当池子大小等于corePoolSize,把请求放入workQueue中,池子里的空闲线程就去workQueue中取任务并处理
//当workQueue放不下任务时,就新建线程入池,并处理请求,如果池子大小撑到了maximumPoolSize,就用RejectedExecutionHandler来做拒绝处理
//当池子的线程数大于corePoolSize时,多余的线程会等待keepAliveTime长时间,如果无请求可处理就自行销毁
//当threadNamePrefix设置为true,则核心线程也会超时关闭
@Bean
publicExecutormyTaskAsyncPool(){
ThreadPoolTaskExecutorexecutor=newThreadPoolTaskExecutor();
executor.setCorePoolSize(config.getCorePoolSize());
executor.setMaxPoolSize(config.getMaxPoolSize());
executor.setQueueCapacity(config.getQueueCapacity());
executor.setKeepAliveSeconds(config.getKeepAliveSeconds());
executor.setAllowCoreThreadTimeOut(true);
executor.setThreadNamePrefix("MyExecutor-");
//rejection-policy:当pool已经达到maxsize的时候,如何处理新任务
//CALLER_RUNS:不在新线程中执行任务,而是由调用者所在的线程来执行
executor.setRejectedExecutionHandler(newThreadPoolExecutor.CallerRunsPolicy());
executor.initialize();
returnexecutor;
}
}
参数配置:
#核心线程数,当线程数小于核心线程数时,即使有线程空闲,线程池也会优先创建新线程,设置allowCoreThreadTimeout=true(默认false)时,核心线程会超时退出 spring.task.pool.corePoolSize=20 #最大线程数,当线程数大于等于corePoolSize,且任务队列已满时,线程池会创建新线程来处理任务 spring.task.pool.maxPoolSize=60 #线程空闲时间,当线程空闲时间达到keepAliveSeconds(秒)时,线程会退出,直到线程数量等于corePoolSize,如果allowCoreThreadTimeout=true,则会直到线程数量等于0 spring.task.pool.keepAliveSeconds=1 #任务队列容量,当核心线程数达到最大时,新任务会放在队列中排队等待执行 spring.task.pool.queueCapacity=400
读取xls格式数据:
importorg.apache.poi.hssf.eventusermodel.*;
importorg.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
importorg.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
importorg.apache.poi.hssf.model.HSSFFormulaParser;
importorg.apache.poi.hssf.record.*;
importorg.apache.poi.hssf.usermodel.HSSFDataFormatter;
importorg.apache.poi.hssf.usermodel.HSSFWorkbook;
importorg.apache.poi.poifs.filesystem.POIFSFileSystem;
importjava.io.FileInputStream;
importjava.io.InputStream;
importjava.util.ArrayList;
importjava.util.List;
/**
*@desc用于解决.xls2003版本大数据量问题
**/
publicclassExcelXlsReaderimplementsHSSFListener{
//存储所有数据
privateList>dataList=newArrayList>();
privateintminColums=-1;
privatePOIFSFileSystemfs;
/**
*总行数
*/
privateinttotalRows=0;
/**
*上一行row的序号
*/
privateintlastRowNumber;
/**
*上一单元格的序号
*/
privateintlastColumnNumber;
/**
*是否输出formula,还是它对应的值
*/
privatebooleanoutputFormulaValues=true;
/**
*用于转换formulas
*/
privateEventWorkbookBuilder.SheetRecordCollectingListenerworkbookBuildingListener;
//excel2003工作簿
privateHSSFWorkbookstubWorkbook;
privateSSTRecordsstRecord;
privateFormatTrackingHSSFListenerformatListener;
privatefinalHSSFDataFormatterformatter=newHSSFDataFormatter();
/**
*文件的绝对路径
*/
privateStringfilePath="";
//表索引
privateintsheetIndex=0;
privateBoundSheetRecord[]orderedBSRs;
@SuppressWarnings("unchecked")
privateArrayListboundSheetRecords=newArrayList();
privateintnextRow;
privateintnextColumn;
privatebooleanoutputNextStringRecord;
//当前行
privateintcurRow=0;
//存储一行记录所有单元格的容器
privateListcellList=newArrayList();
/**
*判断整行是否为空行的标记
*/
privatebooleanflag=false;
@SuppressWarnings("unused")
privateStringsheetName;
/**
*遍历excel下所有的sheet
*
*@paramfileName
*@throwsException
*/
publicintprocess(StringfileName)throwsException{
filePath=fileName;
this.fs=newPOIFSFileSystem(newFileInputStream(fileName));
MissingRecordAwareHSSFListenerlistener=newMissingRecordAwareHSSFListener(this);
formatListener=newFormatTrackingHSSFListener(listener);
HSSFEventFactoryfactory=newHSSFEventFactory();
HSSFRequestrequest=newHSSFRequest();
if(outputFormulaValues){
request.addListenerForAllRecords(formatListener);
}else{
workbookBuildingListener=newEventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request,fs);
returntotalRows;//返回该excel文件的总行数,不包括首列和空行
}
publicList>process(InputStreamin)throwsException{
//filePath=fileName;
this.fs=newPOIFSFileSystem(in);
MissingRecordAwareHSSFListenerlistener=newMissingRecordAwareHSSFListener(this);
formatListener=newFormatTrackingHSSFListener(listener);
HSSFEventFactoryfactory=newHSSFEventFactory();
HSSFRequestrequest=newHSSFRequest();
if(outputFormulaValues){
request.addListenerForAllRecords(formatListener);
}else{
workbookBuildingListener=newEventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request,fs);
returndataList;//返回该excel文件的总行数,不包括首列和空行
}
/**
*HSSFListener监听方法,处理Record
*处理每个单元格
*@paramrecord
*/
@SuppressWarnings("unchecked")
publicvoidprocessRecord(Recordrecord){
intthisRow=-1;
intthisColumn=-1;
StringthisStr=null;
Stringvalue=null;
switch(record.getSid()){
caseBoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
caseBOFRecord.sid://开始处理每个sheet
BOFRecordbr=(BOFRecord)record;
if(br.getType()==BOFRecord.TYPE_WORKSHEET){
//如果有需要,则建立子工作簿
if(workbookBuildingListener!=null&&stubWorkbook==null){
stubWorkbook=workbookBuildingListener.getStubHSSFWorkbook();
}
if(orderedBSRs==null){
orderedBSRs=BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
sheetName=orderedBSRs[sheetIndex].getSheetname();
sheetIndex++;
}
break;
caseSSTRecord.sid:
sstRecord=(SSTRecord)record;
break;
caseBlankRecord.sid://单元格为空白
BlankRecordbrec=(BlankRecord)record;
thisRow=brec.getRow();
thisColumn=brec.getColumn();
thisStr="";
cellList.add(thisColumn,thisStr);
break;
caseBoolErrRecord.sid://单元格为布尔类型
BoolErrRecordberec=(BoolErrRecord)record;
thisRow=berec.getRow();
thisColumn=berec.getColumn();
thisStr=berec.getBooleanValue()+"";
cellList.add(thisColumn,thisStr);
checkRowIsNull(thisStr);//如果里面某个单元格含有值,则标识该行不为空行
break;
caseFormulaRecord.sid://单元格为公式类型
FormulaRecordfrec=(FormulaRecord)record;
thisRow=frec.getRow();
thisColumn=frec.getColumn();
if(outputFormulaValues){
if(Double.isNaN(frec.getValue())){
outputNextStringRecord=true;
nextRow=frec.getRow();
nextColumn=frec.getColumn();
}else{
thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook,frec.getParsedExpression())+'"';
}
}else{
thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook,frec.getParsedExpression())+'"';
}
cellList.add(thisColumn,thisStr);
checkRowIsNull(thisStr);//如果里面某个单元格含有值,则标识该行不为空行
break;
caseStringRecord.sid://单元格中公式的字符串
if(outputNextStringRecord){
StringRecordsrec=(StringRecord)record;
thisStr=srec.getString();
thisRow=nextRow;
thisColumn=nextColumn;
outputNextStringRecord=false;
}
break;
caseLabelRecord.sid:
LabelRecordlrec=(LabelRecord)record;
curRow=thisRow=lrec.getRow();
thisColumn=lrec.getColumn();
value=lrec.getValue().trim();
value=value.equals("")?"":value;
cellList.add(thisColumn,value);
checkRowIsNull(value);//如果里面某个单元格含有值,则标识该行不为空行
break;
caseLabelSSTRecord.sid://单元格为字符串类型
LabelSSTRecordlsrec=(LabelSSTRecord)record;
curRow=thisRow=lsrec.getRow();
thisColumn=lsrec.getColumn();
if(sstRecord==null){
cellList.add(thisColumn,"");
}else{
value=sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
value=value.equals("")?"":value;
cellList.add(thisColumn,value);
checkRowIsNull(value);//如果里面某个单元格含有值,则标识该行不为空行
}
break;
caseNumberRecord.sid://单元格为数字类型
NumberRecordnumrec=(NumberRecord)record;
curRow=thisRow=numrec.getRow();
thisColumn=numrec.getColumn();
//第一种方式
//value=formatListener.formatNumberDateCell(numrec).trim();//这个被写死,采用的m/d/yyh:mm格式,不符合要求
//第二种方式,参照formatNumberDateCell里面的实现方法编写
DoublevalueDouble=((NumberRecord)numrec).getValue();
StringformatString=formatListener.getFormatString(numrec);
if(formatString.contains("m/d/yy")){
formatString="yyyy-MM-ddhh:mm:ss";
}
intformatIndex=formatListener.getFormatIndex(numrec);
value=formatter.formatRawCellContents(valueDouble,formatIndex,formatString).trim();
value=value.equals("")?"":value;
//向容器加入列值
cellList.add(thisColumn,value);
checkRowIsNull(value);//如果里面某个单元格含有值,则标识该行不为空行
break;
default:
break;
}
//遇到新行的操作
if(thisRow!=-1&&thisRow!=lastRowNumber){
lastColumnNumber=-1;
}
//空值的操作
if(recordinstanceofMissingCellDummyRecord){
MissingCellDummyRecordmc=(MissingCellDummyRecord)record;
curRow=thisRow=mc.getRow();
thisColumn=mc.getColumn();
cellList.add(thisColumn,"");
}
//更新行和列的值
if(thisRow>-1)
lastRowNumber=thisRow;
if(thisColumn>-1)
lastColumnNumber=thisColumn;
//行结束时的操作
if(recordinstanceofLastCellOfRowDummyRecord){
if(minColums>0){
//列值重新置空
if(lastColumnNumber==-1){
lastColumnNumber=0;
}
}
lastColumnNumber=-1;
if(flag&&curRow!=0){//该行不为空行且该行不是第一行,发送(第一行为列名,不需要)
//ExcelReaderUtil.sendRows(filePath,sheetName,sheetIndex,curRow+1,cellList);//每行结束时,调用sendRows()方法
totalRows++;
//添加到数据集合中
dataList.add(cellList);
}
//清空容器
cellList=newArrayList();
//cellList.clear();
flag=false;
}
}
/**
*如果里面某个单元格含有值,则标识该行不为空行
*@paramvalue
*/
publicvoidcheckRowIsNull(Stringvalue){
if(value!=null&&!"".equals(value)){
flag=true;
}
}
}
读取xlsx格式数据(自动获取表头长度方式):
importorg.apache.poi.openxml4j.opc.OPCPackage; importorg.apache.poi.ss.usermodel.BuiltinFormats; importorg.apache.poi.ss.usermodel.DataFormatter; importorg.apache.poi.xssf.eventusermodel.XSSFReader; importorg.apache.poi.xssf.model.SharedStringsTable; importorg.apache.poi.xssf.model.StylesTable; importorg.apache.poi.xssf.usermodel.XSSFCellStyle; importorg.apache.poi.xssf.usermodel.XSSFRichTextString; importorg.xml.sax.Attributes; importorg.xml.sax.InputSource; importorg.xml.sax.SAXException; importorg.xml.sax.XMLReader; importorg.xml.sax.helpers.DefaultHandler; importorg.xml.sax.helpers.XMLReaderFactory; importjava.io.InputStream; importjava.util.ArrayList; importjava.util.List; /** *@descPOI读取excel有两种模式,一种是用户模式,一种是事件驱动模式 *采用SAX事件驱动模式解决XLSX文件,可以有效解决用户模式内存溢出的问题, *该模式是POI官方推荐的读取大数据的模式, *在用户模式下,数据量较大,Sheet较多,或者是有很多无用的空行的情况下,容易出现内存溢出 **用于解决.xlsx2007版本大数据量问题 **/ publicclassExcelXlsxReaderextendsDefaultHandler{ //存储所有数据 privateList
>dataList=newArrayList
>(); /** *单元格中的数据可能的数据类型 */ enumCellDataType{ BOOL,ERROR,FORMULA,INLINESTR,SSTINDEX,NUMBER,DATE,NULL } /** *共享字符串表 */ privateSharedStringsTablesst; /** *上一次的索引值 */ privateStringlastIndex; /** *文件的绝对路径 */ privateStringfilePath=""; /** *工作表索引 */ privateintsheetIndex=0; /** *sheet名 */ privateStringsheetName=""; /** *总行数 */ privateinttotalRows=0; /** *一行内cell集合 */ privateList
cellList=newArrayList (); /** *判断整行是否为空行的标记 */ privatebooleanflag=false; /** *当前行 */ privateintcurRow=1; /** *当前列 */ privateintcurCol=0; /** *T元素标识 */ privatebooleanisTElement; /** *判断上一单元格是否为文本空单元格 */ privatebooleanstartElementFlag=true; privatebooleanendElementFlag=false; privatebooleancharactersFlag=false; /** *异常信息,如果为空则表示没有异常 */ privateStringexceptionMessage; /** *单元格数据类型,默认为字符串类型 */ privateCellDataTypenextDataType=CellDataType.SSTINDEX; privatefinalDataFormatterformatter=newDataFormatter(); /** *单元格日期格式的索引 */ privateshortformatIndex; /** *日期格式字符串 */ privateStringformatString; //定义前一个元素和当前元素的位置,用来计算其中空的单元格数量,如A6和A8等 privateStringprePreRef="A",preRef=null,ref=null; //定义该文档一行最大的单元格数,用来补全一行最后可能缺失的单元格 privateStringmaxRef=null; /** *单元格 */ privateStylesTablestylesTable; /** *遍历工作簿中所有的电子表格 *并缓存在mySheetList中 * *@paramfilename *@throwsException */ publicintprocess(Stringfilename)throwsException{ filePath=filename; OPCPackagepkg=OPCPackage.open(filename); XSSFReaderxssfReader=newXSSFReader(pkg); stylesTable=xssfReader.getStylesTable(); SharedStringsTablesst=xssfReader.getSharedStringsTable(); XMLReaderparser=XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst=sst; parser.setContentHandler(this); XSSFReader.SheetIteratorsheets=(XSSFReader.SheetIterator)xssfReader.getSheetsData(); while(sheets.hasNext()){//遍历sheet curRow=1;//标记初始行为第一行 sheetIndex++; InputStreamsheet=sheets.next();//sheets.next()和sheets.getSheetName()不能换位置,否则sheetName报错 sheetName=sheets.getSheetName(); InputSourcesheetSource=newInputSource(sheet); parser.parse(sheetSource);//解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行 sheet.close(); } returntotalRows;//返回该excel文件的总行数,不包括首列和空行 } publicList >process(InputStreamin)throwsException{ //filePath=filename; OPCPackagepkg=OPCPackage.open(in); XSSFReaderxssfReader=newXSSFReader(pkg); stylesTable=xssfReader.getStylesTable(); SharedStringsTablesst=xssfReader.getSharedStringsTable(); //XMLReaderparser=XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); XMLReaderparser=XMLReaderFactory.createXMLReader(); this.sst=sst; parser.setContentHandler(this); XSSFReader.SheetIteratorsheets=(XSSFReader.SheetIterator)xssfReader.getSheetsData(); while(sheets.hasNext()){//遍历sheet curRow=1;//标记初始行为第一行 sheetIndex++; InputStreamsheet=sheets.next();//sheets.next()和sheets.getSheetName()不能换位置,否则sheetName报错 sheetName=sheets.getSheetName(); InputSourcesheetSource=newInputSource(sheet); parser.parse(sheetSource);//解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行 sheet.close(); } returndataList; } /** *第一个执行 * *@paramuri *@paramlocalName *@paramname *@paramattributes *@throwsSAXException */ @Override publicvoidstartElement(Stringuri,StringlocalName,Stringname,Attributesattributes)throwsSAXException{ //c=>单元格 if("c".equals(name)){ //前一个单元格的位置 if(preRef==null){ preRef=attributes.getValue("r"); }else{ //中部文本空单元格标识‘endElementFlag'判断前一次是否为文本空字符串,true则表明不是文本空字符串,false表明是文本空字符串跳过把空字符串的位置赋予preRef if(endElementFlag){ preRef=ref; } } //当前单元格的位置 ref=attributes.getValue("r"); //首部文本空单元格标识‘startElementFlag'判断前一次,即首部是否为文本空字符串,true则表明不是文本空字符串,false表明是文本空字符串,且已知当前格,即第二格带“B”标志,则ref赋予preRef if(!startElementFlag&&!flag){//上一个单元格为文本空单元格,执行下面的,使ref=preRef;flag为true表明该单元格之前有数据值,即该单元格不是首部空单元格,则跳过 //这里只有上一个单元格为文本空单元格,且之前的几个单元格都没有值才会执行 preRef=ref; } //设定单元格类型 this.setNextDataType(attributes); endElementFlag=false; charactersFlag=false; startElementFlag=false; } //当元素为t时 if("t".equals(name)){ isTElement=true; }else{ isTElement=false; } //置空 lastIndex=""; } /** *第二个执行 *得到单元格对应的索引值或是内容值 *如果单元格类型是字符串、INLINESTR、数字、日期,lastIndex则是索引值 *如果单元格类型是布尔值、错误、公式,lastIndex则是内容值 *@paramch *@paramstart *@paramlength *@throwsSAXException */ @Override publicvoidcharacters(char[]ch,intstart,intlength)throwsSAXException{ startElementFlag=true; charactersFlag=true; lastIndex+=newString(ch,start,length); } /** *第三个执行 * *@paramuri *@paramlocalName *@paramname *@throwsSAXException */ @Override publicvoidendElement(Stringuri,StringlocalName,Stringname)throwsSAXException{ //t元素也包含字符串 if(isTElement){ //将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符 Stringvalue=lastIndex.trim(); cellList.add(curCol,value); endElementFlag=true; curCol++; isTElement=false; //如果里面某个单元格含有值,则标识该行不为空行 if(value!=null&&!"".equals(value)){ flag=true; } }elseif("v".equals(name)){ //v=>单元格的值,如果单元格是字符串,则v标签的值为该字符串在SST中的索引 Stringvalue=this.getDataValue(lastIndex.trim(),"");//根据索引值获取对应的单元格值 //补全单元格之间的空单元格 if(!ref.equals(preRef)){ intlen=countNullCell(ref,preRef); for(inti=0;i
(); //cellList.clear(); curRow++; curCol=0; preRef=null; prePreRef=null; ref=null; flag=false; } } } /** *处理数据类型 * *@paramattributes */ publicvoidsetNextDataType(Attributesattributes){ nextDataType=CellDataType.NUMBER;//cellType为空,则表示该单元格类型为数字 formatIndex=-1; formatString=null; StringcellType=attributes.getValue("t");//单元格类型 StringcellStyleStr=attributes.getValue("s");// StringcolumnData=attributes.getValue("r");//获取单元格的位置,如A1,B1 if("b".equals(cellType)){//处理布尔值 nextDataType=CellDataType.BOOL; }elseif("e".equals(cellType)){//处理错误 nextDataType=CellDataType.ERROR; }elseif("inlineStr".equals(cellType)){ nextDataType=CellDataType.INLINESTR; }elseif("s".equals(cellType)){//处理字符串 nextDataType=CellDataType.SSTINDEX; }elseif("str".equals(cellType)){ nextDataType=CellDataType.FORMULA; } if(cellStyleStr!=null){//处理日期 intstyleIndex=Integer.parseInt(cellStyleStr); XSSFCellStylestyle=stylesTable.getStyleAt(styleIndex); formatIndex=style.getDataFormat(); formatString=style.getDataFormatString(); if(formatString.contains("m/d/yyyy")||formatString.contains("yyyy/mm/dd")||formatString.contains("yyyy/m/d")){ nextDataType=CellDataType.DATE; formatString="yyyy-MM-ddhh:mm:ss"; } if(formatString==null){ nextDataType=CellDataType.NULL; formatString=BuiltinFormats.getBuiltinFormat(formatIndex); } } } /** *对解析出来的数据进行类型处理 *@paramvalue单元格的值, *value代表解析:BOOL的为0或1,ERROR的为内容值,FORMULA的为内容值,INLINESTR的为索引值需转换为内容值, *SSTINDEX的为索引值需转换为内容值,NUMBER为内容值,DATE为内容值 *@paramthisStr一个空字符串 *@return */ @SuppressWarnings("deprecation") publicStringgetDataValue(Stringvalue,StringthisStr){ switch(nextDataType){ //这几个的顺序不能随便交换,交换了很可能会导致数据错误 caseBOOL://布尔值 charfirst=value.charAt(0); thisStr=first=='0'?"FALSE":"TRUE"; break; caseERROR://错误 thisStr="\"ERROR:"+value.toString()+'"'; break; caseFORMULA://公式 thisStr='"'+value.toString()+'"'; break; caseINLINESTR: XSSFRichTextStringrtsi=newXSSFRichTextString(value.toString()); thisStr=rtsi.toString(); rtsi=null; break; caseSSTINDEX://字符串 StringsstIndex=value.toString(); try{ intidx=Integer.parseInt(sstIndex); XSSFRichTextStringrtss=newXSSFRichTextString(sst.getEntryAt(idx));//根据idx索引值获取内容值 thisStr=rtss.toString(); //System.out.println(thisStr); //有些字符串是文本格式的,但内容却是日期 rtss=null; }catch(NumberFormatExceptionex){ thisStr=value.toString(); } break; caseNUMBER://数字 if(formatString!=null){ thisStr=formatter.formatRawCellContents(Double.parseDouble(value),formatIndex,formatString).trim(); }else{ thisStr=value; } thisStr=thisStr.replace("_","").trim(); break; caseDATE://日期 thisStr=formatter.formatRawCellContents(Double.parseDouble(value),formatIndex,formatString); //对日期字符串作特殊处理,去掉T thisStr=thisStr.replace("T",""); break; default: thisStr=""; break; } returnthisStr; } publicintcountNullCell(Stringref,StringpreRef){ //excel2007最大行数是1048576,最大列数是16384,最后一列列名是XFD Stringxfd=ref.replaceAll("\\d+",""); Stringxfd_1=preRef.replaceAll("\\d+",""); xfd=fillChar(xfd,3,'@',true); xfd_1=fillChar(xfd_1,3,'@',true); char[]letter=xfd.toCharArray(); char[]letter_1=xfd_1.toCharArray(); intres=(letter[0]-letter_1[0])*26*26+(letter[1]-letter_1[1])*26+(letter[2]-letter_1[2]); returnres-1; } publicStringfillChar(Stringstr,intlen,charlet,booleanisPre){ intlen_1=str.length(); if(len_1 读取工具类:
importjava.io.InputStream; importjava.util.List; publicclassExcelReaderUtil{ //excel2003扩展名 publicstaticfinalStringEXCEL03_EXTENSION=".xls"; //excel2007扩展名 publicstaticfinalStringEXCEL07_EXTENSION=".xlsx"; //读取xls格式 publicstaticList>readExcelXls(InputStreamin)throwsException{ ExcelXlsReaderexcelXls=newExcelXlsReader(); List
>dataList=excelXls.process(in); returndataList; } //读取xlsx格式 publicstaticList
>readExcelXlsx(InputStreamin)throwsException{ ExcelXlsxReaderexcelXls=newExcelXlsxReader(); List
>dataList=excelXls.process(in); returndataList; } publicstaticvoidreadExcel(StringfileName)throwsException{ inttotalRows=0; if(fileName.endsWith(EXCEL03_EXTENSION)){//处理excel2003文件 ExcelXlsReaderexcelXls=newExcelXlsReader(); totalRows=excelXls.process(fileName); }elseif(fileName.endsWith(EXCEL07_EXTENSION)){//处理excel2007文件 ExcelXlsxReaderexcelXlsxReader=newExcelXlsxReader(); totalRows=excelXlsxReader.process(fileName); }else{ thrownewException("文件格式错误,fileName的扩展名只能是xls或xlsx。"); } System.out.println("发送的总行数:"+totalRows); } publicstaticvoidmain(String[]args)throwsException{ Stringpath="C:\\test.xlsx"; ExcelReaderUtil.readExcel(path); } }
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。