Java实现excel大数据量导入
本文实例为大家分享了Java实现excel大数据量导入的具体代码,供大家参考,具体内容如下
情景分析:
通常我们通过poi读取excel文件时,若在用户模式下,由于数据量较大、Sheet较多,很容易出现内存溢出的情况
用户模式读取excel的典型代码如下:
FileInputStreamfile=newFileInputStream("c:\\test.xlsx"); Workbookwb=newXSSFWorkbook(file);
而03版(xls)excel文件每个sheet最大长度为65536行,07版(xlsx)excel文件每个sheet最大长度为1048576行,因此我们主要解决07版(xlsx)excel大数据量导入出现内存溢出的问题
由于读取cvs文件效率高且占用内存少,所以我们采用xlsx转换为cvs的方式来解决
读取xlsx格式数据:
importjava.io.File; importjava.io.FileOutputStream; importjava.io.IOException; importjava.io.InputStream; importjava.io.OutputStream; importjava.io.PrintStream; importjava.text.SimpleDateFormat; importjava.util.ArrayList; importjava.util.Date; importjava.util.HashMap; importjava.util.List; importjava.util.Map; importjavax.xml.parsers.ParserConfigurationException; importjavax.xml.parsers.SAXParser; importjavax.xml.parsers.SAXParserFactory; importorg.apache.poi.hssf.usermodel.HSSFDateUtil; importorg.apache.poi.openxml4j.exceptions.OpenXML4JException; importorg.apache.poi.openxml4j.opc.OPCPackage; importorg.apache.poi.openxml4j.opc.PackageAccess; importorg.apache.poi.ss.usermodel.BuiltinFormats; importorg.apache.poi.ss.usermodel.DataFormatter; importorg.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; importorg.apache.poi.xssf.eventusermodel.XSSFReader; importorg.apache.poi.xssf.model.StylesTable; importorg.apache.poi.xssf.usermodel.XSSFCellStyle; importorg.apache.poi.xssf.usermodel.XSSFRichTextString; importorg.xml.sax.Attributes; importorg.xml.sax.InputSource; importorg.xml.sax.SAXException; importorg.xml.sax.XMLReader; importorg.xml.sax.helpers.DefaultHandler; publicclassXLSX2CSV{ enumxssfDataType{ BOOL,ERROR,FORMULA,INLINESTR,SSTINDEX,NUMBER, } /** *使用xssf_sax_API处理Excel,请参考:http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api */ classMyXSSFSheetHandlerextendsDefaultHandler{ privateStylesTablestylesTable; privateReadOnlySharedStringsTablesharedStringsTable; privatefinalPrintStreamoutput; privatefinalintminColumnCount; privatebooleanvIsOpen; privatexssfDataTypenextDataType; privateshortformatIndex; privateStringformatString; privatefinalDataFormatterformatter; privateintthisColumn=-1; privateintlastColumnNumber=-1; privateStringBuffervalue; privateString[]record; privateListrows=newArrayList (); privatebooleanisCellNull=false; publicMyXSSFSheetHandler(StylesTablestyles,ReadOnlySharedStringsTablestrings,intcols,PrintStreamtarget){ this.stylesTable=styles; this.sharedStringsTable=strings; this.minColumnCount=cols; this.output=target; this.value=newStringBuffer(); this.nextDataType=xssfDataType.NUMBER; this.formatter=newDataFormatter(); record=newString[this.minColumnCount]; rows.clear();//每次读取都清空行集合 } publicvoidstartElement(Stringuri,StringlocalName,Stringname,Attributesattributes)throwsSAXException{ if("inlineStr".equals(name)||"v".equals(name)){ vIsOpen=true; //Clearcontentscache value.setLength(0); } //c=>cell elseif("c".equals(name)){ //Getthecellreference Stringr=attributes.getValue("r"); intfirstDigit=-1; for(intc=0;c contentsofacell if("v".equals(name)){ //Processthevaluecontentsasrequired. //Donow,ascharacters()maybecalledmorethanonce switch(nextDataType){ caseBOOL: charfirst=value.charAt(0); thisStr=first=='0'?"FALSE":"TRUE"; break; caseERROR: thisStr="\"ERROR:"+value.toString()+'"'; break; caseFORMULA: //Aformulacouldresultinastringvalue, //soalwaysadddouble-quotecharacters. thisStr='"'+value.toString()+'"'; break; caseINLINESTR: //TODO:haveseenanexampleofthis,soit'suntested. XSSFRichTextStringrtsi=newXSSFRichTextString(value.toString()); thisStr='"'+rtsi.toString()+'"'; break; caseSSTINDEX: StringsstIndex=value.toString(); try{ intidx=Integer.parseInt(sstIndex); XSSFRichTextStringrtss=newXSSFRichTextString(sharedStringsTable.getEntryAt(idx)); thisStr=rtss.toString(); }catch(NumberFormatExceptionex){ output.println("FailedtoparseSSTindex'"+sstIndex +"':"+ex.toString()); } break; caseNUMBER: Stringn=value.toString(); //判断是否是日期格式 if(HSSFDateUtil.isADateFormat(this.formatIndex,n)){ Doubled=Double.parseDouble(n); Datedate=HSSFDateUtil.getJavaDate(d); thisStr=formateDateToString(date); }elseif(this.formatString!=null) thisStr=formatter.formatRawCellContents( Double.parseDouble(n),this.formatIndex, this.formatString); else thisStr=n; break; default: thisStr="(TODO:Unexpectedtype:"+nextDataType+")"; break; } //Outputafterwe'veseenthestringcontents //Emitcommasforanyfieldsthatweremissingonthisrow if(lastColumnNumber==-1){ lastColumnNumber=0; } //判断单元格的值是否为空 if(thisStr==null||"".equals(isCellNull)){ isCellNull=true;//设置单元格是否为空值 } record[thisColumn]=thisStr; //Updatecolumn if(thisColumn>-1) lastColumnNumber=thisColumn; }elseif("row".equals(name)){ //Printoutanymissingcommasifneeded if(minColumns>0){ //Columnsare0based if(lastColumnNumber==-1){ lastColumnNumber=0; } if(isCellNull==false&&record[0]!=null &&record[1]!=null)//判断是否空行 { rows.add(record.clone()); isCellNull=false; for(inti=0;i getRows(){ returnrows; } publicvoidsetRows(List rows){ this.rows=rows; } /** *Capturescharactersonlyifasuitableelementisopen.Originally *wasjust"v";extendedforinlineStralso. */ publicvoidcharacters(char[]ch,intstart,intlength)throwsSAXException{ if(vIsOpen) value.append(ch,start,length); } /** *ConvertsanExcelcolumnnamelike"C"toazero-basedindex. *@paramname *@returnIndexcorrespondingtothespecifiedname */ privateintnameToColumn(Stringname){ intcolumn=-1; for(inti=0;i CSVconverter * *@parampkg *TheXLSXpackagetoprocess *@paramoutput *ThePrintStreamtooutputtheCSVto *@paramminColumns *Theminimumnumberofcolumnstooutput,or-1fornominimum */ publicXLSX2CSV(OPCPackagepkg,PrintStreamoutput,intminColumns){ this.xlsxPackage=pkg; this.output=output; this.minColumns=minColumns; } /** *Parsesandshowsthecontentofonesheetusingthespecifiedstylesand *shared-stringstables. *@paramstyles *@paramstrings *@paramsheetInputStream */ publicList processSheet(StylesTablestyles, ReadOnlySharedStringsTablestrings,InputStreamsheetInputStream) throwsIOException,ParserConfigurationException,SAXException{ InputSourcesheetSource=newInputSource(sheetInputStream); SAXParserFactorysaxFactory=SAXParserFactory.newInstance(); SAXParsersaxParser=saxFactory.newSAXParser(); XMLReadersheetParser=saxParser.getXMLReader(); MyXSSFSheetHandlerhandler=newMyXSSFSheetHandler(styles,strings,this.minColumns,this.output); sheetParser.setContentHandler(handler); sheetParser.parse(sheetSource); returnhandler.getRows(); } /** *初始化这个处理程序 *@throwsIOException *@throwsOpenXML4JException *@throwsParserConfigurationException *@throwsSAXException */ publicList process()throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{ ReadOnlySharedStringsTablestrings=newReadOnlySharedStringsTable(this.xlsxPackage); XSSFReaderxssfReader=newXSSFReader(this.xlsxPackage); List list=null; StylesTablestyles=xssfReader.getStylesTable(); XSSFReader.SheetIteratoriter=(XSSFReader.SheetIterator)xssfReader.getSheetsData(); intindex=0; while(iter.hasNext()){ InputStreamstream=iter.next(); index++; //默认取第一个工作薄 if(index==1){ list=processSheet(styles,strings,stream); stream.close(); } } returnlist; } /** *读取Excel * *@parampath *文件路径 *@paramsheetName *sheet名称 *@paramminColumns *列总数 *@return *@throwsSAXException *@throwsParserConfigurationException *@throwsOpenXML4JException *@throwsIOException */ privatestaticList readerExcel(Stringpath,intminColumns)throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{ OPCPackagep=OPCPackage.open(path,PackageAccess.READ); XLSX2CSVxlsx2csv=newXLSX2CSV(p,System.out,minColumns); List list=xlsx2csv.process(); p.close(); returnlist; } /** *读取Excel * *@paramfile *File *@paramsheetName *sheet名称 *@paramminColumns *列总数 *@return *@throwsSAXException *@throwsParserConfigurationException *@throwsOpenXML4JException *@throwsIOException */ privatestaticList readerExcelInputStream(Filefile,intminColumns)throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{ OPCPackagep=OPCPackage.openOrCreate(file); XLSX2CSVxlsx2csv=newXLSX2CSV(p,System.out,minColumns); List list=xlsx2csv.process(); p.close(); returnlist; } //获取表头及数据内容,可在业务层调用 publicstaticMap getTableDate(InputStreamin)throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{ Fileout=newFile("C:\\date.xlsx");//默认在C盘生成date.xlsx文件 OutputStreamoutput=null; try{ output=newFileOutputStream(out); byte[]buf=newbyte[1024]; intbytesRead; while((bytesRead=in.read(buf))>0){ output.write(buf,0,bytesRead); } }finally{ in.close(); output.close(); } Map map=newHashMap (); List headresult=newArrayList ();//表头集合 List >dataresult=newArrayList
>();//表数据集合 List
list=readerExcelInputStream(out,30);//默认读取30列 for(inti=0;i dataList=newArrayList (); for(Stringcell:list.get(i)){ dataList.add(cell==null?"":cell); } dataresult.add(dataList); } } map.put("headresult",headresult); map.put("dataresult",dataresult); returnmap; } publicstaticvoidmain(String[]args)throwsException{ Filefile=newFile("C:/Users/Administrator/Desktop/测试.xlsx"); System.out.println("开始读取..."); List list=XLSX2CSV.readerExcelInputStream(file,30);//默认读取30列 System.out.println("数据量:"+list.size()); for(inti=0;i 0){ for(Stringcell:list.get(i)){ System.out.print(cell+","); } System.out.println(); } } } }
说明:
代码中的加载excel的核心方法:
OPCPackagepkg=OPCPackage.open(path);//文件路径 OPCPackagepkg=OPCPackage.openOrCreate(file);//文件 OPCPackagepkg=OPCPackage.open(InputStream);//文件流
文件流方式对内存依赖极大,所以实际应用时,如果只能获取文件流的话,可以先将文件通过流拷贝到本地再解析代码中的
若上传文件大小超过限制,可在配置文件中设置,SpringBoot2项目application.properties中设置:
spring.servlet.multipart.maxFileSize=50MB spring.servlet.multipart.maxRequestSize=100MB
实际业务数据入库的话也可采用异步任务@Async的方式来提高入库效率:
importjava.util.Map; importorg.springframework.beans.factory.annotation.Autowired; importorg.springframework.scheduling.annotation.Async; importorg.springframework.stereotype.Service; importcn.com.app.dao.JkconfigDao; @Service publicclassJkrzService{ @Autowired privateJkconfigDaojkconfigDao; @Async("myTaskAsyncPool") publicvoidtransJkrz(Mapm){ jkconfigDao.insert(m); } }
线程池配置:
importjava.util.concurrent.Executor; importjava.util.concurrent.ThreadPoolExecutor; importorg.springframework.beans.factory.annotation.Autowired; importorg.springframework.context.annotation.Bean; importorg.springframework.context.annotation.Configuration; importorg.springframework.scheduling.annotation.EnableAsync; importorg.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; @Configuration @EnableAsync publicclassAsyncTaskConfig{ @Autowired privateTaskThreadPoolConfigconfig; //ThredPoolTaskExcutor的处理流程 //当池子大小小于corePoolSize,就新建线程,并处理请求 //当池子大小等于corePoolSize,把请求放入workQueue中,池子里的空闲线程就去workQueue中取任务并处理 //当workQueue放不下任务时,就新建线程入池,并处理请求,如果池子大小撑到了maximumPoolSize,就用RejectedExecutionHandler来做拒绝处理 //当池子的线程数大于corePoolSize时,多余的线程会等待keepAliveTime长时间,如果无请求可处理就自行销毁 //当threadNamePrefix设置为true,则核心线程也会超时关闭 @Bean publicExecutormyTaskAsyncPool(){ ThreadPoolTaskExecutorexecutor=newThreadPoolTaskExecutor(); executor.setCorePoolSize(config.getCorePoolSize()); executor.setMaxPoolSize(config.getMaxPoolSize()); executor.setQueueCapacity(config.getQueueCapacity()); executor.setKeepAliveSeconds(config.getKeepAliveSeconds()); executor.setAllowCoreThreadTimeOut(true); executor.setThreadNamePrefix("MyExecutor-"); //rejection-policy:当pool已经达到maxsize的时候,如何处理新任务 //CALLER_RUNS:不在新线程中执行任务,而是由调用者所在的线程来执行 executor.setRejectedExecutionHandler(newThreadPoolExecutor.CallerRunsPolicy()); executor.initialize(); returnexecutor; } }
参数配置:
#核心线程数,当线程数小于核心线程数时,即使有线程空闲,线程池也会优先创建新线程,设置allowCoreThreadTimeout=true(默认false)时,核心线程会超时退出 spring.task.pool.corePoolSize=20 #最大线程数,当线程数大于等于corePoolSize,且任务队列已满时,线程池会创建新线程来处理任务 spring.task.pool.maxPoolSize=60 #线程空闲时间,当线程空闲时间达到keepAliveSeconds(秒)时,线程会退出,直到线程数量等于corePoolSize,如果allowCoreThreadTimeout=true,则会直到线程数量等于0 spring.task.pool.keepAliveSeconds=1 #任务队列容量,当核心线程数达到最大时,新任务会放在队列中排队等待执行 spring.task.pool.queueCapacity=400
读取xls格式数据:
importorg.apache.poi.hssf.eventusermodel.*; importorg.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; importorg.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord; importorg.apache.poi.hssf.model.HSSFFormulaParser; importorg.apache.poi.hssf.record.*; importorg.apache.poi.hssf.usermodel.HSSFDataFormatter; importorg.apache.poi.hssf.usermodel.HSSFWorkbook; importorg.apache.poi.poifs.filesystem.POIFSFileSystem; importjava.io.FileInputStream; importjava.io.InputStream; importjava.util.ArrayList; importjava.util.List; /** *@desc用于解决.xls2003版本大数据量问题 **/ publicclassExcelXlsReaderimplementsHSSFListener{ //存储所有数据 privateList>dataList=newArrayList
>(); privateintminColums=-1; privatePOIFSFileSystemfs; /** *总行数 */ privateinttotalRows=0; /** *上一行row的序号 */ privateintlastRowNumber; /** *上一单元格的序号 */ privateintlastColumnNumber; /** *是否输出formula,还是它对应的值 */ privatebooleanoutputFormulaValues=true; /** *用于转换formulas */ privateEventWorkbookBuilder.SheetRecordCollectingListenerworkbookBuildingListener; //excel2003工作簿 privateHSSFWorkbookstubWorkbook; privateSSTRecordsstRecord; privateFormatTrackingHSSFListenerformatListener; privatefinalHSSFDataFormatterformatter=newHSSFDataFormatter(); /** *文件的绝对路径 */ privateStringfilePath=""; //表索引 privateintsheetIndex=0; privateBoundSheetRecord[]orderedBSRs; @SuppressWarnings("unchecked") privateArrayListboundSheetRecords=newArrayList(); privateintnextRow; privateintnextColumn; privatebooleanoutputNextStringRecord; //当前行 privateintcurRow=0; //存储一行记录所有单元格的容器 privateList
cellList=newArrayList (); /** *判断整行是否为空行的标记 */ privatebooleanflag=false; @SuppressWarnings("unused") privateStringsheetName; /** *遍历excel下所有的sheet * *@paramfileName *@throwsException */ publicintprocess(StringfileName)throwsException{ filePath=fileName; this.fs=newPOIFSFileSystem(newFileInputStream(fileName)); MissingRecordAwareHSSFListenerlistener=newMissingRecordAwareHSSFListener(this); formatListener=newFormatTrackingHSSFListener(listener); HSSFEventFactoryfactory=newHSSFEventFactory(); HSSFRequestrequest=newHSSFRequest(); if(outputFormulaValues){ request.addListenerForAllRecords(formatListener); }else{ workbookBuildingListener=newEventWorkbookBuilder.SheetRecordCollectingListener(formatListener); request.addListenerForAllRecords(workbookBuildingListener); } factory.processWorkbookEvents(request,fs); returntotalRows;//返回该excel文件的总行数,不包括首列和空行 } publicList >process(InputStreamin)throwsException{ //filePath=fileName; this.fs=newPOIFSFileSystem(in); MissingRecordAwareHSSFListenerlistener=newMissingRecordAwareHSSFListener(this); formatListener=newFormatTrackingHSSFListener(listener); HSSFEventFactoryfactory=newHSSFEventFactory(); HSSFRequestrequest=newHSSFRequest(); if(outputFormulaValues){ request.addListenerForAllRecords(formatListener); }else{ workbookBuildingListener=newEventWorkbookBuilder.SheetRecordCollectingListener(formatListener); request.addListenerForAllRecords(workbookBuildingListener); } factory.processWorkbookEvents(request,fs); returndataList;//返回该excel文件的总行数,不包括首列和空行 } /** *HSSFListener监听方法,处理Record *处理每个单元格 *@paramrecord */ @SuppressWarnings("unchecked") publicvoidprocessRecord(Recordrecord){ intthisRow=-1; intthisColumn=-1; StringthisStr=null; Stringvalue=null; switch(record.getSid()){ caseBoundSheetRecord.sid: boundSheetRecords.add(record); break; caseBOFRecord.sid://开始处理每个sheet BOFRecordbr=(BOFRecord)record; if(br.getType()==BOFRecord.TYPE_WORKSHEET){ //如果有需要,则建立子工作簿 if(workbookBuildingListener!=null&&stubWorkbook==null){ stubWorkbook=workbookBuildingListener.getStubHSSFWorkbook(); } if(orderedBSRs==null){ orderedBSRs=BoundSheetRecord.orderByBofPosition(boundSheetRecords); } sheetName=orderedBSRs[sheetIndex].getSheetname(); sheetIndex++; } break; caseSSTRecord.sid: sstRecord=(SSTRecord)record; break; caseBlankRecord.sid://单元格为空白 BlankRecordbrec=(BlankRecord)record; thisRow=brec.getRow(); thisColumn=brec.getColumn(); thisStr=""; cellList.add(thisColumn,thisStr); break; caseBoolErrRecord.sid://单元格为布尔类型 BoolErrRecordberec=(BoolErrRecord)record; thisRow=berec.getRow(); thisColumn=berec.getColumn(); thisStr=berec.getBooleanValue()+""; cellList.add(thisColumn,thisStr); checkRowIsNull(thisStr);//如果里面某个单元格含有值,则标识该行不为空行 break; caseFormulaRecord.sid://单元格为公式类型 FormulaRecordfrec=(FormulaRecord)record; thisRow=frec.getRow(); thisColumn=frec.getColumn(); if(outputFormulaValues){ if(Double.isNaN(frec.getValue())){ outputNextStringRecord=true; nextRow=frec.getRow(); nextColumn=frec.getColumn(); }else{ thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook,frec.getParsedExpression())+'"'; } }else{ thisStr='"'+HSSFFormulaParser.toFormulaString(stubWorkbook,frec.getParsedExpression())+'"'; } cellList.add(thisColumn,thisStr); checkRowIsNull(thisStr);//如果里面某个单元格含有值,则标识该行不为空行 break; caseStringRecord.sid://单元格中公式的字符串 if(outputNextStringRecord){ StringRecordsrec=(StringRecord)record; thisStr=srec.getString(); thisRow=nextRow; thisColumn=nextColumn; outputNextStringRecord=false; } break; caseLabelRecord.sid: LabelRecordlrec=(LabelRecord)record; curRow=thisRow=lrec.getRow(); thisColumn=lrec.getColumn(); value=lrec.getValue().trim(); value=value.equals("")?"":value; cellList.add(thisColumn,value); checkRowIsNull(value);//如果里面某个单元格含有值,则标识该行不为空行 break; caseLabelSSTRecord.sid://单元格为字符串类型 LabelSSTRecordlsrec=(LabelSSTRecord)record; curRow=thisRow=lsrec.getRow(); thisColumn=lsrec.getColumn(); if(sstRecord==null){ cellList.add(thisColumn,""); }else{ value=sstRecord.getString(lsrec.getSSTIndex()).toString().trim(); value=value.equals("")?"":value; cellList.add(thisColumn,value); checkRowIsNull(value);//如果里面某个单元格含有值,则标识该行不为空行 } break; caseNumberRecord.sid://单元格为数字类型 NumberRecordnumrec=(NumberRecord)record; curRow=thisRow=numrec.getRow(); thisColumn=numrec.getColumn(); //第一种方式 //value=formatListener.formatNumberDateCell(numrec).trim();//这个被写死,采用的m/d/yyh:mm格式,不符合要求 //第二种方式,参照formatNumberDateCell里面的实现方法编写 DoublevalueDouble=((NumberRecord)numrec).getValue(); StringformatString=formatListener.getFormatString(numrec); if(formatString.contains("m/d/yy")){ formatString="yyyy-MM-ddhh:mm:ss"; } intformatIndex=formatListener.getFormatIndex(numrec); value=formatter.formatRawCellContents(valueDouble,formatIndex,formatString).trim(); value=value.equals("")?"":value; //向容器加入列值 cellList.add(thisColumn,value); checkRowIsNull(value);//如果里面某个单元格含有值,则标识该行不为空行 break; default: break; } //遇到新行的操作 if(thisRow!=-1&&thisRow!=lastRowNumber){ lastColumnNumber=-1; } //空值的操作 if(recordinstanceofMissingCellDummyRecord){ MissingCellDummyRecordmc=(MissingCellDummyRecord)record; curRow=thisRow=mc.getRow(); thisColumn=mc.getColumn(); cellList.add(thisColumn,""); } //更新行和列的值 if(thisRow>-1) lastRowNumber=thisRow; if(thisColumn>-1) lastColumnNumber=thisColumn; //行结束时的操作 if(recordinstanceofLastCellOfRowDummyRecord){ if(minColums>0){ //列值重新置空 if(lastColumnNumber==-1){ lastColumnNumber=0; } } lastColumnNumber=-1; if(flag&&curRow!=0){//该行不为空行且该行不是第一行,发送(第一行为列名,不需要) //ExcelReaderUtil.sendRows(filePath,sheetName,sheetIndex,curRow+1,cellList);//每行结束时,调用sendRows()方法 totalRows++; //添加到数据集合中 dataList.add(cellList); } //清空容器 cellList=newArrayList
(); //cellList.clear(); flag=false; } } /** *如果里面某个单元格含有值,则标识该行不为空行 *@paramvalue */ publicvoidcheckRowIsNull(Stringvalue){ if(value!=null&&!"".equals(value)){ flag=true; } } }
读取xlsx格式数据(自动获取表头长度方式):
importorg.apache.poi.openxml4j.opc.OPCPackage; importorg.apache.poi.ss.usermodel.BuiltinFormats; importorg.apache.poi.ss.usermodel.DataFormatter; importorg.apache.poi.xssf.eventusermodel.XSSFReader; importorg.apache.poi.xssf.model.SharedStringsTable; importorg.apache.poi.xssf.model.StylesTable; importorg.apache.poi.xssf.usermodel.XSSFCellStyle; importorg.apache.poi.xssf.usermodel.XSSFRichTextString; importorg.xml.sax.Attributes; importorg.xml.sax.InputSource; importorg.xml.sax.SAXException; importorg.xml.sax.XMLReader; importorg.xml.sax.helpers.DefaultHandler; importorg.xml.sax.helpers.XMLReaderFactory; importjava.io.InputStream; importjava.util.ArrayList; importjava.util.List; /** *@descPOI读取excel有两种模式,一种是用户模式,一种是事件驱动模式 *采用SAX事件驱动模式解决XLSX文件,可以有效解决用户模式内存溢出的问题, *该模式是POI官方推荐的读取大数据的模式, *在用户模式下,数据量较大,Sheet较多,或者是有很多无用的空行的情况下,容易出现内存溢出 **用于解决.xlsx2007版本大数据量问题 **/ publicclassExcelXlsxReaderextendsDefaultHandler{ //存储所有数据 privateList
>dataList=newArrayList
>(); /** *单元格中的数据可能的数据类型 */ enumCellDataType{ BOOL,ERROR,FORMULA,INLINESTR,SSTINDEX,NUMBER,DATE,NULL } /** *共享字符串表 */ privateSharedStringsTablesst; /** *上一次的索引值 */ privateStringlastIndex; /** *文件的绝对路径 */ privateStringfilePath=""; /** *工作表索引 */ privateintsheetIndex=0; /** *sheet名 */ privateStringsheetName=""; /** *总行数 */ privateinttotalRows=0; /** *一行内cell集合 */ privateList
cellList=newArrayList (); /** *判断整行是否为空行的标记 */ privatebooleanflag=false; /** *当前行 */ privateintcurRow=1; /** *当前列 */ privateintcurCol=0; /** *T元素标识 */ privatebooleanisTElement; /** *判断上一单元格是否为文本空单元格 */ privatebooleanstartElementFlag=true; privatebooleanendElementFlag=false; privatebooleancharactersFlag=false; /** *异常信息,如果为空则表示没有异常 */ privateStringexceptionMessage; /** *单元格数据类型,默认为字符串类型 */ privateCellDataTypenextDataType=CellDataType.SSTINDEX; privatefinalDataFormatterformatter=newDataFormatter(); /** *单元格日期格式的索引 */ privateshortformatIndex; /** *日期格式字符串 */ privateStringformatString; //定义前一个元素和当前元素的位置,用来计算其中空的单元格数量,如A6和A8等 privateStringprePreRef="A",preRef=null,ref=null; //定义该文档一行最大的单元格数,用来补全一行最后可能缺失的单元格 privateStringmaxRef=null; /** *单元格 */ privateStylesTablestylesTable; /** *遍历工作簿中所有的电子表格 *并缓存在mySheetList中 * *@paramfilename *@throwsException */ publicintprocess(Stringfilename)throwsException{ filePath=filename; OPCPackagepkg=OPCPackage.open(filename); XSSFReaderxssfReader=newXSSFReader(pkg); stylesTable=xssfReader.getStylesTable(); SharedStringsTablesst=xssfReader.getSharedStringsTable(); XMLReaderparser=XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst=sst; parser.setContentHandler(this); XSSFReader.SheetIteratorsheets=(XSSFReader.SheetIterator)xssfReader.getSheetsData(); while(sheets.hasNext()){//遍历sheet curRow=1;//标记初始行为第一行 sheetIndex++; InputStreamsheet=sheets.next();//sheets.next()和sheets.getSheetName()不能换位置,否则sheetName报错 sheetName=sheets.getSheetName(); InputSourcesheetSource=newInputSource(sheet); parser.parse(sheetSource);//解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行 sheet.close(); } returntotalRows;//返回该excel文件的总行数,不包括首列和空行 } publicList >process(InputStreamin)throwsException{ //filePath=filename; OPCPackagepkg=OPCPackage.open(in); XSSFReaderxssfReader=newXSSFReader(pkg); stylesTable=xssfReader.getStylesTable(); SharedStringsTablesst=xssfReader.getSharedStringsTable(); //XMLReaderparser=XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); XMLReaderparser=XMLReaderFactory.createXMLReader(); this.sst=sst; parser.setContentHandler(this); XSSFReader.SheetIteratorsheets=(XSSFReader.SheetIterator)xssfReader.getSheetsData(); while(sheets.hasNext()){//遍历sheet curRow=1;//标记初始行为第一行 sheetIndex++; InputStreamsheet=sheets.next();//sheets.next()和sheets.getSheetName()不能换位置,否则sheetName报错 sheetName=sheets.getSheetName(); InputSourcesheetSource=newInputSource(sheet); parser.parse(sheetSource);//解析excel的每条记录,在这个过程中startElement()、characters()、endElement()这三个函数会依次执行 sheet.close(); } returndataList; } /** *第一个执行 * *@paramuri *@paramlocalName *@paramname *@paramattributes *@throwsSAXException */ @Override publicvoidstartElement(Stringuri,StringlocalName,Stringname,Attributesattributes)throwsSAXException{ //c=>单元格 if("c".equals(name)){ //前一个单元格的位置 if(preRef==null){ preRef=attributes.getValue("r"); }else{ //中部文本空单元格标识‘endElementFlag'判断前一次是否为文本空字符串,true则表明不是文本空字符串,false表明是文本空字符串跳过把空字符串的位置赋予preRef if(endElementFlag){ preRef=ref; } } //当前单元格的位置 ref=attributes.getValue("r"); //首部文本空单元格标识‘startElementFlag'判断前一次,即首部是否为文本空字符串,true则表明不是文本空字符串,false表明是文本空字符串,且已知当前格,即第二格带“B”标志,则ref赋予preRef if(!startElementFlag&&!flag){//上一个单元格为文本空单元格,执行下面的,使ref=preRef;flag为true表明该单元格之前有数据值,即该单元格不是首部空单元格,则跳过 //这里只有上一个单元格为文本空单元格,且之前的几个单元格都没有值才会执行 preRef=ref; } //设定单元格类型 this.setNextDataType(attributes); endElementFlag=false; charactersFlag=false; startElementFlag=false; } //当元素为t时 if("t".equals(name)){ isTElement=true; }else{ isTElement=false; } //置空 lastIndex=""; } /** *第二个执行 *得到单元格对应的索引值或是内容值 *如果单元格类型是字符串、INLINESTR、数字、日期,lastIndex则是索引值 *如果单元格类型是布尔值、错误、公式,lastIndex则是内容值 *@paramch *@paramstart *@paramlength *@throwsSAXException */ @Override publicvoidcharacters(char[]ch,intstart,intlength)throwsSAXException{ startElementFlag=true; charactersFlag=true; lastIndex+=newString(ch,start,length); } /** *第三个执行 * *@paramuri *@paramlocalName *@paramname *@throwsSAXException */ @Override publicvoidendElement(Stringuri,StringlocalName,Stringname)throwsSAXException{ //t元素也包含字符串 if(isTElement){ //将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符 Stringvalue=lastIndex.trim(); cellList.add(curCol,value); endElementFlag=true; curCol++; isTElement=false; //如果里面某个单元格含有值,则标识该行不为空行 if(value!=null&&!"".equals(value)){ flag=true; } }elseif("v".equals(name)){ //v=>单元格的值,如果单元格是字符串,则v标签的值为该字符串在SST中的索引 Stringvalue=this.getDataValue(lastIndex.trim(),"");//根据索引值获取对应的单元格值 //补全单元格之间的空单元格 if(!ref.equals(preRef)){ intlen=countNullCell(ref,preRef); for(inti=0;i
(); //cellList.clear(); curRow++; curCol=0; preRef=null; prePreRef=null; ref=null; flag=false; } } } /** *处理数据类型 * *@paramattributes */ publicvoidsetNextDataType(Attributesattributes){ nextDataType=CellDataType.NUMBER;//cellType为空,则表示该单元格类型为数字 formatIndex=-1; formatString=null; StringcellType=attributes.getValue("t");//单元格类型 StringcellStyleStr=attributes.getValue("s");// StringcolumnData=attributes.getValue("r");//获取单元格的位置,如A1,B1 if("b".equals(cellType)){//处理布尔值 nextDataType=CellDataType.BOOL; }elseif("e".equals(cellType)){//处理错误 nextDataType=CellDataType.ERROR; }elseif("inlineStr".equals(cellType)){ nextDataType=CellDataType.INLINESTR; }elseif("s".equals(cellType)){//处理字符串 nextDataType=CellDataType.SSTINDEX; }elseif("str".equals(cellType)){ nextDataType=CellDataType.FORMULA; } if(cellStyleStr!=null){//处理日期 intstyleIndex=Integer.parseInt(cellStyleStr); XSSFCellStylestyle=stylesTable.getStyleAt(styleIndex); formatIndex=style.getDataFormat(); formatString=style.getDataFormatString(); if(formatString.contains("m/d/yyyy")||formatString.contains("yyyy/mm/dd")||formatString.contains("yyyy/m/d")){ nextDataType=CellDataType.DATE; formatString="yyyy-MM-ddhh:mm:ss"; } if(formatString==null){ nextDataType=CellDataType.NULL; formatString=BuiltinFormats.getBuiltinFormat(formatIndex); } } } /** *对解析出来的数据进行类型处理 *@paramvalue单元格的值, *value代表解析:BOOL的为0或1,ERROR的为内容值,FORMULA的为内容值,INLINESTR的为索引值需转换为内容值, *SSTINDEX的为索引值需转换为内容值,NUMBER为内容值,DATE为内容值 *@paramthisStr一个空字符串 *@return */ @SuppressWarnings("deprecation") publicStringgetDataValue(Stringvalue,StringthisStr){ switch(nextDataType){ //这几个的顺序不能随便交换,交换了很可能会导致数据错误 caseBOOL://布尔值 charfirst=value.charAt(0); thisStr=first=='0'?"FALSE":"TRUE"; break; caseERROR://错误 thisStr="\"ERROR:"+value.toString()+'"'; break; caseFORMULA://公式 thisStr='"'+value.toString()+'"'; break; caseINLINESTR: XSSFRichTextStringrtsi=newXSSFRichTextString(value.toString()); thisStr=rtsi.toString(); rtsi=null; break; caseSSTINDEX://字符串 StringsstIndex=value.toString(); try{ intidx=Integer.parseInt(sstIndex); XSSFRichTextStringrtss=newXSSFRichTextString(sst.getEntryAt(idx));//根据idx索引值获取内容值 thisStr=rtss.toString(); //System.out.println(thisStr); //有些字符串是文本格式的,但内容却是日期 rtss=null; }catch(NumberFormatExceptionex){ thisStr=value.toString(); } break; caseNUMBER://数字 if(formatString!=null){ thisStr=formatter.formatRawCellContents(Double.parseDouble(value),formatIndex,formatString).trim(); }else{ thisStr=value; } thisStr=thisStr.replace("_","").trim(); break; caseDATE://日期 thisStr=formatter.formatRawCellContents(Double.parseDouble(value),formatIndex,formatString); //对日期字符串作特殊处理,去掉T thisStr=thisStr.replace("T",""); break; default: thisStr=""; break; } returnthisStr; } publicintcountNullCell(Stringref,StringpreRef){ //excel2007最大行数是1048576,最大列数是16384,最后一列列名是XFD Stringxfd=ref.replaceAll("\\d+",""); Stringxfd_1=preRef.replaceAll("\\d+",""); xfd=fillChar(xfd,3,'@',true); xfd_1=fillChar(xfd_1,3,'@',true); char[]letter=xfd.toCharArray(); char[]letter_1=xfd_1.toCharArray(); intres=(letter[0]-letter_1[0])*26*26+(letter[1]-letter_1[1])*26+(letter[2]-letter_1[2]); returnres-1; } publicStringfillChar(Stringstr,intlen,charlet,booleanisPre){ intlen_1=str.length(); if(len_1 读取工具类:
importjava.io.InputStream; importjava.util.List; publicclassExcelReaderUtil{ //excel2003扩展名 publicstaticfinalStringEXCEL03_EXTENSION=".xls"; //excel2007扩展名 publicstaticfinalStringEXCEL07_EXTENSION=".xlsx"; //读取xls格式 publicstaticList>readExcelXls(InputStreamin)throwsException{ ExcelXlsReaderexcelXls=newExcelXlsReader(); List
>dataList=excelXls.process(in); returndataList; } //读取xlsx格式 publicstaticList
>readExcelXlsx(InputStreamin)throwsException{ ExcelXlsxReaderexcelXls=newExcelXlsxReader(); List
>dataList=excelXls.process(in); returndataList; } publicstaticvoidreadExcel(StringfileName)throwsException{ inttotalRows=0; if(fileName.endsWith(EXCEL03_EXTENSION)){//处理excel2003文件 ExcelXlsReaderexcelXls=newExcelXlsReader(); totalRows=excelXls.process(fileName); }elseif(fileName.endsWith(EXCEL07_EXTENSION)){//处理excel2007文件 ExcelXlsxReaderexcelXlsxReader=newExcelXlsxReader(); totalRows=excelXlsxReader.process(fileName); }else{ thrownewException("文件格式错误,fileName的扩展名只能是xls或xlsx。"); } System.out.println("发送的总行数:"+totalRows); } publicstaticvoidmain(String[]args)throwsException{ Stringpath="C:\\test.xlsx"; ExcelReaderUtil.readExcel(path); } }
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。