Java實現excel大數據量導入

發布時間：2020-08-30 03:33:00 來源：腳本之家閱讀：194 作者：王紹樺欄目：開發技術

本文實例為大家分享了Java實現excel大數據量導入的具體代碼，供大家參考，具體內容如下

情景分析：

通常我們通過poi讀取excel文件時，若在用戶模式下，由于數據量較大、Sheet較多，很容易出現內存溢出的情況

用戶模式讀取excel的典型代碼如下：

FileInputStream file = new FileInputStream("c:\\test.xlsx");
Workbook wb=new XSSFWorkbook(file);

而03版（xls）excel文件每個sheet最大長度為 65536行，07版（xlsx）excel文件每個sheet最大長度為 1048576行，因此我們主要解決07版（xlsx）excel大數據量導入出現內存溢出的問題

由于讀取cvs文件效率高且占用內存少，所以我們采用xlsx轉換為cvs的方式來解決

讀取xlsx格式數據：

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
 
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
 
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
 
public class XLSX2CSV {
 
 enum xssfDataType {
 BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
 }
 
 /**
 * 使用xssf_sax_API處理Excel,請參考： http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
 */
 class MyXSSFSheetHandler extends DefaultHandler {
 
 private StylesTable stylesTable;
 
 private ReadOnlySharedStringsTable sharedStringsTable;
 
 private final PrintStream output;
 
 private final int minColumnCount;
 
 private boolean vIsOpen;
 
 private xssfDataType nextDataType;
 
 private short formatIndex;
 private String formatString;
 private final DataFormatter formatter;
 
 private int thisColumn = -1;
 private int lastColumnNumber = -1;
 
 private StringBuffer value;
 private String[] record;
 private List<String[]> rows = new ArrayList<String[]>();
 private boolean isCellNull = false;
 
 public MyXSSFSheetHandler(StylesTable styles,ReadOnlySharedStringsTable strings, int cols, PrintStream target) {
 this.stylesTable = styles;
 this.sharedStringsTable = strings;
 this.minColumnCount = cols;
 this.output = target;
 this.value = new StringBuffer();
 this.nextDataType = xssfDataType.NUMBER;
 this.formatter = new DataFormatter();
 record = new String[this.minColumnCount];
 rows.clear();// 每次讀取都清空行集合
 }
 
 public void startElement(String uri, String localName, String name,Attributes attributes) throws SAXException {
 if ("inlineStr".equals(name) || "v".equals(name)) {
 vIsOpen = true;
 // Clear contents cache
 value.setLength(0);
 }
 // c => cell
 else if ("c".equals(name)) {
 // Get the cell reference
 String r = attributes.getValue("r");
 int firstDigit = -1;
 for (int c = 0; c < r.length(); ++c) {
  if (Character.isDigit(r.charAt(c))) {
  firstDigit = c;
  break;
  }
 }
 thisColumn = nameToColumn(r.substring(0, firstDigit));
 
 // Set up defaults.
 this.nextDataType = xssfDataType.NUMBER;
 this.formatIndex = -1;
 this.formatString = null;
 String cellType = attributes.getValue("t");
 String cellStyleStr = attributes.getValue("s");
 if ("b".equals(cellType))
  nextDataType = xssfDataType.BOOL;
 else if ("e".equals(cellType))
  nextDataType = xssfDataType.ERROR;
 else if ("inlineStr".equals(cellType))
  nextDataType = xssfDataType.INLINESTR;
 else if ("s".equals(cellType))
  nextDataType = xssfDataType.SSTINDEX;
 else if ("str".equals(cellType))
  nextDataType = xssfDataType.FORMULA;
 else if (cellStyleStr != null) {
  // It's a number, but almost certainly one
  // with a special style or format
  int styleIndex = Integer.parseInt(cellStyleStr);
  XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
  this.formatIndex = style.getDataFormat();
  this.formatString = style.getDataFormatString();
  if (this.formatString == null)
  this.formatString = BuiltinFormats
  .getBuiltinFormat(this.formatIndex);
 }
 }
 
 }
 
 public void endElement(String uri, String localName, String name)throws SAXException {
 
 String thisStr = null;
 
 // v => contents of a cell
 if ("v".equals(name)) {
 // Process the value contents as required.
 // Do now, as characters() may be called more than once
 switch (nextDataType) {
 
 case BOOL:
  char first = value.charAt(0);
  thisStr = first == '0' ? "FALSE" : "TRUE";
  break;
 
 case ERROR:
  thisStr = "\"ERROR:" + value.toString() + '"';
  break;
 
 case FORMULA:
  // A formula could result in a string value,
  // so always add double-quote characters.
  thisStr = '"' + value.toString() + '"';
  break;
 
 case INLINESTR:
  // TODO: have seen an example of this, so it's untested.
  XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
  thisStr = '"' + rtsi.toString() + '"';
  break;
 
 case SSTINDEX:
  String sstIndex = value.toString();
  try {
  int idx = Integer.parseInt(sstIndex);
  XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
  thisStr = rtss.toString();
  } catch (NumberFormatException ex) {
  output.println("Failed to parse SST index '" + sstIndex
  + "': " + ex.toString());
  }
  break;
 
 case NUMBER:
  String n = value.toString();
  // 判斷是否是日期格式
  if (HSSFDateUtil.isADateFormat(this.formatIndex, n)) {
  Double d = Double.parseDouble(n);
  Date date=HSSFDateUtil.getJavaDate(d);
  thisStr=formateDateToString(date);
  } else if (this.formatString != null)
  thisStr = formatter.formatRawCellContents(
  Double.parseDouble(n), this.formatIndex,
  this.formatString);
  else
  thisStr = n;
  break;
 
 default:
  thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
  break;
 }
 
 // Output after we've seen the string contents
 // Emit commas for any fields that were missing on this row
 if (lastColumnNumber == -1) {
  lastColumnNumber = 0;
 }
 //判斷單元格的值是否為空
 if (thisStr == null || "".equals(isCellNull)) {
  isCellNull = true;// 設置單元格是否為空值
 }
 record[thisColumn] = thisStr;
 // Update column
 if (thisColumn > -1)
  lastColumnNumber = thisColumn;
 
 } else if ("row".equals(name)) {
 // Print out any missing commas if needed
 if (minColumns > 0) {
  // Columns are 0 based
  if (lastColumnNumber == -1) {
  lastColumnNumber = 0;
  }
  if (isCellNull == false && record[0] != null
  && record[1] != null)// 判斷是否空行
  {
  rows.add(record.clone());
  isCellNull = false;
  for (int i = 0; i < record.length; i++) {
  record[i] = null;
  }
  }
 }
 lastColumnNumber = -1;
 }
 
 }
 
 public List<String[]> getRows() {
 return rows;
 }
 
 public void setRows(List<String[]> rows) {
 this.rows = rows;
 }
 
 /**
 * Captures characters only if a suitable element is open. Originally
 * was just "v"; extended for inlineStr also.
 */
 public void characters(char[] ch, int start, int length)throws SAXException {
 if (vIsOpen)
 value.append(ch, start, length);
 }
 
 /**
 * Converts an Excel column name like "C" to a zero-based index.
 * @param name
 * @return Index corresponding to the specified name
 */
 private int nameToColumn(String name) {
 int column = -1;
 for (int i = 0; i < name.length(); ++i) {
 int c = name.charAt(i);
 column = (column + 1) * 26 + c - 'A';
 }
 return column;
 }
 
 private String formateDateToString(Date date) {
 SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");//格式化日期
 return sdf.format(date);
 
 }
 
 }
 
 private OPCPackage xlsxPackage;
 private int minColumns;
 private PrintStream output;
 
 /**
 * Creates a new XLSX -> CSV converter
 * 
 * @param pkg
 *   The XLSX package to process
 * @param output
 *   The PrintStream to output the CSV to
 * @param minColumns
 *   The minimum number of columns to output, or -1 for no minimum
 */
 public XLSX2CSV(OPCPackage pkg, PrintStream output,int minColumns) {
 this.xlsxPackage = pkg;
 this.output = output;
 this.minColumns = minColumns;
 }
 
 /**
 * Parses and shows the content of one sheet using the specified styles and
 * shared-strings tables.
 * @param styles
 * @param strings
 * @param sheetInputStream
 */
 public List<String[]> processSheet(StylesTable styles,
 ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
 throws IOException, ParserConfigurationException, SAXException {
 InputSource sheetSource = new InputSource(sheetInputStream);
 SAXParserFactory saxFactory = SAXParserFactory.newInstance();
 SAXParser saxParser = saxFactory.newSAXParser();
 XMLReader sheetParser = saxParser.getXMLReader();
 MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings,this.minColumns, this.output);
 sheetParser.setContentHandler(handler);
 sheetParser.parse(sheetSource);
 return handler.getRows();
 }
 
 /**
 * 初始化這個處理程序 
 * @throws IOException
 * @throws OpenXML4JException
 * @throws ParserConfigurationException
 * @throws SAXException
 */
 public List<String[]> process() throws IOException, OpenXML4JException,ParserConfigurationException, SAXException {
 ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
 XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
 List<String[]> list = null;
 StylesTable styles = xssfReader.getStylesTable();
 XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
 int index = 0;
 while (iter.hasNext()) {
 InputStream stream = iter.next();
 index++;
 //默認取第一個工作薄
 if(index == 1){
 list = processSheet(styles, strings, stream);
 stream.close();
 }
 }
 return list;
 }
 
 /**
 * 讀取Excel
 * 
 * @param path
 *   文件路徑
 * @param sheetName
 *   sheet名稱
 * @param minColumns
 *   列總數
 * @return
 * @throws SAXException
 * @throws ParserConfigurationException
 * @throws OpenXML4JException
 * @throws IOException
 */
 private static List<String[]> readerExcel(String path,int minColumns) throws IOException, OpenXML4JException,ParserConfigurationException, SAXException {
 OPCPackage p = OPCPackage.open(path, PackageAccess.READ);
 XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);
 List<String[]> list = xlsx2csv.process();
 p.close();
 return list;
 }
 
 /**
 * 讀取Excel
 * 
 * @param file
 *   File
 * @param sheetName
 *   sheet名稱
 * @param minColumns
 *   列總數
 * @return
 * @throws SAXException
 * @throws ParserConfigurationException
 * @throws OpenXML4JException
 * @throws IOException
 */
 private static List<String[]> readerExcelInputStream(File file,int minColumns) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
 OPCPackage p = OPCPackage.openOrCreate(file);
 XLSX2CSV xlsx2csv = new XLSX2CSV(p, System.out, minColumns);
 List<String[]> list = xlsx2csv.process();
 p.close();
 return list;
 }
 
 //獲取表頭及數據內容,可在業務層調用
 public static Map<String,Object> getTableDate(InputStream in) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException{ 
 File out = new File("C:\\date.xlsx");//默認在C盤生成date.xlsx文件
 OutputStream output = null;
 try {
 output = new FileOutputStream(out);
 byte[] buf = new byte[1024];
 int bytesRead;
 while ((bytesRead = in.read(buf)) > 0) {
 output.write(buf, 0, bytesRead);
 }
 } finally {
 in.close();
 output.close();
 }
 Map<String,Object> map = new HashMap<String,Object>();
 List<String> headresult = new ArrayList<String>();//表頭集合
 List<List<String>> dataresult = new ArrayList<List<String>>();//表數據集合
 List<String[]> list = readerExcelInputStream(out,30);//默認讀取30列
 for(int i=0;i<list.size();i++){
 if(i == 0){
 for(String cell : list.get(i)){
  if(cell != null){  
  headresult.add(cell);
  }
 }
 }else{
 List<String> dataList = new ArrayList<String>();
 for(String cell : list.get(i)){
  dataList.add(cell == null ? "" : cell);
 }
 dataresult.add(dataList);
 }
 }
 map.put("headresult", headresult);
 map.put("dataresult", dataresult);
 return map;
 }
 
 public static void main(String[] args) throws Exception {
 File file = new File("C:/Users/Administrator/Desktop/測試.xlsx");
 System.out.println("開始讀取...");
 List<String[]> list = XLSX2CSV.readerExcelInputStream(file, 30);//默認讀取30列
 System.out.println("數據量："+list.size());
 for(int i=0;i<list.size();i++){
 if(i > 0){
 for (String cell : list.get(i)) {
  System.out.print(cell + ",");
 }
 System.out.println();
 }
 }
 }
 
}

說明：

代碼中的加載excel的核心方法：

OPCPackage pkg = OPCPackage.open(path);//文件路徑
OPCPackage pkg = OPCPackage.openOrCreate(file);//文件
OPCPackage pkg = OPCPackage.open(InputStream);//文件流

文件流方式對內存依賴極大，所以實際應用時，如果只能獲取文件流的話，可以先將文件通過流拷貝到本地再解析代碼中的

若上傳文件大小超過限制，可在配置文件中設置，SpringBoot2項目application.properties中設置：

spring.servlet.multipart.maxFileSize=50MB
spring.servlet.multipart.maxRequestSize=100MB

實際業務數據入庫的話也可采用異步任務@Async的方式來提高入庫效率：

import java.util.Map;
 
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
 
import cn.com.app.dao.JkconfigDao;
 
@Service
public class JkrzService {
 
 @Autowired
 private JkconfigDao jkconfigDao;
 
 @Async("myTaskAsyncPool")
 public void transJkrz(Map<String, Object> m) {
 jkconfigDao.insert(m);
 }
}

線程池配置：

import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;
 
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
 
@Configuration
@EnableAsync
public class AsyncTaskConfig {
 
 @Autowired 
 private TaskThreadPoolConfig config;
 
 // ThredPoolTaskExcutor的處理流程
 // 當池子大小小于corePoolSize，就新建線程，并處理請求
 // 當池子大小等于corePoolSize，把請求放入workQueue中，池子里的空閑線程就去workQueue中取任務并處理
 // 當workQueue放不下任務時，就新建線程入池，并處理請求，如果池子大小撐到了maximumPoolSize，就用RejectedExecutionHandler來做拒絕處理
 // 當池子的線程數大于corePoolSize時，多余的線程會等待keepAliveTime長時間，如果無請求可處理就自行銷毀
 // 當threadNamePrefix設置為true，則核心線程也會超時關閉 
 @Bean
 public Executor myTaskAsyncPool() {
  ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); 
   executor.setCorePoolSize(config.getCorePoolSize()); 
   executor.setMaxPoolSize(config.getMaxPoolSize()); 
   executor.setQueueCapacity(config.getQueueCapacity()); 
   executor.setKeepAliveSeconds(config.getKeepAliveSeconds()); 
   executor.setAllowCoreThreadTimeOut(true);
   executor.setThreadNamePrefix("MyExecutor-"); 
 
   // rejection-policy：當pool已經達到max size的時候，如何處理新任務 
   // CALLER_RUNS：不在新線程中執行任務，而是由調用者所在的線程來執行 
   executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy()); 
   executor.initialize(); 
  return executor;
 }
 
}

參數配置：

#核心線程數,當線程數小于核心線程數時,即使有線程空閑,線程池也會優先創建新線程,設置allowCoreThreadTimeout=true（默認false）時,核心線程會超時退出
spring.task.pool.corePoolSize=20
#最大線程數,當線程數大于等于corePoolSize,且任務隊列已滿時,線程池會創建新線程來處理任務
spring.task.pool.maxPoolSize=60
#線程空閑時間,當線程空閑時間達到keepAliveSeconds(秒)時,線程會退出,直到線程數量等于corePoolSize,如果allowCoreThreadTimeout=true,則會直到線程數量等于0
spring.task.pool.keepAliveSeconds=1
#任務隊列容量,當核心線程數達到最大時,新任務會放在隊列中排隊等待執行
spring.task.pool.queueCapacity=400

讀取xls格式數據：

import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
 
/**
 * @desc 用于解決.xls2003版本大數據量問題
 **/
public class ExcelXlsReader implements HSSFListener {
 
 //存儲所有數據
 private List<List<String>> dataList = new ArrayList<List<String>>();
 
 private int minColums = -1;
 
 private POIFSFileSystem fs;
 
 /**
  * 總行數
  */
 private int totalRows=0;
 
 /**
  * 上一行row的序號
  */
 private int lastRowNumber;
 
 /**
  * 上一單元格的序號
  */
 private int lastColumnNumber;
 
 /**
  * 是否輸出formula，還是它對應的值
  */
 private boolean outputFormulaValues = true;
 
 /**
  * 用于轉換formulas
  */
 private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
 
 //excel2003工作簿
 private HSSFWorkbook stubWorkbook;
 
 private SSTRecord sstRecord;
 
 private FormatTrackingHSSFListener formatListener;
 
 private final HSSFDataFormatter formatter = new HSSFDataFormatter();
 
 /**
  * 文件的絕對路徑
  */
 private String filePath = "";
 
 //表索引
 private int sheetIndex = 0;
 
 private BoundSheetRecord[] orderedBSRs;
 
 @SuppressWarnings("unchecked")
 private ArrayList boundSheetRecords = new ArrayList();
 
 private int nextRow;
 
 private int nextColumn;
 
 private boolean outputNextStringRecord;
 
 //當前行
 private int curRow = 0;
 
 //存儲一行記錄所有單元格的容器
 private List<String> cellList = new ArrayList<String>();
 
 /**
  * 判斷整行是否為空行的標記
  */
 private boolean flag = false;
 
 @SuppressWarnings("unused")
 private String sheetName;
 
 /**
  * 遍歷excel下所有的sheet
  *
  * @param fileName
  * @throws Exception
  */
 public int process(String fileName) throws Exception {
  filePath = fileName;
  this.fs = new POIFSFileSystem(new FileInputStream(fileName));
  MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
  formatListener = new FormatTrackingHSSFListener(listener);
  HSSFEventFactory factory = new HSSFEventFactory();
  HSSFRequest request = new HSSFRequest();
  if (outputFormulaValues) {
   request.addListenerForAllRecords(formatListener);
  } else {
   workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
   request.addListenerForAllRecords(workbookBuildingListener);
  }
  factory.processWorkbookEvents(request, fs);
 
  return totalRows; //返回該excel文件的總行數，不包括首列和空行
 }
 
 public List<List<String>> process(InputStream in) throws Exception {
  //filePath = fileName;
  this.fs = new POIFSFileSystem(in);
  MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
  formatListener = new FormatTrackingHSSFListener(listener);
  HSSFEventFactory factory = new HSSFEventFactory();
  HSSFRequest request = new HSSFRequest();
  if (outputFormulaValues) {
   request.addListenerForAllRecords(formatListener);
  } else {
   workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
   request.addListenerForAllRecords(workbookBuildingListener);
  }
  factory.processWorkbookEvents(request, fs);
 
  return dataList; //返回該excel文件的總行數，不包括首列和空行
 }
 
 /**
  * HSSFListener 監聽方法，處理Record
  * 處理每個單元格
  * @param record
  */
 @SuppressWarnings("unchecked")
 public void processRecord(Record record) {
  int thisRow = -1;
  int thisColumn = -1;
  String thisStr = null;
  String value = null;
  switch (record.getSid()) {
   case BoundSheetRecord.sid:
    boundSheetRecords.add(record);
    break;
   case BOFRecord.sid: //開始處理每個sheet
    BOFRecord br = (BOFRecord) record;
    if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
     //如果有需要，則建立子工作簿
     if (workbookBuildingListener != null && stubWorkbook == null) {
      stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
     }
 
     if (orderedBSRs == null) {
      orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
     }
     sheetName = orderedBSRs[sheetIndex].getSheetname();
     sheetIndex++;
    }
    break;
   case SSTRecord.sid:
    sstRecord = (SSTRecord) record;
    break;
   case BlankRecord.sid: //單元格為空白
    BlankRecord brec = (BlankRecord) record;
    thisRow = brec.getRow();
    thisColumn = brec.getColumn();
    thisStr = "";
    cellList.add(thisColumn, thisStr);
    break;
   case BoolErrRecord.sid: //單元格為布爾類型
    BoolErrRecord berec = (BoolErrRecord) record;
    thisRow = berec.getRow();
    thisColumn = berec.getColumn();
    thisStr = berec.getBooleanValue() + "";
    cellList.add(thisColumn, thisStr);
    checkRowIsNull(thisStr); //如果里面某個單元格含有值，則標識該行不為空行
    break;
   case FormulaRecord.sid://單元格為公式類型
    FormulaRecord frec = (FormulaRecord) record;
    thisRow = frec.getRow();
    thisColumn = frec.getColumn();
    if (outputFormulaValues) {
     if (Double.isNaN(frec.getValue())) {
      outputNextStringRecord = true;
      nextRow = frec.getRow();
      nextColumn = frec.getColumn();
     } else {
      thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
     }
    } else {
     thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
    }
    cellList.add(thisColumn, thisStr);
    checkRowIsNull(thisStr); //如果里面某個單元格含有值，則標識該行不為空行
    break;
   case StringRecord.sid: //單元格中公式的字符串
    if (outputNextStringRecord) {
     StringRecord srec = (StringRecord) record;
     thisStr = srec.getString();
     thisRow = nextRow;
     thisColumn = nextColumn;
     outputNextStringRecord = false;
    }
    break;
   case LabelRecord.sid:
    LabelRecord lrec = (LabelRecord) record;
    curRow = thisRow = lrec.getRow();
    thisColumn = lrec.getColumn();
    value = lrec.getValue().trim();
    value = value.equals("") ? "" : value;
    cellList.add(thisColumn, value);
    checkRowIsNull(value); //如果里面某個單元格含有值，則標識該行不為空行
    break;
   case LabelSSTRecord.sid: //單元格為字符串類型
    LabelSSTRecord lsrec = (LabelSSTRecord) record;
    curRow = thisRow = lsrec.getRow();
    thisColumn = lsrec.getColumn();
    if (sstRecord == null) {
     cellList.add(thisColumn, "");
    } else {
     value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
     value = value.equals("") ? "" : value;
     cellList.add(thisColumn, value);
     checkRowIsNull(value); //如果里面某個單元格含有值，則標識該行不為空行
    }
    break;
   case NumberRecord.sid: //單元格為數字類型
    NumberRecord numrec = (NumberRecord) record;
    curRow = thisRow = numrec.getRow();
    thisColumn = numrec.getColumn();
 
    //第一種方式
    //value = formatListener.formatNumberDateCell(numrec).trim();//這個被寫死，采用的m/d/yy h:mm格式，不符合要求
 
    //第二種方式，參照formatNumberDateCell里面的實現方法編寫
    Double valueDouble=((NumberRecord)numrec).getValue();
    String formatString=formatListener.getFormatString(numrec);
    if (formatString.contains("m/d/yy")){
     formatString="yyyy-MM-dd hh:mm:ss";
    }
    int formatIndex=formatListener.getFormatIndex(numrec);
    value=formatter.formatRawCellContents(valueDouble, formatIndex, formatString).trim();
 
    value = value.equals("") ? "" : value;
    //向容器加入列值
    cellList.add(thisColumn, value);
    checkRowIsNull(value); //如果里面某個單元格含有值，則標識該行不為空行
    break;
   default:
    break;
  }
 
  //遇到新行的操作
  if (thisRow != -1 && thisRow != lastRowNumber) {
   lastColumnNumber = -1;
  }
 
  //空值的操作
  if (record instanceof MissingCellDummyRecord) {
   MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
   curRow = thisRow = mc.getRow();
   thisColumn = mc.getColumn();
   cellList.add(thisColumn, "");
  }
 
  //更新行和列的值
  if (thisRow > -1)
   lastRowNumber = thisRow;
  if (thisColumn > -1)
   lastColumnNumber = thisColumn;
 
  //行結束時的操作
  if (record instanceof LastCellOfRowDummyRecord) {
   if (minColums > 0) {
    //列值重新置空
    if (lastColumnNumber == -1) {
     lastColumnNumber = 0;
    }
   }
   lastColumnNumber = -1;
 
   if (flag&&curRow!=0) { //該行不為空行且該行不是第一行，發送（第一行為列名，不需要）
    //ExcelReaderUtil.sendRows(filePath, sheetName, sheetIndex, curRow + 1, cellList); //每行結束時，調用sendRows()方法
    totalRows++;
    //添加到數據集合中
    dataList.add(cellList);
   }
   //清空容器
   cellList = new ArrayList<String>(); 
   //cellList.clear();
   flag=false;
  }
 }
 
 /**
  * 如果里面某個單元格含有值，則標識該行不為空行
  * @param value
  */
 public void checkRowIsNull(String value){
  if (value != null && !"".equals(value)) {
   flag = true;
  }
 }
}

讀取xlsx格式數據（自動獲取表頭長度方式）：

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
 
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
 
/**
 * @desc POI讀取excel有兩種模式，一種是用戶模式，一種是事件驅動模式
 * 采用SAX事件驅動模式解決XLSX文件，可以有效解決用戶模式內存溢出的問題，
 * 該模式是POI官方推薦的讀取大數據的模式，
 * 在用戶模式下，數據量較大，Sheet較多，或者是有很多無用的空行的情況下，容易出現內存溢出
 * <p>
 * 用于解決.xlsx2007版本大數據量問題
 **/
public class ExcelXlsxReader extends DefaultHandler {
 
 //存儲所有數據
 private List<List<String>> dataList = new ArrayList<List<String>>();
 
 /**
  * 單元格中的數據可能的數據類型
  */
 enum CellDataType {
  BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
 }
 
 /**
  * 共享字符串表
  */
 private SharedStringsTable sst;
 
 /**
  * 上一次的索引值
  */
 private String lastIndex;
 
 /**
  * 文件的絕對路徑
  */
 private String filePath = "";
 
 /**
  * 工作表索引
  */
 private int sheetIndex = 0;
 
 /**
  * sheet名
  */
 private String sheetName = "";
 
 /**
  * 總行數
  */
 private int totalRows=0;
 
 /**
  * 一行內cell集合
  */
 private List<String> cellList = new ArrayList<String>();
 
 /**
  * 判斷整行是否為空行的標記
  */
 private boolean flag = false;
 
 /**
  * 當前行
  */
 private int curRow = 1;
 
 /**
  * 當前列
  */
 private int curCol = 0;
 
 /**
  * T元素標識
  */
 private boolean isTElement;
 
 /**
  * 判斷上一單元格是否為文本空單元格
  */
 private boolean startElementFlag = true;
 private boolean endElementFlag = false;
 private boolean charactersFlag = false;
 
 /**
  * 異常信息，如果為空則表示沒有異常
  */
 private String exceptionMessage;
 
 /**
  * 單元格數據類型，默認為字符串類型
  */
 private CellDataType nextDataType = CellDataType.SSTINDEX;
 
 private final DataFormatter formatter = new DataFormatter();
 
 /**
  * 單元格日期格式的索引
  */
 private short formatIndex;
 
 /**
  * 日期格式字符串
  */
 private String formatString;
 
 //定義前一個元素和當前元素的位置，用來計算其中空的單元格數量，如A6和A8等
 private String prePreRef = "A", preRef = null, ref = null;
 
 //定義該文檔一行最大的單元格數，用來補全一行最后可能缺失的單元格
 private String maxRef = null;
 
 /**
  * 單元格
  */
 private StylesTable stylesTable;
 
 /**
  * 遍歷工作簿中所有的電子表格
  * 并緩存在mySheetList中
  *
  * @param filename
  * @throws Exception
  */
 public int process(String filename) throws Exception {
  filePath = filename;
  OPCPackage pkg = OPCPackage.open(filename);
  XSSFReader xssfReader = new XSSFReader(pkg);
  stylesTable = xssfReader.getStylesTable();
  SharedStringsTable sst = xssfReader.getSharedStringsTable();
  XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
  this.sst = sst;
  parser.setContentHandler(this);
  XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
  while (sheets.hasNext()) { //遍歷sheet
   curRow = 1; //標記初始行為第一行
   sheetIndex++;
   InputStream sheet = sheets.next(); //sheets.next()和sheets.getSheetName()不能換位置，否則sheetName報錯
   sheetName = sheets.getSheetName();
   InputSource sheetSource = new InputSource(sheet);
   parser.parse(sheetSource); //解析excel的每條記錄，在這個過程中startElement()、characters()、endElement()這三個函數會依次執行
   sheet.close();
  }
  return totalRows; //返回該excel文件的總行數，不包括首列和空行
 }
 
 public List<List<String>> process(InputStream in) throws Exception {
  //filePath = filename;
  OPCPackage pkg = OPCPackage.open(in);
  XSSFReader xssfReader = new XSSFReader(pkg);
  stylesTable = xssfReader.getStylesTable();
  SharedStringsTable sst = xssfReader.getSharedStringsTable();
  //XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
  XMLReader parser = XMLReaderFactory.createXMLReader();
  this.sst = sst;
  parser.setContentHandler(this);
  XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
  while (sheets.hasNext()) { //遍歷sheet
   curRow = 1; //標記初始行為第一行
   sheetIndex++;
   InputStream sheet = sheets.next(); //sheets.next()和sheets.getSheetName()不能換位置，否則sheetName報錯
   sheetName = sheets.getSheetName();
   InputSource sheetSource = new InputSource(sheet);
   parser.parse(sheetSource); //解析excel的每條記錄，在這個過程中startElement()、characters()、endElement()這三個函數會依次執行
   sheet.close();
  }
  return dataList;
 }
 
 /**
  * 第一個執行
  *
  * @param uri
  * @param localName
  * @param name
  * @param attributes
  * @throws SAXException
  */
 @Override
 public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
  //c => 單元格
  if ("c".equals(name)) {
 
   //前一個單元格的位置
   if (preRef == null) {
    preRef = attributes.getValue("r");
 
   } else {
    //中部文本空單元格標識 ‘endElementFlag' 判斷前一次是否為文本空字符串，true則表明不是文本空字符串，false表明是文本空字符串跳過把空字符串的位置賦予preRef
    if (endElementFlag){
     preRef = ref;
    }
   }
 
   //當前單元格的位置
   ref = attributes.getValue("r");
   //首部文本空單元格標識 ‘startElementFlag' 判斷前一次，即首部是否為文本空字符串，true則表明不是文本空字符串，false表明是文本空字符串, 且已知當前格，即第二格帶“B”標志，則ref賦予preRef
   if (!startElementFlag && !flag){ //上一個單元格為文本空單元格，執行下面的，使ref=preRef；flag為true表明該單元格之前有數據值，即該單元格不是首部空單元格，則跳過
    // 這里只有上一個單元格為文本空單元格，且之前的幾個單元格都沒有值才會執行
    preRef = ref;
   }
 
   //設定單元格類型
   this.setNextDataType(attributes);
   endElementFlag = false;
   charactersFlag = false;
   startElementFlag = false;
  }
 
  //當元素為t時
  if ("t".equals(name)) {
   isTElement = true;
  } else {
   isTElement = false;
  }
 
  //置空
  lastIndex = "";
 }
 
 
 
 /**
  * 第二個執行
  * 得到單元格對應的索引值或是內容值
  * 如果單元格類型是字符串、INLINESTR、數字、日期，lastIndex則是索引值
  * 如果單元格類型是布爾值、錯誤、公式，lastIndex則是內容值
  * @param ch
  * @param start
  * @param length
  * @throws SAXException
  */
 @Override
 public void characters(char[] ch, int start, int length) throws SAXException {
  startElementFlag = true;
  charactersFlag = true;
  lastIndex += new String(ch, start, length);
 }
 
 /**
  * 第三個執行
  *
  * @param uri
  * @param localName
  * @param name
  * @throws SAXException
  */
 @Override
 public void endElement(String uri, String localName, String name) throws SAXException {
  //t元素也包含字符串
  if (isTElement) {
   //將單元格內容加入rowlist中，在這之前先去掉字符串前后的空白符
   String value = lastIndex.trim();
   cellList.add(curCol, value);
   endElementFlag = true;
   curCol++;
   isTElement = false;
   //如果里面某個單元格含有值，則標識該行不為空行
   if (value != null && !"".equals(value)) {
    flag = true;
   }
  } else if ("v".equals(name)) {
   //v => 單元格的值，如果單元格是字符串，則v標簽的值為該字符串在SST中的索引
   String value = this.getDataValue(lastIndex.trim(), "");//根據索引值獲取對應的單元格值
 
   //補全單元格之間的空單元格
   if (!ref.equals(preRef)) {
    int len = countNullCell(ref, preRef);
    for (int i = 0; i < len; i++) {
     cellList.add(curCol, "");
     curCol++;
    }
   } else if (ref.equals(preRef) && !ref.startsWith("A")){ //ref等于preRef，且以B或者C...開頭，表明首部為空格
    int len = countNullCell(ref, "A");
    for (int i = 0; i <= len; i++) {
     cellList.add(curCol, "");
     curCol++;
    }
   }
   cellList.add(curCol, value);
   curCol++;
   endElementFlag = true;
   //如果里面某個單元格含有值，則標識該行不為空行
   if (value != null && !"".equals(value)) {
    flag = true;
   }
  } else {
   //如果標簽名稱為row，這說明已到行尾，調用optRows()方法
   if ("row".equals(name)) {
    //默認第一行為表頭，以該行單元格數目為最大數目
    if (curRow == 1) {
     maxRef = ref;
    }
    //補全一行尾部可能缺失的單元格
    if (maxRef != null) {
     int len = -1;
     //前一單元格，true則不是文本空字符串，false則是文本空字符串
     if (charactersFlag){
      len = countNullCell(maxRef, ref);
     }else {
      len = countNullCell(maxRef, preRef);
     }
     for (int i = 0; i <= len; i++) {
      cellList.add(curCol, "");
      curCol++;
     }
    }
 
    if (flag&&curRow!=1){ //該行不為空行且該行不是第一行，則發送（第一行為列名，不需要）
     //ExcelReaderUtil.sendRows(filePath, sheetName, sheetIndex, curRow, cellList);
     totalRows++;
     //添加到數據集合中
     dataList.add(cellList);
    }
 
    //清空容器
    cellList = new ArrayList<String>(); 
    //cellList.clear();
    curRow++;
    curCol = 0;
    preRef = null;
    prePreRef = null;
    ref = null;
    flag=false;
   }
  }
 }
 
 /**
  * 處理數據類型
  *
  * @param attributes
  */
 public void setNextDataType(Attributes attributes) {
  nextDataType = CellDataType.NUMBER; //cellType為空，則表示該單元格類型為數字
  formatIndex = -1;
  formatString = null;
  String cellType = attributes.getValue("t"); //單元格類型
  String cellStyleStr = attributes.getValue("s"); //
  String columnData = attributes.getValue("r"); //獲取單元格的位置，如A1,B1
 
  if ("b".equals(cellType)) { //處理布爾值
   nextDataType = CellDataType.BOOL;
  } else if ("e".equals(cellType)) { //處理錯誤
   nextDataType = CellDataType.ERROR;
  } else if ("inlineStr".equals(cellType)) {
   nextDataType = CellDataType.INLINESTR;
  } else if ("s".equals(cellType)) { //處理字符串
   nextDataType = CellDataType.SSTINDEX;
  } else if ("str".equals(cellType)) {
   nextDataType = CellDataType.FORMULA;
  }
 
  if (cellStyleStr != null) { //處理日期
   int styleIndex = Integer.parseInt(cellStyleStr);
   XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
   formatIndex = style.getDataFormat();
   formatString = style.getDataFormatString();
   if (formatString.contains("m/d/yyyy") || formatString.contains("yyyy/mm/dd")|| formatString.contains("yyyy/m/d") ) {
    nextDataType = CellDataType.DATE;
    formatString = "yyyy-MM-dd hh:mm:ss";
   }
 
   if (formatString == null) {
    nextDataType = CellDataType.NULL;
    formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
   }
  }
 }
 
 /**
  * 對解析出來的數據進行類型處理
  * @param value 單元格的值，
  *    value代表解析：BOOL的為0或1， ERROR的為內容值，FORMULA的為內容值，INLINESTR的為索引值需轉換為內容值，
  *    SSTINDEX的為索引值需轉換為內容值， NUMBER為內容值，DATE為內容值
  * @param thisStr 一個空字符串
  * @return
  */
 @SuppressWarnings("deprecation")
 public String getDataValue(String value, String thisStr) {
  switch (nextDataType) {
   // 這幾個的順序不能隨便交換，交換了很可能會導致數據錯誤
   case BOOL: //布爾值
    char first = value.charAt(0);
    thisStr = first == '0' ? "FALSE" : "TRUE";
    break;
   case ERROR: //錯誤
    thisStr = "\"ERROR:" + value.toString() + '"';
    break;
   case FORMULA: //公式
    thisStr = '"' + value.toString() + '"';
    break;
   case INLINESTR:
    XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
    thisStr = rtsi.toString();
    rtsi = null;
    break;
   case SSTINDEX: //字符串
    String sstIndex = value.toString();
    try {
     int idx = Integer.parseInt(sstIndex);
     XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));//根據idx索引值獲取內容值
     thisStr = rtss.toString();
     //System.out.println(thisStr);
     //有些字符串是文本格式的，但內容卻是日期
 
     rtss = null;
    } catch (NumberFormatException ex) {
     thisStr = value.toString();
    }
    break;
   case NUMBER: //數字
    if (formatString != null) {
     thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString).trim();
    } else {
     thisStr = value;
    }
    thisStr = thisStr.replace("_", "").trim();
    break;
   case DATE: //日期
    thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString);
    // 對日期字符串作特殊處理，去掉T
    thisStr = thisStr.replace("T", " ");
    break;
   default:
    thisStr = " ";
    break;
  }
  return thisStr;
 }
 
 public int countNullCell(String ref, String preRef) {
  //excel2007最大行數是1048576，最大列數是16384，最后一列列名是XFD
  String xfd = ref.replaceAll("\\d+", "");
  String xfd_1 = preRef.replaceAll("\\d+", "");
 
  xfd = fillChar(xfd, 3, '@', true);
  xfd_1 = fillChar(xfd_1, 3, '@', true);
 
  char[] letter = xfd.toCharArray();
  char[] letter_1 = xfd_1.toCharArray();
  int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);
  return res - 1;
 }
 
 public String fillChar(String str, int len, char let, boolean isPre) {
  int len_1 = str.length();
  if (len_1 < len) {
   if (isPre) {
    for (int i = 0; i < (len - len_1); i++) {
     str = let + str;
    }
   } else {
    for (int i = 0; i < (len - len_1); i++) {
     str = str + let;
    }
   }
  }
  return str;
 }
 
 /**
  * @return the exceptionMessage
  */
 public String getExceptionMessage() {
  return exceptionMessage;
 }
}

讀取工具類：

import java.io.InputStream;
import java.util.List;
 
public class ExcelReaderUtil {
 
 //excel2003擴展名
 public static final String EXCEL03_EXTENSION = ".xls";
 //excel2007擴展名
 public static final String EXCEL07_EXTENSION = ".xlsx";
 
 //讀取xls格式
 public static List<List<String>> readExcelXls(InputStream in) throws Exception {
  ExcelXlsReader excelXls=new ExcelXlsReader();
  List<List<String>> dataList =excelXls.process(in);
  return dataList;
 }
 
 //讀取xlsx格式
 public static List<List<String>> readExcelXlsx(InputStream in) throws Exception {
  ExcelXlsxReader excelXls=new ExcelXlsxReader();
  List<List<String>> dataList =excelXls.process(in);
  return dataList;
 }
 
 public static void readExcel(String fileName) throws Exception {
  int totalRows =0;
  if (fileName.endsWith(EXCEL03_EXTENSION)) { //處理excel2003文件
   ExcelXlsReader excelXls=new ExcelXlsReader();
   totalRows =excelXls.process(fileName);
  } else if (fileName.endsWith(EXCEL07_EXTENSION)) {//處理excel2007文件
   ExcelXlsxReader excelXlsxReader = new ExcelXlsxReader();
   totalRows = excelXlsxReader.process(fileName);
  } else {
   throw new Exception("文件格式錯誤，fileName的擴展名只能是xls或xlsx。");
  }
  System.out.println("發送的總行數：" + totalRows);
 }
 
 public static void main(String[] args) throws Exception {
  String path="C:\\test.xlsx";
  ExcelReaderUtil.readExcel(path);
 }
}

以上就是本文的全部內容，希望對大家的學習有所幫助，也希望大家多多支持億速云。

向AI問一下細節

亚洲激情专区-91九色丨porny丨老师-久久久久久久女国产乱让韩-国产精品午夜小视频观看

Java實現excel大數據量導入

猜你喜歡

亚洲激情专区-91九色丨porny丨老师-久久久久久久女国产乱让韩-国产精品午夜小视频观看

Java實現excel大數據量導入

猜你喜歡

最新資訊

相關推薦

相關標簽