首页 文章

使用Apache POI进行低内存写入/读取

提问于
浏览
4

我正在尝试编写一个非常大的XLSX文件(4M单元),而且我遇到了一些内存问题 .

我不能使用SXSSF,因为我还需要读取模板中的现有单元格 .

有什么办法可以减少内存占用吗?
也许结合流媒体阅读和流媒体写作?

2 回答

  • 2

    要处理内存不足的大数据,我认为唯一的选择是SXSSF api-s . 如果您需要读取现有单元格的一些数据,我假设您不需要同时使用整个4M . 在这种基于您的应用程序要求的情况下,您可以自己处理窗口大小并仅在内存中保留特定时间所需的数据量 . 您可以从以下示例开始:http://poi.apache.org/spreadsheet/how-to.html#sxssf

    有点像

    SXSSFWorkbook wb = new SXSSFWorkbook(-1); // turn off auto-flushing and accumulate all rows in memory
    // manually control how rows are flushed to disk 
    if(rownum % NOR == 0) {
    ((SXSSFSheet)sh).flushRows(NOR); // retain NOR last rows and flush all others
    

    希望这可以帮助 .

  • 2

    我使用SAX解析器来处理XML文档表示的事件 . 这是

    import com.sun.org.apache.xerces.internal.parsers.SAXParser;
    import org.apache.poi.openxml4j.opc.PackageAccess;
    import org.apache.poi.ss.usermodel.Cell;
    import org.apache.poi.ss.usermodel.Row;
    import org.apache.poi.ss.usermodel.Sheet;
    import org.apache.poi.xssf.eventusermodel.XSSFReader;
    import org.apache.poi.xssf.model.SharedStringsTable;
    import org.apache.poi.openxml4j.opc.OPCPackage;
    import org.apache.poi.xssf.usermodel.XSSFRichTextString;
    import org.xml.sax.Attributes;
    import org.xml.sax.ContentHandler;
    import org.xml.sax.InputSource;
    import org.xml.sax.SAXException;
    import org.xml.sax.XMLReader;
    import org.xml.sax.helpers.DefaultHandler;
    
    import java.io.BufferedInputStream;
    import java.io.InputStream;
    import java.util.Collections;
    import java.util.Iterator;
    import java.util.LinkedList;
    import java.util.List;
    
    
    public class LowMemoryExcelFileReader {
    
        private String file;
    
        public LowMemoryExcelFileReader(String file) {
            this.file = file;
        }
    
        public List<String[]> read() {
            try {
                return processFirstSheet(file);
            } catch (Exception e) {
               throw new RuntimeException(e);
            }
        }
    
        private List<String []> readSheet(Sheet sheet) {
            List<String []> res = new LinkedList<>();
            Iterator<Row> rowIterator = sheet.rowIterator();
    
            while (rowIterator.hasNext()) {
                Row row = rowIterator.next();
                int cellsNumber = row.getLastCellNum();
                String [] cellsValues = new String[cellsNumber];
    
                Iterator<Cell> cellIterator = row.cellIterator();
                int cellIndex = 0;
    
                while (cellIterator.hasNext()) {
                    Cell cell = cellIterator.next();
                    cellsValues[cellIndex++] = cell.getStringCellValue();
                }
    
                res.add(cellsValues);
            }
            return res;
        }
    
        public String getFile() {
            return file;
        }
    
        public void setFile(String file) {
            this.file = file;
        }
    
        private List<String []> processFirstSheet(String filename) throws Exception {
            OPCPackage pkg = OPCPackage.open(filename, PackageAccess.READ);
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();
    
            SheetHandler handler = new SheetHandler(sst);
            XMLReader parser = fetchSheetParser(handler);
            Iterator<InputStream> sheetIterator = r.getSheetsData();
    
            if (!sheetIterator.hasNext()) {
                return Collections.emptyList();
            }
    
            InputStream sheetInputStream = sheetIterator.next();
            BufferedInputStream bisSheet = new BufferedInputStream(sheetInputStream);
            InputSource sheetSource = new InputSource(bisSheet);
            parser.parse(sheetSource);
            List<String []> res = handler.getRowCache();
            bisSheet.close();
            return res;
        }
    
        public XMLReader fetchSheetParser(ContentHandler handler) throws SAXException {
            XMLReader parser = new SAXParser();
            parser.setContentHandler(handler);
            return parser;
        }
    
        /**
         * See org.xml.sax.helpers.DefaultHandler javadocs
         */
        private static class SheetHandler extends DefaultHandler {
    
            private static final String ROW_EVENT = "row";
            private static final String CELL_EVENT = "c";
    
            private SharedStringsTable sst;
            private String lastContents;
            private boolean nextIsString;
    
            private List<String> cellCache = new LinkedList<>();
            private List<String[]> rowCache = new LinkedList<>();
    
            private SheetHandler(SharedStringsTable sst) {
                this.sst = sst;
            }
    
            public void startElement(String uri, String localName, String name,
                                     Attributes attributes) throws SAXException {
                // c => cell
                if (CELL_EVENT.equals(name)) {
                    String cellType = attributes.getValue("t");
                    if(cellType != null && cellType.equals("s")) {
                        nextIsString = true;
                    } else {
                        nextIsString = false;
                    }
                } else if (ROW_EVENT.equals(name)) {
                    if (!cellCache.isEmpty()) {
                        rowCache.add(cellCache.toArray(new String[cellCache.size()]));
                    }
                    cellCache.clear();
                }
    
                // Clear contents cache
                lastContents = "";
            }
    
            public void endElement(String uri, String localName, String name)
                    throws SAXException {
                // Process the last contents as required.
                // Do now, as characters() may be called more than once
                if(nextIsString) {
                    int idx = Integer.parseInt(lastContents);
                    lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                    nextIsString = false;
                }
    
                // v => contents of a cell
                // Output after we've seen the string contents
                if(name.equals("v")) {
                    cellCache.add(lastContents);
                }
            }
    
            public void characters(char[] ch, int start, int length)
                    throws SAXException {
                lastContents += new String(ch, start, length);
            }
    
            public List<String[]> getRowCache() {
                return rowCache;
            }
        }
    }
    

相关问题