[ https://issues.apache.org/jira/browse/TIKA-3722?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
sagi shechter updated TIKA-3722:
--------------------------------
    Attachment: records_headers_02.xlsx

> OOM exception on xlsx parsing
> -----------------------------
>
> Key: TIKA-3722
> URL: https://issues.apache.org/jira/browse/TIKA-3722
> Project: Tika
> Issue Type: Bug
> Reporter: sagi shechter
> Priority: Major
> Attachments: records_headers_02.xlsx
>
>
> {code:java}
> The full exception stack trace is included below:
> java.lang.OutOfMemoryError: Java heap space
>     at java.base/java.util.Arrays.copyOf(Arrays.java:3817)
>     at java.base/java.util.BitSet.ensureCapacity(BitSet.java:338)
>     at java.base/java.util.BitSet.expandTo(BitSet.java:353)
>     at java.base/java.util.BitSet.set(BitSet.java:448)
>     at de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler.characters(BoilerpipeHTMLContentHandler.java:267)
>     at org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler.characters(BoilerpipeContentHandler.java:165)
>     at org.apache.tika.sax.TeeContentHandler.characters(TeeContentHandler.java:97)
>     at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:141)
>     at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:253)
>     at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:141)
>     at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:141)
>     at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:141)
>     at org.apache.tika.sax.SafeContentHandler.access$201(SafeContentHandler.java:47)
>     at org.apache.tika.sax.SafeContentHandler.lambda$new$0(SafeContentHandler.java:57)
>     at org.apache.tika.sax.SafeContentHandler$$Lambda$515/0x0000000800506c40.write(Unknown Source)
>     at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:106)
>     at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:250)
>     at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:270)
>     at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:295)
>     at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator$SheetTextAsHTML.cell(XSSFExcelExtractorDecorator.java:473)
>     at org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.outputCell(XSSFSheetXMLHandler.java:444)
>     at org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.endElement(XSSFSheetXMLHandler.java:317)
>     at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator$XSSFSheetInterestingPartsCapturer.endElement(XSSFExcelExtractorDecorator.java:561)
>     at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:132)
>     at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
>     at org.apache.xerces.impl.XMLNSDocumentScannerImpl.scanEndElement(Unknown Source)
>     at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispatch(Unknown Source)
>     at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source)
>     at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source)
>     at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source)
>     at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
>     at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source)
> {code}

--
This message was sent by Atlassian Jira
(v8.20.7#820007)