sagi shechter created TIKA-3722: ----------------------------------- Summary: OOM exception on xlsx parsing Key: TIKA-3722 URL: https://issues.apache.org/jira/browse/TIKA-3722 Project: Tika Issue Type: Bug Reporter: sagi shechter
The full exception stack trace is included below: {code:java} java.lang.OutOfMemoryError: Java heap space at java.base/java.util.Arrays.copyOf(Arrays.java:3817) at java.base/java.util.BitSet.ensureCapacity(BitSet.java:338) at java.base/java.util.BitSet.expandTo(BitSet.java:353) at java.base/java.util.BitSet.set(BitSet.java:448) at de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler.characters(BoilerpipeHTMLContentHandler.java:267) at org.apache.tika.sax.boilerpipe.BoilerpipeContentHandler.characters(BoilerpipeContentHandler.java:165) at org.apache.tika.sax.TeeContentHandler.characters(TeeContentHandler.java:97) at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:141) at org.apache.tika.sax.SecureContentHandler.characters(SecureContentHandler.java:253) at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:141) at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:141) at org.apache.tika.sax.ContentHandlerDecorator.characters(ContentHandlerDecorator.java:141) at org.apache.tika.sax.SafeContentHandler.access$201(SafeContentHandler.java:47) at org.apache.tika.sax.SafeContentHandler.lambda$new$0(SafeContentHandler.java:57) at org.apache.tika.sax.SafeContentHandler$$Lambda$515/0x0000000800506c40.write(Unknown Source) at org.apache.tika.sax.SafeContentHandler.filter(SafeContentHandler.java:106) at org.apache.tika.sax.SafeContentHandler.characters(SafeContentHandler.java:250) at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:270) at org.apache.tika.sax.XHTMLContentHandler.characters(XHTMLContentHandler.java:295) at org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator$SheetTextAsHTML.cell(XSSFExcelExtractorDecorator.java:473) at org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.outputCell(XSSFSheetXMLHandler.java:444) at org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.endElement(XSSFSheetXMLHandler.java:317) at 
org.apache.tika.parser.microsoft.ooxml.XSSFExcelExtractorDecorator$XSSFSheetInterestingPartsCapturer.endElement(XSSFExcelExtractorDecorator.java:561) at org.apache.tika.sax.ContentHandlerDecorator.endElement(ContentHandlerDecorator.java:132) at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source) at org.apache.xerces.impl.XMLNSDocumentScannerImpl.scanEndElement(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispatch(Unknown Source) at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source) at org.apache.xerces.parsers.XMLParser.parse(Unknown Source) at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source) {code} -- This message was sent by Atlassian Jira (v8.20.7#820007)