For more information, below is the thread dump captured when the OutOfMemoryError occurred:
"Thread-347" prio=5 tid=390 RUNNABLE
at org.apache.hadoop.mapred.IFile$Reader.readNextBlock(IFile.java:342)
at org.apache.hadoop.mapred.IFile$Reader.next(IFile.java:404)
Local Variable: org.apache.hadoop.io.DataInputBuffer#7
Local Variable: org.apache.hadoop.io.DataInputBuffer#6
Local Variable: org.apache.hadoop.mapred.IFile$Reader#3
at org.apache.hadoop.mapred.Merger$Segment.next(Merger.java:220)
at org.apache.hadoop.mapred.Merger$MergeQueue.adjustPriorityQueue(Merger.java:330)
Local Variable: org.apache.hadoop.mapred.Merger$Segment#2
at org.apache.hadoop.mapred.Merger$MergeQueue.next(Merger.java:350)
at org.apache.hadoop.mapred.Merger.writeFile(Merger.java:156)
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.mergeParts(MapTask.java:1535)
Local Variable: org.apache.hadoop.mapred.IFile$Writer#1
Local Variable: org.apache.hadoop.mapred.SpillRecord#1
Local Variable: org.apache.hadoop.mapred.IndexRecord#1
Local Variable: org.apache.hadoop.fs.Path[]#2
Local Variable: org.apache.hadoop.fs.Path#30
Local Variable: org.apache.hadoop.fs.FSDataOutputStream#1
Local Variable: org.apache.hadoop.fs.Path#29
Local Variable: org.apache.hadoop.mapred.Merger$MergeQueue#1
Local Variable: java.util.ArrayList#15453
at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.flush(MapTask.java:1154)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:359)
Local Variable: org.apache.hadoop.mapred.MapTask$MapOutputBuffer#1
Local Variable: org.apache.hadoop.io.DataInputBuffer#3
Local Variable: org.apache.hadoop.mapred.FileSplit#1
Local Variable: org.apache.hadoop.io.BytesWritable#1
Local Variable: org.apache.hadoop.mapred.MapTask$TrackedRecordReader#1
Local Variable: org.apache.hadoop.mapred.SequenceFileRecordReader#1
Local Variable: org.apache.nutch.fetcher.Fetcher#2
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307)
Local Variable: org.apache.hadoop.mapred.Task$TaskReporter#1
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177)
Local Variable: org.apache.hadoop.mapred.TaskAttemptID#1
Local Variable: org.apache.hadoop.mapred.FileOutputCommitter#1
Local Variable: org.apache.hadoop.mapred.JobClient$RawSplit[]#1
Local Variable: org.apache.hadoop.mapred.JobContext#1
Local Variable: org.apache.hadoop.mapred.MapTask#1
Local Variable: org.apache.hadoop.mapred.JobConf#11
--
View this message in context:
http://lucene.472066.n3.nabble.com/Tika-Excel-parsing-causing-out-of-memory-tp1188201p1204232.html
Sent from the Nutch - User mailing list archive at Nabble.com.