I got some more info. The random "INITIAL whatever" lines interleaved below are just debug output from an algebraic UDF I'm testing in JRuby. I'm pasting the stack trace starting from where the errors begin.
Exception in thread "Low Memory Detector" Exception in thread "Poller SunPKCS11-Darwin" java.lang.OutOfMemoryError: Java heap space INITIAL item 271346 becomes 271346 INITIAL item 271346 becomes 271346 INITIAL item 271347 becomes 271347 INITIAL item 271347 becomes 271347 at sun.security.pkcs11.wrapper.PKCS11.C_GetSlotInfo(Native Method) at sun.security.pkcs11.SunPKCS11.initToken(SunPKCS11.java:767) at sun.security.pkcs11.SunPKCS11.access$100(SunPKCS11.java:42) at sun.security.pkcs11.SunPKCS11$TokenPoller.run(SunPKCS11.java:700) at java.lang.Thread.run(Thread.java:680) INITIAL item 271348 becomes 271348 INITIAL item 271348 becomes 271348 2011-10-17 01:17:38,563 [communication thread] INFO org.apache.hadoop.mapred.LocalJobRunner - INITIAL item 271349 becomes 271349 INITIAL item 271349 becomes 271349 2011-10-17 01:17:40,810 [SpillThread] FATAL org.apache.hadoop.mapred.TaskRunner - Task attempt_local_0001_m_000000_0 failed : java.lang.OutOfMemoryError: Java heap space at java.lang.Long.valueOf(Long.java:557) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:297) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:251) at org.apache.pig.data.BinInterSedes.readTuple(BinInterSedes.java:111) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:270) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:251) at org.apache.pig.data.BinInterSedes.addColsToTuple(BinInterSedes.java:556) at org.apache.pig.data.BinSedesTuple.readFields(BinSedesTuple.java:64) at org.apache.pig.impl.io.PigNullableWritable.readFields(PigNullableWritable.java:114) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40) at org.apache.hadoop.mapreduce.ReduceContext.nextKeyValue(ReduceContext.java:116) at 
org.apache.hadoop.mapreduce.ReduceContext$ValueIterator.next(ReduceContext.java:163) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCombinerPackage.getNext(POCombinerPackage.java:141) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.processOnePackageOutput(PigCombiner.java:171) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.reduce(PigCombiner.java:162) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.reduce(PigCombiner.java:1) at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176) at org.apache.hadoop.mapred.Task$NewCombinerRunner.combine(Task.java:1222) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill(MapTask.java:1265) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.access$1800(MapTask.java:686) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer$SpillThread.run(MapTask.java:1173) 2011-10-17 01:17:40,811 [SpillThread] FATAL org.apache.hadoop.mapred.LocalJobRunner - Fatal: java.lang.OutOfMemoryError: Java heap space at java.lang.Long.valueOf(Long.java:557) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:297) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:251) at org.apache.pig.data.BinInterSedes.readTuple(BinInterSedes.java:111) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:270) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:251) at org.apache.pig.data.BinInterSedes.addColsToTuple(BinInterSedes.java:556) at org.apache.pig.data.BinSedesTuple.readFields(BinSedesTuple.java:64) at org.apache.pig.impl.io.PigNullableWritable.readFields(PigNullableWritable.java:114) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40) at 
org.apache.hadoop.mapreduce.ReduceContext.nextKeyValue(ReduceContext.java:116) at org.apache.hadoop.mapreduce.ReduceContext$ValueIterator.next(ReduceContext.java:163) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCombinerPackage.getNext(POCombinerPackage.java:141) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.processOnePackageOutput(PigCombiner.java:171) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.reduce(PigCombiner.java:162) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.reduce(PigCombiner.java:1) at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176) at org.apache.hadoop.mapred.Task$NewCombinerRunner.combine(Task.java:1222) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill(MapTask.java:1265) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.access$1800(MapTask.java:686) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer$SpillThread.run(MapTask.java:1173) from task: attempt_local_0001_m_000000_0 2011-10-17 01:17:40,812 [Thread-15] WARN org.apache.hadoop.mapred.LocalJobRunner - job_local_0001 java.io.IOException: Spill failed at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:860) at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:541) at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapReduce$Map.collect(PigGenericMapReduce.java:127) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.runPipeline(PigGenericMapBase.java:269) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:262) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:1) at 
org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:621) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177) Caused by: java.lang.OutOfMemoryError: Java heap space at java.lang.Long.valueOf(Long.java:557) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:297) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:251) at org.apache.pig.data.BinInterSedes.readTuple(BinInterSedes.java:111) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:270) at org.apache.pig.data.BinInterSedes.readDatum(BinInterSedes.java:251) at org.apache.pig.data.BinInterSedes.addColsToTuple(BinInterSedes.java:556) at org.apache.pig.data.BinSedesTuple.readFields(BinSedesTuple.java:64) at org.apache.pig.impl.io.PigNullableWritable.readFields(PigNullableWritable.java:114) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:67) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40) at org.apache.hadoop.mapreduce.ReduceContext.nextKeyValue(ReduceContext.java:116) at org.apache.hadoop.mapreduce.ReduceContext$ValueIterator.next(ReduceContext.java:163) at org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCombinerPackage.getNext(POCombinerPackage.java:141) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.processOnePackageOutput(PigCombiner.java:171) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.reduce(PigCombiner.java:162) at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.reduce(PigCombiner.java:1) at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176) at org.apache.hadoop.mapred.Task$NewCombinerRunner.combine(Task.java:1222) at 
org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill(MapTask.java:1265) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.access$1800(MapTask.java:686) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer$SpillThread.run(MapTask.java:1173) 2011/10/17 Jonathan Coveney <jcove...@gmail.com> > I recently got this error. I know for a fact I am doing something that is > causing memory errors, I'm just not sure what. I was wondering if any of you > have run into this error before, and know what sorts of things might lead to > it? In this case, it is arising from an Algebraic interface, so the details > are more opaque... > > java.io.IOException: Spill failed > at > org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:860) > at > org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:541) > at > org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapReduce$Map.collect(PigGenericMapReduce.java:127) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.runPipeline(PigGenericMapBase.java:269) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:262) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:1) > at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144) > at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:621) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) > at > org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:177) > Caused by: java.lang.OutOfMemoryError: Java heap space > at java.util.Arrays.copyOf(Arrays.java:2734) > at java.util.ArrayList.ensureCapacity(ArrayList.java:167) > at java.util.ArrayList.add(ArrayList.java:351) > at org.apache.pig.data.InternalCachedBag.add(InternalCachedBag.java:80) > at > 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POCombinerPackage.getNext(POCombinerPackage.java:154) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.processOnePackageOutput(PigCombiner.java:171) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.reduce(PigCombiner.java:162) > at > org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner$Combine.reduce(PigCombiner.java:1) > at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176) > at > org.apache.hadoop.mapred.Task$NewCombinerRunner.combine(Task.java:1222) > at > org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill(MapTask.java:1265) > at > org.apache.hadoop.mapred.MapTask$MapOutputBuffer.access$1800(MapTask.java:686) > at > org.apache.hadoop.mapred.MapTask$MapOutputBuffer$SpillThread.run(MapTask.java:1173) > >