It ended up being the -Xmx parameter. It was skirting the line of allocated memory, and then JRuby tried to allocate a large chunk of memory and it couldn't spill. No issues with -Xmx1024m
2011/10/18 Thejas Nair <the...@hortonworks.com> > I see that you are running in local mode. You might want to specify a -Xmx > value if you aren't doing that, the default max heap size is usually quite low. > > Is the udf that you are using holding onto a lot of data? Does this happen > with other builtin udfs as well? > > -Thejas > > > > > On 10/17/11 1:19 AM, Jonathan Coveney wrote: > >> I got some more info. The random "INITIAL whatever" is just me testing an >> algebraic udf in jruby. I'm pasting the stack trace from where the errors >> start. >> >> Exception in thread "Low Memory Detector" Exception in thread "Poller >> SunPKCS11-Darwin" java.lang.OutOfMemoryError: Java heap space >> INITIAL item 271346 becomes 271346 >> INITIAL item 271346 becomes 271346 >> INITIAL item 271347 becomes 271347 >> INITIAL item 271347 becomes 271347 >> at sun.security.pkcs11.wrapper.**PKCS11.C_GetSlotInfo(Native Method) >> at sun.security.pkcs11.SunPKCS11.**initToken(SunPKCS11.java:767) >> at sun.security.pkcs11.SunPKCS11.**access$100(SunPKCS11.java:42) >> at sun.security.pkcs11.SunPKCS11$**TokenPoller.run(SunPKCS11.** >> java:700) >> at java.lang.Thread.run(Thread.**java:680) >> INITIAL item 271348 becomes 271348 >> INITIAL item 271348 becomes 271348 >> 2011-10-17 01:17:38,563 [communication thread] INFO >> org.apache.hadoop.mapred.**LocalJobRunner - >> INITIAL item 271349 becomes 271349 >> INITIAL item 271349 becomes 271349 >> 2011-10-17 01:17:40,810 [SpillThread] FATAL >> org.apache.hadoop.mapred.**TaskRunner - Task >> attempt_local_0001_m_000000_0 >> failed : java.lang.OutOfMemoryError: Java heap space >> at java.lang.Long.valueOf(Long.**java:557) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:297) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:251) >> at org.apache.pig.data.**BinInterSedes.readTuple(** >> BinInterSedes.java:111) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:270) >> at 
org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:251) >> at >> org.apache.pig.data.**BinInterSedes.addColsToTuple(** >> BinInterSedes.java:556) >> at org.apache.pig.data.**BinSedesTuple.readFields(** >> BinSedesTuple.java:64) >> at >> org.apache.pig.impl.io.**PigNullableWritable.**readFields(** >> PigNullableWritable.java:114) >> at >> org.apache.hadoop.io.**serializer.**WritableSerialization$** >> WritableDeserializer.**deserialize(**WritableSerialization.java:67) >> at >> org.apache.hadoop.io.**serializer.**WritableSerialization$** >> WritableDeserializer.**deserialize(**WritableSerialization.java:40) >> at >> org.apache.hadoop.mapreduce.**ReduceContext.nextKeyValue(** >> ReduceContext.java:116) >> at >> org.apache.hadoop.mapreduce.**ReduceContext$ValueIterator.** >> next(ReduceContext.java:163) >> at >> org.apache.pig.backend.hadoop.**executionengine.physicalLayer.** >> relationalOperators.**POCombinerPackage.getNext(** >> POCombinerPackage.java:141) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.**processOnePackageOutput(** >> PigCombiner.java:171) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.reduce(PigCombiner.**java:162) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.reduce(PigCombiner.**java:1) >> at org.apache.hadoop.mapreduce.**Reducer.run(Reducer.java:176) >> at >> org.apache.hadoop.mapred.Task$**NewCombinerRunner.combine(** >> Task.java:1222) >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >> sortAndSpill(MapTask.java:**1265) >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >> access$1800(MapTask.java:686) >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer$** >> SpillThread.run(MapTask.java:**1173) >> >> 2011-10-17 01:17:40,811 [SpillThread] FATAL >> org.apache.hadoop.mapred.**LocalJobRunner - Fatal: >> java.lang.OutOfMemoryError: >> 
Java heap space >> at java.lang.Long.valueOf(Long.**java:557) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:297) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:251) >> at org.apache.pig.data.**BinInterSedes.readTuple(** >> BinInterSedes.java:111) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:270) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:251) >> at >> org.apache.pig.data.**BinInterSedes.addColsToTuple(** >> BinInterSedes.java:556) >> at org.apache.pig.data.**BinSedesTuple.readFields(** >> BinSedesTuple.java:64) >> at >> org.apache.pig.impl.io.**PigNullableWritable.**readFields(** >> PigNullableWritable.java:114) >> at >> org.apache.hadoop.io.**serializer.**WritableSerialization$** >> WritableDeserializer.**deserialize(**WritableSerialization.java:67) >> at >> org.apache.hadoop.io.**serializer.**WritableSerialization$** >> WritableDeserializer.**deserialize(**WritableSerialization.java:40) >> at >> org.apache.hadoop.mapreduce.**ReduceContext.nextKeyValue(** >> ReduceContext.java:116) >> at >> org.apache.hadoop.mapreduce.**ReduceContext$ValueIterator.** >> next(ReduceContext.java:163) >> at >> org.apache.pig.backend.hadoop.**executionengine.physicalLayer.** >> relationalOperators.**POCombinerPackage.getNext(** >> POCombinerPackage.java:141) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.**processOnePackageOutput(** >> PigCombiner.java:171) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.reduce(PigCombiner.**java:162) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.reduce(PigCombiner.**java:1) >> at org.apache.hadoop.mapreduce.**Reducer.run(Reducer.java:176) >> at >> org.apache.hadoop.mapred.Task$**NewCombinerRunner.combine(** >> Task.java:1222) >> at >> 
org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >> sortAndSpill(MapTask.java:**1265) >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >> access$1800(MapTask.java:686) >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer$** >> SpillThread.run(MapTask.java:**1173) >> from task: attempt_local_0001_m_000000_0 >> 2011-10-17 01:17:40,812 [Thread-15] WARN >> org.apache.hadoop.mapred.**LocalJobRunner - job_local_0001 >> java.io.IOException: Spill failed >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >> collect(MapTask.java:860) >> at >> org.apache.hadoop.mapred.**MapTask$NewOutputCollector.** >> write(MapTask.java:541) >> at >> org.apache.hadoop.mapreduce.**TaskInputOutputContext.write(** >> TaskInputOutputContext.java:**80) >> at >> org.apache.pig.backend.hadoop.**executionengine.**mapReduceLayer.** >> PigGenericMapReduce$Map.**collect(PigGenericMapReduce.**java:127) >> at >> org.apache.pig.backend.hadoop.**executionengine.**mapReduceLayer.** >> PigGenericMapBase.runPipeline(**PigGenericMapBase.java:269) >> at >> org.apache.pig.backend.hadoop.**executionengine.**mapReduceLayer.** >> PigGenericMapBase.map(**PigGenericMapBase.java:262) >> at >> org.apache.pig.backend.hadoop.**executionengine.**mapReduceLayer.** >> PigGenericMapBase.map(**PigGenericMapBase.java:1) >> at org.apache.hadoop.mapreduce.**Mapper.run(Mapper.java:144) >> at org.apache.hadoop.mapred.**MapTask.runNewMapper(MapTask.** >> java:621) >> at org.apache.hadoop.mapred.**MapTask.run(MapTask.java:305) >> at >> org.apache.hadoop.mapred.**LocalJobRunner$Job.run(** >> LocalJobRunner.java:177) >> Caused by: java.lang.OutOfMemoryError: Java heap space >> at java.lang.Long.valueOf(Long.**java:557) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:297) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:251) >> at org.apache.pig.data.**BinInterSedes.readTuple(** >> BinInterSedes.java:111) >> at 
org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:270) >> at org.apache.pig.data.**BinInterSedes.readDatum(** >> BinInterSedes.java:251) >> at >> org.apache.pig.data.**BinInterSedes.addColsToTuple(** >> BinInterSedes.java:556) >> at org.apache.pig.data.**BinSedesTuple.readFields(** >> BinSedesTuple.java:64) >> at >> org.apache.pig.impl.io.**PigNullableWritable.**readFields(** >> PigNullableWritable.java:114) >> at >> org.apache.hadoop.io.**serializer.**WritableSerialization$** >> WritableDeserializer.**deserialize(**WritableSerialization.java:67) >> at >> org.apache.hadoop.io.**serializer.**WritableSerialization$** >> WritableDeserializer.**deserialize(**WritableSerialization.java:40) >> at >> org.apache.hadoop.mapreduce.**ReduceContext.nextKeyValue(** >> ReduceContext.java:116) >> at >> org.apache.hadoop.mapreduce.**ReduceContext$ValueIterator.** >> next(ReduceContext.java:163) >> at >> org.apache.pig.backend.hadoop.**executionengine.physicalLayer.** >> relationalOperators.**POCombinerPackage.getNext(** >> POCombinerPackage.java:141) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.**processOnePackageOutput(** >> PigCombiner.java:171) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.reduce(PigCombiner.**java:162) >> at >> org.apache.pig.backend.hadoop.**executionengine.** >> mapReduceLayer.PigCombiner$**Combine.reduce(PigCombiner.**java:1) >> at org.apache.hadoop.mapreduce.**Reducer.run(Reducer.java:176) >> at >> org.apache.hadoop.mapred.Task$**NewCombinerRunner.combine(** >> Task.java:1222) >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >> sortAndSpill(MapTask.java:**1265) >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >> access$1800(MapTask.java:686) >> at >> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer$** >> SpillThread.run(MapTask.java:**1173) >> >> >> 2011/10/17 Jonathan Coveney<jcove...@gmail.com> >> 
>> I recently got this error. I know for a fact I am doing something that is >>> causing memory errors, I'm just not sure what. I was wondering if any of >>> you >>> have run into this error before, and know what sorts of things might lead >>> to >>> it? In this case, it is arising from an Algebraic interface, so the >>> details >>> are more opaque... >>> >>> java.io.IOException: Spill failed >>> at >>> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >>> collect(MapTask.java:860) >>> at >>> org.apache.hadoop.mapred.**MapTask$NewOutputCollector.** >>> write(MapTask.java:541) >>> at >>> org.apache.hadoop.mapreduce.**TaskInputOutputContext.write(** >>> TaskInputOutputContext.java:**80) >>> at >>> org.apache.pig.backend.hadoop.**executionengine.**mapReduceLayer.** >>> PigGenericMapReduce$Map.**collect(PigGenericMapReduce.**java:127) >>> at >>> org.apache.pig.backend.hadoop.**executionengine.**mapReduceLayer.** >>> PigGenericMapBase.runPipeline(**PigGenericMapBase.java:269) >>> at >>> org.apache.pig.backend.hadoop.**executionengine.**mapReduceLayer.** >>> PigGenericMapBase.map(**PigGenericMapBase.java:262) >>> at >>> org.apache.pig.backend.hadoop.**executionengine.**mapReduceLayer.** >>> PigGenericMapBase.map(**PigGenericMapBase.java:1) >>> at org.apache.hadoop.mapreduce.**Mapper.run(Mapper.java:144) >>> at org.apache.hadoop.mapred.**MapTask.runNewMapper(MapTask.** >>> java:621) >>> at org.apache.hadoop.mapred.**MapTask.run(MapTask.java:305) >>> at >>> org.apache.hadoop.mapred.**LocalJobRunner$Job.run(** >>> LocalJobRunner.java:177) >>> Caused by: java.lang.OutOfMemoryError: Java heap space >>> at java.util.Arrays.copyOf(**Arrays.java:2734) >>> at java.util.ArrayList.**ensureCapacity(ArrayList.java:**167) >>> at java.util.ArrayList.add(**ArrayList.java:351) >>> at org.apache.pig.data.**InternalCachedBag.add(** >>> InternalCachedBag.java:80) >>> at >>> org.apache.pig.backend.hadoop.**executionengine.physicalLayer.** >>> 
relationalOperators.**POCombinerPackage.getNext(** >>> POCombinerPackage.java:154) >>> at >>> org.apache.pig.backend.hadoop.**executionengine.** >>> mapReduceLayer.PigCombiner$**Combine.**processOnePackageOutput(** >>> PigCombiner.java:171) >>> at >>> org.apache.pig.backend.hadoop.**executionengine.** >>> mapReduceLayer.PigCombiner$**Combine.reduce(PigCombiner.**java:162) >>> at >>> org.apache.pig.backend.hadoop.**executionengine.** >>> mapReduceLayer.PigCombiner$**Combine.reduce(PigCombiner.**java:1) >>> at org.apache.hadoop.mapreduce.**Reducer.run(Reducer.java:176) >>> at >>> org.apache.hadoop.mapred.Task$**NewCombinerRunner.combine(** >>> Task.java:1222) >>> at >>> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >>> sortAndSpill(MapTask.java:**1265) >>> at >>> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer.** >>> access$1800(MapTask.java:686) >>> at >>> org.apache.hadoop.mapred.**MapTask$MapOutputBuffer$** >>> SpillThread.run(MapTask.java:**1173) >>> >>> >>> >> >