[ https://issues.apache.org/jira/browse/HIVE-21084?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Lê Văn Thanh updated HIVE-21084:
--------------------------------
    Description: 
Hello,

I have a table with 10 GB of data and only 4 GB of free RAM. When I try to SELECT the data from this table into another table stored as ORC (I am running the SELECT query from the console), the job fails with a memory error.

Details of the bug:

Caused by: java.util.concurrent.ExecutionException: Exception thrown by job
    at org.apache.spark.JavaFutureActionWrapper.getImpl(FutureAction.scala:272)
    at org.apache.spark.JavaFutureActionWrapper.get(FutureAction.scala:277)
    at org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobStatus.getError(LocalSparkJobStatus.java:171)
    at org.apache.hadoop.hive.ql.exec.spark.SparkTask.getSparkJobInfo(SparkTask.java:369)
    at org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:118)
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
    at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2182)
    at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1838)
    at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1525)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1236)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1231)
    at org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:255)
    ... 11 more
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 0.0 failed 1 times, most recent failure: Lost task 6.0 in stage 0.0 (TID 7, localhost, executor driver): java.lang.OutOfMemoryError: GC overhead limit exceeded
    at java.util.Arrays.copyOfRange(Arrays.java:3664)
    at java.lang.String.<init>(String.java:207)
    at java.nio.HeapCharBuffer.toString(HeapCharBuffer.java:567)
    at java.nio.CharBuffer.toString(CharBuffer.java:1241)
    at org.apache.hadoop.io.Text.decode(Text.java:412)
    at org.apache.hadoop.io.Text.decode(Text.java:389)
    at org.apache.hadoop.io.Text.toString(Text.java:280)
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46)
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getString(PrimitiveObjectInspectorUtils.java:891)
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter$StringConverter.convert(PrimitiveObjectInspectorConverter.java:508)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp.evaluate(GenericUDFToUnixTimeStamp.java:127)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric.evaluate(GenericUDFBaseNumeric.java:128)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
    at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
    at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
    at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
    at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
    at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
    at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
    at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
    at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
    at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
    at java.util.Arrays.copyOfRange(Arrays.java:3664)
    at java.lang.String.<init>(String.java:207)
    at java.nio.HeapCharBuffer.toString(HeapCharBuffer.java:567)
    at java.nio.CharBuffer.toString(CharBuffer.java:1241)
    at org.apache.hadoop.io.Text.decode(Text.java:412)
    at org.apache.hadoop.io.Text.decode(Text.java:389)
    at org.apache.hadoop.io.Text.toString(Text.java:280)
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector.getPrimitiveJavaObject(WritableStringObjectInspector.java:46)
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getString(PrimitiveObjectInspectorUtils.java:891)
    at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter$StringConverter.convert(PrimitiveObjectInspectorConverter.java:508)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp.evaluate(GenericUDFToUnixTimeStamp.java:127)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator$DeferredExprObject.get(ExprNodeGenericFuncEvaluator.java:88)
    at org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseNumeric.evaluate(GenericUDFBaseNumeric.java:128)
    at org.apache.hadoop.hive.ql.exec.ExprNodeGenericFuncEvaluator._evaluate(ExprNodeGenericFuncEvaluator.java:187)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:80)
    at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:68)
    at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
    at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
    at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:148)
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:547)
    at org.apache.hadoop.hive.ql.exec.spark.SparkMapRecordHandler.processRow(SparkMapRecordHandler.java:136)
    at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:48)
    at org.apache.hadoop.hive.ql.exec.spark.HiveMapFunctionResultList.processNextRecord(HiveMapFunctionResultList.java:27)
    at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85)
    at scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42)
    at scala.collection.Iterator$class.foreach(Iterator.scala:893)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
    at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)
    at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127)

How do I set up Apache Spark to use the local hard disk when the data does not fit in RAM in local mode?

  was:
Hello,

I have a table with 10 GB of data and only 4 GB of free RAM. When I try to SELECT the data from this table into another table stored as ORC (I am running the SELECT query from the console), the job fails with a memory error.

How do I set up Apache Spark to use the local hard disk when the data does not fit in RAM in local mode?
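The failing statement itself is not quoted in the report, but the GenericUDFToUnixTimeStamp and GenericUDFBaseNumeric frames in the trace suggest a query that converts a string column with unix_timestamp() and then does arithmetic on the result while the table is copied into ORC. Below is a minimal sketch of a statement with that shape; the table and column names (source_text, target_orc, event_time, payload) are hypothetical and not taken from this ticket:

    -- Hypothetical reproduction only; all names below are illustrative.
    -- unix_timestamp(string) plus numeric arithmetic matches the UDF frames in the trace.
    CREATE TABLE target_orc STORED AS ORC AS
    SELECT
        unix_timestamp(event_time) - 3600 AS event_ts,
        payload
    FROM source_text;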
> gc overhead limit exceeded
> ---------------------------
>
>                 Key: HIVE-21084
>                 URL: https://issues.apache.org/jira/browse/HIVE-21084
>             Project: Hive
>          Issue Type: Bug
>         Environment: Ubuntu 16.04
> Hive 2.3.0
> Spark 2.0.0
>            Reporter: Lê Văn Thanh
>            Priority: Critical
>
> Hello,
> I have a table with 10 GB of data and only 4 GB of free RAM. When I try to SELECT the data from this table into another table stored as ORC (running the SELECT from the console), the job fails with a memory error (full stack trace in the Description above).
>
> How do I set up Apache Spark to use the local hard disk when the data does not fit in RAM in local mode?



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
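On the closing question: in local mode the whole job runs inside a single driver JVM, and while Spark writes shuffle data to disk (under spark.local.dir) on its own, the OutOfMemoryError above is raised while Hive's map-side operators are processing rows, which no disk-spill setting avoids. The usual workarounds are a larger heap and less task concurrency. The sketch below shows settings commonly tried from the Hive CLI before re-running the query; the values are assumptions for a machine with about 4 GB free, not tested recommendations for this ticket:

    -- Hedged example; values are guesses, tune for the actual machine.
    set hive.execution.engine=spark;
    -- Fewer concurrent local tasks means fewer rows held in memory at once.
    set spark.master=local[2];
    -- In local mode executors share the driver JVM; this may need to be set
    -- before that JVM starts (e.g. in spark-defaults.conf) to take effect.
    set spark.driver.memory=3g;
    -- Leave more of the heap to Hive operators and the ORC writer.
    set spark.memory.fraction=0.3;
    -- Hypothetical path: the directory where Spark spills shuffle data.
    set spark.local.dir=/path/to/scratch;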