Hello everybody, I am trying to do a simple groupBy:
*Code:* val df = hiveContext.sql("SELECT * FROM table1") df .printSchema() df .groupBy("customer_id").count().show(5) *Stacktrace* : root |-- customer_id: string (nullable = true) |-- rank: string (nullable = true) |-- reco_material_id: string (nullable = true) |-- score: string (nullable = true) |-- category: string (nullable = true) |-- is_achat: string (nullable = true) 15/03/26 17:19:29 INFO HiveMetaStore: 0: get_table : db=default tbl=table1 15/03/26 17:19:29 INFO audit: ugi=spark ip=unknown-ip-addr cmd=get_table : db=default tbl=table1 Exception in thread "main" java.util.NoSuchElementException: key not found: customer_id#0 at scala.collection.MapLike$class.default(MapLike.scala:228) at org.apache.spark.sql.catalyst.expressions.AttributeMap.default(AttributeMap.scala:29) at scala.collection.MapLike$class.apply(MapLike.scala:141) at org.apache.spark.sql.catalyst.expressions.AttributeMap.apply(AttributeMap.scala:29) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33) at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:34) at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) at scala.collection.AbstractTraversable.map(Traversable.scala:105) at org.apache.spark.sql.hive.execution.HiveTableScan.<init>(HiveTableScan.scala:53) at org.apache.spark.sql.hive.HiveStrategies$HiveTableScans$$anonfun$15.apply(HiveStrategies.scala:216) at org.apache.spark.sql.hive.HiveStrategies$HiveTableScans$$anonfun$15.apply(HiveStrategies.scala:216) at org.apache.spark.sql.SQLContext$SparkPlanner.pruneFilterProject(SQLContext.scala:1034) at org.apache.spark.sql.hive.HiveStrategies$HiveTableScans$.apply(HiveStrategies.scala:212) at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) at 
org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) at org.apache.spark.sql.catalyst.planning.QueryPlanner.apply(QueryPlanner.scala:59) at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54) at org.apache.spark.sql.execution.SparkStrategies$HashAggregation$.apply(SparkStrategies.scala:152) at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) at org.apache.spark.sql.catalyst.planning.QueryPlanner.apply(QueryPlanner.scala:59) at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54) at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:290) at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) at org.apache.spark.sql.catalyst.planning.QueryPlanner.apply(QueryPlanner.scala:59) at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:1081) at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:1079) at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:1085) at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:1085) at org.apache.spark.sql.DataFrame.collect(DataFrame.scala:815) at org.apache.spark.sql.DataFrame.head(DataFrame.scala:758) at org.apache.spark.sql.DataFrame.take(DataFrame.scala:809) at org.apache.spark.sql.DataFrame.showString(DataFrame.scala:178) at org.apache.spark.sql.DataFrame.show(DataFrame.scala:314) Does anyone have an idea? 
Regards, Germain Tanguy. -- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/DataFrame-GroupBy-tp22242.html Sent from the Apache Spark User List mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org