[ https://issues.apache.org/jira/browse/SPARK-4052?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yin Huai updated SPARK-4052:
----------------------------
    Description: 
It seems that ScalaReflection and InsertIntoHiveTable only accept scala.collection.immutable.Map as the Scala type backing a MapType value. Here are test cases showing the errors.

{code}
val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
import sqlContext.createSchemaRDD

val rdd = sc.parallelize(("key", "value") :: Nil)

// Test1: This one fails.
case class Test1(m: scala.collection.Map[String, String])
val rddOfTest1 = rdd.map { case (k, v) => Test1(Map(k -> v)) }
rddOfTest1.registerTempTable("t1")
/* Stack trace
scala.MatchError: scala.collection.Map[String,String] (of class scala.reflect.internal.Types$TypeRef$$anon$5)
	at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:53)
	at org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:64)
	at org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:62)
	...
*/

// Test2: This one is fine.
case class Test2(m: scala.collection.immutable.Map[String, String])
val rddOfTest2 = rdd.map { case (k, v) => Test2(Map(k -> v)) }
rddOfTest2.registerTempTable("t2")
sqlContext.sql("SELECT m FROM t2").collect
sqlContext.sql("SELECT m['key'] FROM t2").collect

// Test3: This one fails.
import org.apache.spark.sql._
val schema = StructType(StructField("m", MapType(StringType, StringType), true) :: Nil)
val rowRDD = rdd.map { case (k, v) => Row(scala.collection.mutable.HashMap(k -> v)) }
val schemaRDD = sqlContext.applySchema(rowRDD, schema)
schemaRDD.registerTempTable("t3")
sqlContext.sql("SELECT m FROM t3").collect
sqlContext.sql("SELECT m['key'] FROM t3").collect
sqlContext.sql("CREATE TABLE testHiveTable1(m MAP <STRING, STRING>)")
sqlContext.sql("INSERT OVERWRITE TABLE testHiveTable1 SELECT m FROM t3")
/* Stack trace
14/10/22 19:30:56 INFO DAGScheduler: Job 4 failed: runJob at InsertIntoHiveTable.scala:124, took 1.384579 s
org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 4.0 failed 4 times, most recent failure: Lost task 1.3 in stage 4.0 (TID 12, yins-mbp): java.lang.ClassCastException: scala.collection.mutable.HashMap cannot be cast to scala.collection.immutable.Map
	org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$wrapperFor$5.apply(InsertIntoHiveTable.scala:96)
	org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$wrapperFor$5.apply(InsertIntoHiveTable.scala:96)
	org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1$1.apply(InsertIntoHiveTable.scala:148)
	org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1$1.apply(InsertIntoHiveTable.scala:145)
*/

// Test4: This one is fine.
val rowRDD = rdd.map { case (k, v) => Row(Map(k -> v)) }
val schemaRDD = sqlContext.applySchema(rowRDD, schema)
schemaRDD.registerTempTable("t4")
sqlContext.sql("SELECT m FROM t4").collect
sqlContext.sql("SELECT m['key'] FROM t4").collect
sqlContext.sql("CREATE TABLE testHiveTable1(m MAP <STRING, STRING>)")
sqlContext.sql("INSERT OVERWRITE TABLE testHiveTable1 SELECT m FROM t4")
{code}

  was:
It seems that ScalaReflection and InsertIntoHiveTable only accept scala.collection.immutable.Map as the Scala type backing a MapType value. Here are test cases showing the errors.

{code}
val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)
import sqlContext.createSchemaRDD

val rdd = sc.parallelize(("key", "value") :: Nil)

// Test1: This one fails.
case class Test1(m: scala.collection.Map[String, String])
val rddOfTest1 = rdd.map { case (k, v) => Test1(Map(k -> v)) }
rddOfTest1.registerTempTable("t1")
/* Stack trace
scala.MatchError: scala.collection.Map[String,String] (of class scala.reflect.internal.Types$TypeRef$$anon$5)
	at org.apache.spark.sql.catalyst.ScalaReflection$.schemaFor(ScalaReflection.scala:53)
	at org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:64)
	at org.apache.spark.sql.catalyst.ScalaReflection$$anonfun$schemaFor$1.apply(ScalaReflection.scala:62)
	...
*/

// Test2: This one is fine.
case class Test2(m: scala.collection.immutable.Map[String, String])
val rddOfTest2 = rdd.map { case (k, v) => Test2(Map(k -> v)) }
rddOfTest2.registerTempTable("t2")
sqlContext.sql("SELECT m FROM t2").collect
sqlContext.sql("SELECT m['key'] FROM t2").collect

// Test3: This one fails.
import org.apache.spark.sql._
val schema = StructType(StructField("m", MapType(StringType, StringType), true) :: Nil)
val rowRDD = rdd.map { case (k, v) => Row(scala.collection.mutable.HashMap(k -> v)) }
val schemaRDD = sqlContext.applySchema(rowRDD, schema)
schemaRDD.registerTempTable("t3")
sqlContext.sql("SELECT m FROM t3").collect
sqlContext.sql("SELECT m['key'] FROM t3").collect
sqlContext.sql("CREATE TABLE testHiveTable(m MAP <STRING, STRING>)")
sqlContext.sql("INSERT OVERWRITE TABLE testHiveTable SELECT m FROM t3")
/* Stack trace
14/10/22 19:30:56 INFO DAGScheduler: Job 4 failed: runJob at InsertIntoHiveTable.scala:124, took 1.384579 s
org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 4.0 failed 4 times, most recent failure: Lost task 1.3 in stage 4.0 (TID 12, yins-mbp): java.lang.ClassCastException: scala.collection.mutable.HashMap cannot be cast to scala.collection.immutable.Map
	org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$wrapperFor$5.apply(InsertIntoHiveTable.scala:96)
	org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$wrapperFor$5.apply(InsertIntoHiveTable.scala:96)
	org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1$1.apply(InsertIntoHiveTable.scala:148)
	org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1$1.apply(InsertIntoHiveTable.scala:145)
*/
{code}
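For reference, the fix the summary points at is to match against the general scala.collection.Map trait instead of Predef.Map, which is an alias for scala.collection.immutable.Map, in both the reflection path (ScalaReflection.schemaFor) and the insert path (InsertIntoHiveTable's wrapperFor). Below is a minimal sketch of the idea; the helper names are hypothetical and only mirror the shape of those checks, not the actual Spark code.

{code}
import scala.reflect.runtime.universe._

// Reflection side (cf. ScalaReflection.schemaFor): testing against
// typeOf[Map[_, _]] means Predef.Map, i.e. immutable.Map, so a field typed
// scala.collection.Map falls through and triggers the MatchError above.
// Testing against the general trait covers both.
def isMapLike(tpe: Type): Boolean =
  tpe <:< typeOf[scala.collection.Map[_, _]]

isMapLike(typeOf[scala.collection.Map[String, String]])            // true
isMapLike(typeOf[scala.collection.immutable.Map[String, String]])  // true

// Runtime side (cf. InsertIntoHiveTable's wrapperFor): casting a value to
// Predef.Map throws the ClassCastException above for a mutable HashMap,
// while matching on the general trait accepts both implementations.
def entryCount(value: Any): Int = value match {
  case m: scala.collection.Map[_, _] => m.size
  case _ => 0
}

entryCount(scala.collection.mutable.HashMap("key" -> "value"))  // 1
entryCount(scala.collection.immutable.Map("key" -> "value"))    // 1
{code}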
> Use scala.collection.Map for pattern matching instead of using Predef.Map (it is scala.collection.immutable.Map)
> -----------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-4052
>                 URL: https://issues.apache.org/jira/browse/SPARK-4052
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.1.0
>            Reporter: Yin Huai
>            Priority: Minor
>

--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org