AngersZhuuuu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r458649024
########## File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala ########## @@ -206,75 +169,83 @@ class HiveScriptTransformationSuite extends SparkPlanTest with SQLTestUtils with val query = sql( s""" - |SELECT - |TRANSFORM(a, b, c, d, e) - |USING 'python $scriptFilePath' AS (a, b, c, d, e) - |FROM v + |SELECT TRANSFORM(a, b, c, d, e) + |USING 'python ${scriptFilePath}' + |FROM v """.stripMargin) - // In Hive 1.2, the string representation of a decimal omits trailing zeroes. - // But in Hive 2.3, it is always padded to 18 digits with trailing zeroes if necessary. - val decimalToString: Column => Column = if (HiveUtils.isHive23) { - c => c.cast("string") - } else { - c => c.cast("decimal(1, 0)").cast("string") - } - checkAnswer(query, identity, df.select( - 'a.cast("string"), - 'b.cast("string"), - 'c.cast("string"), - decimalToString('d), - 'e.cast("string")).collect()) + // In hive default serde mode, if we don't define output schema, it will choose first + // two column as output schema (key: String, value: String) + checkAnswer( + query, + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) } } - test("SPARK-30973: TRANSFORM should wait for the termination of the script (no serde)") { + test("SPARK-32106: TRANSFORM support complex data types as input and ouput type (hive serde)") { assume(TestUtils.testCommandAvailable("/bin/bash")) + withTempView("v") { + val df = Seq( + (1, "1", Array(0, 1, 2), Map("a" -> 1)), + (2, "2", Array(3, 4, 5), Map("b" -> 2))) + .toDF("a", "b", "c", "d") + .select('a, 'b, 'c, 'd, struct('a, 'b).as("e")) + df.createTempView("v") - val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") - val e = intercept[SparkException] { - val plan = - new HiveScriptTransformationExec( - input = Seq(rowsDf.col("a").expr), - script = "some_non_existent_command", - output = Seq(AttributeReference("a", StringType)()), - child = rowsDf.queryExecution.sparkPlan, - ioschema = noSerdeIOSchema) - SparkPlanTest.executePlan(plan, hiveContext) + // Hive serde support ArrayType/MapType/StructType as input and output data type + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("c").expr, + df.col("d").expr, + df.col("e").expr), + script = "cat", + output = Seq( + AttributeReference("c", ArrayType(IntegerType))(), + AttributeReference("d", MapType(StringType, IntegerType))(), + AttributeReference("e", StructType( + Seq( + StructField("col1", IntegerType, false), + StructField("col2", StringType, true))))()), + child = child, + ioschema = serdeIOSchema + ), + df.select('c, 'd, 'e).collect()) } - assert(e.getMessage.contains("Subprocess exited with status")) - assert(uncaughtExceptionHandler.exception.isEmpty) } - test("SPARK-30973: TRANSFORM should wait for the termination of the script (with serde)") { + test("SPARK-32106: TRANSFORM don't support CalenderIntervalType/UserDefinedType (hive serde)") { assume(TestUtils.testCommandAvailable("/bin/bash")) + withTempView("v") { + val df = Seq( + (1, new CalendarInterval(7, 1, 1000), new TestUDT.MyDenseVector(Array(1, 2, 3))), + (1, new CalendarInterval(7, 1, 1000), new TestUDT.MyDenseVector(Array(1, 2, 3)))) + .toDF("a", "b", "c") + df.createTempView("v") - val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") - val e = intercept[SparkException] { - val plan = - new HiveScriptTransformationExec( - input = Seq(rowsDf.col("a").expr), - script = "some_non_existent_command", - output = Seq(AttributeReference("a", StringType)()), - child = rowsDf.queryExecution.sparkPlan, - ioschema = serdeIOSchema) - SparkPlanTest.executePlan(plan, hiveContext) + val e1 = intercept[Exception] { + sql( + """ + |SELECT TRANSFORM(a, b) USING 'cat' AS (a, b) + |FROM v + """.stripMargin).collect() + } + assert(e1.getMessage.contains("scala.MatchError: CalendarIntervalType")) Review comment: it's a SparkException, but root cause is MatchError. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org