[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core

GitBox Wed, 22 Jul 2020 01:28:16 -0700


maropu commented on a change in pull request #29085:
URL: https://github.com/apache/spark/pull/29085#discussion_r458623671




##########
File path: 
sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala
##########
@@ -206,75 +169,83 @@ class HiveScriptTransformationSuite extends SparkPlanTest 
with SQLTestUtils with
 
       val query = sql(
         s"""
-          |SELECT
-          |TRANSFORM(a, b, c, d, e)
-          |USING 'python $scriptFilePath' AS (a, b, c, d, e)
-          |FROM v
+           |SELECT TRANSFORM(a, b, c, d, e)
+           |USING 'python ${scriptFilePath}'
+           |FROM v
         """.stripMargin)
 
-      // In Hive 1.2, the string representation of a decimal omits trailing 
zeroes.
-      // But in Hive 2.3, it is always padded to 18 digits with trailing 
zeroes if necessary.
-      val decimalToString: Column => Column = if (HiveUtils.isHive23) {
-        c => c.cast("string")
-      } else {
-        c => c.cast("decimal(1, 0)").cast("string")
-      }
-      checkAnswer(query, identity, df.select(
-        'a.cast("string"),
-        'b.cast("string"),
-        'c.cast("string"),
-        decimalToString('d),
-        'e.cast("string")).collect())
+      // In hive default serde mode, if we don't define output schema, it will 
choose first
+      // two column as output schema (key: String, value: String)
+      checkAnswer(
+        query,
+        identity,
+        df.select(
+          'a.cast("string").as("key"),
+          'b.cast("string").as("value")).collect())
     }
   }
 
-  test("SPARK-30973: TRANSFORM should wait for the termination of the script 
(no serde)") {
+  test("SPARK-32106: TRANSFORM support complex data types as input and ouput 
type (hive serde)") {
     assume(TestUtils.testCommandAvailable("/bin/bash"))
+    withTempView("v") {
+      val df = Seq(
+        (1, "1", Array(0, 1, 2), Map("a" -> 1)),
+        (2, "2", Array(3, 4, 5), Map("b" -> 2)))
+        .toDF("a", "b", "c", "d")
+          .select('a, 'b, 'c, 'd, struct('a, 'b).as("e"))
+      df.createTempView("v")
 
-    val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a")
-    val e = intercept[SparkException] {
-      val plan =
-        new HiveScriptTransformationExec(
-          input = Seq(rowsDf.col("a").expr),
-          script = "some_non_existent_command",
-          output = Seq(AttributeReference("a", StringType)()),
-          child = rowsDf.queryExecution.sparkPlan,
-          ioschema = noSerdeIOSchema)
-      SparkPlanTest.executePlan(plan, hiveContext)
+      // Hive serde support ArrayType/MapType/StructType as input and output 
data type
+      checkAnswer(
+        df,
+        (child: SparkPlan) => createScriptTransformationExec(

Review comment:
        Why is the test method different between this test unit and ` 
test("SPARK-32106: TRANSFORM don't support CalenderIntervalType/UserDefinedType 
(hive serde)") {` ? This is a plan test and the other one is an end-2-end test?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core

Reply via email to