[ https://issues.apache.org/jira/browse/SPARK-31947?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
angerszhu updated SPARK-31947: ------------------------------ Description: For the following test case: {code:java} test("SPARK-25990: TRANSFORM should handle different data types correctly") { assume(TestUtils.testCommandAvailable("python")) val scriptFilePath = getTestResourcePath("test_script.py") withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1), Date.valueOf("2015-05-21")), (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2), Date.valueOf("2015-05-22")), (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3), Date.valueOf("2015-05-23")) ).toDF("a", "b", "c", "d", "e", "f") // Note column d's data type is Decimal(38, 18) df.createTempView("v") val query = sql( s""" |SELECT |TRANSFORM(a, b, c, d, e, f) |USING 'python $scriptFilePath' AS (a, b, c, d, e, f) |FROM v """.stripMargin) val decimalToString: Column => Column = c => c.cast("string") checkAnswer(query, identity, df.select( 'a.cast("string"), 'b.cast("string"), 'c.cast("string"), decimalToString('d), 'e.cast("string"), 'f.cast("string")).collect()) } } {code} The test fails with a wrong result: the Timestamp and Date columns are returned as raw numeric values (e.g. 1000 and 16576) instead of their string representations: {code:java} [info] - SPARK-25990: TRANSFORM should handle different data types correctly *** FAILED *** (4 seconds, 997 milliseconds) [info] Results do not match for Spark plan: [info] ScriptTransformation [a#19, b#20, c#21, d#22, e#23, f#24], python /Users/angerszhu/Documents/project/AngersZhu/spark/sql/core/target/scala-2.12/test-classes/test_script.py, [a#31, b#32, c#33, d#34, e#35, f#36], org.apache.spark.sql.execution.script.ScriptTransformIOSchema@1ad5a29c [info] +- Project [_1#6 AS a#19, _2#7 AS b#20, _3#8 AS c#21, _4#9 AS d#22, _5#10 AS e#23, _6#11 AS f#24] [info] +- LocalTableScan [_1#6, _2#7, _3#8, _4#9, _5#10, _6#11] [info] [info] [info] == Results == [info] !== Expected Answer - 3 == == Actual Answer - 3 == [info] ![1,1,1.0,1.000000000000000000,1970-01-01 08:00:00.001,2015-05-21] [1,1,1.0,1.000000000000000000,1000,16576] [info] ![2,2,2.0,2.000000000000000000,1970-01-01 08:00:00.002,2015-05-22] 
[2,2,2.0,2.000000000000000000,2000,16577] [info] ![3,3,3.0,3.000000000000000000,1970-01-01 08:00:00.003,2015-05-23] [3,3,3.0,3.000000000000000000,3000,16578] (SparkPlanTest.scala:95) [ {code} > Solve string value error about Date/Timestamp in ScriptTransform > ---------------------------------------------------------------- > > Key: SPARK-31947 > URL: https://issues.apache.org/jira/browse/SPARK-31947 > Project: Spark > Issue Type: Sub-task > Components: SQL > Affects Versions: 3.1.0 > Reporter: angerszhu > Priority: Major > > For the following test case: > > {code:java} > test("SPARK-25990: TRANSFORM should handle different data types correctly") { > assume(TestUtils.testCommandAvailable("python")) > val scriptFilePath = getTestResourcePath("test_script.py") > withTempView("v") { > val df = Seq( > (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1), > Date.valueOf("2015-05-21")), > (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2), > Date.valueOf("2015-05-22")), > (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3), > Date.valueOf("2015-05-23")) > ).toDF("a", "b", "c", "d", "e", "f") // Note column d's data type is > Decimal(38, 18) > df.createTempView("v") val query = sql( > s""" > |SELECT > |TRANSFORM(a, b, c, d, e, f) > |USING 'python $scriptFilePath' AS (a, b, c, d, e, f) > |FROM v > """.stripMargin) val decimalToString: Column => Column = c => > c.cast("string") checkAnswer(query, identity, df.select( > 'a.cast("string"), > 'b.cast("string"), > 'c.cast("string"), > decimalToString('d), > 'e.cast("string"), > 'f.cast("string")).collect()) > } > } > {code} > > > The test fails with a wrong result: the Timestamp and Date columns are returned as raw numeric values (e.g. 1000 and 16576) instead of their string representations: > {code:java} > [info] - SPARK-25990: TRANSFORM should handle different data types correctly > *** FAILED *** (4 seconds, 997 milliseconds) > [info] Results do not match for Spark plan: > [info] ScriptTransformation [a#19, b#20, c#21, d#22, e#23, f#24], python > /Users/angerszhu/Documents/project/AngersZhu/spark/sql/core/target/scala-2.12/test-classes/test_script.py, > [a#31, b#32, c#33, d#34, e#35, f#36], > 
org.apache.spark.sql.execution.script.ScriptTransformIOSchema@1ad5a29c > [info] +- Project [_1#6 AS a#19, _2#7 AS b#20, _3#8 AS c#21, _4#9 AS d#22, > _5#10 AS e#23, _6#11 AS f#24] > [info] +- LocalTableScan [_1#6, _2#7, _3#8, _4#9, _5#10, _6#11] > [info] > [info] > [info] == Results == > [info] !== Expected Answer - 3 == > == Actual Answer - 3 == > [info] ![1,1,1.0,1.000000000000000000,1970-01-01 08:00:00.001,2015-05-21] > [1,1,1.0,1.000000000000000000,1000,16576] > [info] ![2,2,2.0,2.000000000000000000,1970-01-01 08:00:00.002,2015-05-22] > [2,2,2.0,2.000000000000000000,2000,16577] > [info] ![3,3,3.0,3.000000000000000000,1970-01-01 08:00:00.003,2015-05-23] > [3,3,3.0,3.000000000000000000,3000,16578] (SparkPlanTest.scala:95) > [ > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org