[jira] [Updated] (SPARK-31947) Solve string value error about Date/Timestamp in ScriptTransform

angerszhu (Jira) Fri, 12 Jun 2020 07:39:41 -0700


     [ 
https://issues.apache.org/jira/browse/SPARK-31947?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


angerszhu updated SPARK-31947:
------------------------------
    Description: 
For the following test case:

 
{code:java}
 test("SPARK-25990: TRANSFORM should handle different data types correctly") {
    assume(TestUtils.testCommandAvailable("python"))
    val scriptFilePath = getTestResourcePath("test_script.py")    
withTempView("v") {
      val df = Seq(
        (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1), 
Date.valueOf("2015-05-21")),
        (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2), 
Date.valueOf("2015-05-22")),
        (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3), 
Date.valueOf("2015-05-23"))
      ).toDF("a", "b", "c", "d", "e", "f") // Note column d's data type is 
Decimal(38, 18)
      df.createTempView("v")      val query = sql(
        s"""
           |SELECT
           |TRANSFORM(a, b, c, d, e, f)
           |USING 'python $scriptFilePath' AS (a, b, c, d, e, f)
           |FROM v
        """.stripMargin)      val decimalToString: Column => Column = c => 
c.cast("string")      checkAnswer(query, identity, df.select(
        'a.cast("string"),
        'b.cast("string"),
        'c.cast("string"),
        decimalToString('d),
        'e.cast("string"),
        'f.cast("string")).collect())
    }
  }

{code}
 

 

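The test fails with a wrong result: the Timestamp and Date columns come back as raw numeric values (e.g. 1000 and 16576) instead of formatted timestamp/date strings: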
{code:java}
[info] - SPARK-25990: TRANSFORM should handle different data types correctly 
*** FAILED *** (4 seconds, 997 milliseconds)
[info]   Results do not match for Spark plan:
[info]    ScriptTransformation [a#19, b#20, c#21, d#22, e#23, f#24], python 
/Users/angerszhu/Documents/project/AngersZhu/spark/sql/core/target/scala-2.12/test-classes/test_script.py,
 [a#31, b#32, c#33, d#34, e#35, f#36], 
org.apache.spark.sql.execution.script.ScriptTransformIOSchema@1ad5a29c
[info]   +- Project [_1#6 AS a#19, _2#7 AS b#20, _3#8 AS c#21, _4#9 AS d#22, 
_5#10 AS e#23, _6#11 AS f#24]
[info]      +- LocalTableScan [_1#6, _2#7, _3#8, _4#9, _5#10, _6#11]
[info]
[info]
[info]    == Results ==
[info]    !== Expected Answer - 3 ==                                           
== Actual Answer - 3 ==
[info]   ![1,1,1.0,1.000000000000000000,1970-01-01 08:00:00.001,2015-05-21]   
[1,1,1.0,1.000000000000000000,1000,16576]
[info]   ![2,2,2.0,2.000000000000000000,1970-01-01 08:00:00.002,2015-05-22]   
[2,2,2.0,2.000000000000000000,2000,16577]
[info]   ![3,3,3.0,3.000000000000000000,1970-01-01 08:00:00.003,2015-05-23]   
[3,3,3.0,3.000000000000000000,3000,16578] (SparkPlanTest.scala:95)
[
{code}

> Solve string value error about Date/Timestamp in ScriptTransform
> ----------------------------------------------------------------
>
>                 Key: SPARK-31947
>                 URL: https://issues.apache.org/jira/browse/SPARK-31947
>             Project: Spark
>          Issue Type: Sub-task
>          Components: SQL
>    Affects Versions: 3.1.0
>            Reporter: angerszhu
>            Priority: Major
>
> For the following test case:
>  
> {code:java}
>  test("SPARK-25990: TRANSFORM should handle different data types correctly") {
>     assume(TestUtils.testCommandAvailable("python"))
>     val scriptFilePath = getTestResourcePath("test_script.py")    
> withTempView("v") {
>       val df = Seq(
>         (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1), 
> Date.valueOf("2015-05-21")),
>         (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2), 
> Date.valueOf("2015-05-22")),
>         (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3), 
> Date.valueOf("2015-05-23"))
>       ).toDF("a", "b", "c", "d", "e", "f") // Note column d's data type is 
> Decimal(38, 18)
>       df.createTempView("v")      val query = sql(
>         s"""
>            |SELECT
>            |TRANSFORM(a, b, c, d, e, f)
>            |USING 'python $scriptFilePath' AS (a, b, c, d, e, f)
>            |FROM v
>         """.stripMargin)      val decimalToString: Column => Column = c => 
> c.cast("string")      checkAnswer(query, identity, df.select(
>         'a.cast("string"),
>         'b.cast("string"),
>         'c.cast("string"),
>         decimalToString('d),
>         'e.cast("string"),
>         'f.cast("string")).collect())
>     }
>   }
> {code}
>  
>  
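> The test fails with a wrong result: the Timestamp and Date columns come back as raw numeric values (e.g. 1000 and 16576) instead of formatted timestamp/date strings: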
> {code:java}
> [info] - SPARK-25990: TRANSFORM should handle different data types correctly 
> *** FAILED *** (4 seconds, 997 milliseconds)
> [info]   Results do not match for Spark plan:
> [info]    ScriptTransformation [a#19, b#20, c#21, d#22, e#23, f#24], python 
> /Users/angerszhu/Documents/project/AngersZhu/spark/sql/core/target/scala-2.12/test-classes/test_script.py,
>  [a#31, b#32, c#33, d#34, e#35, f#36], 
> org.apache.spark.sql.execution.script.ScriptTransformIOSchema@1ad5a29c
> [info]   +- Project [_1#6 AS a#19, _2#7 AS b#20, _3#8 AS c#21, _4#9 AS d#22, 
> _5#10 AS e#23, _6#11 AS f#24]
> [info]      +- LocalTableScan [_1#6, _2#7, _3#8, _4#9, _5#10, _6#11]
> [info]
> [info]
> [info]    == Results ==
> [info]    !== Expected Answer - 3 ==                                          
>  == Actual Answer - 3 ==
> [info]   ![1,1,1.0,1.000000000000000000,1970-01-01 08:00:00.001,2015-05-21]   
> [1,1,1.0,1.000000000000000000,1000,16576]
> [info]   ![2,2,2.0,2.000000000000000000,1970-01-01 08:00:00.002,2015-05-22]   
> [2,2,2.0,2.000000000000000000,2000,16577]
> [info]   ![3,3,3.0,3.000000000000000000,1970-01-01 08:00:00.003,2015-05-23]   
> [3,3,3.0,3.000000000000000000,3000,16578] (SparkPlanTest.scala:95)
> [
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

[jira] [Updated] (SPARK-31947) Solve string value error about Date/Timestamp in ScriptTransform

Reply via email to