Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22031#discussion_r208444629
  
    --- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2117,6 +2117,65 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSQLContext {
         assert(ex4.getMessage.contains("data type mismatch: argument 3 
requires int type"))
       }
     
    +  test("zip_with function - arrays for primitive type not containing 
null") {
    +    val df1 = Seq(
    +      (Seq(9001, 9002, 9003), Seq(4, 5, 6)),
    +      (Seq(1, 2), Seq(3, 4)),
    +      (Seq.empty[Int], Seq.empty[Int]),
    +      (null, null)
    +    ).toDF("val1", "val2")
    +    val df2 = Seq(
    +      (Seq(1, 2, 3), Seq("a", "b", "c")),
    +      (Seq(1, 2, 3), Seq("a", "b"))
    +    ).toDF("val1", "val2")
    +
    +    def testArrayOfPrimitiveTypeNotContainsNull(): Unit = {
    +      val expectedValue1 = Seq(
    +        Row(Seq(9005, 9007, 9009)),
    +        Row(Seq(4, 6)),
    +        Row(Seq.empty),
    +        Row(null))
    +      checkAnswer(df1.selectExpr("zip_with(val1, val2, (x, y) -> x + y)"), 
expectedValue1)
    +
    +      val expectedValue2 = Seq(
    +        Row(Seq(Row("a", 1), Row("b", 2), Row("c", 3))),
    +        Row(Seq(Row("a", 1), Row("b", 2), Row(null, 3))))
    +      checkAnswer(df2.selectExpr("zip_with(val1, val2, (x, y) -> (y, 
x))"), expectedValue2)
    +    }
    +
    +    // Test with local relation, the Project will be evaluated without 
codegen
    +    testArrayOfPrimitiveTypeNotContainsNull()
    +    // Test with cached relation, the Project will be evaluated with 
codegen
    +    df1.cache()
    +    df2.cache()
    +    testArrayOfPrimitiveTypeNotContainsNull()
    +  }
    +
    +  test("zip_with function - arrays for primitive type containing null") {
    +    val df1 = Seq[(Seq[Integer], Seq[Integer])](
    +      (Seq(9001, null, 9003), Seq(4, 5, 6)),
    +      (Seq(1, null, 2, 4), Seq(3, 4)),
    +      (Seq.empty, Seq.empty),
    +      (null, null)
    +    ).toDF("val1", "val2")
    +
    +    def testArrayOfPrimitiveTypeContainsNull(): Unit = {
    +      val expectedValue1 = Seq(
    +        Row(Seq(9005, null, 9009)),
    +        Row(Seq(4, null, null, null)),
    +        Row(Seq.empty),
    +        Row(null))
    +      checkAnswer(df1.selectExpr("zip_with(val1, val2, (x, y) -> x + y)"), 
expectedValue1)
    +    }
    +
    +    // Test with local relation, the Project will be evaluated without 
codegen
    +    testArrayOfPrimitiveTypeContainsNull()
    +    // Test with cached relation, the Project will be evaluated with 
codegen
    +    df1.cache()
    +    testArrayOfPrimitiveTypeContainsNull()
    +  }
    +
    +
    --- End diff --
    
    Can you add a test for invalid cases?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to