Github user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/22031#discussion_r208445494 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala --- @@ -2117,6 +2117,65 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { assert(ex4.getMessage.contains("data type mismatch: argument 3 requires int type")) } + test("zip_with function - arrays for primitive type not containing null") { + val df1 = Seq( + (Seq(9001, 9002, 9003), Seq(4, 5, 6)), + (Seq(1, 2), Seq(3, 4)), + (Seq.empty[Int], Seq.empty[Int]), + (null, null) + ).toDF("val1", "val2") + val df2 = Seq( + (Seq(1, 2, 3), Seq("a", "b", "c")), + (Seq(1, 2, 3), Seq("a", "b")) + ).toDF("val1", "val2") + + def testArrayOfPrimitiveTypeNotContainsNull(): Unit = { + val expectedValue1 = Seq( + Row(Seq(9005, 9007, 9009)), + Row(Seq(4, 6)), + Row(Seq.empty), + Row(null)) + checkAnswer(df1.selectExpr("zip_with(val1, val2, (x, y) -> x + y)"), expectedValue1) + + val expectedValue2 = Seq( + Row(Seq(Row("a", 1), Row("b", 2), Row("c", 3))), + Row(Seq(Row("a", 1), Row("b", 2), Row(null, 3)))) + checkAnswer(df2.selectExpr("zip_with(val1, val2, (x, y) -> (y, x))"), expectedValue2) + } + + // Test with local relation, the Project will be evaluated without codegen + testArrayOfPrimitiveTypeNotContainsNull() + // Test with cached relation, the Project will be evaluated with codegen + df1.cache() + df2.cache() + testArrayOfPrimitiveTypeNotContainsNull() + } + + test("zip_with function - arrays for primitive type containing null") { + val df1 = Seq[(Seq[Integer], Seq[Integer])]( + (Seq(9001, null, 9003), Seq(4, 5, 6)), + (Seq(1, null, 2, 4), Seq(3, 4)), + (Seq.empty, Seq.empty), + (null, null) + ).toDF("val1", "val2") + + def testArrayOfPrimitiveTypeContainsNull(): Unit = { + val expectedValue1 = Seq( + Row(Seq(9005, null, 9009)), + Row(Seq(4, null, null, null)), + Row(Seq.empty), + Row(null)) + checkAnswer(df1.selectExpr("zip_with(val1, val2, (x, y) 
-> x + y)"), expectedValue1) + } + + // Test with local relation, the Project will be evaluated without codegen + testArrayOfPrimitiveTypeContainsNull() + // Test with cached relation, the Project will be evaluated with codegen + df1.cache() + testArrayOfPrimitiveTypeContainsNull() + } + + --- End diff -- Also, could you add tests to `HigherOrderFunctionsSuite` that check more explicit patterns?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org