[spark] branch branch-3.3 updated: [SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null)

huaxingao Mon, 18 Apr 2022 21:28:57 -0700

This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new dd6eca7550c [SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null)
dd6eca7550c is described below

commit dd6eca7550c25dbcad9f12caf9fccfcad981d33f
Author: huaxingao <huaxin_...@apple.com>
AuthorDate: Mon Apr 18 21:27:57 2022 -0700

    [SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null)
    
    ### What changes were proposed in this pull request?
    Add test for filter `in(null)` and `notIn(null)`
    
    ### Why are the changes needed?
    To make the test coverage more complete: the `in`/`notIn` filter tests did not cover `null` in the value list.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    
    new test
    
    Closes #36248 from huaxingao/inNotIn.
    
    Authored-by: huaxingao <huaxin_...@apple.com>
    Signed-off-by: huaxingao <huaxin_...@apple.com>
    (cherry picked from commit b760e4a686939bdb837402286b8d3d8b445c5ed4)
    Signed-off-by: huaxingao <huaxin_...@apple.com>
---
 .../datasources/parquet/ParquetFilterSuite.scala   | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 71ea474409c..7a09011f27c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -1905,21 +1905,33 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
   test("SPARK-38825: in and notIn filters") {
     import testImplicits._
     withTempPath { file =>
-      Seq(1, 2, 0, -1, 99, 1000, 3, 7, 2).toDF("id").coalesce(1).write.mode("overwrite")
+      Seq(1, 2, 0, -1, 99, Integer.MAX_VALUE, 1000, 3, 7, Integer.MIN_VALUE, 2)
+        .toDF("id").coalesce(1).write.mode("overwrite")
         .parquet(file.getCanonicalPath)
       var df = spark.read.parquet(file.getCanonicalPath)
-      var in = df.filter(col("id").isin(100, 3, 11, 12, 13))
-      var notIn = df.filter(!col("id").isin(100, 3, 11, 12, 13))
-      checkAnswer(in, Seq(Row(3)))
+      var in = df.filter(col("id").isin(100, 3, 11, 12, 13, Integer.MAX_VALUE, Integer.MIN_VALUE))
+      var notIn =
+        df.filter(!col("id").isin(100, 3, 11, 12, 13, Integer.MAX_VALUE, Integer.MIN_VALUE))
+      checkAnswer(in, Seq(Row(3), Row(-2147483648), Row(2147483647)))
       checkAnswer(notIn, Seq(Row(1), Row(2), Row(0), Row(-1), Row(99), Row(1000), Row(7), Row(2)))
 
-      Seq("mary", "martin", "lucy", "alex", "mary", "dan").toDF("name").coalesce(1)
+      Seq("mary", "martin", "lucy", "alex", null, "mary", "dan").toDF("name").coalesce(1)
         .write.mode("overwrite").parquet(file.getCanonicalPath)
       df = spark.read.parquet(file.getCanonicalPath)
       in = df.filter(col("name").isin("mary", "victor", "leo", "alex"))
       notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex"))
       checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary")))
       checkAnswer(notIn, Seq(Row("martin"), Row("lucy"), Row("dan")))
+
+      in = df.filter(col("name").isin("mary", "victor", "leo", "alex", null))
+      notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex", null))
+      checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary")))
+      checkAnswer(notIn, Seq())
+
+      in = df.filter(col("name").isin(null))
+      notIn = df.filter(!col("name").isin(null))
+      checkAnswer(in, Seq())
+      checkAnswer(notIn, Seq())
     }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch branch-3.3 updated: [SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null)

Reply via email to