This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new dd6eca7550c [SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null)
dd6eca7550c is described below

commit dd6eca7550c25dbcad9f12caf9fccfcad981d33f
Author: huaxingao <huaxin_...@apple.com>
AuthorDate: Mon Apr 18 21:27:57 2022 -0700

    [SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null)

    ### What changes were proposed in this pull request?
    Add test for filter `in(null)` and `notIn(null)`

    ### Why are the changes needed?
    to make tests more complete

    ### Does this PR introduce _any_ user-facing change?
    No

    ### How was this patch tested?
    new test

    Closes #36248 from huaxingao/inNotIn.

    Authored-by: huaxingao <huaxin_...@apple.com>
    Signed-off-by: huaxingao <huaxin_...@apple.com>

    (cherry picked from commit b760e4a686939bdb837402286b8d3d8b445c5ed4)
    Signed-off-by: huaxingao <huaxin_...@apple.com>
---
 .../datasources/parquet/ParquetFilterSuite.scala | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 71ea474409c..7a09011f27c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -1905,21 +1905,33 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
   test("SPARK-38825: in and notIn filters") {
     import testImplicits._
     withTempPath { file =>
-      Seq(1, 2, 0, -1, 99, 1000, 3, 7, 2).toDF("id").coalesce(1).write.mode("overwrite")
+      Seq(1, 2, 0, -1, 99, Integer.MAX_VALUE, 1000, 3, 7, Integer.MIN_VALUE, 2)
+        .toDF("id").coalesce(1).write.mode("overwrite")
         .parquet(file.getCanonicalPath)
       var df = spark.read.parquet(file.getCanonicalPath)
-      var in = df.filter(col("id").isin(100, 3, 11, 12, 13))
-      var notIn = df.filter(!col("id").isin(100, 3, 11, 12, 13))
-      checkAnswer(in, Seq(Row(3)))
+      var in = df.filter(col("id").isin(100, 3, 11, 12, 13, Integer.MAX_VALUE, Integer.MIN_VALUE))
+      var notIn =
+        df.filter(!col("id").isin(100, 3, 11, 12, 13, Integer.MAX_VALUE, Integer.MIN_VALUE))
+      checkAnswer(in, Seq(Row(3), Row(-2147483648), Row(2147483647)))
       checkAnswer(notIn, Seq(Row(1), Row(2), Row(0), Row(-1), Row(99), Row(1000), Row(7), Row(2)))
 
-      Seq("mary", "martin", "lucy", "alex", "mary", "dan").toDF("name").coalesce(1)
+      Seq("mary", "martin", "lucy", "alex", null, "mary", "dan").toDF("name").coalesce(1)
         .write.mode("overwrite").parquet(file.getCanonicalPath)
       df = spark.read.parquet(file.getCanonicalPath)
       in = df.filter(col("name").isin("mary", "victor", "leo", "alex"))
       notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex"))
       checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary")))
       checkAnswer(notIn, Seq(Row("martin"), Row("lucy"), Row("dan")))
+
+      in = df.filter(col("name").isin("mary", "victor", "leo", "alex", null))
+      notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex", null))
+      checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary")))
+      checkAnswer(notIn, Seq())
+
+      in = df.filter(col("name").isin(null))
+      notIn = df.filter(!col("name").isin(null))
+      checkAnswer(in, Seq())
+      checkAnswer(notIn, Seq())
     }
   }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org