This is an automated email from the ASF dual-hosted git repository. huaxingao pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new cf7e3574efc [SPARK-38825][SQL][TEST] Add a test to cover parquet notIn filter cf7e3574efc is described below commit cf7e3574efc1d4bb7233f18fcf344e94d26c2ac1 Author: huaxingao <huaxin_...@apple.com> AuthorDate: Thu Apr 7 16:08:45 2022 -0700 [SPARK-38825][SQL][TEST] Add a test to cover parquet notIn filter ### What changes were proposed in this pull request? Currently we don't have a test for parquet `notIn` filter, so add a test for this ### Why are the changes needed? to make tests more complete ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new test Closes #36109 from huaxingao/inFilter. Authored-by: huaxingao <huaxin_...@apple.com> Signed-off-by: huaxingao <huaxin_...@apple.com> (cherry picked from commit d6fd0405b60875ac5e2c9daee1ec785f74e9b7a3) Signed-off-by: huaxingao <huaxin_...@apple.com> --- .../datasources/parquet/ParquetFilterSuite.scala | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 64a2ec6308c..71ea474409c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -1901,6 +1901,27 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } } + + test("SPARK-38825: in and notIn filters") { + import testImplicits._ + withTempPath { file => + Seq(1, 2, 0, -1, 99, 1000, 3, 7, 2).toDF("id").coalesce(1).write.mode("overwrite") + .parquet(file.getCanonicalPath) + var df = spark.read.parquet(file.getCanonicalPath) + var in = df.filter(col("id").isin(100, 3, 11, 12, 13)) + var notIn = df.filter(!col("id").isin(100, 3, 11, 12, 13)) + checkAnswer(in, Seq(Row(3))) + checkAnswer(notIn, Seq(Row(1), Row(2), Row(0), Row(-1), Row(99), Row(1000), Row(7), Row(2))) + + Seq("mary", "martin", "lucy", "alex", "mary", "dan").toDF("name").coalesce(1) + .write.mode("overwrite").parquet(file.getCanonicalPath) + df = spark.read.parquet(file.getCanonicalPath) + in = df.filter(col("name").isin("mary", "victor", "leo", "alex")) + notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex")) + checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary"))) + checkAnswer(notIn, Seq(Row("martin"), Row("lucy"), Row("dan"))) + } + } } @ExtendedSQLTest --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org