Repository: spark Updated Branches: refs/heads/master 9e3bb3136 -> 9341c951e
[SPARK-23852][SQL] Add test that fails if PARQUET-1217 is not fixed ## What changes were proposed in this pull request? Add a new test that fails if PARQUET-1217 - a predicate pushdown bug - is not fixed in Spark's Parquet dependency. ## How was this patch tested? New unit test passes. Author: Henry Robinson <he...@apache.org> Closes #21284 from henryr/spark-23852. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9341c951 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9341c951 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9341c951 Branch: refs/heads/master Commit: 9341c951e85ff29714cbee302053872a6a4223da Parents: 9e3bb31 Author: Henry Robinson <he...@apache.org> Authored: Wed May 9 19:56:03 2018 -0700 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Wed May 9 19:56:03 2018 -0700 ---------------------------------------------------------------------- .../test/resources/test-data/parquet-1217.parquet | Bin 0 -> 321 bytes .../datasources/parquet/ParquetFilterSuite.scala | 10 ++++++++++ 2 files changed, 10 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/9341c951/sql/core/src/test/resources/test-data/parquet-1217.parquet ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/test-data/parquet-1217.parquet b/sql/core/src/test/resources/test-data/parquet-1217.parquet new file mode 100644 index 0000000..eb2dc4f Binary files /dev/null and b/sql/core/src/test/resources/test-data/parquet-1217.parquet differ http://git-wip-us.apache.org/repos/asf/spark/blob/9341c951/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala ---------------------------------------------------------------------- diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 667e0b1..4d0ecde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -648,6 +648,16 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } } } + + test("SPARK-23852: Broken Parquet push-down for partially-written stats") { + // parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null. + // The row-group statistics include null counts, but not min and max values, which + // triggers PARQUET-1217. + val df = readResourceParquetFile("test-data/parquet-1217.parquet") + + // Will return 0 rows if PARQUET-1217 is not fixed. + assert(df.where("col > 0").count() === 2) + } } class NumRowGroupsAcc extends AccumulatorV2[Integer, Integer] { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org