Repository: spark Updated Branches: refs/heads/master fdaa99897 -> 5c7f6b663
[SPARK-25629][TEST] Reduce ParquetFilterSuite: filter pushdown test time costs in Jenkins ## What changes were proposed in this pull request? Testing only these 4 cases is enough: https://github.com/apache/spark/blob/be2238fb502b0f49a8a1baa6da9bc3e99540b40e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L269-L279 ## How was this patch tested? Manual tests on my local machine. before: ``` - filter pushdown - decimal (13 seconds, 683 milliseconds) ``` after: ``` - filter pushdown - decimal (9 seconds, 713 milliseconds) ``` Closes #22636 from wangyum/SPARK-25629. Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: hyukjinkwon <gurwls...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c7f6b66 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c7f6b66 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c7f6b66 Branch: refs/heads/master Commit: 5c7f6b66368a956accfc34636c84ca3825f8d0b1 Parents: fdaa998 Author: Yuming Wang <yumw...@ebay.com> Authored: Tue Oct 16 12:30:02 2018 +0800 Committer: hyukjinkwon <gurwls...@apache.org> Committed: Tue Oct 16 12:30:02 2018 +0800 ---------------------------------------------------------------------- .../parquet/ParquetFilterSuite.scala | 67 ++++++++++---------- 1 file changed, 33 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/5c7f6b66/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 01e41b3..9cfc943 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -524,41 +524,40 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } test("filter pushdown - decimal") { - Seq(true, false).foreach { legacyFormat => + Seq( + (false, Decimal.MAX_INT_DIGITS), // int32Writer + (false, Decimal.MAX_LONG_DIGITS), // int64Writer + (true, Decimal.MAX_LONG_DIGITS), // binaryWriterUsingUnscaledLong + (false, DecimalType.MAX_PRECISION) // binaryWriterUsingUnscaledBytes + ).foreach { case (legacyFormat, precision) => withSQLConf(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key -> legacyFormat.toString) { - Seq( - s"a decimal(${Decimal.MAX_INT_DIGITS}, 2)", // 32BitDecimalType - s"a decimal(${Decimal.MAX_LONG_DIGITS}, 2)", // 64BitDecimalType - "a decimal(38, 18)" // ByteArrayDecimalType - ).foreach { schemaDDL => - val schema = StructType.fromDDL(schemaDDL) - val rdd = - spark.sparkContext.parallelize((1 to 4).map(i => Row(new java.math.BigDecimal(i)))) - val dataFrame = spark.createDataFrame(rdd, schema) - testDecimalPushDown(dataFrame) { implicit df => - assert(df.schema === schema) - checkFilterPredicate('a.isNull, classOf[Eq[_]], Seq.empty[Row]) - checkFilterPredicate('a.isNotNull, classOf[NotEq[_]], (1 to 4).map(Row.apply(_))) - - checkFilterPredicate('a === 1, classOf[Eq[_]], 1) - checkFilterPredicate('a <=> 1, classOf[Eq[_]], 1) - checkFilterPredicate('a =!= 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) - - checkFilterPredicate('a < 2, classOf[Lt[_]], 1) - checkFilterPredicate('a > 3, classOf[Gt[_]], 4) - checkFilterPredicate('a <= 1, classOf[LtEq[_]], 1) - checkFilterPredicate('a >= 4, classOf[GtEq[_]], 4) - - checkFilterPredicate(Literal(1) === 'a, classOf[Eq[_]], 1) - checkFilterPredicate(Literal(1) <=> 'a, classOf[Eq[_]], 1) - checkFilterPredicate(Literal(2) > 'a, classOf[Lt[_]], 1) - 
checkFilterPredicate(Literal(3) < 'a, classOf[Gt[_]], 4) - checkFilterPredicate(Literal(1) >= 'a, classOf[LtEq[_]], 1) - checkFilterPredicate(Literal(4) <= 'a, classOf[GtEq[_]], 4) - - checkFilterPredicate(!('a < 4), classOf[GtEq[_]], 4) - checkFilterPredicate('a < 2 || 'a > 3, classOf[Operators.Or], Seq(Row(1), Row(4))) - } + val schema = StructType.fromDDL(s"a decimal($precision, 2)") + val rdd = + spark.sparkContext.parallelize((1 to 4).map(i => Row(new java.math.BigDecimal(i)))) + val dataFrame = spark.createDataFrame(rdd, schema) + testDecimalPushDown(dataFrame) { implicit df => + assert(df.schema === schema) + checkFilterPredicate('a.isNull, classOf[Eq[_]], Seq.empty[Row]) + checkFilterPredicate('a.isNotNull, classOf[NotEq[_]], (1 to 4).map(Row.apply(_))) + + checkFilterPredicate('a === 1, classOf[Eq[_]], 1) + checkFilterPredicate('a <=> 1, classOf[Eq[_]], 1) + checkFilterPredicate('a =!= 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) + + checkFilterPredicate('a < 2, classOf[Lt[_]], 1) + checkFilterPredicate('a > 3, classOf[Gt[_]], 4) + checkFilterPredicate('a <= 1, classOf[LtEq[_]], 1) + checkFilterPredicate('a >= 4, classOf[GtEq[_]], 4) + + checkFilterPredicate(Literal(1) === 'a, classOf[Eq[_]], 1) + checkFilterPredicate(Literal(1) <=> 'a, classOf[Eq[_]], 1) + checkFilterPredicate(Literal(2) > 'a, classOf[Lt[_]], 1) + checkFilterPredicate(Literal(3) < 'a, classOf[Gt[_]], 4) + checkFilterPredicate(Literal(1) >= 'a, classOf[LtEq[_]], 1) + checkFilterPredicate(Literal(4) <= 'a, classOf[GtEq[_]], 4) + + checkFilterPredicate(!('a < 4), classOf[GtEq[_]], 4) + checkFilterPredicate('a < 2 || 'a > 3, classOf[Operators.Or], Seq(Row(1), Row(4))) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org