Repository: spark Updated Branches: refs/heads/master fdaa99897 -> 5c7f6b663
[SPARK-25629][TEST] Reduce ParquetFilterSuite: filter pushdown test time costs in Jenkins ## What changes were proposed in this pull request? Testing only these 4 cases is enough: https://github.com/apache/spark/blob/be2238fb502b0f49a8a1baa6da9bc3e99540b40e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala#L269-L279 ## How was this patch tested? Manual tests on my local machine. before: ``` - filter pushdown - decimal (13 seconds, 683 milliseconds) ``` after: ``` - filter pushdown - decimal (9 seconds, 713 milliseconds) ``` Closes #22636 from wangyum/SPARK-25629. Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: hyukjinkwon <gurwls...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c7f6b66 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c7f6b66 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c7f6b66 Branch: refs/heads/master Commit: 5c7f6b66368a956accfc34636c84ca3825f8d0b1 Parents: fdaa998 Author: Yuming Wang <yumw...@ebay.com> Authored: Tue Oct 16 12:30:02 2018 +0800 Committer: hyukjinkwon <gurwls...@apache.org> Committed: Tue Oct 16 12:30:02 2018 +0800 ---------------------------------------------------------------------- .../parquet/ParquetFilterSuite.scala | 67 ++++++++++---------- 1 file changed, 33 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/5c7f6b66/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 01e41b3..9cfc943 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -524,41 +524,40 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } test("filter pushdown - decimal") { - Seq(true, false).foreach { legacyFormat => + Seq( + (false, Decimal.MAX_INT_DIGITS), // int32Writer + (false, Decimal.MAX_LONG_DIGITS), // int64Writer + (true, Decimal.MAX_LONG_DIGITS), // binaryWriterUsingUnscaledLong + (false, DecimalType.MAX_PRECISION) // binaryWriterUsingUnscaledBytes + ).foreach { case (legacyFormat, precision) => withSQLConf(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key -> legacyFormat.toString) { - Seq( - s"a decimal(${Decimal.MAX_INT_DIGITS}, 2)", // 32BitDecimalType - s"a decimal(${Decimal.MAX_LONG_DIGITS}, 2)", // 64BitDecimalType - "a decimal(38, 18)" // ByteArrayDecimalType - ).foreach { schemaDDL => - val schema = StructType.fromDDL(schemaDDL) - val rdd = - spark.sparkContext.parallelize((1 to 4).map(i => Row(new java.math.BigDecimal(i)))) - val dataFrame = spark.createDataFrame(rdd, schema) - testDecimalPushDown(dataFrame) { implicit df => - assert(df.schema === schema) - checkFilterPredicate('a.isNull, classOf[Eq[_]], Seq.empty[Row]) - checkFilterPredicate('a.isNotNull, classOf[NotEq[_]], (1 to 4).map(Row.apply(_))) - - checkFilterPredicate('a === 1, classOf[Eq[_]], 1) - checkFilterPredicate('a <=> 1, classOf[Eq[_]], 1) - checkFilterPredicate('a =!= 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) - - checkFilterPredicate('a < 2, classOf[Lt[_]], 1) - checkFilterPredicate('a > 3, classOf[Gt[_]], 4) - checkFilterPredicate('a <= 1, classOf[LtEq[_]], 1) - checkFilterPredicate('a >= 4, classOf[GtEq[_]], 4) - - checkFilterPredicate(Literal(1) === 'a, classOf[Eq[_]], 1) - checkFilterPredicate(Literal(1) <=> 'a, classOf[Eq[_]], 1) - checkFilterPredicate(Literal(2) > 'a, classOf[Lt[_]], 1) - 
checkFilterPredicate(Literal(3) < 'a, classOf[Gt[_]], 4) - checkFilterPredicate(Literal(1) >= 'a, classOf[LtEq[_]], 1) - checkFilterPredicate(Literal(4) <= 'a, classOf[GtEq[_]], 4) - - checkFilterPredicate(!('a < 4), classOf[GtEq[_]], 4) - checkFilterPredicate('a < 2 || 'a > 3, classOf[Operators.Or], Seq(Row(1), Row(4))) - } + val schema = StructType.fromDDL(s"a decimal($precision, 2)") + val rdd = + spark.sparkContext.parallelize((1 to 4).map(i => Row(new java.math.BigDecimal(i)))) + val dataFrame = spark.createDataFrame(rdd, schema) + testDecimalPushDown(dataFrame) { implicit df => + assert(df.schema === schema) + checkFilterPredicate('a.isNull, classOf[Eq[_]], Seq.empty[Row]) + checkFilterPredicate('a.isNotNull, classOf[NotEq[_]], (1 to 4).map(Row.apply(_))) + + checkFilterPredicate('a === 1, classOf[Eq[_]], 1) + checkFilterPredicate('a <=> 1, classOf[Eq[_]], 1) + checkFilterPredicate('a =!= 1, classOf[NotEq[_]], (2 to 4).map(Row.apply(_))) + + checkFilterPredicate('a < 2, classOf[Lt[_]], 1) + checkFilterPredicate('a > 3, classOf[Gt[_]], 4) + checkFilterPredicate('a <= 1, classOf[LtEq[_]], 1) + checkFilterPredicate('a >= 4, classOf[GtEq[_]], 4) + + checkFilterPredicate(Literal(1) === 'a, classOf[Eq[_]], 1) + checkFilterPredicate(Literal(1) <=> 'a, classOf[Eq[_]], 1) + checkFilterPredicate(Literal(2) > 'a, classOf[Lt[_]], 1) + checkFilterPredicate(Literal(3) < 'a, classOf[Gt[_]], 4) + checkFilterPredicate(Literal(1) >= 'a, classOf[LtEq[_]], 1) + checkFilterPredicate(Literal(4) <= 'a, classOf[GtEq[_]], 4) + + checkFilterPredicate(!('a < 4), classOf[GtEq[_]], 4) + checkFilterPredicate('a < 2 || 'a > 3, classOf[Operators.Or], Seq(Row(1), Row(4))) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org