Repository: spark Updated Branches: refs/heads/branch-2.3 8eb9a411d -> 1c3e8205d
Revert "[SPARK-23799][SQL] FilterEstimation.evaluateInSet produces division by zero in a case of empty table with analyzed statistics" This reverts commit c2f4ee7baf07501cc1f8a23dd21d14aea53606c7. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c3e8205 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c3e8205 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c3e8205 Branch: refs/heads/branch-2.3 Commit: 1c3e8205d04d8e40a9d73633631534a728d7b1fe Parents: 8eb9a41 Author: gatorsmile <gatorsm...@gmail.com> Authored: Mon Apr 23 13:57:57 2018 -0700 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Mon Apr 23 13:57:57 2018 -0700 ---------------------------------------------------------------------- .../statsEstimation/FilterEstimation.scala | 4 --- .../statsEstimation/FilterEstimationSuite.scala | 11 -------- .../spark/sql/StatisticsCollectionSuite.scala | 28 -------------------- 3 files changed, 43 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/1c3e8205/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala index 5b8b698..4cc32de 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala @@ -388,10 +388,6 @@ case class FilterEstimation(plan: Filter) extends Logging { val dataType = attr.dataType var newNdv = ndv - if (ndv.toDouble == 0 || colStat.min.isEmpty || 
colStat.max.isEmpty) { - return Some(0.0) - } - // use [min, max] to filter the original hSet dataType match { case _: NumericType | BooleanType | DateType | TimestampType => http://git-wip-us.apache.org/repos/asf/spark/blob/1c3e8205/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala index e14b75e..2b1fe98 100755 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala @@ -355,17 +355,6 @@ class FilterEstimationSuite extends StatsEstimationTestBase { expectedRowCount = 3) } - test("evaluateInSet with all zeros") { - validateEstimatedStats( - Filter(InSet(attrString, Set(3, 4, 5)), - StatsTestPlan(Seq(attrString), 0, - AttributeMap(Seq(attrString -> - ColumnStat(distinctCount = Some(0), min = None, max = None, - nullCount = Some(0), avgLen = Some(0), maxLen = Some(0)))))), - Seq(attrString -> ColumnStat(distinctCount = Some(0))), - expectedRowCount = 0) - } - test("cint NOT IN (3, 4, 5)") { validateEstimatedStats( Filter(Not(InSet(attrInt, Set(3, 4, 5))), childStatsTestPlan(Seq(attrInt), 10L)), http://git-wip-us.apache.org/repos/asf/spark/blob/1c3e8205/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 3f87774..b11e798 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala 
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -372,32 +372,4 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } } - - test("Simple queries must be working, if CBO is turned on") { - withSQLConf(SQLConf.CBO_ENABLED.key -> "true") { - withTable("TBL1", "TBL") { - import org.apache.spark.sql.functions._ - val df = spark.range(1000L).select('id, - 'id * 2 as "FLD1", - 'id * 12 as "FLD2", - lit("aaa") + 'id as "fld3") - df.write - .mode(SaveMode.Overwrite) - .bucketBy(10, "id", "FLD1", "FLD2") - .sortBy("id", "FLD1", "FLD2") - .saveAsTable("TBL") - sql("ANALYZE TABLE TBL COMPUTE STATISTICS ") - sql("ANALYZE TABLE TBL COMPUTE STATISTICS FOR COLUMNS ID, FLD1, FLD2, FLD3") - val df2 = spark.sql( - """ - |SELECT t1.id, t1.fld1, t1.fld2, t1.fld3 - |FROM tbl t1 - |JOIN tbl t2 on t1.id=t2.id - |WHERE t1.fld3 IN (-123.23,321.23) - """.stripMargin) - df2.createTempView("TBL2") - sql("SELECT * FROM tbl2 WHERE fld3 IN ('qqq', 'qwe') ").queryExecution.executedPlan - } - } - } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org