Github user wzhfy commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20062#discussion_r159016484

--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala ---
@@ -225,17 +224,17 @@ case class FilterEstimation(plan: Filter) extends Logging {
   def evaluateNullCheck(
       attr: Attribute,
       isNull: Boolean,
-      update: Boolean): Option[BigDecimal] = {
+      update: Boolean): Option[Double] = {
     if (!colStatsMap.contains(attr)) {
       logDebug("[CBO] No statistics for " + attr)
       return None
     }
     val colStat = colStatsMap(attr)
     val rowCountValue = childStats.rowCount.get
-    val nullPercent: BigDecimal = if (rowCountValue == 0) {
+    val nullPercent: Double = if (rowCountValue == 0) {
       0
     } else {
-      BigDecimal(colStat.nullCount) / BigDecimal(rowCountValue)
+      (BigDecimal(colStat.nullCount) / BigDecimal(rowCountValue)).toDouble
--- End diff --

Theoretically, the value range of BigInt is larger than that of Double, so it's better to convert to BigDecimal first. But after the division, the result is between [0, 1], so it's safe to convert it to Double.
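
For illustration, a minimal standalone Scala sketch (not part of the PR; the counts below are hypothetical) of why dividing as BigDecimal first and only converting to Double afterwards is safe: the raw counts may exceed Double's exact integer range, but the resulting percentage is bounded by [0, 1].

    // Minimal sketch with hypothetical values, not code from the PR.
    object NullPercentSketch extends App {
      // Row counts are BigInt in Spark's Statistics and may exceed Double's exact
      // integer range (2^53), so converting the counts directly to Double can lose precision.
      val rowCountValue = BigInt("90071992547409920000")  // hypothetical, > 2^53
      val nullCount     = BigInt("30023997515803306667")  // hypothetical, <= rowCountValue

      // Divide with full precision as BigDecimal ...
      val ratio: BigDecimal = BigDecimal(nullCount) / BigDecimal(rowCountValue)

      // ... then convert: the ratio is bounded by [0, 1], so toDouble cannot
      // overflow; it only rounds to the nearest representable Double.
      val nullPercent: Double = ratio.toDouble

      println(nullPercent)  // prints a value close to 1/3
    }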