Github user lianhuiwang commented on a diff in the pull request: https://github.com/apache/spark/pull/18193#discussion_r139879632 --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala --- @@ -140,6 +141,62 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { } /** + * + * TODO: merge this with PruneFileSourcePartitions after we completely make hive as a data source. + */ +case class PruneHiveTablePartitions( + session: SparkSession) extends Rule[LogicalPlan] with PredicateHelper { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case filter @ Filter(condition, relation: HiveTableRelation) if relation.isPartitioned => + val predicates = splitConjunctivePredicates(condition) + val normalizedFilters = predicates.map { e => + e transform { + case a: AttributeReference => + a.withName(relation.output.find(_.semanticEquals(a)).get.name) + } + } + val partitionSet = AttributeSet(relation.partitionCols) + val pruningPredicates = normalizedFilters.filter { predicate => + !predicate.references.isEmpty && + predicate.references.subsetOf(partitionSet) + } + if (pruningPredicates.nonEmpty && session.sessionState.conf.fallBackToHdfsForStatsEnabled && + session.sessionState.conf.metastorePartitionPruning) { + val prunedPartitions = session.sharedState.externalCatalog.listPartitionsByFilter( + relation.tableMeta.database, + relation.tableMeta.identifier.table, + pruningPredicates, + session.sessionState.conf.sessionLocalTimeZone) + val sizeInBytes = try { + prunedPartitions.map { part => + val totalSize = part.parameters.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong) + val rawDataSize = part.parameters.get(StatsSetupConst.RAW_DATA_SIZE).map(_.toLong) + if (totalSize.isDefined && totalSize.get > 0L) { --- End diff -- @cenyuhai Yes, I think what you said is right. Thanks.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org