Github user lianhuiwang commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18193#discussion_r139879632
  
    --- Diff: 
sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala ---
    @@ -140,6 +141,62 @@ class DetermineTableStats(session: SparkSession) 
extends Rule[LogicalPlan] {
     }
     
     /**
    + *
    + * TODO: merge this with PruneFileSourcePartitions after we completely 
make hive as a data source.
    + */
    +case class PruneHiveTablePartitions(
    +    session: SparkSession) extends Rule[LogicalPlan] with PredicateHelper {
    +  override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
    +    case filter @ Filter(condition, relation: HiveTableRelation) if 
relation.isPartitioned =>
    +      val predicates = splitConjunctivePredicates(condition)
    +      val normalizedFilters = predicates.map { e =>
    +        e transform {
    +          case a: AttributeReference =>
    +            a.withName(relation.output.find(_.semanticEquals(a)).get.name)
    +        }
    +      }
    +      val partitionSet = AttributeSet(relation.partitionCols)
    +      val pruningPredicates = normalizedFilters.filter { predicate =>
    +        !predicate.references.isEmpty &&
    +          predicate.references.subsetOf(partitionSet)
    +      }
    +      if (pruningPredicates.nonEmpty && 
session.sessionState.conf.fallBackToHdfsForStatsEnabled &&
    +        session.sessionState.conf.metastorePartitionPruning) {
    +        val prunedPartitions = 
session.sharedState.externalCatalog.listPartitionsByFilter(
    +          relation.tableMeta.database,
    +          relation.tableMeta.identifier.table,
    +          pruningPredicates,
    +          session.sessionState.conf.sessionLocalTimeZone)
    +        val sizeInBytes = try {
    +          prunedPartitions.map { part =>
    +            val totalSize = 
part.parameters.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong)
    +            val rawDataSize = 
part.parameters.get(StatsSetupConst.RAW_DATA_SIZE).map(_.toLong)
    +            if (totalSize.isDefined && totalSize.get > 0L) {
    --- End diff --
    
    @cenyuhai Yes, I think what you said is right. Thanks.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to