linliu-code commented on code in PR #18126:
URL: https://github.com/apache/hudi/pull/18126#discussion_r3011871124
##########
hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/hudi/analysis/Spark3HoodiePruneFileSourcePartitions.scala:
##########
@@ -105,11 +105,31 @@ private object Spark3HoodiePruneFileSourcePartitions extends PredicateHelper {
Project(projects, withFilter)
}
+ /**
+ * Returns the logical name of an attribute by stripping Spark's internal exprId suffix (e.g. #136).
+ * Filter expressions may reference columns with these suffixed names (e.g. nested_record#136.level),
+ * while partition schema uses logical names (e.g. nested_record.level).
+ */
+ private def logicalAttributeName(attr: AttributeReference): String = {
+ attr.name.replaceAll("#\\d+$", "")
+ }
+
+ /**
+ * Returns true if the given attribute references a partition column. An attribute references a
+ * partition column if its logical name (without #exprId) equals a partition column name or
+ * is the struct parent of a nested partition path (e.g. nested_record for nested_record.level).
+ */
+ private def isPartitionColumnReference(attr: AttributeReference, partitionSchema: StructType): Boolean = {
+ val logicalName = logicalAttributeName(attr)
+ partitionSchema.names.contains(logicalName) ||
+ partitionSchema.names.exists(_.startsWith(logicalName + "."))
+ }
+
def getPartitionFiltersAndDataFilters(partitionSchema: StructType,
Review Comment:
removed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org