alexeykudinkin commented on code in PR #7528: URL: https://github.com/apache/hudi/pull/7528#discussion_r1083321386
########## hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieCatalystExpressionUtils.scala: ########## @@ -78,14 +80,52 @@ object HoodieCatalystExpressionUtils { * NOTE: Projection of the row from [[StructType]] A to [[StructType]] B is only possible, if * B is a subset of A */ - def generateUnsafeProjection(from: StructType, to: StructType): UnsafeProjection = { - val attrs = from.toAttributes - val attrsMap = attrs.map(attr => (attr.name, attr)).toMap - val targetExprs = to.fields.map(f => attrsMap(f.name)) + def generateUnsafeProjection(sourceStructType: StructType, targetStructType: StructType): UnsafeProjection = { + val resolver = SQLConf.get.resolver + val attrs = sourceStructType.toAttributes + val targetExprs = targetStructType.fields.map { targetField => + val attrRef = attrs.find(attr => resolver(attr.name, targetField.name)) + .getOrElse(throw new AnalysisException(s"Wasn't able to match target field `${targetField.name}` to any of the source attributes ($attrs)")) + + genProjectingExpression(attrRef, targetField.dataType) + } GenerateUnsafeProjection.generate(targetExprs, attrs) } + private def genProjectingExpression(sourceExpr: Expression, + targetDataType: DataType): Expression = { + checkState(sourceExpr.resolved) + + // TODO support array, map + (sourceExpr.dataType, targetDataType) match { + case (sdt, tdt) if sdt == tdt => + sourceExpr + + case (sourceType: StructType, targetType: StructType) => + val fieldValueExprs = targetType.fields.map { tf => Review Comment: Realized that this is actually not the right approach and the problem is elsewhere: - The problem was that we're simply not reading projected records from Parquet -- the reason being that when a non-whitelisted RecordPayload is used, we fall back to reading the full record, but we were still allowing `NestedSchemaPruning` to be applied nevertheless -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org