peter-toth commented on code in PR #55518:
URL: https://github.com/apache/spark/pull/55518#discussion_r3148764907


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala:
##########
@@ -50,20 +51,35 @@ trait RewriteRowLevelCommand extends Rule[LogicalPlan] {
   protected def buildOperationTable(
       table: SupportsRowLevelOperations,
       command: Command,
-      options: CaseInsensitiveStringMap): RowLevelOperationTable = {
-    val info = RowLevelOperationInfoImpl(command, options)
+      options: CaseInsensitiveStringMap,
+      updatedColumns: Seq[NamedReference] = Nil): RowLevelOperationTable = {
+    val info = RowLevelOperationInfoImpl(command, options, updatedColumns)
     val operation = table.newRowLevelOperationBuilder(info).build()
     RowLevelOperationTable(table, operation)
   }
 
+  // Builds a DataSourceV2Relation for a row-level operation, optionally 
narrowing its output.
+  //
+  // When dataAttrs is non-empty, the relation output is narrowed to include 
only columns
+  // required for a column-update write. When dataAttrs is empty, the full 
relation.output is
+  // preserved.
   protected def buildRelationWithAttrs(
       relation: DataSourceV2Relation,
       table: RowLevelOperationTable,
       metadataAttrs: Seq[AttributeReference],
-      rowIdAttrs: Seq[AttributeReference] = Nil): DataSourceV2Relation = {
-
-    val attrs = dedupAttrs(relation.output ++ rowIdAttrs ++ metadataAttrs)
-    relation.copy(table = table, output = attrs)
+      rowIdAttrs: Seq[AttributeReference] = Nil,
+      dataAttrs: Seq[AttributeReference] = Nil,
+      cond: Expression = TrueLiteral): DataSourceV2Relation = {
+
+    if (dataAttrs.nonEmpty) {
+      val required =
+        AttributeSet(dataAttrs) ++ AttributeSet(Seq(cond)) ++ 
AttributeSet(rowIdAttrs)
+      val narrowOutput = relation.output.filter(required.contains)
+      relation.copy(table = table, output = dedupAttrs(narrowOutput ++ 
rowIdAttrs ++ metadataAttrs))

Review Comment:
   Can an attribute in `required` be missing from `relation.output`?
   `rowIdAttrs` seems to be added 2 times.
   If we already have a `dedupAttrs()` then probably doesn't make sense build 
`AttributeSet`s.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to