juliuszsompolski commented on code in PR #51091:
URL: https://github.com/apache/spark/pull/51091#discussion_r2143353737
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MergeRowsExec.scala:
##########
@@ -233,6 +246,7 @@ case class MergeRowsExec(
}
}
+ longMetric("numTargetRowsCopied") += 1
Review Comment:
IIUC the code, this line should be deleted and the change
https://github.com/apache/spark/pull/51091/files#diff-a572ff40254b26b4a903f101ee466dd2dff9b8c7954a3b957fe5fc25b87ee10aR234-R236
is already handling all the cases.
I haven't run it, but I think that if I modify the test as follows:
```
// Checks that numTargetRowsCopied counts only the target rows that are carried over
// unchanged by the MERGE. Expected classification of the rows below:
//   pk=1        matched, salary < 200             -> updated
//   pk=2        matched, condition not satisfied  -> copied
//   pk=3, pk=4  not matched by source, salary <= 400 -> copied
//   pk=5        not matched by source, salary > 400  -> updated
//   pk=6        not matched, s.pk < 10            -> inserted
//   pk=10       not matched, s.pk >= 10           -> no clause applies; must not be counted
test("Emit numTargetRowsCopied metrics") {
  withTempView("source") {
    createAndInitTable("pk INT NOT NULL, salary INT, dep STRING",
      """{ "pk": 1, "salary": 100, "dep": "hr" }
        |{ "pk": 2, "salary": 200, "dep": "software" }
        |{ "pk": 3, "salary": 300, "dep": "hr" }
        |{ "pk": 4, "salary": 400, "dep": "marketing" }
        |{ "pk": 5, "salary": 500, "dep": "executive" }
        |""".stripMargin)

    // pk=10 is deliberately included: it matches no target row and fails the
    // NOT MATCHED condition, so it exercises the "no clause applies" path.
    val sourceDF = Seq(1, 2, 6, 10).toDF("pk")
    sourceDF.createOrReplaceTempView("source")

    val mergeExec = findMergeExec {
      s"""MERGE INTO $tableNameAsString t
         |USING source s
         |ON t.pk = s.pk
         |WHEN MATCHED AND salary < 200 THEN
         | UPDATE SET salary = 1000
         |WHEN NOT MATCHED BY SOURCE AND salary > 400 THEN
         | UPDATE SET salary = -1
         |WHEN NOT MATCHED AND s.pk < 10 THEN
         | INSERT (pk, salary, dep) VALUES (s.pk, -1, "dummy")
         |""".stripMargin
    }

    mergeExec.metrics.get("numTargetRowsCopied") match {
      // The message is kept on a single line: a plain string literal cannot span lines.
      case Some(metric) => assert(metric.value == 3, "3 rows copied without updates")
      // Name the metric that was actually looked up so a failure is easy to trace.
      case None => fail("numTargetRowsCopied metric not found")
    }

    checkAnswer(
      sql(s"SELECT * FROM $tableNameAsString"),
      Seq(
        Row(1, 1000, "hr"), // updated
        Row(2, 200, "software"),
        Row(3, 300, "hr"),
        Row(4, 400, "marketing"),
        Row(5, -1, "executive"), // updated
        Row(6, -1, "dummy"))) // inserted
  }
}
```
it will fail `assert(metric.value == 3, "3 rows copied without updates")`
because it will end up incrementing the metric for the source row with pk=10.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]