spark git commit: [SPARK-21272] SortMergeJoin LeftAnti does not update numOutputRows
Repository: spark Updated Branches: refs/heads/branch-2.2 40fd0ce7f -> a05edf454 [SPARK-21272] SortMergeJoin LeftAnti does not update numOutputRows ## What changes were proposed in this pull request? Updating numOutputRows metric was missing from one return path of LeftAnti SortMergeJoin. ## How was this patch tested? Non-zero output rows manually seen in metrics. Author: Juliusz Sompolski Closes #18494 from juliuszsompolski/SPARK-21272. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a05edf45 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a05edf45 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a05edf45 Branch: refs/heads/branch-2.2 Commit: a05edf454a67261c89f0f2ecd1fe46bb8cebc257 Parents: 40fd0ce Author: Juliusz Sompolski Authored: Mon Jul 10 09:26:42 2017 -0700 Committer: gatorsmile Committed: Mon Jul 10 09:30:55 2017 -0700 -- .../spark/sql/execution/joins/SortMergeJoinExec.scala | 1 + .../spark/sql/execution/metric/SQLMetricsSuite.scala | 12 2 files changed, 13 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a05edf45/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 26fb610..a772015 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -290,6 +290,7 @@ case class SortMergeJoinExec( currentLeftRow = smjScanner.getStreamedRow val currentRightMatches = smjScanner.getBufferedMatches if (currentRightMatches == null || currentRightMatches.length == 0) { + numOutputRows += 1 return true } var found = false 
http://git-wip-us.apache.org/repos/asf/spark/blob/a05edf45/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index e5442455..79d1fbf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -288,6 +288,18 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext { } } + test("SortMergeJoin(left-anti) metrics") { +val anti = testData2.filter("a > 2") +withTempView("antiData") { + anti.createOrReplaceTempView("antiData") + val df = spark.sql( +"SELECT * FROM testData2 ANTI JOIN antiData ON testData2.a = antiData.a") + testSparkPlanMetrics(df, 1, Map( +0L -> ("SortMergeJoin", Map("number of output rows" -> 4L))) + ) +} + } + test("save metrics") { withTempPath { file => val previousExecutionIds = spark.sharedState.listener.executionIdToData.keySet - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-21272] SortMergeJoin LeftAnti does not update numOutputRows
Repository: spark Updated Branches: refs/heads/master 6a06c4b03 -> 18b3b00ec [SPARK-21272] SortMergeJoin LeftAnti does not update numOutputRows ## What changes were proposed in this pull request? Updating numOutputRows metric was missing from one return path of LeftAnti SortMergeJoin. ## How was this patch tested? Non-zero output rows manually seen in metrics. Author: Juliusz Sompolski Closes #18494 from juliuszsompolski/SPARK-21272. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/18b3b00e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/18b3b00e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/18b3b00e Branch: refs/heads/master Commit: 18b3b00ecfde6c694fb6fee4f4d07d04e3d08ccf Parents: 6a06c4b Author: Juliusz Sompolski Authored: Mon Jul 10 09:26:42 2017 -0700 Committer: gatorsmile Committed: Mon Jul 10 09:26:42 2017 -0700 -- .../spark/sql/execution/joins/SortMergeJoinExec.scala | 1 + .../spark/sql/execution/metric/SQLMetricsSuite.scala | 12 2 files changed, 13 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/18b3b00e/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 8445c26..639b8e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -290,6 +290,7 @@ case class SortMergeJoinExec( currentLeftRow = smjScanner.getStreamedRow val currentRightMatches = smjScanner.getBufferedMatches if (currentRightMatches == null || currentRightMatches.length == 0) { + numOutputRows += 1 return true } var found = false 
http://git-wip-us.apache.org/repos/asf/spark/blob/18b3b00e/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index cb3405b..2911cbb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -483,6 +483,18 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext { } } + test("SortMergeJoin(left-anti) metrics") { +val anti = testData2.filter("a > 2") +withTempView("antiData") { + anti.createOrReplaceTempView("antiData") + val df = spark.sql( +"SELECT * FROM testData2 ANTI JOIN antiData ON testData2.a = antiData.a") + testSparkPlanMetrics(df, 1, Map( +0L -> ("SortMergeJoin", Map("number of output rows" -> 4L))) + ) +} + } + test("save metrics") { withTempPath { file => // person creates a temporary view. get the DF before listing previous execution IDs - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org