viirya commented on a change in pull request #33494: URL: https://github.com/apache/spark/pull/33494#discussion_r678891410
########## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala ########## @@ -396,27 +396,25 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils } } - // TODO (SPARK-36272): Reenable this after we figure out why the expected size doesn't - // match after we adjust building's memory settings. - ignore("SPARK-32629: ShuffledHashJoin(full outer) metrics") { + test("SPARK-32629: ShuffledHashJoin(full outer) metrics") { val uniqueLeftDf = Seq(("1", "1"), ("11", "11")).toDF("key", "value") val nonUniqueLeftDf = Seq(("1", "1"), ("1", "2"), ("11", "11")).toDF("key", "value") val rightDf = (1 to 10).map(i => (i.toString, i.toString)).toDF("key2", "value") Seq( // Test unique key on build side - (uniqueLeftDf, rightDf, 11, 134228048, 10, 134221824), + (uniqueLeftDf, rightDf, 11, 10), // Test non-unique key on build side - (nonUniqueLeftDf, rightDf, 12, 134228552, 11, 134221824) - ).foreach { case (leftDf, rightDf, fojRows, fojBuildSize, rojRows, rojBuildSize) => + (nonUniqueLeftDf, rightDf, 12, 11) + ).foreach { case (leftDf, rightDf, fojRows, rojRows) => val fojDf = leftDf.hint("shuffle_hash").join( rightDf, $"key" === $"key2", "full_outer") fojDf.collect() val fojPlan = fojDf.queryExecution.executedPlan.collectFirst { case s: ShuffledHashJoinExec => s } assert(fojPlan.isDefined, "The query plan should have shuffled hash join") - testMetricsInSparkPlanOperator(fojPlan.get, - Map("numOutputRows" -> fojRows, "buildDataSize" -> fojBuildSize)) + testMetricsInSparkPlanOperator(fojPlan.get, Map("numOutputRows" -> fojRows)) + val fojBuildSize = fojPlan.get.metrics("buildDataSize").value // Test right outer join as well to verify build data size to be different // from full outer join. This makes sure we take extra BitSet/OpenHashSet Review comment: That's fine. The current change looks good. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org