maropu commented on a change in pull request #29804: URL: https://github.com/apache/spark/pull/29804#discussion_r493991276
########## File path: sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala ########## @@ -1012,4 +1014,43 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { } } } + + test("SPARK-32859: disable unnecessary bucketed table scan based on query plan") { + withTable("t1", "t2") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + df2.write.format("parquet").bucketBy(4, "i").saveAsTable("t2") + + def checkNumBucketedScan(query: String, expectedNumBucketedScan: Int): Unit = { + val plan = sql(query).queryExecution.executedPlan + val bucketedScan = plan.collect { case s: FileSourceScanExec if s.bucketedScan => s } + assert(bucketedScan.length == expectedNumBucketedScan) + } + + Seq( + ("SELECT * FROM t1 JOIN t2 ON t1.i = t2.i", 1, 2), + ("SELECT * FROM t1 JOIN t2 ON t1.i = t2.j", 1, 2), + ("SELECT * FROM t1 JOIN t2 ON t1.j = t2.j", 0, 2), + ("SELECT SUM(i) FROM t1 GROUP BY i", 1, 1), + ("SELECT SUM(i) FROM t1 GROUP BY j", 0, 1), + ("SELECT * FROM t1 WHERE i = 1", 1, 1), + ("SELECT * FROM t1 WHERE j = 1", 0, 1), Review comment: I left two comments about the test: - Could you add more test cases, e.g., multiple join cases, multiple bucket column cases, etc.? - Could you split this single test unit into multiple ones with meaningful test titles, e.g., `test("SPARK-32859: disable unnecessary bucketed table scan based on query plan - multiple join test")`? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org