iemejia commented on code in PR #12390:
URL: https://github.com/apache/gluten/pull/12390#discussion_r3491499352
##########
gluten-delta/src/test/scala/org/apache/gluten/execution/DeltaSuite.scala:
##########
@@ -794,4 +796,67 @@ abstract class DeltaSuite extends
WholeStageTransformerSuite {
checkAnswer(df, Row(0, null) :: Row(101, Seq(Row("a", 1), null)) :: Nil)
}
}
+
+ test("post-transform rules are no-op on non-Delta plans") {
+ withTempPath {
+ p =>
+ val path = p.getCanonicalPath
+ spark.range(100).selectExpr("id", "id * 2 as
value").write.parquet(path)
+ val df = spark.read.parquet(path)
+ val plan = df.queryExecution.executedPlan
+
+ // Rules should return the plan unchanged (early-exit guard)
+ val transformed = DeltaPostTransformRules.rules.foldLeft(plan) {
+ (p, rule) => rule(p)
+ }
+ // No DeltaScanTransformer in the plan, so rules should be identity
+ assert(
+ !transformed.exists(_.isInstanceOf[DeltaScanTransformer]),
+ "Non-Delta plan should not contain DeltaScanTransformer")
+ assert(
+ !plan.exists(_.isInstanceOf[DeltaScanTransformer]),
+ "Original plan should not contain DeltaScanTransformer")
+ }
+ }
+
+ test("post-transform rules produce DeltaScanTransformer for Delta tables") {
Review Comment:
Fixed. Renamed the test to "Delta scan is offloaded to DeltaScanTransformer"
to accurately reflect what it verifies -- that the Gluten planning pipeline
produces a DeltaScanTransformer in the executed plan.
##########
gluten-delta/src/test/scala/org/apache/gluten/execution/DeltaSuite.scala:
##########
@@ -794,4 +796,67 @@ abstract class DeltaSuite extends
WholeStageTransformerSuite {
checkAnswer(df, Row(0, null) :: Row(101, Seq(Row("a", 1), null)) :: Nil)
}
}
+
+ test("post-transform rules are no-op on non-Delta plans") {
+ withTempPath {
+ p =>
+ val path = p.getCanonicalPath
+ spark.range(100).selectExpr("id", "id * 2 as
value").write.parquet(path)
+ val df = spark.read.parquet(path)
+ val plan = df.queryExecution.executedPlan
+
+ // Rules should return the plan unchanged (early-exit guard)
+ val transformed = DeltaPostTransformRules.rules.foldLeft(plan) {
+ (p, rule) => rule(p)
+ }
+ // No DeltaScanTransformer in the plan, so rules should be identity
+ assert(
+ !transformed.exists(_.isInstanceOf[DeltaScanTransformer]),
+ "Non-Delta plan should not contain DeltaScanTransformer")
+ assert(
+ !plan.exists(_.isInstanceOf[DeltaScanTransformer]),
+ "Original plan should not contain DeltaScanTransformer")
+ }
+ }
+
+ test("post-transform rules produce DeltaScanTransformer for Delta tables") {
+ withTempPath {
+ p =>
+ import testImplicits._
+ val path = p.getCanonicalPath
+ Seq(1, 2, 3, 4,
5).toDF("id").coalesce(1).write.format("delta").save(path)
+ val df = spark.read.format("delta").load(path)
+ val plan = df.queryExecution.executedPlan
+
+ // Delta scan should be offloaded to DeltaScanTransformer
+ val deltaScans = plan.collect { case s: DeltaScanTransformer => s }
+ assert(deltaScans.nonEmpty, "Delta plan should contain
DeltaScanTransformer")
+ }
+ }
+
+ test("scanFilters returns consistent results on repeated access") {
+ withTempPath {
+ p =>
+ import testImplicits._
+ val path = p.getCanonicalPath
+ Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "value")
+ .coalesce(1)
+ .write
+ .format("delta")
+ .save(path)
+ val df = spark.read.format("delta").load(path).where("id > 1")
+ val plan = df.queryExecution.executedPlan
+ val scans = plan.collect { case s: DeltaScanTransformer => s }
+
+ if (scans.nonEmpty) {
+ val scan = scans.head
+ // scanFilters is now a lazy val; repeated calls should return the
same instance
+ val first = scan.scanFilters
+ val second = scan.scanFilters
+ val third = scan.scanFilters
+ assert(first eq second, "scanFilters should return the same cached
instance")
+ assert(second eq third, "scanFilters should return the same cached
instance")
+ }
Review Comment:
Fixed. Replaced `if (scans.nonEmpty)` with `assert(scans.nonEmpty, ...)` so
the test fails loudly if DeltaScanTransformer offloading regresses.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]