Github user rednaxelafx commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20224#discussion_r163799937
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala ---
    @@ -228,4 +229,38 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
           }
         }
       }
    +
    +  test("codegen stage IDs should be preserved in transformations after 
CollapseCodegenStages") {
    +    // test case adapted from DataFrameSuite to trigger ReuseExchange
    +    withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2") {
    +      val df = spark.range(100)
    +      val join = df.join(df, "id")
    +      val plan = join.queryExecution.executedPlan
    +      assert(!plan.find(p =>
    +        p.isInstanceOf[WholeStageCodegenExec] &&
    +          p.asInstanceOf[WholeStageCodegenExec].codegenStageId == 
0).isDefined,
    +        "codegen stage IDs should be preserved through ReuseExchange")
    +      checkAnswer(join, df.toDF)
    +    }
    +  }
    +
    +  test("including codegen stage ID in generated class name should not 
regress codegen caching") {
    +    import testImplicits._
    +
    +    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_USE_ID_IN_CLASS_NAME.key -> 
"true") {
    +      val bytecodeSizeHisto = 
CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE
    +
    +      // the same query run twice should hit the codegen cache
    +      spark.range(3).select('id + 2).collect
    +      val after1 = bytecodeSizeHisto.getCount
    +      spark.range(3).select('id + 2).collect
    +      val after2 = bytecodeSizeHisto.getCount // same query shape as 
above, deliberately
    +      assert(after1 == after2, "Should hit codegen cache. No new 
compilation to bytecode expected")
    +
    +      // a different query can result in codegen cache miss, that's by 
design
    +      spark.range(5).select('id * 2).collect
    +      val after3 = bytecodeSizeHisto.getCount
    +      assert(after3 >= after2, "always")
    --- End diff --
    
    I like that. Updating now.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to