This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 2974e625aae1 [SPARK-48128][SQL] For BitwiseCount / bit_count 
expression, fix codegen syntax error for boolean type inputs
2974e625aae1 is described below

commit 2974e625aae16e8711a1d115731fdfe516752899
Author: Josh Rosen <joshro...@databricks.com>
AuthorDate: Sat May 4 11:49:20 2024 -0700

    [SPARK-48128][SQL] For BitwiseCount / bit_count expression, fix codegen 
syntax error for boolean type inputs
    
    ### What changes were proposed in this pull request?
    
    This PR fixes an issue where `BitwiseCount` / `bit_count` of boolean inputs 
would cause codegen to generate syntactically invalid Java code that does not 
compile, triggering errors like
    
    ```
     java.util.concurrent.ExecutionException: 
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 41, 
Column 11: Failed to compile: org.codehaus.commons.compiler.CompileException: 
File 'generated.java', Line 41, Column 11: Unexpected token "if" in primary
    ```
    
    Even though this code has test cases in `bitwise.sql` via the query test 
framework, those existing test cases were insufficient to find this problem: I 
believe that is because the example queries were constant-folded using the 
interpreted path, leaving the codegen path without test coverage.
    
    This PR fixes the codegen issue and adds explicit expression tests to 
ensure that the same tests run on both the codegen and interpreted paths.
    
    ### Why are the changes needed?
    
    Fix a rare codegen to interpreted fallback issue, which may harm query 
performance.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added new test cases to BitwiseExpressionsSuite.scala, copied from the 
existing `bitwise.sql` query test case file.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #46382 from JoshRosen/SPARK-48128-bit_count_codegen.
    
    Authored-by: Josh Rosen <joshro...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit 96f65c950064d330245dc53fcd50cf6d9753afc8)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../catalyst/expressions/bitwiseExpressions.scala  |  2 +-
 .../expressions/BitwiseExpressionsSuite.scala      | 41 ++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
index 6061f625ef07..183e5d6697e9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
@@ -229,7 +229,7 @@ case class BitwiseCount(child: Expression)
   override def prettyName: String = "bit_count"
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = 
child.dataType match {
-    case BooleanType => defineCodeGen(ctx, ev, c => s"if ($c) 1 else 0")
+    case BooleanType => defineCodeGen(ctx, ev, c => s"($c) ? 1 : 0")
     case _ => defineCodeGen(ctx, ev, c => s"java.lang.Long.bitCount($c)")
   }
 
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala
index 4cd5f3e861ac..5bd1bc346c02 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala
@@ -133,6 +133,47 @@ class BitwiseExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     }
   }
 
+  test("BitCount") {
+    // null
+    val nullLongLiteral = Literal.create(null, LongType)
+    val nullIntLiteral = Literal.create(null, IntegerType)
+    val nullBooleanLiteral = Literal.create(null, BooleanType)
+    checkEvaluation(BitwiseCount(nullLongLiteral), null)
+    checkEvaluation(BitwiseCount(nullIntLiteral), null)
+    checkEvaluation(BitwiseCount(nullBooleanLiteral), null)
+
+    // boolean
+    checkEvaluation(BitwiseCount(Literal(true)), 1)
+    checkEvaluation(BitwiseCount(Literal(false)), 0)
+
+    // byte/tinyint
+    checkEvaluation(BitwiseCount(Literal(1.toByte)), 1)
+    checkEvaluation(BitwiseCount(Literal(2.toByte)), 1)
+    checkEvaluation(BitwiseCount(Literal(3.toByte)), 2)
+
+    // short/smallint
+    checkEvaluation(BitwiseCount(Literal(1.toShort)), 1)
+    checkEvaluation(BitwiseCount(Literal(2.toShort)), 1)
+    checkEvaluation(BitwiseCount(Literal(3.toShort)), 2)
+
+    // int
+    checkEvaluation(BitwiseCount(Literal(1)), 1)
+    checkEvaluation(BitwiseCount(Literal(2)), 1)
+    checkEvaluation(BitwiseCount(Literal(3)), 2)
+
+    // long/bigint
+    checkEvaluation(BitwiseCount(Literal(1L)), 1)
+    checkEvaluation(BitwiseCount(Literal(2L)), 1)
+    checkEvaluation(BitwiseCount(Literal(3L)), 2)
+
+    // negative num
+    checkEvaluation(BitwiseCount(Literal(-1L)), 64)
+
+    // edge value
+    checkEvaluation(BitwiseCount(Literal(9223372036854775807L)), 63)
+    checkEvaluation(BitwiseCount(Literal(-9223372036854775808L)), 1)
+  }
+
   test("BitGet") {
     val nullLongLiteral = Literal.create(null, LongType)
     val nullIntLiteral = Literal.create(null, IntegerType)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to