This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new d65fb860d2 Remove unnecessary bit counting code from spark `bit_count` 
(#18841)
d65fb860d2 is described below

commit d65fb860d2a61a78661438333800a05a515aa7b7
Author: Pepijn Van Eeckhoudt <[email protected]>
AuthorDate: Fri Nov 21 15:31:50 2025 +0100

    Remove unnecessary bit counting code from spark `bit_count` (#18841)
    
    ## Which issue does this PR close?
    
    - Followup to #18225 and PR #18322
    
    ## Rationale for this change
    
    Spark's `bit_count` function always operates on 64-bit values, while
    the original `bit_count` implementation in `datafusion_spark` operated
    on the native size of the input value.
    In order to fix this, a custom bit-counting implementation was ported
    over from the Java Spark implementation. This isn't really necessary
    though. Widening signed integers to `i64` and then using
    `i64::count_ones` will get you the exact same result and is less
    obscure.
    
    ## What changes are included in this PR?
    
    Remove custom `bitcount` logic and use `i64::count_ones` instead.
    
    ## Are these changes tested?
    
    Covered by existing tests that were added for #18225
    
    ## Are there any user-facing changes?
    
    No
---
 datafusion/spark/src/function/bitwise/bit_count.rs | 25 ++++++----------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/datafusion/spark/src/function/bitwise/bit_count.rs 
b/datafusion/spark/src/function/bitwise/bit_count.rs
index 4b414b57cb..1af5598a1d 100644
--- a/datafusion/spark/src/function/bitwise/bit_count.rs
+++ b/datafusion/spark/src/function/bitwise/bit_count.rs
@@ -102,24 +102,25 @@ fn spark_bit_count(value_array: &[ArrayRef]) -> 
Result<ArrayRef> {
         DataType::Int8 => {
             let result: Int32Array = value_array
                 .as_primitive::<Int8Type>()
-                .unary(|v| bit_count(v.into()));
+                .unary(|v| (v as i64).count_ones() as i32);
             Ok(Arc::new(result))
         }
         DataType::Int16 => {
             let result: Int32Array = value_array
                 .as_primitive::<Int16Type>()
-                .unary(|v| bit_count(v.into()));
+                .unary(|v| (v as i64).count_ones() as i32);
             Ok(Arc::new(result))
         }
         DataType::Int32 => {
             let result: Int32Array = value_array
                 .as_primitive::<Int32Type>()
-                .unary(|v| bit_count(v.into()));
+                .unary(|v| (v as i64).count_ones() as i32);
             Ok(Arc::new(result))
         }
         DataType::Int64 => {
-            let result: Int32Array =
-                value_array.as_primitive::<Int64Type>().unary(bit_count);
+            let result: Int32Array = value_array
+                .as_primitive::<Int64Type>()
+                .unary(|v| v.count_ones() as i32);
             Ok(Arc::new(result))
         }
         DataType::UInt8 => {
@@ -155,20 +156,6 @@ fn spark_bit_count(value_array: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 }
 
-// Here’s the equivalent Rust implementation of the bitCount function (similar 
to Apache Spark's bitCount for LongType)
-// Spark: 
https://github.com/apache/spark/blob/ac717dd7aec665de578d7c6b0070e8fcdde3cea9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala#L243
-// Java impl: 
https://github.com/openjdk/jdk/blob/d226023643f90027a8980d161ec6d423887ae3ce/src/java.base/share/classes/java/lang/Long.java#L1584
-fn bit_count(i: i64) -> i32 {
-    let mut u = i as u64;
-    u = u - ((u >> 1) & 0x5555555555555555);
-    u = (u & 0x3333333333333333) + ((u >> 2) & 0x3333333333333333);
-    u = (u + (u >> 4)) & 0x0f0f0f0f0f0f0f0f;
-    u = u + (u >> 8);
-    u = u + (u >> 16);
-    u = u + (u >> 32);
-    (u as i32) & 0x7f
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to