This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new d65fb860d2 Remove unnecessary bit counting code from spark `bit_count`
(#18841)
d65fb860d2 is described below
commit d65fb860d2a61a78661438333800a05a515aa7b7
Author: Pepijn Van Eeckhoudt <[email protected]>
AuthorDate: Fri Nov 21 15:31:50 2025 +0100
Remove unnecessary bit counting code from spark `bit_count` (#18841)
## Which issue does this PR close?
- Followup to #18225 and PR #18322
## Rationale for this change
Spark's `bit_count` function always operates on 64-bit values, while
the original `bit_count` implementation in `datafusion_spark` operated
on the native size of the input value.
In order to fix this, a custom bit-counting implementation was ported
over from the Java Spark implementation. This isn't really necessary
though. Widening signed integers to `i64` and then using
`i64::count_ones` will get you the exact same result and is less
obscure.
## What changes are included in this PR?
Remove custom `bitcount` logic and use `i64::count_ones` instead.
## Are these changes tested?
Covered by existing tests that were added for #18225
## Are there any user-facing changes?
No
---
datafusion/spark/src/function/bitwise/bit_count.rs | 25 ++++++----------------
1 file changed, 6 insertions(+), 19 deletions(-)
diff --git a/datafusion/spark/src/function/bitwise/bit_count.rs
b/datafusion/spark/src/function/bitwise/bit_count.rs
index 4b414b57cb..1af5598a1d 100644
--- a/datafusion/spark/src/function/bitwise/bit_count.rs
+++ b/datafusion/spark/src/function/bitwise/bit_count.rs
@@ -102,24 +102,25 @@ fn spark_bit_count(value_array: &[ArrayRef]) ->
Result<ArrayRef> {
DataType::Int8 => {
let result: Int32Array = value_array
.as_primitive::<Int8Type>()
- .unary(|v| bit_count(v.into()));
+ .unary(|v| (v as i64).count_ones() as i32);
Ok(Arc::new(result))
}
DataType::Int16 => {
let result: Int32Array = value_array
.as_primitive::<Int16Type>()
- .unary(|v| bit_count(v.into()));
+ .unary(|v| (v as i64).count_ones() as i32);
Ok(Arc::new(result))
}
DataType::Int32 => {
let result: Int32Array = value_array
.as_primitive::<Int32Type>()
- .unary(|v| bit_count(v.into()));
+ .unary(|v| (v as i64).count_ones() as i32);
Ok(Arc::new(result))
}
DataType::Int64 => {
- let result: Int32Array =
- value_array.as_primitive::<Int64Type>().unary(bit_count);
+ let result: Int32Array = value_array
+ .as_primitive::<Int64Type>()
+ .unary(|v| v.count_ones() as i32);
Ok(Arc::new(result))
}
DataType::UInt8 => {
@@ -155,20 +156,6 @@ fn spark_bit_count(value_array: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-// Here’s the equivalent Rust implementation of the bitCount function (similar
to Apache Spark's bitCount for LongType)
-// Spark:
https://github.com/apache/spark/blob/ac717dd7aec665de578d7c6b0070e8fcdde3cea9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala#L243
-// Java impl:
https://github.com/openjdk/jdk/blob/d226023643f90027a8980d161ec6d423887ae3ce/src/java.base/share/classes/java/lang/Long.java#L1584
-fn bit_count(i: i64) -> i32 {
- let mut u = i as u64;
- u = u - ((u >> 1) & 0x5555555555555555);
- u = (u & 0x3333333333333333) + ((u >> 2) & 0x3333333333333333);
- u = (u + (u >> 4)) & 0x0f0f0f0f0f0f0f0f;
- u = u + (u >> 8);
- u = u + (u >> 16);
- u = u + (u >> 32);
- (u as i32) & 0x7f
-}
-
#[cfg(test)]
mod tests {
use super::*;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]