Dandandan commented on code in PR #23011:
URL: https://github.com/apache/datafusion/pull/23011#discussion_r3453139809
##########
datafusion/physical-expr/src/expressions/in_list/primitive_filter.rs:
##########
@@ -15,16 +15,77 @@
// specific language governing permissions and limitations
// under the License.
-use arrow::array::{
- Array, ArrayRef, AsArray, BooleanArray, downcast_array,
downcast_dictionary_array,
-};
+//! Optimized primitive type filters for InList expressions.
+//!
+//! This module provides membership tests for Arrow primitive types.
+
+use arrow::array::{Array, ArrayRef, AsArray, BooleanArray};
use arrow::buffer::{BooleanBuffer, NullBuffer};
-use arrow::compute::take;
use arrow::datatypes::*;
use datafusion_common::{HashSet, Result, exec_datafusion_err};
use std::hash::{Hash, Hasher};
-use super::static_filter::StaticFilter;
+use super::result::build_in_list_result;
+use super::static_filter::{StaticFilter, handle_dictionary};
+
+/// Bitmap filter for O(1) set membership via single bit test.
+///
+/// `UInt8` has only 256 possible values, so the filter stores membership in a
+/// 256-bit bitmap instead of using a hash table.
+pub(super) struct UInt8BitmapFilter {
+ null_count: usize,
+ bits: [u64; 4],
+}
+
+impl UInt8BitmapFilter {
+ pub(super) fn try_new(in_array: &ArrayRef) -> Result<Self> {
+ let prim_array =
in_array.as_primitive_opt::<UInt8Type>().ok_or_else(|| {
+ exec_datafusion_err!("UInt8BitmapFilter: expected UInt8 array")
+ })?;
+ let mut bits = [0u64; 4];
+ for v in prim_array.iter().flatten() {
Review Comment:
* `prim_array.iter().flatten()` is generally slower than iterating over
`.values()` (when there are no nulls). Also, for nulls I think it is probably
faster to include the nulls in the `bits[index / 64] |=` calculation.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]