alamb commented on code in PR #18449:
URL: https://github.com/apache/datafusion/pull/18449#discussion_r2542651928


##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -114,100 +103,161 @@ where
             _ => {}
         }
 
-        let v = v.as_any().downcast_ref::<T>().unwrap();
-        let in_array = &self.array;
-        let has_nulls = in_array.null_count() != 0;
+        let needle_nulls = v.logical_nulls();
+        let needle_nulls = needle_nulls.as_ref();
+        let haystack_has_nulls = self.in_array.null_count() != 0;
+
+        with_hashes([v], &self.state, |hashes| {
+            let cmp = make_comparator(v, &self.in_array, 
SortOptions::default())?;
+            Ok((0..v.len())
+                .map(|i| {
+                    // SQL three-valued logic: null IN (...) is always null
+                    if needle_nulls.is_some_and(|nulls| nulls.is_null(i)) {
+                        return None;
+                    }
 
-        Ok(ArrayIter::new(v)
-            .map(|v| {
-                v.and_then(|v| {
-                    let hash = v.hash_one(&self.hash_set.state);
+                    let hash = hashes[i];
                     let contains = self
-                        .hash_set
                         .map
                         .raw_entry()
-                        .from_hash(hash, |idx| 
in_array.value(*idx).is_equal(&v))
+                        .from_hash(hash, |idx| cmp(i, *idx).is_eq())
                         .is_some();
 
                     match contains {
                         true => Some(!negated),
-                        false if has_nulls => None,
+                        false if haystack_has_nulls => None,
                         false => Some(negated),
                     }
                 })
-            })
-            .collect())
+                .collect())
+        })
     }
+}
 
-    fn has_nulls(&self) -> bool {
-        self.array.null_count() != 0
+fn instantiate_static_filter(
+    in_array: ArrayRef,
+) -> Result<Arc<dyn StaticFilter + Send + Sync>> {
+    match in_array.data_type() {
+        DataType::Int32 => 
Ok(Arc::new(Int32StaticFilter::try_new(&in_array)?)),
+        _ => {
+            /* fall through to generic implementation */
+            Ok(Arc::new(ArrayStaticFilter::try_new(in_array)?))
+        }
     }
 }
 
-/// Computes an [`ArrayHashSet`] for the provided [`Array`] if there
-/// are nulls present or there are more than the configured number of
-/// elements.
-///
-/// Note: This is split into a separate function as higher-rank trait bounds 
currently
-/// cause type inference to misbehave
-fn make_hash_set<T>(array: &T) -> ArrayHashSet
-where
-    T: ArrayAccessor,
-    T::Item: IsEqual,
-{
-    let state = RandomState::new();
-    let mut map: HashMap<usize, (), ()> =
-        HashMap::with_capacity_and_hasher(array.len(), ());
-
-    let insert_value = |idx| {
-        let value = array.value(idx);
-        let hash = value.hash_one(&state);
-        if let RawEntryMut::Vacant(v) = map
-            .raw_entry_mut()
-            .from_hash(hash, |x| array.value(*x).is_equal(&value))
-        {
-            v.insert_with_hasher(hash, idx, (), |x| 
array.value(*x).hash_one(&state));
+impl ArrayStaticFilter {
+    /// Computes a [`StaticFilter`] for the provided [`Array`] if there
+    /// are nulls present or there are more than the configured number of
+    /// elements.
+    ///
+    /// Note: This is split into a separate function as higher-rank trait 
bounds currently
+    /// cause type inference to misbehave
+    fn try_new(in_array: ArrayRef) -> Result<ArrayStaticFilter> {
+        // Null type has no natural order - return empty hash set
+        if in_array.data_type() == &DataType::Null {
+            return Ok(ArrayStaticFilter {
+                in_array,
+                state: RandomState::new(),
+                map: HashMap::with_hasher(()),
+            });
         }
-    };
 
-    match array.nulls() {
-        Some(nulls) => {
-            BitIndexIterator::new(nulls.validity(), nulls.offset(), 
nulls.len())
-                .for_each(insert_value)
-        }
-        None => (0..array.len()).for_each(insert_value),
+        let state = RandomState::new();
+        let mut map: HashMap<usize, (), ()> = HashMap::with_hasher(());
+
+        with_hashes([&in_array], &state, |hashes| -> Result<()> {
+            let cmp = make_comparator(&in_array, &in_array, 
SortOptions::default())?;
+
+            let insert_value = |idx| {
+                let hash = hashes[idx];
+                if let RawEntryMut::Vacant(v) = map
+                    .raw_entry_mut()
+                    .from_hash(hash, |x| cmp(*x, idx).is_eq())
+                {
+                    v.insert_with_hasher(hash, idx, (), |x| hashes[*x]);
+                }
+            };
+
+            match in_array.nulls() {
+                Some(nulls) => {
+                    BitIndexIterator::new(nulls.validity(), nulls.offset(), 
nulls.len())
+                        .for_each(insert_value)
+                }
+                None => (0..in_array.len()).for_each(insert_value),
+            }
+
+            Ok(())
+        })?;
+
+        Ok(Self {
+            in_array,
+            state,
+            map,
+        })
     }
+}
 
-    ArrayHashSet { state, map }
+struct Int32StaticFilter {

Review Comment:
   Oh yeah, we should totally do the same thing here for the other types. I'll 
file a ticket to track that



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to