HaoYang670 commented on code in PR #3324: URL: https://github.com/apache/arrow-datafusion/pull/3324#discussion_r962144673
##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -398,6 +357,26 @@ fn set_contains_utf8<OffsetSize: OffsetSizeTrait>(
     collection_contains_check!(array, native_set, negated, contains_null)
 }
 
+fn set_contains_binary<OffsetSize: OffsetSizeTrait>(
+    array: &GenericBinaryArray<OffsetSize>,
+    set: &HashSet<ScalarValue>,
+    negated: bool,
+) -> ColumnarValue {
+    let contains_null = set.iter().any(|v| v.is_null());
+    let native_array = set
+        .iter()
+        .flat_map(|v| match v {
+            Binary(v) | LargeBinary(v) => v.as_deref(),
+            datatype => {
+                unreachable!("InList can't reach other data type {} for {}.", datatype, v)
+            }
+        })
+        .collect::<Vec<_>>();
+    let native_set: HashSet<&[u8]> = HashSet::from_iter(native_array);

Review Comment:
I will do this in a follow-up patch as we have this problem in both `binary` and `string`

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org