alamb commented on code in PR #7962: URL: https://github.com/apache/arrow-rs/pull/7962#discussion_r2225180471
##########
arrow-buffer/src/util/bit_iterator.rs:
##########
@@ -231,6 +231,63 @@ impl Iterator for BitIndexIterator<'_> {
     }
 }
 
+/// An iterator of u32 whose index in a provided bitmask is true
+/// Respects arbitrary offsets and slice lead/trail padding exactly like BitIndexIterator
+#[derive(Debug)]
+pub struct BitIndexU32Iterator<'a> {
+    curr: u64,
+    chunk_offset: i64,
+    iter: UnalignedBitChunkIterator<'a>,
+}
+
+impl<'a> BitIndexU32Iterator<'a> {
+    /// Create a new [BitIndexU32Iterator] from the provided buffer,
+    /// offset and len in bits.
+    pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
+        // Build the aligned chunks (including prefix/suffix masked)
+        let chunks = UnalignedBitChunk::new(buffer, offset, len);
+        let mut iter = chunks.iter();
+
+        // First 64-bit word (masked for lead padding), or 0 if empty
+        let curr = iter.next().unwrap_or(0);
+        // Negative lead padding ensures the first bit in curr maps to index 0
+        let chunk_offset = -(chunks.lead_padding() as i64);
+
+        Self {
+            curr,
+            chunk_offset,
+            iter,
+        }
+    }
+}
+
+impl<'a> Iterator for BitIndexU32Iterator<'a> {

Review Comment:
I will run a test to compare the performance too.

Update: I made https://github.com/apache/arrow-rs/pull/7979 and queued up benchmark runs.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org