bkmgit commented on a change in pull request #11882:
URL: https://github.com/apache/arrow/pull/11882#discussion_r775463859



##########
File path: cpp/src/arrow/util/bit_block_counter.h
##########
@@ -424,6 +565,240 @@ class ARROW_EXPORT OptionalBinaryBitBlockCounter {
   }
 };
 
+/// \brief A class that computes popcounts on the result of bitwise operations
+/// between three bitmaps, 64 bits at a time. A 64-bit word is loaded from each
+/// bitmap, then the popcount is computed on e.g. the bitwise-and of the three
+/// words.
+class ARROW_EXPORT TernaryBitBlockCounter {
+ public:
+  TernaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
+                         const uint8_t* mid_bitmap, int64_t mid_offset,
+                         const uint8_t* right_bitmap, int64_t right_offset,
+                         int64_t length)
+      : left_bitmap_(util::MakeNonNull(left_bitmap) + left_offset / 8),
+        left_offset_(left_offset % 8),
+        mid_bitmap_(util::MakeNonNull(mid_bitmap) + mid_offset / 8),
+        mid_offset_(mid_offset % 8),
+        right_bitmap_(util::MakeNonNull(right_bitmap) + right_offset / 8),
+        right_offset_(right_offset % 8),
+        bits_remaining_(length) {}
+
+  /// \brief Return the popcount of the bitwise-and of the next run of
+  /// available bits, up to 64. The returned pair contains the size of run and
+  /// the number of true values. The last block will have a length less than 64
+  /// if the bitmap length is not a multiple of 64, and will return 0-length
+  /// blocks in subsequent invocations.
+  BitBlockCount NextAndAndWord() { return NextWord<detail::BitBlockAndAnd>(); }
+
+  /// \brief Computes "x & ~y & ~z" block for each available run of bits.
+  BitBlockCount NextAndNotAndNotWord() {
+    return NextWord<detail::BitBlockAndNotAndNot>();
+  }
+
+  /// \brief Computes "~x & y & ~z" block for each available run of bits.
+  BitBlockCount NextNotAndAndNotWord() {
+    return NextWord<detail::BitBlockNotAndAndNot>();
+  }
+
+  /// \brief Computes "~x & ~y & z" block for each available run of bits.
+  BitBlockCount NextNotAndNotAndWord() {
+    return NextWord<detail::BitBlockNotAndNotAnd>();
+  }
+
+  /// \brief Computes "~x & y & z" block for each available run of bits.
+  BitBlockCount NextNotAndAndWord() { return 
NextWord<detail::BitBlockNotAndAnd>(); }
+
+  /// \brief Computes "x & ~y & z" block for each available run of bits.
+  BitBlockCount NextAndNotAndWord() { return 
NextWord<detail::BitBlockAndNotAnd>(); }
+
+  /// \brief Computes "x & y & ~z" block for each available run of bits.
+  BitBlockCount NextAndAndNotWord() { return 
NextWord<detail::BitBlockNotAndNotAnd>(); }
+
+  /// \brief Computes "x | y | z" block for each available run of bits.
+  BitBlockCount NextOrOrWord() { return NextWord<detail::BitBlockOrOr>(); }
+
+  /// \brief Computes "x | ~y | z" block for each available run of bits.
+  BitBlockCount NextOrNotOrWord() { return 
NextWord<detail::BitBlockOrNotOr>(); }
+
+  /// \brief Computes "~x | y | z" block for each available run of bits.
+  BitBlockCount NextNotOrOrWord() { return 
NextWord<detail::BitBlockNotOrOr>(); }
+
+  /// \brief Computes "x | y | ~z" block for each available run of bits.
+  BitBlockCount NextOrOrNotWord() { return 
NextWord<detail::BitBlockOrOrNot>(); }
+
+  /// \brief Computes "~x | y | ~z" block for each available run of bits.
+  BitBlockCount NextNotOrOrNotWord() { return 
NextWord<detail::BitBlockNotOrOrNot>(); }
+
+  /// \brief Computes "x | ~y | ~z" block for each available run of bits.
+  BitBlockCount NextOrNotOrNotWord() { return 
NextWord<detail::BitBlockOrNotOrNot>(); }
+
+  /// \brief Computes "~x | ~y | z" block for each available run of bits.
+  BitBlockCount NextNotOrNotOrWord() { return 
NextWord<detail::BitBlockNotOrNotOr>(); }
+
+ private:
+  template <template <typename T> class Op>
+  BitBlockCount NextWord() {
+    using detail::LoadWord;
+    using detail::ShiftWord;
+
+    if (!bits_remaining_) {
+      return {0, 0};
+    }
+    // When the offset is > 0, we need there to be a word beyond the last 
aligned
+    // word in the bitmap for the bit shifting logic.
+    constexpr int64_t kWordBits = BitBlockCounter::kWordBits;
+    const int64_t bits_required_to_use_words =
+        std::max({left_offset_ == 0 ? 64 : 64 + (64 - left_offset_),
+                  mid_offset_ == 0 ? 64 : 64 + (64 - mid_offset_),
+                  right_offset_ == 0 ? 64 : 64 + (64 - right_offset_)});
+    if (bits_remaining_ < bits_required_to_use_words) {
+      const int16_t run_length =
+          static_cast<int16_t>(std::min(bits_remaining_, kWordBits));
+      int16_t popcount = 0;
+      for (int64_t i = 0; i < run_length; ++i) {
+        if (Op<bool>::Call(bit_util::GetBit(left_bitmap_, left_offset_ + i),
+                           bit_util::GetBit(mid_bitmap_, mid_offset_ + i),
+                           bit_util::GetBit(right_bitmap_, right_offset_ + 
i))) {
+          ++popcount;
+        }
+      }
+      // This code path should trigger _at most_ 2 times. In the "two times"
+      // case, the first time the run length will be a multiple of 8.
+      left_bitmap_ += run_length / 8;
+      mid_bitmap_ += run_length / 8;
+      right_bitmap_ += run_length / 8;
+      bits_remaining_ -= run_length;
+      return {run_length, popcount};
+    }
+
+    int64_t popcount = 0;
+    if (left_offset_ == 0 && right_offset_ == 0 && mid_offset_ == 0) {
+      popcount = bit_util::PopCount(Op<uint64_t>::Call(
+          LoadWord(left_bitmap_), LoadWord(mid_bitmap_), 
LoadWord(right_bitmap_)));
+    } else {
+      auto left_word =
+          ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), 
left_offset_);
+      auto mid_word =
+          ShiftWord(LoadWord(mid_bitmap_), LoadWord(mid_bitmap_ + 8), 
mid_offset_);
+      auto right_word =
+          ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), 
right_offset_);
+      popcount = bit_util::PopCount(Op<uint64_t>::Call(left_word, mid_word, 
right_word));
+    }
+    left_bitmap_ += kWordBits / 8;
+    mid_bitmap_ += kWordBits / 8;
+    right_bitmap_ += kWordBits / 8;
+    bits_remaining_ -= kWordBits;
+    return {64, static_cast<int16_t>(popcount)};
+  }
+
+  const uint8_t* left_bitmap_;
+  int64_t left_offset_;
+  const uint8_t* mid_bitmap_;
+  int64_t mid_offset_;
+  const uint8_t* right_bitmap_;
+  int64_t right_offset_;
+  int64_t bits_remaining_;
+};
+
+class ARROW_EXPORT OptionalTernaryBitBlockCounter {
+ public:
+  // Any bitmap may be NULLPTR
+  OptionalTernaryBitBlockCounter(const uint8_t* left_bitmap, int64_t 
left_offset,

Review comment:
       Removed since at present not used.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to