AntoinePrv commented on code in PR #47294:
URL: https://github.com/apache/arrow/pull/47294#discussion_r2363120215


##########
cpp/src/arrow/util/rle_encoding_internal.h:
##########
@@ -299,385 +612,663 @@ class RleEncoder {
   uint8_t* literal_indicator_byte_;
 };
 
+/************************
+ *  RleBitPackedParser  *
+ ************************/
+
+template <typename Handler>
+void RleBitPackedParser::Parse(Handler&& handler) {
+  while (!exhausted()) {
+    auto [read, control] = PeekImpl(handler);
+    data_ += read;
+    data_size_ -= read;
+    if (ARROW_PREDICT_FALSE(control == ControlFlow::Break)) {
+      break;
+    }
+  }
+}
+
+namespace internal {
+/// The maximum value representable by `T`, expressed in the corresponding unsigned type.
+template <typename T>
+constexpr auto max_size_for_v =
+    static_cast<std::make_unsigned_t<T>>(std::numeric_limits<T>::max());
+
+}  // namespace internal
+
+template <typename Handler>
+auto RleBitPackedParser::PeekImpl(Handler&& handler) const
+    -> std::pair<rle_size_t, ControlFlow> {
+  ARROW_DCHECK(!exhausted());
+
+  constexpr auto kMaxSize = bit_util::kMaxLEB128ByteLenFor<uint32_t>;
+  uint32_t run_len_type = 0;
+  const auto header_bytes = bit_util::ParseLeadingLEB128(data_, kMaxSize, 
&run_len_type);
+
+  if (ARROW_PREDICT_FALSE(header_bytes == 0)) {
+    // Malformed LEB128 data
+    return {};
+  }
+
+  const bool is_bit_packed = run_len_type & 1;
+  const uint32_t count = run_len_type >> 1;
+  if (is_bit_packed) {
+    constexpr auto kMaxCount = 
bit_util::CeilDiv(internal::max_size_for_v<rle_size_t>, 8);
+    if (ARROW_PREDICT_FALSE(count == 0 || count > kMaxCount)) {
+      // Illegal number of encoded values
+      return {0, ControlFlow::Break};
+    }
+
+    const auto values_count = static_cast<rle_size_t>(count * 8);
+    ARROW_DCHECK_LT(count, internal::max_size_for_v<rle_size_t>);

Review Comment:
   I changed it to a check that is useful before the `static_cast`:
   ```
       ARROW_DCHECK_LT(static_cast<uint64_t>(count) * 8, 
internal::max_size_for_v<rle_size_t>);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to