Repository: arrow Updated Branches: refs/heads/master 5da6b8795 -> f9d1e1be7
ARROW-1611: [C++] Add BitmapWriter, do not perform out of bounds read in BitmapReader when length is 0 close #1133 close #1131 Author: Rene Sugar <rene.su...@gmail.com> Closes #1137 from wesm/ARROW-1611 and squashes the following commits: 5bff0bd [Rene Sugar] ARROW-1611 Do not read bitmap when length is zero, add internal::BitmapWriter Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/f9d1e1be Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/f9d1e1be Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/f9d1e1be Branch: refs/heads/master Commit: f9d1e1be756636cb8a280ac3723ed3dea2abe204 Parents: 5da6b87 Author: Rene Sugar <rene.su...@gmail.com> Authored: Tue Sep 26 21:56:33 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Tue Sep 26 21:56:33 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/util/bit-util-test.cc | 36 ++++++++ cpp/src/arrow/util/bit-util.h | 139 +++++++++++++++++++++---------- 2 files changed, 132 insertions(+), 43 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/f9d1e1be/cpp/src/arrow/util/bit-util-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc index a5c6cec..5a66d7e 100644 --- a/cpp/src/arrow/util/bit-util-test.cc +++ b/cpp/src/arrow/util/bit-util-test.cc @@ -91,6 +91,42 @@ TEST(BitmapReader, DoesNotReadOutOfBounds) { ASSERT_TRUE(r2.IsNotSet()); r2.Next(); } + + // Does not access invalid memory + internal::BitmapReader r3(nullptr, 0, 0); +} + +TEST(BitmapWriter, DoesNotWriteOutOfBounds) { + uint8_t bitmap[16] = {0}; + + const int length = 128; + + int64_t num_values = 0; + + internal::BitmapWriter r1(bitmap, 0, length); + + // If this were to write out of bounds, valgrind would tell us + for (int i = 0; i < length; ++i) { + r1.Set(); + r1.Clear(); + r1.Next(); + } + r1.Finish(); + num_values = r1.position(); + + ASSERT_EQ(length, num_values); + + internal::BitmapWriter r2(bitmap, 5, length - 5); + + for (int i = 0; i < (length - 5); ++i) { + r2.Set(); + r2.Clear(); + r2.Next(); + } + r2.Finish(); + num_values = r2.position(); + + ASSERT_EQ((length - 5), num_values); } static inline int64_t SlowCountBits(const uint8_t* data, int64_t bit_offset, http://git-wip-us.apache.org/repos/asf/arrow/blob/f9d1e1be/cpp/src/arrow/util/bit-util.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index fa0d7a4..a85aff7 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -48,49 +48,6 @@ #endif namespace arrow { -namespace internal { - -class BitmapReader { - public: - BitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length) - : bitmap_(bitmap), position_(0), length_(length) { - byte_offset_ = start_offset / 8; - bit_offset_ = start_offset % 8; - current_byte_ = bitmap[byte_offset_]; - } - -#if defined(_MSC_VER) - // MSVC is finicky about this cast - bool IsSet() const { return (current_byte_ & (1 << bit_offset_)) != 0; } -#else - bool IsSet() const { return current_byte_ & (1 << bit_offset_); } -#endif - - bool IsNotSet() const { return (current_byte_ & (1 << bit_offset_)) == 0; } - - void Next() { - ++bit_offset_; - ++position_; - if (bit_offset_ == 8) { - bit_offset_ = 0; - ++byte_offset_; - if (ARROW_PREDICT_TRUE(position_ < length_)) { - current_byte_ = bitmap_[byte_offset_]; - } - } - } - - private: - const uint8_t* bitmap_; - int64_t position_; - int64_t length_; - - uint8_t current_byte_; - int64_t byte_offset_; - int64_t bit_offset_; -}; - -} // namespace internal #ifndef ARROW_NO_DEPRECATED_API @@ -436,6 +393,102 @@ Status BytesToBits(const std::vector<uint8_t>&, MemoryPool*, std::shared_ptr<Buf } // namespace BitUtil +namespace internal { + +class BitmapReader { + public: + BitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length) + : bitmap_(bitmap), position_(0), length_(length) { + current_byte_ = 0; + byte_offset_ = start_offset / 8; + bit_offset_ = start_offset % 8; + if (length > 0) { + current_byte_ = bitmap[byte_offset_]; + } + } + +#if defined(_MSC_VER) + // MSVC is finicky about this cast + bool IsSet() const { return (current_byte_ & (1 << bit_offset_)) != 0; } +#else + bool IsSet() const { return current_byte_ & (1 << bit_offset_); } +#endif + + bool IsNotSet() const { return (current_byte_ & (1 << bit_offset_)) == 0; } + + void Next() { + ++bit_offset_; + ++position_; + if (bit_offset_ == 8) { + bit_offset_ = 0; + ++byte_offset_; + if (ARROW_PREDICT_TRUE(position_ < length_)) { + current_byte_ = bitmap_[byte_offset_]; + } + } + } + + private: + const uint8_t* bitmap_; + int64_t position_; + int64_t length_; + + uint8_t current_byte_; + int64_t byte_offset_; + int64_t bit_offset_; +}; + +class BitmapWriter { + public: + BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length) + : bitmap_(bitmap), position_(0), length_(length) { + current_byte_ = 0; + byte_offset_ = start_offset / 8; + bit_offset_ = start_offset % 8; + if (length > 0) { + current_byte_ = bitmap[byte_offset_]; + } + } + + void Set() { current_byte_ |= BitUtil::kBitmask[bit_offset_]; } + + void Clear() { current_byte_ &= BitUtil::kFlippedBitmask[bit_offset_]; } + + void Next() { + ++bit_offset_; + ++position_; + bitmap_[byte_offset_] = current_byte_; + if (bit_offset_ == 8) { + bit_offset_ = 0; + ++byte_offset_; + if (ARROW_PREDICT_TRUE(position_ < length_)) { + current_byte_ = bitmap_[byte_offset_]; + } + } + } + + void Finish() { + if (ARROW_PREDICT_TRUE(position_ < length_)) { + if (bit_offset_ != 0) { + bitmap_[byte_offset_] = current_byte_; + } + } + } + + int64_t position() const { return position_; } + + private: + uint8_t* bitmap_; + int64_t position_; + int64_t length_; + + uint8_t current_byte_; + int64_t byte_offset_; + int64_t bit_offset_; +}; + +} // namespace internal + // ---------------------------------------------------------------------- // Bitmap utilities