felipecrv commented on code in PR #35814: URL: https://github.com/apache/arrow/pull/35814#discussion_r1212061111
########## cpp/src/arrow/util/hashing_test.cc: ########## @@ -486,5 +488,117 @@ TEST(BinaryMemoTable, Empty) { EXPECT_EQ(offsets[0], 0); } +hash_t HashDataBitmap(const ArraySpan& array) { + EXPECT_EQ(array.type->id(), Type::BOOL); + const auto& bitmap = array.buffers[1]; + return ComputeBitmapHash(bitmap.data, bitmap.size, + /*seed=*/0, + /*bit_offset=*/array.offset, + /*num_bits=*/array.length); +} + +std::shared_ptr<BooleanArray> BuildBooleanArray(int len, bool start) { + // This could be memoized in the future to speed up tests. + BooleanBuilder builder; + for (int i = 0; i < len; ++i) { + EXPECT_TRUE(builder.Append(((i % 2 != 0) ^ start) == 1).ok()); + } + std::shared_ptr<BooleanArray> array; + EXPECT_TRUE(builder.Finish(&array).ok()); + return array; +} + +hash_t HashConcatenation(const ArrayVector& arrays, int64_t bits_offset = -1, + int64_t num_bits = -1) { + EXPECT_OK_AND_ASSIGN(auto concat, Concatenate(arrays)); + EXPECT_EQ(concat->type()->id(), Type::BOOL); + if (bits_offset == -1 || num_bits == -1) { + return HashDataBitmap(*concat->data()); + } + auto slice = concat->Slice(bits_offset, num_bits); + return HashDataBitmap(*slice->data()); +} + +TEST(BitmapHashTest, SmallInputs) { + for (bool start : {false, true}) { + auto block = BuildBooleanArray(64, start); + for (int len = 0; len < 64; len++) { + auto prefix = BuildBooleanArray(len, start); + auto expected_hash = HashDataBitmap(*prefix->data()); + + auto slice = block->Slice(0, len); + auto slice_hash = HashDataBitmap(*slice->data()); + ASSERT_EQ(expected_hash, slice_hash); + + for (int j = 1; j < len; j++) { + auto fragment = BuildBooleanArray(len - j, start ^ (j % 2 != 0)); + expected_hash = HashDataBitmap(*fragment->data()); + + slice = block->Slice(j, len - j); + slice_hash = HashDataBitmap(*slice->data()); + ASSERT_EQ(expected_hash, slice_hash); + } + } + } +} + +TEST(BitmapHashTest, LongerInputs) { + BooleanBuilder builder; + std::shared_ptr<BooleanArray> block_of_bools; + { + ASSERT_OK(builder.AppendValues(2, true)); + ASSERT_OK(builder.AppendValues(3, false)); + ASSERT_OK(builder.AppendValues(5, true)); + ASSERT_OK(builder.AppendValues(7, false)); + ASSERT_OK(builder.AppendValues(11, true)); + ASSERT_OK(builder.AppendValues(13, false)); + ASSERT_OK(builder.AppendValues(17, true)); + ASSERT_OK(builder.AppendValues(5, false)); + ASSERT_OK(builder.AppendValues(1, true)); + ASSERT_OK(builder.Finish(&block_of_bools)); + ASSERT_EQ(block_of_bools->length(), 64); + } + const auto hash_of_block = HashDataBitmap(*block_of_bools->data()); + + const auto kStep = 9; Review Comment: ``` felipeo@thinkpad: ~/code/arrow/cpp/ninja (hash_scalar_fix $%>)$ ninja arrow-utility-test && ./**/arrow-utility-test --gtest_break_on_failure --gtest_filter="*BitmapHash*" [15/15] Linking CXX executable debug/arrow-utility-test Note: Google Test filter = *BitmapHash* [==========] Running 2 tests from 1 test suite. [----------] Global test environment set-up. [----------] 2 tests from BitmapHashTest [ RUN ] BitmapHashTest.SmallInputs [ OK ] BitmapHashTest.SmallInputs (107 ms) [ RUN ] BitmapHashTest.LongerInputs [ OK ] BitmapHashTest.LongerInputs (121 ms) [----------] 2 tests from BitmapHashTest (228 ms total) [----------] Global test environment tear-down [==========] 2 tests from 1 test suite ran. (228 ms total) [ PASSED ] 2 tests. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org