This is an automated email from the ASF dual-hosted git repository.
colinlee pushed a commit to branch fix_bloom_filter
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/fix_bloom_filter by this push:
new 5ccaaa9b fix serialize
5ccaaa9b is described below
commit 5ccaaa9b0c24c4790acaa56b62a2e02e0cfd6765
Author: ColinLee <[email protected]>
AuthorDate: Fri Jun 20 22:02:11 2025 +0800
fix serialize
---
cpp/src/reader/bloom_filter.cc | 29 ++++++++---------------------
cpp/test/reader/bloomfilter_test.cc | 16 ++++++++++++++--
2 files changed, 22 insertions(+), 23 deletions(-)
diff --git a/cpp/src/reader/bloom_filter.cc b/cpp/src/reader/bloom_filter.cc
index e55e59d4..dc5295dd 100644
--- a/cpp/src/reader/bloom_filter.cc
+++ b/cpp/src/reader/bloom_filter.cc
@@ -121,33 +121,20 @@ int BitSet::from_bytes(uint8_t *filter_data, uint32_t filter_data_bytes_len) {
for (; word_idx < (filter_data_bytes_len / 8); word_idx += 1) {
uint64_t cur_word = 0;
uint8_t *cur_word_start_byte = filter_data + (word_idx * 8);
- cur_word |= *(cur_word_start_byte + 0);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 1);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 2);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 3);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 4);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 5);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 6);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 7);
- cur_word = cur_word << 8;
- *(words_ + word_idx) = cur_word;
+ for (int b = 0; b < 8; ++b) {
+ cur_word |= static_cast<uint64_t>(cur_word_start_byte[b]) << (8 * b);
+ }
+ words_[word_idx] = cur_word;
}
if (filter_data_bytes_len > word_idx * 8) {
uint64_t cur_word = 0;
uint8_t *cur_word_start_byte = filter_data + (word_idx * 8);
- for (uint32_t r = 0; r < filter_data_bytes_len - word_idx * 8; r++) {
- cur_word |= *(cur_word_start_byte + r);
- cur_word = cur_word << 8;
+ int remain = filter_data_bytes_len - word_idx * 8;
+ for (int b = 0; b < remain; ++b) {
+ cur_word |= static_cast<uint64_t>(cur_word_start_byte[b]) << (8 * b);
}
- *(words_ + word_idx) = cur_word;
+ words_[word_idx] = cur_word;
}
return ret;
}
diff --git a/cpp/test/reader/bloomfilter_test.cc b/cpp/test/reader/bloomfilter_test.cc
index 5ae6c9bf..e061d966 100644
--- a/cpp/test/reader/bloomfilter_test.cc
+++ b/cpp/test/reader/bloomfilter_test.cc
@@ -24,6 +24,12 @@
using namespace storage;
TEST(BloomfilterTest, BloomFilter) {
BloomFilter filter;
+
+ std::unordered_set<uint8_t> my_set = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 128, 32, 0, 0, 1,
+ 0, 4, 0, 0, 0, 16, 0, 0, 0, 0, 32
+ };
+
filter.init(0.1, 10);
common::PageArena arena;
common::String device1 = common::String("test_table.test1.test", arena);
@@ -39,15 +45,21 @@ TEST(BloomfilterTest, BloomFilter) {
for (int i = 0; i < filter_data_bytes_len; i++) {
data.insert(static_cast<int>(filter_data_bytes[i]));
std::cout << static_cast<int>(filter_data_bytes[i]) << " ";
+ ASSERT_TRUE(my_set.find(static_cast<int>(filter_data_bytes[i])) != my_set.end());
}
filter.serialize_to(out);
+ std::cout << std::endl;
BloomFilter filter2;
filter2.deserialize_from(out);
// ASSERT_EQ(filter, filter2);
uint8_t *filter_data_bytes2 = nullptr;
- filter2.get_bit_set()->to_bytes(filter_data_bytes2, filter_data_bytes_len);
- for (int i = 0; i < filter_data_bytes_len; i++) {
+ int32_t filter_data_bytes_len2 = 0;
+ filter2.get_bit_set()->to_bytes(filter_data_bytes2, filter_data_bytes_len2);
+ ASSERT_EQ(filter_data_bytes_len, filter_data_bytes_len2);
+ for (int i = 0; i < filter_data_bytes_len2; i++) {
ASSERT_TRUE(data.find(static_cast<int>(filter_data_bytes2[i])) != data.end());
+ std::cout << static_cast<int>(filter_data_bytes[i]) << " ";
+ ASSERT_TRUE(my_set.find(static_cast<int>(filter_data_bytes2[i])) != my_set.end());
}
}