This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch fix_bloom_filter
in repository https://gitbox.apache.org/repos/asf/tsfile.git


The following commit(s) were added to refs/heads/fix_bloom_filter by this push:
     new 5ccaaa9b fix serialize
5ccaaa9b is described below

commit 5ccaaa9b0c24c4790acaa56b62a2e02e0cfd6765
Author: ColinLee <[email protected]>
AuthorDate: Fri Jun 20 22:02:11 2025 +0800

    fix serialize
---
 cpp/src/reader/bloom_filter.cc      | 29 ++++++++---------------------
 cpp/test/reader/bloomfilter_test.cc | 16 ++++++++++++++--
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/cpp/src/reader/bloom_filter.cc b/cpp/src/reader/bloom_filter.cc
index e55e59d4..dc5295dd 100644
--- a/cpp/src/reader/bloom_filter.cc
+++ b/cpp/src/reader/bloom_filter.cc
@@ -121,33 +121,20 @@ int BitSet::from_bytes(uint8_t *filter_data, uint32_t filter_data_bytes_len) {
     for (; word_idx < (filter_data_bytes_len / 8); word_idx += 1) {
         uint64_t cur_word = 0;
         uint8_t *cur_word_start_byte = filter_data + (word_idx * 8);
-        cur_word |= *(cur_word_start_byte + 0);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 1);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 2);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 3);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 4);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 5);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 6);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 7);
-        cur_word = cur_word << 8;
-        *(words_ + word_idx) = cur_word;
+        for (int b = 0; b < 8; ++b) {
+            cur_word |= static_cast<uint64_t>(cur_word_start_byte[b]) << (8 * b);
+        }
+        words_[word_idx] = cur_word;
     }
 
     if (filter_data_bytes_len > word_idx * 8) {
         uint64_t cur_word = 0;
         uint8_t *cur_word_start_byte = filter_data + (word_idx * 8);
-        for (uint32_t r = 0; r < filter_data_bytes_len - word_idx * 8; r++) {
-            cur_word |= *(cur_word_start_byte + r);
-            cur_word = cur_word << 8;
+        int remain = filter_data_bytes_len - word_idx * 8;
+        for (int b = 0; b < remain; ++b) {
+            cur_word |= static_cast<uint64_t>(cur_word_start_byte[b]) << (8 * b);
         }
-        *(words_ + word_idx) = cur_word;
+        words_[word_idx] = cur_word;
     }
     return ret;
 }
diff --git a/cpp/test/reader/bloomfilter_test.cc b/cpp/test/reader/bloomfilter_test.cc
index 5ae6c9bf..e061d966 100644
--- a/cpp/test/reader/bloomfilter_test.cc
+++ b/cpp/test/reader/bloomfilter_test.cc
@@ -24,6 +24,12 @@
 using namespace storage;
 TEST(BloomfilterTest, BloomFilter) {
     BloomFilter filter;
+
+    std::unordered_set<uint8_t> my_set = {
+        0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 128, 32, 0, 0, 1,
+        0, 4, 0, 0, 0, 16, 0, 0, 0, 0, 32
+    };
+
     filter.init(0.1, 10);
     common::PageArena arena;
     common::String device1 = common::String("test_table.test1.test", arena);
@@ -39,15 +45,21 @@ TEST(BloomfilterTest, BloomFilter) {
     for (int i = 0; i < filter_data_bytes_len; i++) {
         data.insert(static_cast<int>(filter_data_bytes[i]));
         std::cout << static_cast<int>(filter_data_bytes[i]) << " ";
+        ASSERT_TRUE(my_set.find(static_cast<int>(filter_data_bytes[i])) != my_set.end());
     }
     filter.serialize_to(out);
 
+    std::cout << std::endl;
     BloomFilter filter2;
     filter2.deserialize_from(out);
     // ASSERT_EQ(filter, filter2);
     uint8_t *filter_data_bytes2 = nullptr;
-    filter2.get_bit_set()->to_bytes(filter_data_bytes2, filter_data_bytes_len);
-    for (int i = 0; i < filter_data_bytes_len; i++) {
+    int32_t filter_data_bytes_len2 = 0;
+    filter2.get_bit_set()->to_bytes(filter_data_bytes2, filter_data_bytes_len2);
+    ASSERT_EQ(filter_data_bytes_len, filter_data_bytes_len2);
+    for (int i = 0; i < filter_data_bytes_len2; i++) {
         ASSERT_TRUE(data.find(static_cast<int>(filter_data_bytes2[i])) != data.end());
+        std::cout << static_cast<int>(filter_data_bytes[i]) << " ";
+        ASSERT_TRUE(my_set.find(static_cast<int>(filter_data_bytes2[i])) != my_set.end());
     }
 }

Reply via email to