This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git


The following commit(s) were added to refs/heads/develop by this push:
     new 04eb621a Fix bloom filter error.
04eb621a is described below

commit 04eb621ab60d6be4b92a4b44b309d766625d369e
Author: Colin Lee <[email protected]>
AuthorDate: Fri Jun 20 23:40:09 2025 +0800

    Fix bloom filter error.
    
    * add bloomfilter test.
---
 cpp/src/reader/bloom_filter.cc       | 31 ++++++-----------
 cpp/src/reader/bloom_filter.h        |  3 +-
 cpp/test/reader/bloom_filter_test.cc | 67 ++++++++++++++++++++++++++++++++++++
 3 files changed, 79 insertions(+), 22 deletions(-)

diff --git a/cpp/src/reader/bloom_filter.cc b/cpp/src/reader/bloom_filter.cc
index 58348094..174791dd 100644
--- a/cpp/src/reader/bloom_filter.cc
+++ b/cpp/src/reader/bloom_filter.cc
@@ -121,33 +121,22 @@ int BitSet::from_bytes(uint8_t *filter_data, uint32_t filter_data_bytes_len) {
     for (; word_idx < (filter_data_bytes_len / 8); word_idx += 1) {
         uint64_t cur_word = 0;
         uint8_t *cur_word_start_byte = filter_data + (word_idx * 8);
-        cur_word |= *(cur_word_start_byte + 0);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 1);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 2);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 3);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 4);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 5);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 6);
-        cur_word = cur_word << 8;
-        cur_word |= *(cur_word_start_byte + 7);
-        cur_word = cur_word << 8;
-        *(words_ + word_idx) = cur_word;
+        for (int b = 0; b < 8; ++b) {
+            cur_word |= static_cast<uint64_t>(cur_word_start_byte[b])
+                        << (8 * b);
+        }
+        words_[word_idx] = cur_word;
     }
 
     if (filter_data_bytes_len > word_idx * 8) {
         uint64_t cur_word = 0;
         uint8_t *cur_word_start_byte = filter_data + (word_idx * 8);
-        for (uint32_t r = 0; r < filter_data_bytes_len - word_idx * 8; r++) {
-            cur_word |= *(cur_word_start_byte + r);
-            cur_word = cur_word << 8;
+        int remain = filter_data_bytes_len - word_idx * 8;
+        for (int b = 0; b < remain; ++b) {
+            cur_word |= static_cast<uint64_t>(cur_word_start_byte[b])
+                        << (8 * b);
         }
-        *(words_ + word_idx) = cur_word;
+        words_[word_idx] = cur_word;
     }
     return ret;
 }
diff --git a/cpp/src/reader/bloom_filter.h b/cpp/src/reader/bloom_filter.h
index 181cb96c..a43b264a 100644
--- a/cpp/src/reader/bloom_filter.h
+++ b/cpp/src/reader/bloom_filter.h
@@ -72,7 +72,7 @@ class BitSet {
     void set(int32_t pos) {
         int32_t word_idx = pos / 64;
         int32_t word_offset = pos % 64;
-        words_[word_idx] |= (1ul << word_offset);
+        words_[word_idx] |= (1ull << word_offset);
     }
     int32_t get_words_in_use() const {
         for (int32_t i = word_count_ - 1; i >= 0; i--) {
@@ -109,6 +109,7 @@ class BloomFilter {
                        const common::String &measurement_name);
     int serialize_to(common::ByteStream &out);
     int deserialize_from(common::ByteStream &in);
+    BitSet *get_bit_set() { return &bitset_; }
 
    private:
     common::String get_entry_string(const common::String &device_name,
diff --git a/cpp/test/reader/bloom_filter_test.cc b/cpp/test/reader/bloom_filter_test.cc
new file mode 100644
index 00000000..d947bf55
--- /dev/null
+++ b/cpp/test/reader/bloom_filter_test.cc
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "reader/bloom_filter.h"
+
+#include <gtest/gtest.h>
+
+#include <unordered_set>
+using namespace storage;
+TEST(BloomfilterTest, BloomFilter) {
+    BloomFilter filter;
+
+    std::unordered_set<uint8_t> my_set = {0, 0, 0,   0,  0, 0, 0, 0, 2,
+                                          0, 2, 128, 32, 0, 0, 1, 0, 4,
+                                          0, 0, 0,   16, 0, 0, 0, 0, 32};
+
+    filter.init(0.1, 10);
+    common::PageArena arena;
+    common::String device1 = common::String("test_table.test1.test", arena);
+    common::String sensor = common::String();
+    filter.add_path_entry(device1, sensor);
+    common::String sensor1 = common::String("value", arena);
+    filter.add_path_entry(device1, sensor1);
+    common::ByteStream out(1024, common::MOD_DEFAULT);
+    uint8_t *filter_data_bytes = nullptr;
+    int32_t filter_data_bytes_len = 0;
+    filter.get_bit_set()->to_bytes(filter_data_bytes, filter_data_bytes_len);
+    std::unordered_set<uint8_t> data;
+    for (int i = 0; i < filter_data_bytes_len; i++) {
+        data.insert(static_cast<int>(filter_data_bytes[i]));
+        ASSERT_TRUE(my_set.find(static_cast<int>(filter_data_bytes[i])) !=
+                    my_set.end());
+    }
+    filter.serialize_to(out);
+
+    std::cout << std::endl;
+    BloomFilter filter2;
+    filter2.deserialize_from(out);
+    // ASSERT_EQ(filter, filter2);
+    uint8_t *filter_data_bytes2 = nullptr;
+    int32_t filter_data_bytes_len2 = 0;
+    filter2.get_bit_set()->to_bytes(filter_data_bytes2, filter_data_bytes_len2);
+    ASSERT_EQ(filter_data_bytes_len, filter_data_bytes_len2);
+    for (int i = 0; i < filter_data_bytes_len2; i++) {
+        ASSERT_TRUE(data.find(static_cast<int>(filter_data_bytes2[i])) !=
+                    data.end());
+        ASSERT_TRUE(my_set.find(static_cast<int>(filter_data_bytes2[i])) !=
+                    my_set.end());
+    }
+    common::mem_free(filter_data_bytes);
+    common::mem_free(filter_data_bytes2);
+}

Reply via email to