This is an automated email from the ASF dual-hosted git repository.
colinlee pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 04eb621a Fix bloom filter error.
04eb621a is described below
commit 04eb621ab60d6be4b92a4b44b309d766625d369e
Author: Colin Lee <[email protected]>
AuthorDate: Fri Jun 20 23:40:09 2025 +0800
Fix bloom filter error.
* add bloomfilter test.
---
cpp/src/reader/bloom_filter.cc | 31 ++++++-----------
cpp/src/reader/bloom_filter.h | 3 +-
cpp/test/reader/bloom_filter_test.cc | 67 ++++++++++++++++++++++++++++++++++++
3 files changed, 79 insertions(+), 22 deletions(-)
diff --git a/cpp/src/reader/bloom_filter.cc b/cpp/src/reader/bloom_filter.cc
index 58348094..174791dd 100644
--- a/cpp/src/reader/bloom_filter.cc
+++ b/cpp/src/reader/bloom_filter.cc
@@ -121,33 +121,22 @@ int BitSet::from_bytes(uint8_t *filter_data, uint32_t filter_data_bytes_len) {
for (; word_idx < (filter_data_bytes_len / 8); word_idx += 1) {
uint64_t cur_word = 0;
uint8_t *cur_word_start_byte = filter_data + (word_idx * 8);
- cur_word |= *(cur_word_start_byte + 0);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 1);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 2);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 3);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 4);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 5);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 6);
- cur_word = cur_word << 8;
- cur_word |= *(cur_word_start_byte + 7);
- cur_word = cur_word << 8;
- *(words_ + word_idx) = cur_word;
+ for (int b = 0; b < 8; ++b) {
+ cur_word |= static_cast<uint64_t>(cur_word_start_byte[b])
+ << (8 * b);
+ }
+ words_[word_idx] = cur_word;
}
if (filter_data_bytes_len > word_idx * 8) {
uint64_t cur_word = 0;
uint8_t *cur_word_start_byte = filter_data + (word_idx * 8);
- for (uint32_t r = 0; r < filter_data_bytes_len - word_idx * 8; r++) {
- cur_word |= *(cur_word_start_byte + r);
- cur_word = cur_word << 8;
+ int remain = filter_data_bytes_len - word_idx * 8;
+ for (int b = 0; b < remain; ++b) {
+ cur_word |= static_cast<uint64_t>(cur_word_start_byte[b])
+ << (8 * b);
}
- *(words_ + word_idx) = cur_word;
+ words_[word_idx] = cur_word;
}
return ret;
}
diff --git a/cpp/src/reader/bloom_filter.h b/cpp/src/reader/bloom_filter.h
index 181cb96c..a43b264a 100644
--- a/cpp/src/reader/bloom_filter.h
+++ b/cpp/src/reader/bloom_filter.h
@@ -72,7 +72,7 @@ class BitSet {
void set(int32_t pos) {
int32_t word_idx = pos / 64;
int32_t word_offset = pos % 64;
- words_[word_idx] |= (1ul << word_offset);
+ words_[word_idx] |= (1ull << word_offset);
}
int32_t get_words_in_use() const {
for (int32_t i = word_count_ - 1; i >= 0; i--) {
@@ -109,6 +109,7 @@ class BloomFilter {
const common::String &measurement_name);
int serialize_to(common::ByteStream &out);
int deserialize_from(common::ByteStream &in);
+ BitSet *get_bit_set() { return &bitset_; }
private:
common::String get_entry_string(const common::String &device_name,
diff --git a/cpp/test/reader/bloom_filter_test.cc b/cpp/test/reader/bloom_filter_test.cc
new file mode 100644
index 00000000..d947bf55
--- /dev/null
+++ b/cpp/test/reader/bloom_filter_test.cc
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "reader/bloom_filter.h"
+
+#include <gtest/gtest.h>
+
+#include <unordered_set>
+using namespace storage;
+TEST(BloomfilterTest, BloomFilter) {
+ BloomFilter filter;
+
+ std::unordered_set<uint8_t> my_set = {0, 0, 0, 0, 0, 0, 0, 0, 2,
+ 0, 2, 128, 32, 0, 0, 1, 0, 4,
+ 0, 0, 0, 16, 0, 0, 0, 0, 32};
+
+ filter.init(0.1, 10);
+ common::PageArena arena;
+ common::String device1 = common::String("test_table.test1.test", arena);
+ common::String sensor = common::String();
+ filter.add_path_entry(device1, sensor);
+ common::String sensor1 = common::String("value", arena);
+ filter.add_path_entry(device1, sensor1);
+ common::ByteStream out(1024, common::MOD_DEFAULT);
+ uint8_t *filter_data_bytes = nullptr;
+ int32_t filter_data_bytes_len = 0;
+ filter.get_bit_set()->to_bytes(filter_data_bytes, filter_data_bytes_len);
+ std::unordered_set<uint8_t> data;
+ for (int i = 0; i < filter_data_bytes_len; i++) {
+ data.insert(static_cast<int>(filter_data_bytes[i]));
+ ASSERT_TRUE(my_set.find(static_cast<int>(filter_data_bytes[i])) !=
+ my_set.end());
+ }
+ filter.serialize_to(out);
+
+ std::cout << std::endl;
+ BloomFilter filter2;
+ filter2.deserialize_from(out);
+ // ASSERT_EQ(filter, filter2);
+ uint8_t *filter_data_bytes2 = nullptr;
+ int32_t filter_data_bytes_len2 = 0;
+ filter2.get_bit_set()->to_bytes(filter_data_bytes2, filter_data_bytes_len2);
+ ASSERT_EQ(filter_data_bytes_len, filter_data_bytes_len2);
+ for (int i = 0; i < filter_data_bytes_len2; i++) {
+ ASSERT_TRUE(data.find(static_cast<int>(filter_data_bytes2[i])) !=
+ data.end());
+ ASSERT_TRUE(my_set.find(static_cast<int>(filter_data_bytes2[i])) !=
+ my_set.end());
+ }
+ common::mem_free(filter_data_bytes);
+ common::mem_free(filter_data_bytes2);
+}