This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 5211bd45 Implement TagFilter functionality (#635)
5211bd45 is described below
commit 5211bd45760f837e5c021a9534deff3f7370eae6
Author: Hongzhi Gao <[email protected]>
AuthorDate: Wed Nov 26 16:07:20 2025 +0800
Implement TagFilter functionality (#635)
* "Implement TagFilter functionality with support for all operations except
LikePattern"
* add license
* fix some issues
* demo for TsFile-CPP TagFilter
---
cpp/examples/cpp_examples/cpp_examples.h | 1 +
cpp/examples/cpp_examples/demo_read.cpp | 7 +-
cpp/src/reader/device_meta_iterator.cc | 10 +-
cpp/src/reader/filter/filter.h | 5 +
cpp/src/reader/filter/tag_filter.cc | 295 +++++++++++++++
cpp/src/reader/filter/tag_filter.h | 188 ++++++++++
cpp/src/reader/tsfile_reader.cc | 14 +-
cpp/src/reader/tsfile_reader.h | 17 +
cpp/test/reader/filter/tag_filter_test.cc | 417 +++++++++++++++++++++
.../reader/table_view/tsfile_reader_table_test.cc | 6 +-
10 files changed, 952 insertions(+), 8 deletions(-)
diff --git a/cpp/examples/cpp_examples/cpp_examples.h
b/cpp/examples/cpp_examples/cpp_examples.h
index b0512e9a..5466fbe6 100644
--- a/cpp/examples/cpp_examples/cpp_examples.h
+++ b/cpp/examples/cpp_examples/cpp_examples.h
@@ -26,6 +26,7 @@
#include "file/write_file.h"
#include "reader/expression.h"
#include "reader/filter/filter.h"
+#include "reader/filter/tag_filter.h"
#include "reader/qds_with_timegenerator.h"
#include "reader/qds_without_timegenerator.h"
#include "reader/tsfile_reader.h"
diff --git a/cpp/examples/cpp_examples/demo_read.cpp
b/cpp/examples/cpp_examples/demo_read.cpp
index efef0c9c..f90efe35 100644
--- a/cpp/examples/cpp_examples/demo_read.cpp
+++ b/cpp/examples/cpp_examples/demo_read.cpp
@@ -40,8 +40,12 @@ int demo_read() {
columns.emplace_back("id2");
columns.emplace_back("s1");
+ auto table_schema = reader.get_table_schema(table_name);
+ storage::Filter* tag_filter1 =
storage::TagFilterBuilder(table_schema.get()).eq("id1", "id1_filed_1");
+ storage::Filter* tag_filter2 =
storage::TagFilterBuilder(table_schema.get()).eq("id2", "id1_filed_2");
+ storage::Filter* tag_filter =
storage::TagFilterBuilder(table_schema.get()).and_filter(tag_filter1,
tag_filter2);
// Column vector contains the columns you want to select.
- HANDLE_ERROR(reader.query(table_name, columns, 0, 100, temp_ret));
+ HANDLE_ERROR(reader.query(table_name, columns, 0, 100, temp_ret,
tag_filter));
// Get query handler.
auto ret = dynamic_cast<storage::TableResultSet*>(temp_ret);
@@ -98,5 +102,6 @@ int demo_read() {
// Close reader.
reader.close();
+ delete tag_filter;
return 0;
}
diff --git a/cpp/src/reader/device_meta_iterator.cc
b/cpp/src/reader/device_meta_iterator.cc
index 4f47341c..a59f511d 100644
--- a/cpp/src/reader/device_meta_iterator.cc
+++ b/cpp/src/reader/device_meta_iterator.cc
@@ -19,6 +19,8 @@
#include "device_meta_iterator.h"
+#include "filter/tag_filter.h"
+
namespace storage {
bool DeviceMetaIterator::has_next() {
if (!result_cache_.empty()) {
@@ -74,9 +76,11 @@ int DeviceMetaIterator::load_leaf_device(MetaIndexNode*
meta_index_node) {
const auto& leaf_children = meta_index_node->children_;
for (size_t i = 0; i < leaf_children.size(); i++) {
std::shared_ptr<IMetaIndexEntry> child = leaf_children[i];
- // const auto& device_id = child->name_;
- if (id_filter_ != nullptr /*TODO: !id_filter_->satisfy(device_id)*/) {
- continue;
+ if (id_filter_ != nullptr) {
+ if (!id_filter_->satisfyRow(
+ 0, child->get_device_id()->get_segments())) {
+ continue;
+ }
}
int32_t start_offset = child->get_offset();
int32_t end_offset = i + 1 < leaf_children.size()
diff --git a/cpp/src/reader/filter/filter.h b/cpp/src/reader/filter/filter.h
index 1846df5a..f39dddba 100644
--- a/cpp/src/reader/filter/filter.h
+++ b/cpp/src/reader/filter/filter.h
@@ -54,6 +54,11 @@ class Filter {
ASSERT(false);
return false;
}
+    /**
+     * Row-level check against a vector of segment strings. The base class
+     * version is a placeholder: it trips ASSERT(false) and reports that the
+     * row is not satisfied. Filters that work on segments override it.
+     */
+    virtual bool satisfyRow(int time,
+                            std::vector<std::string*> segments) const {
+        const bool satisfied = false;
+        ASSERT(false);
+        return satisfied;
+    }
virtual std::vector<TimeRange*>* get_time_ranges() {
ASSERT(false);
return nullptr;
diff --git a/cpp/src/reader/filter/tag_filter.cc
b/cpp/src/reader/filter/tag_filter.cc
new file mode 100644
index 00000000..c4c56a1d
--- /dev/null
+++ b/cpp/src/reader/filter/tag_filter.cc
@@ -0,0 +1,295 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "tag_filter.h"
+
+#include <algorithm>
+#include <utility>
+
+namespace storage {
+
+// TagFilter base class implementation
+
+// Stores the tag column index and the comparison value. The initializer list
+// follows the member declaration order (value_, value2_, col_idx_), which is
+// the order in which the members are actually constructed.
+TagFilter::TagFilter(int col_idx, std::string tag_value)
+    : value_(std::move(tag_value)), value2_(), col_idx_(col_idx) {}
+
+TagFilter::~TagFilter() = default;
+
+// Entry point reached through the Filter interface. `time` is not used by
+// tag predicates; the call is forwarded to the segment-only overload.
+bool TagFilter::satisfyRow(int time,
+                           std::vector<std::string*> segments) const {
+    (void)time;
+    // `segments` is already a by-value copy, so hand it on without copying
+    // it a second time.
+    return satisfyRow(std::move(segments));
+}
+
+// Default segment predicate. Concrete filters override it, so reaching this
+// implementation is treated as a programming error.
+bool TagFilter::satisfyRow(std::vector<std::string*> segments) const {
+    ASSERT(false);
+    return false;
+}
+
+// TagEq implementation
+TagEq::TagEq(int col_idx, std::string tag_value)
+    : TagFilter(col_idx, std::move(tag_value)) {}
+
+// True when the segment at col_idx_ equals value_. A negative or
+// out-of-range index, or a null segment pointer, never matches.
+bool TagEq::satisfyRow(std::vector<std::string*> segments) const {
+    if (col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    return *segments[idx] == value_;
+}
+
+// TagNeq implementation
+TagNeq::TagNeq(int col_idx, std::string tag_value)
+    : TagFilter(col_idx, std::move(tag_value)) {}
+
+// True when the segment at col_idx_ differs from value_. A negative or
+// out-of-range index, or a null segment pointer, yields false (as the
+// out-of-range case always did) rather than dereferencing it.
+bool TagNeq::satisfyRow(std::vector<std::string*> segments) const {
+    if (col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    return *segments[idx] != value_;
+}
+
+// TagLt implementation
+TagLt::TagLt(int col_idx, std::string tag_value)
+    : TagFilter(col_idx, std::move(tag_value)) {}
+
+// True when the segment at col_idx_ compares less than value_ using
+// std::string ordering. A negative or out-of-range index, or a null segment
+// pointer, never matches.
+bool TagLt::satisfyRow(std::vector<std::string*> segments) const {
+    if (col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    return *segments[idx] < value_;
+}
+
+// TagLteq implementation
+TagLteq::TagLteq(int col_idx, std::string tag_value)
+    : TagFilter(col_idx, std::move(tag_value)) {}
+
+// True when the segment at col_idx_ compares less than or equal to value_
+// using std::string ordering. A negative or out-of-range index, or a null
+// segment pointer, never matches.
+bool TagLteq::satisfyRow(std::vector<std::string*> segments) const {
+    if (col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    return *segments[idx] <= value_;
+}
+
+// TagGt implementation
+TagGt::TagGt(int col_idx, std::string tag_value)
+    : TagFilter(col_idx, std::move(tag_value)) {}
+
+// True when the segment at col_idx_ compares greater than value_ using
+// std::string ordering. A negative or out-of-range index, or a null segment
+// pointer, never matches.
+bool TagGt::satisfyRow(std::vector<std::string*> segments) const {
+    if (col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    return *segments[idx] > value_;
+}
+
+// TagGteq implementation
+TagGteq::TagGteq(int col_idx, std::string tag_value)
+    : TagFilter(col_idx, std::move(tag_value)) {}
+
+// True when the segment at col_idx_ compares greater than or equal to value_
+// using std::string ordering. A negative or out-of-range index, or a null
+// segment pointer, never matches.
+bool TagGteq::satisfyRow(std::vector<std::string*> segments) const {
+    if (col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    return *segments[idx] >= value_;
+}
+
+// TagRegExp implementation
+
+// Compiles value_ as a std::regex once, at construction. A pattern that
+// fails to compile is remembered as invalid instead of propagating the
+// exception.
+TagRegExp::TagRegExp(int col_idx, std::string tag_value)
+    : TagFilter(col_idx, std::move(tag_value)) {
+    try {
+        pattern_ = std::regex(value_);
+        is_valid_pattern_ = true;
+    } catch (const std::regex_error&) {
+        is_valid_pattern_ = false;
+    }
+}
+
+// True when the pattern is found anywhere in the segment at col_idx_
+// (std::regex_search). An invalid pattern, a negative or out-of-range index,
+// a null segment pointer, or a regex error during matching yields false.
+bool TagRegExp::satisfyRow(std::vector<std::string*> segments) const {
+    if (!is_valid_pattern_ || col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    try {
+        return std::regex_search(*segments[idx], pattern_);
+    } catch (const std::regex_error&) {
+        return false;
+    }
+}
+
+// TagNotRegExp implementation
+
+// Compiles value_ as a std::regex once, at construction. A pattern that
+// fails to compile is remembered as invalid instead of propagating the
+// exception.
+TagNotRegExp::TagNotRegExp(int col_idx, std::string tag_value)
+    : TagFilter(col_idx, std::move(tag_value)) {
+    try {
+        pattern_ = std::regex(value_);
+        is_valid_pattern_ = true;
+    } catch (const std::regex_error&) {
+        is_valid_pattern_ = false;
+    }
+}
+
+// True when the pattern is NOT found in the segment at col_idx_. An invalid
+// pattern, a negative or out-of-range index, or a null segment pointer
+// yields false; a regex error raised while matching counts as "no match"
+// and therefore yields true.
+bool TagNotRegExp::satisfyRow(std::vector<std::string*> segments) const {
+    if (!is_valid_pattern_ || col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    try {
+        return !std::regex_search(*segments[idx], pattern_);
+    } catch (const std::regex_error&) {
+        return true;
+    }
+}
+
+// TagBetween implementation
+
+// value_ holds the lower bound and value2_ the upper bound.
+TagBetween::TagBetween(int col_idx, std::string lower_value,
+                       std::string upper_value)
+    : TagFilter(col_idx, std::move(lower_value)) {
+    value2_ = std::move(upper_value);
+}
+
+// True when value_ <= segment <= value2_ using std::string ordering (both
+// bounds inclusive). A negative or out-of-range index, or a null segment
+// pointer, never matches.
+bool TagBetween::satisfyRow(std::vector<std::string*> segments) const {
+    if (col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    const std::string& segment_value = *segments[idx];
+    return segment_value >= value_ && segment_value <= value2_;
+}
+
+// TagNotBetween implementation
+
+// value_ holds the lower bound and value2_ the upper bound.
+TagNotBetween::TagNotBetween(int col_idx, std::string lower_value,
+                             std::string upper_value)
+    : TagFilter(col_idx, std::move(lower_value)) {
+    value2_ = std::move(upper_value);
+}
+
+// True when the segment lies strictly outside [value_, value2_] using
+// std::string ordering. A negative or out-of-range index, or a null segment
+// pointer, yields false rather than dereferencing it.
+bool TagNotBetween::satisfyRow(std::vector<std::string*> segments) const {
+    if (col_idx_ < 0) return false;
+    const size_t idx = static_cast<size_t>(col_idx_);
+    if (idx >= segments.size() || segments[idx] == nullptr) return false;
+    const std::string& segment_value = *segments[idx];
+    return segment_value < value_ || segment_value > value2_;
+}
+
+// TagAnd implementation
+
+// Takes ownership of both operands; they are deleted in the destructor.
+// The column index and value of the base class are unused (-1, "").
+TagAnd::TagAnd(TagFilter* left, TagFilter* right)
+    : TagFilter(-1, ""), left_(left), right_(right) {}
+
+TagAnd::~TagAnd() {
+    delete left_;
+    delete right_;
+}
+
+// True when both operands accept the row. An operand can be null (for
+// example when TagFilterBuilder::and_filter is handed a null filter or one
+// that is not a TagFilter); such a conjunction matches nothing instead of
+// dereferencing the null pointer.
+bool TagAnd::satisfyRow(std::vector<std::string*> segments) const {
+    if (left_ == nullptr || right_ == nullptr) return false;
+    return left_->satisfyRow(segments) && right_->satisfyRow(segments);
+}
+
+// TagOr implementation
+
+// Takes ownership of both operands; they are deleted in the destructor.
+// The column index and value of the base class are unused (-1, "").
+TagOr::TagOr(TagFilter* left, TagFilter* right)
+    : TagFilter(-1, ""), left_(left), right_(right) {}
+
+TagOr::~TagOr() {
+    delete left_;
+    delete right_;
+}
+
+// True when at least one operand accepts the row. A null operand (for
+// example from a failed cast in TagFilterBuilder::or_filter) is treated as
+// matching nothing instead of being dereferenced.
+bool TagOr::satisfyRow(std::vector<std::string*> segments) const {
+    const bool left_ok = left_ != nullptr && left_->satisfyRow(segments);
+    return left_ok || (right_ != nullptr && right_->satisfyRow(segments));
+}
+
+// TagNot implementation
+
+// Takes ownership of the wrapped filter; it is deleted in the destructor.
+TagNot::TagNot(TagFilter* filter) : TagFilter(-1, ""), filter_(filter) {}
+
+TagNot::~TagNot() { delete filter_; }
+
+// Negates the wrapped filter. A null wrapped filter (for example from a
+// failed cast in TagFilterBuilder::not_filter) marks an invalid predicate
+// and matches nothing, instead of dereferencing the null pointer.
+bool TagNot::satisfyRow(std::vector<std::string*> segments) const {
+    if (filter_ == nullptr) return false;
+    return !filter_->satisfyRow(segments);
+}
+
+// TagFilterBuilder implementation
+TagFilterBuilder::TagFilterBuilder(TableSchema* schema)
+ : table_schema_(schema) {}
+
+// Creates a new TagEq on tag column `columnName`; returns nullptr when the
+// column cannot be resolved to an id column index.
+Filter* TagFilterBuilder::eq(const std::string& columnName,
+                             const std::string& value) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagEq(col, value) : nullptr;
+}
+
+// Creates a new TagNeq on tag column `columnName`; returns nullptr when the
+// column cannot be resolved to an id column index.
+Filter* TagFilterBuilder::neq(const std::string& columnName,
+                              const std::string& value) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagNeq(col, value) : nullptr;
+}
+
+// Creates a new TagLt on tag column `columnName`; returns nullptr when the
+// column cannot be resolved to an id column index.
+Filter* TagFilterBuilder::lt(const std::string& columnName,
+                             const std::string& value) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagLt(col, value) : nullptr;
+}
+
+// Creates a new TagLteq on tag column `columnName`; returns nullptr when the
+// column cannot be resolved to an id column index.
+Filter* TagFilterBuilder::lteq(const std::string& columnName,
+                               const std::string& value) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagLteq(col, value) : nullptr;
+}
+
+// Creates a new TagGt on tag column `columnName`; returns nullptr when the
+// column cannot be resolved to an id column index.
+Filter* TagFilterBuilder::gt(const std::string& columnName,
+                             const std::string& value) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagGt(col, value) : nullptr;
+}
+
+// Creates a new TagGteq on tag column `columnName`; returns nullptr when the
+// column cannot be resolved to an id column index.
+Filter* TagFilterBuilder::gteq(const std::string& columnName,
+                               const std::string& value) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagGteq(col, value) : nullptr;
+}
+
+// Creates a new TagRegExp on tag column `columnName`, with `value` as the
+// pattern; returns nullptr when the column cannot be resolved to an id
+// column index.
+Filter* TagFilterBuilder::reg_exp(const std::string& columnName,
+                                  const std::string& value) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagRegExp(col, value) : nullptr;
+}
+
+// Creates a new TagNotRegExp on tag column `columnName`, with `value` as the
+// pattern; returns nullptr when the column cannot be resolved to an id
+// column index.
+Filter* TagFilterBuilder::not_reg_exp(const std::string& columnName,
+                                      const std::string& value) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagNotRegExp(col, value) : nullptr;
+}
+
+// Creates a new TagBetween on tag column `columnName` with bounds
+// [lower, upper]; returns nullptr when the column cannot be resolved to an
+// id column index.
+Filter* TagFilterBuilder::between_and(const std::string& columnName,
+                                      const std::string& lower,
+                                      const std::string& upper) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagBetween(col, lower, upper) : nullptr;
+}
+
+// Creates a new TagNotBetween on tag column `columnName` with bounds
+// [lower, upper]; returns nullptr when the column cannot be resolved to an
+// id column index.
+Filter* TagFilterBuilder::not_between_and(const std::string& columnName,
+                                          const std::string& lower,
+                                          const std::string& upper) {
+    const int col = get_id_column_index(columnName);
+    return col >= 0 ? new TagNotBetween(col, lower, upper) : nullptr;
+}
+
+// Wraps two filters in a new TagAnd. Each operand is down-cast to
+// TagFilter*; an operand that is null or not a TagFilter is passed on as
+// nullptr.
+Filter* TagFilterBuilder::and_filter(Filter* left, Filter* right) {
+    TagFilter* lhs = dynamic_cast<TagFilter*>(left);
+    TagFilter* rhs = dynamic_cast<TagFilter*>(right);
+    return new TagAnd(lhs, rhs);
+}
+
+// Wraps two filters in a new TagOr. Each operand is down-cast to
+// TagFilter*; an operand that is null or not a TagFilter is passed on as
+// nullptr.
+Filter* TagFilterBuilder::or_filter(Filter* left, Filter* right) {
+    TagFilter* lhs = dynamic_cast<TagFilter*>(left);
+    TagFilter* rhs = dynamic_cast<TagFilter*>(right);
+    return new TagOr(lhs, rhs);
+}
+
+// Wraps a filter in a new TagNot. The operand is down-cast to TagFilter*;
+// a null or non-TagFilter operand is passed on as nullptr.
+Filter* TagFilterBuilder::not_filter(Filter* filter) {
+    TagFilter* inner = dynamic_cast<TagFilter*>(filter);
+    return new TagNot(inner);
+}
+
+// Maps a tag column name to its position in the segment vector: the id
+// column order reported by the schema, shifted by one. Returns -1 when the
+// schema reports -1 for the name.
+int TagFilterBuilder::get_id_column_index(const std::string& columnName) {
+    const int order = table_schema_->find_id_column_order(columnName);
+    return order == -1 ? -1 : order + 1;
+}
+
+} // namespace storage
\ No newline at end of file
diff --git a/cpp/src/reader/filter/tag_filter.h
b/cpp/src/reader/filter/tag_filter.h
new file mode 100644
index 00000000..c7d8843f
--- /dev/null
+++ b/cpp/src/reader/filter/tag_filter.h
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef READER_FILTER_TAG_FILTER_H
+#define READER_FILTER_TAG_FILTER_H
+
+#include <memory>
+#include <regex>
+#include <string>
+#include <vector>
+
+#include "common/allocator/my_string.h"
+#include "common/schema.h"
+#include "reader/filter/filter.h"
+
+struct table_schema;
+namespace storage {
+// Base class for predicates over the string segments of a row. It holds the
+// index of the segment to inspect and up to two comparison values.
+class TagFilter : public Filter {
+ public:
+    TagFilter(int col_idx, std::string tag_value);
+    ~TagFilter() override;
+
+    // Overrides Filter::satisfyRow; marked `override` so that a signature
+    // drift in the base class becomes a compile error.
+    bool satisfyRow(int time,
+                    std::vector<std::string*> segments) const override;
+    // Segment-only predicate implemented by the concrete filters.
+    virtual bool satisfyRow(std::vector<std::string*> segments) const;
+
+    std::string value_;
+    std::string value2_;  // For range queries
+    int col_idx_;
+};
+
+// Predicate: the selected segment equals the stored value.
+class TagEq : public TagFilter {
+ public:
+    TagEq(int col_idx, std::string tag_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+};
+
+// Predicate: the selected segment differs from the stored value.
+class TagNeq : public TagFilter {
+ public:
+    TagNeq(int col_idx, std::string tag_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+};
+
+// Predicate: the selected segment is less than the stored value.
+class TagLt : public TagFilter {
+ public:
+    TagLt(int col_idx, std::string tag_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+};
+
+// Predicate: the selected segment is less than or equal to the stored value.
+class TagLteq : public TagFilter {
+ public:
+    TagLteq(int col_idx, std::string tag_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+};
+
+// Predicate: the selected segment is greater than the stored value.
+class TagGt : public TagFilter {
+ public:
+    TagGt(int col_idx, std::string tag_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+};
+
+// Predicate: the selected segment is greater than or equal to the stored
+// value.
+class TagGteq : public TagFilter {
+ public:
+    TagGteq(int col_idx, std::string tag_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+};
+
+// Predicate: the selected segment matches the regular expression given as
+// the tag value.
+class TagRegExp : public TagFilter {
+ public:
+    TagRegExp(int col_idx, std::string tag_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+
+ private:
+    std::regex pattern_;             // compiled form of the tag value
+    bool is_valid_pattern_ = false;  // set once the pattern compiles
+};
+
+// Predicate: the selected segment does not match the regular expression
+// given as the tag value.
+class TagNotRegExp : public TagFilter {
+ public:
+    TagNotRegExp(int col_idx, std::string tag_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+
+ private:
+    std::regex pattern_;             // compiled form of the tag value
+    bool is_valid_pattern_ = false;  // set once the pattern compiles
+};
+
+// Predicate: the selected segment lies inside the range [value_, value2_].
+class TagBetween : public TagFilter {
+ public:
+    TagBetween(int col_idx, std::string lower_value,
+               std::string upper_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+};
+
+// Predicate: the selected segment lies outside the range given by the lower
+// and upper values.
+class TagNotBetween : public TagFilter {
+ public:
+    TagNotBetween(int col_idx, std::string lower_value,
+                  std::string upper_value);
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+};
+
+// Logical AND operation (binary). Owns both operands: the destructor
+// deletes them.
+class TagAnd : public TagFilter {
+ public:
+    TagAnd(TagFilter* left, TagFilter* right);
+    ~TagAnd() override;
+
+    // Non-copyable: a copy would hold the same operand pointers and delete
+    // them a second time.
+    TagAnd(const TagAnd&) = delete;
+    TagAnd& operator=(const TagAnd&) = delete;
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+
+ private:
+    TagFilter* left_;
+    TagFilter* right_;
+};
+
+// Logical OR operation (binary). Owns both operands: the destructor deletes
+// them.
+class TagOr : public TagFilter {
+ public:
+    TagOr(TagFilter* left, TagFilter* right);
+    ~TagOr() override;
+
+    // Non-copyable: a copy would hold the same operand pointers and delete
+    // them a second time.
+    TagOr(const TagOr&) = delete;
+    TagOr& operator=(const TagOr&) = delete;
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+
+ private:
+    TagFilter* left_;
+    TagFilter* right_;
+};
+
+// Logical NOT operation. Owns the wrapped filter: the destructor deletes it.
+class TagNot : public TagFilter {
+ public:
+    explicit TagNot(TagFilter* filter);
+    ~TagNot() override;
+
+    // Non-copyable: a copy would hold the same filter pointer and delete it
+    // a second time.
+    TagNot(const TagNot&) = delete;
+    TagNot& operator=(const TagNot&) = delete;
+
+    bool satisfyRow(std::vector<std::string*> segments) const override;
+
+ private:
+    TagFilter* filter_;
+};
+
+// Factory for tag filters. Column names are resolved against the table
+// schema supplied at construction.
+class TagFilterBuilder {
+ public:
+    explicit TagFilterBuilder(TableSchema* schema);
+
+    // Single-column comparisons.
+    Filter* eq(const std::string& columnName, const std::string& value);
+    Filter* neq(const std::string& columnName, const std::string& value);
+    Filter* lt(const std::string& columnName, const std::string& value);
+    Filter* lteq(const std::string& columnName, const std::string& value);
+    Filter* gt(const std::string& columnName, const std::string& value);
+    Filter* gteq(const std::string& columnName, const std::string& value);
+
+    // Regular-expression predicates.
+    Filter* reg_exp(const std::string& columnName, const std::string& value);
+    Filter* not_reg_exp(const std::string& columnName,
+                        const std::string& value);
+
+    // Range predicates.
+    Filter* between_and(const std::string& columnName,
+                        const std::string& lower, const std::string& upper);
+    Filter* not_between_and(const std::string& columnName,
+                            const std::string& lower,
+                            const std::string& upper);
+
+    // Logical operations
+    static Filter* and_filter(Filter* left, Filter* right);
+    static Filter* or_filter(Filter* left, Filter* right);
+    static Filter* not_filter(Filter* filter);
+
+ private:
+    int get_id_column_index(const std::string& columnName);
+
+    TableSchema* table_schema_;
+};
+
+} // namespace storage
+#endif // READER_FILTER_TAG_FILTER_H
\ No newline at end of file
diff --git a/cpp/src/reader/tsfile_reader.cc b/cpp/src/reader/tsfile_reader.cc
index 6da09430..2ac45d84 100644
--- a/cpp/src/reader/tsfile_reader.cc
+++ b/cpp/src/reader/tsfile_reader.cc
@@ -88,6 +88,14 @@ int TsFileReader::query(const std::string& table_name,
const std::vector<std::string>& columns_names,
int64_t start_time, int64_t end_time,
ResultSet*& result_set) {
+ return this->query(table_name, columns_names, start_time, end_time,
+ result_set, nullptr);
+}
+
+int TsFileReader::query(const std::string& table_name,
+ const std::vector<std::string>& columns_names,
+ int64_t start_time, int64_t end_time,
+ ResultSet*& result_set, Filter* tag_filter) {
int ret = E_OK;
TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta();
if (tsfile_meta == nullptr) {
@@ -102,9 +110,9 @@ int TsFileReader::query(const std::string& table_name,
std::vector<TSDataType> data_types = table_schema->get_data_types();
Filter* time_filter = new TimeBetween(start_time, end_time, false);
- ret =
- table_query_executor_->query(to_lower(table_name), columns_names,
- time_filter, nullptr, nullptr,
result_set);
+ ret = table_query_executor_->query(to_lower(table_name), columns_names,
+ time_filter, tag_filter, nullptr,
+ result_set);
return ret;
}
diff --git a/cpp/src/reader/tsfile_reader.h b/cpp/src/reader/tsfile_reader.h
index eb6a7b70..55e5f2c4 100644
--- a/cpp/src/reader/tsfile_reader.h
+++ b/cpp/src/reader/tsfile_reader.h
@@ -96,6 +96,23 @@ class TsFileReader {
int query(const std::string &table_name,
const std::vector<std::string> &columns_names, int64_t
start_time,
int64_t end_time, ResultSet *&result_set);
+
+ /**
+ * @brief query the tsfile through the table model by table name, column
+ * names, start time and end time, with an additional tag filter. The
+ * overload without tag_filter forwards nullptr for it.
+ *
+ * @param [in] table_name the table name
+ * @param [in] columns_names the columns names
+ * @param [in] start_time the start time
+ * @param [in] end_time the end time
+ * @param [out] result_set the result set
+ * @param [in] tag_filter the tag filter (last argument of the signature)
+ */
+ int query(const std::string &table_name,
+ const std::vector<std::string> &columns_names, int64_t
start_time,
+ int64_t end_time, ResultSet *&result_set, Filter *tag_filter);
+
/**
* @brief destroy the result set, this method should be called after the
* query is finished and result_set
diff --git a/cpp/test/reader/filter/tag_filter_test.cc
b/cpp/test/reader/filter/tag_filter_test.cc
new file mode 100644
index 00000000..0274d242
--- /dev/null
+++ b/cpp/test/reader/filter/tag_filter_test.cc
@@ -0,0 +1,417 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "reader/filter/tag_filter.h"
+
+#include <gtest/gtest.h>
+
+#include "common/schema.h"
+
+using namespace storage;
+
+// Fixture providing a table schema with five TEXT tag columns (name, age,
+// department, status, score) and a TagFilterBuilder bound to it.
+class TagFilterTest : public ::testing::Test {
+ protected:
+    void SetUp() override {
+        const char* tag_names[] = {"name", "age", "department", "status",
+                                   "score"};
+        std::vector<common::ColumnSchema> column_schemas;
+        for (const char* tag_name : tag_names) {
+            column_schemas.emplace_back(tag_name, common::TSDataType::TEXT,
+                                        common::ColumnCategory::TAG);
+        }
+
+        schema_ = new TableSchema("test_table", column_schemas);
+        builder_ = new TagFilterBuilder(schema_);
+    }
+
+    void TearDown() override {
+        delete builder_;
+        delete schema_;
+    }
+
+    // Builds a segment vector whose tag values start at index 1; every tag
+    // string is heap-allocated and must be released with cleanupSegments().
+    static std::vector<std::string*> createSegments(
+        const std::string& name, const std::string& age,
+        const std::string& department, const std::string& status = "",
+        const std::string& score = "") {
+        std::vector<std::string*> segments;
+        segments.emplace_back(nullptr);  // index 0 - placeholder or device name
+        for (const std::string* tag :
+             {&name, &age, &department, &status, &score}) {
+            segments.push_back(new std::string(*tag));
+        }
+        return segments;
+    }
+
+    // Frees the strings allocated by createSegments(); slot 0 is skipped.
+    static void cleanupSegments(std::vector<std::string*>& segments) {
+        for (size_t pos = 1; pos < segments.size(); ++pos) {
+            delete segments[pos];
+        }
+    }
+
+    TableSchema* schema_ = nullptr;
+    TagFilterBuilder* builder_ = nullptr;
+};
+
+// Equality filter: name == "john"
+TEST_F(TagFilterTest, TagEqFilter) {
+    Filter* name_is_john = builder_->eq("name", "john");
+    ASSERT_NE(name_is_john, nullptr);
+
+    // Evaluates the filter on a row that varies only in its name tag.
+    auto matches = [&](const std::string& name) {
+        auto row = createSegments(name, "25", "engineering", "active", "95");
+        const bool hit = name_is_john->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("john"));
+    EXPECT_FALSE(matches("alice"));
+
+    delete name_is_john;
+}
+
+// Inequality filter: name != "john"
+TEST_F(TagFilterTest, TagNeqFilter) {
+    Filter* name_not_john = builder_->neq("name", "john");
+    ASSERT_NE(name_not_john, nullptr);
+
+    // Evaluates the filter on a row that varies only in its name tag.
+    auto matches = [&](const std::string& name) {
+        auto row = createSegments(name, "25", "engineering", "active", "95");
+        const bool hit = name_not_john->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("alice"));
+    EXPECT_FALSE(matches("john"));
+
+    delete name_not_john;
+}
+
+// Less than filter: age < "30"
+TEST_F(TagFilterTest, TagLtFilter) {
+    Filter* age_below_30 = builder_->lt("age", "30");
+    ASSERT_NE(age_below_30, nullptr);
+
+    // Evaluates the filter on a row that varies only in its age tag.
+    auto matches = [&](const std::string& age) {
+        auto row = createSegments("john", age, "engineering", "active", "95");
+        const bool hit = age_below_30->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("25"));
+    EXPECT_FALSE(matches("35"));
+
+    delete age_below_30;
+}
+
+// Less than or equal filter: age <= "30"
+TEST_F(TagFilterTest, TagLteqFilter) {
+    Filter* age_at_most_30 = builder_->lteq("age", "30");
+    ASSERT_NE(age_at_most_30, nullptr);
+
+    // Evaluates the filter on a row that varies only in its age tag.
+    auto matches = [&](const std::string& age) {
+        auto row = createSegments("john", age, "engineering", "active", "95");
+        const bool hit = age_at_most_30->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("30"));
+    EXPECT_TRUE(matches("25"));
+    EXPECT_FALSE(matches("35"));
+
+    delete age_at_most_30;
+}
+
+// Greater than filter: age > "30"
+TEST_F(TagFilterTest, TagGtFilter) {
+    Filter* age_above_30 = builder_->gt("age", "30");
+    ASSERT_NE(age_above_30, nullptr);
+
+    // Evaluates the filter on a row that varies only in its age tag.
+    auto matches = [&](const std::string& age) {
+        auto row = createSegments("john", age, "engineering", "active", "95");
+        const bool hit = age_above_30->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("35"));
+    EXPECT_FALSE(matches("25"));
+
+    delete age_above_30;
+}
+
+// Greater than or equal filter: age >= "30"
+TEST_F(TagFilterTest, TagGteqFilter) {
+    Filter* age_at_least_30 = builder_->gteq("age", "30");
+    ASSERT_NE(age_at_least_30, nullptr);
+
+    // Evaluates the filter on a row that varies only in its age tag.
+    auto matches = [&](const std::string& age) {
+        auto row = createSegments("john", age, "engineering", "active", "95");
+        const bool hit = age_at_least_30->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("30"));
+    EXPECT_TRUE(matches("35"));
+    EXPECT_FALSE(matches("25"));
+
+    delete age_at_least_30;
+}
+
+// Between filter: "25" <= age <= "35"
+TEST_F(TagFilterTest, TagBetweenFilter) {
+    Filter* age_in_range = builder_->between_and("age", "25", "35");
+    ASSERT_NE(age_in_range, nullptr);
+
+    // Evaluates the filter on a row that varies only in its age tag.
+    auto matches = [&](const std::string& age) {
+        auto row = createSegments("john", age, "engineering", "active", "95");
+        const bool hit = age_in_range->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    // Both bounds and an interior value are accepted.
+    EXPECT_TRUE(matches("25"));
+    EXPECT_TRUE(matches("30"));
+    EXPECT_TRUE(matches("35"));
+    // Values below and above the range are rejected.
+    EXPECT_FALSE(matches("20"));
+    EXPECT_FALSE(matches("40"));
+
+    delete age_in_range;
+}
+
+// Not between filter: age outside ["25", "35"]
+TEST_F(TagFilterTest, TagNotBetweenFilter) {
+    Filter* age_out_of_range = builder_->not_between_and("age", "25", "35");
+    ASSERT_NE(age_out_of_range, nullptr);
+
+    // Evaluates the filter on a row that varies only in its age tag.
+    auto matches = [&](const std::string& age) {
+        auto row = createSegments("john", age, "engineering", "active", "95");
+        const bool hit = age_out_of_range->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("20"));
+    EXPECT_TRUE(matches("40"));
+    EXPECT_FALSE(matches("25"));
+    EXPECT_FALSE(matches("30"));
+
+    delete age_out_of_range;
+}
+
+// AND filter: age >= "25" AND department == "engineering"
+TEST_F(TagFilterTest, TagAndFilter) {
+    Filter* age_part = builder_->gteq("age", "25");
+    Filter* dept_part = builder_->eq("department", "engineering");
+    Filter* both = builder_->and_filter(age_part, dept_part);
+    ASSERT_NE(both, nullptr);
+
+    // Evaluates the conjunction on a row with the given age and department.
+    auto matches = [&](const std::string& age, const std::string& dept) {
+        auto row = createSegments("john", age, dept, "active", "95");
+        const bool hit = both->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("30", "engineering"));
+    EXPECT_FALSE(matches("20", "engineering"));
+    EXPECT_FALSE(matches("30", "sales"));
+
+    // Deleting the AND node also releases both operands.
+    delete both;
+}
+
+// OR filter: age < "25" OR department == "engineering"
+TEST_F(TagFilterTest, TagOrFilter) {
+    Filter* age_part = builder_->lt("age", "25");
+    Filter* dept_part = builder_->eq("department", "engineering");
+    Filter* either = builder_->or_filter(age_part, dept_part);
+    ASSERT_NE(either, nullptr);
+
+    // Evaluates the disjunction on a row with the given age and department.
+    auto matches = [&](const std::string& age, const std::string& dept) {
+        auto row = createSegments("john", age, dept, "active", "95");
+        const bool hit = either->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("20", "engineering"));
+    EXPECT_TRUE(matches("20", "sales"));
+    EXPECT_TRUE(matches("30", "engineering"));
+    EXPECT_FALSE(matches("30", "sales"));
+
+    // Deleting the OR node also releases both operands.
+    delete either;
+}
+
+// NOT filter: NOT (status == "active")
+TEST_F(TagFilterTest, TagNotFilter) {
+    Filter* is_active = builder_->eq("status", "active");
+    Filter* not_active = builder_->not_filter(is_active);
+    ASSERT_NE(not_active, nullptr);
+
+    // Evaluates the negation on a row that varies only in its status tag.
+    auto matches = [&](const std::string& status) {
+        auto row = createSegments("john", "30", "engineering", status, "95");
+        const bool hit = not_active->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("inactive"));
+    EXPECT_FALSE(matches("active"));
+
+    // Deleting the NOT node also releases the wrapped filter.
+    delete not_active;
+}
+
+// Complex nested filters:
+// (age >= "25" AND department == "engineering") OR score > "90"
+TEST_F(TagFilterTest, ComplexNestedFilters) {
+    Filter* age_part = builder_->gteq("age", "25");
+    Filter* dept_part = builder_->eq("department", "engineering");
+    Filter* score_part = builder_->gt("score", "90");
+
+    Filter* senior_engineer = builder_->and_filter(age_part, dept_part);
+    Filter* combined = builder_->or_filter(senior_engineer, score_part);
+    ASSERT_NE(combined, nullptr);
+
+    // Evaluates the nested filter on a row with the given tag values.
+    auto matches = [&](const std::string& name, const std::string& age,
+                       const std::string& dept, const std::string& score) {
+        auto row = createSegments(name, age, dept, "active", score);
+        const bool hit = combined->satisfyRow(0, row);
+        cleanupSegments(row);
+        return hit;
+    };
+
+    EXPECT_TRUE(matches("john", "30", "engineering", "85"));
+    EXPECT_TRUE(matches("alice", "20", "sales", "95"));
+    EXPECT_FALSE(matches("bob", "20", "sales", "85"));
+
+    // Deleting the root releases the whole filter tree.
+    delete combined;
+}
+
+// Building a filter on the column "invalid_column" is expected to yield
+// nullptr rather than a usable filter.
+TEST_F(TagFilterTest, InvalidColumnName) {
+    EXPECT_EQ(builder_->eq("invalid_column", "value"), nullptr);
+}
+
+// Boundary conditions: evaluates eq("name", "test") against segment
+// vectors of length zero, one and two.
+TEST_F(TagFilterTest, BoundaryConditions) {
+    auto name_is_test = builder_->eq("name", "test");
+    ASSERT_NE(name_is_test, nullptr);
+
+    // A single vector is grown step by step; its contents at each call
+    // are {}, {nullptr} and {nullptr, "test"}.
+    std::vector<std::string*> row;
+    EXPECT_FALSE(name_is_test->satisfyRow(0, row));  // no segments at all
+
+    row.push_back(nullptr);
+    EXPECT_FALSE(name_is_test->satisfyRow(0, row));  // one null segment
+
+    // The test owns this string and releases it right after the check.
+    row.push_back(new std::string("test"));
+    EXPECT_TRUE(name_is_test->satisfyRow(0, row));
+    delete row.back();
+
+    delete name_is_test;
+}
+
+// Basic regex match and non-match for reg_exp, plus the not_reg_exp
+// counterpart built from the same pattern.
+TEST_F(TagFilterTest, TagRegExpBasic) {
+    auto filter = builder_->reg_exp("name", "^j.*");
+    auto not_filter = builder_->not_reg_exp("name", "^j.*");
+    // Validate both before any use so that a builder failure shows up as a
+    // test failure instead of a null dereference. Whichever filter was
+    // created is released first (deleting nullptr is a no-op).
+    if (filter == nullptr || not_filter == nullptr) {
+        delete filter;
+        delete not_filter;
+        FAIL() << "reg_exp / not_reg_exp returned nullptr";
+    }
+
+    auto segments =
+        createSegments("john", "25", "engineering", "active", "95");
+    EXPECT_TRUE(filter->satisfyRow(0, segments));
+    cleanupSegments(segments);
+
+    segments = createSegments("alice", "25", "engineering", "active", "95");
+    EXPECT_FALSE(filter->satisfyRow(0, segments));
+    cleanupSegments(segments);
+
+    // Same non-matching row, negated filter: expected to be accepted.
+    segments = createSegments("alice", "25", "engineering", "active", "95");
+    EXPECT_TRUE(not_filter->satisfyRow(0, segments));
+    cleanupSegments(segments);
+
+    delete filter;
+    delete not_filter;
+}
+
+// Complex regex pattern with logical operations
+TEST_F(TagFilterTest, TagRegExpComplexLogic) {
+    // Accept a row when the name starts with "j", OR when BOTH the age is
+    // in the 20s AND the department contains "eng".
+    auto name_filter = builder_->reg_exp("name", "^j.*");
+    auto age_filter = builder_->reg_exp("age", "^2[0-9]$");
+    auto dept_filter = builder_->reg_exp("department", ".*eng.*");
+
+    auto age_dept_and = builder_->and_filter(age_filter, dept_filter);
+    auto complex_or = builder_->or_filter(name_filter, age_dept_and);
+    // Fail the test cleanly instead of dereferencing a null filter below.
+    ASSERT_NE(complex_or, nullptr);
+
+    auto segments = createSegments("john", "35", "sales", "active", "95");
+    EXPECT_TRUE(complex_or->satisfyRow(0, segments));  // name matches
+    cleanupSegments(segments);
+
+    segments = createSegments("alice", "25", "engineering", "active", "95");
+    EXPECT_TRUE(complex_or->satisfyRow(0, segments));  // age and dept match
+    cleanupSegments(segments);
+
+    segments = createSegments("bob", "35", "sales", "active", "95");
+    EXPECT_FALSE(complex_or->satisfyRow(0, segments));  // no match
+    cleanupSegments(segments);
+
+    // Only the root is deleted, as before; presumably the composite
+    // filters own their operands -- confirm against the implementation.
+    delete complex_or;
+}
+
+// Edge cases: a malformed regex pattern and an empty pattern.
+TEST_F(TagFilterTest, TagRegExpEdgeCases) {
+    // Invalid regex should not crash: the builder is still expected to
+    // return a filter, and that filter is expected to reject the row.
+    auto invalid_filter = builder_->reg_exp("name", "[invalid[pattern");
+    ASSERT_NE(invalid_filter, nullptr);
+
+    auto segments =
+        createSegments("test", "25", "engineering", "active", "95");
+    EXPECT_FALSE(
+        invalid_filter->satisfyRow(0, segments));  // handles gracefully
+    cleanupSegments(segments);
+    // Released here so the ASSERT below cannot return with it still alive.
+    delete invalid_filter;
+
+    // Empty pattern is expected to accept the row.
+    auto empty_filter = builder_->reg_exp("name", "");
+    ASSERT_NE(empty_filter, nullptr);
+    segments = createSegments("any", "25", "engineering", "active", "95");
+    EXPECT_TRUE(empty_filter->satisfyRow(0, segments));
+    cleanupSegments(segments);
+
+    delete empty_filter;
+}
\ No newline at end of file
diff --git a/cpp/test/reader/table_view/tsfile_reader_table_test.cc
b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
index 02398c68..4386b183 100644
--- a/cpp/test/reader/table_view/tsfile_reader_table_test.cc
+++ b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
@@ -25,6 +25,7 @@
#include "common/tablet.h"
#include "file/tsfile_io_writer.h"
#include "file/write_file.h"
+#include "reader/filter/tag_filter.h"
#include "reader/table_result_set.h"
#include "reader/tsfile_reader.h"
#include "writer/chunk_writer.h"
@@ -148,9 +149,11 @@ class TsFileTableReaderTest : public ::testing::Test {
ASSERT_EQ(ret, common::E_OK);
ResultSet* tmp_result_set = nullptr;
+ Filter* tag_filter =
+ TagFilterBuilder(table_schema).eq("id0", "device_id");
ret = reader.query(table_schema->get_table_name(),
table_schema->get_measurement_names(), 0, end_time,
- tmp_result_set);
+ tmp_result_set, tag_filter);
auto* table_result_set = (TableResultSet*)tmp_result_set;
char* literal = new char[std::strlen("device_id") + 1];
std::strcpy(literal, "device_id");
@@ -197,6 +200,7 @@ class TsFileTableReaderTest : public ::testing::Test {
delete[] literal;
ASSERT_EQ(reader.close(), common::E_OK);
delete table_schema;
+ delete tag_filter;
}
};