This is an automated email from the ASF dual-hosted git repository. haonan pushed a commit to branch rc/2.2.0 in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit f20151ea954d06a7cd0bdd92bd1affbb5ac2e6bc Author: Hongzhi Gao <[email protected]> AuthorDate: Wed Nov 26 16:07:20 2025 +0800 Implement TagFilter functionality (#635) * "Implement TagFilter functionality with support for all operations except LikePattern" * add license * fix some issues * demo for TsFile-CPP TagFilter --- cpp/examples/cpp_examples/cpp_examples.h | 1 + cpp/examples/cpp_examples/demo_read.cpp | 7 +- cpp/src/reader/device_meta_iterator.cc | 10 +- cpp/src/reader/filter/filter.h | 5 + cpp/src/reader/filter/tag_filter.cc | 295 +++++++++++++++ cpp/src/reader/filter/tag_filter.h | 188 ++++++++++ cpp/src/reader/tsfile_reader.cc | 14 +- cpp/src/reader/tsfile_reader.h | 17 + cpp/test/reader/filter/tag_filter_test.cc | 417 +++++++++++++++++++++ .../reader/table_view/tsfile_reader_table_test.cc | 6 +- 10 files changed, 952 insertions(+), 8 deletions(-) diff --git a/cpp/examples/cpp_examples/cpp_examples.h b/cpp/examples/cpp_examples/cpp_examples.h index b0512e9a..5466fbe6 100644 --- a/cpp/examples/cpp_examples/cpp_examples.h +++ b/cpp/examples/cpp_examples/cpp_examples.h @@ -26,6 +26,7 @@ #include "file/write_file.h" #include "reader/expression.h" #include "reader/filter/filter.h" +#include "reader/filter/tag_filter.h" #include "reader/qds_with_timegenerator.h" #include "reader/qds_without_timegenerator.h" #include "reader/tsfile_reader.h" diff --git a/cpp/examples/cpp_examples/demo_read.cpp b/cpp/examples/cpp_examples/demo_read.cpp index efef0c9c..f90efe35 100644 --- a/cpp/examples/cpp_examples/demo_read.cpp +++ b/cpp/examples/cpp_examples/demo_read.cpp @@ -40,8 +40,12 @@ int demo_read() { columns.emplace_back("id2"); columns.emplace_back("s1"); + auto table_schema = reader.get_table_schema(table_name); + storage::Filter* tag_filter1 = storage::TagFilterBuilder(table_schema.get()).eq("id1", "id1_filed_1"); + storage::Filter* tag_filter2 = storage::TagFilterBuilder(table_schema.get()).eq("id2", "id1_filed_2"); + storage::Filter* tag_filter = 
storage::TagFilterBuilder(table_schema.get()).and_filter(tag_filter1, tag_filter2); // Column vector contains the columns you want to select. - HANDLE_ERROR(reader.query(table_name, columns, 0, 100, temp_ret)); + HANDLE_ERROR(reader.query(table_name, columns, 0, 100, temp_ret, tag_filter)); // Get query handler. auto ret = dynamic_cast<storage::TableResultSet*>(temp_ret); @@ -98,5 +102,6 @@ int demo_read() { // Close reader. reader.close(); + delete tag_filter; return 0; } diff --git a/cpp/src/reader/device_meta_iterator.cc b/cpp/src/reader/device_meta_iterator.cc index 4f47341c..a59f511d 100644 --- a/cpp/src/reader/device_meta_iterator.cc +++ b/cpp/src/reader/device_meta_iterator.cc @@ -19,6 +19,8 @@ #include "device_meta_iterator.h" +#include "filter/tag_filter.h" + namespace storage { bool DeviceMetaIterator::has_next() { if (!result_cache_.empty()) { @@ -74,9 +76,11 @@ int DeviceMetaIterator::load_leaf_device(MetaIndexNode* meta_index_node) { const auto& leaf_children = meta_index_node->children_; for (size_t i = 0; i < leaf_children.size(); i++) { std::shared_ptr<IMetaIndexEntry> child = leaf_children[i]; - // const auto& device_id = child->name_; - if (id_filter_ != nullptr /*TODO: !id_filter_->satisfy(device_id)*/) { - continue; + if (id_filter_ != nullptr) { + if (!id_filter_->satisfyRow( + 0, child->get_device_id()->get_segments())) { + continue; + } } int32_t start_offset = child->get_offset(); int32_t end_offset = i + 1 < leaf_children.size() diff --git a/cpp/src/reader/filter/filter.h b/cpp/src/reader/filter/filter.h index 1846df5a..f39dddba 100644 --- a/cpp/src/reader/filter/filter.h +++ b/cpp/src/reader/filter/filter.h @@ -54,6 +54,11 @@ class Filter { ASSERT(false); return false; } + virtual bool satisfyRow(int time, + std::vector<std::string*> segments) const { + ASSERT(false); + return false; + } virtual std::vector<TimeRange*>* get_time_ranges() { ASSERT(false); return nullptr; diff --git a/cpp/src/reader/filter/tag_filter.cc 
b/cpp/src/reader/filter/tag_filter.cc new file mode 100644 index 00000000..c4c56a1d --- /dev/null +++ b/cpp/src/reader/filter/tag_filter.cc @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "tag_filter.h" + +#include <algorithm> +#include <utility> + +namespace storage { + +// TagFilter base class implementation +TagFilter::TagFilter(int col_idx, std::string tag_value) + : col_idx_(col_idx), value_(std::move(tag_value)), value2_("") {} + +TagFilter::~TagFilter() = default; + +bool TagFilter::satisfyRow(int time, std::vector<std::string*> segments) const { + return satisfyRow(segments); +} + +bool TagFilter::satisfyRow(std::vector<std::string*> segments) const { + ASSERT(false); + return false; +} + +// TagEq implementation +TagEq::TagEq(int col_idx, std::string tag_value) + : TagFilter(col_idx, std::move(tag_value)) {} + +bool TagEq::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size()) return false; + return *segments[col_idx_] == value_; +} + +// TagNeq implementation +TagNeq::TagNeq(int col_idx, std::string tag_value) + : TagFilter(col_idx, std::move(tag_value)) {} + +bool TagNeq::satisfyRow(std::vector<std::string*> segments) const { + if 
(col_idx_ >= segments.size()) return false; + return *segments[col_idx_] != value_; +} + +// TagLt implementation +TagLt::TagLt(int col_idx, std::string tag_value) + : TagFilter(col_idx, std::move(tag_value)) {} + +bool TagLt::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size()) return false; + return *segments[col_idx_] < value_; +} + +// TagLteq implementation +TagLteq::TagLteq(int col_idx, std::string tag_value) + : TagFilter(col_idx, std::move(tag_value)) {} + +bool TagLteq::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size()) return false; + return *segments[col_idx_] <= value_; +} + +// TagGt implementation +TagGt::TagGt(int col_idx, std::string tag_value) + : TagFilter(col_idx, std::move(tag_value)) {} + +bool TagGt::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size()) return false; + return *segments[col_idx_] > value_; +} + +// TagGteq implementation +TagGteq::TagGteq(int col_idx, std::string tag_value) + : TagFilter(col_idx, std::move(tag_value)) {} + +bool TagGteq::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size()) return false; + return *segments[col_idx_] >= value_; +} + +// TagRegExp implementation +TagRegExp::TagRegExp(int col_idx, std::string tag_value) + : TagFilter(col_idx, std::move(tag_value)) { + try { + pattern_ = std::regex(value_); + is_valid_pattern_ = true; + } catch (const std::regex_error& e) { + is_valid_pattern_ = false; + } +} + +bool TagRegExp::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size() || !is_valid_pattern_) return false; + try { + return std::regex_search(*segments[col_idx_], pattern_); + } catch (const std::regex_error&) { + return false; + } +} + +// TagNotRegExp implementation +TagNotRegExp::TagNotRegExp(int col_idx, std::string tag_value) + : TagFilter(col_idx, std::move(tag_value)) { + try { + pattern_ = std::regex(value_); + 
is_valid_pattern_ = true; + } catch (const std::regex_error& e) { + is_valid_pattern_ = false; + } +} + +bool TagNotRegExp::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size() || !is_valid_pattern_) return false; + try { + return !std::regex_search(*segments[col_idx_], pattern_); + } catch (const std::regex_error&) { + return true; + } +} + +// TagBetween implementation +TagBetween::TagBetween(int col_idx, std::string lower_value, + std::string upper_value) + : TagFilter(col_idx, std::move(lower_value)) { + value2_ = std::move(upper_value); +} + +bool TagBetween::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size()) return false; + const std::string& segment_value = *segments[col_idx_]; + return segment_value >= value_ && segment_value <= value2_; +} + +// TagNotBetween implementation +TagNotBetween::TagNotBetween(int col_idx, std::string lower_value, + std::string upper_value) + : TagFilter(col_idx, std::move(lower_value)) { + value2_ = std::move(upper_value); +} + +bool TagNotBetween::satisfyRow(std::vector<std::string*> segments) const { + if (col_idx_ >= segments.size()) return false; + const std::string& segment_value = *segments[col_idx_]; + return segment_value < value_ || segment_value > value2_; +} + +// TagAnd implementation +TagAnd::TagAnd(TagFilter* left, TagFilter* right) + : TagFilter(-1, ""), left_(left), right_(right) {} + +TagAnd::~TagAnd() { + delete left_; + delete right_; +} + +bool TagAnd::satisfyRow(std::vector<std::string*> segments) const { + return left_->satisfyRow(segments) && right_->satisfyRow(segments); +} + +// TagOr implementation +TagOr::TagOr(TagFilter* left, TagFilter* right) + : TagFilter(-1, ""), left_(left), right_(right) {} + +TagOr::~TagOr() { + delete left_; + delete right_; +} + +bool TagOr::satisfyRow(std::vector<std::string*> segments) const { + return left_->satisfyRow(segments) || right_->satisfyRow(segments); +} + +// TagNot implementation 
+TagNot::TagNot(TagFilter* filter) : TagFilter(-1, ""), filter_(filter) {} + +TagNot::~TagNot() { delete filter_; } + +bool TagNot::satisfyRow(std::vector<std::string*> segments) const { + return !filter_->satisfyRow(segments); +} + +// TagFilterBuilder implementation +TagFilterBuilder::TagFilterBuilder(TableSchema* schema) + : table_schema_(schema) {} + +Filter* TagFilterBuilder::eq(const std::string& columnName, + const std::string& value) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagEq(idx, value); +} + +Filter* TagFilterBuilder::neq(const std::string& columnName, + const std::string& value) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagNeq(idx, value); +} + +Filter* TagFilterBuilder::lt(const std::string& columnName, + const std::string& value) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagLt(idx, value); +} + +Filter* TagFilterBuilder::lteq(const std::string& columnName, + const std::string& value) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagLteq(idx, value); +} + +Filter* TagFilterBuilder::gt(const std::string& columnName, + const std::string& value) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagGt(idx, value); +} + +Filter* TagFilterBuilder::gteq(const std::string& columnName, + const std::string& value) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagGteq(idx, value); +} + +Filter* TagFilterBuilder::reg_exp(const std::string& columnName, + const std::string& value) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagRegExp(idx, value); +} + +Filter* TagFilterBuilder::not_reg_exp(const std::string& columnName, + const std::string& value) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return 
new TagNotRegExp(idx, value); +} + +Filter* TagFilterBuilder::between_and(const std::string& columnName, + const std::string& lower, + const std::string& upper) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagBetween(idx, lower, upper); +} + +Filter* TagFilterBuilder::not_between_and(const std::string& columnName, + const std::string& lower, + const std::string& upper) { + auto idx = get_id_column_index(columnName); + if (idx < 0) return nullptr; + return new TagNotBetween(idx, lower, upper); +} + +Filter* TagFilterBuilder::and_filter(Filter* left, Filter* right) { + return new TagAnd(dynamic_cast<TagFilter*>(left), + dynamic_cast<TagFilter*>(right)); +} + +Filter* TagFilterBuilder::or_filter(Filter* left, Filter* right) { + return new TagOr(dynamic_cast<TagFilter*>(left), + dynamic_cast<TagFilter*>(right)); +} + +Filter* TagFilterBuilder::not_filter(Filter* filter) { + return new TagNot(dynamic_cast<TagFilter*>(filter)); +} + +int TagFilterBuilder::get_id_column_index(const std::string& columnName) { + int idColumnOrder = table_schema_->find_id_column_order(columnName); + if (idColumnOrder == -1) { + return -1; + } + return idColumnOrder + 1; +} + +} // namespace storage \ No newline at end of file diff --git a/cpp/src/reader/filter/tag_filter.h b/cpp/src/reader/filter/tag_filter.h new file mode 100644 index 00000000..c7d8843f --- /dev/null +++ b/cpp/src/reader/filter/tag_filter.h @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef READER_FILTER_TAG_FILTER_H +#define READER_FILTER_TAG_FILTER_H + +#include <memory> +#include <string> +#include <vector> + +#include "common/allocator/my_string.h" +#include "common/schema.h" +#include "reader/filter/filter.h" + +struct table_schema; +namespace storage { +class TagFilter : public Filter { + public: + TagFilter(int col_idx, std::string tag_value); + ~TagFilter() override; + + virtual bool satisfyRow(int time, std::vector<std::string*> segments) const; + virtual bool satisfyRow(std::vector<std::string*> segments) const; + + std::string value_; + std::string value2_; // For range queries + int col_idx_; +}; + +// Equality comparison +class TagEq : public TagFilter { + public: + TagEq(int col_idx, std::string tag_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Inequality comparison +class TagNeq : public TagFilter { + public: + TagNeq(int col_idx, std::string tag_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Less than comparison +class TagLt : public TagFilter { + public: + TagLt(int col_idx, std::string tag_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Less than or equal comparison +class TagLteq : public TagFilter { + public: + TagLteq(int col_idx, std::string tag_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Greater than comparison +class TagGt : public TagFilter { + public: + TagGt(int col_idx, std::string tag_value); + bool 
satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Greater than or equal comparison +class TagGteq : public TagFilter { + public: + TagGteq(int col_idx, std::string tag_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Regular expression match +class TagRegExp : public TagFilter { + std::regex pattern_; + bool is_valid_pattern_ = false; + + public: + TagRegExp(int col_idx, std::string tag_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Regular expression not match +class TagNotRegExp : public TagFilter { + std::regex pattern_; + bool is_valid_pattern_ = false; + + public: + TagNotRegExp(int col_idx, std::string tag_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Range query [value_, value2_] +class TagBetween : public TagFilter { + public: + TagBetween(int col_idx, std::string lower_value, std::string upper_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Not in range +class TagNotBetween : public TagFilter { + public: + TagNotBetween(int col_idx, std::string lower_value, + std::string upper_value); + bool satisfyRow(std::vector<std::string*> segments) const override; +}; + +// Logical AND operation (binary) +class TagAnd : public TagFilter { + public: + TagAnd(TagFilter* left, TagFilter* right); + ~TagAnd() override; + bool satisfyRow(std::vector<std::string*> segments) const override; + + private: + TagFilter* left_; + TagFilter* right_; +}; + +// Logical OR operation (binary) +class TagOr : public TagFilter { + public: + TagOr(TagFilter* left, TagFilter* right); + ~TagOr() override; + bool satisfyRow(std::vector<std::string*> segments) const override; + + private: + TagFilter* left_; + TagFilter* right_; +}; + +// Logical NOT operation +class TagNot : public TagFilter { + public: + explicit TagNot(TagFilter* filter); + ~TagNot() override; + bool satisfyRow(std::vector<std::string*> 
segments) const override; + + private: + TagFilter* filter_; +}; + +class TagFilterBuilder { + TableSchema* table_schema_; + + public: + explicit TagFilterBuilder(TableSchema* schema); + + Filter* eq(const std::string& columnName, const std::string& value); + Filter* neq(const std::string& columnName, const std::string& value); + Filter* lt(const std::string& columnName, const std::string& value); + Filter* lteq(const std::string& columnName, const std::string& value); + Filter* gt(const std::string& columnName, const std::string& value); + Filter* gteq(const std::string& columnName, const std::string& value); + Filter* reg_exp(const std::string& columnName, const std::string& value); + Filter* not_reg_exp(const std::string& columnName, + const std::string& value); + Filter* between_and(const std::string& columnName, const std::string& lower, + const std::string& upper); + Filter* not_between_and(const std::string& columnName, + const std::string& lower, const std::string& upper); + + // Logical operations + static Filter* and_filter(Filter* left, Filter* right); + static Filter* or_filter(Filter* left, Filter* right); + static Filter* not_filter(Filter* filter); + + private: + int get_id_column_index(const std::string& columnName); +}; + +} // namespace storage +#endif // READER_FILTER_TAG_FILTER_H \ No newline at end of file diff --git a/cpp/src/reader/tsfile_reader.cc b/cpp/src/reader/tsfile_reader.cc index 5dba982f..8118052b 100644 --- a/cpp/src/reader/tsfile_reader.cc +++ b/cpp/src/reader/tsfile_reader.cc @@ -88,6 +88,14 @@ int TsFileReader::query(const std::string& table_name, const std::vector<std::string>& columns_names, int64_t start_time, int64_t end_time, ResultSet*& result_set) { + return this->query(table_name, columns_names, start_time, end_time, + result_set, nullptr); +} + +int TsFileReader::query(const std::string& table_name, + const std::vector<std::string>& columns_names, + int64_t start_time, int64_t end_time, + ResultSet*& result_set, Filter* 
tag_filter) { int ret = E_OK; TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta(); if (tsfile_meta == nullptr) { @@ -102,9 +110,9 @@ int TsFileReader::query(const std::string& table_name, std::vector<TSDataType> data_types = table_schema->get_data_types(); Filter* time_filter = new TimeBetween(start_time, end_time, false); - ret = - table_query_executor_->query(to_lower(table_name), columns_names, - time_filter, nullptr, nullptr, result_set); + ret = table_query_executor_->query(to_lower(table_name), columns_names, + time_filter, tag_filter, nullptr, + result_set); return ret; } diff --git a/cpp/src/reader/tsfile_reader.h b/cpp/src/reader/tsfile_reader.h index dc5105c0..c4a615f1 100644 --- a/cpp/src/reader/tsfile_reader.h +++ b/cpp/src/reader/tsfile_reader.h @@ -96,6 +96,23 @@ class TsFileReader { int query(const std::string &table_name, const std::vector<std::string> &columns_names, int64_t start_time, int64_t end_time, ResultSet *&result_set); + + /** + * @brief query the tsfile by the table name, columns names, start time + * and end time, tag filter. this method is used to query the tsfile by the + * table model. 
+ * + * @param [in] table_name the table name + * @param [in] columns_names the columns names + * @param [in] start_time the start time + * @param [in] end_time the end time + * @param [in] tag_filter the tag filter + * @param [out] result_set the result set + */ + int query(const std::string &table_name, + const std::vector<std::string> &columns_names, int64_t start_time, + int64_t end_time, ResultSet *&result_set, Filter *tag_filter); + /** * @brief destroy the result set, this method should be called after the * query is finished and result_set diff --git a/cpp/test/reader/filter/tag_filter_test.cc b/cpp/test/reader/filter/tag_filter_test.cc new file mode 100644 index 00000000..0274d242 --- /dev/null +++ b/cpp/test/reader/filter/tag_filter_test.cc @@ -0,0 +1,417 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "reader/filter/tag_filter.h" + +#include <gtest/gtest.h> + +#include "common/schema.h" + +using namespace storage; + +class TagFilterTest : public ::testing::Test { + protected: + void SetUp() override { + std::vector<common::ColumnSchema> column_schemas; + column_schemas.emplace_back("name", common::TSDataType::TEXT, + common::ColumnCategory::TAG); + column_schemas.emplace_back("age", common::TSDataType::TEXT, + common::ColumnCategory::TAG); + column_schemas.emplace_back("department", common::TSDataType::TEXT, + common::ColumnCategory::TAG); + column_schemas.emplace_back("status", common::TSDataType::TEXT, + common::ColumnCategory::TAG); + column_schemas.emplace_back("score", common::TSDataType::TEXT, + common::ColumnCategory::TAG); + + schema_ = new TableSchema("test_table", column_schemas); + builder_ = new TagFilterBuilder(schema_); + } + + void TearDown() override { + delete builder_; + delete schema_; + } + + // Helper method to create segments starting from index 1 + static std::vector<std::string*> createSegments( + const std::string& name, const std::string& age, + const std::string& department, const std::string& status = "", + const std::string& score = "") { + std::vector<std::string*> segments; + segments.emplace_back(nullptr); // index 0 - placeholder or device name + segments.push_back(new std::string(name)); + segments.push_back(new std::string(age)); + segments.push_back(new std::string(department)); + segments.push_back(new std::string(status)); + segments.push_back(new std::string(score)); + return segments; + } + + // Helper method to cleanup segments + static void cleanupSegments(std::vector<std::string*>& segments) { + for (size_t i = 1; i < segments.size(); i++) { + delete segments[i]; + } + } + + TableSchema* schema_ = nullptr; + TagFilterBuilder* builder_ = nullptr; +}; + +// Equality filter +TEST_F(TagFilterTest, TagEqFilter) { + auto filter = builder_->eq("name", "john"); + ASSERT_NE(filter, nullptr); + + auto segments = 
createSegments("john", "25", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("alice", "25", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; +} + +// Inequality filter +TEST_F(TagFilterTest, TagNeqFilter) { + auto filter = builder_->neq("name", "john"); + ASSERT_NE(filter, nullptr); + + auto segments = + createSegments("alice", "25", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "25", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; +} + +// Less than filter +TEST_F(TagFilterTest, TagLtFilter) { + auto filter = builder_->lt("age", "30"); + ASSERT_NE(filter, nullptr); + + auto segments = createSegments("john", "25", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "35", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; +} + +// Less than or equal filter +TEST_F(TagFilterTest, TagLteqFilter) { + auto filter = builder_->lteq("age", "30"); + ASSERT_NE(filter, nullptr); + + auto segments = createSegments("john", "30", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "25", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "35", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; +} + +// Greater than filter +TEST_F(TagFilterTest, TagGtFilter) { + auto filter = builder_->gt("age", 
"30"); + ASSERT_NE(filter, nullptr); + + auto segments = createSegments("john", "35", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "25", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; +} + +// Greater than or equal filter +TEST_F(TagFilterTest, TagGteqFilter) { + auto filter = builder_->gteq("age", "30"); + ASSERT_NE(filter, nullptr); + + auto segments = createSegments("john", "30", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "35", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "25", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; +} + +// Between filter +TEST_F(TagFilterTest, TagBetweenFilter) { + auto filter = builder_->between_and("age", "25", "35"); + ASSERT_NE(filter, nullptr); + + auto segments = createSegments("john", "25", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "30", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "35", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "20", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "40", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; +} + +// Not between filter 
+TEST_F(TagFilterTest, TagNotBetweenFilter) { + auto filter = builder_->not_between_and("age", "25", "35"); + ASSERT_NE(filter, nullptr); + + auto segments = createSegments("john", "20", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "40", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "25", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "30", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; +} + +// AND filter +TEST_F(TagFilterTest, TagAndFilter) { + auto left_filter = builder_->gteq("age", "25"); + auto right_filter = builder_->eq("department", "engineering"); + auto and_filter = builder_->and_filter(left_filter, right_filter); + ASSERT_NE(and_filter, nullptr); + + auto segments = createSegments("john", "30", "engineering", "active", "95"); + EXPECT_TRUE(and_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "20", "engineering", "active", "95"); + EXPECT_FALSE(and_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "30", "sales", "active", "95"); + EXPECT_FALSE(and_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete and_filter; +} + +// OR filter +TEST_F(TagFilterTest, TagOrFilter) { + auto left_filter = builder_->lt("age", "25"); + auto right_filter = builder_->eq("department", "engineering"); + auto or_filter = builder_->or_filter(left_filter, right_filter); + ASSERT_NE(or_filter, nullptr); + + auto segments = createSegments("john", "20", "engineering", "active", "95"); + EXPECT_TRUE(or_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = 
createSegments("john", "20", "sales", "active", "95"); + EXPECT_TRUE(or_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "30", "engineering", "active", "95"); + EXPECT_TRUE(or_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "30", "sales", "active", "95"); + EXPECT_FALSE(or_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete or_filter; +} + +// NOT filter +TEST_F(TagFilterTest, TagNotFilter) { + auto base_filter = builder_->eq("status", "active"); + auto not_filter = builder_->not_filter(base_filter); + ASSERT_NE(not_filter, nullptr); + + auto segments = + createSegments("john", "30", "engineering", "inactive", "95"); + EXPECT_TRUE(not_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("john", "30", "engineering", "active", "95"); + EXPECT_FALSE(not_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete not_filter; +} + +// Complex nested filters +TEST_F(TagFilterTest, ComplexNestedFilters) { + auto age_filter = builder_->gteq("age", "25"); + auto dept_filter = builder_->eq("department", "engineering"); + auto score_filter = builder_->gt("score", "90"); + + auto and_filter = builder_->and_filter(age_filter, dept_filter); + auto complex_filter = builder_->or_filter(and_filter, score_filter); + ASSERT_NE(complex_filter, nullptr); + + auto segments = createSegments("john", "30", "engineering", "active", "85"); + EXPECT_TRUE(complex_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("alice", "20", "sales", "active", "95"); + EXPECT_TRUE(complex_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("bob", "20", "sales", "active", "85"); + EXPECT_FALSE(complex_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete complex_filter; +} + +// Invalid column name +TEST_F(TagFilterTest, 
InvalidColumnName) { + auto filter = builder_->eq("invalid_column", "value"); + EXPECT_EQ(filter, nullptr); +} + +// Boundary conditions +TEST_F(TagFilterTest, BoundaryConditions) { + auto filter = builder_->eq("name", "test"); + ASSERT_NE(filter, nullptr); + + std::vector<std::string*> empty_segments; + EXPECT_FALSE(filter->satisfyRow(0, empty_segments)); + + std::vector<std::string*> small_segments = {nullptr}; + EXPECT_FALSE(filter->satisfyRow(0, small_segments)); + + std::vector<std::string*> minimal_segments = {nullptr, + new std::string("test")}; + EXPECT_TRUE(filter->satisfyRow(0, minimal_segments)); + delete minimal_segments[1]; + + delete filter; +} + +// Basic regex match and not match +TEST_F(TagFilterTest, TagRegExpBasic) { + auto filter = builder_->reg_exp("name", "^j.*"); + ASSERT_NE(filter, nullptr); + + auto segments = createSegments("john", "25", "engineering", "active", "95"); + EXPECT_TRUE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + segments = createSegments("alice", "25", "engineering", "active", "95"); + EXPECT_FALSE(filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + auto not_filter = builder_->not_reg_exp("name", "^j.*"); + segments = createSegments("alice", "25", "engineering", "active", "95"); + EXPECT_TRUE(not_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete filter; + delete not_filter; +} + +// Complex regex pattern with logical operations +TEST_F(TagFilterTest, TagRegExpComplexLogic) { + // Match names starting with j OR age in 20s AND department contains "eng" + auto name_filter = builder_->reg_exp("name", "^j.*"); + auto age_filter = builder_->reg_exp("age", "^2[0-9]$"); + auto dept_filter = builder_->reg_exp("department", ".*eng.*"); + + auto age_dept_and = builder_->and_filter(age_filter, dept_filter); + auto complex_or = builder_->or_filter(name_filter, age_dept_and); + + auto segments = createSegments("john", "35", "sales", "active", "95"); + 
EXPECT_TRUE(complex_or->satisfyRow(0, segments)); // name matches + cleanupSegments(segments); + + segments = createSegments("alice", "25", "engineering", "active", "95"); + EXPECT_TRUE(complex_or->satisfyRow(0, segments)); // age and dept match + cleanupSegments(segments); + + segments = createSegments("bob", "35", "sales", "active", "95"); + EXPECT_FALSE(complex_or->satisfyRow(0, segments)); // no match + cleanupSegments(segments); + + delete complex_or; +} + +// Edge cases: invalid regex and boundary values +TEST_F(TagFilterTest, TagRegExpEdgeCases) { + // Invalid regex should not crash + auto invalid_filter = builder_->reg_exp("name", "[invalid[pattern"); + ASSERT_NE(invalid_filter, nullptr); + + auto segments = createSegments("test", "25", "engineering", "active", "95"); + EXPECT_FALSE( + invalid_filter->satisfyRow(0, segments)); // handles gracefully + cleanupSegments(segments); + + // Empty pattern matches everything + auto empty_filter = builder_->reg_exp("name", ""); + segments = createSegments("any", "25", "engineering", "active", "95"); + EXPECT_TRUE(empty_filter->satisfyRow(0, segments)); + cleanupSegments(segments); + + delete invalid_filter; + delete empty_filter; +} \ No newline at end of file diff --git a/cpp/test/reader/table_view/tsfile_reader_table_test.cc b/cpp/test/reader/table_view/tsfile_reader_table_test.cc index 02398c68..4386b183 100644 --- a/cpp/test/reader/table_view/tsfile_reader_table_test.cc +++ b/cpp/test/reader/table_view/tsfile_reader_table_test.cc @@ -25,6 +25,7 @@ #include "common/tablet.h" #include "file/tsfile_io_writer.h" #include "file/write_file.h" +#include "reader/filter/tag_filter.h" #include "reader/table_result_set.h" #include "reader/tsfile_reader.h" #include "writer/chunk_writer.h" @@ -148,9 +149,11 @@ class TsFileTableReaderTest : public ::testing::Test { ASSERT_EQ(ret, common::E_OK); ResultSet* tmp_result_set = nullptr; + Filter* tag_filter = + TagFilterBuilder(table_schema).eq("id0", "device_id"); ret = 
reader.query(table_schema->get_table_name(), table_schema->get_measurement_names(), 0, end_time, - tmp_result_set); + tmp_result_set, tag_filter); auto* table_result_set = (TableResultSet*)tmp_result_set; char* literal = new char[std::strlen("device_id") + 1]; std::strcpy(literal, "device_id"); @@ -197,6 +200,7 @@ class TsFileTableReaderTest : public ::testing::Test { delete[] literal; ASSERT_EQ(reader.close(), common::E_OK); delete table_schema; + delete tag_filter; } };
