This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 36f746c2 feat: add manifest evaluator (#403)
36f746c2 is described below
commit 36f746c2138462a417d77b9fa08ba68364c5b444
Author: dongxiao <[email protected]>
AuthorDate: Thu Dec 11 13:28:25 2025 +0800
feat: add manifest evaluator (#403)
---
src/iceberg/CMakeLists.txt | 1 +
src/iceberg/expression/manifest_evaluator.cc | 384 ++++++++++++++++++++++
src/iceberg/expression/manifest_evaluator.h | 82 +++++
src/iceberg/expression/meson.build | 1 +
src/iceberg/expression/term.h | 2 +
src/iceberg/meson.build | 1 +
src/iceberg/row/struct_like.cc | 2 +-
src/iceberg/row/struct_like.h | 4 +
src/iceberg/test/CMakeLists.txt | 1 +
src/iceberg/test/manifest_evaluator_test.cc | 462 +++++++++++++++++++++++++++
src/iceberg/test/meson.build | 1 +
11 files changed, 940 insertions(+), 1 deletion(-)
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 7218d76e..4c63c0c4 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -27,6 +27,7 @@ set(ICEBERG_SOURCES
expression/expressions.cc
expression/inclusive_metrics_evaluator.cc
expression/literal.cc
+ expression/manifest_evaluator.cc
expression/predicate.cc
expression/residual_evaluator.cc
expression/rewrite_not.cc
diff --git a/src/iceberg/expression/manifest_evaluator.cc
b/src/iceberg/expression/manifest_evaluator.cc
new file mode 100644
index 00000000..845ecb6c
--- /dev/null
+++ b/src/iceberg/expression/manifest_evaluator.cc
@@ -0,0 +1,384 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/manifest_evaluator.h"
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "iceberg/expression/binder.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/manifest/manifest_list.h"
+#include "iceberg/row/struct_like.h"
+#include "iceberg/schema.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+namespace {
+// Inclusive evaluation results: kRowCannotMatch is returned only when the partition
+// summaries rule a predicate out; everything else is kRowsMightMatch.
+constexpr bool kRowsMightMatch = true;
+constexpr bool kRowCannotMatch = false;
+// IN predicates with more literals than this are not checked against the bounds.
+// Unsigned so that it compares against container sizes without a sign conversion.
+constexpr size_t kInPredicateLimit = 200;
+}  // namespace
+
+/// \brief Visitor that evaluates a bound expression against the partition field
+/// summaries of one manifest.
+///
+/// Every predicate callback returns kRowsMightMatch unless the summary of the
+/// referenced partition field shows that no value can satisfy the predicate, in which
+/// case it returns kRowCannotMatch.
+///
+/// \note The visitor stores a reference to `manifest.partitions`; the manifest must
+/// outlive the visitor.
+class ManifestEvalVisitor : public BoundVisitor<bool> {
+ public:
+  explicit ManifestEvalVisitor(const ManifestFile& manifest)
+      : stats_(manifest.partitions) {}
+
+  Result<bool> AlwaysTrue() override { return kRowsMightMatch; }
+
+  Result<bool> AlwaysFalse() override { return kRowCannotMatch; }
+
+  // Plain boolean negation of the child result. MakePartitionFilter passes the
+  // expression through RewriteNot before binding it.
+  Result<bool> Not(bool child_result) override { return !child_result; }
+
+  Result<bool> And(bool left_result, bool right_result) override {
+    return left_result && right_result;
+  }
+
+  Result<bool> Or(bool left_result, bool right_result) override {
+    return left_result || right_result;
+  }
+
+  Result<bool> IsNull(const std::shared_ptr<Bound>& expr) override {
+    // no need to check whether the field is required because binding evaluates that
+    // case. if the column has no null values, the expression cannot match
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    if (!stats_.at(pos).contains_null) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  Result<bool> NotNull(const std::shared_ptr<Bound>& expr) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    if (AllValuesAreNull(stats_.at(pos), ref->type()->type_id())) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  Result<bool> IsNaN(const std::shared_ptr<Bound>& expr) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    // contains_nan is optional; only an explicit `false` rules NaN values out
+    if (summary.contains_nan.has_value() && !summary.contains_nan.value()) {
+      return kRowCannotMatch;
+    }
+    if (AllValuesAreNull(summary, ref->type()->type_id())) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  Result<bool> NotNaN(const std::shared_ptr<Bound>& expr) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    // if containsNaN is true, containsNull is false and lowerBound is null, all values
+    // are NaN
+    if (summary.contains_nan.has_value() && summary.contains_nan.value() &&
+        !summary.contains_null && !summary.lower_bound.has_value()) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  Result<bool> Lt(const std::shared_ptr<Bound>& expr, const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    if (lower >= lit) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> LtEq(const std::shared_ptr<Bound>& expr, const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    if (lower > lit) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> Gt(const std::shared_ptr<Bound>& expr, const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.upper_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+    if (upper <= lit) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> GtEq(const std::shared_ptr<Bound>& expr, const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.upper_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+    if (upper < lit) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> Eq(const std::shared_ptr<Bound>& expr, const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value() || !summary.upper_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null and literal cannot contain null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    if (lower > lit) {
+      return kRowCannotMatch;
+    }
+
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+    if (upper < lit) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  Result<bool> NotEq([[maybe_unused]] const std::shared_ptr<Bound>& expr,
+                     [[maybe_unused]] const Literal& lit) override {
+    // because the bounds are not necessarily a min or max value, this cannot be
+    // answered using them. notEq(col, X) with (X, Y) doesn't guarantee that X is a
+    // value in col.
+    return kRowsMightMatch;
+  }
+
+  Result<bool> In(const std::shared_ptr<Bound>& expr,
+                  const BoundSetPredicate::LiteralSet& literal_set) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value() || !summary.upper_bound.has_value()) {
+      // values are all null and literalSet cannot contain null.
+      return kRowCannotMatch;
+    }
+    if (literal_set.size() > static_cast<size_t>(kInPredicateLimit)) {
+      // skip evaluating the predicate if the number of values is too big
+      return kRowsMightMatch;
+    }
+
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+
+    if (std::ranges::all_of(literal_set, [&](const Literal& lit) {
+          return lit < lower || lit > upper;
+        })) {
+      // if all values are less than lower bound or greater than upper bound,
+      // rows cannot match.
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> NotIn(
+      [[maybe_unused]] const std::shared_ptr<Bound>& expr,
+      [[maybe_unused]] const BoundSetPredicate::LiteralSet& literal_set) override {
+    // because the bounds are not necessarily a min or max value, this cannot be
+    // answered using them. notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X
+    // is a value in col.
+    return kRowsMightMatch;
+  }
+
+  Result<bool> StartsWith(const std::shared_ptr<Bound>& expr,
+                          const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value() || !summary.upper_bound.has_value()) {
+      return kRowCannotMatch;
+    }
+    if (lit.type()->type_id() != TypeId::kString) {
+      return InvalidExpression("Invalid literal: not a string, cannot use StartsWith");
+    }
+    const auto& prefix = std::get<std::string>(lit.value());
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+    const auto& lower_bound = std::get<std::string>(lower.value());
+    const auto& upper_bound = std::get<std::string>(upper.value());
+    // compare only the leading bytes of the lower bound, at most as many as the prefix
+    // has; compare(pos, len, str) does this without building a temporary substring
+    const size_t lower_length = std::min(prefix.size(), lower_bound.size());
+    if (lower_bound.compare(0, lower_length, prefix) > 0) {
+      return kRowCannotMatch;
+    }
+    // same truncated comparison for the upper bound
+    const size_t upper_length = std::min(prefix.size(), upper_bound.size());
+    if (upper_bound.compare(0, upper_length, prefix) < 0) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> NotStartsWith(const std::shared_ptr<Bound>& expr,
+                             const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (summary.contains_null || !summary.lower_bound.has_value() ||
+        !summary.upper_bound.has_value()) {
+      return kRowsMightMatch;
+    }
+    if (lit.type()->type_id() != TypeId::kString) {
+      return InvalidExpression("Invalid literal: not a string, cannot use notStartsWith");
+    }
+    // notStartsWith will match unless all values must start with the prefix. This
+    // happens when the lower and upper bounds both start with the prefix.
+    const auto& prefix = std::get<std::string>(lit.value());
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+    const auto& lower_bound = std::get<std::string>(lower.value());
+    const auto& upper_bound = std::get<std::string>(upper.value());
+
+    // a bound shorter than the prefix cannot start with it; starts_with() already
+    // returns false in that case, so no separate length checks are needed
+    if (lower_bound.starts_with(prefix) && upper_bound.starts_with(prefix)) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+ private:
+  /// \brief Map a bound reference to its index in the partition summaries.
+  ///
+  /// \return The single position in the accessor's path, or InvalidArgument if the
+  /// path is empty, nested (more than one position), or past the end of the summaries.
+  Result<size_t> GetPosition(const BoundReference& ref) const {
+    const auto& accessor = ref.accessor();
+    const auto& position_path = accessor.position_path();
+    if (position_path.empty()) {
+      return InvalidArgument("Invalid accessor: empty position path.");
+    }
+    // nested accessors are not supported for partition fields
+    if (position_path.size() > 1) {
+      return InvalidArgument("Cannot convert nested accessor to position");
+    }
+    auto pos = position_path.front();
+    if (pos >= stats_.size()) {
+      return InvalidArgument("Position {} is out of partition field range {}", pos,
+                             stats_.size());
+    }
+    return pos;
+  }
+
+  /// \brief Whether the summary shows that every partition value of the field is null.
+  bool AllValuesAreNull(const PartitionFieldSummary& summary, TypeId type_id) const {
+    // containsNull encodes whether at least one partition value is null,
+    // lowerBound is null if all partition values are null
+    bool all_null = summary.contains_null && !summary.lower_bound.has_value();
+
+    if (all_null && (type_id == TypeId::kDouble || type_id == TypeId::kFloat)) {
+      // floating point types may include NaN values, which we check separately.
+      // In case bounds don't include NaN value, containsNaN needs to be checked against.
+      all_null = summary.contains_nan.has_value() && !summary.contains_nan.value();
+    }
+    return all_null;
+  }
+
+  /// \brief Deserialize a serialized lower/upper bound into a Literal of `type`.
+  ///
+  /// \return NotSupported if `type` is not a primitive type.
+  Result<Literal> DeserializeBoundLiteral(const std::vector<uint8_t>& bound,
+                                          const std::shared_ptr<Type>& type) const {
+    if (!type->is_primitive()) {
+      return NotSupported("Bounds of non-primitive partition fields are not supported.");
+    }
+    // the cast already yields a temporary, so no std::move is needed
+    return Literal::Deserialize(bound,
+                                internal::checked_pointer_cast<PrimitiveType>(type));
+  }
+
+ private:
+  const std::vector<PartitionFieldSummary>& stats_;
+};
+
+// Stores the expression that Evaluate() later visits.
+ManifestEvaluator::ManifestEvaluator(std::shared_ptr<Expression> expr)
+ : expr_(std::move(expr)) {}
+
+// Defaulted destructor, defined here rather than in the header.
+ManifestEvaluator::~ManifestEvaluator() = default;
+
+// Not implemented yet: always returns NotImplemented and ignores every argument.
+Result<std::unique_ptr<ManifestEvaluator>> ManifestEvaluator::MakeRowFilter(
+    [[maybe_unused]] std::shared_ptr<Expression> expr,
+    [[maybe_unused]] const std::shared_ptr<PartitionSpec>& spec,
+    [[maybe_unused]] const Schema& schema, [[maybe_unused]] bool case_sensitive) {
+  // TODO(xiao.dong) we need a projection util to project row filter to the partition col
+  return NotImplemented("ManifestEvaluator::MakeRowFilter");
+}
+
+// Builds an evaluator for an expression written against the partition fields of
+// `spec`: the expression is passed through RewriteNot and then bound to a schema made
+// of the spec's partition type fields.
+//
+// Returns InvalidArgument if `spec` is null, and forwards any error from computing the
+// partition type, rewriting, or binding.
+Result<std::unique_ptr<ManifestEvaluator>> ManifestEvaluator::MakePartitionFilter(
+    std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& spec,
+    const Schema& schema, bool case_sensitive) {
+  if (spec == nullptr) {
+    return InvalidArgument("Partition spec must not be null");
+  }
+  ICEBERG_ASSIGN_OR_RAISE(auto partition_type, spec->PartitionType(schema));
+  auto field_span = partition_type->fields();
+  std::vector<SchemaField> fields(field_span.begin(), field_span.end());
+  // the local vector is not used again, so hand it over instead of copying it
+  auto partition_schema = std::make_shared<Schema>(std::move(fields));
+  ICEBERG_ASSIGN_OR_RAISE(auto rewrite_expr, RewriteNot::Visit(std::move(expr)));
+  ICEBERG_ASSIGN_OR_RAISE(auto partition_expr,
+                          Binder::Bind(*partition_schema, rewrite_expr, case_sensitive));
+  // the constructor is private, so std::make_unique cannot be used here
+  return std::unique_ptr<ManifestEvaluator>(
+      new ManifestEvaluator(std::move(partition_expr)));
+}
+
+// Runs the stored expression against the manifest's partition field summaries.
+// A manifest without any summaries cannot be ruled out and is reported as a match.
+Result<bool> ManifestEvaluator::Evaluate(const ManifestFile& manifest) const {
+  const bool has_summaries = !manifest.partitions.empty();
+  if (has_summaries) {
+    ManifestEvalVisitor summary_visitor(manifest);
+    return Visit<bool, ManifestEvalVisitor>(expr_, summary_visitor);
+  }
+  return kRowsMightMatch;
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/expression/manifest_evaluator.h
b/src/iceberg/expression/manifest_evaluator.h
new file mode 100644
index 00000000..ddc743dc
--- /dev/null
+++ b/src/iceberg/expression/manifest_evaluator.h
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/expression/manifest_evaluator.h
+///
+/// Evaluates an Expression on a ManifestFile to test whether the file contains
+/// matching partitions.
+///
+/// For row expressions, evaluation is inclusive: it returns true if a file
+/// may match and false if it cannot match.
+///
+/// Manifests are passed to ManifestEvaluator::Evaluate, which returns true if the
+/// manifest may contain data files that match the partition expression. A manifest
+/// may be skipped if and only if Evaluate returns false.
+///
+
+#include <memory>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+/// \brief Evaluates an Expression against a manifest's partition field summaries.
+/// \note: The evaluator is thread-safe.
+class ICEBERG_EXPORT ManifestEvaluator {
+ public:
+  /// \brief Make a manifest evaluator for RowFilter
+  ///
+  /// \param expr The expression to evaluate
+  /// \param spec The partition spec
+  /// \param schema The schema of the table
+  /// \param case_sensitive Whether field name matching is case-sensitive
+  /// \return Currently always a NotImplemented error: projecting a row filter onto
+  /// the partition columns is not available yet.
+  static Result<std::unique_ptr<ManifestEvaluator>> MakeRowFilter(
+      std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& spec,
+      const Schema& schema, bool case_sensitive = true);
+
+  /// \brief Make a manifest evaluator for PartitionFilter
+  ///
+  /// \param expr The expression to evaluate
+  /// \param spec The partition spec
+  /// \param schema The schema of the table
+  /// \param case_sensitive Whether field name matching is case-sensitive
+  /// \return The evaluator, or an error if the expression cannot be bound to the
+  /// partition fields of the spec.
+  static Result<std::unique_ptr<ManifestEvaluator>> MakePartitionFilter(
+      std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& spec,
+      const Schema& schema, bool case_sensitive = true);
+
+  ~ManifestEvaluator();
+
+  /// \brief Evaluate the expression against a manifest.
+  ///
+  /// \param manifest The manifest to evaluate
+  /// \return true if the manifest may contain matching partitions (always the case
+  /// when the manifest carries no partition summaries), false if it cannot, or error
+  Result<bool> Evaluate(const ManifestFile& manifest) const;
+
+ private:
+  explicit ManifestEvaluator(std::shared_ptr<Expression> expr);
+
+ private:
+  std::shared_ptr<Expression> expr_;
+};
+
+} // namespace iceberg
diff --git a/src/iceberg/expression/meson.build
b/src/iceberg/expression/meson.build
index f3b74848..fbb07267 100644
--- a/src/iceberg/expression/meson.build
+++ b/src/iceberg/expression/meson.build
@@ -25,6 +25,7 @@ install_headers(
'expressions.h',
'inclusive_metrics_evaluator.h',
'literal.h',
+ 'manifest_evaluator.h',
'predicate.h',
'residual_evaluator.h',
'rewrite_not.h',
diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h
index 616f11da..5e834af5 100644
--- a/src/iceberg/expression/term.h
+++ b/src/iceberg/expression/term.h
@@ -157,6 +157,8 @@ class ICEBERG_EXPORT BoundReference
Kind kind() const override { return Kind::kReference; }
+ const StructLikeAccessor& accessor() const { return *accessor_; }
+
private:
BoundReference(SchemaField field, std::unique_ptr<StructLikeAccessor>
accessor);
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index c10c5a82..d70eae25 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -49,6 +49,7 @@ iceberg_sources = files(
'expression/expressions.cc',
'expression/inclusive_metrics_evaluator.cc',
'expression/literal.cc',
+ 'expression/manifest_evaluator.cc',
'expression/predicate.cc',
'expression/residual_evaluator.cc',
'expression/rewrite_not.cc',
diff --git a/src/iceberg/row/struct_like.cc b/src/iceberg/row/struct_like.cc
index 85bde1a6..24e61644 100644
--- a/src/iceberg/row/struct_like.cc
+++ b/src/iceberg/row/struct_like.cc
@@ -70,7 +70,7 @@ Result<Scalar> LiteralToScalar(const Literal& literal) {
StructLikeAccessor::StructLikeAccessor(std::shared_ptr<Type> type,
std::span<const size_t> position_path)
- : type_(std::move(type)) {
+ : type_(std::move(type)), position_path_(position_path.begin(),
position_path.end()) {
if (position_path.size() == 1) {
accessor_ = [pos =
position_path[0]](const StructLike& struct_like) ->
Result<Scalar> {
diff --git a/src/iceberg/row/struct_like.h b/src/iceberg/row/struct_like.h
index 36ff5d86..7b4bdc3b 100644
--- a/src/iceberg/row/struct_like.h
+++ b/src/iceberg/row/struct_like.h
@@ -121,9 +121,13 @@ class ICEBERG_EXPORT StructLikeAccessor {
/// \brief Get the type of the value that this accessor is bound to.
const Type& type() const { return *type_; }
+ /// \brief Get the position path of the value that this accessor is bound to.
+ const std::vector<size_t>& position_path() const { return position_path_; }
+
private:
std::shared_ptr<Type> type_;
std::function<Result<Scalar>(const StructLike&)> accessor_;
+ std::vector<size_t> position_path_;
};
} // namespace iceberg
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index f9cfb848..7d943484 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -85,6 +85,7 @@ add_iceberg_test(expression_test
expression_test.cc
expression_visitor_test.cc
literal_test.cc
+ manifest_evaluator_test.cc
inclusive_metrics_evaluator_test.cc
inclusive_metrics_evaluator_with_transform_test.cc
predicate_test.cc
diff --git a/src/iceberg/test/manifest_evaluator_test.cc
b/src/iceberg/test/manifest_evaluator_test.cc
new file mode 100644
index 00000000..4562e74e
--- /dev/null
+++ b/src/iceberg/test/manifest_evaluator_test.cc
@@ -0,0 +1,462 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/manifest_evaluator.h"
+
+#include <optional>
+#include <utility>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/expression/expressions.h"
+#include "iceberg/manifest/manifest_list.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/schema.h"
+#include "iceberg/test/matchers.h"
+#include "iceberg/transform.h"
+#include "iceberg/type.h"
+
+namespace iceberg {
+
+/// Fixture providing a 13-column schema, an identity partition spec over every
+/// column, a manifest (`file_`) with one partition summary per partition field, and a
+/// manifest (`no_stats_`) without any partition summaries.
+class ManifestEvaluatorTest : public ::testing::Test {
+ protected:
+  // Lower and upper bound of the "id" partition summary in `file_`.
+  static constexpr int32_t kIntMinValue = 30;
+  static constexpr int32_t kIntMaxValue = 79;
+
+  void SetUp() override {
+    schema_ = std::make_shared<Schema>(
+        std::vector<SchemaField>{
+            SchemaField::MakeRequired(1, "id", int32()),
+            SchemaField::MakeOptional(4, "all_nulls_missing_nan", string()),
+            SchemaField::MakeOptional(5, "some_nulls", string()),
+            SchemaField::MakeOptional(6, "no_nulls", string()),
+            SchemaField::MakeOptional(7, "float", float32()),
+            SchemaField::MakeOptional(8, "all_nulls_double", float64()),
+            SchemaField::MakeOptional(9, "all_nulls_no_nans", float32()),
+            SchemaField::MakeOptional(10, "all_nans", float64()),
+            SchemaField::MakeOptional(11, "both_nan_and_null", float32()),
+            SchemaField::MakeOptional(12, "no_nan_or_null", float64()),
+            SchemaField::MakeOptional(13, "all_nulls_missing_nan_float", float32()),
+            SchemaField::MakeOptional(14, "all_same_value_or_null", string()),
+            SchemaField::MakeOptional(15, "no_nulls_same_value_a", string()),
+        },
+        0);
+
+    ICEBERG_UNWRAP_OR_FAIL(auto spec_result,
+                           PartitionSpec::Make(/*spec_id=*/0, BuildIdentityFields()));
+    spec_ = std::shared_ptr<PartitionSpec>(std::move(spec_result));
+
+    file_ = BuildManifestFile();
+    // no_stats_ keeps an empty `partitions` list
+    no_stats_.manifest_path = "no-stats.avro";
+    no_stats_.partition_spec_id = 0;
+  }
+
+  // One identity partition field per schema column, in schema order, with field ids
+  // counting up from PartitionSpec::kLegacyPartitionDataIdStart.
+  std::vector<PartitionField> BuildIdentityFields() {
+    std::vector<PartitionField> fields;
+    int32_t partition_field_id = PartitionSpec::kLegacyPartitionDataIdStart;
+    auto add_field = [&](int32_t source_id, std::string name) {
+      fields.emplace_back(source_id, partition_field_id++, std::move(name),
+                          Transform::Identity());
+    };
+    add_field(1, "id");
+    add_field(4, "all_nulls_missing_nan");
+    add_field(5, "some_nulls");
+    add_field(6, "no_nulls");
+    add_field(7, "float");
+    add_field(8, "all_nulls_double");
+    add_field(9, "all_nulls_no_nans");
+    add_field(10, "all_nans");
+    add_field(11, "both_nan_and_null");
+    add_field(12, "no_nan_or_null");
+    add_field(13, "all_nulls_missing_nan_float");
+    add_field(14, "all_same_value_or_null");
+    add_field(15, "no_nulls_same_value_a");
+    return fields;
+  }
+
+  // Builds a partition field summary; a bound is stored (serialized) only when the
+  // corresponding literal is supplied.
+  PartitionFieldSummary MakeSummary(bool contains_null, std::optional<bool> contains_nan,
+                                    std::optional<Literal> lower = std::nullopt,
+                                    std::optional<Literal> upper = std::nullopt) {
+    PartitionFieldSummary summary;
+    summary.contains_null = contains_null;
+    summary.contains_nan = contains_nan;
+    if (lower.has_value()) {
+      summary.lower_bound = lower->Serialize().value();
+    }
+    if (upper.has_value()) {
+      summary.upper_bound = upper->Serialize().value();
+    }
+    return summary;
+  }
+
+  // Summaries are listed in the same order as the fields of BuildIdentityFields().
+  ManifestFile BuildManifestFile() {
+    ManifestFile manifest;
+    manifest.manifest_path = "manifest-list.avro";
+    manifest.manifest_length = 1024;
+    manifest.partition_spec_id = 0;
+    manifest.partitions = {
+        // id
+        MakeSummary(/*contains_null=*/false, std::nullopt, Literal::Int(kIntMinValue),
+                    Literal::Int(kIntMaxValue)),
+        // all_nulls_missing_nan
+        MakeSummary(/*contains_null=*/true, std::nullopt, std::nullopt, std::nullopt),
+        // some_nulls
+        MakeSummary(/*contains_null=*/true, std::nullopt, Literal::String("a"),
+                    Literal::String("z")),
+        // no_nulls
+        MakeSummary(/*contains_null=*/false, std::nullopt, Literal::String("a"),
+                    Literal::String("z")),
+        // float
+        MakeSummary(/*contains_null=*/false, std::nullopt, Literal::Float(0.0F),
+                    Literal::Float(20.0F)),
+        // all_nulls_double
+        MakeSummary(/*contains_null=*/true, std::nullopt, std::nullopt, std::nullopt),
+        // all_nulls_no_nans
+        MakeSummary(/*contains_null=*/true, /*contains_nan=*/false, std::nullopt,
+                    std::nullopt),
+        // all_nans
+        MakeSummary(/*contains_null=*/false, /*contains_nan=*/true, std::nullopt,
+                    std::nullopt),
+        // both_nan_and_null
+        MakeSummary(/*contains_null=*/true, /*contains_nan=*/true, std::nullopt,
+                    std::nullopt),
+        // no_nan_or_null
+        MakeSummary(/*contains_null=*/false, /*contains_nan=*/false, Literal::Double(0.0),
+                    Literal::Double(20.0)),
+        // all_nulls_missing_nan_float
+        MakeSummary(/*contains_null=*/true, std::nullopt, std::nullopt, std::nullopt),
+        // all_same_value_or_null
+        MakeSummary(/*contains_null=*/true, std::nullopt, Literal::String("a"),
+                    Literal::String("a")),
+        // no_nulls_same_value_a
+        MakeSummary(/*contains_null=*/false, std::nullopt, Literal::String("a"),
+                    Literal::String("a")),
+    };
+    return manifest;
+  }
+
+  // Builds a partition-filter evaluator for `expr` and checks its result on `manifest`.
+  void ExpectEval(const std::shared_ptr<Expression>& expr, bool expected,
+                  const ManifestFile& manifest, bool case_sensitive = true) {
+    ICEBERG_UNWRAP_OR_FAIL(auto evaluator, ManifestEvaluator::MakePartitionFilter(
+                                               expr, spec_, *schema_, case_sensitive));
+    auto result = evaluator->Evaluate(manifest);
+    ASSERT_TRUE(result.has_value());
+    ASSERT_EQ(expected, result.value()) << expr->ToString();
+  }
+
+  // Same check against the fixture manifest `file_`.
+  void ExpectEval(const std::shared_ptr<Expression>& expr, bool expected,
+                  bool case_sensitive = true) {
+    ExpectEval(expr, expected, file_, case_sensitive);
+  }
+
+  std::shared_ptr<Schema> schema_;
+  std::shared_ptr<PartitionSpec> spec_;
+  ManifestFile file_;
+  ManifestFile no_stats_;
+};
+
+// NotNull / StartsWith / NotStartsWith on columns whose summaries have no bounds.
+TEST_F(ManifestEvaluatorTest, AllNulls) {
+  ExpectEval(Expressions::NotNull("all_nulls_missing_nan"), false);
+  // float column without contains_nan: NaN values cannot be excluded
+  ExpectEval(Expressions::NotNull("all_nulls_missing_nan_float"), true);
+  ExpectEval(Expressions::NotNull("some_nulls"), true);
+  ExpectEval(Expressions::NotNull("no_nulls"), true);
+  ExpectEval(Expressions::StartsWith("all_nulls_missing_nan", "asad"), false);
+  ExpectEval(Expressions::NotStartsWith("all_nulls_missing_nan", "asad"), true);
+}
+
+// IsNull can only be ruled out for a column whose summary has contains_null == false.
+TEST_F(ManifestEvaluatorTest, NoNulls) {
+  const std::vector<std::pair<std::string, bool>> cases = {
+      {"all_nulls_missing_nan", true},
+      {"some_nulls", true},
+      {"no_nulls", false},
+      {"both_nan_and_null", true},
+  };
+  for (const auto& [column, expected] : cases) {
+    ExpectEval(Expressions::IsNull(column), expected);
+  }
+}
+
+// IsNaN is ruled out only where the summary says contains_nan == false.
+TEST_F(ManifestEvaluatorTest, IsNaN) {
+  const std::vector<std::pair<std::string, bool>> cases = {
+      {"float", true},
+      {"all_nulls_double", true},
+      {"all_nulls_missing_nan_float", true},
+      {"all_nulls_no_nans", false},
+      {"all_nans", true},
+      {"both_nan_and_null", true},
+      {"no_nan_or_null", false},
+  };
+  for (const auto& [column, expected] : cases) {
+    ExpectEval(Expressions::IsNaN(column), expected);
+  }
+}
+
+// NotNaN is ruled out only for the column whose summary shows that every value is NaN.
+TEST_F(ManifestEvaluatorTest, NotNaN) {
+  const std::vector<std::pair<std::string, bool>> cases = {
+      {"float", true},
+      {"all_nulls_double", true},
+      {"all_nulls_no_nans", true},
+      {"all_nans", false},
+      {"both_nan_and_null", true},
+      {"no_nan_or_null", true},
+  };
+  for (const auto& [column, expected] : cases) {
+    ExpectEval(Expressions::NotNaN(column), expected);
+  }
+}
+
+// Binding a predicate on an unknown column must fail when the evaluator is created.
+TEST_F(ManifestEvaluatorTest, MissingColumn) {
+  auto expr = Expressions::LessThan("missing", Literal::Int(5));
+  auto evaluator = ManifestEvaluator::MakePartitionFilter(expr, spec_, *schema_, true);
+  ASSERT_FALSE(evaluator.has_value());
+  ASSERT_TRUE(evaluator.error().message.contains("Cannot find field 'missing'"))
+      << evaluator.error().message;
+}
+
+// A manifest without partition summaries can never be skipped, whatever the predicate.
+TEST_F(ManifestEvaluatorTest, MissingStats) {
+  const std::vector<std::shared_ptr<Expression>> predicates{
+      Expressions::LessThan("id", Literal::Int(5)),
+      Expressions::LessThanOrEqual("id", Literal::Int(30)),
+      Expressions::Equal("id", Literal::Int(70)),
+      Expressions::GreaterThan("id", Literal::Int(78)),
+      Expressions::GreaterThanOrEqual("id", Literal::Int(90)),
+      Expressions::NotEqual("id", Literal::Int(101)),
+      Expressions::IsNull("id"),
+      Expressions::NotNull("id"),
+      Expressions::StartsWith("all_nulls_missing_nan", "a"),
+      Expressions::IsNaN("float"),
+      Expressions::NotNaN("float"),
+      Expressions::NotStartsWith("all_nulls_missing_nan", "a"),
+  };
+
+  for (const auto& predicate : predicates) {
+    ExpectEval(predicate, /*expected=*/true, no_stats_);
+  }
+}
+
+// "id" has bounds [kIntMinValue, kIntMaxValue] = [30, 79].
+TEST_F(ManifestEvaluatorTest, Not) {
+  // not(id < 5)
+  ExpectEval(
+      Expressions::Not(Expressions::LessThan("id", Literal::Int(kIntMinValue - 25))),
+      true);
+  // not(id > 5)
+  ExpectEval(
+      Expressions::Not(Expressions::GreaterThan("id", Literal::Int(kIntMinValue - 25))),
+      false);
+}
+
+// "id" has bounds [kIntMinValue, kIntMaxValue] = [30, 79].
+TEST_F(ManifestEvaluatorTest, And) {
+  // id < 5 and id >= 0
+  ExpectEval(
+      Expressions::And(
+          Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
+          Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMinValue - 30))),
+      false);
+  // id < 5 and id >= 80
+  ExpectEval(
+      Expressions::And(
+          Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
+          Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue + 1))),
+      false);
+  // id > 5 and id <= 30
+  ExpectEval(
+      Expressions::And(Expressions::GreaterThan("id", Literal::Int(kIntMinValue - 25)),
+                       Expressions::LessThanOrEqual("id", Literal::Int(kIntMinValue))),
+      true);
+}
+
+TEST_F(ManifestEvaluatorTest, Or) {  // Disjunctions of two "id" predicates: the first case expects false, the second expects true.
+ ExpectEval(Expressions::Or(
+ Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
+ Expressions::GreaterThanOrEqual("id",
Literal::Int(kIntMaxValue + 1))),
+ false);
+ ExpectEval(Expressions::Or(
+ Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
+ Expressions::GreaterThanOrEqual("id",
Literal::Int(kIntMaxValue - 19))),
+ true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerLt) {  // id < v: false for v = kIntMinValue - 25 and kIntMinValue; true for kIntMinValue + 1 and kIntMaxValue.
+ ExpectEval(Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
false);
+ ExpectEval(Expressions::LessThan("id", Literal::Int(kIntMinValue)), false);
+ ExpectEval(Expressions::LessThan("id", Literal::Int(kIntMinValue + 1)),
true);
+ ExpectEval(Expressions::LessThan("id", Literal::Int(kIntMaxValue)), true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerLtEq) {  // id <= v: false for v = kIntMinValue - 25 and kIntMinValue - 1; true for kIntMinValue and kIntMaxValue.
+ ExpectEval(Expressions::LessThanOrEqual("id", Literal::Int(kIntMinValue -
25)), false);
+ ExpectEval(Expressions::LessThanOrEqual("id", Literal::Int(kIntMinValue -
1)), false);
+ ExpectEval(Expressions::LessThanOrEqual("id", Literal::Int(kIntMinValue)),
true);
+ ExpectEval(Expressions::LessThanOrEqual("id", Literal::Int(kIntMaxValue)),
true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerGt) {  // id > v: false for v = kIntMaxValue + 6 and kIntMaxValue; true for kIntMaxValue - 1 and kIntMaxValue - 4.
+ ExpectEval(Expressions::GreaterThan("id", Literal::Int(kIntMaxValue + 6)),
false);
+ ExpectEval(Expressions::GreaterThan("id", Literal::Int(kIntMaxValue)),
false);
+ ExpectEval(Expressions::GreaterThan("id", Literal::Int(kIntMaxValue - 1)),
true);
+ ExpectEval(Expressions::GreaterThan("id", Literal::Int(kIntMaxValue - 4)),
true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerGtEq) {  // id >= v: false for v = kIntMaxValue + 6 and kIntMaxValue + 1; true for kIntMaxValue and kIntMaxValue - 4.
+ ExpectEval(Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue +
6)),
+ false);
+ ExpectEval(Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue +
1)),
+ false);
+ ExpectEval(Expressions::GreaterThanOrEqual("id",
Literal::Int(kIntMaxValue)), true);
+ ExpectEval(Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue -
4)), true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerEq) {  // id == v: true for probes from kIntMinValue through kIntMaxValue, false for probes outside them.
+ ExpectEval(Expressions::Equal("id", Literal::Int(kIntMinValue - 25)), false);
+ ExpectEval(Expressions::Equal("id", Literal::Int(kIntMinValue - 1)), false);
+ ExpectEval(Expressions::Equal("id", Literal::Int(kIntMinValue)), true);
+ ExpectEval(Expressions::Equal("id", Literal::Int(kIntMaxValue - 4)), true);
+ ExpectEval(Expressions::Equal("id", Literal::Int(kIntMaxValue)), true);
+ ExpectEval(Expressions::Equal("id", Literal::Int(kIntMaxValue + 1)), false);
+ ExpectEval(Expressions::Equal("id", Literal::Int(kIntMaxValue + 6)), false);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerNotEq) {  // id != v: every probed literal, inside or outside the min/max constants, expects true.
+ ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMinValue - 25)),
true);
+ ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMinValue - 1)),
true);
+ ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMinValue)), true);
+ ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMaxValue - 4)),
true);
+ ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMaxValue)), true);
+ ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMaxValue + 1)),
true);
+ ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMaxValue + 6)),
true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerNotEqRewritten) {  // Same probes as IntegerNotEq, spelled as Not(Equal(...)); every case expects true.
+ ExpectEval(Expressions::Not(Expressions::Equal("id",
Literal::Int(kIntMinValue - 25))),
+ true);
+ ExpectEval(Expressions::Not(Expressions::Equal("id",
Literal::Int(kIntMinValue - 1))),
+ true);
+ ExpectEval(Expressions::Not(Expressions::Equal("id",
Literal::Int(kIntMinValue))),
+ true);
+ ExpectEval(Expressions::Not(Expressions::Equal("id",
Literal::Int(kIntMaxValue - 4))),
+ true);
+ ExpectEval(Expressions::Not(Expressions::Equal("id",
Literal::Int(kIntMaxValue))),
+ true);
+ ExpectEval(Expressions::Not(Expressions::Equal("id",
Literal::Int(kIntMaxValue + 1))),
+ true);
+ ExpectEval(Expressions::Not(Expressions::Equal("id",
Literal::Int(kIntMaxValue + 6))),
+ true);
+}
+
+TEST_F(ManifestEvaluatorTest, CaseInsensitiveIntegerNotEqRewritten) {  // Not(Equal(...)) on upper-case "ID" with case_sensitive=false; every case expects true.
+ ExpectEval(Expressions::Not(Expressions::Equal("ID",
Literal::Int(kIntMinValue - 25))),
+ true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::Not(Expressions::Equal("ID",
Literal::Int(kIntMinValue - 1))),
+ true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::Not(Expressions::Equal("ID",
Literal::Int(kIntMinValue))), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::Not(Expressions::Equal("ID",
Literal::Int(kIntMaxValue - 4))),
+ true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::Not(Expressions::Equal("ID",
Literal::Int(kIntMaxValue))), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::Not(Expressions::Equal("ID",
Literal::Int(kIntMaxValue + 1))),
+ true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::Not(Expressions::Equal("ID",
Literal::Int(kIntMaxValue + 6))),
+ true,
+ /*case_sensitive=*/false);
+}
+
+TEST_F(ManifestEvaluatorTest, CaseSensitiveIntegerNotEqRewritten) {  // Filter creation must fail when the predicate names the column as upper-case "ID".
+ auto expr = Expressions::Not(Expressions::Equal("ID",
Literal::Int(kIntMinValue - 25)));
+ auto evaluator = ManifestEvaluator::MakePartitionFilter(expr, spec_,
*schema_, true);
+ ASSERT_FALSE(evaluator.has_value());  // NOTE(review): the trailing `true` above is presumably the case_sensitive flag - confirm against manifest_evaluator.h
+ ASSERT_TRUE(evaluator.error().message.contains("Cannot find field 'ID'"))
+ << evaluator.error().message;  // streamed so a failing run reports the actual message
+}
+
+TEST_F(ManifestEvaluatorTest, StringStartsWith) {  // StartsWith probes with case_sensitive=false: the first five expect true, the last two ("zzzz", "1") expect false.
+ ExpectEval(Expressions::StartsWith("some_nulls", "a"), true,
/*case_sensitive=*/false);
+ ExpectEval(Expressions::StartsWith("some_nulls", "aa"), true,
/*case_sensitive=*/false);
+ ExpectEval(Expressions::StartsWith("some_nulls", "dddd"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::StartsWith("some_nulls", "z"), true,
/*case_sensitive=*/false);
+ ExpectEval(Expressions::StartsWith("no_nulls", "a"), true,
/*case_sensitive=*/false);
+ ExpectEval(Expressions::StartsWith("some_nulls", "zzzz"), false,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::StartsWith("some_nulls", "1"), false,
/*case_sensitive=*/false);
+}
+
+TEST_F(ManifestEvaluatorTest, StringNotStartsWith) {  // NotStartsWith probes with case_sensitive=false: all expect true except the final no_nulls_same_value_a case.
+ ExpectEval(Expressions::NotStartsWith("some_nulls", "a"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("some_nulls", "aa"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("some_nulls", "dddd"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("some_nulls", "z"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("no_nulls", "a"), true,
/*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("some_nulls", "zzzz"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("some_nulls", "1"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("all_same_value_or_null", "a"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("all_same_value_or_null", "aa"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("all_same_value_or_null", "A"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("all_nulls_missing_nan", "A"), true,
+ /*case_sensitive=*/false);
+ ExpectEval(Expressions::NotStartsWith("no_nulls_same_value_a", "a"), false,
+ /*case_sensitive=*/false);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerIn) {  // In-sets with a literal from kIntMinValue through kIntMaxValue expect true; sets wholly below or above expect false. String cases follow.
+ ExpectEval(Expressions::In("id", {Literal::Int(kIntMinValue - 25),
+ Literal::Int(kIntMinValue - 24)}),
+ false);
+ ExpectEval(Expressions::In(
+ "id", {Literal::Int(kIntMinValue - 2),
Literal::Int(kIntMinValue - 1)}),
+ false);
+ ExpectEval(
+ Expressions::In("id", {Literal::Int(kIntMinValue - 1),
Literal::Int(kIntMinValue)}),
+ true);
+ ExpectEval(Expressions::In(
+ "id", {Literal::Int(kIntMaxValue - 4),
Literal::Int(kIntMaxValue - 3)}),
+ true);
+ ExpectEval(
+ Expressions::In("id", {Literal::Int(kIntMaxValue),
Literal::Int(kIntMaxValue + 1)}),
+ true);
+ ExpectEval(Expressions::In(
+ "id", {Literal::Int(kIntMaxValue + 1),
Literal::Int(kIntMaxValue + 2)}),
+ false);
+ ExpectEval(Expressions::In(
+ "id", {Literal::Int(kIntMaxValue + 6),
Literal::Int(kIntMaxValue + 7)}),
+ false);
+ ExpectEval(Expressions::In("all_nulls_missing_nan",
+ {Literal::String("abc"), Literal::String("def")}),
+ false);  // string set on all_nulls_missing_nan expects false; some_nulls and no_nulls below expect true
+ ExpectEval(
+ Expressions::In("some_nulls", {Literal::String("abc"),
Literal::String("def")}),
+ true);
+ ExpectEval(
+ Expressions::In("no_nulls", {Literal::String("abc"),
Literal::String("def")}),
+ true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerNotIn) {  // NotIn over the same literal sets as IntegerIn; every case, integer and string, expects true.
+ ExpectEval(Expressions::NotIn("id", {Literal::Int(kIntMinValue - 25),
+ Literal::Int(kIntMinValue - 24)}),
+ true);
+ ExpectEval(Expressions::NotIn(
+ "id", {Literal::Int(kIntMinValue - 2),
Literal::Int(kIntMinValue - 1)}),
+ true);
+ ExpectEval(Expressions::NotIn(
+ "id", {Literal::Int(kIntMinValue - 1),
Literal::Int(kIntMinValue)}),
+ true);
+ ExpectEval(Expressions::NotIn(
+ "id", {Literal::Int(kIntMaxValue - 4),
Literal::Int(kIntMaxValue - 3)}),
+ true);
+ ExpectEval(Expressions::NotIn(
+ "id", {Literal::Int(kIntMaxValue), Literal::Int(kIntMaxValue
+ 1)}),
+ true);
+ ExpectEval(Expressions::NotIn(
+ "id", {Literal::Int(kIntMaxValue + 1),
Literal::Int(kIntMaxValue + 2)}),
+ true);
+ ExpectEval(Expressions::NotIn(
+ "id", {Literal::Int(kIntMaxValue + 6),
Literal::Int(kIntMaxValue + 7)}),
+ true);
+ ExpectEval(Expressions::NotIn("all_nulls_missing_nan",
+ {Literal::String("abc"),
Literal::String("def")}),
+ true);
+ ExpectEval(
+ Expressions::NotIn("some_nulls", {Literal::String("abc"),
Literal::String("def")}),
+ true);
+ ExpectEval(
+ Expressions::NotIn("no_nulls", {Literal::String("abc"),
Literal::String("def")}),
+ true);
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 0f8b9291..f058cdda 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -64,6 +64,7 @@ iceberg_tests = {
'inclusive_metrics_evaluator_test.cc',
'inclusive_metrics_evaluator_with_transform_test.cc',
'literal_test.cc',
+ 'manifest_evaluator_test.cc',
'predicate_test.cc',
'residual_evaluator_test.cc',
'strict_metrics_evaluator_test.cc',