This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 36f746c2 feat: add manifest evaluator (#403)
36f746c2 is described below

commit 36f746c2138462a417d77b9fa08ba68364c5b444
Author: dongxiao <[email protected]>
AuthorDate: Thu Dec 11 13:28:25 2025 +0800

    feat: add manifest evaluator (#403)
---
 src/iceberg/CMakeLists.txt                   |   1 +
 src/iceberg/expression/manifest_evaluator.cc | 384 ++++++++++++++++++++++
 src/iceberg/expression/manifest_evaluator.h  |  82 +++++
 src/iceberg/expression/meson.build           |   1 +
 src/iceberg/expression/term.h                |   2 +
 src/iceberg/meson.build                      |   1 +
 src/iceberg/row/struct_like.cc               |   2 +-
 src/iceberg/row/struct_like.h                |   4 +
 src/iceberg/test/CMakeLists.txt              |   1 +
 src/iceberg/test/manifest_evaluator_test.cc  | 462 +++++++++++++++++++++++++++
 src/iceberg/test/meson.build                 |   1 +
 11 files changed, 940 insertions(+), 1 deletion(-)

diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 7218d76e..4c63c0c4 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -27,6 +27,7 @@ set(ICEBERG_SOURCES
     expression/expressions.cc
     expression/inclusive_metrics_evaluator.cc
     expression/literal.cc
+    expression/manifest_evaluator.cc
     expression/predicate.cc
     expression/residual_evaluator.cc
     expression/rewrite_not.cc
diff --git a/src/iceberg/expression/manifest_evaluator.cc 
b/src/iceberg/expression/manifest_evaluator.cc
new file mode 100644
index 00000000..845ecb6c
--- /dev/null
+++ b/src/iceberg/expression/manifest_evaluator.cc
@@ -0,0 +1,384 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/manifest_evaluator.h"
+
+#include "iceberg/expression/binder.h"
+#include "iceberg/expression/expression_visitor.h"
+#include "iceberg/expression/rewrite_not.h"
+#include "iceberg/manifest/manifest_list.h"
+#include "iceberg/row/struct_like.h"
+#include "iceberg/schema.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+namespace {
+constexpr bool kRowsMightMatch = true;
+constexpr bool kRowCannotMatch = false;
+constexpr int32_t kInPredicateLimit = 200;
+}  // namespace
+
+class ManifestEvalVisitor : public BoundVisitor<bool> {
+ public:
+  explicit ManifestEvalVisitor(const ManifestFile& manifest)
+      : stats_(manifest.partitions) {}
+
+  Result<bool> AlwaysTrue() override { return kRowsMightMatch; }
+
+  Result<bool> AlwaysFalse() override { return kRowCannotMatch; }
+
+  Result<bool> Not(bool child_result) override { return !child_result; }
+
+  Result<bool> And(bool left_result, bool right_result) override {
+    return left_result && right_result;
+  }
+
+  Result<bool> Or(bool left_result, bool right_result) override {
+    return left_result || right_result;
+  }
+
+  Result<bool> IsNull(const std::shared_ptr<Bound>& expr) override {
+    // no need to check whether the field is required because binding 
evaluates that case
+    // if the column has no null values, the expression cannot match
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    if (!stats_.at(pos).contains_null) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  Result<bool> NotNull(const std::shared_ptr<Bound>& expr) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    if (AllValuesAreNull(stats_.at(pos), ref->type()->type_id())) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  /// \brief Test whether any partition value of the referenced field may be NaN.
+  ///
+  /// \param expr Bound term whose reference selects the partition field summary
+  /// \return kRowCannotMatch if the summary records contains_nan == false or
+  /// AllValuesAreNull() holds for the field, kRowsMightMatch otherwise, or the error
+  /// from the position lookup
+  Result<bool> IsNaN(const std::shared_ptr<Bound>& expr) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    const bool known_nan_free =
+        summary.contains_nan.has_value() && !summary.contains_nan.value();
+    if (known_nan_free || AllValuesAreNull(summary, ref->type()->type_id())) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> NotNaN(const std::shared_ptr<Bound>& expr) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    // if containsNaN is true, containsNull is false and lowerBound is null, 
all values
+    // are NaN
+    if (summary.contains_nan.has_value() && summary.contains_nan.value() &&
+        !summary.contains_null && !summary.lower_bound.has_value()) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  Result<bool> Lt(const std::shared_ptr<Bound>& expr, const Literal& lit) 
override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), 
ref->type()));
+    if (lower >= lit) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> LtEq(const std::shared_ptr<Bound>& expr, const Literal& lit) 
override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), 
ref->type()));
+    if (lower > lit) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> Gt(const std::shared_ptr<Bound>& expr, const Literal& lit) 
override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.upper_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), 
ref->type()));
+    if (upper <= lit) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  /// \brief Test whether any partition value may be greater than or equal to lit.
+  ///
+  /// \param expr Bound term whose reference selects the partition field summary
+  /// \param lit Literal compared against the field's deserialized upper bound
+  /// \return kRowCannotMatch if the summary has no upper bound or the upper bound is
+  /// less than lit, kRowsMightMatch otherwise, or the error from the position lookup
+  /// or bound deserialization
+  Result<bool> GtEq(const std::shared_ptr<Bound>& expr, const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.upper_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null
+    }
+    // Use the reference fetched above rather than calling expr->reference() again.
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+    if (upper < lit) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  Result<bool> Eq(const std::shared_ptr<Bound>& expr, const Literal& lit) 
override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value() || !summary.upper_bound.has_value()) {
+      return kRowCannotMatch;  // values are all null and literal cannot 
contain null
+    }
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), 
ref->type()));
+    if (lower > lit) {
+      return kRowCannotMatch;
+    }
+
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), 
ref->type()));
+    if (upper < lit) {
+      return kRowCannotMatch;
+    }
+
+    return kRowsMightMatch;
+  }
+
+  /// \brief Test whether any partition value may differ from the literal.
+  ///
+  /// Neither argument is consulted; the result is always kRowsMightMatch.
+  Result<bool> NotEq([[maybe_unused]] const std::shared_ptr<Bound>& expr,
+                     [[maybe_unused]] const Literal& lit) override {
+    // because the bounds are not necessarily a min or max value, this cannot be answered
+    // using them. notEq(col, X) with (X, Y) doesn't guarantee that X is a value in col.
+    return kRowsMightMatch;
+  }
+
+  /// \brief Test whether any partition value may be a member of literal_set.
+  ///
+  /// \param expr Bound term whose reference selects the partition field summary
+  /// \param literal_set Literals the field value is tested against
+  /// \return kRowCannotMatch if either bound is missing, or if every literal lies
+  /// below the lower bound or above the upper bound; kRowsMightMatch otherwise
+  /// (including when the set has more than kInPredicateLimit entries), or the error
+  /// from the position lookup or bound deserialization
+  Result<bool> In(const std::shared_ptr<Bound>& expr,
+                  const BoundSetPredicate::LiteralSet& literal_set) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value() || !summary.upper_bound.has_value()) {
+      // values are all null and literalSet cannot contain null.
+      return kRowCannotMatch;
+    }
+    // kInPredicateLimit is a signed int32_t; cast it so the comparison with size()
+    // is not a mixed signed/unsigned comparison.
+    if (literal_set.size() > static_cast<size_t>(kInPredicateLimit)) {
+      // skip evaluating the predicate if the number of values is too big
+      return kRowsMightMatch;
+    }
+
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+
+    if (std::ranges::all_of(literal_set, [&](const Literal& lit) {
+          return lit < lower || lit > upper;
+        })) {
+      // if all values are less than lower bound or greater than upper bound,
+      // rows cannot match.
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  /// \brief Test whether any partition value may be absent from the literal set.
+  ///
+  /// Neither argument is consulted; the result is always kRowsMightMatch.
+  Result<bool> NotIn(
+      [[maybe_unused]] const std::shared_ptr<Bound>& expr,
+      [[maybe_unused]] const BoundSetPredicate::LiteralSet& literal_set) override {
+    // because the bounds are not necessarily a min or max value, this cannot be answered
+    // using them. notIn(col, {X, ...}) with (X, Y) doesn't guarantee that X is a value in
+    // col.
+    return kRowsMightMatch;
+  }
+
+  /// \brief Test whether any partition value may start with the given prefix.
+  ///
+  /// \param expr Bound term whose reference selects the partition field summary
+  /// \param lit Prefix literal; must be of string type
+  /// \return kRowCannotMatch if either bound is missing, if the leading bytes of the
+  /// lower bound sort after the prefix, or if the leading bytes of the upper bound
+  /// sort before it; kRowsMightMatch otherwise; InvalidExpression for a non-string
+  /// literal, or the error from the position lookup or bound deserialization
+  Result<bool> StartsWith(const std::shared_ptr<Bound>& expr,
+                          const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (!summary.lower_bound.has_value() || !summary.upper_bound.has_value()) {
+      return kRowCannotMatch;
+    }
+    if (lit.type()->type_id() != TypeId::kString) {
+      return InvalidExpression("Invalid literal: not a string, cannot use StartsWith");
+    }
+    const auto& prefix = std::get<std::string>(lit.value());
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+    const auto& lower_bound = std::get<std::string>(lower.value());
+    const auto& upper_bound = std::get<std::string>(upper.value());
+    // Compare at most prefix.size() leading bytes of each bound with the prefix.
+    // compare(pos, len, str) clamps len to the bound's own length and works in place,
+    // so no temporary substring is allocated.
+    if (lower_bound.compare(0, prefix.size(), prefix) > 0) {
+      return kRowCannotMatch;
+    }
+    if (upper_bound.compare(0, prefix.size(), prefix) < 0) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+  /// \brief Test whether any partition value may not start with the given prefix.
+  ///
+  /// \param expr Bound term whose reference selects the partition field summary
+  /// \param lit Prefix literal; must be of string type
+  /// \return kRowCannotMatch only when the summary contains no nulls and both bounds
+  /// start with the prefix; kRowsMightMatch otherwise; InvalidExpression for a
+  /// non-string literal, or the error from the position lookup or bound
+  /// deserialization
+  Result<bool> NotStartsWith(const std::shared_ptr<Bound>& expr,
+                             const Literal& lit) override {
+    const auto& ref = expr->reference();
+    ICEBERG_ASSIGN_OR_RAISE(auto pos, GetPosition(*ref));
+    const auto& summary = stats_.at(pos);
+    if (summary.contains_null || !summary.lower_bound.has_value() ||
+        !summary.upper_bound.has_value()) {
+      return kRowsMightMatch;
+    }
+    if (lit.type()->type_id() != TypeId::kString) {
+      return InvalidExpression("Invalid literal: not a string, cannot use notStartsWith");
+    }
+    // notStartsWith will match unless all values must start with the prefix. This happens
+    // when the lower and upper bounds both start with the prefix.
+    const auto& prefix = std::get<std::string>(lit.value());
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto lower, DeserializeBoundLiteral(summary.lower_bound.value(), ref->type()));
+    ICEBERG_ASSIGN_OR_RAISE(
+        auto upper, DeserializeBoundLiteral(summary.upper_bound.value(), ref->type()));
+    const auto& lower_bound = std::get<std::string>(lower.value());
+    const auto& upper_bound = std::get<std::string>(upper.value());
+
+    // starts_with() is already false for a bound shorter than the prefix, so no
+    // separate length checks are needed.
+    if (lower_bound.starts_with(prefix) && upper_bound.starts_with(prefix)) {
+      return kRowCannotMatch;
+    }
+    return kRowsMightMatch;
+  }
+
+ private:
+  Result<size_t> GetPosition(const BoundReference& ref) const {
+    const auto& accessor = ref.accessor();
+    const auto& position_path = accessor.position_path();
+    if (position_path.empty()) {
+      return InvalidArgument("Invalid accessor: empty position path.");
+    }
+    // nested accessors are not supported for partition fields
+    if (position_path.size() > 1) {
+      return InvalidArgument("Cannot convert nested accessor to position");
+    }
+    auto pos = position_path.at(0);
+    if (pos >= stats_.size()) {
+      return InvalidArgument("Position {} is out of partition field range {}", 
pos,
+                             stats_.size());
+    }
+    return pos;
+  }
+
+  /// \brief Report whether a partition field summary describes only null values.
+  ///
+  /// \param summary Partition field summary to inspect
+  /// \param type_id Type of the partition field; float and double get an extra check
+  /// \return true if the summary contains nulls and has no lower bound and, for
+  /// float/double fields, additionally records contains_nan == false
+  bool AllValuesAreNull(const PartitionFieldSummary& summary, TypeId type_id) const {
+    // containsNull encodes whether at least one partition value is null,
+    // lowerBound is null if all partition values are null
+    bool all_null = summary.contains_null && !summary.lower_bound.has_value();
+
+    if (all_null && (type_id == TypeId::kDouble || type_id == TypeId::kFloat)) {
+      // floating point types may include NaN values, which we check separately.
+      // In case bounds don't include NaN value, containsNaN needs to be checked against.
+      all_null = summary.contains_nan.has_value() && !summary.contains_nan.value();
+    }
+    return all_null;
+  }
+
+  /// \brief Deserialize a serialized partition bound into a Literal of the given type.
+  ///
+  /// \param bound Serialized bound bytes taken from a partition field summary
+  /// \param type Type to deserialize as; must be primitive
+  /// \return The result of Literal::Deserialize, or NotSupported if type is not
+  /// primitive
+  Result<Literal> DeserializeBoundLiteral(const std::vector<uint8_t>& bound,
+                                          const std::shared_ptr<Type>& type) const {
+    if (!type->is_primitive()) {
+      return NotSupported("Bounds of non-primitive partition fields are not supported.");
+    }
+    // The cast result is passed straight through as a temporary; wrapping it in
+    // std::move adds nothing.
+    return Literal::Deserialize(bound,
+                                internal::checked_pointer_cast<PrimitiveType>(type));
+  }
+
+ private:
+  const std::vector<PartitionFieldSummary>& stats_;
+};
+
+ManifestEvaluator::ManifestEvaluator(std::shared_ptr<Expression> expr)
+    : expr_(std::move(expr)) {}
+
+ManifestEvaluator::~ManifestEvaluator() = default;
+
+Result<std::unique_ptr<ManifestEvaluator>> ManifestEvaluator::MakeRowFilter(
+    [[maybe_unused]] std::shared_ptr<Expression> expr,
+    [[maybe_unused]] const std::shared_ptr<PartitionSpec>& spec,
+    [[maybe_unused]] const Schema& schema, [[maybe_unused]] bool 
case_sensitive) {
+  // TODO(xiao.dong) we need a projection util to project row filter to the 
partition col
+  return NotImplemented("ManifestEvaluator::MakeRowFilter");
+}
+
+/// Builds an evaluator for a partition filter: derives the partition type from spec
+/// and schema, passes expr through RewriteNot::Visit, and binds the result against a
+/// schema made of the partition fields. Returns InvalidArgument if spec is null, or
+/// the first error reported by PartitionType, RewriteNot::Visit or Binder::Bind.
+Result<std::unique_ptr<ManifestEvaluator>> ManifestEvaluator::MakePartitionFilter(
+    std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& spec,
+    const Schema& schema, bool case_sensitive) {
+  // Report a null spec as an error instead of dereferencing it below.
+  if (spec == nullptr) {
+    return InvalidArgument("Cannot make partition filter: partition spec is null");
+  }
+  ICEBERG_ASSIGN_OR_RAISE(auto partition_type, spec->PartitionType(schema));
+  auto field_span = partition_type->fields();
+  std::vector<SchemaField> fields(field_span.begin(), field_span.end());
+  // fields is not used again, so hand it over rather than copying it.
+  auto partition_schema = std::make_shared<Schema>(std::move(fields));
+  ICEBERG_ASSIGN_OR_RAISE(auto rewrite_expr, RewriteNot::Visit(std::move(expr)));
+  ICEBERG_ASSIGN_OR_RAISE(auto partition_expr,
+                          Binder::Bind(*partition_schema, rewrite_expr, case_sensitive));
+  // The constructor is private, so std::make_unique cannot be used here.
+  return std::unique_ptr<ManifestEvaluator>(
+      new ManifestEvaluator(std::move(partition_expr)));
+}
+
+Result<bool> ManifestEvaluator::Evaluate(const ManifestFile& manifest) const {
+  if (manifest.partitions.empty()) {
+    return kRowsMightMatch;
+  }
+  ManifestEvalVisitor visitor(manifest);
+  return Visit<bool, ManifestEvalVisitor>(expr_, visitor);
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/expression/manifest_evaluator.h 
b/src/iceberg/expression/manifest_evaluator.h
new file mode 100644
index 00000000..ddc743dc
--- /dev/null
+++ b/src/iceberg/expression/manifest_evaluator.h
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/expression/manifest_evaluator.h
+///
+/// Evaluates an Expression on a ManifestFile to test whether the file contains
+/// matching partitions.
+///
+/// For row expressions, evaluation is inclusive: it returns true if a file
+/// may match and false if it cannot match.
+///
+/// Manifests are passed to ManifestEvaluator::Evaluate, which returns true if the
+/// manifest may contain data files that match the partition expression. Manifest
+/// files may be skipped if and only if the return value of Evaluate is false.
+///
+
+#include <memory>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+
+namespace iceberg {
+
+/// \brief Evaluates an Expression against manifest.
+/// \note: The evaluator is thread-safe.
+class ICEBERG_EXPORT ManifestEvaluator {
+ public:
+  /// \brief Make a manifest evaluator for RowFilter
+  ///
+  /// \param expr The expression to evaluate
+  /// \param spec The partition spec
+  /// \param schema The schema of the table
+  /// \param case_sensitive Whether field name matching is case-sensitive
+  static Result<std::unique_ptr<ManifestEvaluator>> MakeRowFilter(
+      std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& 
spec,
+      const Schema& schema, bool case_sensitive = true);
+
+  /// \brief Make a manifest evaluator for PartitionFilter
+  ///
+  /// \param expr The expression to evaluate
+  /// \param spec The partition spec
+  /// \param schema The schema of the table
+  /// \param case_sensitive Whether field name matching is case-sensitive
+  static Result<std::unique_ptr<ManifestEvaluator>> MakePartitionFilter(
+      std::shared_ptr<Expression> expr, const std::shared_ptr<PartitionSpec>& 
spec,
+      const Schema& schema, bool case_sensitive = true);
+
+  ~ManifestEvaluator();
+
+  /// \brief Evaluate the expression against a manifest.
+  ///
+  /// \param manifest The manifest to evaluate
+  /// \return true if the manifest may match the expression, false if it cannot, or error
+  Result<bool> Evaluate(const ManifestFile& manifest) const;
+
+ private:
+  explicit ManifestEvaluator(std::shared_ptr<Expression> expr);
+
+ private:
+  std::shared_ptr<Expression> expr_;
+};
+
+}  // namespace iceberg
diff --git a/src/iceberg/expression/meson.build 
b/src/iceberg/expression/meson.build
index f3b74848..fbb07267 100644
--- a/src/iceberg/expression/meson.build
+++ b/src/iceberg/expression/meson.build
@@ -25,6 +25,7 @@ install_headers(
         'expressions.h',
         'inclusive_metrics_evaluator.h',
         'literal.h',
+        'manifest_evaluator.h',
         'predicate.h',
         'residual_evaluator.h',
         'rewrite_not.h',
diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h
index 616f11da..5e834af5 100644
--- a/src/iceberg/expression/term.h
+++ b/src/iceberg/expression/term.h
@@ -157,6 +157,8 @@ class ICEBERG_EXPORT BoundReference
 
   Kind kind() const override { return Kind::kReference; }
 
+  const StructLikeAccessor& accessor() const { return *accessor_; }
+
  private:
   BoundReference(SchemaField field, std::unique_ptr<StructLikeAccessor> 
accessor);
 
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index c10c5a82..d70eae25 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -49,6 +49,7 @@ iceberg_sources = files(
     'expression/expressions.cc',
     'expression/inclusive_metrics_evaluator.cc',
     'expression/literal.cc',
+    'expression/manifest_evaluator.cc',
     'expression/predicate.cc',
     'expression/residual_evaluator.cc',
     'expression/rewrite_not.cc',
diff --git a/src/iceberg/row/struct_like.cc b/src/iceberg/row/struct_like.cc
index 85bde1a6..24e61644 100644
--- a/src/iceberg/row/struct_like.cc
+++ b/src/iceberg/row/struct_like.cc
@@ -70,7 +70,7 @@ Result<Scalar> LiteralToScalar(const Literal& literal) {
 
 StructLikeAccessor::StructLikeAccessor(std::shared_ptr<Type> type,
                                        std::span<const size_t> position_path)
-    : type_(std::move(type)) {
+    : type_(std::move(type)), position_path_(position_path.begin(), 
position_path.end()) {
   if (position_path.size() == 1) {
     accessor_ = [pos =
                      position_path[0]](const StructLike& struct_like) -> 
Result<Scalar> {
diff --git a/src/iceberg/row/struct_like.h b/src/iceberg/row/struct_like.h
index 36ff5d86..7b4bdc3b 100644
--- a/src/iceberg/row/struct_like.h
+++ b/src/iceberg/row/struct_like.h
@@ -121,9 +121,13 @@ class ICEBERG_EXPORT StructLikeAccessor {
   /// \brief Get the type of the value that this accessor is bound to.
   const Type& type() const { return *type_; }
 
+  /// \brief Get the position path of the value that this accessor is bound to.
+  const std::vector<size_t>& position_path() const { return position_path_; }
+
  private:
   std::shared_ptr<Type> type_;
   std::function<Result<Scalar>(const StructLike&)> accessor_;
+  std::vector<size_t> position_path_;
 };
 
 }  // namespace iceberg
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index f9cfb848..7d943484 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -85,6 +85,7 @@ add_iceberg_test(expression_test
                  expression_test.cc
                  expression_visitor_test.cc
                  literal_test.cc
+                 manifest_evaluator_test.cc
                  inclusive_metrics_evaluator_test.cc
                  inclusive_metrics_evaluator_with_transform_test.cc
                  predicate_test.cc
diff --git a/src/iceberg/test/manifest_evaluator_test.cc 
b/src/iceberg/test/manifest_evaluator_test.cc
new file mode 100644
index 00000000..4562e74e
--- /dev/null
+++ b/src/iceberg/test/manifest_evaluator_test.cc
@@ -0,0 +1,462 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/manifest_evaluator.h"
+
+#include <optional>
+#include <utility>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/expression/expressions.h"
+#include "iceberg/manifest/manifest_list.h"
+#include "iceberg/partition_spec.h"
+#include "iceberg/schema.h"
+#include "iceberg/test/matchers.h"
+#include "iceberg/transform.h"
+#include "iceberg/type.h"
+
+namespace iceberg {
+
+class ManifestEvaluatorTest : public ::testing::Test {
+ protected:
+  static constexpr int32_t kIntMinValue = 30;
+  static constexpr int32_t kIntMaxValue = 79;
+
+  void SetUp() override {
+    schema_ = std::make_shared<Schema>(
+        std::vector<SchemaField>{
+            SchemaField::MakeRequired(1, "id", int32()),
+            SchemaField::MakeOptional(4, "all_nulls_missing_nan", string()),
+            SchemaField::MakeOptional(5, "some_nulls", string()),
+            SchemaField::MakeOptional(6, "no_nulls", string()),
+            SchemaField::MakeOptional(7, "float", float32()),
+            SchemaField::MakeOptional(8, "all_nulls_double", float64()),
+            SchemaField::MakeOptional(9, "all_nulls_no_nans", float32()),
+            SchemaField::MakeOptional(10, "all_nans", float64()),
+            SchemaField::MakeOptional(11, "both_nan_and_null", float32()),
+            SchemaField::MakeOptional(12, "no_nan_or_null", float64()),
+            SchemaField::MakeOptional(13, "all_nulls_missing_nan_float", 
float32()),
+            SchemaField::MakeOptional(14, "all_same_value_or_null", string()),
+            SchemaField::MakeOptional(15, "no_nulls_same_value_a", string()),
+        },
+        0);
+
+    ICEBERG_UNWRAP_OR_FAIL(auto spec_result,
+                           PartitionSpec::Make(/*spec_id=*/0, 
BuildIdentityFields()));
+    spec_ = std::shared_ptr<PartitionSpec>(std::move(spec_result));
+
+    file_ = BuildManifestFile();
+    no_stats_.manifest_path = "no-stats.avro";
+    no_stats_.partition_spec_id = 0;
+  }
+
+  std::vector<PartitionField> BuildIdentityFields() {
+    std::vector<PartitionField> fields;
+    int32_t partition_field_id = PartitionSpec::kLegacyPartitionDataIdStart;
+    auto add_field = [&](int32_t source_id, std::string name) {
+      fields.emplace_back(source_id, partition_field_id++, std::move(name),
+                          Transform::Identity());
+    };
+    add_field(1, "id");
+    add_field(4, "all_nulls_missing_nan");
+    add_field(5, "some_nulls");
+    add_field(6, "no_nulls");
+    add_field(7, "float");
+    add_field(8, "all_nulls_double");
+    add_field(9, "all_nulls_no_nans");
+    add_field(10, "all_nans");
+    add_field(11, "both_nan_and_null");
+    add_field(12, "no_nan_or_null");
+    add_field(13, "all_nulls_missing_nan_float");
+    add_field(14, "all_same_value_or_null");
+    add_field(15, "no_nulls_same_value_a");
+    return fields;
+  }
+
+  PartitionFieldSummary MakeSummary(bool contains_null, std::optional<bool> 
contains_nan,
+                                    std::optional<Literal> lower = 
std::nullopt,
+                                    std::optional<Literal> upper = 
std::nullopt) {
+    PartitionFieldSummary summary;
+    summary.contains_null = contains_null;
+    summary.contains_nan = contains_nan;
+    if (lower.has_value()) {
+      summary.lower_bound = lower->Serialize().value();
+    }
+    if (upper.has_value()) {
+      summary.upper_bound = upper->Serialize().value();
+    }
+    return summary;
+  }
+
+  ManifestFile BuildManifestFile() {
+    ManifestFile manifest;
+    manifest.manifest_path = "manifest-list.avro";
+    manifest.manifest_length = 1024;
+    manifest.partition_spec_id = 0;
+    manifest.partitions = {
+        MakeSummary(/*contains_null=*/false, std::nullopt, 
Literal::Int(kIntMinValue),
+                    Literal::Int(kIntMaxValue)),
+        MakeSummary(/*contains_null=*/true, std::nullopt, std::nullopt, 
std::nullopt),
+        MakeSummary(/*contains_null=*/true, std::nullopt, Literal::String("a"),
+                    Literal::String("z")),
+        MakeSummary(/*contains_null=*/false, std::nullopt, 
Literal::String("a"),
+                    Literal::String("z")),
+        MakeSummary(/*contains_null=*/false, std::nullopt, 
Literal::Float(0.0F),
+                    Literal::Float(20.0F)),
+        MakeSummary(/*contains_null=*/true, std::nullopt, std::nullopt, 
std::nullopt),
+        MakeSummary(/*contains_null=*/true, /*contains_nan=*/false, 
std::nullopt,
+                    std::nullopt),
+        MakeSummary(/*contains_null=*/false, /*contains_nan=*/true, 
std::nullopt,
+                    std::nullopt),
+        MakeSummary(/*contains_null=*/true, /*contains_nan=*/true, 
std::nullopt,
+                    std::nullopt),
+        MakeSummary(/*contains_null=*/false, /*contains_nan=*/false, 
Literal::Double(0.0),
+                    Literal::Double(20.0)),
+        MakeSummary(/*contains_null=*/true, std::nullopt, std::nullopt, 
std::nullopt),
+        MakeSummary(/*contains_null=*/true, std::nullopt, Literal::String("a"),
+                    Literal::String("a")),
+        MakeSummary(/*contains_null=*/false, std::nullopt, 
Literal::String("a"),
+                    Literal::String("a")),
+    };
+    return manifest;
+  }
+
+  void ExpectEval(const std::shared_ptr<Expression>& expr, bool expected,
+                  const ManifestFile& manifest, bool case_sensitive = true) {
+    ICEBERG_UNWRAP_OR_FAIL(auto evaluator, 
ManifestEvaluator::MakePartitionFilter(
+                                               expr, spec_, *schema_, 
case_sensitive));
+    auto result = evaluator->Evaluate(manifest);
+    ASSERT_TRUE(result.has_value());
+    ASSERT_EQ(expected, result.value()) << expr->ToString();
+  }
+
+  void ExpectEval(const std::shared_ptr<Expression>& expr, bool expected,
+                  bool case_sensitive = true) {
+    ExpectEval(expr, expected, file_, case_sensitive);
+  }
+
+  std::shared_ptr<Schema> schema_;
+  std::shared_ptr<PartitionSpec> spec_;
+  ManifestFile file_;
+  ManifestFile no_stats_;
+};
+
+TEST_F(ManifestEvaluatorTest, AllNulls) {
+  ExpectEval(Expressions::NotNull("all_nulls_missing_nan"), false);
+  ExpectEval(Expressions::NotNull("all_nulls_missing_nan_float"), true);
+  ExpectEval(Expressions::NotNull("some_nulls"), true);
+  ExpectEval(Expressions::NotNull("no_nulls"), true);
+  ExpectEval(Expressions::StartsWith("all_nulls_missing_nan", "asad"), false);
+  ExpectEval(Expressions::NotStartsWith("all_nulls_missing_nan", "asad"), 
true);
+}
+
+TEST_F(ManifestEvaluatorTest, NoNulls) {
+  ExpectEval(Expressions::IsNull("all_nulls_missing_nan"), true);
+  ExpectEval(Expressions::IsNull("some_nulls"), true);
+  ExpectEval(Expressions::IsNull("no_nulls"), false);
+  ExpectEval(Expressions::IsNull("both_nan_and_null"), true);
+}
+
+TEST_F(ManifestEvaluatorTest, IsNaN) {  // IsNaN expectations against file_
+  ExpectEval(Expressions::IsNaN("float"), true);
+  ExpectEval(Expressions::IsNaN("all_nulls_double"), true);
+  ExpectEval(Expressions::IsNaN("all_nulls_missing_nan_float"), true);
+  ExpectEval(Expressions::IsNaN("all_nulls_no_nans"), false);  // expected false
+  ExpectEval(Expressions::IsNaN("all_nans"), true);
+  ExpectEval(Expressions::IsNaN("both_nan_and_null"), true);
+  ExpectEval(Expressions::IsNaN("no_nan_or_null"), false);  // expected false
+}
+
+TEST_F(ManifestEvaluatorTest, NotNaN) {  // NotNaN expectations against file_
+  ExpectEval(Expressions::NotNaN("float"), true);
+  ExpectEval(Expressions::NotNaN("all_nulls_double"), true);
+  ExpectEval(Expressions::NotNaN("all_nulls_no_nans"), true);
+  ExpectEval(Expressions::NotNaN("all_nans"), false);  // the only field expected to evaluate to false
+  ExpectEval(Expressions::NotNaN("both_nan_and_null"), true);
+  ExpectEval(Expressions::NotNaN("no_nan_or_null"), true);
+}
+
+TEST_F(ManifestEvaluatorTest, MissingColumn) {  // an unknown field name must make filter creation fail
+  auto expr = Expressions::LessThan("missing", Literal::Int(5));
+  auto evaluator = ManifestEvaluator::MakePartitionFilter(expr, spec_, *schema_, true);  // last argument: case_sensitive
+  ASSERT_FALSE(evaluator.has_value());  // creation is expected to return an error, not an evaluator
+  ASSERT_TRUE(evaluator.error().message.contains("Cannot find field 'missing'"))
+      << evaluator.error().message;  // on failure, print the actual error message
+}
+
+TEST_F(ManifestEvaluatorTest, MissingStats) {  // each predicate below is expected to evaluate to true on no_stats_
+  std::vector<std::shared_ptr<Expression>> expressions = {
+      Expressions::LessThan("id", Literal::Int(5)),
+      Expressions::LessThanOrEqual("id", Literal::Int(30)),
+      Expressions::Equal("id", Literal::Int(70)),
+      Expressions::GreaterThan("id", Literal::Int(78)),
+      Expressions::GreaterThanOrEqual("id", Literal::Int(90)),
+      Expressions::NotEqual("id", Literal::Int(101)),
+      Expressions::IsNull("id"),
+      Expressions::NotNull("id"),
+      Expressions::StartsWith("all_nulls_missing_nan", "a"),
+      Expressions::IsNaN("float"),
+      Expressions::NotNaN("float"),
+      Expressions::NotStartsWith("all_nulls_missing_nan", "a"),
+  };
+
+  for (const auto& expr : expressions) {
+    ExpectEval(expr, true, no_stats_);  // third argument selects no_stats_ instead of the default file_
+  }
+}
+
+TEST_F(ManifestEvaluatorTest, Not) {  // negated comparisons on "id" against file_
+  ExpectEval(
+      Expressions::Not(Expressions::LessThan("id", Literal::Int(kIntMinValue - 25))),
+      true);
+  ExpectEval(
+      Expressions::Not(Expressions::GreaterThan("id", Literal::Int(kIntMinValue - 25))),
+      false);
+}
+
+TEST_F(ManifestEvaluatorTest, And) {  // conjunctions of two "id" comparisons against file_
+  ExpectEval(Expressions::And(
+                 Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
+                 Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMinValue - 30))),
+             false);
+  ExpectEval(Expressions::And(
+                 Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
+                 Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue + 1))),
+             false);
+  ExpectEval(
+      Expressions::And(Expressions::GreaterThan("id", Literal::Int(kIntMinValue - 25)),
+                       Expressions::LessThanOrEqual("id", Literal::Int(kIntMinValue))),
+      true);  // the only conjunction here expected to evaluate to true
+}
+
+TEST_F(ManifestEvaluatorTest, Or) {  // disjunctions of two "id" comparisons against file_
+  ExpectEval(Expressions::Or(
+                 Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
+                 Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue + 1))),
+             false);
+  ExpectEval(Expressions::Or(
+                 Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)),
+                 Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue - 19))),
+             true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerLt) {  // LessThan on "id": expected true only for literals above kIntMinValue
+  ExpectEval(Expressions::LessThan("id", Literal::Int(kIntMinValue - 25)), false);
+  ExpectEval(Expressions::LessThan("id", Literal::Int(kIntMinValue)), false);
+  ExpectEval(Expressions::LessThan("id", Literal::Int(kIntMinValue + 1)), true);
+  ExpectEval(Expressions::LessThan("id", Literal::Int(kIntMaxValue)), true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerLtEq) {  // LessThanOrEqual on "id": expected true from kIntMinValue upward
+  ExpectEval(Expressions::LessThanOrEqual("id", Literal::Int(kIntMinValue - 25)), false);
+  ExpectEval(Expressions::LessThanOrEqual("id", Literal::Int(kIntMinValue - 1)), false);
+  ExpectEval(Expressions::LessThanOrEqual("id", Literal::Int(kIntMinValue)), true);
+  ExpectEval(Expressions::LessThanOrEqual("id", Literal::Int(kIntMaxValue)), true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerGt) {  // GreaterThan on "id": expected true only for literals below kIntMaxValue
+  ExpectEval(Expressions::GreaterThan("id", Literal::Int(kIntMaxValue + 6)), false);
+  ExpectEval(Expressions::GreaterThan("id", Literal::Int(kIntMaxValue)), false);
+  ExpectEval(Expressions::GreaterThan("id", Literal::Int(kIntMaxValue - 1)), true);
+  ExpectEval(Expressions::GreaterThan("id", Literal::Int(kIntMaxValue - 4)), true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerGtEq) {  // GreaterThanOrEqual on "id": expected true up to and including kIntMaxValue
+  ExpectEval(Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue + 6)),
+             false);
+  ExpectEval(Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue + 1)),
+             false);
+  ExpectEval(Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue)), true);
+  ExpectEval(Expressions::GreaterThanOrEqual("id", Literal::Int(kIntMaxValue - 4)), true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerEq) {  // Equal on "id" against file_, probing both sides of kIntMinValue / kIntMaxValue
+  ExpectEval(Expressions::Equal("id", Literal::Int(kIntMinValue - 25)), false);
+  ExpectEval(Expressions::Equal("id", Literal::Int(kIntMinValue - 1)), false);  // just below kIntMinValue
+  ExpectEval(Expressions::Equal("id", Literal::Int(kIntMinValue)), true);
+  ExpectEval(Expressions::Equal("id", Literal::Int(kIntMaxValue - 4)), true);
+  ExpectEval(Expressions::Equal("id", Literal::Int(kIntMaxValue)), true);
+  ExpectEval(Expressions::Equal("id", Literal::Int(kIntMaxValue + 1)), false);  // just above kIntMaxValue
+  ExpectEval(Expressions::Equal("id", Literal::Int(kIntMaxValue + 6)), false);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerNotEq) {  // NotEqual on "id": every probed literal is expected to evaluate to true
+  ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMinValue - 25)), true);
+  ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMinValue - 1)), true);
+  ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMinValue)), true);
+  ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMaxValue - 4)), true);
+  ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMaxValue)), true);
+  ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMaxValue + 1)), true);
+  ExpectEval(Expressions::NotEqual("id", Literal::Int(kIntMaxValue + 6)), true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerNotEqRewritten) {  // same literals as IntegerNotEq, spelled as Not(Equal(...))
+  ExpectEval(Expressions::Not(Expressions::Equal("id", Literal::Int(kIntMinValue - 25))),
+             true);
+  ExpectEval(Expressions::Not(Expressions::Equal("id", Literal::Int(kIntMinValue - 1))),
+             true);
+  ExpectEval(Expressions::Not(Expressions::Equal("id", Literal::Int(kIntMinValue))),
+             true);
+  ExpectEval(Expressions::Not(Expressions::Equal("id", Literal::Int(kIntMaxValue - 4))),
+             true);
+  ExpectEval(Expressions::Not(Expressions::Equal("id", Literal::Int(kIntMaxValue))),
+             true);
+  ExpectEval(Expressions::Not(Expressions::Equal("id", Literal::Int(kIntMaxValue + 1))),
+             true);
+  ExpectEval(Expressions::Not(Expressions::Equal("id", Literal::Int(kIntMaxValue + 6))),
+             true);
+}
+
+TEST_F(ManifestEvaluatorTest, CaseInsensitiveIntegerNotEqRewritten) {  // Not(Equal) on "ID" with case_sensitive=false
+  ExpectEval(Expressions::Not(Expressions::Equal("ID", Literal::Int(kIntMinValue - 25))),
+             true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::Not(Expressions::Equal("ID", Literal::Int(kIntMinValue - 1))),
+             true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::Not(Expressions::Equal("ID", Literal::Int(kIntMinValue))), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::Not(Expressions::Equal("ID", Literal::Int(kIntMaxValue - 4))),
+             true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::Not(Expressions::Equal("ID", Literal::Int(kIntMaxValue))), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::Not(Expressions::Equal("ID", Literal::Int(kIntMaxValue + 1))),
+             true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::Not(Expressions::Equal("ID", Literal::Int(kIntMaxValue + 6))),
+             true,
+             /*case_sensitive=*/false);
+}
+
+TEST_F(ManifestEvaluatorTest, CaseSensitiveIntegerNotEqRewritten) {  // "ID" must fail to bind when case_sensitive is true
+  auto expr = Expressions::Not(Expressions::Equal("ID", Literal::Int(kIntMinValue - 25)));
+  auto evaluator = ManifestEvaluator::MakePartitionFilter(expr, spec_, *schema_, true);  // last argument: case_sensitive
+  ASSERT_FALSE(evaluator.has_value());  // creation is expected to return an error, not an evaluator
+  ASSERT_TRUE(evaluator.error().message.contains("Cannot find field 'ID'"))
+      << evaluator.error().message;  // on failure, print the actual error message
+}
+
+TEST_F(ManifestEvaluatorTest, StringStartsWith) {  // StartsWith expectations against file_, all with case_sensitive=false
+  ExpectEval(Expressions::StartsWith("some_nulls", "a"), true, /*case_sensitive=*/false);
+  ExpectEval(Expressions::StartsWith("some_nulls", "aa"), true, /*case_sensitive=*/false);
+  ExpectEval(Expressions::StartsWith("some_nulls", "dddd"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::StartsWith("some_nulls", "z"), true, /*case_sensitive=*/false);
+  ExpectEval(Expressions::StartsWith("no_nulls", "a"), true, /*case_sensitive=*/false);
+  ExpectEval(Expressions::StartsWith("some_nulls", "zzzz"), false,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::StartsWith("some_nulls", "1"), false, /*case_sensitive=*/false);
+}
+
+TEST_F(ManifestEvaluatorTest, StringNotStartsWith) {  // NotStartsWith expectations against file_, all with case_sensitive=false
+  ExpectEval(Expressions::NotStartsWith("some_nulls", "a"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("some_nulls", "aa"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("some_nulls", "dddd"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("some_nulls", "z"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("no_nulls", "a"), true, /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("some_nulls", "zzzz"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("some_nulls", "1"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("all_same_value_or_null", "a"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("all_same_value_or_null", "aa"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("all_same_value_or_null", "A"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("all_nulls_missing_nan", "A"), true,
+             /*case_sensitive=*/false);
+  ExpectEval(Expressions::NotStartsWith("no_nulls_same_value_a", "a"), false,
+             /*case_sensitive=*/false);  // the only case in this test expected to evaluate to false
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerIn) {  // In expectations against file_: two-element sets on "id", then string sets
+  ExpectEval(Expressions::In("id", {Literal::Int(kIntMinValue - 25),
+                                    Literal::Int(kIntMinValue - 24)}),
+             false);
+  ExpectEval(Expressions::In(
+                 "id", {Literal::Int(kIntMinValue - 2), Literal::Int(kIntMinValue - 1)}),
+             false);
+  ExpectEval(
+      Expressions::In("id", {Literal::Int(kIntMinValue - 1), Literal::Int(kIntMinValue)}),
+      true);
+  ExpectEval(Expressions::In(
+                 "id", {Literal::Int(kIntMaxValue - 4), Literal::Int(kIntMaxValue - 3)}),
+             true);
+  ExpectEval(
+      Expressions::In("id", {Literal::Int(kIntMaxValue), Literal::Int(kIntMaxValue + 1)}),
+      true);
+  ExpectEval(Expressions::In(
+                 "id", {Literal::Int(kIntMaxValue + 1), Literal::Int(kIntMaxValue + 2)}),
+             false);
+  ExpectEval(Expressions::In(
+                 "id", {Literal::Int(kIntMaxValue + 6), Literal::Int(kIntMaxValue + 7)}),
+             false);
+  ExpectEval(Expressions::In("all_nulls_missing_nan",
+                             {Literal::String("abc"), Literal::String("def")}),
+             false);
+  ExpectEval(
+      Expressions::In("some_nulls", {Literal::String("abc"), Literal::String("def")}),
+      true);
+  ExpectEval(
+      Expressions::In("no_nulls", {Literal::String("abc"), Literal::String("def")}),
+      true);
+}
+
+TEST_F(ManifestEvaluatorTest, IntegerNotIn) {  // NotIn expectations against file_: every case is expected to evaluate to true
+  ExpectEval(Expressions::NotIn("id", {Literal::Int(kIntMinValue - 25),
+                                       Literal::Int(kIntMinValue - 24)}),
+             true);
+  ExpectEval(Expressions::NotIn(
+                 "id", {Literal::Int(kIntMinValue - 2), Literal::Int(kIntMinValue - 1)}),
+             true);
+  ExpectEval(Expressions::NotIn(
+                 "id", {Literal::Int(kIntMinValue - 1), Literal::Int(kIntMinValue)}),
+             true);
+  ExpectEval(Expressions::NotIn(
+                 "id", {Literal::Int(kIntMaxValue - 4), Literal::Int(kIntMaxValue - 3)}),
+             true);
+  ExpectEval(Expressions::NotIn(
+                 "id", {Literal::Int(kIntMaxValue), Literal::Int(kIntMaxValue + 1)}),
+             true);
+  ExpectEval(Expressions::NotIn(
+                 "id", {Literal::Int(kIntMaxValue + 1), Literal::Int(kIntMaxValue + 2)}),
+             true);
+  ExpectEval(Expressions::NotIn(
+                 "id", {Literal::Int(kIntMaxValue + 6), Literal::Int(kIntMaxValue + 7)}),
+             true);
+  ExpectEval(Expressions::NotIn("all_nulls_missing_nan",
+                                {Literal::String("abc"), Literal::String("def")}),
+             true);
+  ExpectEval(
+      Expressions::NotIn("some_nulls", {Literal::String("abc"), Literal::String("def")}),
+      true);
+  ExpectEval(
+      Expressions::NotIn("no_nulls", {Literal::String("abc"), Literal::String("def")}),
+      true);
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 0f8b9291..f058cdda 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -64,6 +64,7 @@ iceberg_tests = {
             'inclusive_metrics_evaluator_test.cc',
             'inclusive_metrics_evaluator_with_transform_test.cc',
             'literal_test.cc',
+            'manifest_evaluator_test.cc',
             'predicate_test.cc',
             'residual_evaluator_test.cc',
             'strict_metrics_evaluator_test.cc',


Reply via email to