This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 1c431b6  feat: add satisfies order for SortField/SortOrder and 
Transform (#284)
1c431b6 is described below

commit 1c431b6ad24c7a4265add773c2f9ccd6e0175fac
Author: Junwang Zhao <[email protected]>
AuthorDate: Wed Nov 5 15:11:46 2025 +0800

    feat: add satisfies order for SortField/SortOrder and Transform (#284)
    
    This PR also makes the `ToString` consistent with Java implementation.
---
 src/iceberg/sort_field.cc           |  14 ++++-
 src/iceberg/sort_field.h            |   6 ++
 src/iceberg/sort_order.cc           |  33 +++++++++-
 src/iceberg/sort_order.h            |  14 +++++
 src/iceberg/test/sort_field_test.cc |  29 ++++++---
 src/iceberg/test/sort_order_test.cc |  78 ++++++++++++++++++++---
 src/iceberg/test/transform_test.cc  | 121 ++++++++++++++++++++++++++++++++++++
 src/iceberg/transform.cc            |  47 ++++++++++++++
 src/iceberg/transform.h             |  14 +++++
 9 files changed, 336 insertions(+), 20 deletions(-)

diff --git a/src/iceberg/sort_field.cc b/src/iceberg/sort_field.cc
index e9b3bf7..29083ba 100644
--- a/src/iceberg/sort_field.cc
+++ b/src/iceberg/sort_field.cc
@@ -41,10 +41,18 @@ SortDirection SortField::direction() const { return 
direction_; }
 
 NullOrder SortField::null_order() const { return null_order_; }
 
+bool SortField::Satisfies(const SortField& other) const {
+  if (*this == other) {
+    return true;
+  } else if (source_id_ != other.source_id() || direction_ != 
other.direction() ||
+             null_order_ != other.null_order()) {
+    return false;
+  }
+  return transform_->SatisfiesOrderOf(*other.transform());
+}
+
 std::string SortField::ToString() const {
-  return std::format(
-      "sort_field(source_id={}, transform={}, direction={}, null_order={})", 
source_id_,
-      *transform_, direction_, null_order_);
+  return std::format("{}({}) {} {}", *transform_, source_id_, direction_, 
null_order_);
 }
 
 bool SortField::Equals(const SortField& other) const {
diff --git a/src/iceberg/sort_field.h b/src/iceberg/sort_field.h
index 459472c..223c573 100644
--- a/src/iceberg/sort_field.h
+++ b/src/iceberg/sort_field.h
@@ -107,9 +107,15 @@ class ICEBERG_EXPORT SortField : public util::Formattable {
   /// \brief Get the null order.
   NullOrder null_order() const;
 
+  /// \brief Checks whether this field's order satisfies another field's order.
+  bool Satisfies(const SortField& other) const;
+
   std::string ToString() const override;
 
   friend bool operator==(const SortField& lhs, const SortField& rhs) {
+    if (&lhs == &rhs) {
+      return true;
+    }
     return lhs.Equals(rhs);
   }
 
diff --git a/src/iceberg/sort_order.cc b/src/iceberg/sort_order.cc
index 9db51c7..48a3066 100644
--- a/src/iceberg/sort_order.cc
+++ b/src/iceberg/sort_order.cc
@@ -20,6 +20,7 @@
 #include "iceberg/sort_order.h"
 
 #include <format>
+#include <ranges>
 
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
 
@@ -38,10 +39,38 @@ int32_t SortOrder::order_id() const { return order_id_; }
 
 std::span<const SortField> SortOrder::fields() const { return fields_; }
 
+bool SortOrder::Satisfies(const SortOrder& other) const {
+  // any ordering satisfies an unsorted ordering
+  if (other.is_unsorted()) {
+    return true;
+  }
+
+  // this ordering cannot satisfy an ordering with more sort fields
+  if (fields_.size() < other.fields().size()) {
+    return false;
+  }
+
+  // this ordering has either more or the same number of sort fields
+  for (const auto& [field, other_field] : std::views::zip(fields_, 
other.fields_)) {
+    if (!field.Satisfies(other_field)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool SortOrder::SameOrder(const SortOrder& other) const {
+  return fields_ == other.fields_;
+}
+
 std::string SortOrder::ToString() const {
-  std::string repr = std::format("sort_order[order_id<{}>,\n", order_id_);
+  std::string repr = "[";
   for (const auto& field : fields_) {
-    std::format_to(std::back_inserter(repr), "  {}\n", field);
+    std::format_to(std::back_inserter(repr), "\n  {}", field);
+  }
+  if (!fields_.empty()) {
+    repr.push_back('\n');
   }
   repr += "]";
   return repr;
diff --git a/src/iceberg/sort_order.h b/src/iceberg/sort_order.h
index 6e49153..e245a74 100644
--- a/src/iceberg/sort_order.h
+++ b/src/iceberg/sort_order.h
@@ -49,6 +49,20 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable {
   /// \brief Get the list of sort fields.
   std::span<const SortField> fields() const;
 
+  /// \brief Returns true if the sort order is sorted
+  bool is_sorted() const { return !fields_.empty(); }
+
+  /// \brief Returns true if the sort order is unsorted
+  /// A SortOrder is unsorted if it has no sort fields.
+  bool is_unsorted() const { return fields_.empty(); }
+
+  /// \brief Checks whether this order satisfies another order.
+  bool Satisfies(const SortOrder& other) const;
+
+  /// \brief Checks whether this order is equivalent to another order while 
ignoring the
+  /// order id.
+  bool SameOrder(const SortOrder& other) const;
+
   std::string ToString() const override;
 
   friend bool operator==(const SortOrder& lhs, const SortOrder& rhs) {
diff --git a/src/iceberg/test/sort_field_test.cc 
b/src/iceberg/test/sort_field_test.cc
index d8be799..751b8a0 100644
--- a/src/iceberg/test/sort_field_test.cc
+++ b/src/iceberg/test/sort_field_test.cc
@@ -36,14 +36,8 @@ TEST(SortFieldTest, Basics) {
     EXPECT_EQ(*transform, *field.transform());
     EXPECT_EQ(SortDirection::kAscending, field.direction());
     EXPECT_EQ(NullOrder::kFirst, field.null_order());
-    EXPECT_EQ(
-        "sort_field(source_id=1, transform=identity, direction=asc, "
-        "null_order=nulls-first)",
-        field.ToString());
-    EXPECT_EQ(
-        "sort_field(source_id=1, transform=identity, direction=asc, "
-        "null_order=nulls-first)",
-        std::format("{}", field));
+    EXPECT_EQ(field.ToString(), "identity(1) asc nulls-first");
+    EXPECT_EQ(std::format("{}", field), "identity(1) asc nulls-first");
   }
 }
 
@@ -67,4 +61,23 @@ TEST(SortFieldTest, Equality) {
   ASSERT_NE(field1, field5);
   ASSERT_NE(field5, field1);
 }
+
+TEST(SortFieldTest, Satisfies) {
+  const auto bucket_transform = Transform::Bucket(8);
+  const auto identity_transform = Transform::Identity();
+
+  SortField field1(1, bucket_transform, SortDirection::kAscending, 
NullOrder::kFirst);
+  SortField field2(1, bucket_transform, SortDirection::kAscending, 
NullOrder::kFirst);
+  SortField field3(1, identity_transform, SortDirection::kAscending, 
NullOrder::kFirst);
+  SortField field4(1, bucket_transform, SortDirection::kDescending, 
NullOrder::kFirst);
+  SortField field5(1, bucket_transform, SortDirection::kAscending, 
NullOrder::kLast);
+  SortField field6(2, bucket_transform, SortDirection::kAscending, 
NullOrder::kFirst);
+
+  EXPECT_TRUE(field1.Satisfies(field2));   // Same fields
+  EXPECT_FALSE(field1.Satisfies(field3));  // Different transform
+  EXPECT_FALSE(field1.Satisfies(field4));  // Different direction
+  EXPECT_FALSE(field1.Satisfies(field5));  // Different null order
+  EXPECT_FALSE(field1.Satisfies(field6));  // Different source_id
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/test/sort_order_test.cc 
b/src/iceberg/test/sort_order_test.cc
index 590f30b..bb407af 100644
--- a/src/iceberg/test/sort_order_test.cc
+++ b/src/iceberg/test/sort_order_test.cc
@@ -48,13 +48,12 @@ TEST(SortOrderTest, Basics) {
     ASSERT_EQ(st_field1, fields[0]);
     ASSERT_EQ(st_field2, fields[1]);
     auto sort_order_str =
-        "sort_order[order_id<100>,\n"
-        "  sort_field(source_id=5, transform=identity, direction=asc, "
-        "null_order=nulls-first)\n"
-        "  sort_field(source_id=7, transform=identity, direction=desc, "
-        "null_order=nulls-first)\n]";
-    EXPECT_EQ(sort_order_str, sort_order.ToString());
-    EXPECT_EQ(sort_order_str, std::format("{}", sort_order));
+        "[\n"
+        "  identity(5) asc nulls-first\n"
+        "  identity(7) desc nulls-first\n"
+        "]";
+    EXPECT_EQ(sort_order.ToString(), sort_order_str);
+    EXPECT_EQ(std::format("{}", sort_order), sort_order_str);
   }
 }
 
@@ -84,4 +83,69 @@ TEST(SortOrderTest, Equality) {
   ASSERT_NE(sort_order1, sort_order5);
   ASSERT_NE(sort_order5, sort_order1);
 }
+
+TEST(SortOrderTest, IsUnsorted) {
+  auto unsorted = SortOrder::Unsorted();
+  EXPECT_TRUE(unsorted->is_unsorted());
+  EXPECT_FALSE(unsorted->is_sorted());
+}
+
+TEST(SortOrderTest, IsSorted) {
+  SchemaField field1(5, "ts", iceberg::timestamp(), true);
+  auto identity_transform = Transform::Identity();
+  SortField st_field1(5, identity_transform, SortDirection::kAscending,
+                      NullOrder::kFirst);
+  SortOrder sorted_order(100, {st_field1});
+
+  EXPECT_TRUE(sorted_order.is_sorted());
+  EXPECT_FALSE(sorted_order.is_unsorted());
+}
+
+TEST(SortOrderTest, Satisfies) {
+  SchemaField field1(5, "ts", iceberg::timestamp(), true);
+  SchemaField field2(7, "bar", iceberg::string(), true);
+  auto identity_transform = Transform::Identity();
+  auto bucket_transform = Transform::Bucket(8);
+
+  SortField st_field1(5, identity_transform, SortDirection::kAscending,
+                      NullOrder::kFirst);
+  SortField st_field2(7, identity_transform, SortDirection::kDescending,
+                      NullOrder::kFirst);
+  SortField st_field3(7, bucket_transform, SortDirection::kAscending, 
NullOrder::kFirst);
+
+  SortOrder sort_order1(100, {st_field1, st_field2});
+  SortOrder sort_order2(101, {st_field1});
+  SortOrder sort_order3(102, {st_field1, st_field3});
+  SortOrder sort_order4(104, {st_field2});
+  auto unsorted = SortOrder::Unsorted();
+
+  // Any order satisfies an unsorted order, including unsorted itself
+  EXPECT_TRUE(unsorted->Satisfies(*unsorted));
+  EXPECT_TRUE(sort_order1.Satisfies(*unsorted));
+  EXPECT_TRUE(sort_order2.Satisfies(*unsorted));
+  EXPECT_TRUE(sort_order3.Satisfies(*unsorted));
+
+  // Unsorted does not satisfy any sorted order
+  EXPECT_FALSE(unsorted->Satisfies(sort_order1));
+  EXPECT_FALSE(unsorted->Satisfies(sort_order2));
+  EXPECT_FALSE(unsorted->Satisfies(sort_order3));
+
+  // A sort order satisfies itself
+  EXPECT_TRUE(sort_order1.Satisfies(sort_order1));
+  EXPECT_TRUE(sort_order2.Satisfies(sort_order2));
+  EXPECT_TRUE(sort_order3.Satisfies(sort_order3));
+
+  // A sort order with more fields satisfy one with fewer fields
+  EXPECT_TRUE(sort_order1.Satisfies(sort_order2));
+  EXPECT_TRUE(sort_order3.Satisfies(sort_order2));
+
+  // A sort order does not satisfy one with more fields
+  EXPECT_FALSE(sort_order2.Satisfies(sort_order1));
+  EXPECT_FALSE(sort_order2.Satisfies(sort_order3));
+
+  // A sort order does not satify one with different fields
+  EXPECT_FALSE(sort_order4.Satisfies(sort_order2));
+  EXPECT_FALSE(sort_order2.Satisfies(sort_order4));
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/test/transform_test.cc 
b/src/iceberg/test/transform_test.cc
index 79d2564..e4352af 100644
--- a/src/iceberg/test/transform_test.cc
+++ b/src/iceberg/test/transform_test.cc
@@ -720,4 +720,125 @@ INSTANTIATE_TEST_SUITE_P(
                                      .source = 
Literal::Null(iceberg::string()),
                                      .expected = 
Literal::Null(iceberg::string())}));
 
+TEST(TransformPreservesOrderTest, PreservesOrder) {
+  struct Case {
+    std::string transform_str;
+    bool expected;
+  };
+
+  const std::vector<Case> cases = {
+      {.transform_str = "identity", .expected = true},
+      {.transform_str = "year", .expected = true},
+      {.transform_str = "month", .expected = true},
+      {.transform_str = "day", .expected = true},
+      {.transform_str = "hour", .expected = true},
+      {.transform_str = "void", .expected = false},
+      {.transform_str = "bucket[16]", .expected = false},
+      {.transform_str = "truncate[32]", .expected = true},
+  };
+
+  for (const auto& c : cases) {
+    auto transform = TransformFromString(c.transform_str);
+    ASSERT_TRUE(transform.has_value()) << "Failed to parse: " << 
c.transform_str;
+
+    EXPECT_EQ(transform.value()->PreservesOrder(), c.expected)
+        << "Unexpected result for transform: " << c.transform_str;
+  }
+}
+
+TEST(TransformSatisfiesOrderOfTest, SatisfiesOrderOf) {
+  struct Case {
+    std::string transform_str;
+    std::string other_transform_str;
+    bool expected;
+  };
+
+  const std::vector<Case> cases = {
+      // Identity satisfies all order-preserving transforms
+      {.transform_str = "identity", .other_transform_str = "identity", 
.expected = true},
+      {.transform_str = "identity", .other_transform_str = "year", .expected = 
true},
+      {.transform_str = "identity", .other_transform_str = "month", .expected 
= true},
+      {.transform_str = "identity", .other_transform_str = "day", .expected = 
true},
+      {.transform_str = "identity", .other_transform_str = "hour", .expected = 
true},
+      {.transform_str = "identity",
+       .other_transform_str = "truncate[32]",
+       .expected = true},
+      {.transform_str = "identity",
+       .other_transform_str = "bucket[16]",
+       .expected = false},
+
+      // Truncate satisfies Truncate with smaller width
+      {.transform_str = "truncate[32]",
+       .other_transform_str = "truncate[16]",
+       .expected = true},
+      {.transform_str = "truncate[16]",
+       .other_transform_str = "truncate[16]",
+       .expected = true},
+      {.transform_str = "truncate[16]",
+       .other_transform_str = "truncate[32]",
+       .expected = false},
+      {.transform_str = "truncate[16]",
+       .other_transform_str = "bucket[32]",
+       .expected = false},
+
+      // Hour satisfies hour, day, month, and year
+      {.transform_str = "hour", .other_transform_str = "hour", .expected = 
true},
+      {.transform_str = "hour", .other_transform_str = "day", .expected = 
true},
+      {.transform_str = "hour", .other_transform_str = "month", .expected = 
true},
+      {.transform_str = "hour", .other_transform_str = "year", .expected = 
true},
+      {.transform_str = "hour", .other_transform_str = "identity", .expected = 
false},
+      {.transform_str = "hour", .other_transform_str = "bucket[16]", .expected 
= false},
+
+      // Day satisfies day, month, and year
+      {.transform_str = "day", .other_transform_str = "day", .expected = true},
+      {.transform_str = "day", .other_transform_str = "month", .expected = 
true},
+      {.transform_str = "day", .other_transform_str = "year", .expected = 
true},
+      {.transform_str = "day", .other_transform_str = "hour", .expected = 
false},
+      {.transform_str = "day", .other_transform_str = "identity", .expected = 
false},
+
+      // Month satisfies month and year
+      {.transform_str = "month", .other_transform_str = "month", .expected = 
true},
+      {.transform_str = "month", .other_transform_str = "year", .expected = 
true},
+      {.transform_str = "month", .other_transform_str = "day", .expected = 
false},
+      {.transform_str = "month", .other_transform_str = "hour", .expected = 
false},
+
+      // Year satisfies only year
+      {.transform_str = "year", .other_transform_str = "year", .expected = 
true},
+      {.transform_str = "year", .other_transform_str = "month", .expected = 
false},
+      {.transform_str = "year", .other_transform_str = "day", .expected = 
false},
+      {.transform_str = "year", .other_transform_str = "hour", .expected = 
false},
+
+      // Void satisfies no order-preserving transforms
+      {.transform_str = "void", .other_transform_str = "identity", .expected = 
false},
+      {.transform_str = "void", .other_transform_str = "year", .expected = 
false},
+      {.transform_str = "void", .other_transform_str = "month", .expected = 
false},
+      {.transform_str = "void", .other_transform_str = "day", .expected = 
false},
+      {.transform_str = "void", .other_transform_str = "hour", .expected = 
false},
+
+      // Bucket satisfies only itself
+      {.transform_str = "bucket[16]",
+       .other_transform_str = "bucket[16]",
+       .expected = true},
+      {.transform_str = "bucket[16]",
+       .other_transform_str = "bucket[32]",
+       .expected = false},
+      {.transform_str = "bucket[16]",
+       .other_transform_str = "identity",
+       .expected = false},
+  };
+
+  for (const auto& c : cases) {
+    auto transform = TransformFromString(c.transform_str);
+    auto other_transform = TransformFromString(c.other_transform_str);
+
+    ASSERT_TRUE(transform.has_value()) << "Failed to parse: " << 
c.transform_str;
+    ASSERT_TRUE(other_transform.has_value())
+        << "Failed to parse: " << c.other_transform_str;
+
+    EXPECT_EQ(transform.value()->SatisfiesOrderOf(*other_transform.value()), 
c.expected)
+        << "Unexpected result for transform: " << c.transform_str
+        << " and other transform: " << c.other_transform_str;
+  }
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index dcacf84..4724cc1 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -21,6 +21,7 @@
 
 #include <format>
 #include <regex>
+#include <utility>
 
 #include "iceberg/transform_function.h"
 #include "iceberg/type.h"
@@ -124,6 +125,52 @@ Result<std::shared_ptr<TransformFunction>> Transform::Bind(
   }
 }
 
+bool Transform::PreservesOrder() const {
+  switch (transform_type_) {
+    case TransformType::kUnknown:
+    case TransformType::kVoid:
+    case TransformType::kBucket:
+      return false;
+    case TransformType::kIdentity:
+    case TransformType::kTruncate:
+    case TransformType::kYear:
+    case TransformType::kMonth:
+    case TransformType::kDay:
+    case TransformType::kHour:
+      return true;
+  }
+  std::unreachable();
+}
+
+bool Transform::SatisfiesOrderOf(const Transform& other) const {
+  auto other_type = other.transform_type();
+  switch (transform_type_) {
+    case TransformType::kIdentity:
+      // ordering by value is the same as long as the other preserves order
+      return other.PreservesOrder();
+    case TransformType::kTruncate: {
+      if (other_type != TransformType::kTruncate) {
+        return false;
+      }
+      return std::get<int32_t>(param_) >= std::get<int32_t>(other.param_);
+    }
+    case TransformType::kHour:
+      return other_type == TransformType::kHour || other_type == 
TransformType::kDay ||
+             other_type == TransformType::kMonth || other_type == 
TransformType::kYear;
+    case TransformType::kDay:
+      return other_type == TransformType::kDay || other_type == 
TransformType::kMonth ||
+             other_type == TransformType::kYear;
+    case TransformType::kMonth:
+      return other_type == TransformType::kMonth || other_type == 
TransformType::kYear;
+    case TransformType::kYear:
+    case TransformType::kBucket:
+    case TransformType::kUnknown:
+    case TransformType::kVoid:
+      return *this == other;
+  }
+  std::unreachable();
+}
+
 bool TransformFunction::Equals(const TransformFunction& other) const {
   return transform_type_ == other.transform_type_ && *source_type_ == 
*other.source_type_;
 }
diff --git a/src/iceberg/transform.h b/src/iceberg/transform.h
index d06e31f..21483e0 100644
--- a/src/iceberg/transform.h
+++ b/src/iceberg/transform.h
@@ -150,6 +150,20 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
   Result<std::shared_ptr<TransformFunction>> Bind(
       const std::shared_ptr<Type>& source_type) const;
 
+  /// \brief Whether the transform preserves the order of values (is 
monotonic).
+  bool PreservesOrder() const;
+
+  /// \brief Whether ordering by this transform's result satisfies the 
ordering of another
+  /// transform's result.
+  ///
+  /// For example, sorting by day(ts) will produce an ordering that is also by 
month(ts)
+  /// or year(ts). However, sorting by day(ts) will not satisfy the order of 
hour(ts) or
+  /// identity(ts).
+  ///
+  /// \return true if ordering by this transform is equivalent to ordering by 
the other
+  /// transform.
+  bool SatisfiesOrderOf(const Transform& other) const;
+
   /// \brief Returns a string representation of this transform (e.g., 
"bucket[16]").
   std::string ToString() const override;
 

Reply via email to