This is an automated email from the ASF dual-hosted git repository.

xuanwo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 5bffdf6  feat: support operator== for Literal/Manifest/ManifestList 
(#147)
5bffdf6 is described below

commit 5bffdf6f21c34746d0f66d063b52aa1215f1749c
Author: dongxiao <[email protected]>
AuthorDate: Mon Jul 21 12:03:22 2025 +0800

    feat: support operator== for Literal/Manifest/ManifestList (#147)
    
    add operator== for manifest and manifest list
    
    ---------
    
    Co-authored-by: xiao.dong <[email protected]>
---
 src/iceberg/expression/literal.cc |   2 +
 src/iceberg/expression/literal.h  |   2 +
 src/iceberg/manifest_entry.cc     |   8 +++
 src/iceberg/manifest_entry.h      |  13 +++--
 src/iceberg/manifest_list.h       |  23 +++++---
 test/manifest_list_reader_test.cc | 111 +++++++++++++++-----------------------
 6 files changed, 79 insertions(+), 80 deletions(-)

diff --git a/src/iceberg/expression/literal.cc 
b/src/iceberg/expression/literal.cc
index 9ae3916..d053f30 100644
--- a/src/iceberg/expression/literal.cc
+++ b/src/iceberg/expression/literal.cc
@@ -179,6 +179,8 @@ std::strong_ordering CompareFloat(T lhs, T rhs) {
   return lhs_is_negative <=> rhs_is_negative;
 }
 
+bool Literal::operator==(const Literal& other) const { return (*this <=> 
other) == 0; }
+
 // Three-way comparison operator
 std::partial_ordering Literal::operator<=>(const Literal& other) const {
   // If types are different, comparison is unordered
diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h
index 17752c4..739e20f 100644
--- a/src/iceberg/expression/literal.h
+++ b/src/iceberg/expression/literal.h
@@ -105,6 +105,8 @@ class ICEBERG_EXPORT Literal {
   /// was not valid
   Result<Literal> CastTo(const std::shared_ptr<PrimitiveType>& target_type) 
const;
 
+  bool operator==(const Literal& other) const;
+
   /// \brief Compare two PrimitiveLiterals. Both literals must have the same 
type
   /// and should not be AboveMax or BelowMin.
   std::partial_ordering operator<=>(const Literal& other) const;
diff --git a/src/iceberg/manifest_entry.cc b/src/iceberg/manifest_entry.cc
index 16df2f0..f670671 100644
--- a/src/iceberg/manifest_entry.cc
+++ b/src/iceberg/manifest_entry.cc
@@ -27,6 +27,14 @@
 
 namespace iceberg {
 
+bool ManifestEntry::operator==(const ManifestEntry& other) const {
+  return status == other.status && snapshot_id == other.snapshot_id &&
+         sequence_number == other.sequence_number &&
+         file_sequence_number == other.file_sequence_number &&
+         ((data_file && other.data_file && *data_file == *other.data_file) ||
+          (!data_file && !other.data_file));  // null data_file equals only null
+}
+
 std::shared_ptr<StructType> DataFile::Type(std::shared_ptr<StructType> 
partition_type) {
   return std::make_shared<StructType>(std::vector<SchemaField>{
       kContent,
diff --git a/src/iceberg/manifest_entry.h b/src/iceberg/manifest_entry.h
index 440b13c..9293fd6 100644
--- a/src/iceberg/manifest_entry.h
+++ b/src/iceberg/manifest_entry.h
@@ -29,6 +29,7 @@
 #include "iceberg/expression/literal.h"
 #include "iceberg/file_format.h"
 #include "iceberg/iceberg_export.h"
+#include "iceberg/partition_spec.h"
 #include "iceberg/result.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/type.h"
@@ -68,13 +69,13 @@ struct ICEBERG_EXPORT DataFile {
   /// Field id: 134
   /// Type of content stored by the data file: data, equality deletes, or 
position
   /// deletes (all v1 files are data files)
-  Content content;
+  Content content = Content::kData;
   /// Field id: 100
   /// Full URI for the file with FS scheme
   std::string file_path;
   /// Field id: 101
   /// File format type, avro, orc, parquet, or puffin
-  FileFormatType file_format;
+  FileFormatType file_format = FileFormatType::kParquet;
   /// Field id: 102
   /// Partition data tuple, schema based on the partition spec output using 
partition
   /// field ids
@@ -146,7 +147,7 @@ struct ICEBERG_EXPORT DataFile {
   std::optional<int32_t> sort_order_id;
   /// This field is not included in spec, so it is not serialized into the 
manifest file.
   /// It is just store in memory representation used in process.
-  int32_t partition_spec_id;
+  int32_t partition_spec_id = PartitionSpec::kInitialSpecId;
   /// Field id: 142
   /// The _row_id for the first row in the data file.
   ///
@@ -261,6 +262,8 @@ struct ICEBERG_EXPORT DataFile {
       SchemaField::MakeOptional(145, "content_size_in_bytes", iceberg::int64(),
                                 "The length of referenced content stored in 
the file");
 
+  bool operator==(const DataFile& other) const = default;
+
   static std::shared_ptr<StructType> Type(std::shared_ptr<StructType> 
partition_type);
 };
 
@@ -272,7 +275,7 @@ struct ICEBERG_EXPORT ManifestEntry {
   /// Field id: 0
   /// Used to track additions and deletions. Deletes are informational only 
and not used
   /// in scans.
-  ManifestStatus status;
+  ManifestStatus status = ManifestStatus::kAdded;
   /// Field id: 1
   /// Snapshot id where the file was added, or deleted if status is 2. 
Inherited when
   /// null.
@@ -297,6 +300,8 @@ struct ICEBERG_EXPORT ManifestEntry {
   inline static const SchemaField kFileSequenceNumber =
       SchemaField::MakeOptional(4, "file_sequence_number", iceberg::int64());
 
+  bool operator==(const ManifestEntry& other) const;
+
   static std::shared_ptr<StructType> TypeFromPartitionType(
       std::shared_ptr<StructType> partition_type);
   static std::shared_ptr<StructType> TypeFromDataFileType(
diff --git a/src/iceberg/manifest_list.h b/src/iceberg/manifest_list.h
index 074d614..66433da 100644
--- a/src/iceberg/manifest_list.h
+++ b/src/iceberg/manifest_list.h
@@ -28,8 +28,11 @@
 #include <utility>
 
 #include "iceberg/iceberg_export.h"
+#include "iceberg/partition_spec.h"
 #include "iceberg/result.h"
 #include "iceberg/schema_field.h"
+#include "iceberg/snapshot.h"
+#include "iceberg/table_metadata.h"
 #include "iceberg/type.h"
 
 namespace iceberg {
@@ -40,7 +43,7 @@ namespace iceberg {
 struct ICEBERG_EXPORT PartitionFieldSummary {
   /// Field id: 509
   /// Whether the manifest contains at least one partition with a null value 
for the field
-  bool contains_null;
+  bool contains_null = true;
   /// Field id: 518
   /// Whether the manifest contains at least one partition with a NaN value 
for the field
   std::optional<bool> contains_nan;
@@ -64,6 +67,8 @@ struct ICEBERG_EXPORT PartitionFieldSummary {
   inline static const SchemaField kUpperBound = SchemaField::MakeOptional(
       511, "upper_bound", iceberg::binary(), "Partition upper bound for all 
files");
 
+  bool operator==(const PartitionFieldSummary& other) const = default;
+
   static const StructType& Type();
 };
 
@@ -83,26 +88,26 @@ struct ICEBERG_EXPORT ManifestFile {
   std::string manifest_path;
   /// Field id: 501
   /// Length of the manifest file in bytes
-  int64_t manifest_length;
+  int64_t manifest_length = 0;
   /// Field id: 502
   /// ID of a partition spec used to write the manifest; must be listed in 
table metadata
   /// partition-specs
-  int32_t partition_spec_id;
+  int32_t partition_spec_id = PartitionSpec::kInitialSpecId;
   /// Field id: 517
   /// The type of files tracked by the manifest, either data or delete files; 
0 for all v1
   /// manifests
-  Content content;
+  Content content = Content::kData;
   /// Field id: 515
   /// The sequence number when the manifest was added to the table; use 0 when 
reading v1
   /// manifest lists
-  int64_t sequence_number;
+  int64_t sequence_number = TableMetadata::kInitialSequenceNumber;
   /// Field id: 516
   /// The minimum data sequence number of all live data or delete files in the 
manifest;
   /// use 0 when reading v1 manifest lists
-  int64_t min_sequence_number;
+  int64_t min_sequence_number = TableMetadata::kInitialSequenceNumber;
   /// Field id: 503
   /// ID of the snapshot where the manifest file was added
-  int64_t added_snapshot_id;
+  int64_t added_snapshot_id = Snapshot::kInvalidSnapshotId;
   /// Field id: 504
   /// Number of entries in the manifest that have status ADDED (1), when null 
this is
   /// assumed to be non-zero
@@ -137,7 +142,7 @@ struct ICEBERG_EXPORT ManifestFile {
   std::vector<uint8_t> key_metadata;
   /// Field id: 520
   /// The starting _row_id to assign to rows added by ADDED data files
-  int64_t first_row_id;
+  std::optional<int64_t> first_row_id;
 
   /// \brief Checks if this manifest file contains entries with ADDED status.
   bool has_added_files() const { return added_files_count.value_or(1) > 0; }
@@ -188,6 +193,8 @@ struct ICEBERG_EXPORT ManifestFile {
       520, "first_row_id", iceberg::int64(),
       "Starting row ID to assign to new rows in ADDED data files");
 
+  bool operator==(const ManifestFile& other) const = default;
+
   static const StructType& Type();
 };
 
diff --git a/test/manifest_list_reader_test.cc 
b/test/manifest_list_reader_test.cc
index f825a58..9240b1f 100644
--- a/test/manifest_list_reader_test.cc
+++ b/test/manifest_list_reader_test.cc
@@ -42,6 +42,46 @@ class ManifestListReaderTest : public TempFileTestBase {
     file_io_ = 
std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
   }
 
+  std::vector<ManifestFile> PrepareTestManifestList() {
+    std::vector<ManifestFile> manifest_files;
+    std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/";
+    std::vector<std::string> paths = 
{"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro",
+                                      
"9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro",
+                                      
"2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro",
+                                      
"3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro"};
+    std::vector<int64_t> file_size = {7433, 7431, 7433, 7431};
+    std::vector<int64_t> snapshot_id = {7412193043800610213, 
5485972788975780755,
+                                        1679468743751242972, 
1579605567338877265};
+    std::vector<std::vector<uint8_t>> bounds = {{'x', ';', 0x07, 0x00},
+                                                {'(', 0x19, 0x07, 0x00},
+                                                {0xd0, 0xd4, 0x06, 0x00},
+                                                {0xb8, 0xd4, 0x06, 0x00}};
+    for (int i = 0; i < 4; ++i) {
+      ManifestFile manifest_file;
+      manifest_file.manifest_path = test_dir_prefix + paths[i];
+      manifest_file.manifest_length = file_size[i];
+      manifest_file.partition_spec_id = 0;
+      manifest_file.content = ManifestFile::Content::kData;
+      manifest_file.sequence_number = 4 - i;
+      manifest_file.min_sequence_number = 4 - i;
+      manifest_file.added_snapshot_id = snapshot_id[i];
+      manifest_file.added_files_count = 1;
+      manifest_file.existing_files_count = 0;
+      manifest_file.deleted_files_count = 0;
+      manifest_file.added_rows_count = 1;
+      manifest_file.existing_rows_count = 0;
+      manifest_file.deleted_rows_count = 0;
+      PartitionFieldSummary partition;
+      partition.contains_null = false;
+      partition.contains_nan = false;
+      partition.lower_bound = bounds[i];
+      partition.upper_bound = bounds[i];
+      manifest_file.partitions.emplace_back(partition);
+      manifest_files.emplace_back(manifest_file);
+    }
+    return manifest_files;
+  }
+
   std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
   std::shared_ptr<FileIO> file_io_;
 };
@@ -55,74 +95,9 @@ TEST_F(ManifestListReaderTest, BasicTest) {
   auto read_result = manifest_reader->Files();
   ASSERT_EQ(read_result.has_value(), true);
   ASSERT_EQ(read_result.value().size(), 4);
-  std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/";
-  for (const auto& file : read_result.value()) {
-    auto manifest_path = file.manifest_path.substr(test_dir_prefix.size());
-    if (manifest_path == "2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro") {
-      ASSERT_EQ(file.added_snapshot_id, 7412193043800610213);
-      ASSERT_EQ(file.manifest_length, 7433);
-      ASSERT_EQ(file.sequence_number, 4);
-      ASSERT_EQ(file.min_sequence_number, 4);
-      ASSERT_EQ(file.partitions.size(), 1);
-      const auto& partition = file.partitions[0];
-      ASSERT_EQ(partition.contains_null, false);
-      ASSERT_EQ(partition.contains_nan.value(), false);
-      ASSERT_EQ(partition.lower_bound.value(),
-                std::vector<uint8_t>({'x', ';', 0x07, 0x00}));
-      ASSERT_EQ(partition.upper_bound.value(),
-                std::vector<uint8_t>({'x', ';', 0x07, 0x00}));
-    } else if (manifest_path == 
"9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro") {
-      ASSERT_EQ(file.added_snapshot_id, 5485972788975780755);
-      ASSERT_EQ(file.manifest_length, 7431);
-      ASSERT_EQ(file.sequence_number, 3);
-      ASSERT_EQ(file.min_sequence_number, 3);
-      ASSERT_EQ(file.partitions.size(), 1);
-      const auto& partition = file.partitions[0];
-      ASSERT_EQ(partition.contains_null, false);
-      ASSERT_EQ(partition.contains_nan.value(), false);
-      ASSERT_EQ(partition.lower_bound.value(),
-                std::vector<uint8_t>({'(', 0x19, 0x07, 0x00}));
-      ASSERT_EQ(partition.upper_bound.value(),
-                std::vector<uint8_t>({'(', 0x19, 0x07, 0x00}));
-    } else if (manifest_path == 
"2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro") {
-      ASSERT_EQ(file.added_snapshot_id, 1679468743751242972);
-      ASSERT_EQ(file.manifest_length, 7433);
-      ASSERT_EQ(file.sequence_number, 2);
-      ASSERT_EQ(file.min_sequence_number, 2);
-      ASSERT_EQ(file.partitions.size(), 1);
-      const auto& partition = file.partitions[0];
-      ASSERT_EQ(partition.contains_null, false);
-      ASSERT_EQ(partition.contains_nan.value(), false);
-      ASSERT_EQ(partition.lower_bound.value(),
-                std::vector<uint8_t>({0xd0, 0xd4, 0x06, 0x00}));
-      ASSERT_EQ(partition.upper_bound.value(),
-                std::vector<uint8_t>({0xd0, 0xd4, 0x06, 0x00}));
-    } else if (manifest_path == 
"3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro") {
-      ASSERT_EQ(file.added_snapshot_id, 1579605567338877265);
-      ASSERT_EQ(file.manifest_length, 7431);
-      ASSERT_EQ(file.sequence_number, 1);
-      ASSERT_EQ(file.min_sequence_number, 1);
-      ASSERT_EQ(file.partitions.size(), 1);
-      const auto& partition = file.partitions[0];
-      ASSERT_EQ(partition.contains_null, false);
-      ASSERT_EQ(partition.contains_nan.value(), false);
-      ASSERT_EQ(partition.lower_bound.value(),
-                std::vector<uint8_t>({0xb8, 0xd4, 0x06, 0x00}));
-      ASSERT_EQ(partition.upper_bound.value(),
-                std::vector<uint8_t>({0xb8, 0xd4, 0x06, 0x00}));
-    } else {
-      ASSERT_TRUE(false) << "Unexpected manifest file: " << manifest_path;
-    }
-    ASSERT_EQ(file.partition_spec_id, 0);
-    ASSERT_EQ(file.content, ManifestFile::Content::kData);
-    ASSERT_EQ(file.added_files_count, 1);
-    ASSERT_EQ(file.existing_files_count, 0);
-    ASSERT_EQ(file.deleted_files_count, 0);
-    ASSERT_EQ(file.added_rows_count, 1);
-    ASSERT_EQ(file.existing_rows_count, 0);
-    ASSERT_EQ(file.deleted_rows_count, 0);
-    ASSERT_EQ(file.key_metadata.empty(), true);
-  }
+
+  auto expected_manifest_list = PrepareTestManifestList();
+  ASSERT_EQ(read_result.value(), expected_manifest_list);
 }
 
 }  // namespace iceberg

Reply via email to