shangxinli commented on code in PR #583:
URL: https://github.com/apache/iceberg-cpp/pull/583#discussion_r2934313225


##########
src/iceberg/data/equality_delete_writer.h:
##########
@@ -57,6 +57,10 @@ class ICEBERG_EXPORT EqualityDeleteWriter : public 
FileWriter {
  public:
   ~EqualityDeleteWriter() override;
 
+  /// \brief Create a new EqualityDeleteWriter instance.
+  static Result<std::unique_ptr<EqualityDeleteWriter>> Make(
+      const EqualityDeleteWriterOptions& options);

Review Comment:
   Thanks for the suggestion! Added the `// TODO: add key_metadata for 
encryption` comment to track this missing functionality.



##########
src/iceberg/data/equality_delete_writer.cc:
##########
@@ -19,24 +19,131 @@
 
 #include "iceberg/data/equality_delete_writer.h"
 
+#include <map>
+
+#include "iceberg/file_writer.h"
+#include "iceberg/manifest/manifest_entry.h"
+#include "iceberg/util/macros.h"
+
 namespace iceberg {
 
 class EqualityDeleteWriter::Impl {
  public:
+  static Result<std::unique_ptr<Impl>> Make(EqualityDeleteWriterOptions 
options) {
+    WriterOptions writer_options{
+        .path = options.path,
+        .schema = options.schema,
+        .io = options.io,
+        .properties = WriterProperties::FromMap(options.properties),
+    };
+
+    ICEBERG_ASSIGN_OR_RAISE(auto writer,
+                            WriterFactoryRegistry::Open(options.format, 
writer_options));
+
+    return std::unique_ptr<Impl>(new Impl(std::move(options), 
std::move(writer)));
+  }
+
+  Status Write(ArrowArray* data) {
+    ICEBERG_DCHECK(writer_, "Writer not initialized");
+    return writer_->Write(data);
+  }
+
+  Result<int64_t> Length() const {
+    ICEBERG_DCHECK(writer_, "Writer not initialized");
+    return writer_->length();
+  }
+
+  Status Close() {
+    ICEBERG_DCHECK(writer_, "Writer not initialized");
+    if (closed_) {
+      // Idempotent: no-op if already closed
+      return {};
+    }
+    ICEBERG_RETURN_UNEXPECTED(writer_->Close());
+    closed_ = true;
+    return {};
+  }
+
+  Result<FileWriter::WriteResult> Metadata() {
+    ICEBERG_CHECK(closed_, "Cannot get metadata before closing the writer");
+
+    ICEBERG_ASSIGN_OR_RAISE(auto metrics, writer_->metrics());
+    ICEBERG_ASSIGN_OR_RAISE(auto length, writer_->length());
+    auto split_offsets = writer_->split_offsets();
+
+    // Serialize literal bounds to binary format
+    std::map<int32_t, std::vector<uint8_t>> lower_bounds_map;
+    for (const auto& [col_id, literal] : metrics.lower_bounds) {
+      ICEBERG_ASSIGN_OR_RAISE(auto serialized, literal.Serialize());
+      lower_bounds_map[col_id] = std::move(serialized);
+    }
+    std::map<int32_t, std::vector<uint8_t>> upper_bounds_map;
+    for (const auto& [col_id, literal] : metrics.upper_bounds) {
+      ICEBERG_ASSIGN_OR_RAISE(auto serialized, literal.Serialize());
+      upper_bounds_map[col_id] = std::move(serialized);
+    }
+
+    auto data_file = std::make_shared<DataFile>(DataFile{
+        .content = DataFile::Content::kEqualityDeletes,
+        .file_path = options_.path,
+        .file_format = options_.format,
+        .partition = options_.partition,
+        .record_count = metrics.row_count.value_or(-1),
+        .file_size_in_bytes = length,
+        .column_sizes = {metrics.column_sizes.begin(), 
metrics.column_sizes.end()},
+        .value_counts = {metrics.value_counts.begin(), 
metrics.value_counts.end()},
+        .null_value_counts = {metrics.null_value_counts.begin(),
+                              metrics.null_value_counts.end()},
+        .nan_value_counts = {metrics.nan_value_counts.begin(),
+                             metrics.nan_value_counts.end()},
+        .lower_bounds = std::move(lower_bounds_map),
+        .upper_bounds = std::move(upper_bounds_map),
+        .split_offsets = std::move(split_offsets),

Review Comment:
   Good catch! Added `partition_spec_id` population from `options_.spec` in the 
`DataFile` initialization, along with a test assertion to verify it. Thanks!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to