wgtmac commented on code in PR #583:
URL: https://github.com/apache/iceberg-cpp/pull/583#discussion_r2932112682


##########
src/iceberg/data/equality_delete_writer.h:
##########
@@ -57,6 +57,10 @@ class ICEBERG_EXPORT EqualityDeleteWriter : public 
FileWriter {
  public:
   ~EqualityDeleteWriter() override;
 
+  /// \brief Create a new EqualityDeleteWriter instance.
+  static Result<std::unique_ptr<EqualityDeleteWriter>> Make(
+      const EqualityDeleteWriterOptions& options);

Review Comment:
   The Java implementation's `EqualityDeleteWriter` constructor accepts 
`EncryptionKeyMetadata`. The C++ `EqualityDeleteWriterOptions` struct is 
currently missing an encryption key metadata field.
   
   *Suggested fix:* Add a `// TODO: add key_metadata for encryption` comment to 
the struct to track this missing functionality.



##########
src/iceberg/data/equality_delete_writer.cc:
##########
@@ -19,24 +19,131 @@
 
 #include "iceberg/data/equality_delete_writer.h"
 
+#include <map>
+
+#include "iceberg/file_writer.h"
+#include "iceberg/manifest/manifest_entry.h"
+#include "iceberg/util/macros.h"
+
 namespace iceberg {
 
 class EqualityDeleteWriter::Impl {
  public:
+  static Result<std::unique_ptr<Impl>> Make(EqualityDeleteWriterOptions 
options) {
+    WriterOptions writer_options{
+        .path = options.path,
+        .schema = options.schema,
+        .io = options.io,
+        .properties = WriterProperties::FromMap(options.properties),
+    };
+
+    ICEBERG_ASSIGN_OR_RAISE(auto writer,
+                            WriterFactoryRegistry::Open(options.format, 
writer_options));
+
+    return std::unique_ptr<Impl>(new Impl(std::move(options), 
std::move(writer)));
+  }
+
+  Status Write(ArrowArray* data) {
+    ICEBERG_DCHECK(writer_, "Writer not initialized");
+    return writer_->Write(data);
+  }
+
+  Result<int64_t> Length() const {
+    ICEBERG_DCHECK(writer_, "Writer not initialized");
+    return writer_->length();
+  }
+
+  Status Close() {
+    ICEBERG_DCHECK(writer_, "Writer not initialized");
+    if (closed_) {
+      // Idempotent: no-op if already closed
+      return {};
+    }
+    ICEBERG_RETURN_UNEXPECTED(writer_->Close());
+    closed_ = true;
+    return {};
+  }
+
+  Result<FileWriter::WriteResult> Metadata() {
+    ICEBERG_CHECK(closed_, "Cannot get metadata before closing the writer");
+
+    ICEBERG_ASSIGN_OR_RAISE(auto metrics, writer_->metrics());
+    ICEBERG_ASSIGN_OR_RAISE(auto length, writer_->length());
+    auto split_offsets = writer_->split_offsets();
+
+    // Serialize literal bounds to binary format
+    std::map<int32_t, std::vector<uint8_t>> lower_bounds_map;
+    for (const auto& [col_id, literal] : metrics.lower_bounds) {
+      ICEBERG_ASSIGN_OR_RAISE(auto serialized, literal.Serialize());
+      lower_bounds_map[col_id] = std::move(serialized);
+    }
+    std::map<int32_t, std::vector<uint8_t>> upper_bounds_map;
+    for (const auto& [col_id, literal] : metrics.upper_bounds) {
+      ICEBERG_ASSIGN_OR_RAISE(auto serialized, literal.Serialize());
+      upper_bounds_map[col_id] = std::move(serialized);
+    }
+
+    auto data_file = std::make_shared<DataFile>(DataFile{
+        .content = DataFile::Content::kEqualityDeletes,
+        .file_path = options_.path,
+        .file_format = options_.format,
+        .partition = options_.partition,
+        .record_count = metrics.row_count.value_or(-1),
+        .file_size_in_bytes = length,
+        .column_sizes = {metrics.column_sizes.begin(), 
metrics.column_sizes.end()},
+        .value_counts = {metrics.value_counts.begin(), 
metrics.value_counts.end()},
+        .null_value_counts = {metrics.null_value_counts.begin(),
+                              metrics.null_value_counts.end()},
+        .nan_value_counts = {metrics.nan_value_counts.begin(),
+                             metrics.nan_value_counts.end()},
+        .lower_bounds = std::move(lower_bounds_map),
+        .upper_bounds = std::move(upper_bounds_map),
+        .split_offsets = std::move(split_offsets),

Review Comment:
   In Java, `FileMetadata.deleteFileBuilder(spec)` natively maps the spec's ID 
to the metadata. In C++, `DataFile` requires `partition_spec_id` to be 
explicitly set for certain operations like `FastAppend` (which enforces 
`ICEBERG_BUILDER_CHECK(file->partition_spec_id.has_value(), ...)`).
   
   *Suggested code fix:* Populate the spec ID using `options_.spec` in the 
`DataFile` initialization block inside `Metadata()`:
   ```cpp
   .partition_spec_id = options_.spec ? 
std::make_optional(options_.spec->spec_id()) : std::nullopt,
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to