ethan-tyler commented on code in PR #1987:
URL: https://github.com/apache/iceberg-rust/pull/1987#discussion_r2662623914


##########
crates/iceberg/src/transaction/snapshot.rs:
##########
@@ -163,14 +166,129 @@ impl<'a> SnapshotProducer<'a> {
         Ok(())
     }
 
+    /// Validates added delete files.
+    ///
+    /// Checks that:
+    /// - Delete files are not used with format version 1
+    /// - Delete files have valid content types (PositionDeletes or 
EqualityDeletes)
+    /// - Equality delete files have equality_ids set
+    /// - Delete files reference valid partition specs
+    /// - Partition values are compatible with partition types
+    pub(crate) fn validate_added_delete_files(&self) -> Result<()> {
+        let format_version = self.table.metadata().format_version();
+        if format_version == FormatVersion::V1 && 
!self.added_delete_files.is_empty() {
+            return Err(Error::new(
+                ErrorKind::FeatureUnsupported,
+                "Delete files are not supported in format version 1. Upgrade 
the table to format version 2 or later.",
+            ));
+        }
+
+        for delete_file in &self.added_delete_files {
+            match delete_file.content_type() {
+                DataContentType::PositionDeletes => {
+                    if delete_file.equality_ids().is_some() {
+                        return Err(Error::new(
+                            ErrorKind::DataInvalid,
+                            "Position delete files must not have equality_ids 
set",
+                        ));
+                    }
+                }
+                DataContentType::EqualityDeletes => {
+                    let ids = delete_file.equality_ids().ok_or_else(|| {
+                        Error::new(
+                            ErrorKind::DataInvalid,
+                            "Equality delete files must have equality_ids set",
+                        )
+                    })?;
+                    if ids.is_empty() {
+                        return Err(Error::new(
+                            ErrorKind::DataInvalid,
+                            "Equality delete files must have equality_ids set",
+                        ));
+                    }
+                }
+                DataContentType::Data => {
+                    return Err(Error::new(
+                        ErrorKind::DataInvalid,
+                        "Data content type is not allowed for delete files",
+                    ));
+                }
+            }
+
+            // TODO: This validation is too strict for partition evolution 
scenarios where delete
+            // files may reference older partition specs. Once 
manifest-per-spec is implemented,
+            // relax this to check that the spec_id exists rather than 
matching the default.
+            if self.table.metadata().default_partition_spec_id() != 
delete_file.partition_spec_id {

Review Comment:
   Will file one once this PR is ready to merge. Waiting to hear from @CTTY on 
how this fits with the DML epic and next steps. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to