This is an automated email from the ASF dual-hosted git repository.

liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 50cd2ab6 feat: add bulk insertion to deletion vector (#1578)
50cd2ab6 is described below

commit 50cd2ab65895a07b0d356b61200645340482e127
Author: dentiny <[email protected]>
AuthorDate: Thu Aug 7 17:31:09 2025 -0700

    feat: add bulk insertion to deletion vector (#1578)
    
    ## What changes are included in this PR?
    
    In this PR, I added a bulk insertion API to deletion vector and roaring
    bitmap.
    
    Context:
    - I'm working on iceberg-related feature on daily basis, and I'm
    implementing own deletion vector and puffin blob myself
    + Code reference:
    
https://github.com/Mooncake-Labs/moonlink/blob/main/src/moonlink/src/storage/iceberg/deletion_vector.rs
    - I would like to leverage upstream's implementation to reduce
    re-inventing the wheels, then I noticed some differences
    + My impl supports bulk insertion, because `append` provides better perf
      + In my use case, all deleted rows are fetched in ascending order
    
    ## Are these changes tested?
    
    Yes, unit tests added.
---
 crates/iceberg/src/delete_vector.rs | 81 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/crates/iceberg/src/delete_vector.rs 
b/crates/iceberg/src/delete_vector.rs
index 7b4d40ba..f382bf07 100644
--- a/crates/iceberg/src/delete_vector.rs
+++ b/crates/iceberg/src/delete_vector.rs
@@ -21,6 +21,8 @@ use roaring::RoaringTreemap;
 use roaring::bitmap::Iter;
 use roaring::treemap::BitmapIter;
 
+use crate::{Error, ErrorKind, Result};
+
 #[derive(Debug, Default)]
 pub struct DeleteVector {
     inner: RoaringTreemap,
@@ -28,7 +30,7 @@ pub struct DeleteVector {
 
 impl DeleteVector {
     #[allow(unused)]
-    pub(crate) fn new(roaring_treemap: RoaringTreemap) -> DeleteVector {
+    pub fn new(roaring_treemap: RoaringTreemap) -> DeleteVector {
         DeleteVector {
             inner: roaring_treemap,
         }
@@ -43,6 +45,25 @@ impl DeleteVector {
         self.inner.insert(pos)
     }
 
+    /// Marks the given `positions` as deleted and returns the number of 
elements appended.
+    ///
+    /// The input slice must be strictly ordered in ascending order, and every 
value must be greater than all existing values already in the set.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the precondition is not met.
+    #[allow(dead_code)]
+    pub fn insert_positions(&mut self, positions: &[u64]) -> Result<usize> {
+        if let Err(err) = self.inner.append(positions.iter().copied()) {
+            return Err(Error::new(
+                ErrorKind::PreconditionFailed,
+                "failed to marks rows as deleted".to_string(),
+            )
+            .with_source(err));
+        }
+        Ok(positions.len())
+    }
+
     #[allow(unused)]
     pub fn len(&self) -> u64 {
         self.inner.len()
@@ -120,3 +141,61 @@ impl BitOrAssign for DeleteVector {
         self.inner.bitor_assign(&other.inner);
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_insertion_and_iteration() {
+        let mut dv = DeleteVector::default();
+        assert!(dv.insert(42));
+        assert!(dv.insert(100));
+        assert!(!dv.insert(42));
+
+        let mut items: Vec<u64> = dv.iter().collect();
+        items.sort();
+        assert_eq!(items, vec![42, 100]);
+        assert_eq!(dv.len(), 2);
+    }
+
+    #[test]
+    fn test_successful_insert_positions() {
+        let mut dv = DeleteVector::default();
+        let positions = vec![1, 2, 3, 1000, 1 << 33];
+        assert_eq!(dv.insert_positions(&positions).unwrap(), 5);
+
+        let mut collected: Vec<u64> = dv.iter().collect();
+        collected.sort();
+        assert_eq!(collected, positions);
+    }
+
+    /// Testing scenario: bulk insertion fails because input positions are not 
strictly increasing.
+    #[test]
+    fn test_failed_insertion_unsorted_elements() {
+        let mut dv = DeleteVector::default();
+        let positions = vec![1, 3, 5, 4];
+        let res = dv.insert_positions(&positions);
+        assert!(res.is_err());
+    }
+
+    /// Testing scenario: bulk insertion fails because input positions have 
intersection with existing ones.
+    #[test]
+    fn test_failed_insertion_with_intersection() {
+        let mut dv = DeleteVector::default();
+        let positions = vec![1, 3, 5];
+        assert_eq!(dv.insert_positions(&positions).unwrap(), 3);
+
+        let res = dv.insert_positions(&[2, 4]);
+        assert!(res.is_err());
+    }
+
+    /// Testing scenario: bulk insertion fails because input positions have 
duplicates.
+    #[test]
+    fn test_failed_insertion_duplicate_elements() {
+        let mut dv = DeleteVector::default();
+        let positions = vec![1, 3, 5, 5];
+        let res = dv.insert_positions(&positions);
+        assert!(res.is_err());
+    }
+}

Reply via email to