This is an automated email from the ASF dual-hosted git repository.
liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 50cd2ab6 feat: add bulk insertion to deletion vector (#1578)
50cd2ab6 is described below
commit 50cd2ab65895a07b0d356b61200645340482e127
Author: dentiny <[email protected]>
AuthorDate: Thu Aug 7 17:31:09 2025 -0700
feat: add bulk insertion to deletion vector (#1578)
## What changes are included in this PR?
In this PR, I added a bulk insertion API to deletion vector and roaring
bitmap.
Context:
- I'm working on iceberg-related feature on daily basis, and I'm
implementing own deletion vector and puffin blob myself
+ Code reference:
https://github.com/Mooncake-Labs/moonlink/blob/main/src/moonlink/src/storage/iceberg/deletion_vector.rs
- I would like to leverage upstream's implementation to reduce
re-inventing the wheels, then I noticed some differences
+ My impl supports bulk insertion, because `append` provides better perf
+ In my use case, all deleted rows are fetched in ascending order
## Are these changes tested?
Yes, unit tests added.
---
crates/iceberg/src/delete_vector.rs | 81 ++++++++++++++++++++++++++++++++++++-
1 file changed, 80 insertions(+), 1 deletion(-)
diff --git a/crates/iceberg/src/delete_vector.rs
b/crates/iceberg/src/delete_vector.rs
index 7b4d40ba..f382bf07 100644
--- a/crates/iceberg/src/delete_vector.rs
+++ b/crates/iceberg/src/delete_vector.rs
@@ -21,6 +21,8 @@ use roaring::RoaringTreemap;
use roaring::bitmap::Iter;
use roaring::treemap::BitmapIter;
+use crate::{Error, ErrorKind, Result};
+
#[derive(Debug, Default)]
pub struct DeleteVector {
inner: RoaringTreemap,
@@ -28,7 +30,7 @@ pub struct DeleteVector {
impl DeleteVector {
#[allow(unused)]
- pub(crate) fn new(roaring_treemap: RoaringTreemap) -> DeleteVector {
+ pub fn new(roaring_treemap: RoaringTreemap) -> DeleteVector {
DeleteVector {
inner: roaring_treemap,
}
@@ -43,6 +45,25 @@ impl DeleteVector {
self.inner.insert(pos)
}
+ /// Marks the given `positions` as deleted and returns the number of
elements appended.
+ ///
+ /// The input slice must be strictly ordered in ascending order, and every
value must be greater than all existing values already in the set.
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the precondition is not met.
+ #[allow(dead_code)]
+ pub fn insert_positions(&mut self, positions: &[u64]) -> Result<usize> {
+ if let Err(err) = self.inner.append(positions.iter().copied()) {
+ return Err(Error::new(
+ ErrorKind::PreconditionFailed,
+ "failed to marks rows as deleted".to_string(),
+ )
+ .with_source(err));
+ }
+ Ok(positions.len())
+ }
+
#[allow(unused)]
pub fn len(&self) -> u64 {
self.inner.len()
@@ -120,3 +141,61 @@ impl BitOrAssign for DeleteVector {
self.inner.bitor_assign(&other.inner);
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_insertion_and_iteration() {
+ let mut dv = DeleteVector::default();
+ assert!(dv.insert(42));
+ assert!(dv.insert(100));
+ assert!(!dv.insert(42));
+
+ let mut items: Vec<u64> = dv.iter().collect();
+ items.sort();
+ assert_eq!(items, vec![42, 100]);
+ assert_eq!(dv.len(), 2);
+ }
+
+ #[test]
+ fn test_successful_insert_positions() {
+ let mut dv = DeleteVector::default();
+ let positions = vec![1, 2, 3, 1000, 1 << 33];
+ assert_eq!(dv.insert_positions(&positions).unwrap(), 5);
+
+ let mut collected: Vec<u64> = dv.iter().collect();
+ collected.sort();
+ assert_eq!(collected, positions);
+ }
+
+ /// Testing scenario: bulk insertion fails because input positions are not
strictly increasing.
+ #[test]
+ fn test_failed_insertion_unsorted_elements() {
+ let mut dv = DeleteVector::default();
+ let positions = vec![1, 3, 5, 4];
+ let res = dv.insert_positions(&positions);
+ assert!(res.is_err());
+ }
+
+ /// Testing scenario: bulk insertion fails because input positions have
intersection with existing ones.
+ #[test]
+ fn test_failed_insertion_with_intersection() {
+ let mut dv = DeleteVector::default();
+ let positions = vec![1, 3, 5];
+ assert_eq!(dv.insert_positions(&positions).unwrap(), 3);
+
+ let res = dv.insert_positions(&[2, 4]);
+ assert!(res.is_err());
+ }
+
+ /// Testing scenario: bulk insertion fails because input positions have
duplicates.
+ #[test]
+ fn test_failed_insertion_duplicate_elements() {
+ let mut dv = DeleteVector::default();
+ let positions = vec![1, 3, 5, 5];
+ let res = dv.insert_positions(&positions);
+ assert!(res.is_err());
+ }
+}