This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 661bbad8f Add ObjectStore::append (#3791)
661bbad8f is described below

commit 661bbad8f817613c9bd5cab8616dcfaa37858865
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Mar 2 17:07:50 2023 +0000

    Add ObjectStore::append (#3791)
---
 object_store/src/lib.rs | 37 +++++++++++++++++++++++++++++++++++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 671b22d0f..3af538254 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -278,7 +278,11 @@ pub type MultipartId = String;
 /// Universal API to multiple object store services.
 #[async_trait]
 pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static {
-    /// Save the provided bytes to the specified location.
+    /// Save the provided bytes to the specified location
+    ///
+    /// The operation is guaranteed to be atomic, it will either successfully
+    /// write the entirety of `bytes` to `location`, or fail. No clients
+    /// should be able to observe a partially written object
     async fn put(&self, location: &Path, bytes: Bytes) -> Result<()>;
 
     /// Get a multi-part upload that allows writing data in chunks
@@ -286,7 +290,9 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static {
     /// Most cloud-based uploads will buffer and upload parts in parallel.
     ///
     /// To complete the upload, [AsyncWrite::poll_shutdown] must be called
-    /// to completion.
+    /// to completion. This operation is guaranteed to be atomic, it will either
+    /// make all the written data available at `location`, or fail. No clients
+    /// should be able to observe a partially written object
     ///
     /// For some object stores (S3, GCS, and local in particular), if the
     /// writer fails or panics, you must call [ObjectStore::abort_multipart]
@@ -306,6 +312,33 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static {
         multipart_id: &MultipartId,
     ) -> Result<()>;
 
+    /// Returns an [`AsyncWrite`] that can be used to append to the object at `location`
+    ///
+    /// A new object will be created if it doesn't already exist, otherwise it will be
+    /// opened, with subsequent writes appended to the end.
+    ///
+    /// This operation cannot be supported by all stores, most use-cases should prefer
+    /// [`ObjectStore::put`] and [`ObjectStore::put_multipart`] for better portability
+    /// and stronger guarantees
+    ///
+    /// This API is not guaranteed to be atomic, in particular
+    ///
+    /// * On error, `location` may contain partial data
+    /// * Concurrent calls to [`ObjectStore::list`] may return partially written objects
+    /// * Concurrent calls to [`ObjectStore::get`] may return partially written data
+    /// * Concurrent calls to [`ObjectStore::put`] may result in data loss / corruption
+    /// * Concurrent calls to [`ObjectStore::append`] may result in data loss / corruption
+    ///
+    /// Additionally some stores, such as Azure, may only support appending to objects created
+    /// with [`ObjectStore::append`], and not with [`ObjectStore::put`], [`ObjectStore::copy`], or
+    /// [`ObjectStore::put_multipart`]
+    async fn append(
+        &self,
+        _location: &Path,
+    ) -> Result<Box<dyn AsyncWrite + Unpin + Send>> {
+        Err(Error::NotImplemented)
+    }
+
     /// Return the bytes that are stored at the specified location.
     async fn get(&self, location: &Path) -> Result<GetResult>;
 

Reply via email to