tustvold commented on code in PR #4978:
URL: https://github.com/apache/arrow-rs/pull/4978#discussion_r1376053172
##########
object_store/src/lib.rs:
##########
@@ -212,16 +308,128 @@
//! # }
//! # async fn multi_upload() {
//! #
-//! let object_store: Arc<dyn ObjectStore> = get_object_store();
-//! let path: Path = "data/large_file".try_into().unwrap();
-//! let (_id, mut writer) = object_store.put_multipart(&path).await.unwrap();
-//!
-//! let bytes = Bytes::from_static(b"hello");
-//! writer.write_all(&bytes).await.unwrap();
-//! writer.flush().await.unwrap();
-//! writer.shutdown().await.unwrap();
+//! let object_store: Arc<dyn ObjectStore> = get_object_store();
+//! let path = Path::from("data/large_file");
+//! let ranges = object_store.get_ranges(&path, &[90..100, 400..600, 0..10]).await.unwrap();
+//! assert_eq!(ranges.len(), 3);
+//! assert_eq!(ranges[0].len(), 10);
+//! # }
+//! ```
+//!
+//! # Conditional Fetch
+//!
+//! More complex object retrieval can be supported by [`ObjectStore::get_opts`].
+//!
+//! For example, efficiently refreshing a cache without re-fetching the entire object
+//! data if the object hasn't been modified.
+//!
+//! ```
+//! # use std::collections::btree_map::Entry;
+//! # use std::collections::HashMap;
+//! # use object_store::{GetOptions, GetResult, ObjectStore, Result, Error};
+//! # use std::sync::Arc;
+//! # use std::time::{Duration, Instant};
+//! # use bytes::Bytes;
+//! # use tokio::io::AsyncWriteExt;
+//! # use object_store::path::Path;
+//! struct CacheEntry {
+//! data: Bytes,
+//! e_tag: String,
+//! refreshed_at: Instant,
+//! }
+//!
+//! struct Cache {
+//! entries: HashMap<Path, CacheEntry>,
+//! store: Arc<dyn ObjectStore>,
+//! }
+//!
+//! impl Cache {
+//! pub async fn get(&mut self, path: &Path) -> Result<Bytes> {
+//! Ok(match self.entries.get_mut(path) {
+//! Some(e) => match e.refreshed_at.elapsed() < Duration::from_secs(10) {
+//! true => e.data.clone(), // Return cached data
+//! false => {
+//! let opts = GetOptions {
+//! if_none_match: Some(e.e_tag.clone()),
+//! ..GetOptions::default()
+//! };
+//! match self.store.get_opts(&path, opts).await {
+//! Ok(d) => e.data = d.bytes().await?,
+//! Err(Error::NotModified { .. }) => {} // Data has not changed
+//! Err(e) => return Err(e),
+//! };
+//! e.refreshed_at = Instant::now();
+//! e.data.clone()
+//! }
+//! },
+//! None => {
+//! let get = self.store.get(&path).await?;
+//! let e_tag = get.meta.e_tag.clone();
+//! let data = get.bytes().await?;
+//! if let Some(e_tag) = e_tag {
+//! let entry = CacheEntry {
+//! e_tag,
+//! data: data.clone(),
+//! refreshed_at: Instant::now(),
+//! };
+//! self.entries.insert(path.clone(), entry);
+//! }
+//! data
+//! }
+//! })
+//! }
+//! }
+//! ```
+//!
+//! # Conditional Put
+//!
+//! The default behaviour when writing data is to upsert any existing object at the given path.
+//! More complex behaviours can be achieved using [`PutMode`], and can be used to build
+//! [Optimistic Concurrency Control] based transactions. This facilitates building metadata catalogs,
+//! such as [Apache Iceberg] or [Delta Lake], directly on top of object storage, without relying on
+//! a separate DBMS.
+//!
+//! ```
+//! # use object_store::{Error, ObjectStore, PutMode, UpdateVersion};
+//! # use std::sync::Arc;
+//! # use bytes::Bytes;
+//! # use tokio::io::AsyncWriteExt;
+//! # use object_store::memory::InMemory;
+//! # use object_store::path::Path;
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! # Arc::new(InMemory::new())
+//! # }
+//! # fn do_update(b: Bytes) -> Bytes {b}
+//! # async fn conditional_put() {
Review Comment:
I opted to add a comment
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]