MrCroxx commented on code in PR #6366: URL: https://github.com/apache/opendal/pull/6366#discussion_r2255757354
########## core/src/layers/foyer.rs: ########## @@ -0,0 +1,354 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::{ + future::Future, + ops::{Bound, Deref, RangeBounds}, + sync::Arc, +}; + +use foyer::{Code, CodeError, Error as FoyerError, HybridCache}; + +use crate::raw::oio::*; +use crate::raw::*; +use crate::*; + +fn extract_err(e: FoyerError) -> Error { + let e = match e.downcast::<Error>() { + Ok(e) => return e, + Err(e) => e, + }; + Error::new(ErrorKind::Unexpected, e.to_string()) +} + +/// [`FoyerValue`] is a wrapper around `Buffer` that implements the `Code` trait. 
+#[derive(Debug)] +pub struct FoyerValue(pub Buffer); + +impl Deref for FoyerValue { + type Target = Buffer; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Code for FoyerValue { + fn encode(&self, writer: &mut impl std::io::Write) -> std::result::Result<(), CodeError> { + let len = self.0.len() as u64; + writer.write_all(&len.to_le_bytes())?; + std::io::copy(&mut self.0.clone(), writer)?; + Ok(()) + } + + fn decode(reader: &mut impl std::io::Read) -> std::result::Result<Self, CodeError> + where + Self: Sized, + { + let mut len_bytes = [0u8; 8]; + reader.read_exact(&mut len_bytes)?; + let len = u64::from_le_bytes(len_bytes) as usize; + let mut buffer = vec![0u8; len]; + reader.read_exact(&mut buffer[..len])?; + Ok(FoyerValue(buffer.into())) + } + + fn estimated_size(&self) -> usize { + 8 + self.0.len() + } +} + +/// Hybrid cache layer for OpenDAL that uses [foyer](https://github.com/foyer-rs/foyer) for caching. +/// +/// # Operation Behavior +/// - `write`: [`FoyerLayer`] will write to the foyer hybrid cache after the service's write operation is completed. +/// - `read`: [`FoyerLayer`] will first check the foyer hybrid cache for the data. If the data is not found, it will perform the read operation on the service and cache the result. +/// - `delete`: [`FoyerLayer`] will remove the data from the foyer hybrid cache regardless of whether the service's delete operation is successful. +/// - Other operations: [`FoyerLayer`] will not cache the results of other operations, such as `list`, `copy`, `rename`, etc. They will be passed through to the underlying accessor without caching. +/// +/// # Examples +/// +/// ```rust +/// use opendal::layers::FoyerLayer; +/// use opendal::services::S3; +/// +/// ``` +#[derive(Debug)] +pub struct FoyerLayer { + cache: HybridCache<String, FoyerValue>, +} + +impl FoyerLayer { + /// Creates a new `FoyerLayer` with the given foyer hybrid cache. 
+ pub fn new(cache: HybridCache<String, FoyerValue>) -> Self { + FoyerLayer { cache } + } +} + +impl<A: Access> Layer<A> for FoyerLayer { + type LayeredAccess = FoyerAccessor<A>; + + fn layer(&self, accessor: A) -> Self::LayeredAccess { + let cache = self.cache.clone(); + FoyerAccessor { + inner: Arc::new(Inner { accessor, cache }), + } + } +} + +#[derive(Debug)] +struct Inner<A: Access> { + accessor: A, + cache: HybridCache<String, FoyerValue>, +} + +#[derive(Debug)] +pub struct FoyerAccessor<A: Access> { + inner: Arc<Inner<A>>, +} + +impl<A: Access> LayeredAccess for FoyerAccessor<A> { + type Inner = A; + type Reader = Buffer; + type Writer = Writer<A>; + type Lister = A::Lister; + type Deleter = Deleter<A>; + + fn inner(&self) -> &Self::Inner { + &self.inner.accessor + } + + fn info(&self) -> Arc<AccessorInfo> { + self.inner.accessor.info() + } + + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + let path = path.to_string(); + let range = args.range().to_range(); + let entry = self + .inner + .cache + .fetch(path.clone(), || { Review Comment: I just found that this requirement is a bit tricky for foyer. As a cache, foyer does not support versioning (and it is also difficult to support, as caches allow only partial data retention, and supporting versioning requires a lot of additional overhead). If users want to read the latest version without a version tag, it may lead to reading incorrect objects or result in cache misses. I think a better approach might be to bypass the cache when there is a versioning requirement, or to treat objects without a version and those with a clear version as two separate objects in the cache without fallback. Any ideas? cc @Xuanwo @jorgehermo9 for help. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscribe@opendal.apache.org
For queries about this service, please contact Infrastructure at: users@infra.apache.org