This is an automated email from the ASF dual-hosted git repository. xuanwo pushed a commit to branch xuanwo/gdrive-test-fix in repository https://gitbox.apache.org/repos/asf/opendal.git
commit 18fcc81820cae6e43dead18b0b85fadd521ebd22 Author: Xuanwo <[email protected]> AuthorDate: Sat Dec 20 00:05:20 2025 +0800 fix(services/gdrive): Fix prefix list not handeled correctly --- core/services/gdrive/src/core.rs | 24 ++++--- core/services/gdrive/src/lister.rs | 131 ++++++++++++++++++++++++++++++++----- 2 files changed, 129 insertions(+), 26 deletions(-) diff --git a/core/services/gdrive/src/core.rs b/core/services/gdrive/src/core.rs index 5593424c9..b0245f08e 100644 --- a/core/services/gdrive/src/core.rs +++ b/core/services/gdrive/src/core.rs @@ -52,26 +52,30 @@ impl Debug for GdriveCore { } impl GdriveCore { - pub async fn gdrive_stat(&self, path: &str) -> Result<Response<Buffer>> { - let path = build_abs_path(&self.root, path); - let file_id = self.path_cache.get(&path).await?.ok_or(Error::new( - ErrorKind::NotFound, - format!("path not found: {path}"), - ))?; - + pub async fn gdrive_stat_by_id(&self, file_id: &str) -> Result<Response<Buffer>> { // The file metadata in the Google Drive API is very complex. // For now, we only need the file id, name, mime type and modified time. let mut req = Request::get(format!( "https://www.googleapis.com/drive/v3/files/{file_id}?fields=id,name,mimeType,size,modifiedTime" )) - .extension(Operation::Stat) - .body(Buffer::new()) - .map_err(new_request_build_error)?; + .extension(Operation::Stat) + .body(Buffer::new()) + .map_err(new_request_build_error)?; self.sign(&mut req).await?; self.info.http_client().send(req).await } + pub async fn gdrive_stat(&self, path: &str) -> Result<Response<Buffer>> { + let path = build_abs_path(&self.root, path); + let file_id = self.path_cache.get(&path).await?.ok_or(Error::new( + ErrorKind::NotFound, + format!("path not found: {path}"), + ))?; + + self.gdrive_stat_by_id(&file_id).await + } + pub async fn gdrive_get(&self, path: &str, range: BytesRange) -> Result<Response<HttpBody>> { let path = build_abs_path(&self.root, path); let path_id = self.path_cache.get(&path).await?.ok_or(Error::new( diff --git a/core/services/gdrive/src/lister.rs b/core/services/gdrive/src/lister.rs index 643effab0..76bc37af0 100644 --- a/core/services/gdrive/src/lister.rs +++ b/core/services/gdrive/src/lister.rs @@ -69,7 +69,10 @@ impl oio::PageList for GdriveLister { // Include the current directory itself when handling the first page of the listing. if ctx.token.is_empty() && !ctx.done { - let path = build_rel_path(&self.core.root, &self.path); + let mut path = build_rel_path(&self.core.root, &self.path); + if !path.is_empty() && !path.ends_with('/') { + path.push('/'); + } let e = oio::Entry::new(&path, Metadata::new(EntryMode::DIR)); ctx.entries.push_back(e); } @@ -147,6 +150,7 @@ const PAGE_SIZE: i32 = 1000; pub struct GdriveFlatLister { core: Arc<GdriveCore>, root_path: String, + prefix: Option<String>, /// Queue of directory IDs that still need to be listed pending_dirs: VecDeque<PendingDir>, @@ -181,6 +185,7 @@ impl GdriveFlatLister { Self { core, root_path, + prefix: None, pending_dirs: VecDeque::new(), entry_buffer: VecDeque::new(), current_batch: Vec::new(), @@ -210,22 +215,101 @@ impl GdriveFlatLister { "GdriveFlatLister: root path not found: {:?}", &self.root_path ); - self.done = true; + // Directory listing on a non-existent directory should return empty. + if self.root_path.ends_with('/') { + self.done = true; + return Ok(()); + } + + // Prefix listing can still return matches even if the exact path doesn't exist. + let prefix = self.root_path.clone(); + self.prefix = Some(prefix.clone()); + + let mut parent_path = get_parent(&prefix).to_string(); + if parent_path == "/" { + parent_path.clear(); + } + + let parent_id = self.core.path_cache.get(&parent_path).await?; + let parent_id = match parent_id { + Some(id) => id, + None => { + self.done = true; + return Ok(()); + } + }; + + self.root_path = parent_path.clone(); + self.pending_dirs.push_back(PendingDir { + id: parent_id, + path: parent_path, + }); + + self.started = true; return Ok(()); } }; - // Add the root directory entry first - let rel_path = build_rel_path(&self.core.root, &self.root_path); - let entry = oio::Entry::new(&rel_path, Metadata::new(EntryMode::DIR)); - self.entry_buffer.push_back(entry); + // Resolve the entry type for root path so we can handle: + // + // - `list("dir", recursive=true)` where `dir` is a folder but without trailing slash. + // - `list("prefix", recursive=true)` where `prefix` points to a file or a file prefix. + let resp = self.core.gdrive_stat_by_id(&root_id).await?; + if resp.status() != StatusCode::OK { + return Err(parse_error(resp)); + } + + let bytes = resp.into_body().to_bytes(); + let root_file = + serde_json::from_slice::<GdriveFile>(&bytes).map_err(new_json_deserialize_error)?; + let root_is_dir = root_file.mime_type.as_str() == "application/vnd.google-apps.folder"; + + if root_is_dir { + // Directory paths must end with `/` (except the root which is represented by empty path). + if !self.root_path.is_empty() && !self.root_path.ends_with('/') { + self.root_path.push('/'); + self.core.path_cache.insert(&self.root_path, &root_id).await; + } + + // Add the root directory entry first. + let mut rel_path = build_rel_path(&self.core.root, &self.root_path); + if !rel_path.is_empty() && !rel_path.ends_with('/') { + rel_path.push('/'); + } + let entry = oio::Entry::new(&rel_path, Metadata::new(EntryMode::DIR)); + self.entry_buffer.push_back(entry); + + // Queue the root directory for listing. + self.pending_dirs.push_back(PendingDir { + id: root_id.clone(), + path: self.root_path.clone(), + }); + self.dir_id_to_path.insert(root_id, self.root_path.clone()); + } else { + // For file/prefix listing, start from its parent directory and filter results by prefix. + let prefix = self.root_path.clone(); + self.prefix = Some(prefix.clone()); + + let mut parent_path = get_parent(&prefix).to_string(); + if parent_path == "/" { + parent_path.clear(); + } + + let parent_id = self.core.path_cache.get(&parent_path).await?; + let parent_id = match parent_id { + Some(id) => id, + None => { + self.done = true; + return Ok(()); + } + }; - // Queue the root directory for listing - self.pending_dirs.push_back(PendingDir { - id: root_id.clone(), - path: self.root_path.clone(), - }); - self.dir_id_to_path.insert(root_id, self.root_path.clone()); + self.root_path = parent_path.clone(); + self.pending_dirs.push_back(PendingDir { + id: parent_id, + path: parent_path, + }); + } self.started = true; Ok(()) @@ -347,11 +431,26 @@ impl GdriveFlatLister { } } - let entry = oio::Entry::new(&rel_path, metadata); - self.entry_buffer.push_back(entry); + let matches_prefix = self + .prefix + .as_ref() + .map(|prefix| abs_path.starts_with(prefix)) + .unwrap_or(true); + + if matches_prefix { + let entry = oio::Entry::new(&rel_path, metadata); + self.entry_buffer.push_back(entry); + } + + // If it's a directory, queue it for recursive listing if it can contain matching entries. + let should_traverse = is_dir + && self + .prefix + .as_ref() + .map(|prefix| abs_path.starts_with(prefix) || prefix.starts_with(&abs_path)) + .unwrap_or(true); - // If it's a directory, queue it for recursive listing - if is_dir { + if should_traverse { self.pending_dirs.push_back(PendingDir { id: file.id.clone(), path: abs_path.clone(),
