This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 7a15e4b47c Fix LocalFileSystem with range request that ends beyond end of file (#6751)
7a15e4b47c is described below
commit 7a15e4b47ca97df2edef689c9f2ebd2f3888b79e
Author: Kyle Barron <[email protected]>
AuthorDate: Thu Feb 6 12:12:31 2025 -0500
Fix LocalFileSystem with range request that ends beyond end of file (#6751)
* Fix LocalFileSystem with range request that ends beyond end of file
* fix windows
* add comment
* Seek error
* fix seek check
* remove windows flag
* Get file length from file metadata
---
object_store/src/local.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 59 insertions(+), 1 deletion(-)
diff --git a/object_store/src/local.rs b/object_store/src/local.rs
index 65e87f9821..6fef4614f9 100644
--- a/object_store/src/local.rs
+++ b/object_store/src/local.rs
@@ -868,7 +868,27 @@ pub(crate) fn chunked_stream(
}
pub(crate) fn read_range(file: &mut File, path: &PathBuf, range: Range<u64>) -> Result<Bytes> {
- let to_read = range.end - range.start;
+ let file_metadata = file.metadata().map_err(|e| Error::Metadata {
+ source: e.into(),
+ path: path.to_string_lossy().to_string(),
+ })?;
+
+ // If none of the range is satisfiable we should error, e.g. if the start offset is beyond the
+ // extents of the file
+ let file_len = file_metadata.len();
+ if range.start >= file_len {
+ return Err(Error::InvalidRange {
+ source: InvalidGetRange::StartTooLarge {
+ requested: range.start,
+ length: file_len,
+ },
+ }
+ .into());
+ }
+
+ // Don't read past end of file
+ let to_read = range.end.min(file_len) - range.start;
+
file.seek(SeekFrom::Start(range.start)).map_err(|source| {
let path = path.into();
Error::Seek { source, path }
@@ -1131,6 +1151,44 @@ mod tests {
assert_eq!(&*read_data, data);
}
+ #[tokio::test]
+ async fn range_request_start_beyond_end_of_file() {
+ let root = TempDir::new().unwrap();
+ let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap();
+
+ let location = Path::from("some_file");
+
+ let data = Bytes::from("arbitrary data");
+
+ integration
+ .put(&location, data.clone().into())
+ .await
+ .unwrap();
+
+ integration
+ .get_range(&location, 100..200)
+ .await
+ .expect_err("Should error with start range beyond end of file");
+ }
+
+ #[tokio::test]
+ async fn range_request_beyond_end_of_file() {
+ let root = TempDir::new().unwrap();
+ let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap();
+
+ let location = Path::from("some_file");
+
+ let data = Bytes::from("arbitrary data");
+
+ integration
+ .put(&location, data.clone().into())
+ .await
+ .unwrap();
+
+ let read_data = integration.get_range(&location, 0..100).await.unwrap();
+ assert_eq!(&*read_data, data);
+ }
+
#[tokio::test]
#[cfg(target_family = "unix")]
// Fails on github actions runner (which runs the tests as root)