This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 7a15e4b47c Fix LocalFileSystem with range request that ends beyond end of file (#6751)
7a15e4b47c is described below

commit 7a15e4b47ca97df2edef689c9f2ebd2f3888b79e
Author: Kyle Barron <[email protected]>
AuthorDate: Thu Feb 6 12:12:31 2025 -0500

    Fix LocalFileSystem with range request that ends beyond end of file (#6751)
    
    * Fix LocalFileSystem with range request that ends beyond end of file
    
    * fix windows
    
    * add comment
    
    * Seek error
    
    * fix seek check
    
    * remove windows flag
    
    * Get file length from file metadata
---
 object_store/src/local.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/object_store/src/local.rs b/object_store/src/local.rs
index 65e87f9821..6fef4614f9 100644
--- a/object_store/src/local.rs
+++ b/object_store/src/local.rs
@@ -868,7 +868,27 @@ pub(crate) fn chunked_stream(
 }
 
 pub(crate) fn read_range(file: &mut File, path: &PathBuf, range: Range<u64>) -> Result<Bytes> {
-    let to_read = range.end - range.start;
+    let file_metadata = file.metadata().map_err(|e| Error::Metadata {
+        source: e.into(),
+        path: path.to_string_lossy().to_string(),
+    })?;
+
+    // If none of the range is satisfiable we should error, e.g. if the start offset is beyond the
+    // extents of the file
+    let file_len = file_metadata.len();
+    if range.start >= file_len {
+        return Err(Error::InvalidRange {
+            source: InvalidGetRange::StartTooLarge {
+                requested: range.start,
+                length: file_len,
+            },
+        }
+        .into());
+    }
+
+    // Don't read past end of file
+    let to_read = range.end.min(file_len) - range.start;
+
     file.seek(SeekFrom::Start(range.start)).map_err(|source| {
         let path = path.into();
         Error::Seek { source, path }
@@ -1131,6 +1151,44 @@ mod tests {
         assert_eq!(&*read_data, data);
     }
 
+    #[tokio::test]
+    async fn range_request_start_beyond_end_of_file() {
+        let root = TempDir::new().unwrap();
+        let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap();
+
+        let location = Path::from("some_file");
+
+        let data = Bytes::from("arbitrary data");
+
+        integration
+            .put(&location, data.clone().into())
+            .await
+            .unwrap();
+
+        integration
+            .get_range(&location, 100..200)
+            .await
+            .expect_err("Should error with start range beyond end of file");
+    }
+
+    #[tokio::test]
+    async fn range_request_beyond_end_of_file() {
+        let root = TempDir::new().unwrap();
+        let integration = LocalFileSystem::new_with_prefix(root.path()).unwrap();
+
+        let location = Path::from("some_file");
+
+        let data = Bytes::from("arbitrary data");
+
+        integration
+            .put(&location, data.clone().into())
+            .await
+            .unwrap();
+
+        let read_data = integration.get_range(&location, 0..100).await.unwrap();
+        assert_eq!(&*read_data, data);
+    }
+
     #[tokio::test]
     #[cfg(target_family = "unix")]
     // Fails on github actions runner (which runs the tests as root)

Reply via email to