This is an automated email from the ASF dual-hosted git repository.

guanmingchiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git

commit cc326daebcb2b6c26039fb9fd6175583fdabfeb3
Author: Ryan Huang <[email protected]>
AuthorDate: Mon Jan 5 18:44:12 2026 +0800

    [QDP] [Numpy] add path exist check (#793)
    
    Signed-off-by: Hsien-Cheng Huang <[email protected]>
---
 qdp/qdp-core/src/readers/arrow_ipc.rs | 22 +++++++++++++++++-
 qdp/qdp-core/src/readers/numpy.rs     | 20 ++++++++++++----
 qdp/qdp-core/src/readers/parquet.rs   | 44 +++++++++++++++++++++++++++++++++--
 3 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/qdp/qdp-core/src/readers/arrow_ipc.rs b/qdp/qdp-core/src/readers/arrow_ipc.rs
index 54d038b81..4809cb3d5 100644
--- a/qdp/qdp-core/src/readers/arrow_ipc.rs
+++ b/qdp/qdp-core/src/readers/arrow_ipc.rs
@@ -38,8 +38,28 @@ impl ArrowIPCReader {
     /// # Arguments
     /// * `path` - Path to the Arrow IPC file (.arrow or .feather)
     pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
+        let path = path.as_ref();
+
+        // Verify file exists
+        match path.try_exists() {
+            Ok(false) => {
+                return Err(MahoutError::Io(format!(
+                    "Arrow IPC file not found: {}",
+                    path.display()
+                )));
+            }
+            Err(e) => {
+                return Err(MahoutError::Io(format!(
+                    "Failed to check if Arrow IPC file exists at {}: {}",
+                    path.display(),
+                    e
+                )));
+            }
+            Ok(true) => {}
+        }
+
         Ok(Self {
-            path: path.as_ref().to_path_buf(),
+            path: path.to_path_buf(),
             read: false,
         })
     }
diff --git a/qdp/qdp-core/src/readers/numpy.rs b/qdp/qdp-core/src/readers/numpy.rs
index 556437c5d..aecf4cf12 100644
--- a/qdp/qdp-core/src/readers/numpy.rs
+++ b/qdp/qdp-core/src/readers/numpy.rs
@@ -57,11 +57,21 @@ impl NumpyReader {
         let path = path.as_ref();
 
         // Verify file exists
-        if !path.exists() {
-            return Err(MahoutError::Io(format!(
-                "NumPy file not found: {}",
-                path.display()
-            )));
+        match path.try_exists() {
+            Ok(false) => {
+                return Err(MahoutError::Io(format!(
+                    "NumPy file not found: {}",
+                    path.display()
+                )));
+            }
+            Err(e) => {
+                return Err(MahoutError::Io(format!(
+                    "Failed to check if NumPy file exists at {}: {}",
+                    path.display(),
+                    e
+                )));
+            }
+            Ok(true) => {}
         }
 
         Ok(Self {
diff --git a/qdp/qdp-core/src/readers/parquet.rs b/qdp/qdp-core/src/readers/parquet.rs
index 1d28073a3..5322d120e 100644
--- a/qdp/qdp-core/src/readers/parquet.rs
+++ b/qdp/qdp-core/src/readers/parquet.rs
@@ -40,7 +40,27 @@ impl ParquetReader {
     /// * `path` - Path to the Parquet file
     /// * `batch_size` - Optional batch size for reading (defaults to entire file)
     pub fn new<P: AsRef<Path>>(path: P, batch_size: Option<usize>) -> Result<Self> {
-        let file = File::open(path.as_ref())
+        let path = path.as_ref();
+
+        // Verify file exists
+        match path.try_exists() {
+            Ok(false) => {
+                return Err(MahoutError::Io(format!(
+                    "Parquet file not found: {}",
+                    path.display()
+                )));
+            }
+            Err(e) => {
+                return Err(MahoutError::Io(format!(
+                    "Failed to check if Parquet file exists at {}: {}",
+                    path.display(),
+                    e
+                )));
+            }
+            Ok(true) => {}
+        }
+
+        let file = File::open(path)
             .map_err(|e| MahoutError::Io(format!("Failed to open Parquet file: {}", e)))?;
 
         let builder = ParquetRecordBatchReaderBuilder::try_new(file)
@@ -231,7 +251,27 @@ impl ParquetStreamingReader {
     /// * `path` - Path to the Parquet file
     /// * `batch_size` - Optional batch size (defaults to 2048)
     pub fn new<P: AsRef<Path>>(path: P, batch_size: Option<usize>) -> Result<Self> {
-        let file = File::open(path.as_ref())
+        let path = path.as_ref();
+
+        // Verify file exists
+        match path.try_exists() {
+            Ok(false) => {
+                return Err(MahoutError::Io(format!(
+                    "Parquet file not found: {}",
+                    path.display()
+                )));
+            }
+            Err(e) => {
+                return Err(MahoutError::Io(format!(
+                    "Failed to check if Parquet file exists at {}: {}",
+                    path.display(),
+                    e
+                )));
+            }
+            Ok(true) => {}
+        }
+
+        let file = File::open(path)
             .map_err(|e| MahoutError::Io(format!("Failed to open Parquet file: {}", e)))?;
 
         let builder = ParquetRecordBatchReaderBuilder::try_new(file)

Reply via email to