This is an automated email from the ASF dual-hosted git repository. guanmingchiu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/mahout.git
commit cc326daebcb2b6c26039fb9fd6175583fdabfeb3 Author: Ryan Huang <[email protected]> AuthorDate: Mon Jan 5 18:44:12 2026 +0800 [QDP] [Numpy] add path exist check (#793) Signed-off-by: Hsien-Cheng Huang <[email protected]> --- qdp/qdp-core/src/readers/arrow_ipc.rs | 22 +++++++++++++++++- qdp/qdp-core/src/readers/numpy.rs | 20 ++++++++++++---- qdp/qdp-core/src/readers/parquet.rs | 44 +++++++++++++++++++++++++++++++++-- 3 files changed, 78 insertions(+), 8 deletions(-) diff --git a/qdp/qdp-core/src/readers/arrow_ipc.rs b/qdp/qdp-core/src/readers/arrow_ipc.rs index 54d038b81..4809cb3d5 100644 --- a/qdp/qdp-core/src/readers/arrow_ipc.rs +++ b/qdp/qdp-core/src/readers/arrow_ipc.rs @@ -38,8 +38,28 @@ impl ArrowIPCReader { /// # Arguments /// * `path` - Path to the Arrow IPC file (.arrow or .feather) pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> { + let path = path.as_ref(); + + // Verify file exists + match path.try_exists() { + Ok(false) => { + return Err(MahoutError::Io(format!( + "Arrow IPC file not found: {}", + path.display() + ))); + } + Err(e) => { + return Err(MahoutError::Io(format!( + "Failed to check if Arrow IPC file exists at {}: {}", + path.display(), + e + ))); + } + Ok(true) => {} + } + Ok(Self { - path: path.as_ref().to_path_buf(), + path: path.to_path_buf(), read: false, }) } diff --git a/qdp/qdp-core/src/readers/numpy.rs b/qdp/qdp-core/src/readers/numpy.rs index 556437c5d..aecf4cf12 100644 --- a/qdp/qdp-core/src/readers/numpy.rs +++ b/qdp/qdp-core/src/readers/numpy.rs @@ -57,11 +57,21 @@ impl NumpyReader { let path = path.as_ref(); // Verify file exists - if !path.exists() { - return Err(MahoutError::Io(format!( - "NumPy file not found: {}", - path.display() - ))); + match path.try_exists() { + Ok(false) => { + return Err(MahoutError::Io(format!( + "NumPy file not found: {}", + path.display() + ))); + } + Err(e) => { + return Err(MahoutError::Io(format!( + "Failed to check if NumPy file exists at {}: {}", + path.display(), + e + ))); + } + Ok(true) => {} } Ok(Self { diff --git a/qdp/qdp-core/src/readers/parquet.rs b/qdp/qdp-core/src/readers/parquet.rs index 1d28073a3..5322d120e 100644 --- a/qdp/qdp-core/src/readers/parquet.rs +++ b/qdp/qdp-core/src/readers/parquet.rs @@ -40,7 +40,27 @@ impl ParquetReader { /// * `path` - Path to the Parquet file /// * `batch_size` - Optional batch size for reading (defaults to entire file) pub fn new<P: AsRef<Path>>(path: P, batch_size: Option<usize>) -> Result<Self> { - let file = File::open(path.as_ref()) + let path = path.as_ref(); + + // Verify file exists + match path.try_exists() { + Ok(false) => { + return Err(MahoutError::Io(format!( + "Parquet file not found: {}", + path.display() + ))); + } + Err(e) => { + return Err(MahoutError::Io(format!( + "Failed to check if Parquet file exists at {}: {}", + path.display(), + e + ))); + } + Ok(true) => {} + } + + let file = File::open(path) .map_err(|e| MahoutError::Io(format!("Failed to open Parquet file: {}", e)))?; let builder = ParquetRecordBatchReaderBuilder::try_new(file) @@ -231,7 +251,27 @@ impl ParquetStreamingReader { /// * `path` - Path to the Parquet file /// * `batch_size` - Optional batch size (defaults to 2048) pub fn new<P: AsRef<Path>>(path: P, batch_size: Option<usize>) -> Result<Self> { - let file = File::open(path.as_ref()) + let path = path.as_ref(); + + // Verify file exists + match path.try_exists() { + Ok(false) => { + return Err(MahoutError::Io(format!( + "Parquet file not found: {}", + path.display() + ))); + } + Err(e) => { + return Err(MahoutError::Io(format!( + "Failed to check if Parquet file exists at {}: {}", + path.display(), + e + ))); + } + Ok(true) => {} + } + + let file = File::open(path) .map_err(|e| MahoutError::Io(format!("Failed to open Parquet file: {}", e)))?; let builder = ParquetRecordBatchReaderBuilder::try_new(file)
