This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
     new 658618e  ARROW-7208: [Python][Parquet] Raise better error message when passing a directory path instead of a file path to ParquetFile
658618e is described below

commit 658618ecd540bc6af76efa608cd1ff7b7938ba4c
Author: Wes McKinney <w...@apache.org>
AuthorDate: Sun Jul 12 22:31:18 2020 -0500

    ARROW-7208: [Python][Parquet] Raise better error message when passing a directory path instead of a file path to ParquetFile

    Closes #7722 from wesm/ARROW-7208

    Authored-by: Wes McKinney <w...@apache.org>
    Signed-off-by: Wes McKinney <w...@apache.org>
---
 python/pyarrow/io.pxi                | 9 +++++++++
 python/pyarrow/tests/test_parquet.py | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 8f8cbd1..76a058d 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -776,11 +776,19 @@ def memory_map(path, mode='r'):
     -------
     mmap : MemoryMappedFile
     """
+    _check_is_file(path)
+
     cdef MemoryMappedFile mmap = MemoryMappedFile()
     mmap._open(path, mode)
     return mmap
 
 
+cdef _check_is_file(path):
+    if os.path.isdir(path):
+        raise IOError("Expected file path, but {0} is a directory"
+                      .format(path))
+
+
 def create_memory_map(path, size):
     """
     Create a file of the given size and memory-map it.
@@ -807,6 +815,7 @@ cdef class OSFile(NativeFile):
         object path
 
     def __cinit__(self, path, mode='r', MemoryPool memory_pool=None):
+        _check_is_file(path)
         self.path = path
 
         cdef:
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 539c444..410eee1 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -3448,6 +3448,15 @@ def test_empty_row_groups(tempdir):
         assert reader.read_row_group(i).equals(table)
 
 
+def test_parquet_file_pass_directory_instead_of_file(tempdir):
+    # ARROW-7208
+    path = tempdir / 'directory'
+    os.mkdir(str(path))
+
+    with pytest.raises(IOError, match="Expected file path"):
+        pq.ParquetFile(path)
+
+
 @pytest.mark.pandas
 @parametrize_legacy_dataset
 def test_parquet_writer_with_caller_provided_filesystem(use_legacy_dataset):