This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 658618e  ARROW-7208: [Python][Parquet] Raise better error message when passing a directory path instead of a file path to ParquetFile
658618e is described below

commit 658618ecd540bc6af76efa608cd1ff7b7938ba4c
Author: Wes McKinney <w...@apache.org>
AuthorDate: Sun Jul 12 22:31:18 2020 -0500

    ARROW-7208: [Python][Parquet] Raise better error message when passing a directory path instead of a file path to ParquetFile
    
    Closes #7722 from wesm/ARROW-7208
    
    Authored-by: Wes McKinney <w...@apache.org>
    Signed-off-by: Wes McKinney <w...@apache.org>
---
 python/pyarrow/io.pxi                | 9 +++++++++
 python/pyarrow/tests/test_parquet.py | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 8f8cbd1..76a058d 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -776,11 +776,19 @@ def memory_map(path, mode='r'):
     -------
     mmap : MemoryMappedFile
     """
+    _check_is_file(path)
+
     cdef MemoryMappedFile mmap = MemoryMappedFile()
     mmap._open(path, mode)
     return mmap
 
 
+cdef _check_is_file(path):
+    if os.path.isdir(path):
+        raise IOError("Expected file path, but {0} is a directory"
+                      .format(path))
+
+
 def create_memory_map(path, size):
     """
     Create a file of the given size and memory-map it.
@@ -807,6 +815,7 @@ cdef class OSFile(NativeFile):
         object path
 
     def __cinit__(self, path, mode='r', MemoryPool memory_pool=None):
+        _check_is_file(path)
         self.path = path
 
         cdef:
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 539c444..410eee1 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -3448,6 +3448,15 @@ def test_empty_row_groups(tempdir):
         assert reader.read_row_group(i).equals(table)
 
 
+def test_parquet_file_pass_directory_instead_of_file(tempdir):
+    # ARROW-7208
+    path = tempdir / 'directory'
+    os.mkdir(str(path))
+
+    with pytest.raises(IOError, match="Expected file path"):
+        pq.ParquetFile(path)
+
+
 @pytest.mark.pandas
 @parametrize_legacy_dataset
 def test_parquet_writer_with_caller_provided_filesystem(use_legacy_dataset):

Reply via email to