[arrow] branch master updated: ARROW-2198: [Python] correct docstring for parquet.read_table

wesm Wed, 28 Feb 2018 20:32:43 -0800

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new 8b3bbae  ARROW-2198: [Python] correct docstring for parquet.read_table
8b3bbae is described below

commit 8b3bbaea8e1787184df1de22f4eb596584ea7044
Author: Wes McKinney <[email protected]>
AuthorDate: Wed Feb 28 23:31:44 2018 -0500

    ARROW-2198: [Python] correct docstring for parquet.read_table
    
    cc @wesm
    
    Author: Wes McKinney <[email protected]>
    Author: siddharth <[email protected]>
    
    Closes #1654 from siddharthteotia/ARROW-2198 and squashes the following commits:
    
    35ff194d <Wes McKinney> Refactor read_table, read_pandas docstrings, better code reuse
    506f874b <siddharth> ARROW-2198:  correct docstring for parquet.read_table
---
 python/pyarrow/parquet.py | 105 +++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 57 deletions(-)

diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 485459e..f46ce94 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -42,9 +42,9 @@ class ParquetFile(object):
 
     Parameters
     ----------
-    source : str or pyarrow.io.NativeFile
-        Readable source. For passing Python file objects or byte buffers,
-        see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
+    source : str, pyarrow.NativeFile, or file-like object
+        Readable source. For passing bytes or buffer-like file containing a
+        Parquet file, use pyarrow.BufferReader
     metadata : ParquetFileMetadata, default None
         Use existing metadata object, rather than reading from file.
     common_metadata : ParquetFileMetadata, default None
@@ -862,35 +862,34 @@ def _make_manifest(path_or_paths, fs, pathsep='/'):
     return pieces, partitions, metadata_path
 
 
-def read_table(source, columns=None, nthreads=1, metadata=None,
-               use_pandas_metadata=False):
-    """
-    Read a Table from Parquet format
+_read_table_docstring = """
+{0}
 
-    Parameters
-    ----------
-    source: str or pyarrow.io.NativeFile
-        Location of Parquet dataset. If a string passed, can be a single file
-        name or directory name. For passing Python file objects or byte
-        buffers, see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
-    columns: list
-        If not None, only these columns will be read from the file. A column
-        name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
-        'a.c', and 'a.d.e'
-    nthreads : int, default 1
-        Number of columns to read in parallel. Requires that the underlying
-        file source is threadsafe
-    metadata : FileMetaData
-        If separately computed
-    use_pandas_metadata : boolean, default False
-        If True and file has custom pandas schema metadata, ensure that
-        index columns are also loaded
+Parameters
+----------
+source: str, pyarrow.NativeFile, or file-like object
+    If a string passed, can be a single file name or directory name. For
+    file-like objects, only read a single file. Use pyarrow.BufferReader to
+    read a file contained in a bytes or buffer-like object
+columns: list
+    If not None, only these columns will be read from the file. A column
+    name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
+    'a.c', and 'a.d.e'
+nthreads : int, default 1
+    Number of columns to read in parallel. Requires that the underlying
+    file source is threadsafe
+metadata : FileMetaData
+    If separately computed
+{1}
+
+Returns
+-------
+{2}
+"""
 
-    Returns
-    -------
-    pyarrow.Table
-        Content of the file as a table (of columns)
-    """
+
+def read_table(source, columns=None, nthreads=1, metadata=None,
+               use_pandas_metadata=False):
     if is_string(source):
         fs = LocalFileSystem.get_instance()
         if fs.isdir(source):
@@ -902,37 +901,29 @@ def read_table(source, columns=None, nthreads=1, metadata=None,
                    use_pandas_metadata=use_pandas_metadata)
 
 
-def read_pandas(source, columns=None, nthreads=1, metadata=None):
-    """
-    Read a Table from Parquet format, also reading DataFrame index values if
-    known in the file metadata
+read_table.__doc__ = _read_table_docstring.format(
+    'Read a Table from Parquet format',
+    """use_pandas_metadata : boolean, default False
+    If True and file has custom pandas schema metadata, ensure that
+    index columns are also loaded""",
+    """pyarrow.Table
+    Content of the file as a table (of columns)""")
 
-    Parameters
-    ----------
-    source: str or pyarrow.io.NativeFile
-        Location of Parquet dataset. If a string passed, can be a single file
-        name. For passing Python file objects or byte buffers,
-        see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
-    columns: list
-        If not None, only these columns will be read from the file. A column
-        name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
-        'a.c', and 'a.d.e'
-    nthreads : int, default 1
-        Number of columns to read in parallel. Requires that the underlying
-        file source is threadsafe
-    metadata : FileMetaData
-        If separately computed
 
-    Returns
-    -------
-    pyarrow.Table
-        Content of the file as a Table of Columns, including DataFrame indexes
-        as Columns.
-    """
+def read_pandas(source, columns=None, nthreads=1, metadata=None):
     return read_table(source, columns=columns, nthreads=nthreads,
                       metadata=metadata, use_pandas_metadata=True)
 
 
+read_pandas.__doc__ = _read_table_docstring.format(
+    'Read a Table from Parquet format, also reading DataFrame\n'
+    'index values if known in the file metadata',
+    '',
+    """pyarrow.Table
+    Content of the file as a Table of Columns, including DataFrame
+    indexes as columns""")
+
+
 def write_table(table, where, row_group_size=None, version='1.0',
                 use_dictionary=True, compression='snappy',
                 use_deprecated_int96_timestamps=None,
@@ -966,7 +957,7 @@ Write a Table to Parquet format
 Parameters
 ----------
 table : pyarrow.Table
-where: string or pyarrow.io.NativeFile
+where: string or pyarrow.NativeFile
 {0}
 """.format(_parquet_writer_arg_docs)
 
@@ -1064,7 +1055,7 @@ def write_metadata(schema, where, version='1.0',
     Parameters
     ----------
     schema : pyarrow.Schema
-    where: string or pyarrow.io.NativeFile
+    where: string or pyarrow.NativeFile
     version : {"1.0", "2.0"}, default "1.0"
         The Parquet format version, defaults to 1.0
     use_deprecated_int96_timestamps : boolean, default False

-- 
To stop receiving notification emails like this one, please contact
[email protected].

[arrow] branch master updated: ARROW-2198: [Python] correct docstring for parquet.read_table

Reply via email to