This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8b3bbae ARROW-2198: [Python] correct docstring for parquet.read_table
8b3bbae is described below
commit 8b3bbaea8e1787184df1de22f4eb596584ea7044
Author: Wes McKinney <[email protected]>
AuthorDate: Wed Feb 28 23:31:44 2018 -0500
ARROW-2198: [Python] correct docstring for parquet.read_table
cc @wesm
Author: Wes McKinney <[email protected]>
Author: siddharth <[email protected]>
Closes #1654 from siddharthteotia/ARROW-2198 and squashes the following
commits:
35ff194d <Wes McKinney> Refactor read_table, read_pandas docstrings, better
code reuse
506f874b <siddharth> ARROW-2198: correct docstring for parquet.read_table
---
python/pyarrow/parquet.py | 105 +++++++++++++++++++++-------------------------
1 file changed, 48 insertions(+), 57 deletions(-)
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 485459e..f46ce94 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -42,9 +42,9 @@ class ParquetFile(object):
Parameters
----------
- source : str or pyarrow.io.NativeFile
- Readable source. For passing Python file objects or byte buffers,
- see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
+ source : str, pyarrow.NativeFile, or file-like object
+ Readable source. For passing bytes or buffer-like file containing a
+ Parquet file, use pyarrow.BufferReader
metadata : ParquetFileMetadata, default None
Use existing metadata object, rather than reading from file.
common_metadata : ParquetFileMetadata, default None
@@ -862,35 +862,34 @@ def _make_manifest(path_or_paths, fs, pathsep='/'):
return pieces, partitions, metadata_path
-def read_table(source, columns=None, nthreads=1, metadata=None,
- use_pandas_metadata=False):
- """
- Read a Table from Parquet format
+_read_table_docstring = """
+{0}
- Parameters
- ----------
- source: str or pyarrow.io.NativeFile
- Location of Parquet dataset. If a string passed, can be a single file
- name or directory name. For passing Python file objects or byte
- buffers, see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
- columns: list
- If not None, only these columns will be read from the file. A column
- name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
- 'a.c', and 'a.d.e'
- nthreads : int, default 1
- Number of columns to read in parallel. Requires that the underlying
- file source is threadsafe
- metadata : FileMetaData
- If separately computed
- use_pandas_metadata : boolean, default False
- If True and file has custom pandas schema metadata, ensure that
- index columns are also loaded
+Parameters
+----------
+source: str, pyarrow.NativeFile, or file-like object
+ If a string is passed, it can be a single file name or directory name.
+ For file-like objects, only a single file is read. Use
+ pyarrow.BufferReader to read a file contained in a bytes or buffer-like object
+columns: list
+ If not None, only these columns will be read from the file. A column
+ name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
+ 'a.c', and 'a.d.e'
+nthreads : int, default 1
+ Number of columns to read in parallel. Requires that the underlying
+ file source is threadsafe
+metadata : FileMetaData
+ If separately computed
+{1}
+
+Returns
+-------
+{2}
+"""
- Returns
- -------
- pyarrow.Table
- Content of the file as a table (of columns)
- """
+
+def read_table(source, columns=None, nthreads=1, metadata=None,
+ use_pandas_metadata=False):
if is_string(source):
fs = LocalFileSystem.get_instance()
if fs.isdir(source):
@@ -902,37 +901,29 @@ def read_table(source, columns=None, nthreads=1,
metadata=None,
use_pandas_metadata=use_pandas_metadata)
-def read_pandas(source, columns=None, nthreads=1, metadata=None):
- """
- Read a Table from Parquet format, also reading DataFrame index values if
- known in the file metadata
+read_table.__doc__ = _read_table_docstring.format(
+ 'Read a Table from Parquet format',
+ """use_pandas_metadata : boolean, default False
+ If True and file has custom pandas schema metadata, ensure that
+ index columns are also loaded""",
+ """pyarrow.Table
+ Content of the file as a table (of columns)""")
- Parameters
- ----------
- source: str or pyarrow.io.NativeFile
- Location of Parquet dataset. If a string passed, can be a single file
- name. For passing Python file objects or byte buffers,
- see pyarrow.io.PythonFileInterface or pyarrow.io.BufferReader.
- columns: list
- If not None, only these columns will be read from the file. A column
- name may be a prefix of a nested field, e.g. 'a' will select 'a.b',
- 'a.c', and 'a.d.e'
- nthreads : int, default 1
- Number of columns to read in parallel. Requires that the underlying
- file source is threadsafe
- metadata : FileMetaData
- If separately computed
- Returns
- -------
- pyarrow.Table
- Content of the file as a Table of Columns, including DataFrame indexes
- as Columns.
- """
+def read_pandas(source, columns=None, nthreads=1, metadata=None):
return read_table(source, columns=columns, nthreads=nthreads,
metadata=metadata, use_pandas_metadata=True)
+read_pandas.__doc__ = _read_table_docstring.format(
+ 'Read a Table from Parquet format, also reading DataFrame\n'
+ 'index values if known in the file metadata',
+ '',
+ """pyarrow.Table
+ Content of the file as a Table of Columns, including DataFrame
+ indexes as columns""")
+
+
def write_table(table, where, row_group_size=None, version='1.0',
use_dictionary=True, compression='snappy',
use_deprecated_int96_timestamps=None,
@@ -966,7 +957,7 @@ Write a Table to Parquet format
Parameters
----------
table : pyarrow.Table
-where: string or pyarrow.io.NativeFile
+where: string or pyarrow.NativeFile
{0}
""".format(_parquet_writer_arg_docs)
@@ -1064,7 +1055,7 @@ def write_metadata(schema, where, version='1.0',
Parameters
----------
schema : pyarrow.Schema
- where: string or pyarrow.io.NativeFile
+ where: string or pyarrow.NativeFile
version : {"1.0", "2.0"}, default "1.0"
The Parquet format version, defaults to 1.0
use_deprecated_int96_timestamps : boolean, default False
--
To stop receiving notification emails like this one, please contact
[email protected].