Repository: spark Updated Branches: refs/heads/master 9244957b5 -> 7bf4da8a3
[MINOR] Fixed up pandas_udf related docs and formatting ## What changes were proposed in this pull request? Fixed some minor issues with pandas_udf related docs and formatting. ## How was this patch tested? NA Author: Bryan Cutler <cutl...@gmail.com> Closes #19375 from BryanCutler/arrow-pandas_udf-cleanup-minor. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7bf4da8a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7bf4da8a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7bf4da8a Branch: refs/heads/master Commit: 7bf4da8a33c33b03bbfddc698335fe9b86ce1e0e Parents: 9244957 Author: Bryan Cutler <cutl...@gmail.com> Authored: Thu Sep 28 10:24:51 2017 +0900 Committer: hyukjinkwon <gurwls...@gmail.com> Committed: Thu Sep 28 10:24:51 2017 +0900 ---------------------------------------------------------------------- python/pyspark/serializers.py | 6 +++--- python/pyspark/sql/functions.py | 6 ++---- 2 files changed, 5 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/7bf4da8a/python/pyspark/serializers.py ---------------------------------------------------------------------- diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index db77b7e..ad18bd0 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -191,7 +191,7 @@ class FramedSerializer(Serializer): class ArrowSerializer(FramedSerializer): """ - Serializes an Arrow stream. + Serializes bytes as Arrow data with the Arrow file format. """ def dumps(self, batch): @@ -239,7 +239,7 @@ class ArrowStreamPandasSerializer(Serializer): def dump_stream(self, iterator, stream): """ - Make ArrowRecordBatches from Pandas Serieses and serialize. Input is a single series or + Make ArrowRecordBatches from Pandas Series and serialize. Input is a single series or a list of series accompanied by an optional pyarrow type to coerce the data to. """ import pyarrow as pa @@ -257,7 +257,7 @@ class ArrowStreamPandasSerializer(Serializer): def load_stream(self, stream): """ - Deserialize ArrowRecordBatchs to an Arrow table and return as a list of pandas.Series. + Deserialize ArrowRecordBatches to an Arrow table and return as a list of pandas.Series. """ import pyarrow as pa reader = pa.open_stream(stream) http://git-wip-us.apache.org/repos/asf/spark/blob/7bf4da8a/python/pyspark/sql/functions.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 63e9a83..b45a59d 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2199,16 +2199,14 @@ def pandas_udf(f=None, returnType=StringType()): ... >>> df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age")) >>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")) \\ - ... .show() # doctest: +SKIP + ... .show() # doctest: +SKIP +----------+--------------+------------+ |slen(name)|to_upper(name)|add_one(age)| +----------+--------------+------------+ | 8| JOHN DOE| 22| +----------+--------------+------------+ """ - wrapped_udf = _create_udf(f, returnType=returnType, vectorized=True) - - return wrapped_udf + return _create_udf(f, returnType=returnType, vectorized=True) blacklist = ['map', 'since', 'ignore_unicode_prefix'] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org