This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 67e6120 [SPARK-35810][PYTHON] Deprecate ps.broadcast API 67e6120 is described below commit 67e6120a851066f183e41f57cc3b10f2f3704df7 Author: itholic <haejoon....@databricks.com> AuthorDate: Mon Jul 19 10:44:59 2021 +0900 [SPARK-35810][PYTHON] Deprecate ps.broadcast API ### What changes were proposed in this pull request? The `broadcast` function in `pyspark.pandas` duplicates `DataFrame.spark.hint` with `"broadcast"`. ```python # The below 2 lines are the same df.spark.hint("broadcast") ps.broadcast(df) ``` So we should remove `broadcast` in the future and show a deprecation warning for now. ### Why are the changes needed? To remove duplicated functionality. ### Does this PR introduce _any_ user-facing change? Users now see a deprecation warning when using `broadcast` in `pyspark.pandas`. ```python >>> ps.broadcast(df) FutureWarning: `broadcast` has been deprecated and will be removed in a future version. use `DataFrame.spark.hint` with 'broadcast' for `name` parameter instead. warnings.warn( ``` ### How was this patch tested? Manually checked the warning message and confirmed that the build passed. Closes #33379 from itholic/SPARK-35810. Lead-authored-by: itholic <haejoon....@databricks.com> Co-authored-by: Hyukjin Kwon <gurwls...@gmail.com> Co-authored-by: Haejoon Lee <44108233+itho...@users.noreply.github.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/pandas/generic.py | 10 ++++++++++ python/pyspark/pandas/namespace.py | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py index c60097e..c1009b0 100644 --- a/python/pyspark/pandas/generic.py +++ b/python/pyspark/pandas/generic.py @@ -860,6 +860,11 @@ class Frame(object, metaclass=ABCMeta): ) if num_files is not None: + warnings.warn( + "`num_files` has been deprecated and might be removed in a future version. 
" + "Use `DataFrame.spark.repartition` instead.", + FutureWarning, + ) sdf = sdf.repartition(num_files) builder = sdf.write.mode(mode) @@ -998,6 +1003,11 @@ class Frame(object, metaclass=ABCMeta): sdf = psdf.to_spark(index_col=index_col) # type: ignore if num_files is not None: + warnings.warn( + "`num_files` has been deprecated and might be removed in a future version. " + "Use `DataFrame.spark.repartition` instead.", + FutureWarning, + ) sdf = sdf.repartition(num_files) builder = sdf.write.mode(mode) diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index a46926d..9af91cb 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -39,6 +39,7 @@ from distutils.version import LooseVersion from functools import reduce from io import BytesIO import json +import warnings import numpy as np import pandas as pd @@ -2822,6 +2823,8 @@ def broadcast(obj: DataFrame) -> DataFrame: """ Marks a DataFrame as small enough for use in broadcast joins. + .. deprecated:: 3.2.0 + Use :func:`DataFrame.spark.hint` instead. Parameters ---------- obj : DataFrame @@ -2852,6 +2855,11 @@ def broadcast(obj: DataFrame) -> DataFrame: ...BroadcastHashJoin... ... """ + warnings.warn( + "`broadcast` has been deprecated and might be removed in a future version. " + "Use `DataFrame.spark.hint` with 'broadcast' for `name` parameter instead.", + FutureWarning, + ) if not isinstance(obj, DataFrame): raise TypeError("Invalid type : expected DataFrame got {}".format(type(obj).__name__)) return DataFrame( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org