itholic commented on code in PR #42793:
URL: https://github.com/apache/spark/pull/42793#discussion_r1327908926
##########
python/pyspark/pandas/frame.py:
##########
@@ -1321,11 +1323,76 @@ def applymap(self, func: Callable[[Any], Any]) -> "DataFrame":
0 1.00 4.494400
1 11.262736 20.857489
"""
+warnings.warn(
+"DataFrame.applymap has been deprecated. Use DataFrame.map instead", FutureWarning
+)
# TODO: We can implement shortcut theoretically since it creates new DataFrame
# anyway and we don't have to worry about operations on different DataFrames.
return self._apply_series_op(lambda psser: psser.apply(func))
+def map(self, func: Callable[[Any], Any]) -> "DataFrame":
+"""
+Apply a function to a Dataframe elementwise.
+
+This method applies a function that accepts and returns a scalar
+to every element of a DataFrame.
+
+.. versionadded:: 4.0.0
+DataFrame.applymap was deprecated and renamed to DataFrame.map.
+
+.. note:: this API executes the function once to infer the type which is
+ potentially expensive, for instance, when the dataset is created after
+ aggregations or sorting.
+
+ To avoid this, specify return type in ``func``, for instance, as below:
+
+ >>> def square(x) -> np.int32:
+ ... return x ** 2
+
+ pandas-on-Spark uses return type hints and does not try to infer the type.
+
+Parameters
+----------
+func : callable
+Python function returns a single value from a single value.
+
+Returns
+-------
+DataFrame
+Transformed DataFrame.
+
+Examples
+--------
+>>> df = ps.DataFrame([[1, 2.12], [3.356, 4.567]])
+>>> df
+ 0 1
+0 1.000 2.120
+1 3.356 4.567
+
+>>> def str_len(x) -> int:
+... return len(str(x))
+>>> df.map(str_len)
+ 0 1
+0 3 4
+1 5 5
+
+>>> def power(x) -> float:
+... return x ** 2
+>>> df.map(power)
+ 0 1
+0 1.00 4.494400
+1 11.262736 20.857489
+
+You can omit type hints and let pandas-on-Spark infer its type.
+
+>>> df.map(lambda x: x ** 2)
+ 0 1
+0 1.00 4.494400
+1 11.262736 20.857489
+"""
+return self.applymap(func=func)
Review Comment:
Oh, yeah, we shouldn't call `applymap` here.
Just applied the suggestion. Thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org