This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 54a9d4e3869f [MINOR][PS] Convert loop append in Pyspark to list comprehension
54a9d4e3869f is described below
commit 54a9d4e3869f13f8556ba133ccb181ca29a41672
Author: Devin Petersohn <[email protected]>
AuthorDate: Fri Feb 6 07:32:27 2026 +0900
[MINOR][PS] Convert loop append in Pyspark to list comprehension
### What changes were proposed in this pull request?
Use a list comprehension in `pyspark.sql.DataFrame.sort` instead of appending to a list inside a loop.
### Why are the changes needed?
This should improve performance compared with building the list by calling `append` inside a loop.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #53754 from devin-petersohn/devin/pyspark_perf_00.
Authored-by: Devin Petersohn <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/dataframe.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 161651b78a91..c6f348ce600a 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -3159,24 +3159,23 @@ class DataFrame:
if len(cols) == 1 and isinstance(cols[0], list):
cols = cols[0]
- _cols: List[Column] = []
- for c in cols:
+ def _get_col(c: Union[int, str, Column, List[int | str | Column]]) -> Column:
if isinstance(c, int) and not isinstance(c, bool):
# ordinal is 1-based
if c > 0:
- _cols.append(self[c - 1])
+ return self[c - 1]
# negative ordinal means sort by desc
elif c < 0:
- _cols.append(self[-c - 1].desc())
+ return self[-c - 1].desc()
else:
raise PySparkIndexError(
errorClass="ZERO_INDEX",
messageParameters={},
)
elif isinstance(c, Column):
- _cols.append(c)
+ return c
elif isinstance(c, str):
- _cols.append(_to_col(c))
+ return _to_col(c)
else:
raise PySparkTypeError(
errorClass="NOT_COLUMN_OR_INT_OR_STR",
@@ -3186,6 +3185,7 @@ class DataFrame:
},
)
+ _cols: List[Column] = [_get_col(c) for c in cols]
ascending = kwargs.get("ascending", True)
if isinstance(ascending, (bool, int)):
if not ascending:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]