This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new c4156d49c4b [SPARK-42378][CONNECT][PYTHON] Make `DataFrame.select` support `a.*`
c4156d49c4b is described below

commit c4156d49c4b4bacb95ca84f09babf5873693b5ef
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Wed Feb 8 14:43:00 2023 +0800

    [SPARK-42378][CONNECT][PYTHON] Make `DataFrame.select` support `a.*`

    ### What changes were proposed in this pull request?
    Make `DataFrame.select` support `a.*`

    ### Why are the changes needed?
    Bug fix: selecting a struct-star column such as `b.*` on a Spark Connect DataFrame currently fails with:
    ```
    Traceback (most recent call last):
      File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/tests/connect/test_connect_basic.py", line 1377, in test_select_star
        cdf.select("a", "b.*").collect(),
      File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/dataframe.py", line 1305, in collect
        table = self._session.client.to_table(query)
      File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/client.py", line 445, in to_table
        table, _ = self._execute_and_fetch(req)
      File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/client.py", line 639, in _execute_and_fetch
        self._handle_error(rpc_error)
      File "/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/connect/client.py", line 675, in _handle_error
        raise SparkConnectAnalysisException(
    pyspark.errors.exceptions.SparkConnectAnalysisException: [FIELD_NOT_FOUND] No such struct field `*` in `c`, `d`.
    ```

    ### Does this PR introduce _any_ user-facing change?
    Yes: `DataFrame.select` on Spark Connect now accepts struct-star column names such as `b.*`.

    ### How was this patch tested?
    Added a unit test (`test_select_star`).

    Closes #39934 from zhengruifeng/connect_select_star.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
    (cherry picked from commit dbc4c621679318e0652a00aec54927b5659d65cd)
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 python/pyspark/sql/connect/plan.py                 |  9 +++----
 .../sql/tests/connect/test_connect_basic.py        | 29 ++++++++++++++++++++++
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index d1d41b6a690..39b32f065ea 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -375,17 +375,16 @@ class Project(LogicalPlan):
         )
 
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
+        from pyspark.sql.connect.functions import col
+
         assert self._child is not None
+
         proj_exprs = []
         for c in self._columns:
             if isinstance(c, Column):
                 proj_exprs.append(c.to_plan(session))
-            elif c == "*":
-                exp = proto.Expression()
-                exp.unresolved_star.SetInParent()
-                proj_exprs.append(exp)
             else:
-                proj_exprs.append(self.unresolved_attr(c))
+                proj_exprs.append(col(c).to_plan(session))
 
         plan = proto.Relation()
         plan.project.input.CopyFrom(self._child.plan(session))
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index 63b58dc1b56..9068d6f5635 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -1349,6 +1349,35 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
                 .toPandas(),
             )
 
+    def test_select_star(self):
+        data = [Row(a=1, b=Row(c=2, d=Row(e=3)))]
+
+        # +---+--------+
+        # |  a|       b|
+        # +---+--------+
+        # |  1|{2, {3}}|
+        # +---+--------+
+
+        cdf = self.connect.createDataFrame(data=data)
+        sdf = self.spark.createDataFrame(data=data)
+
+        self.assertEqual(
+            cdf.select("*").collect(),
+            sdf.select("*").collect(),
+        )
+        self.assertEqual(
+            cdf.select("a", "*").collect(),
+            sdf.select("a", "*").collect(),
+        )
+        self.assertEqual(
+            cdf.select("a", "b").collect(),
+            sdf.select("a", "b").collect(),
+        )
+        self.assertEqual(
+            cdf.select("a", "b.*").collect(),
+            sdf.select("a", "b.*").collect(),
+        )
+
     def test_fill_na(self):
         # SPARK-41128: Test fill na
         query = """

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org