This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d31161b27404 [SPARK-48276][PYTHON][CONNECT] Add the missing `__repr__` method for `SQLExpression` d31161b27404 is described below commit d31161b27404219169345c716d7b7fe20356085d Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Wed May 15 11:42:33 2024 +0800 [SPARK-48276][PYTHON][CONNECT] Add the missing `__repr__` method for `SQLExpression` ### What changes were proposed in this pull request? 1, Add the missing `__repr__` method for `SQLExpression` 2, also adjust the output of `lit(None)`: `None` -> `NULL` to be more consistent with the Spark Classic ### Why are the changes needed? bug fix, all expressions should implement the `__repr__` method. ``` In [2]: from pyspark.sql.functions import when, lit, expr In [3]: expression = expr("foo") In [4]: when(expression, lit(None)) Out[4]: --------------------------------------------------------------------------- TypeError Traceback (most recent call last) File ~/.dev/miniconda3/envs/spark_dev_312/lib/python3.12/site-packages/IPython/core/formatters.py:711, in PlainTextFormatter.__call__(self, obj) 704 stream = StringIO() 705 printer = pretty.RepresentationPrinter(stream, self.verbose, 706 self.max_width, self.newline, 707 max_seq_length=self.max_seq_length, 708 singleton_pprinters=self.singleton_printers, 709 type_pprinters=self.type_printers, 710 deferred_pprinters=self.deferred_printers) --> 711 printer.pretty(obj) 712 printer.flush() 713 return stream.getvalue() File ~/.dev/miniconda3/envs/spark_dev_312/lib/python3.12/site-packages/IPython/lib/pretty.py:411, in RepresentationPrinter.pretty(self, obj) 408 return meth(obj, self, cycle) 409 if cls is not object \ 410 and callable(cls.__dict__.get('__repr__')): --> 411 return _repr_pprint(obj, self, cycle) 413 return _default_pprint(obj, self, cycle) 414 finally: File ~/.dev/miniconda3/envs/spark_dev_312/lib/python3.12/site-packages/IPython/lib/pretty.py:779, in _repr_pprint(obj, p, cycle) 
777 """A pprint that just redirects to the normal repr function.""" 778 # Find newlines and replace them with p.break_() --> 779 output = repr(obj) 780 lines = output.splitlines() 781 with p.group(): File ~/Dev/spark/python/pyspark/sql/connect/column.py:441, in Column.__repr__(self) 440 def __repr__(self) -> str: --> 441 return "Column<'%s'>" % self._expr.__repr__() File ~/Dev/spark/python/pyspark/sql/connect/expressions.py:148, in CaseWhen.__repr__(self) 147 def __repr__(self) -> str: --> 148 _cases = "".join([f" WHEN {c} THEN {v}" for c, v in self._branches]) 149 _else = f" ELSE {self._else_value}" if self._else_value is not None else "" 150 return "CASE" + _cases + _else + " END" TypeError: __str__ returned non-string (type NoneType) ``` ### Does this PR introduce _any_ user-facing change? yes ``` In [3]: from pyspark.sql.functions import when, lit, expr In [4]: expression = expr("foo") In [5]: when_cond = when(expression, lit(None)) In [6]: when_cond Out[6]: Column<'CASE WHEN foo THEN NULL END'> In [7]: str(when_cond) Out[7]: "Column<'CASE WHEN foo THEN NULL END'>" ``` ### How was this patch tested? added ut ### Was this patch authored or co-authored using generative AI tooling? no Closes #46583 from zhengruifeng/expr_repr. 
Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/sql/connect/expressions.py | 9 ++++++++- python/pyspark/sql/tests/test_column.py | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py index b1735f65f520..92dde2f3670e 100644 --- a/python/pyspark/sql/connect/expressions.py +++ b/python/pyspark/sql/connect/expressions.py @@ -455,7 +455,10 @@ class LiteralExpression(Expression): return expr def __repr__(self) -> str: - return f"{self._value}" + if self._value is None: + return "NULL" + else: + return f"{self._value}" class ColumnReference(Expression): @@ -536,6 +539,7 @@ class SQLExpression(Expression): def __init__(self, expr: str) -> None: super().__init__() + assert isinstance(expr, str) self._expr: str = expr def to_plan(self, session: "SparkConnectClient") -> proto.Expression: @@ -547,6 +551,9 @@ class SQLExpression(Expression): def __eq__(self, other: Any) -> bool: return other is not None and isinstance(other, SQLExpression) and other._expr == self._expr + def __repr__(self) -> str: + return self._expr + class SortOrder(Expression): def __init__(self, child: Expression, ascending: bool = True, nullsFirst: bool = True) -> None: diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 92921685d58e..6e5fcde57cab 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -255,6 +255,11 @@ class ColumnTestsMixin: def test_isinstance_dataframe(self): self.assertIsInstance(self.spark.range(1).id, Column) + def test_expr_str_representation(self): + expression = sf.expr("foo") + when_cond = sf.when(expression, sf.lit(None)) + self.assertEqual(str(when_cond), "Column<'CASE WHEN foo THEN NULL END'>") + class ColumnTests(ColumnTestsMixin, ReusedSQLTestCase): pass 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org