This is an automated email from the ASF dual-hosted git repository.

timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 694a5d8d feat: Add SQL expression support for `with_columns` (#1286)
694a5d8d is described below

commit 694a5d8d8d6a7e44b92cc45deedf8b162eb1366d
Author: Marko Milenković <[email protected]>
AuthorDate: Tue Oct 28 12:13:07 2025 +0000

    feat: Add SQL expression support for `with_columns` (#1286)
    
    * add SQL expression support for `with_columns`
    
    * fix ruff errors
    
    * Update python/datafusion/dataframe.py
    
    Co-authored-by: Hendrik Makait <[email protected]>
    
    * Update python/datafusion/dataframe.py
    
    Co-authored-by: Hendrik Makait <[email protected]>
    
    * remove parentheses
    
    * update example
    
    * fix ident
    
    ---------
    
    Co-authored-by: Hendrik Makait <[email protected]>
---
 python/datafusion/dataframe.py | 42 +++++++++++++++++++++++++++++++++++-------
 python/tests/test_dataframe.py | 38 +++++++++++++++++++++++++++++---------
 2 files changed, 64 insertions(+), 16 deletions(-)

diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index 645598b5..eed30f57 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -545,13 +545,14 @@ class DataFrame:
         return DataFrame(self.df.with_column(name, ensure_expr(expr)))
 
     def with_columns(
-        self, *exprs: Expr | Iterable[Expr], **named_exprs: Expr
+        self, *exprs: Expr | str | Iterable[Expr | str], **named_exprs: Expr | str
     ) -> DataFrame:
         """Add columns to the DataFrame.
 
-        By passing expressions, iterables of expressions, or named expressions.
+        By passing expressions, iterables of expressions, string SQL expressions,
+        or named expressions.
         All expressions must be :class:`~datafusion.expr.Expr` objects created via
-        :func:`datafusion.col` or :func:`datafusion.lit`.
+        :func:`datafusion.col` or :func:`datafusion.lit`, or SQL expression strings.
         To pass named expressions use the form ``name=Expr``.
 
         Example usage: The following will add 4 columns labeled ``a``, ``b``, ``c``,
@@ -564,17 +565,44 @@ class DataFrame:
                 d=lit(3)
             )
 
+            Equivalent example using just SQL strings:
+
+            df = df.with_columns(
+                "x as a",
+                ["1 as b", "y as c"],
+                d="3"
+            )
+
         Args:
-            exprs: Either a single expression or an iterable of expressions to add.
+            exprs: Either a single expression, an iterable of expressions to add or
+                   SQL expression strings.
             named_exprs: Named expressions in the form of ``name=expr``
 
         Returns:
             DataFrame with the new columns added.
         """
-        expressions = ensure_expr_list(exprs)
+        expressions = []
+        for expr in exprs:
+            if isinstance(expr, str):
+                expressions.append(self.parse_sql_expr(expr).expr)
+            elif isinstance(expr, Iterable) and not isinstance(
+                expr, (Expr, str, bytes, bytearray)
+            ):
+                expressions.extend(
+                    [
+                        self.parse_sql_expr(e).expr
+                        if isinstance(e, str)
+                        else ensure_expr(e)
+                        for e in expr
+                    ]
+                )
+            else:
+                expressions.append(ensure_expr(expr))
+
         for alias, expr in named_exprs.items():
-            ensure_expr(expr)
-            expressions.append(expr.alias(alias).expr)
+            e = self.parse_sql_expr(expr) if isinstance(expr, str) else expr
+            ensure_expr(e)
+            expressions.append(e.alias(alias).expr)
 
         return DataFrame(self.df.with_columns(expressions))
 
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index b2333382..c3a5253c 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -538,15 +538,35 @@ def test_with_columns(df):
     assert result.column(6) == pa.array([5, 7, 9])
 
 
-def test_with_columns_invalid_expr(df):
-    with pytest.raises(TypeError, match=re.escape(EXPR_TYPE_ERROR)):
-        df.with_columns("a")
-    with pytest.raises(TypeError, match=re.escape(EXPR_TYPE_ERROR)):
-        df.with_columns(c="a")
-    with pytest.raises(TypeError, match=re.escape(EXPR_TYPE_ERROR)):
-        df.with_columns(["a"])
-    with pytest.raises(TypeError, match=re.escape(EXPR_TYPE_ERROR)):
-        df.with_columns(c=["a"])
+def test_with_columns_str(df):
+    df = df.with_columns(
+        "a + b as c",
+        "a + b as d",
+        [
+            "a + b as e",
+            "a + b as f",
+        ],
+        g="a + b",
+    )
+
+    # execute and collect the first (and only) batch
+    result = df.collect()[0]
+
+    assert result.schema.field(0).name == "a"
+    assert result.schema.field(1).name == "b"
+    assert result.schema.field(2).name == "c"
+    assert result.schema.field(3).name == "d"
+    assert result.schema.field(4).name == "e"
+    assert result.schema.field(5).name == "f"
+    assert result.schema.field(6).name == "g"
+
+    assert result.column(0) == pa.array([1, 2, 3])
+    assert result.column(1) == pa.array([4, 5, 6])
+    assert result.column(2) == pa.array([5, 7, 9])
+    assert result.column(3) == pa.array([5, 7, 9])
+    assert result.column(4) == pa.array([5, 7, 9])
+    assert result.column(5) == pa.array([5, 7, 9])
+    assert result.column(6) == pa.array([5, 7, 9])
 
 
 def test_cast(df):


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to