Re: [PR] Add missing conditional functions [datafusion-python]

via GitHub Mon, 30 Mar 2026 09:27:44 -0700


Copilot commented on code in PR #1464:
URL: 
https://github.com/apache/datafusion-python/pull/1464#discussion_r3010872075



##########
python/tests/test_functions.py:
##########
@@ -1435,3 +1435,165 @@ def test_coalesce(df):
     assert result.column(0) == pa.array(
         ["Hello", "fallback", "!"], type=pa.string_view()
     )
+
+
+def test_greatest(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test greatest with two columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b")).alias("greatest_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 5, None], type=pa.int64())
+
+    # Test greatest with three columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b"), column("c")).alias("greatest_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 8, None], type=pa.int64())
+
+    # Test greatest with nulls mixed in (partial nulls)
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.greatest(column("x"), column("y")).alias("g")).collect()[
+        0
+    ]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test greatest with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(
+        f.greatest(column("s1"), column("s2")).alias("g")
+    ).collect()[0]
+    assert result.column(0).to_pylist() == ["banana", "cherry"]
+
+
+def test_least(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test least with two columns
+    result = df_test.select(
+        f.least(column("a"), column("b")).alias("least_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([1, 2, None], type=pa.int64())
+
+    # Test least with three columns
+    result = df_test.select(
+        f.least(column("a"), column("b"), column("c")).alias("least_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([1, 2, None], type=pa.int64())
+
+    # Test least with partial nulls
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.least(column("x"), column("y")).alias("l")).collect()[0]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test least with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(f.least(column("s1"), column("s2")).alias("l")).collect()[
+        0
+    ]
+    assert result.column(0).to_pylist() == ["apple", "apricot"]
+
+
+def test_nvl2(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [

Review Comment:
   The `df` fixture is accepted by `test_nvl2` but never used. Because pytest
will still construct the fixture, this adds unnecessary setup cost and can slow
the suite. Either remove the `df` parameter or refactor the test to reuse the
provided fixture/context instead of creating a new `SessionContext`.



##########
python/tests/test_functions.py:
##########
@@ -1435,3 +1435,165 @@ def test_coalesce(df):
     assert result.column(0) == pa.array(
         ["Hello", "fallback", "!"], type=pa.string_view()
     )
+
+
+def test_greatest(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test greatest with two columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b")).alias("greatest_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 5, None], type=pa.int64())
+
+    # Test greatest with three columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b"), column("c")).alias("greatest_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 8, None], type=pa.int64())
+
+    # Test greatest with nulls mixed in (partial nulls)
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.greatest(column("x"), column("y")).alias("g")).collect()[
+        0
+    ]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test greatest with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(
+        f.greatest(column("s1"), column("s2")).alias("g")
+    ).collect()[0]
+    assert result.column(0).to_pylist() == ["banana", "cherry"]
+
+
+def test_least(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test least with two columns
+    result = df_test.select(
+        f.least(column("a"), column("b")).alias("least_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([1, 2, None], type=pa.int64())
+
+    # Test least with three columns
+    result = df_test.select(
+        f.least(column("a"), column("b"), column("c")).alias("least_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([1, 2, None], type=pa.int64())
+
+    # Test least with partial nulls
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.least(column("x"), column("y")).alias("l")).collect()[0]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test least with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(f.least(column("s1"), column("s2")).alias("l")).collect()[
+        0
+    ]
+    assert result.column(0).to_pylist() == ["apple", "apricot"]
+
+
+def test_nvl2(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 1, None, 4]),
+            pa.array([10, 20, 30, 40]),
+            pa.array([100, 200, 300, 400]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # nvl2 returns b when a is not null, c when a is null
+    result = df_test.select(
+        f.nvl2(column("a"), column("b"), column("c")).alias("result")
+    ).collect()[0]
+    assert result.column(0) == pa.array([100, 20, 300, 40], type=pa.int64())
+
+    # Test with string columns
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["x", None]),
+            pa.array(["not_null", "not_null"]),
+            pa.array(["is_null", "is_null"]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(
+        f.nvl2(column("a"), column("b"), column("c")).alias("result")
+    ).collect()[0]
+    assert result.column(0).to_pylist() == ["not_null", "is_null"]
+
+
+def test_ifnull(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(

Review Comment:
   The `df` fixture is accepted by `test_ifnull` but never used. Because pytest
will still construct the fixture, this adds unnecessary setup cost and can slow
the suite. Either remove the `df` parameter or refactor the test to reuse the
provided fixture/context instead of creating a new `SessionContext`.



##########
python/tests/test_functions.py:
##########
@@ -1435,3 +1435,165 @@ def test_coalesce(df):
     assert result.column(0) == pa.array(
         ["Hello", "fallback", "!"], type=pa.string_view()
     )
+
+
+def test_greatest(df):
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [
+            pa.array([1, 5, None]),
+            pa.array([3, 2, None]),
+            pa.array([2, 8, None]),
+        ],
+        names=["a", "b", "c"],
+    )
+    df_test = ctx.create_dataframe([[batch]])
+
+    # Test greatest with two columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b")).alias("greatest_ab")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 5, None], type=pa.int64())
+
+    # Test greatest with three columns
+    result = df_test.select(
+        f.greatest(column("a"), column("b"), column("c")).alias("greatest_abc")
+    ).collect()[0]
+    assert result.column(0) == pa.array([3, 8, None], type=pa.int64())
+
+    # Test greatest with nulls mixed in (partial nulls)
+    batch2 = pa.RecordBatch.from_arrays(
+        [
+            pa.array([None, 10]),
+            pa.array([5, None]),
+        ],
+        names=["x", "y"],
+    )
+    df_test2 = ctx.create_dataframe([[batch2]])
+    result = df_test2.select(f.greatest(column("x"), column("y")).alias("g")).collect()[
+        0
+    ]
+    assert result.column(0) == pa.array([5, 10], type=pa.int64())
+
+    # Test greatest with string columns
+    batch3 = pa.RecordBatch.from_arrays(
+        [
+            pa.array(["apple", "cherry"]),
+            pa.array(["banana", "apricot"]),
+        ],
+        names=["s1", "s2"],
+    )
+    df_test3 = ctx.create_dataframe([[batch3]])
+    result = df_test3.select(
+        f.greatest(column("s1"), column("s2")).alias("g")
+    ).collect()[0]
+    assert result.column(0).to_pylist() == ["banana", "cherry"]
+
+
+def test_least(df):

Review Comment:
   `df` fixture is accepted but never used in this test. Because pytest will 
still construct the fixture, this adds unnecessary setup cost and can slow the 
suite. Either remove the `df` parameter or refactor the test to reuse the 
provided fixture/context instead of creating a new `SessionContext`.
   ```suggestion
   def test_least():
   ```



##########
python/tests/test_functions.py:
##########
@@ -1435,3 +1435,165 @@ def test_coalesce(df):
     assert result.column(0) == pa.array(
         ["Hello", "fallback", "!"], type=pa.string_view()
     )
+
+
+def test_greatest(df):

Review Comment:
   `df` fixture is accepted but never used in this test. Because pytest will 
still construct the fixture, this adds unnecessary setup cost and can slow the 
suite. Either remove the `df` parameter or refactor the test to reuse the 
provided fixture/context instead of creating a new `SessionContext`.
   ```suggestion
   def test_greatest():
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Add missing conditional functions [datafusion-python]

Reply via email to