This is an automated email from the ASF dual-hosted git repository.

timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 3a4ae6d8 Do not convert pyarrow scalar values to plain python types when passing as `lit` (#1319)
3a4ae6d8 is described below

commit 3a4ae6d8ed43fa5a82725a37961b626ff884fd96
Author: Tim Saucer <[email protected]>
AuthorDate: Mon Jan 5 08:25:45 2026 -0500

    Do not convert pyarrow scalar values to plain python types when passing as `lit` (#1319)
    
    * Add unit tests for pyarrow scalar round trips
    
    * Do not convert to bare python object for lit conversion from pyarrow scalar
---
 python/tests/test_expr.py | 108 +++++++++++++++++++++++++++++++++++++++++++++-
 src/pyarrow_util.rs       |   3 +-
 2 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py
index 28c71549..1f821a3d 100644
--- a/python/tests/test_expr.py
+++ b/python/tests/test_expr.py
@@ -17,7 +17,8 @@
 
 import re
 from concurrent.futures import ThreadPoolExecutor
-from datetime import datetime, timezone
+from datetime import date, datetime, time, timezone
+from decimal import Decimal
 
 import pyarrow as pa
 import pytest
@@ -999,3 +1000,108 @@ def test_ensure_expr_list_bytes():
 def test_ensure_expr_list_bytearray():
     with pytest.raises(TypeError, match=re.escape(EXPR_TYPE_ERROR)):
         ensure_expr_list(bytearray(b"a"))
+
+
+@pytest.mark.parametrize(
+    "value",
+    [
+        # Boolean
+        pa.scalar(True, type=pa.bool_()),  # noqa: FBT003
+        pa.scalar(False, type=pa.bool_()),  # noqa: FBT003
+        # Integers - signed
+        pa.scalar(127, type=pa.int8()),
+        pa.scalar(-128, type=pa.int8()),
+        pa.scalar(32767, type=pa.int16()),
+        pa.scalar(-32768, type=pa.int16()),
+        pa.scalar(2147483647, type=pa.int32()),
+        pa.scalar(-2147483648, type=pa.int32()),
+        pa.scalar(9223372036854775807, type=pa.int64()),
+        pa.scalar(-9223372036854775808, type=pa.int64()),
+        # Integers - unsigned
+        pa.scalar(255, type=pa.uint8()),
+        pa.scalar(65535, type=pa.uint16()),
+        pa.scalar(4294967295, type=pa.uint32()),
+        pa.scalar(18446744073709551615, type=pa.uint64()),
+        # Floating point
+        pa.scalar(3.14, type=pa.float32()),
+        pa.scalar(3.141592653589793, type=pa.float64()),
+        pa.scalar(float("inf"), type=pa.float64()),
+        pa.scalar(float("-inf"), type=pa.float64()),
+        # Decimal
+        pa.scalar(Decimal("123.45"), type=pa.decimal128(10, 2)),
+        pa.scalar(Decimal("-999999.999"), type=pa.decimal128(12, 3)),
+        pa.scalar(Decimal("0.00001"), type=pa.decimal128(10, 5)),
+        pa.scalar(Decimal("123.45"), type=pa.decimal256(20, 2)),
+        # Strings
+        pa.scalar("hello world", type=pa.string()),
+        pa.scalar("", type=pa.string()),
+        pa.scalar("unicode: 日本語 🎉", type=pa.string()),
+        pa.scalar("hello", type=pa.large_string()),
+        # Binary
+        pa.scalar(b"binary data", type=pa.binary()),
+        pa.scalar(b"", type=pa.binary()),
+        pa.scalar(b"\x00\x01\x02\xff", type=pa.binary()),
+        pa.scalar(b"large binary", type=pa.large_binary()),
+        pa.scalar(b"fixed!", type=pa.binary(6)),  # fixed size binary
+        # Date
+        pa.scalar(date(2023, 8, 18), type=pa.date32()),
+        pa.scalar(date(1970, 1, 1), type=pa.date32()),
+        pa.scalar(date(2023, 8, 18), type=pa.date64()),
+        # Time
+        pa.scalar(time(12, 30, 45), type=pa.time32("s")),
+        pa.scalar(time(12, 30, 45, 123000), type=pa.time32("ms")),
+        pa.scalar(time(12, 30, 45, 123456), type=pa.time64("us")),
+        pa.scalar(
+            12 * 3600 * 10**9 + 30 * 60 * 10**9, type=pa.time64("ns")
+        ),  # raw nanos
+        # Timestamp - various resolutions
+        pa.scalar(1692335046, type=pa.timestamp("s")),
+        pa.scalar(1692335046618, type=pa.timestamp("ms")),
+        pa.scalar(1692335046618897, type=pa.timestamp("us")),
+        pa.scalar(1692335046618897499, type=pa.timestamp("ns")),
+        # Timestamp with timezone
+        pa.scalar(1692335046, type=pa.timestamp("s", tz="UTC")),
+        pa.scalar(1692335046618897, type=pa.timestamp("us", tz="America/New_York")),
+        pa.scalar(1692335046618897499, type=pa.timestamp("ns", tz="Europe/London")),
+        # Duration
+        pa.scalar(3600, type=pa.duration("s")),
+        pa.scalar(3600000, type=pa.duration("ms")),
+        pa.scalar(3600000000, type=pa.duration("us")),
+        pa.scalar(3600000000000, type=pa.duration("ns")),
+        # Interval
+        pa.scalar((1, 15, 3600000000000), type=pa.month_day_nano_interval()),
+        pa.scalar((0, 0, 0), type=pa.month_day_nano_interval()),
+        pa.scalar((12, 30, 0), type=pa.month_day_nano_interval()),
+        # Null
+        pa.scalar(None, type=pa.null()),
+        pa.scalar(None, type=pa.int64()),
+        pa.scalar(None, type=pa.string()),
+        # List types
+        pa.scalar([1, 2, 3], type=pa.list_(pa.int64())),
+        pa.scalar([], type=pa.list_(pa.int64())),
+        pa.scalar(["a", "b", "c"], type=pa.list_(pa.string())),
+        pa.scalar([[1, 2], [3, 4]], type=pa.list_(pa.list_(pa.int64()))),
+        pa.scalar([1, 2, 3], type=pa.large_list(pa.int64())),
+        # Fixed size list
+        pa.scalar([1, 2, 3], type=pa.list_(pa.int64(), 3)),
+        # Struct
+        pa.scalar(
+            {"x": 1, "y": 2}, type=pa.struct([("x", pa.int64()), ("y", pa.int64())])
+        ),
+        pa.scalar(
+            {"name": "Alice", "age": 30},
+            type=pa.struct([("name", pa.string()), ("age", pa.int32())]),
+        ),
+        pa.scalar(
+            {"nested": {"a": 1}},
+            type=pa.struct([("nested", pa.struct([("a", pa.int64())]))]),
+        ),
+        # Map
+        pa.scalar([("key1", 1), ("key2", 2)], type=pa.map_(pa.string(), pa.int64())),
+    ],
+    ids=lambda v: f"{v.type}",
+)
+def test_round_trip_pyscalar_value(ctx: SessionContext, value: pa.Scalar):
+    df = ctx.sql("select 1 as a")
+    df = df.select(lit(value))
+    assert pa.table(df)[0][0] == value
diff --git a/src/pyarrow_util.rs b/src/pyarrow_util.rs
index cd9e396a..264cfd34 100644
--- a/src/pyarrow_util.rs
+++ b/src/pyarrow_util.rs
@@ -30,11 +30,10 @@ impl FromPyArrow for PyScalarValue {
     fn from_pyarrow_bound(value: &Bound<'_, PyAny>) -> PyResult<Self> {
         let py = value.py();
         let typ = value.getattr("type")?;
-        let val = value.call_method0("as_py")?;
 
         // construct pyarrow array from the python value and pyarrow type
         let factory = py.import("pyarrow")?.getattr("array")?;
-        let args = PyList::new(py, [val])?;
+        let args = PyList::new(py, [value])?;
         let array = factory.call1((args, typ))?;
 
         // convert the pyarrow array to rust array using C data interface


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to