ntjohnson1 commented on code in PR #1554:
URL:
https://github.com/apache/datafusion-python/pull/1554#discussion_r3293126416
##########
python/datafusion/context.py:
##########
@@ -962,6 +962,45 @@ def register_record_batches(
"""
self.ctx.register_record_batches(name, partitions)
+ def read_batch(self, batch: pa.RecordBatch) -> DataFrame:
Review Comment:
I would consider it more Pythonic for read_batches to accept RecordBatch |
Iterable[RecordBatch].
##########
python/datafusion/functions.py:
##########
@@ -2756,10 +2766,26 @@ def get_field(expr: Expr, name: Expr | str) -> Expr:
... )
>>> result.collect_column("x_val")[0].as_py()
1
+
+ Multi-level lookup:
+
+ >>> df = df.with_column(
+ ... "outer",
+ ... dfn.functions.named_struct([("inner", dfn.col("s"))]),
Review Comment:
NIT: Not required here, but the doctest namespace already imports functions as
F, which would make this addition less verbose.
##########
python/datafusion/context.py:
##########
@@ -1744,11 +1747,15 @@ def __datafusion_logical_extension_codec__(self) -> Any:
"""Access the PyCapsule FFI_LogicalExtensionCodec."""
return self.ctx.__datafusion_logical_extension_codec__()
- def with_logical_extension_codec(self, codec: Any) -> SessionContext:
+ def with_logical_extension_codec(
+ self, codec: LogicalExtensionCodecExportable | _PyCapsule
+ ) -> SessionContext:
"""Create a new session context with specified codec.
This only supports codecs that have been implemented using the
- FFI interface.
+ FFI interface. ``codec`` must either be a raw
``FFI_LogicalExtensionCodec``
+ ``PyCapsule`` or an object exposing
+ ``__datafusion_logical_extension_codec__``.
Review Comment:
Do we need this addition? Isn't this redundant with the typing?
##########
python/datafusion/context.py:
##########
@@ -1759,11 +1766,15 @@ def __datafusion_physical_extension_codec__(self) ->
Any:
"""Access the PyCapsule FFI_PhysicalExtensionCodec."""
return self.ctx.__datafusion_physical_extension_codec__()
- def with_physical_extension_codec(self, codec: Any) -> SessionContext:
+ def with_physical_extension_codec(
+ self, codec: PhysicalExtensionCodecExportable | _PyCapsule
+ ) -> SessionContext:
"""Create a new session context with the specified physical codec.
This only supports codecs that have been implemented using the
- FFI interface.
+ FFI interface. ``codec`` must either be a raw
Review Comment:
Ditto on shadowing type hint
##########
python/datafusion/context.py:
##########
@@ -1310,6 +1310,65 @@ def deregister_udwf(self, name: str) -> None:
"""
self.ctx.deregister_udwf(name)
+ def udf(self, name: str) -> ScalarUDF:
+ """Look up a registered scalar UDF by name.
+
+ Args:
+ name: Name of the registered scalar UDF.
+
+ Raises:
+ Exception: If no scalar UDF is registered under ``name``.
Review Comment:
I don't recall if this is the convention across the code base, but on a quick
look I'd expect this to just return ``ScalarUDF | None``.
##########
python/datafusion/context.py:
##########
@@ -1313,11 +1313,44 @@ def deregister_udwf(self, name: str) -> None:
def udf(self, name: str) -> ScalarUDF:
"""Look up a registered scalar UDF by name.
+ Returns the same :py:class:`~datafusion.user_defined.ScalarUDF`
Review Comment:
Shadows the return type.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]