kevinjqliu commented on code in PR #1997: URL: https://github.com/apache/iceberg-rust/pull/1997#discussion_r2854365015
########## bindings/python/DEPENDENCIES.rust.tsv: ########## Review Comment: i think we usually bump these during release. Previous upgrade PR didnt include this https://github.com/apache/iceberg-rust/pull/1899/ ########## Cargo.toml: ########## @@ -42,14 +42,14 @@ rust-version = "1.88" anyhow = "1.0.72" apache-avro = { version = "0.21", features = ["zstandard"] } array-init = "2" -arrow-arith = "57.0" -arrow-array = "57.0" -arrow-buffer = "57.0" -arrow-cast = "57.0" -arrow-ord = "57.0" -arrow-schema = "57.0" -arrow-select = "57.0" -arrow-string = "57.0" +arrow-arith = "57.1" Review Comment: 👍 datafusion 51.1.0 uses arrow-* 57.1 https://crates.io/crates/datafusion/52.1.0/dependencies ########## bindings/python/pyproject.toml: ########## @@ -25,17 +25,13 @@ classifiers = [ "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Rust", Review Comment: i think we should revert these changes ########## bindings/python/pyproject.toml: ########## @@ -55,7 +51,7 @@ ignore = ["F403", "F405"] dev = [ "maturin>=1.0,<2.0", "pytest>=8.3.2", - "datafusion==50.*", - "pyiceberg[sql-sqlite,pyarrow]", + "datafusion==52.*", + "pyiceberg[sql-sqlite,pyarrow]==0.9.1", Review Comment: tested this locally, we dont need the pin. update the Cargo.lock instead ########## bindings/python/tests/test_datafusion_table_provider.py: ########## @@ -142,12 +145,12 @@ def test_register_pyiceberg_table( iceberg_table.append(arrow_table_with_null) # monkey patch the __datafusion_table_provider__ method to the iceberg table - def __datafusion_table_provider__(self): + def __datafusion_table_provider__(self, session): Review Comment: 👍 `session` is available here https://github.com/apache/datafusion-python/blob/22086650c8df8b7bc382130c9560f76762dbe6c0/python/datafusion/context.py#L96-L102 ########## bindings/python/tests/test_datafusion_table_provider.py: ########## @@ -25,10 +25,13 @@ import pyarrow as pa from pathlib import Path import datafusion +from packaging.version import Version -assert ( - datafusion.__version__ >= "45" -) # iceberg table provider only works for datafusion >= 45 +if Version(datafusion.__version__) < Version("52.0.0"): Review Comment: +1 should take from `datafusion-ffi` ########## bindings/python/src/datafusion_table_provider.rs: ########## @@ -19,17 +19,51 @@ use std::collections::HashMap; use std::ffi::CString; use std::sync::Arc; +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use datafusion_ffi::table_provider::FFI_TableProvider; use iceberg::TableIdent; use iceberg::io::FileIO; use iceberg::table::StaticTable; use iceberg_datafusion::table::IcebergStaticTableProvider; -use pyo3::exceptions::PyRuntimeError; -use pyo3::prelude::*; -use pyo3::types::PyCapsule; +use pyo3::exceptions::{PyRuntimeError, PyValueError}; +use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods, *}; +use pyo3::types::{PyAny, PyCapsule}; use crate::runtime::runtime; +fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> { Review Comment: 👍 these are from the 52.0.0 upgrade guide https://github.com/apache/datafusion-python/blob/main/docs/source/user-guide/upgrade-guides.rst#datafusion-5200 and matches the example https://github.com/apache/datafusion-python/blob/22086650c8df8b7bc382130c9560f76762dbe6c0/examples/datafusion-ffi-example/src/utils.rs ########## bindings/python/src/datafusion_table_provider.rs: ########## @@ -19,17 +19,51 @@ use std::collections::HashMap; use std::ffi::CString; use std::sync::Arc; +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; use datafusion_ffi::table_provider::FFI_TableProvider; use iceberg::TableIdent; use iceberg::io::FileIO; use iceberg::table::StaticTable; use iceberg_datafusion::table::IcebergStaticTableProvider; -use pyo3::exceptions::PyRuntimeError; -use pyo3::prelude::*; -use pyo3::types::PyCapsule; +use pyo3::exceptions::{PyRuntimeError, PyValueError}; +use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods, *}; +use pyo3::types::{PyAny, PyCapsule}; use crate::runtime::runtime; +fn validate_pycapsule(capsule: &Bound<PyCapsule>, name: &str) -> PyResult<()> { Review Comment: nit: should we make both `pub(crate) `? ########## bindings/python/pyproject.toml: ########## @@ -55,7 +51,7 @@ ignore = ["F403", "F405"] dev = [ "maturin>=1.0,<2.0", "pytest>=8.3.2", - "datafusion==50.*", - "pyiceberg[sql-sqlite,pyarrow]", + "datafusion==52.*", + "pyiceberg[sql-sqlite,pyarrow]==0.9.1", Review Comment: just checking, is this pin needed? pyiceberg has 0.10 and 0.11 already -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
