This is an automated email from the ASF dual-hosted git repository.
timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new e42775c2 ci: update pre-commit hooks, fix linting, and refresh dependencies (#1385)
e42775c2 is described below
commit e42775c2fcfe8929df0874414ba2bcd6bbea174c
Author: dario curreri <[email protected]>
AuthorDate: Thu Feb 26 15:13:38 2026 +0100
ci: update pre-commit hooks, fix linting, and refresh dependencies (#1385)
* ci: update pre-commit hooks and fix linting issues
* Update Ruff version in pre-commit configuration to v0.15.1.
* Add noqa comments to suppress specific linting warnings in various files.
* Update regex patterns in test cases for better matching.
* style: correct indentation in GitHub Actions workflow file
* Adjusted indentation for the enable-cache option in the test.yml workflow
file to ensure proper YAML formatting.
* refactor: reorder imports in indexed_field.rs for clarity
* Adjusted the order of imports in indexed_field.rs to improve readability
and maintain consistency with project conventions.
* build: update dependencies in Cargo.toml and Cargo.lock
* Bump versions of several dependencies including tokio, pyo3-log, prost,
uuid, and log to their latest releases.
* Update Cargo.lock to reflect the changes in dependency versions.
* style: format pyproject.toml for consistency
* Adjusted formatting in pyproject.toml for improved readability by
aligning lists and ensuring consistent indentation.
* Updated dependencies and configuration settings for better organization.
* style: remove noqa comments for import statements
* Cleaned up import statements in multiple files by removing unnecessary
noqa comments, enhancing code readability and maintaining consistency across
the codebase.
* style: simplify formatting in pyproject.toml
* Streamlined list formatting in pyproject.toml for improved readability by
removing unnecessary line breaks and ensuring consistent structure across
sections.
* No functional changes were made; the focus was solely on code style and
organization.
---
.github/workflows/test.yml | 2 +-
.pre-commit-config.yaml | 2 +-
Cargo.lock | 4 +-
Cargo.toml | 12 +++---
pyproject.toml | 81 ++++++++++++++++++++-------------------
python/datafusion/expr.py | 2 +
python/datafusion/user_defined.py | 4 +-
python/tests/test_catalog.py | 2 +-
python/tests/test_dataframe.py | 2 +-
python/tests/test_functions.py | 24 ++++++------
python/tests/test_sql.py | 2 +-
python/tests/test_udf.py | 7 ++--
python/tests/test_udwf.py | 4 +-
src/expr/indexed_field.rs | 5 ++-
14 files changed, 78 insertions(+), 75 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 55248b6b..4cad8db2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -67,7 +67,7 @@ jobs:
- name: Install dependencies
uses: astral-sh/setup-uv@v7
with:
- enable-cache: true
+ enable-cache: true
# Download the Linux wheel built in the build workflow
- name: Download pre-built Linux wheel
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bcefa405..8ae6a4e3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
- id: actionlint-docker
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
- rev: v0.9.10
+ rev: v0.15.1
hooks:
# Run the linter.
- id: ruff
diff --git a/Cargo.lock b/Cargo.lock
index e830a46b..f35f10bc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -593,9 +593,9 @@ dependencies = [
[[package]]
name = "bumpalo"
-version = "3.20.0"
+version = "3.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c81d250916401487680ed13b8b675660281dcfc3ab0121fe44c94bcab9eae2fb"
+checksum = "5c6f81257d10a0f602a294ae4182251151ff97dbb504ef9afcdda4a64b24d9b4"
[[package]]
name = "byteorder"
diff --git a/Cargo.toml b/Cargo.toml
index afa167bb..313640ec 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,7 +42,7 @@ protoc = ["datafusion-substrait/protoc"]
substrait = ["dep:datafusion-substrait"]
[dependencies]
-tokio = { version = "1.47", features = [
+tokio = { version = "1.49", features = [
"macros",
"rt",
"rt-multi-thread",
@@ -54,16 +54,16 @@ pyo3 = { version = "0.26", features = [
"abi3-py310",
] }
pyo3-async-runtimes = { version = "0.26", features = ["tokio-runtime"] }
-pyo3-log = "0.13.2"
+pyo3-log = "0.13.3"
arrow = { version = "57", features = ["pyarrow"] }
arrow-select = { version = "57" }
datafusion = { version = "52", features = ["avro", "unicode_expressions"] }
datafusion-substrait = { version = "52", optional = true }
datafusion-proto = { version = "52" }
datafusion-ffi = { version = "52" }
-prost = "0.14.1" # keep in line with `datafusion-substrait`
+prost = "0.14.3" # keep in line with `datafusion-substrait`
serde_json = "1"
-uuid = { version = "1.18", features = ["v4"] }
+uuid = { version = "1.21", features = ["v4"] }
mimalloc = { version = "0.1", optional = true, default-features = false, features = [
"local_dynamic_tls",
] }
@@ -77,11 +77,11 @@ object_store = { version = "0.12.4", features = [
"http",
] }
url = "2"
-log = "0.4.27"
+log = "0.4.29"
parking_lot = "0.12"
[build-dependencies]
-prost-types = "0.14.1" # keep in line with `datafusion-substrait`
+prost-types = "0.14.3" # keep in line with `datafusion-substrait`
pyo3-build-config = "0.26"
[lib]
diff --git a/pyproject.toml b/pyproject.toml
index 5a5128a2..08d64eca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ description = "Build and run queries against data"
readme = "README.md"
license = { file = "LICENSE.txt" }
requires-python = ">=3.10"
-keywords = ["datafusion", "dataframe", "rust", "query-engine"]
+keywords = ["dataframe", "datafusion", "query-engine", "rust"]
classifiers = [
"Development Status :: 2 - Pre-Alpha",
"Intended Audience :: Developers",
@@ -62,7 +62,7 @@ profile = "black"
python-source = "python"
module-name = "datafusion._internal"
include = [{ path = "Cargo.lock", format = "sdist" }]
-exclude = [".github/**", "ci/**", ".asf.yaml"]
+exclude = [".asf.yaml", ".github/**", "ci/**"]
# Require Cargo.lock is up to date
locked = true
features = ["substrait"]
@@ -77,19 +77,19 @@ select = ["ALL"]
ignore = [
"A001", # Allow using words like min as variable names
"A002", # Allow using words like filter as variable names
+ "A005", # Allow module named io
"ANN401", # Allow Any for wrapper classes
"COM812", # Recommended to ignore these rules when using with ruff-format
- "FIX002", # Allow TODO lines - consider removing at some point
"FBT001", # Allow boolean positional args
"FBT002", # Allow boolean positional args
+ "FIX002", # Allow TODO lines - consider removing at some point
"ISC001", # Recommended to ignore these rules when using with ruff-format
+ "N812", # Allow importing functions as `F`
+ "PD901", # Allow variable name df
+ "PLR0913", # Allow many arguments in function definition
"SLF001", # Allow accessing private members
"TD002", # Do not require author names in TODO statements
"TD003", # Allow TODO lines
- "PLR0913", # Allow many arguments in function definition
- "PD901", # Allow variable name df
- "N812", # Allow importing functions as `F`
- "A005", # Allow module named io
]
[tool.ruff.lint.pydocstyle]
@@ -99,7 +99,7 @@ convention = "google"
max-doc-length = 88
[tool.ruff.lint.flake8-boolean-trap]
-extend-allowed-calls = ["lit", "datafusion.lit"]
+extend-allowed-calls = ["datafusion.lit", "lit"]
# Disable docstring checking for these directories
[tool.ruff.lint.per-file-ignores]
@@ -108,68 +108,69 @@ extend-allowed-calls = ["lit", "datafusion.lit"]
"ARG",
"BLE001",
"D",
- "S101",
- "SLF",
"PD",
+ "PLC0415",
+ "PLR0913",
"PLR2004",
+ "PT004",
"PT011",
"RUF015",
+ "S101",
"S608",
- "PLR0913",
- "PT004",
+ "SLF",
]
"examples/*" = [
- "D",
- "W505",
- "E501",
- "T201",
- "S101",
- "PLR2004",
"ANN001",
"ANN202",
- "INP001",
+ "D",
"DTZ007",
+ "E501",
+ "INP001",
+ "PLR2004",
"RUF015",
+ "S101",
+ "T201",
+ "W505",
]
"dev/*" = [
+ "ANN001",
+ "C",
"D",
"E",
- "T",
- "S",
+ "ERA001",
+ "EXE",
+ "N817",
"PLR",
- "C",
+ "S",
"SIM",
+ "T",
"UP",
- "EXE",
- "N817",
- "ERA001",
- "ANN001",
]
"benchmarks/*" = [
+ "ANN001",
+ "BLE",
"D",
+ "E",
+ "ERA001",
+ "EXE",
"F",
- "T",
- "BLE",
"FURB",
+ "INP001",
"PLR",
- "E",
- "TD",
- "TRY",
"S",
"SIM",
- "EXE",
+ "T",
+ "TD",
+ "TRY",
"UP",
- "ERA001",
- "ANN001",
- "INP001",
]
"docs/*" = ["D"]
-"docs/source/conf.py" = ["ERA001", "ANN001", "INP001"]
+"docs/source/conf.py" = ["ANN001", "ERA001", "INP001"]
[tool.codespell]
-skip = ["./target", "uv.lock", "./python/tests/test_functions.py"]
+skip = ["./python/tests/test_functions.py", "./target", "uv.lock"]
count = true
-ignore-words-list = ["ans", "IST"]
+ignore-words-list = ["IST", "ans"]
[dependency-groups]
dev = [
@@ -182,8 +183,8 @@ dev = [
"pre-commit>=4.3.0",
"pyarrow>=19.0.0",
"pygithub==2.5.0",
- "pytest>=7.4.4",
"pytest-asyncio>=0.23.3",
+ "pytest>=7.4.4",
"pyyaml>=6.0.3",
"ruff>=0.9.1",
"toml>=0.10.2",
@@ -196,6 +197,6 @@ docs = [
"pickleshare>=0.7.5",
"pydata-sphinx-theme==0.8.0",
"setuptools>=75.3.0",
- "sphinx>=7.1.2",
"sphinx-autoapi>=3.4.0",
+ "sphinx>=7.1.2",
]
diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py
index 9df58f52..5760b894 100644
--- a/python/datafusion/expr.py
+++ b/python/datafusion/expr.py
@@ -20,6 +20,8 @@
See :ref:`Expressions` in the online documentation for more details.
"""
+# ruff: noqa: PLC0415
+
from __future__ import annotations
from collections.abc import Iterable, Sequence
diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py
index d4e5302b..eef23e74 100644
--- a/python/datafusion/user_defined.py
+++ b/python/datafusion/user_defined.py
@@ -583,11 +583,11 @@ class AggregateUDF:
AggregateUDF that is exported via the FFI bindings.
"""
if _is_pycapsule(func):
- aggregate = cast(AggregateUDF, object.__new__(AggregateUDF))
+ aggregate = cast("AggregateUDF", object.__new__(AggregateUDF))
aggregate._udaf = df_internal.AggregateUDF.from_pycapsule(func)
return aggregate
- capsule = cast(AggregateUDFExportable, func)
+ capsule = cast("AggregateUDFExportable", func)
name = str(capsule.__class__)
return AggregateUDF(
name=name,
diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py
index 71c08da2..9310da50 100644
--- a/python/tests/test_catalog.py
+++ b/python/tests/test_catalog.py
@@ -248,7 +248,7 @@ def test_exception_not_mangled(ctx: SessionContext):
schema.register_table("test_table", create_dataset())
- with pytest.raises(ValueError, match="^test_table is not an acceptable name$"):
+ with pytest.raises(ValueError, match=r"^test_table is not an acceptable name$"):
ctx.sql(f"select * from {catalog_name}.{schema_name}.test_table")
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 71abe292..de6b00ac 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -2790,7 +2790,7 @@ def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, res
def test_write_parquet_with_options_unsupported_encoding(df, tmp_path, encoding):
"""Test that unsupported Parquet encodings do not work."""
# BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
- with pytest.raises(BaseException, match="Encoding .*? is not supported"):
+ with pytest.raises(BaseException, match=r"Encoding .*? is not supported"):
df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index 7b3332ed..5a61a2dd 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -303,19 +303,19 @@ def py_flatten(arr):
lambda data: [np.concatenate([arr, arr]) for arr in data],
),
(
- lambda col: f.array_dims(col),
+ f.array_dims,
lambda data: [[len(r)] for r in data],
),
(
- lambda col: f.array_distinct(col),
+ f.array_distinct,
lambda data: [list(set(r)) for r in data],
),
(
- lambda col: f.list_distinct(col),
+ f.list_distinct,
lambda data: [list(set(r)) for r in data],
),
(
- lambda col: f.list_dims(col),
+ f.list_dims,
lambda data: [[len(r)] for r in data],
),
(
@@ -323,11 +323,11 @@ def py_flatten(arr):
lambda data: [r[0] for r in data],
),
(
- lambda col: f.array_empty(col),
+ f.array_empty,
lambda data: [len(r) == 0 for r in data],
),
(
- lambda col: f.empty(col),
+ f.empty,
lambda data: [len(r) == 0 for r in data],
),
(
@@ -343,11 +343,11 @@ def py_flatten(arr):
lambda data: [r[0] for r in data],
),
(
- lambda col: f.array_length(col),
+ f.array_length,
lambda data: [len(r) for r in data],
),
(
- lambda col: f.list_length(col),
+ f.list_length,
lambda data: [len(r) for r in data],
),
(
@@ -391,11 +391,11 @@ def py_flatten(arr):
lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data],
),
(
- lambda col: f.array_ndims(col),
+ f.array_ndims,
lambda data: [np.array(r).ndim for r in data],
),
(
- lambda col: f.list_ndims(col),
+ f.list_ndims,
lambda data: [np.array(r).ndim for r in data],
),
(
@@ -415,11 +415,11 @@ def py_flatten(arr):
lambda data: [np.insert(arr, 0, 99.0) for arr in data],
),
(
- lambda col: f.array_pop_back(col),
+ f.array_pop_back,
lambda data: [arr[:-1] for arr in data],
),
(
- lambda col: f.array_pop_front(col),
+ f.array_pop_front,
lambda data: [arr[1:] for arr in data],
),
(
diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py
index 12710cf0..92c31193 100644
--- a/python/tests/test_sql.py
+++ b/python/tests/test_sql.py
@@ -31,7 +31,7 @@ from . import generic as helpers
def test_no_table(ctx):
with pytest.raises(
ValueError,
- match="^Error during planning: table 'datafusion.public.b' not found$",
+ match=r"^Error during planning: table 'datafusion.public.b' not found$",
):
ctx.sql("SELECT a FROM b").collect()
diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py
index c0ba1d83..b2540fb5 100644
--- a/python/tests/test_udf.py
+++ b/python/tests/test_udf.py
@@ -15,7 +15,10 @@
# specific language governing permissions and limitations
# under the License.
+from uuid import UUID
+
import pyarrow as pa
+import pyarrow.compute as pc
import pytest
from datafusion import SessionContext, column, udf
from datafusion import functions as f
@@ -128,8 +131,6 @@ def test_udf_with_parameters_decorator(df) -> None:
def test_udf_with_metadata(ctx) -> None:
- from uuid import UUID
-
@udf([pa.string()], pa.uuid(), "stable")
def uuid_from_string(uuid_string):
return pa.array((UUID(s).bytes for s in uuid_string.to_pylist()), pa.uuid())
@@ -151,8 +152,6 @@ def test_udf_with_metadata(ctx) -> None:
def test_udf_with_nullability(ctx: SessionContext) -> None:
- import pyarrow.compute as pc
-
field_nullable_i64 = pa.field("with_nulls", type=pa.int64(), nullable=True)
field_non_nullable_i64 = pa.field("no_nulls", type=pa.int64(), nullable=False)
diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py
index 5aaf0066..38b935b7 100644
--- a/python/tests/test_udwf.py
+++ b/python/tests/test_udwf.py
@@ -433,8 +433,8 @@ def test_udwf_functions(complex_window_df, name, expr, expected):
[
udwf(SimpleWindowCount, pa.int64(), pa.int64(), "immutable"),
udwf(SimpleWindowCount, [pa.int64()], pa.int64(), "immutable"),
- udwf([pa.int64()], pa.int64(), "immutable")(lambda: SimpleWindowCount()),
- udwf(pa.int64(), pa.int64(), "immutable")(lambda: SimpleWindowCount()),
+ udwf([pa.int64()], pa.int64(), "immutable")(SimpleWindowCount),
+ udwf(pa.int64(), pa.int64(), "immutable")(SimpleWindowCount),
],
)
def test_udwf_overloads(udwf_func, count_window_df):
diff --git a/src/expr/indexed_field.rs b/src/expr/indexed_field.rs
index 1dfa0ed2..79f52817 100644
--- a/src/expr/indexed_field.rs
+++ b/src/expr/indexed_field.rs
@@ -15,12 +15,13 @@
// specific language governing permissions and limitations
// under the License.
-use crate::expr::PyExpr;
+use std::fmt::{Display, Formatter};
+
use datafusion::logical_expr::expr::{GetFieldAccess, GetIndexedField};
use pyo3::prelude::*;
-use std::fmt::{Display, Formatter};
use super::literal::PyLiteral;
+use crate::expr::PyExpr;
#[pyclass(frozen, name = "GetIndexedField", module = "datafusion.expr", subclass)]
#[derive(Clone)]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]