Copilot commented on code in PR #48622: URL: https://github.com/apache/arrow/pull/48622#discussion_r3409713771
########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,727 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd + +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit +from pyarrow.lib import ( # noqa: F401 + Array, + Buffer, + ChunkedArray, + ExtensionArray, + ExtensionScalar, + MemoryPool, + MonthDayNano, + Table, +) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field[Any]: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + + def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... 
+ def to_pandas_dtype(self) -> np.generic: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema: Any) -> Self: ... + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + @property + def unit(self) -> _Unit: ... + @property + def tz(self) -> _Tz: ... + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... 
+ +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... + +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) +_Precision_co = TypeVar("_Precision_co", default=Any, covariant=True) +_Scale_co = TypeVar("_Scale_co", default=Any, covariant=True) + +class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): + @property + def precision(self) -> _Precision_co: ... + @property + def scale(self) -> _Scale_co: ... + +class Decimal32Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal64Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal128Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal256Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class ListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class ListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... 
+ @property + def value_type(self) -> _DataTypeT_co: ... + +class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + @property + def list_size(self) -> int: ... + +class DictionaryMemo(_Weakrefable): ... + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar( + "_BasicValueT", bound=_BasicDataType[Any], default=_BasicDataType[Any] +) +_ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) +_K = TypeVar("_K", bound=DataType, default=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + @property + def index_type(self) -> _IndexT: ... + @property + def value_type(self) -> _BasicValueT: ... + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + @property + def key_type(self) -> _K: ... + @property + def item_field(self) -> Field[_ValueT]: ... + @property + def item_type(self) -> _ValueT: ... + @property + def keys_sorted(self) -> _Ordered: ... + +_Size = TypeVar("_Size", default=int) Review Comment: After defining `_Size` earlier (so it can be used by `FixedSizeListType`), this second `_Size` definition becomes a redefinition and may raise a type-checking error. Remove the duplicate definition. ########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,727 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd + +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit +from pyarrow.lib import ( # noqa: F401 + Array, + Buffer, + ChunkedArray, + ExtensionArray, + ExtensionScalar, + MemoryPool, + MonthDayNano, + Table, +) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field[Any]: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + + def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... + def to_pandas_dtype(self) -> np.generic: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema: Any) -> Self: ... 
+ +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) Review Comment: `TypeVar` constraints must be types; using literal `None` as a constraint (and as the default) is rejected by type checkers. Use `type(None)` (i.e., NoneType) instead so `_Tz` correctly represents `str | None`. ########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,727 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd + +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit +from pyarrow.lib import ( # noqa: F401 + Array, + Buffer, + ChunkedArray, + ExtensionArray, + ExtensionScalar, + MemoryPool, + MonthDayNano, + Table, +) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field[Any]: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + + def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... + def to_pandas_dtype(self) -> np.generic: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema: Any) -> Self: ... 
+ +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + @property + def unit(self) -> _Unit: ... + @property + def tz(self) -> _Tz: ... + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... 
+ +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) +_Precision_co = TypeVar("_Precision_co", default=Any, covariant=True) +_Scale_co = TypeVar("_Scale_co", default=Any, covariant=True) + +class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): + @property + def precision(self) -> _Precision_co: ... + @property + def scale(self) -> _Scale_co: ... + +class Decimal32Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal64Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal128Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal256Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class ListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class ListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): + @property Review Comment: `FixedSizeListType` is parameterized by `_Size`, but `_Size` is defined later in the file. 
Type checkers (mypy/pyright) will treat `_Size` as undefined at this point and fail to analyze the stub. Define `_Size` before it is first used. ########## python/pyarrow-stubs/pyarrow/_types.pyi: ########## @@ -0,0 +1,727 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd Review Comment: `pandas` is an optional runtime dependency for PyArrow; importing it unconditionally in a top-level stub makes type checking fail for users who have `pyarrow` installed without `pandas`. Mark the import as optional so missing `pandas` doesn't break analysis of unrelated parts of the stubs. ########## python/pyarrow-stubs/pyarrow/_stubs_typing.pyi: ########## @@ -0,0 +1,155 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt + +from collections.abc import Collection, Container, Iterator, Sequence, Sized +from decimal import Decimal +from typing import Any, Literal, Protocol, TypeAlias, TypeVar + +import numpy as np + +from numpy.typing import NDArray + +from pyarrow import lib +from pyarrow.lib import ChunkedArray + +ArrayLike: TypeAlias = Any +ScalarLike: TypeAlias = Any +Order: TypeAlias = Literal["ascending", "descending"] +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] +Compression: TypeAlias = Literal[ + "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy" +] +NullEncoding: TypeAlias = Literal["mask", "encode"] +NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] +TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] + +IntegerType: TypeAlias = ( + lib.Int8Type + | lib.Int16Type + | lib.Int32Type + | lib.Int64Type + | lib.UInt8Type + | lib.UInt16Type + | lib.UInt32Type + | lib.UInt64Type +) +PyScalar: TypeAlias = ( + bool + | int + | float + | Decimal + | str + | bytes + | dt.date + | dt.datetime + | dt.time + | dt.timedelta +) +NumpyScalar: TypeAlias = "np.generic[Any]" Review Comment: This type alias is currently a string literal (`"np.generic[Any]"`), which type checkers won’t treat as a proper type in a `TypeAlias` assignment. 
That in turn makes `NumpyScalarT_co = TypeVar(..., bound=NumpyScalar)` invalid/unhelpful. Use `np.generic` (or `np.generic[Any]` if supported by the checker) directly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org
