From 0bd844055f15303df1b2cbcab98150e1af138541 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 10 Mar 2026 01:32:33 +0100 Subject: [PATCH 01/21] check stubs are included at wheel build time diff --git c/python/CMakeLists.txt i/python/CMakeLists.txt index 6395b3e1e7..f71a495e22 100644 --- c/python/CMakeLists.txt +++ i/python/CMakeLists.txt @@ -1042,9 +1042,9 @@ if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}") install(CODE " execute_process( COMMAND \"${Python3_EXECUTABLE}\" - \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\" + \"${CMAKE_SOURCE_DIR}/scripts/update_stub_docstrings.py\" \"${CMAKE_INSTALL_PREFIX}\" - \"${CMAKE_CURRENT_SOURCE_DIR}\" + \"${CMAKE_SOURCE_DIR}\" RESULT_VARIABLE _pyarrow_stub_docstrings_result ) if(NOT _pyarrow_stub_docstrings_result EQUAL 0) --- python/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index d0ddb9009f89..d4f440789f16 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1054,9 +1054,9 @@ if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}") install(CODE " execute_process( COMMAND \"${Python3_EXECUTABLE}\" - \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\" + \"${CMAKE_SOURCE_DIR}/scripts/update_stub_docstrings.py\" \"${CMAKE_INSTALL_PREFIX}\" - \"${CMAKE_CURRENT_SOURCE_DIR}\" + \"${CMAKE_SOURCE_DIR}\" RESULT_VARIABLE _pyarrow_stub_docstrings_result ) if(NOT _pyarrow_stub_docstrings_result EQUAL 0) From b865f343c9c68ca529623013c8904aa688c77f42 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 22 Dec 2025 00:44:32 +0100 Subject: [PATCH 02/21] Add internal types and helpers --- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 133 +++ python/pyarrow-stubs/pyarrow/_types.pyi | 966 ++++++++++++++++++ python/pyarrow-stubs/pyarrow/error.pyi | 104 ++ python/pyarrow-stubs/pyarrow/io.pyi | 22 + python/pyarrow-stubs/pyarrow/lib.pyi | 25 + python/pyarrow-stubs/pyarrow/scalar.pyi | 22 + python/pyarrow/fs.py | 2 +- 7 files changed, 1273 insertions(+), 1 deletion(-) create mode 100644 python/pyarrow-stubs/pyarrow/_stubs_typing.pyi create mode 100644 python/pyarrow-stubs/pyarrow/_types.pyi create mode 100644 python/pyarrow-stubs/pyarrow/error.pyi create mode 100644 python/pyarrow-stubs/pyarrow/io.pyi create mode 100644 python/pyarrow-stubs/pyarrow/lib.pyi create mode 100644 python/pyarrow-stubs/pyarrow/scalar.pyi diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi new file mode 100644 index 000000000000..0715012fddc3 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -0,0 +1,133 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt + +from collections.abc import Collection, Iterator, Sequence +from decimal import Decimal +from typing import Any, Literal, Protocol, TypeAlias, TypeVar + +import numpy as np + +from numpy.typing import NDArray + +from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray + +ArrayLike: TypeAlias = Any +ScalarLike: TypeAlias = Any +Order: TypeAlias = Literal["ascending", "descending"] +JoinType: TypeAlias = Literal[ + "left semi", + "right semi", + "left anti", + "right anti", + "inner", + "left outer", + "right outer", + "full outer", +] +Compression: TypeAlias = Literal[ + "gzip", "bz2", "brotli", "lz4", "lz4_frame", "lz4_raw", "zstd", "snappy" +] +NullEncoding: TypeAlias = Literal["mask", "encode"] +NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] +TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] +Mask: TypeAlias = ( + Sequence[bool | None] + | NDArray[np.bool_] + | BooleanArray + | ChunkedArray[Any] +) +Indices: TypeAlias = ( + Sequence[int | None] + | NDArray[np.integer[Any]] + | IntegerArray + | ChunkedArray[Any] +) + +PyScalar: TypeAlias = (bool | int | float | Decimal | str | bytes | + dt.date | dt.datetime | dt.time | dt.timedelta) + +_T = TypeVar("_T") +_V = TypeVar("_V", covariant=True) + +SingleOrList: TypeAlias = list[_T] | _T + + +class SupportEq(Protocol): + def __eq__(self, other) -> bool: ... + + +class SupportLt(Protocol): + def __lt__(self, other) -> bool: ... + + +class SupportGt(Protocol): + def __gt__(self, other) -> bool: ... + + +class SupportLe(Protocol): + def __le__(self, other) -> bool: ... + + +class SupportGe(Protocol): + def __ge__(self, other) -> bool: ... + + +FilterTuple: TypeAlias = ( + tuple[str, Literal["=", "==", "!="], SupportEq] + | tuple[str, Literal["<"], SupportLt] + | tuple[str, Literal[">"], SupportGt] + | tuple[str, Literal["<="], SupportLe] + | tuple[str, Literal[">="], SupportGe] + | tuple[str, Literal["in", "not in"], Collection] + | tuple[str, str, Any] # Allow general str for operator to avoid type errors +) + + +class Buffer(Protocol): + ... + + +class SupportPyBuffer(Protocol): + ... + + +class SupportArrowStream(Protocol): + def __arrow_c_stream__(self, requested_schema=None) -> Any: ... + + +class SupportPyArrowArray(Protocol): + def __arrow_array__(self, type=None) -> Any: ... + + +class SupportArrowArray(Protocol): + def __arrow_c_array__(self, requested_schema=None) -> Any: ... + + +class SupportArrowDeviceArray(Protocol): + def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... + + +class SupportArrowSchema(Protocol): + def __arrow_c_schema__(self) -> Any: ... + + +class NullableCollection(Protocol[_V]): # type: ignore[reportInvalidTypeVarUse] + def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... + def __len__(self) -> int: ... + def __contains__(self, item: Any, /) -> bool: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi new file mode 100644 index 000000000000..3d802382ba1c --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -0,0 +1,966 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 +import sys + +from collections.abc import Mapping, Sequence, Iterable, Iterator +from decimal import Decimal # noqa: F401 + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + +from typing import Any, Generic, Literal + +import numpy as np +import pandas as pd + +from pyarrow._stubs_typing import SupportArrowSchema +from pyarrow.lib import ( # noqa: F401 + Array, + ChunkedArray, + ExtensionArray, + MemoryPool, + MonthDayNano, + Table, +) +from typing_extensions import TypeVar, deprecated + +from .io import Buffer +from .scalar import ExtensionScalar +from ._stubs_typing import TimeUnit + +class _Weakrefable: + ... + + +class _Metadata(_Weakrefable): + ... + + +class DataType(_Weakrefable): + def field(self, i: int) -> Field: ... + + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + + @property + def byte_width(self) -> int: ... + + @property + def num_fields(self) -> int: ... + + @property + def num_buffers(self) -> int: ... + + @property + def has_variadic_buffers(self) -> bool: ... + + # Properties that exist on specific subtypes but accessed generically + @property + def list_size(self) -> int: ... + + def __hash__(self) -> int: ... + + def equals(self, other: DataType | str, *, + check_metadata: bool = False) -> bool: ... + + def to_pandas_dtype(self) -> np.generic: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_schema__(self) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) + + +class _BasicDataType(DataType, Generic[_AsPyType]): + ... + + +class NullType(_BasicDataType[None]): + ... + + +class BoolType(_BasicDataType[bool]): + ... + + +class UInt8Type(_BasicDataType[int]): + ... + + +class Int8Type(_BasicDataType[int]): + ... + + +class UInt16Type(_BasicDataType[int]): + ... + + +class Int16Type(_BasicDataType[int]): + ... + + +class UInt32Type(_BasicDataType[int]): + ... + + +class Int32Type(_BasicDataType[int]): + ... + + +class UInt64Type(_BasicDataType[int]): + ... + + +class Int64Type(_BasicDataType[int]): + ... + + +class Float16Type(_BasicDataType[float]): + ... + + +class Float32Type(_BasicDataType[float]): + ... + + +class Float64Type(_BasicDataType[float]): + ... + + +class Date32Type(_BasicDataType[dt.date]): + ... + + +class Date64Type(_BasicDataType[dt.date]): + ... + + +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): + ... + + +class StringType(_BasicDataType[str]): + ... + + +class LargeStringType(_BasicDataType[str]): + ... + + +class StringViewType(_BasicDataType[str]): + ... + + +class BinaryType(_BasicDataType[bytes]): + ... + + +class LargeBinaryType(_BasicDataType[bytes]): + ... + + +class BinaryViewType(_BasicDataType[bytes]): + ... + + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + + @property + def unit(self) -> _Unit: ... + + @property + def tz(self) -> _Tz: ... + + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... + + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... + + +class FixedSizeBinaryType(_BasicDataType[Decimal]): + ... + + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) + + +class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + + @property + def scale(self) -> _Scale: ... + + +class ListType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class LargeListType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + @property + def value_type(self) -> _DataTypeT: ... + + +class ListViewType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class LargeListViewType(DataType, Generic[_DataTypeT]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + +class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): + @property + def value_field(self) -> Field[_DataTypeT]: ... + + @property + def value_type(self) -> _DataTypeT: ... + + @property + def list_size(self) -> int: ... + + +class DictionaryMemo(_Weakrefable): + ... + + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + + @property + def index_type(self) -> _IndexT: ... + + @property + def value_type(self) -> _BasicValueT: ... + + +_K = TypeVar("_K", bound=DataType) + + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + + @property + def key_type(self) -> _K: ... + + @property + def item_field(self) -> Field[_ValueT]: ... + + @property + def item_type(self) -> _ValueT: ... + + @property + def keys_sorted(self) -> _Ordered: ... + + +_Size = TypeVar("_Size", default=int) + + +class StructType(DataType): + def get_field_index(self, name: str) -> int: ... + + def field(self, i: int | str) -> Field: ... + + def get_all_field_indices(self, name: str) -> list[int]: ... + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[Field]: ... + + __getitem__ = field + @property + def names(self) -> list[str]: ... + + @property + def fields(self) -> list[Field]: ... + + +class UnionType(DataType): + @property + def mode(self) -> Literal["sparse", "dense"]: ... + + @property + def type_codes(self) -> list[int]: ... + + def __len__(self) -> int: ... + + def __iter__(self) -> Iterator[Field]: ... + + def field(self, i: int) -> Field: ... + + __getitem__ = field + + +class SparseUnionType(UnionType): + @property + def mode(self) -> Literal["sparse"]: ... + + +class DenseUnionType(UnionType): + @property + def mode(self) -> Literal["dense"]: ... + + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... + + +_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) + + +class BaseExtensionType(DataType): + def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + + @property + def extension_name(self) -> str: ... + + @property + def storage_type(self) -> DataType: ... + + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + + +class ExtensionType(BaseExtensionType): + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + + def __arrow_ext_serialize__(self) -> bytes: ... + + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes) -> Self: ... + + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + @property + def value_type(self) -> _ValueT: ... + + @property + def shape(self) -> list[int]: ... + + @property + def dim_names(self) -> list[str] | None: ... + + @property + def permutation(self) -> list[int] | None: ... + + +class Bool8Type(BaseExtensionType): + ... + + +class UuidType(BaseExtensionType): + ... + + +class JsonType(BaseExtensionType): + ... + + +class OpaqueType(BaseExtensionType): + @property + def type_name(self) -> str: ... + + @property + def vendor_name(self) -> str: ... + + +class UnknownExtensionType(ExtensionType): + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + + +def register_extension_type(ext_type: ExtensionType) -> None: ... + + +def unregister_extension_type(type_name: str) -> None: ... + + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + def __init__( + self, __arg0__: Mapping[str | bytes, str | bytes] + | Iterable[tuple[str, str]] + | KeyValueMetadata + | None = None, **kwargs: str + ) -> None: ... + + def equals(self, other: KeyValueMetadata) -> bool: ... + + def __len__(self) -> int: ... + + def __contains__(self, /, __key: object) -> bool: ... # type: ignore[override] + + def __getitem__(self, /, __key: Any) -> Any: ... # type: ignore[override] + + def __iter__(self) -> Iterator[bytes]: ... + + def get_all(self, key: str) -> list[bytes]: ... + + def to_dict(self) -> dict[bytes, bytes]: ... + + +class Field(_Weakrefable, Generic[_DataTypeT]): + def equals(self, other: Field, check_metadata: bool = False) -> bool: ... + + def __hash__(self) -> int: ... + + @property + def nullable(self) -> bool: ... + + @property + def name(self) -> str: ... + + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + + @property + def type(self) -> _DataTypeT: ... + def with_metadata(self, metadata: dict[bytes | str, bytes | str] | + Mapping[bytes | str, bytes | str] | Any) -> Self: ... + + def remove_metadata(self) -> Self: ... + + def with_type(self, new_type: DataType) -> Field: ... + + def with_name(self, name: str) -> Self: ... + + def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ... + + def flatten(self) -> list[Field]: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + + def __arrow_c_schema__(self) -> Any: ... + + @classmethod + def _import_from_c_capsule(cls, schema) -> Self: ... + + +class Schema(_Weakrefable): + def __len__(self) -> int: ... + + def __getitem__(self, key: str | int) -> Field: ... + + _field = __getitem__ + def __iter__(self) -> Iterator[Field]: ... + + def __hash__(self) -> int: ... + + def __sizeof__(self) -> int: ... + @property + def pandas_metadata(self) -> dict: ... + + @property + def names(self) -> list[str]: ... + + @property + def types(self) -> list[DataType]: ... + + @property + def metadata(self) -> dict[bytes, bytes]: ... + + def empty_table(self) -> Table: ... + + def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... + + @classmethod + def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | + None = None) -> Schema: ... + + def field(self, i: int | str | bytes) -> Field: ... + + @deprecated("Use 'field' instead") + def field_by_name(self, name: str) -> Field: ... + + def get_field_index(self, name: str) -> int: ... + + def get_all_field_indices(self, name: str) -> list[int]: ... + + def append(self, field: Field) -> Schema: ... + + def insert(self, i: int, field: Field) -> Schema: ... + + def remove(self, i: int) -> Schema: ... + + def set(self, i: int, field: Field) -> Schema: ... + + @deprecated("Use 'with_metadata' instead") + def add_metadata(self, metadata: dict) -> Schema: ... + + def with_metadata(self, metadata: dict) -> Schema: ... + + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + + def remove_metadata(self) -> Schema: ... + + def to_string( + self, + truncate_metadata: bool = True, + show_field_metadata: bool = True, + show_schema_metadata: bool = True, + element_size_limit: int | None = None, + ) -> str: ... + + def _export_to_c(self, out_ptr: int) -> None: ... + + @classmethod + def _import_from_c(cls, in_ptr: int) -> Schema: ... + + def __arrow_c_schema__(self) -> Any: ... + + @staticmethod + def _import_from_c_capsule(schema: Any) -> Schema: ... + + +def unify_schemas( + schemas: Sequence[Schema], + *, + promote_options: Literal["default", "permissive"] = "default" +) -> Schema: ... + + +def field( + name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None, + nullable: bool = ..., + metadata: dict[Any, Any] | None = None +) -> Field[_DataTypeT] | Field[Any]: ... + + +def null() -> NullType: ... + + +def bool_() -> BoolType: ... + + +def uint8() -> UInt8Type: ... + + +def int8() -> Int8Type: ... + + +def uint16() -> UInt16Type: ... + + +def int16() -> Int16Type: ... + + +def uint32() -> UInt32Type: ... + + +def int32() -> Int32Type: ... + + +def int64() -> Int64Type: ... + + +def uint64() -> UInt64Type: ... + + +def timestamp( + unit: _Unit | str, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ... + + +def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... + + +def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... + + +def duration(unit: _Unit | str) -> DurationType[_Unit]: ... + + +def month_day_nano_interval() -> MonthDayNanoIntervalType: ... + + +def date32() -> Date32Type: ... + + +def date64() -> Date64Type: ... + + +def float16() -> Float16Type: ... + + +def float32() -> Float32Type: ... + + +def float64() -> Float64Type: ... + + +def decimal32(precision: _Precision, scale: _Scale | + None = None) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal64(precision: _Precision, scale: _Scale | + None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal128(precision: _Precision, scale: _Scale | + None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... + + +def decimal256(precision: _Precision, scale: _Scale | + None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... + + +def string() -> StringType: ... + + +utf8 = string + + +def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ... + + +def large_binary() -> LargeBinaryType: ... + + +def large_string() -> LargeStringType: ... + + +large_utf8 = large_string + + +def binary_view() -> BinaryViewType: ... + + +def string_view() -> StringViewType: ... + + +def list_( + value_type: _DataTypeT | Field[_DataTypeT] | None = None, + list_size: Literal[-1] | _Size | None = None +) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ... + + +def large_list(value_type: _DataTypeT | + Field[_DataTypeT] | None = None) -> LargeListType[_DataTypeT]: ... + + +def list_view(value_type: _DataTypeT | + Field[_DataTypeT] | None = None) -> ListViewType[_DataTypeT]: ... + + +def large_list_view( + value_type: _DataTypeT | Field[_DataTypeT] | None = None +) -> LargeListViewType[_DataTypeT]: ... + + +def map_( + key_type: _K | Field | str | None = None, + item_type: _ValueT | Field | str | None = None, + keys_sorted: bool | None = None +) -> MapType[_K, _ValueT, Literal[False]]: ... + + +def dictionary( + index_type: _IndexT | str, + value_type: _BasicValueT | str, + ordered: _Ordered | None = None +) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... + + +def struct( + fields: Iterable[ + Field[Any] + | tuple[str, Field[Any] | None] + | tuple[str, DataType | None] + ] | Mapping[str, Field[Any] | DataType | None], +) -> StructType: ... + + +def sparse_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> SparseUnionType: ... + + +def dense_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> DenseUnionType: ... + + +def union( + child_fields: list[Field[Any]], mode: Literal["sparse", "dense"] | int | str, + type_codes: list[int] | None = None) -> SparseUnionType | DenseUnionType: ... + + +def run_end_encoded( + run_end_type: _RunEndType | str | None, value_type: _BasicValueT | str | None +) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... + + +def json_(storage_type: DataType = ...) -> JsonType: ... + + +def uuid() -> UuidType: ... + + +def fixed_shape_tensor( + value_type: _ValueT, + shape: Sequence[int], + dim_names: Sequence[str] | None = None, + permutation: Sequence[int] | None = None, +) -> FixedShapeTensorType[_ValueT]: ... + + +def bool8() -> Bool8Type: ... + + +def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... + + +def type_for_alias(name: Any) -> DataType: ... + + +def schema( + fields: ( + Iterable[Field[Any]] + | Iterable[tuple[str, DataType | str | None]] + | Mapping[Any, DataType | str | None] + ), + metadata: Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] | None = None, +) -> Schema: ... + + +def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... + + +__all__ = [ + "_Weakrefable", + "_Metadata", + "DataType", + "_BasicDataType", + "NullType", + "BoolType", + "UInt8Type", + "Int8Type", + "UInt16Type", + "Int16Type", + "UInt32Type", + "Int32Type", + "UInt64Type", + "Int64Type", + "Float16Type", + "Float32Type", + "Float64Type", + "Date32Type", + "Date64Type", + "MonthDayNanoIntervalType", + "StringType", + "LargeStringType", + "StringViewType", + "BinaryType", + "LargeBinaryType", + "BinaryViewType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "ListType", + "LargeListType", + "ListViewType", + "LargeListViewType", + "FixedSizeListType", + "DictionaryMemo", + "DictionaryType", + "MapType", + "StructType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "RunEndEncodedType", + "BaseExtensionType", + "ExtensionType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "KeyValueMetadata", + "Field", + "Schema", + "unify_schemas", + "field", + "null", + "bool_", + "uint8", + "int8", + "uint16", + "int16", + "uint32", + "int32", + "int64", + "uint64", + "timestamp", + "time32", + "time64", + "duration", + "month_day_nano_interval", + "date32", + "date64", + "float16", + "float32", + "float64", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "string", + "utf8", + "binary", + "large_binary", + "large_string", + "large_utf8", + "binary_view", + "string_view", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "dictionary", + "struct", + "sparse_union", + "dense_union", + "union", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "type_for_alias", + "schema", + "from_numpy_dtype", + "_Unit", + "_Tz", + "_Time32Unit", + "_Time64Unit", + "_DataTypeT", +] diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi new file mode 100644 index 000000000000..eac936afcb53 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/error.pyi @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self + + +class ArrowException(Exception): + ... + + +class ArrowInvalid(ValueError, ArrowException): + ... + + +class ArrowMemoryError(MemoryError, ArrowException): + ... + + +class ArrowKeyError(KeyError, ArrowException): + ... + + +class ArrowTypeError(TypeError, ArrowException): + ... + + +class ArrowNotImplementedError(NotImplementedError, ArrowException): + ... + + +class ArrowCapacityError(ArrowException): + ... + + +class ArrowIndexError(IndexError, ArrowException): + ... + + +class ArrowSerializationError(ArrowException): + ... + + +class ArrowCancelled(ArrowException): + signum: int | None + def __init__(self, message: str, signum: int | None = None) -> None: ... + + +ArrowIOError = IOError + + +class StopToken: + ... + + +def enable_signal_handlers(enable: bool) -> None: ... + + +have_signal_refcycle: bool + + +class SignalStopHandler: + def __enter__(self) -> Self: ... + def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... + def __dealloc__(self) -> None: ... + @property + def stop_token(self) -> StopToken: ... + + +__all__ = [ + "ArrowException", + "ArrowInvalid", + "ArrowMemoryError", + "ArrowKeyError", + "ArrowTypeError", + "ArrowNotImplementedError", + "ArrowCapacityError", + "ArrowIndexError", + "ArrowSerializationError", + "ArrowCancelled", + "ArrowIOError", + "StopToken", + "enable_signal_handlers", + "have_signal_refcycle", + "SignalStopHandler", +] diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi new file mode 100644 index 000000000000..467ec48cc76d --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/io.pyi @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Placeholder stub - complete annotations in future PR.""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/lib.pyi b/python/pyarrow-stubs/pyarrow/lib.pyi new file mode 100644 index 000000000000..775434be2ea4 --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/lib.pyi @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Placeholder stub for pyarrow.lib C extension module. + +Complete type annotations will be added in subsequent PRs. +""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi new file mode 100644 index 000000000000..467ec48cc76d --- /dev/null +++ b/python/pyarrow-stubs/pyarrow/scalar.pyi @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Placeholder stub - complete annotations in future PR.""" + +from typing import Any + +def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index 670ccaaf2455..f055c5081826 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -111,7 +111,7 @@ def _ensure_filesystem(filesystem, *, use_mmap=False): else: # handle fsspec-compatible filesystems try: - import fsspec + import fsspec # type: ignore[import-untyped] except ImportError: pass else: From 37d87fb663fa3dfe39e414f080e959762f111813 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 9 Feb 2026 21:08:58 +0100 Subject: [PATCH 03/21] Update python/pyarrow-stubs/pyarrow/_stubs_typing.pyi Co-authored-by: Dan Redding <125183946+dangotbanned@users.noreply.github.com> --- python/pyarrow-stubs/pyarrow/_stubs_typing.pyi | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 0715012fddc3..0e4013c2b9e6 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -46,16 +46,29 @@ Compression: TypeAlias = Literal[ NullEncoding: TypeAlias = Literal["mask", "encode"] NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] +from pyarrow import lib + +IntegerType: TypeAlias = ( + lib.Int8Type + | lib.Int16Type + | lib.Int32Type + | lib.Int64Type + | lib.UInt8Type + | lib.UInt16Type + | lib.UInt32Type + | lib.UInt64Type +) + Mask: TypeAlias = ( Sequence[bool | None] | NDArray[np.bool_] - | BooleanArray + | lib.Array[lib.Scalar[lib.BoolType]] | ChunkedArray[Any] ) Indices: TypeAlias = ( Sequence[int | None] | NDArray[np.integer[Any]] - | IntegerArray + | lib.Array[lib.Scalar[IntegerType]] | ChunkedArray[Any] ) From 43ff40f14574a11efc5058ea0ae1507e3330c0e7 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 9 Mar 2026 22:16:26 +0100 Subject: [PATCH 04/21] post rebase changes --- python/CMakeLists.txt | 4 +- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 14 ++--- python/pyarrow-stubs/pyarrow/_types.pyi | 63 +++++++++---------- python/pyarrow-stubs/pyarrow/error.pyi | 8 +-- python/pyarrow-stubs/pyarrow/io.pyi | 10 ++- python/pyarrow-stubs/pyarrow/scalar.pyi | 10 ++- 6 files changed, 58 insertions(+), 51 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index d4f440789f16..d0ddb9009f89 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1054,9 +1054,9 @@ if(EXISTS "${PYARROW_STUBS_SOURCE_DIR}") install(CODE " execute_process( COMMAND \"${Python3_EXECUTABLE}\" - \"${CMAKE_SOURCE_DIR}/scripts/update_stub_docstrings.py\" + \"${CMAKE_CURRENT_SOURCE_DIR}/scripts/update_stub_docstrings.py\" \"${CMAKE_INSTALL_PREFIX}\" - \"${CMAKE_SOURCE_DIR}\" + \"${CMAKE_CURRENT_SOURCE_DIR}\" RESULT_VARIABLE _pyarrow_stub_docstrings_result ) if(NOT _pyarrow_stub_docstrings_result EQUAL 0) diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 0e4013c2b9e6..2b8230758958 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -25,6 +25,7 @@ import numpy as np from numpy.typing import NDArray +from pyarrow import lib from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray ArrayLike: TypeAlias = Any @@ -46,7 +47,6 @@ Compression: TypeAlias = Literal[ NullEncoding: TypeAlias = Literal["mask", "encode"] NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] -from pyarrow import lib IntegerType: TypeAlias = ( lib.Int8Type @@ -82,23 +82,23 @@ SingleOrList: TypeAlias = list[_T] | _T class SupportEq(Protocol): - def __eq__(self, other) -> bool: ... + def __eq__(self, other: object) -> bool: ... class SupportLt(Protocol): - def __lt__(self, other) -> bool: ... + def __lt__(self, other: object) -> bool: ... class SupportGt(Protocol): - def __gt__(self, other) -> bool: ... + def __gt__(self, other: object) -> bool: ... class SupportLe(Protocol): - def __le__(self, other) -> bool: ... + def __le__(self, other: object) -> bool: ... class SupportGe(Protocol): - def __ge__(self, other) -> bool: ... + def __ge__(self, other: object) -> bool: ... FilterTuple: TypeAlias = ( @@ -140,7 +140,7 @@ class SupportArrowSchema(Protocol): def __arrow_c_schema__(self) -> Any: ... -class NullableCollection(Protocol[_V]): # type: ignore[reportInvalidTypeVarUse] +class NullableCollection(Protocol[_V]): def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... def __len__(self) -> int: ... def __contains__(self, item: Any, /) -> bool: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 3d802382ba1c..d646551eecb3 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -16,22 +16,18 @@ # under the License. import datetime as dt # noqa: F401 -import sys from collections.abc import Mapping, Sequence, Iterable, Iterator from decimal import Decimal # noqa: F401 - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self - from typing import Any, Generic, Literal import numpy as np import pandas as pd -from pyarrow._stubs_typing import SupportArrowSchema +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportArrowSchema, TimeUnit +from pyarrow.io import Buffer from pyarrow.lib import ( # noqa: F401 Array, ChunkedArray, @@ -40,11 +36,7 @@ from pyarrow.lib import ( # noqa: F401 MonthDayNano, Table, ) -from typing_extensions import TypeVar, deprecated - -from .io import Buffer -from .scalar import ExtensionScalar -from ._stubs_typing import TimeUnit +from pyarrow.scalar import ExtensionScalar class _Weakrefable: ... @@ -226,7 +218,12 @@ class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): def unit(self) -> _Unit: ... -class FixedSizeBinaryType(_BasicDataType[Decimal]): +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + + +class FixedSizeBinaryType( + _BasicDataType[_FixedSizeBinaryAsPyType], Generic[_FixedSizeBinaryAsPyType] +): ... @@ -234,7 +231,7 @@ _Precision = TypeVar("_Precision", default=Any) _Scale = TypeVar("_Scale", default=Any) -class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): +class Decimal32Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... @@ -242,7 +239,7 @@ class Decimal32Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): def scale(self) -> _Scale: ... -class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): +class Decimal64Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... @@ -250,7 +247,7 @@ class Decimal64Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): def scale(self) -> _Scale: ... -class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): +class Decimal128Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... @@ -258,7 +255,7 @@ class Decimal128Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): def scale(self) -> _Scale: ... -class Decimal256Type(FixedSizeBinaryType, Generic[_Precision, _Scale]): +class Decimal256Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... @@ -491,7 +488,7 @@ def unregister_extension_type(type_name: str) -> None: ... class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __init__( self, __arg0__: Mapping[str | bytes, str | bytes] - | Iterable[tuple[str, str]] + | Iterable[tuple[str | bytes, str | bytes]] | KeyValueMetadata | None = None, **kwargs: str ) -> None: ... @@ -500,9 +497,9 @@ class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __len__(self) -> int: ... - def __contains__(self, /, __key: object) -> bool: ... # type: ignore[override] + def __contains__(self, /, __key: object) -> bool: ... - def __getitem__(self, /, __key: Any) -> Any: ... # type: ignore[override] + def __getitem__(self, /, __key: Any) -> Any: ... def __iter__(self) -> Iterator[bytes]: ... @@ -636,7 +633,7 @@ def unify_schemas( def field( name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None, - nullable: bool = ..., + nullable: bool = True, metadata: dict[Any, Any] | None = None ) -> Field[_DataTypeT] | Field[Any]: ... @@ -702,20 +699,20 @@ def float32() -> Float32Type: ... def float64() -> Float64Type: ... -def decimal32(precision: _Precision, scale: _Scale | - None = None) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... +def decimal32(precision: _Precision, + scale: _Scale | Literal[0] = 0) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... -def decimal64(precision: _Precision, scale: _Scale | - None = None) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... +def decimal64(precision: _Precision, + scale: _Scale | Literal[0] = 0) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... -def decimal128(precision: _Precision, scale: _Scale | - None = None) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... +def decimal128(precision: _Precision, + scale: _Scale | Literal[0] = 0) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... -def decimal256(precision: _Precision, scale: _Scale | - None = None) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... +def decimal256(precision: _Precision, + scale: _Scale | Literal[0] = 0) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... def string() -> StringType: ... @@ -724,7 +721,7 @@ def string() -> StringType: ... utf8 = string -def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType: ... +def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType[bytes]: ... def large_binary() -> LargeBinaryType: ... @@ -764,8 +761,8 @@ def large_list_view( def map_( key_type: _K | Field | str | None = None, item_type: _ValueT | Field | str | None = None, - keys_sorted: bool | None = None -) -> MapType[_K, _ValueT, Literal[False]]: ... + keys_sorted: _Ordered | None = None +) -> MapType[_K, _ValueT, _Ordered]: ... def dictionary( diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi index eac936afcb53..6a010071de08 100644 --- a/python/pyarrow-stubs/pyarrow/error.pyi +++ b/python/pyarrow-stubs/pyarrow/error.pyi @@ -15,12 +15,7 @@ # specific language governing permissions and limitations # under the License. -import sys - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self +from typing_extensions import Self class ArrowException(Exception): @@ -80,7 +75,6 @@ have_signal_refcycle: bool class SignalStopHandler: def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... - def __dealloc__(self) -> None: ... @property def stop_token(self) -> StopToken: ... diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi index 467ec48cc76d..856fb093a6cd 100644 --- a/python/pyarrow-stubs/pyarrow/io.pyi +++ b/python/pyarrow-stubs/pyarrow/io.pyi @@ -15,8 +15,16 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub - complete annotations in future PR.""" +"""Placeholder stub - complete annotations in future PR. + +At runtime, these symbols are provided by the pyarrow.lib C extension. +""" from typing import Any + +class Buffer: + ... + + def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi index 467ec48cc76d..384507d2a2b6 100644 --- a/python/pyarrow-stubs/pyarrow/scalar.pyi +++ b/python/pyarrow-stubs/pyarrow/scalar.pyi @@ -15,8 +15,16 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub - complete annotations in future PR.""" +"""Placeholder stub - complete annotations in future PR. + +At runtime, these symbols are provided by the pyarrow.lib C extension. +""" from typing import Any + +class ExtensionScalar: + ... + + def __getattr__(name: str) -> Any: ... From 07c1398d96e89b65bf524658fe1f7524f3b452c5 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 13 Mar 2026 16:27:18 +0100 Subject: [PATCH 05/21] linting and formatting --- python/pyarrow-stubs/pyarrow/__init__.pyi | 8 +- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 55 +- python/pyarrow-stubs/pyarrow/_types.pyi | 481 ++++-------------- python/pyarrow-stubs/pyarrow/error.pyi | 54 +- python/pyarrow-stubs/pyarrow/io.pyi | 11 +- python/pyarrow-stubs/pyarrow/lib.pyi | 6 +- python/pyarrow-stubs/pyarrow/scalar.pyi | 11 +- 7 files changed, 134 insertions(+), 492 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi index ccec8d5abc07..9c7120c7f807 100644 --- a/python/pyarrow-stubs/pyarrow/__init__.pyi +++ b/python/pyarrow-stubs/pyarrow/__init__.pyi @@ -15,11 +15,9 @@ # specific language governing permissions and limitations # under the License. -"""Type stubs for PyArrow. - -This is a placeholder stub file. -Complete type annotations will be added in subsequent PRs. -""" +# Type stubs for PyArrow. +# This is a placeholder stub file. +# Complete type annotations will be added in subsequent PRs. from typing import Any diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 2b8230758958..1d5f7600c4b7 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -26,7 +26,7 @@ import numpy as np from numpy.typing import NDArray from pyarrow import lib -from pyarrow.lib import BooleanArray, IntegerArray, ChunkedArray +from pyarrow.lib import ChunkedArray ArrayLike: TypeAlias = Any ScalarLike: TypeAlias = Any @@ -72,34 +72,38 @@ Indices: TypeAlias = ( | ChunkedArray[Any] ) -PyScalar: TypeAlias = (bool | int | float | Decimal | str | bytes | - dt.date | dt.datetime | dt.time | dt.timedelta) +PyScalar: TypeAlias = ( + bool + | int + | float + | Decimal + | str + | bytes + | dt.date + | dt.datetime + | dt.time + | dt.timedelta +) _T = TypeVar("_T") _V = TypeVar("_V", covariant=True) SingleOrList: TypeAlias = list[_T] | _T - class SupportEq(Protocol): - def __eq__(self, other: object) -> bool: ... - + def __eq__(self, other: object, /) -> bool: ... class SupportLt(Protocol): - def __lt__(self, other: object) -> bool: ... - + def __lt__(self, other: object, /) -> bool: ... class SupportGt(Protocol): - def __gt__(self, other: object) -> bool: ... - + def __gt__(self, other: object, /) -> bool: ... class SupportLe(Protocol): - def __le__(self, other: object) -> bool: ... - + def __le__(self, other: object, /) -> bool: ... class SupportGe(Protocol): - def __ge__(self, other: object) -> bool: ... - + def __ge__(self, other: object, /) -> bool: ... FilterTuple: TypeAlias = ( tuple[str, Literal["=", "==", "!="], SupportEq] @@ -111,35 +115,24 @@ FilterTuple: TypeAlias = ( | tuple[str, str, Any] # Allow general str for operator to avoid type errors ) - -class Buffer(Protocol): - ... - - -class SupportPyBuffer(Protocol): - ... - +class Buffer(Protocol): ... +class SupportPyBuffer(Protocol): ... class SupportArrowStream(Protocol): - def __arrow_c_stream__(self, requested_schema=None) -> Any: ... - + def __arrow_c_stream__(self, requested_schema=None, /) -> Any: ... class SupportPyArrowArray(Protocol): - def __arrow_array__(self, type=None) -> Any: ... - + def __arrow_array__(self, type=None, /) -> Any: ... class SupportArrowArray(Protocol): - def __arrow_c_array__(self, requested_schema=None) -> Any: ... - + def __arrow_c_array__(self, requested_schema=None, /) -> Any: ... class SupportArrowDeviceArray(Protocol): - def __arrow_c_device_array__(self, requested_schema=None, **kwargs) -> Any: ... - + def __arrow_c_device_array__(self, requested_schema=None, /, **kwargs) -> Any: ... class SupportArrowSchema(Protocol): def __arrow_c_schema__(self) -> Any: ... - class NullableCollection(Protocol[_V]): def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... def __len__(self) -> int: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index d646551eecb3..1fd5c5002d1e 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -38,276 +38,157 @@ from pyarrow.lib import ( # noqa: F401 ) from pyarrow.scalar import ExtensionScalar -class _Weakrefable: - ... - - -class _Metadata(_Weakrefable): - ... - +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... class DataType(_Weakrefable): def field(self, i: int) -> Field: ... - @property def id(self) -> int: ... @property def bit_width(self) -> int: ... - @property def byte_width(self) -> int: ... - @property def num_fields(self) -> int: ... - @property def num_buffers(self) -> int: ... - @property def has_variadic_buffers(self) -> bool: ... # Properties that exist on specific subtypes but accessed generically @property def list_size(self) -> int: ... - def __hash__(self) -> int: ... - - def equals(self, other: DataType | str, *, - check_metadata: bool = False) -> bool: ... - + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... def to_pandas_dtype(self) -> np.generic: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod def _import_from_c(cls, in_ptr: int) -> Self: ... - def __arrow_c_schema__(self) -> Any: ... - @classmethod def _import_from_c_capsule(cls, schema) -> Self: ... - _AsPyType = TypeVar("_AsPyType") _DataTypeT = TypeVar("_DataTypeT", bound=DataType) - -class _BasicDataType(DataType, Generic[_AsPyType]): - ... - - -class NullType(_BasicDataType[None]): - ... - - -class BoolType(_BasicDataType[bool]): - ... - - -class UInt8Type(_BasicDataType[int]): - ... - - -class Int8Type(_BasicDataType[int]): - ... - - -class UInt16Type(_BasicDataType[int]): - ... - - -class Int16Type(_BasicDataType[int]): - ... - - -class UInt32Type(_BasicDataType[int]): - ... - - -class Int32Type(_BasicDataType[int]): - ... - - -class UInt64Type(_BasicDataType[int]): - ... - - -class Int64Type(_BasicDataType[int]): - ... - - -class Float16Type(_BasicDataType[float]): - ... - - -class Float32Type(_BasicDataType[float]): - ... - - -class Float64Type(_BasicDataType[float]): - ... - - -class Date32Type(_BasicDataType[dt.date]): - ... - - -class Date64Type(_BasicDataType[dt.date]): - ... - - -class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): - ... - - -class StringType(_BasicDataType[str]): - ... - - -class LargeStringType(_BasicDataType[str]): - ... - - -class StringViewType(_BasicDataType[str]): - ... - - -class BinaryType(_BasicDataType[bytes]): - ... - - -class LargeBinaryType(_BasicDataType[bytes]): - ... - - -class BinaryViewType(_BasicDataType[bytes]): - ... - +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... _Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) _Tz = TypeVar("_Tz", str, None, default=None) - class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): - @property def unit(self) -> _Unit: ... - @property def tz(self) -> _Tz: ... - _Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) - class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): @property def unit(self) -> _Time32Unit: ... - _Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) - class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): @property def unit(self) -> _Time64Unit: ... - class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): @property def unit(self) -> _Unit: ... - _FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) - class FixedSizeBinaryType( - _BasicDataType[_FixedSizeBinaryAsPyType], Generic[_FixedSizeBinaryAsPyType] -): - ... - + _BasicDataType[_FixedSizeBinaryAsPyType] +): ... _Precision = TypeVar("_Precision", default=Any) _Scale = TypeVar("_Scale", default=Any) - class Decimal32Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... - @property def scale(self) -> _Scale: ... - class Decimal64Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... - @property def scale(self) -> _Scale: ... - class Decimal128Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... - @property def scale(self) -> _Scale: ... - class Decimal256Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): @property def precision(self) -> _Precision: ... - @property def scale(self) -> _Scale: ... - class ListType(DataType, Generic[_DataTypeT]): @property def value_field(self) -> Field[_DataTypeT]: ... - @property def value_type(self) -> _DataTypeT: ... - class LargeListType(DataType, Generic[_DataTypeT]): @property def value_field(self) -> Field[_DataTypeT]: ... @property def value_type(self) -> _DataTypeT: ... - class ListViewType(DataType, Generic[_DataTypeT]): @property def value_field(self) -> Field[_DataTypeT]: ... - @property def value_type(self) -> _DataTypeT: ... - class LargeListViewType(DataType, Generic[_DataTypeT]): @property def value_field(self) -> Field[_DataTypeT]: ... - @property def value_type(self) -> _DataTypeT: ... - class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): @property def value_field(self) -> Field[_DataTypeT]: ... - @property def value_type(self) -> _DataTypeT: ... - @property def list_size(self) -> int: ... - -class DictionaryMemo(_Weakrefable): - ... - +class DictionaryMemo(_Weakrefable): ... _IndexT = TypeVar( "_IndexT", @@ -324,287 +205,197 @@ _BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) _ValueT = TypeVar("_ValueT", bound=DataType) _Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) - class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): @property def ordered(self) -> _Ordered: ... - @property def index_type(self) -> _IndexT: ... - @property def value_type(self) -> _BasicValueT: ... - _K = TypeVar("_K", bound=DataType) - class MapType(DataType, Generic[_K, _ValueT, _Ordered]): @property def key_field(self) -> Field[_K]: ... - @property def key_type(self) -> _K: ... - @property def item_field(self) -> Field[_ValueT]: ... - @property def item_type(self) -> _ValueT: ... - @property def keys_sorted(self) -> _Ordered: ... - _Size = TypeVar("_Size", default=int) - class StructType(DataType): def get_field_index(self, name: str) -> int: ... - def field(self, i: int | str) -> Field: ... - def get_all_field_indices(self, name: str) -> list[int]: ... - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field]: ... __getitem__ = field @property def names(self) -> list[str]: ... - @property def fields(self) -> list[Field]: ... - class UnionType(DataType): @property def mode(self) -> Literal["sparse", "dense"]: ... - @property def type_codes(self) -> list[int]: ... - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field]: ... - def field(self, i: int) -> Field: ... __getitem__ = field - class SparseUnionType(UnionType): @property def mode(self) -> Literal["sparse"]: ... - class DenseUnionType(UnionType): @property def mode(self) -> Literal["dense"]: ... - _RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) - class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): @property def run_end_type(self) -> _RunEndType: ... @property def value_type(self) -> _BasicValueT: ... - _StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) - class BaseExtensionType(DataType): def __arrow_ext_class__(self) -> type[ExtensionArray]: ... - def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... - @property def extension_name(self) -> str: ... - @property def storage_type(self) -> DataType: ... - def wrap_array(self, storage: _StorageT) -> _StorageT: ... - class ExtensionType(BaseExtensionType): def __init__(self, storage_type: DataType, extension_name: str) -> None: ... - def __arrow_ext_serialize__(self) -> bytes: ... - @classmethod def __arrow_ext_deserialize__( - cls, storage_type: DataType, serialized: bytes) -> Self: ... - + cls, storage_type: DataType, serialized: bytes + ) -> Self: ... class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): @property def value_type(self) -> _ValueT: ... - @property def shape(self) -> list[int]: ... - @property def dim_names(self) -> list[str] | None: ... - @property def permutation(self) -> list[int] | None: ... - -class Bool8Type(BaseExtensionType): - ... - - -class UuidType(BaseExtensionType): - ... - - -class JsonType(BaseExtensionType): - ... - +class Bool8Type(BaseExtensionType): ... +class UuidType(BaseExtensionType): ... +class JsonType(BaseExtensionType): ... class OpaqueType(BaseExtensionType): @property def type_name(self) -> str: ... - @property def vendor_name(self) -> str: ... - class UnknownExtensionType(ExtensionType): def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... - def register_extension_type(ext_type: ExtensionType) -> None: ... - - def unregister_extension_type(type_name: str) -> None: ... - class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __init__( - self, __arg0__: Mapping[str | bytes, str | bytes] + self, + __arg0__: Mapping[str | bytes, str | bytes] | Iterable[tuple[str | bytes, str | bytes]] | KeyValueMetadata - | None = None, **kwargs: str + | None = None, + **kwargs: str, ) -> None: ... - def equals(self, other: KeyValueMetadata) -> bool: ... - def __len__(self) -> int: ... - def __contains__(self, /, __key: object) -> bool: ... - def __getitem__(self, /, __key: Any) -> Any: ... - def __iter__(self) -> Iterator[bytes]: ... - def get_all(self, key: str) -> list[bytes]: ... - def to_dict(self) -> dict[bytes, bytes]: ... - class Field(_Weakrefable, Generic[_DataTypeT]): def equals(self, other: Field, check_metadata: bool = False) -> bool: ... - def __hash__(self) -> int: ... - @property def nullable(self) -> bool: ... - @property def name(self) -> str: ... - @property def metadata(self) -> dict[bytes, bytes] | None: ... - @property def type(self) -> _DataTypeT: ... - def with_metadata(self, metadata: dict[bytes | str, bytes | str] | - Mapping[bytes | str, bytes | str] | Any) -> Self: ... - + def with_metadata( + self, + metadata: dict[bytes | str, bytes | str] + | Mapping[bytes | str, bytes | str] + | Any, + ) -> Self: ... def remove_metadata(self) -> Self: ... - def with_type(self, new_type: DataType) -> Field: ... - def with_name(self, name: str) -> Self: ... - def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ... - def flatten(self) -> list[Field]: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod def _import_from_c(cls, in_ptr: int) -> Self: ... - def __arrow_c_schema__(self) -> Any: ... - @classmethod def _import_from_c_capsule(cls, schema) -> Self: ... - class Schema(_Weakrefable): def __len__(self) -> int: ... - def __getitem__(self, key: str | int) -> Field: ... _field = __getitem__ def __iter__(self) -> Iterator[Field]: ... - def __hash__(self) -> int: ... - def __sizeof__(self) -> int: ... @property def pandas_metadata(self) -> dict: ... - @property def names(self) -> list[str]: ... - @property def types(self) -> list[DataType]: ... - @property def metadata(self) -> dict[bytes, bytes]: ... - def empty_table(self) -> Table: ... - def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... - @classmethod - def from_pandas(cls, df: pd.DataFrame, preserve_index: bool | - None = None) -> Schema: ... - + def from_pandas( + cls, df: pd.DataFrame, preserve_index: bool | None = None + ) -> Schema: ... def field(self, i: int | str | bytes) -> Field: ... - @deprecated("Use 'field' instead") def field_by_name(self, name: str) -> Field: ... - def get_field_index(self, name: str) -> int: ... - def get_all_field_indices(self, name: str) -> list[int]: ... - def append(self, field: Field) -> Schema: ... - def insert(self, i: int, field: Field) -> Schema: ... - def remove(self, i: int) -> Schema: ... - def set(self, i: int, field: Field) -> Schema: ... - @deprecated("Use 'with_metadata' instead") def add_metadata(self, metadata: dict) -> Schema: ... - def with_metadata(self, metadata: dict) -> Schema: ... - def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... - def remove_metadata(self) -> Schema: ... - def to_string( self, truncate_metadata: bool = True, @@ -612,218 +403,126 @@ class Schema(_Weakrefable): show_schema_metadata: bool = True, element_size_limit: int | None = None, ) -> str: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod def _import_from_c(cls, in_ptr: int) -> Schema: ... - def __arrow_c_schema__(self) -> Any: ... - @staticmethod def _import_from_c_capsule(schema: Any) -> Schema: ... - def unify_schemas( schemas: Sequence[Schema], *, - promote_options: Literal["default", "permissive"] = "default" + promote_options: Literal["default", "permissive"] = "default", ) -> Schema: ... - - def field( - name: SupportArrowSchema | str | Any, type: _DataTypeT | str | None = None, + name: SupportArrowSchema | str | Any, + type: _DataTypeT | str | None = None, nullable: bool = True, - metadata: dict[Any, Any] | None = None + metadata: dict[Any, Any] | None = None, ) -> Field[_DataTypeT] | Field[Any]: ... - - def null() -> NullType: ... - - def bool_() -> BoolType: ... - - def uint8() -> UInt8Type: ... - - def int8() -> Int8Type: ... - - def uint16() -> UInt16Type: ... - - def int16() -> Int16Type: ... - - def uint32() -> UInt32Type: ... - - def int32() -> Int32Type: ... - - def int64() -> Int64Type: ... - - def uint64() -> UInt64Type: ... - - def timestamp( - unit: _Unit | str, tz: _Tz | None = None) -> TimestampType[_Unit, _Tz]: ... - - + unit: _Unit | str, tz: _Tz | None = None +) -> TimestampType[_Unit, _Tz]: ... def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... - - def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... - - def duration(unit: _Unit | str) -> DurationType[_Unit]: ... - - def month_day_nano_interval() -> MonthDayNanoIntervalType: ... - - def date32() -> Date32Type: ... - - def date64() -> Date64Type: ... - - def float16() -> Float16Type: ... - - def float32() -> Float32Type: ... - - def float64() -> Float64Type: ... - - -def decimal32(precision: _Precision, - scale: _Scale | Literal[0] = 0) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... - - -def decimal64(precision: _Precision, - scale: _Scale | Literal[0] = 0) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... - - -def decimal128(precision: _Precision, - scale: _Scale | Literal[0] = 0) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... - - -def decimal256(precision: _Precision, - scale: _Scale | Literal[0] = 0) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... - - +def decimal32( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... +def decimal64( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... +def decimal128( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... +def decimal256( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... def string() -> StringType: ... - utf8 = string - -def binary(length: Literal[-1] | int = ...) -> BinaryType | FixedSizeBinaryType[bytes]: ... - - +def binary( + length: Literal[-1] | int = ..., +) -> BinaryType | FixedSizeBinaryType[bytes]: ... def large_binary() -> LargeBinaryType: ... - - def large_string() -> LargeStringType: ... - large_utf8 = large_string - def binary_view() -> BinaryViewType: ... - - def string_view() -> StringViewType: ... - - def list_( value_type: _DataTypeT | Field[_DataTypeT] | None = None, - list_size: Literal[-1] | _Size | None = None + list_size: Literal[-1] | _Size | None = None, ) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ... - - -def large_list(value_type: _DataTypeT | - Field[_DataTypeT] | None = None) -> LargeListType[_DataTypeT]: ... - - -def list_view(value_type: _DataTypeT | - Field[_DataTypeT] | None = None) -> ListViewType[_DataTypeT]: ... - - +def large_list( + value_type: _DataTypeT | Field[_DataTypeT] | None = None, +) -> LargeListType[_DataTypeT]: ... +def list_view( + value_type: _DataTypeT | Field[_DataTypeT] | None = None, +) -> ListViewType[_DataTypeT]: ... def large_list_view( - value_type: _DataTypeT | Field[_DataTypeT] | None = None + value_type: _DataTypeT | Field[_DataTypeT] | None = None, ) -> LargeListViewType[_DataTypeT]: ... - - def map_( key_type: _K | Field | str | None = None, item_type: _ValueT | Field | str | None = None, - keys_sorted: _Ordered | None = None + keys_sorted: _Ordered | None = None, ) -> MapType[_K, _ValueT, _Ordered]: ... - - def dictionary( index_type: _IndexT | str, value_type: _BasicValueT | str, - ordered: _Ordered | None = None + ordered: _Ordered | None = None, ) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... - - def struct( fields: Iterable[ - Field[Any] - | tuple[str, Field[Any] | None] - | tuple[str, DataType | None] - ] | Mapping[str, Field[Any] | DataType | None], + Field[Any] | tuple[str, Field[Any] | None] | tuple[str, DataType | None] + ] + | Mapping[str, Field[Any] | DataType | None], ) -> StructType: ... - - def sparse_union( child_fields: list[Field[Any]], type_codes: list[int] | None = None ) -> SparseUnionType: ... - - def dense_union( child_fields: list[Field[Any]], type_codes: list[int] | None = None ) -> DenseUnionType: ... - - def union( - child_fields: list[Field[Any]], mode: Literal["sparse", "dense"] | int | str, - type_codes: list[int] | None = None) -> SparseUnionType | DenseUnionType: ... - - + child_fields: list[Field[Any]], + mode: Literal["sparse", "dense"] | int, + type_codes: list[int] | None = None, +) -> SparseUnionType | DenseUnionType: ... def run_end_encoded( run_end_type: _RunEndType | str | None, value_type: _BasicValueT | str | None ) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... - - def json_(storage_type: DataType = ...) -> JsonType: ... - - def uuid() -> UuidType: ... - - def fixed_shape_tensor( value_type: _ValueT, shape: Sequence[int], dim_names: Sequence[str] | None = None, permutation: Sequence[int] | None = None, ) -> FixedShapeTensorType[_ValueT]: ... - - def bool8() -> Bool8Type: ... - - def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... - - def type_for_alias(name: Any) -> DataType: ... - - def schema( fields: ( Iterable[Field[Any]] @@ -833,13 +532,11 @@ def schema( metadata: Mapping[bytes, bytes] | Mapping[str, str] | Mapping[bytes, str] - | Mapping[str, bytes] | None = None, + | Mapping[str, bytes] + | None = None, ) -> Schema: ... - - def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... - __all__ = [ "_Weakrefable", "_Metadata", diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi index 6a010071de08..8ee75c3ec414 100644 --- a/python/pyarrow-stubs/pyarrow/error.pyi +++ b/python/pyarrow-stubs/pyarrow/error.pyi @@ -17,68 +17,34 @@ from typing_extensions import Self - -class ArrowException(Exception): - ... - - -class ArrowInvalid(ValueError, ArrowException): - ... - - -class ArrowMemoryError(MemoryError, ArrowException): - ... - - -class ArrowKeyError(KeyError, ArrowException): - ... - - -class ArrowTypeError(TypeError, ArrowException): - ... - - -class ArrowNotImplementedError(NotImplementedError, ArrowException): - ... - - -class ArrowCapacityError(ArrowException): - ... - - -class ArrowIndexError(IndexError, ArrowException): - ... - - -class ArrowSerializationError(ArrowException): - ... - +class ArrowException(Exception): ... +class ArrowInvalid(ValueError, ArrowException): ... +class ArrowMemoryError(MemoryError, ArrowException): ... +class ArrowKeyError(KeyError, ArrowException): ... +class ArrowTypeError(TypeError, ArrowException): ... +class ArrowNotImplementedError(NotImplementedError, ArrowException): ... +class ArrowCapacityError(ArrowException): ... +class ArrowIndexError(IndexError, ArrowException): ... +class ArrowSerializationError(ArrowException): ... class ArrowCancelled(ArrowException): signum: int | None def __init__(self, message: str, signum: int | None = None) -> None: ... - ArrowIOError = IOError - -class StopToken: - ... - +class StopToken: ... def enable_signal_handlers(enable: bool) -> None: ... - have_signal_refcycle: bool - class SignalStopHandler: def __enter__(self) -> Self: ... def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... @property def stop_token(self) -> StopToken: ... - __all__ = [ "ArrowException", "ArrowInvalid", diff --git a/python/pyarrow-stubs/pyarrow/io.pyi b/python/pyarrow-stubs/pyarrow/io.pyi index 856fb093a6cd..8a1da39a1cf2 100644 --- a/python/pyarrow-stubs/pyarrow/io.pyi +++ b/python/pyarrow-stubs/pyarrow/io.pyi @@ -15,16 +15,11 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub - complete annotations in future PR. - -At runtime, these symbols are provided by the pyarrow.lib C extension. -""" +# Placeholder stub - complete annotations in future PR. +# At runtime, these symbols are provided by the pyarrow.lib C extension. from typing import Any - -class Buffer: - ... - +class Buffer: ... def __getattr__(name: str) -> Any: ... diff --git a/python/pyarrow-stubs/pyarrow/lib.pyi b/python/pyarrow-stubs/pyarrow/lib.pyi index 775434be2ea4..0c539af067ed 100644 --- a/python/pyarrow-stubs/pyarrow/lib.pyi +++ b/python/pyarrow-stubs/pyarrow/lib.pyi @@ -15,10 +15,8 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub for pyarrow.lib C extension module. - -Complete type annotations will be added in subsequent PRs. -""" +# Placeholder stub for pyarrow.lib C extension module. +# Complete type annotations will be added in subsequent PRs. from typing import Any diff --git a/python/pyarrow-stubs/pyarrow/scalar.pyi b/python/pyarrow-stubs/pyarrow/scalar.pyi index 384507d2a2b6..307b3e689a25 100644 --- a/python/pyarrow-stubs/pyarrow/scalar.pyi +++ b/python/pyarrow-stubs/pyarrow/scalar.pyi @@ -15,16 +15,11 @@ # specific language governing permissions and limitations # under the License. -"""Placeholder stub - complete annotations in future PR. - -At runtime, these symbols are provided by the pyarrow.lib C extension. -""" +# Placeholder stub - complete annotations in future PR. +# At runtime, these symbols are provided by the pyarrow.lib C extension. from typing import Any - -class ExtensionScalar: - ... - +class ExtensionScalar: ... def __getattr__(name: str) -> Any: ... From 0acbc9dd02b86962d9210aa7e6205467b79fa8bf Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 13 Mar 2026 16:39:04 +0100 Subject: [PATCH 06/21] add pyi check to pre-commit --- .pre-commit-config.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e7cf88b16c1f..f97ed61fbbc1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -136,11 +136,22 @@ repos: - id: flake8 alias: python name: Python Lint + additional_dependencies: + - flake8-pyi args: - "--config" - "python/setup.cfg" + - "--extend-select" + - "Y" + - "--per-file-ignores" + - "python/pyarrow-stubs/pyarrow/*.pyi:E301,E302,E305,E701" files: >- ^(c_glib|dev|python)/ + types: + - file + types_or: + - python + - pyi exclude: >- ( ?^python/pyarrow/vendored/| From 83cab21eeee8f693265533c0ebac81b2425553bb Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 13 Mar 2026 16:39:47 +0100 Subject: [PATCH 07/21] ruff format --- python/pyarrow-stubs/pyarrow/_types.pyi | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 1fd5c5002d1e..c4d88ffce6b8 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -125,9 +125,7 @@ class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): _FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) -class FixedSizeBinaryType( - _BasicDataType[_FixedSizeBinaryAsPyType] -): ... +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... _Precision = TypeVar("_Precision", default=Any) _Scale = TypeVar("_Scale", default=Any) From 8e550773b37b9493bc0e59b76e91dfdf8f0a7c88 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 16 Mar 2026 01:20:19 +0100 Subject: [PATCH 08/21] Apply suggestions from code review Co-authored-by: Dan Redding <125183946+dangotbanned@users.noreply.github.com> --- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 7 ++- python/pyarrow-stubs/pyarrow/_types.pyi | 49 ++++++++----------- 2 files changed, 24 insertions(+), 32 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 1d5f7600c4b7..6d0c15fd6e3e 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -133,7 +133,6 @@ class SupportArrowDeviceArray(Protocol): class SupportArrowSchema(Protocol): def __arrow_c_schema__(self) -> Any: ... -class NullableCollection(Protocol[_V]): - def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... - def __len__(self) -> int: ... - def __contains__(self, item: Any, /) -> bool: ... +from collections.abc import Container, Sized +class NullableCollection(Sized, Container[_V], Protocol[_V]): + def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index c4d88ffce6b8..30f719a739f9 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -42,7 +42,7 @@ class _Weakrefable: ... class _Metadata(_Weakrefable): ... class DataType(_Weakrefable): - def field(self, i: int) -> Field: ... + def field(self, i: int) -> Field[Any]: ... @property def id(self) -> int: ... @property @@ -72,7 +72,8 @@ class DataType(_Weakrefable): def _import_from_c_capsule(cls, schema) -> Self: ... _AsPyType = TypeVar("_AsPyType") -_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT", bound=DataType, covariant=True) class _BasicDataType(DataType, Generic[_AsPyType]): ... class NullType(_BasicDataType[None]): ... @@ -127,32 +128,24 @@ _FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... -_Precision = TypeVar("_Precision", default=Any) -_Scale = TypeVar("_Scale", default=Any) - -class Decimal32Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): - @property - def precision(self) -> _Precision: ... - @property - def scale(self) -> _Scale: ... - -class Decimal64Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): - @property - def precision(self) -> _Precision: ... - @property - def scale(self) -> _Scale: ... - -class Decimal128Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): - @property - def precision(self) -> _Precision: ... - @property - def scale(self) -> _Scale: ... - -class Decimal256Type(FixedSizeBinaryType[Decimal], Generic[_Precision, _Scale]): - @property - def precision(self) -> _Precision: ... - @property - def scale(self) -> _Scale: ... +from typing import Protocol + +_Precision = TypeVar("_Precision", default=Any , covariant=True) +_Scale = TypeVar("_Scale", default=Any , covariant=True) + +class _HasPrecisionScale(Protocol[_Precision, _Scale]): + @property + def precision(self) -> _Precision: ... + @property + def scale(self) -> _Scale: ... + +class Decimal32Type(FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale]): ... + +class Decimal64Type(FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale]): ... + +class Decimal128Type(FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale]): ... + +class Decimal256Type(FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale]): ... class ListType(DataType, Generic[_DataTypeT]): @property From eae20d12632c0ff7a4181143220d111998151de8 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 16 Mar 2026 01:50:36 +0100 Subject: [PATCH 09/21] review feedback --- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 56 +++++--- python/pyarrow-stubs/pyarrow/_types.pyi | 132 ++++++++++-------- 2 files changed, 109 insertions(+), 79 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 6d0c15fd6e3e..572bc7ba99f2 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -17,7 +17,7 @@ import datetime as dt -from collections.abc import Collection, Iterator, Sequence +from collections.abc import Collection, Container, Iterator, Sequence, Sized from decimal import Decimal from typing import Any, Literal, Protocol, TypeAlias, TypeVar @@ -90,49 +90,65 @@ _V = TypeVar("_V", covariant=True) SingleOrList: TypeAlias = list[_T] | _T -class SupportEq(Protocol): + +class SupportsEq(Protocol): def __eq__(self, other: object, /) -> bool: ... -class SupportLt(Protocol): + +class SupportsLt(Protocol): def __lt__(self, other: object, /) -> bool: ... -class SupportGt(Protocol): + +class SupportsGt(Protocol): def __gt__(self, other: object, /) -> bool: ... -class SupportLe(Protocol): + +class SupportsLe(Protocol): def __le__(self, other: object, /) -> bool: ... -class SupportGe(Protocol): + +class SupportsGe(Protocol): def __ge__(self, other: object, /) -> bool: ... + FilterTuple: TypeAlias = ( - tuple[str, Literal["=", "==", "!="], SupportEq] - | tuple[str, Literal["<"], SupportLt] - | tuple[str, Literal[">"], SupportGt] - | tuple[str, Literal["<="], SupportLe] - | tuple[str, Literal[">="], SupportGe] + tuple[str, Literal["=", "==", "!="], SupportsEq] + | tuple[str, Literal["<"], SupportsLt] + | tuple[str, Literal[">"], SupportsGt] + | tuple[str, Literal["<="], SupportsLe] + | tuple[str, Literal[">="], SupportsGe] | tuple[str, Literal["in", "not in"], Collection] | tuple[str, str, Any] # Allow general str for operator to avoid type errors ) + class Buffer(Protocol): ... -class SupportPyBuffer(Protocol): ... -class SupportArrowStream(Protocol): + +class SupportsPyBuffer(Protocol): ... + + +class SupportsArrowStream(Protocol): def __arrow_c_stream__(self, requested_schema=None, /) -> Any: ... -class SupportPyArrowArray(Protocol): + +class SupportsPyArrowArray(Protocol): def __arrow_array__(self, type=None, /) -> Any: ... -class SupportArrowArray(Protocol): + +class SupportsArrowArray(Protocol): def __arrow_c_array__(self, requested_schema=None, /) -> Any: ... -class SupportArrowDeviceArray(Protocol): + +class SupportsArrowDeviceArray(Protocol): def __arrow_c_device_array__(self, requested_schema=None, /, **kwargs) -> Any: ... -class SupportArrowSchema(Protocol): + +class SupportsArrowSchema(Protocol): def __arrow_c_schema__(self) -> Any: ... -from collections.abc import Container, Sized -class NullableCollection(Sized, Container[_V], Protocol[_V]): - def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... + +class NullableCollection(Sized, Container[_V], Protocol[_V]): + def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... + def __len__(self) -> int: ... + def __contains__(self, item: Any, /) -> bool: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 30f719a739f9..acd558203949 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -17,16 +17,16 @@ import datetime as dt # noqa: F401 -from collections.abc import Mapping, Sequence, Iterable, Iterator +from collections.abc import Iterable, Iterator, Mapping, Sequence from decimal import Decimal # noqa: F401 -from typing import Any, Generic, Literal +from typing import Any, Generic, Literal, Protocol, TypeAlias import numpy as np import pandas as pd from typing_extensions import Self, TypeVar, deprecated -from pyarrow._stubs_typing import SupportArrowSchema, TimeUnit +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit from pyarrow.io import Buffer from pyarrow.lib import ( # noqa: F401 Array, @@ -72,8 +72,8 @@ class DataType(_Weakrefable): def _import_from_c_capsule(cls, schema) -> Self: ... _AsPyType = TypeVar("_AsPyType") -_DataTypeT = TypeVar("_DataTypeT", bound=DataType) -_DataTypeT_co = TypeVar("_DataTypeT", bound=DataType, covariant=True) +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) class _BasicDataType(DataType, Generic[_AsPyType]): ... class NullType(_BasicDataType[None]): ... @@ -128,54 +128,60 @@ _FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... -from typing import Protocol - -_Precision = TypeVar("_Precision", default=Any , covariant=True) -_Scale = TypeVar("_Scale", default=Any , covariant=True) - -class _HasPrecisionScale(Protocol[_Precision, _Scale]): - @property - def precision(self) -> _Precision: ... - @property - def scale(self) -> _Scale: ... - -class Decimal32Type(FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale]): ... - -class Decimal64Type(FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale]): ... - -class Decimal128Type(FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale]): ... - -class Decimal256Type(FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale]): ... +_Precision = TypeVar("_Precision", default=Any, covariant=True) +_Scale = TypeVar("_Scale", default=Any, covariant=True) -class ListType(DataType, Generic[_DataTypeT]): +class _HasPrecisionScale(Protocol[_Precision, _Scale]): @property - def value_field(self) -> Field[_DataTypeT]: ... + def precision(self) -> _Precision: ... @property - def value_type(self) -> _DataTypeT: ... + def scale(self) -> _Scale: ... -class LargeListType(DataType, Generic[_DataTypeT]): +class Decimal32Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale] +): ... + +class Decimal64Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale] +): ... + +class Decimal128Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale] +): ... + +class Decimal256Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale] +): ... + +class ListType(DataType, Generic[_DataTypeT_co]): @property - def value_field(self) -> Field[_DataTypeT]: ... + def value_field(self) -> Field[_DataTypeT_co]: ... @property - def value_type(self) -> _DataTypeT: ... + def value_type(self) -> _DataTypeT_co: ... -class ListViewType(DataType, Generic[_DataTypeT]): +class LargeListType(DataType, Generic[_DataTypeT_co]): @property - def value_field(self) -> Field[_DataTypeT]: ... + def value_field(self) -> Field[_DataTypeT_co]: ... @property - def value_type(self) -> _DataTypeT: ... + def value_type(self) -> _DataTypeT_co: ... -class LargeListViewType(DataType, Generic[_DataTypeT]): +class ListViewType(DataType, Generic[_DataTypeT_co]): @property - def value_field(self) -> Field[_DataTypeT]: ... + def value_field(self) -> Field[_DataTypeT_co]: ... @property - def value_type(self) -> _DataTypeT: ... + def value_type(self) -> _DataTypeT_co: ... -class FixedSizeListType(DataType, Generic[_DataTypeT, _Size]): +class LargeListViewType(DataType, Generic[_DataTypeT_co]): @property - def value_field(self) -> Field[_DataTypeT]: ... + def value_field(self) -> Field[_DataTypeT_co]: ... @property - def value_type(self) -> _DataTypeT: ... + def value_type(self) -> _DataTypeT_co: ... + +class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... @property def list_size(self) -> int: ... @@ -305,13 +311,22 @@ class UnknownExtensionType(ExtensionType): def register_extension_type(ext_type: ExtensionType) -> None: ... def unregister_extension_type(type_name: str) -> None: ... +_StrOrBytes: TypeAlias = str | bytes +_MetadataMapping: TypeAlias = Mapping[_StrOrBytes, _StrOrBytes] +_MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] +_KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None +_FieldTypeInput: TypeAlias = DataType | str | None +_SchemaMetadataInput: TypeAlias = ( + Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] +) + class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __init__( self, - __arg0__: Mapping[str | bytes, str | bytes] - | Iterable[tuple[str | bytes, str | bytes]] - | KeyValueMetadata - | None = None, + __arg0__: _KeyValueMetadataInput | KeyValueMetadata = None, **kwargs: str, ) -> None: ... def equals(self, other: KeyValueMetadata) -> bool: ... @@ -322,7 +337,7 @@ class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def get_all(self, key: str) -> list[bytes]: ... def to_dict(self) -> dict[bytes, bytes]: ... -class Field(_Weakrefable, Generic[_DataTypeT]): +class Field(_Weakrefable, Generic[_DataTypeT_co]): def equals(self, other: Field, check_metadata: bool = False) -> bool: ... def __hash__(self) -> int: ... @property @@ -332,17 +347,15 @@ class Field(_Weakrefable, Generic[_DataTypeT]): @property def metadata(self) -> dict[bytes, bytes] | None: ... @property - def type(self) -> _DataTypeT: ... + def type(self) -> _DataTypeT_co: ... def with_metadata( self, - metadata: dict[bytes | str, bytes | str] - | Mapping[bytes | str, bytes | str] - | Any, + metadata: _MetadataMapping | Any, ) -> Self: ... def remove_metadata(self) -> Self: ... def with_type(self, new_type: DataType) -> Field: ... def with_name(self, name: str) -> Self: ... - def with_nullable(self, nullable: bool) -> Field[_DataTypeT]: ... + def with_nullable(self, nullable: bool) -> Field[_DataTypeT_co]: ... def flatten(self) -> list[Field]: ... def _export_to_c(self, out_ptr: int) -> None: ... @classmethod @@ -351,6 +364,14 @@ class Field(_Weakrefable, Generic[_DataTypeT]): @classmethod def _import_from_c_capsule(cls, schema) -> Self: ... +_StructFieldTuple: TypeAlias = ( + tuple[str, Field[Any] | None] | tuple[str, _FieldTypeInput] +) +_StructFieldsInput: TypeAlias = ( + Iterable[Field[Any] | _StructFieldTuple] + | Mapping[str, Field[Any] | DataType | str | None] +) + class Schema(_Weakrefable): def __len__(self) -> int: ... def __getitem__(self, key: str | int) -> Field: ... @@ -407,10 +428,10 @@ def unify_schemas( promote_options: Literal["default", "permissive"] = "default", ) -> Schema: ... def field( - name: SupportArrowSchema | str | Any, + name: SupportsArrowSchema | str | bytes, type: _DataTypeT | str | None = None, nullable: bool = True, - metadata: dict[Any, Any] | None = None, + metadata: _MetadataMapping | None = None, ) -> Field[_DataTypeT] | Field[Any]: ... def null() -> NullType: ... def bool_() -> BoolType: ... @@ -484,10 +505,7 @@ def dictionary( ordered: _Ordered | None = None, ) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... def struct( - fields: Iterable[ - Field[Any] | tuple[str, Field[Any] | None] | tuple[str, DataType | None] - ] - | Mapping[str, Field[Any] | DataType | None], + fields: _StructFieldsInput, ) -> StructType: ... def sparse_union( child_fields: list[Field[Any]], type_codes: list[int] | None = None @@ -520,11 +538,7 @@ def schema( | Iterable[tuple[str, DataType | str | None]] | Mapping[Any, DataType | str | None] ), - metadata: Mapping[bytes, bytes] - | Mapping[str, str] - | Mapping[bytes, str] - | Mapping[str, bytes] - | None = None, + metadata: _SchemaMetadataInput | None = None, ) -> Schema: ... def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... From 789934a7c08c176985d1688b0ac3a5a1f2c4e54e Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Mon, 16 Mar 2026 13:57:38 +0100 Subject: [PATCH 10/21] Decimals should be covariant --- python/pyarrow-stubs/pyarrow/_types.pyi | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index acd558203949..02f93645a456 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -128,29 +128,31 @@ _FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... -_Precision = TypeVar("_Precision", default=Any, covariant=True) -_Scale = TypeVar("_Scale", default=Any, covariant=True) +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) +_Precision_co = TypeVar("_Precision_co", default=Any, covariant=True) +_Scale_co = TypeVar("_Scale_co", default=Any, covariant=True) -class _HasPrecisionScale(Protocol[_Precision, _Scale]): +class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): @property - def precision(self) -> _Precision: ... + def precision(self) -> _Precision_co: ... @property - def scale(self) -> _Scale: ... + def scale(self) -> _Scale_co: ... class Decimal32Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale] + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] ): ... class Decimal64Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale] + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] ): ... class Decimal128Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale] + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] ): ... class Decimal256Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision, _Scale] + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] ): ... class ListType(DataType, Generic[_DataTypeT_co]): From 772913798ff931a9cdf1c432b35d045e8dea1bb8 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 17 Mar 2026 02:10:18 +0100 Subject: [PATCH 11/21] fix editable install collisions --- docs/source/developers/python/building.rst | 1 + python/CMakeLists.txt | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index 73a2482ecbfa..bb033281f9b9 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -204,6 +204,7 @@ Windows tab under the :ref:`pyarrow_build_section` section. $ export ARROW_HOME=$(pwd)/dist $ export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH + $ export DYLD_LIBRARY_PATH=$(pwd)/dist/lib:$DYLD_LIBRARY_PATH $ export CMAKE_PREFIX_PATH=$ARROW_HOME:$CMAKE_PREFIX_PATH .. tab-item:: Windows diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index d0ddb9009f89..fe67bd615036 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -293,15 +293,6 @@ message(STATUS "Found Cython version: ${CYTHON_VERSION}") include(GNUInstallDirs) find_package(Arrow REQUIRED) -# When not bundling Arrow C++ libraries on macOS, add the Arrow library -# directory to the RPATH so that the extensions can find libarrow at runtime. -if(APPLE - AND NOT PYARROW_BUNDLE_ARROW_CPP - AND ARROW_SHARED_LIB) - get_filename_component(_arrow_lib_dir "${ARROW_SHARED_LIB}" DIRECTORY) - list(APPEND CMAKE_INSTALL_RPATH "${_arrow_lib_dir}") -endif() - macro(define_option name description arrow_option) set("PYARROW_${name}" "AUTO" From ddd48c063408b785deb472f07395d7ea3a7057c3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 17 Mar 2026 21:39:36 +0100 Subject: [PATCH 12/21] review feedback --- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 20 ++-- python/pyarrow-stubs/pyarrow/_types.pyi | 96 +++++++++++++++---- 2 files changed, 86 insertions(+), 30 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index 572bc7ba99f2..cfa5cb6e27ea 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -91,32 +91,32 @@ _V = TypeVar("_V", covariant=True) SingleOrList: TypeAlias = list[_T] | _T -class SupportsEq(Protocol): +class SupportsDunderEQ(Protocol): def __eq__(self, other: object, /) -> bool: ... -class SupportsLt(Protocol): +class SupportsDunderLT(Protocol): def __lt__(self, other: object, /) -> bool: ... -class SupportsGt(Protocol): +class SupportsDunderGT(Protocol): def __gt__(self, other: object, /) -> bool: ... -class SupportsLe(Protocol): +class SupportsDunderLE(Protocol): def __le__(self, other: object, /) -> bool: ... -class SupportsGe(Protocol): +class SupportsDunderGE(Protocol): def __ge__(self, other: object, /) -> bool: ... FilterTuple: TypeAlias = ( - tuple[str, Literal["=", "==", "!="], SupportsEq] - | tuple[str, Literal["<"], SupportsLt] - | tuple[str, Literal[">"], SupportsGt] - | tuple[str, Literal["<="], SupportsLe] - | tuple[str, Literal[">="], SupportsGe] + tuple[str, Literal["=", "==", "!="], SupportsDunderEQ] + | tuple[str, Literal["<"], SupportsDunderLT] + | tuple[str, Literal[">"], SupportsDunderGT] + | tuple[str, Literal["<="], SupportsDunderLE] + | tuple[str, Literal[">="], SupportsDunderGE] | tuple[str, Literal["in", "not in"], Collection] | tuple[str, str, Any] # Allow general str for operator to avoid type errors ) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 02f93645a456..fd4a9ea49448 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -42,7 +42,7 @@ class _Weakrefable: ... class _Metadata(_Weakrefable): ... class DataType(_Weakrefable): - def field(self, i: int) -> Field[Any]: ... + def field(self, i: int) -> Field[Any]: ... @property def id(self) -> int: ... @property @@ -56,9 +56,6 @@ class DataType(_Weakrefable): @property def has_variadic_buffers(self) -> bool: ... - # Properties that exist on specific subtypes but accessed generically - @property - def list_size(self) -> int: ... def __hash__(self) -> int: ... def equals( self, other: DataType | str, *, check_metadata: bool = False @@ -200,8 +197,9 @@ _IndexT = TypeVar( UInt64Type, Int64Type, ) -_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType) -_ValueT = TypeVar("_ValueT", bound=DataType) +_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType, default=_BasicDataType) +_ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) +_K = TypeVar("_K", bound=DataType, default=DataType) _Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): @@ -212,8 +210,6 @@ class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): @property def value_type(self) -> _BasicValueT: ... -_K = TypeVar("_K", bound=DataType) - class MapType(DataType, Generic[_K, _ValueT, _Ordered]): @property def key_field(self) -> Field[_K]: ... @@ -317,7 +313,66 @@ _StrOrBytes: TypeAlias = str | bytes _MetadataMapping: TypeAlias = Mapping[_StrOrBytes, _StrOrBytes] _MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] _KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None -_FieldTypeInput: TypeAlias = DataType | str | None +_DataTypeAlias: TypeAlias = Literal[ + "null", + "bool", + "boolean", + "i1", + "int8", + "i2", + "int16", + "i4", + "int32", + "i8", + "int64", + "u1", + "uint8", + "u2", + "uint16", + "u4", + "uint32", + "u8", + "uint64", + "f2", + "halffloat", + "float16", + "f4", + "float", + "float32", + "f8", + "double", + "float64", + "string", + "str", + "utf8", + "binary", + "large_string", + "large_str", + "large_utf8", + "large_binary", + "binary_view", + "string_view", + "date32", + "date64", + "date32[day]", + "date64[ms]", + "time32[s]", + "time32[ms]", + "time64[us]", + "time64[ns]", + "timestamp[s]", + "timestamp[ms]", + "timestamp[us]", + "timestamp[ns]", + "duration[s]", + "duration[ms]", + "duration[us]", + "duration[ns]", + "month_day_nano_interval", +] +_DataTypeAliasInput: TypeAlias = _DataTypeAlias | str +_DataTypeLike: TypeAlias = DataType | _DataTypeAliasInput +_FieldTypeInput: TypeAlias = _DataTypeLike | None _SchemaMetadataInput: TypeAlias = ( Mapping[bytes, bytes] | Mapping[str, str] @@ -371,7 +426,7 @@ _StructFieldTuple: TypeAlias = ( ) _StructFieldsInput: TypeAlias = ( Iterable[Field[Any] | _StructFieldTuple] - | Mapping[str, Field[Any] | DataType | str | None] + | Mapping[str, Field[Any] | _FieldTypeInput] ) class Schema(_Weakrefable): @@ -406,8 +461,8 @@ class Schema(_Weakrefable): def remove(self, i: int) -> Schema: ... def set(self, i: int, field: Field) -> Schema: ... @deprecated("Use 'with_metadata' instead") - def add_metadata(self, metadata: dict) -> Schema: ... - def with_metadata(self, metadata: dict) -> Schema: ... + def add_metadata(self, metadata: _MetadataMapping) -> Schema: ... + def with_metadata(self, metadata: _MetadataMapping) -> Schema: ... def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... def remove_metadata(self) -> Schema: ... def to_string( @@ -431,7 +486,7 @@ def unify_schemas( ) -> Schema: ... def field( name: SupportsArrowSchema | str | bytes, - type: _DataTypeT | str | None = None, + type: _DataTypeT | _DataTypeAliasInput | None = None, nullable: bool = True, metadata: _MetadataMapping | None = None, ) -> Field[_DataTypeT] | Field[Any]: ... @@ -497,13 +552,13 @@ def large_list_view( value_type: _DataTypeT | Field[_DataTypeT] | None = None, ) -> LargeListViewType[_DataTypeT]: ... def map_( - key_type: _K | Field | str | None = None, - item_type: _ValueT | Field | str | None = None, + key_type: _K | Field | _FieldTypeInput = None, + item_type: _ValueT | Field | _FieldTypeInput = None, keys_sorted: _Ordered | None = None, ) -> MapType[_K, _ValueT, _Ordered]: ... def dictionary( - index_type: _IndexT | str, - value_type: _BasicValueT | str, + index_type: _IndexT | _DataTypeAliasInput, + value_type: _BasicValueT | _DataTypeAliasInput, ordered: _Ordered | None = None, ) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... def struct( @@ -521,7 +576,8 @@ def union( type_codes: list[int] | None = None, ) -> SparseUnionType | DenseUnionType: ... def run_end_encoded( - run_end_type: _RunEndType | str | None, value_type: _BasicValueT | str | None + run_end_type: _RunEndType | _DataTypeAliasInput | None, + value_type: _BasicValueT | _DataTypeAliasInput | None, ) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... def json_(storage_type: DataType = ...) -> JsonType: ... def uuid() -> UuidType: ... @@ -537,8 +593,8 @@ def type_for_alias(name: Any) -> DataType: ... def schema( fields: ( Iterable[Field[Any]] - | Iterable[tuple[str, DataType | str | None]] - | Mapping[Any, DataType | str | None] + | Iterable[tuple[str, _FieldTypeInput]] + | Mapping[Any, _FieldTypeInput] ), metadata: _SchemaMetadataInput | None = None, ) -> Schema: ... From 6f2a03aeecf970e5eb68c3493de0f6a5689e8c8e Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 17 Mar 2026 22:38:22 +0100 Subject: [PATCH 13/21] move placeholder --- python/pyarrow-stubs/pyarrow/_types.pyi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index fd4a9ea49448..a94f3cce32bd 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -27,16 +27,16 @@ import pandas as pd from typing_extensions import Self, TypeVar, deprecated from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit -from pyarrow.io import Buffer from pyarrow.lib import ( # noqa: F401 Array, + Buffer, ChunkedArray, ExtensionArray, + ExtensionScalar, MemoryPool, MonthDayNano, Table, ) -from pyarrow.scalar import ExtensionScalar class _Weakrefable: ... class _Metadata(_Weakrefable): ... From ed2c72b11ba52c14da338161efeb98e623717a91 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 6 May 2026 17:52:37 +0200 Subject: [PATCH 14/21] post rebase --- docs/source/developers/python/building.rst | 1 - python/CMakeLists.txt | 9 +++++++++ python/pyarrow-stubs/pyarrow/__init__.pyi | 8 +++++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/source/developers/python/building.rst b/docs/source/developers/python/building.rst index bb033281f9b9..73a2482ecbfa 100644 --- a/docs/source/developers/python/building.rst +++ b/docs/source/developers/python/building.rst @@ -204,7 +204,6 @@ Windows tab under the :ref:`pyarrow_build_section` section. $ export ARROW_HOME=$(pwd)/dist $ export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH - $ export DYLD_LIBRARY_PATH=$(pwd)/dist/lib:$DYLD_LIBRARY_PATH $ export CMAKE_PREFIX_PATH=$ARROW_HOME:$CMAKE_PREFIX_PATH .. tab-item:: Windows diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index fe67bd615036..d0ddb9009f89 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -293,6 +293,15 @@ message(STATUS "Found Cython version: ${CYTHON_VERSION}") include(GNUInstallDirs) find_package(Arrow REQUIRED) +# When not bundling Arrow C++ libraries on macOS, add the Arrow library +# directory to the RPATH so that the extensions can find libarrow at runtime. +if(APPLE + AND NOT PYARROW_BUNDLE_ARROW_CPP + AND ARROW_SHARED_LIB) + get_filename_component(_arrow_lib_dir "${ARROW_SHARED_LIB}" DIRECTORY) + list(APPEND CMAKE_INSTALL_RPATH "${_arrow_lib_dir}") +endif() + macro(define_option name description arrow_option) set("PYARROW_${name}" "AUTO" diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi index 9c7120c7f807..ccec8d5abc07 100644 --- a/python/pyarrow-stubs/pyarrow/__init__.pyi +++ b/python/pyarrow-stubs/pyarrow/__init__.pyi @@ -15,9 +15,11 @@ # specific language governing permissions and limitations # under the License. -# Type stubs for PyArrow. -# This is a placeholder stub file. -# Complete type annotations will be added in subsequent PRs. +"""Type stubs for PyArrow. + +This is a placeholder stub file. +Complete type annotations will be added in subsequent PRs. +""" from typing import Any From f0272fef6ab6dada854077e957bdb25b781da599 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sun, 14 Jun 2026 16:16:48 +0200 Subject: [PATCH 15/21] Apply suggestions from code review Co-authored-by: Dan Redding <125183946+dangotbanned@users.noreply.github.com> --- .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 73 ++++++------- python/pyarrow-stubs/pyarrow/_types.pyi | 100 +++++++++--------- 2 files changed, 89 insertions(+), 84 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index cfa5cb6e27ea..b7f22480e608 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -48,42 +48,43 @@ NullEncoding: TypeAlias = Literal["mask", "encode"] NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] -IntegerType: TypeAlias = ( - lib.Int8Type - | lib.Int16Type - | lib.Int32Type - | lib.Int64Type - | lib.UInt8Type - | lib.UInt16Type - | lib.UInt32Type - | lib.UInt64Type -) - -Mask: TypeAlias = ( - Sequence[bool | None] - | NDArray[np.bool_] - | lib.Array[lib.Scalar[lib.BoolType]] - | ChunkedArray[Any] -) -Indices: TypeAlias = ( - Sequence[int | None] - | NDArray[np.integer[Any]] - | lib.Array[lib.Scalar[IntegerType]] - | ChunkedArray[Any] -) - -PyScalar: TypeAlias = ( - bool - | int - | float - | Decimal - | str - | bytes - | dt.date - | dt.datetime - | dt.time - | dt.timedelta -) +IntegerType: TypeAlias = ( + lib.Int8Type + | lib.Int16Type + | lib.Int32Type + | lib.Int64Type + | lib.UInt8Type + | lib.UInt16Type + | lib.UInt32Type + | lib.UInt64Type +) +PyScalar: TypeAlias = ( + bool + | int + | float + | Decimal + | str + | bytes + | dt.date + | dt.datetime + | dt.time + | dt.timedelta +) +NumpyScalar: TypeAlias = "np.generic[Any]" + +PyScalarT_co = TypeVar("PyScalarT_co", bound=PyScalar, covariant=True) +NumpyScalarT_co = TypeVar("NumpyScalarT_co", bound=NumpyScalar, covariant=True) +DataTypeT_co = TypeVar("DataTypeT_co", bound=lib.DataType, covariant=True) + +IntoArray: TypeAlias = ( + Sequence[PyScalarT_co | None] + | NDArray[NumpyScalarT_co] + | lib.Array[lib.Scalar[DataTypeT_co]] + | ChunkedArray[Any] +) + +Mask: TypeAlias = IntoArray[bool, np.bool_, lib.BoolType] +Indices: TypeAlias = IntoArray[int, np.integer[Any], IntegerType] _T = TypeVar("_T") _V = TypeVar("_V", covariant=True) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index a94f3cce32bd..69ff034d99b9 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -197,7 +197,9 @@ _IndexT = TypeVar( UInt64Type, Int64Type, ) -_BasicValueT = TypeVar("_BasicValueT", bound=_BasicDataType, default=_BasicDataType) +_BasicValueT = TypeVar( + "_BasicValueT", bound=_BasicDataType[Any], default=_BasicDataType[Any] +) _ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) _K = TypeVar("_K", bound=DataType, default=DataType) _Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) @@ -226,16 +228,16 @@ _Size = TypeVar("_Size", default=int) class StructType(DataType): def get_field_index(self, name: str) -> int: ... - def field(self, i: int | str) -> Field: ... + def field(self, i: int | str) -> Field[Any]: ... def get_all_field_indices(self, name: str) -> list[int]: ... def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field]: ... + def __iter__(self) -> Iterator[Field[Any]]: ... __getitem__ = field @property def names(self) -> list[str]: ... @property - def fields(self) -> list[Field]: ... + def fields(self) -> list[Field[Any]]: ... class UnionType(DataType): @property @@ -243,8 +245,8 @@ class UnionType(DataType): @property def type_codes(self) -> list[int]: ... def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field]: ... - def field(self, i: int) -> Field: ... + def __iter__(self) -> Iterator[Field[Any]]: ... + def field(self, i: int) -> Field[Any]: ... __getitem__ = field @@ -264,10 +266,10 @@ class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): @property def value_type(self) -> _BasicValueT: ... -_StorageT = TypeVar("_StorageT", bound=Array | ChunkedArray) +_StorageT = TypeVar("_StorageT", bound=Array[Any] | ChunkedArray[Any]) class BaseExtensionType(DataType): - def __arrow_ext_class__(self) -> type[ExtensionArray]: ... + def __arrow_ext_class__(self) -> type[ExtensionArray[Any]]: ... def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... @property def extension_name(self) -> str: ... @@ -310,7 +312,9 @@ def register_extension_type(ext_type: ExtensionType) -> None: ... def unregister_extension_type(type_name: str) -> None: ... _StrOrBytes: TypeAlias = str | bytes -_MetadataMapping: TypeAlias = Mapping[_StrOrBytes, _StrOrBytes] +_MetadataMapping: TypeAlias = ( + Mapping[bytes, bytes] | Mapping[str, str] | Mapping[bytes, str] | Mapping[str, bytes] +) _MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] _KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None _DataTypeAlias: TypeAlias = Literal[ @@ -373,12 +377,6 @@ _DataTypeAlias: TypeAlias = Literal[ _DataTypeAliasInput: TypeAlias = _DataTypeAlias | str _DataTypeLike: TypeAlias = DataType | _DataTypeAliasInput _FieldTypeInput: TypeAlias = _DataTypeLike | None -_SchemaMetadataInput: TypeAlias = ( - Mapping[bytes, bytes] - | Mapping[str, str] - | Mapping[bytes, str] - | Mapping[str, bytes] -) class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __init__( @@ -395,7 +393,7 @@ class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def to_dict(self) -> dict[bytes, bytes]: ... class Field(_Weakrefable, Generic[_DataTypeT_co]): - def equals(self, other: Field, check_metadata: bool = False) -> bool: ... + def equals(self, other: Field[Any], check_metadata: bool = False) -> bool: ... def __hash__(self) -> int: ... @property def nullable(self) -> bool: ... @@ -410,16 +408,16 @@ class Field(_Weakrefable, Generic[_DataTypeT_co]): metadata: _MetadataMapping | Any, ) -> Self: ... def remove_metadata(self) -> Self: ... - def with_type(self, new_type: DataType) -> Field: ... + def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: ... def with_name(self, name: str) -> Self: ... def with_nullable(self, nullable: bool) -> Field[_DataTypeT_co]: ... - def flatten(self) -> list[Field]: ... + def flatten(self) -> list[Field[Any]]: ... def _export_to_c(self, out_ptr: int) -> None: ... @classmethod def _import_from_c(cls, in_ptr: int) -> Self: ... def __arrow_c_schema__(self) -> Any: ... @classmethod - def _import_from_c_capsule(cls, schema) -> Self: ... + def _import_from_c_capsule(cls, schema: Any) -> Self: ... _StructFieldTuple: TypeAlias = ( tuple[str, Field[Any] | None] | tuple[str, _FieldTypeInput] @@ -431,14 +429,14 @@ _StructFieldsInput: TypeAlias = ( class Schema(_Weakrefable): def __len__(self) -> int: ... - def __getitem__(self, key: str | int) -> Field: ... + def __getitem__(self, key: str | int) -> Field[Any]: ... _field = __getitem__ - def __iter__(self) -> Iterator[Field]: ... + def __iter__(self) -> Iterator[Field[Any]]: ... def __hash__(self) -> int: ... def __sizeof__(self) -> int: ... @property - def pandas_metadata(self) -> dict: ... + def pandas_metadata(self) -> dict[bytes, bytes]: ... @property def names(self) -> list[str]: ... @property @@ -451,15 +449,15 @@ class Schema(_Weakrefable): def from_pandas( cls, df: pd.DataFrame, preserve_index: bool | None = None ) -> Schema: ... - def field(self, i: int | str | bytes) -> Field: ... - @deprecated("Use 'field' instead") - def field_by_name(self, name: str) -> Field: ... + def field(self, i: int | str | bytes) -> Field[Any]: ... + @deprecated("Use 'field' instead") + def field_by_name(self, name: str) -> Field[Any]: ... def get_field_index(self, name: str) -> int: ... def get_all_field_indices(self, name: str) -> list[int]: ... - def append(self, field: Field) -> Schema: ... - def insert(self, i: int, field: Field) -> Schema: ... - def remove(self, i: int) -> Schema: ... - def set(self, i: int, field: Field) -> Schema: ... + def append(self, field: Field[Any]) -> Schema: ... + def insert(self, i: int, field: Field[Any]) -> Schema: ... + def remove(self, i: int) -> Schema: ... + def set(self, i: int, field: Field[Any]) -> Schema: ... @deprecated("Use 'with_metadata' instead") def add_metadata(self, metadata: _MetadataMapping) -> Schema: ... def with_metadata(self, metadata: _MetadataMapping) -> Schema: ... @@ -538,24 +536,30 @@ large_utf8 = large_string def binary_view() -> BinaryViewType: ... def string_view() -> StringViewType: ... -def list_( - value_type: _DataTypeT | Field[_DataTypeT] | None = None, - list_size: Literal[-1] | _Size | None = None, -) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ... -def large_list( - value_type: _DataTypeT | Field[_DataTypeT] | None = None, -) -> LargeListType[_DataTypeT]: ... -def list_view( - value_type: _DataTypeT | Field[_DataTypeT] | None = None, -) -> ListViewType[_DataTypeT]: ... -def large_list_view( - value_type: _DataTypeT | Field[_DataTypeT] | None = None, -) -> LargeListViewType[_DataTypeT]: ... -def map_( - key_type: _K | Field | _FieldTypeInput = None, - item_type: _ValueT | Field | _FieldTypeInput = None, - keys_sorted: _Ordered | None = None, -) -> MapType[_K, _ValueT, _Ordered]: ... +from typing import overload + +@overload +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] | None = None +) -> ListType[_DataTypeT]: ... +@overload +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: _Size +) -> FixedSizeListType[_DataTypeT, _Size]: ... +def large_list( + value_type: _DataTypeT | Field[_DataTypeT], +) -> LargeListType[_DataTypeT]: ... +def list_view( + value_type: _DataTypeT | Field[_DataTypeT], +) -> ListViewType[_DataTypeT]: ... +def large_list_view( + value_type: _DataTypeT | Field[_DataTypeT], +) -> LargeListViewType[_DataTypeT]: ... +def map_( + key_type: _K | Field[_K] | _FieldTypeInput, + item_type: _ValueT | Field[_ValueT] | _FieldTypeInput, + keys_sorted: _Ordered | None = None, +) -> MapType[_K, _ValueT, _Ordered]: ... def dictionary( index_type: _IndexT | _DataTypeAliasInput, value_type: _BasicValueT | _DataTypeAliasInput, @@ -596,7 +600,7 @@ def schema( | Iterable[tuple[str, _FieldTypeInput]] | Mapping[Any, _FieldTypeInput] ), - metadata: _SchemaMetadataInput | None = None, + metadata: _MetadataMapping | None = None, ) -> Schema: ... def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... From 50077b6c4f6cea409ec024c70ffc636af70489b3 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sun, 14 Jun 2026 16:42:49 +0200 Subject: [PATCH 16/21] Update python/pyarrow-stubs/pyarrow/_types.pyi Co-authored-by: Dan Redding <125183946+dangotbanned@users.noreply.github.com> --- python/pyarrow-stubs/pyarrow/_types.pyi | 1350 +++++++++++------------ 1 file changed, 675 insertions(+), 675 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 69ff034d99b9..efc3bd847ca5 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -1,541 +1,541 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datetime as dt # noqa: F401 - -from collections.abc import Iterable, Iterator, Mapping, Sequence -from decimal import Decimal # noqa: F401 -from typing import Any, Generic, Literal, Protocol, TypeAlias - -import numpy as np -import pandas as pd - -from typing_extensions import Self, TypeVar, deprecated - -from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit -from pyarrow.lib import ( # noqa: F401 - Array, - Buffer, - ChunkedArray, - ExtensionArray, - ExtensionScalar, - MemoryPool, - MonthDayNano, - Table, -) - -class _Weakrefable: ... -class _Metadata(_Weakrefable): ... - -class DataType(_Weakrefable): - def field(self, i: int) -> Field[Any]: ... - @property - def id(self) -> int: ... - @property - def bit_width(self) -> int: ... - @property - def byte_width(self) -> int: ... - @property - def num_fields(self) -> int: ... - @property - def num_buffers(self) -> int: ... - @property - def has_variadic_buffers(self) -> bool: ... - - def __hash__(self) -> int: ... - def equals( - self, other: DataType | str, *, check_metadata: bool = False - ) -> bool: ... - def to_pandas_dtype(self) -> np.generic: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod - def _import_from_c(cls, in_ptr: int) -> Self: ... - def __arrow_c_schema__(self) -> Any: ... - @classmethod - def _import_from_c_capsule(cls, schema) -> Self: ... - -_AsPyType = TypeVar("_AsPyType") -_DataTypeT = TypeVar("_DataTypeT", bound=DataType) -_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) - -class _BasicDataType(DataType, Generic[_AsPyType]): ... -class NullType(_BasicDataType[None]): ... -class BoolType(_BasicDataType[bool]): ... -class UInt8Type(_BasicDataType[int]): ... -class Int8Type(_BasicDataType[int]): ... -class UInt16Type(_BasicDataType[int]): ... -class Int16Type(_BasicDataType[int]): ... -class UInt32Type(_BasicDataType[int]): ... -class Int32Type(_BasicDataType[int]): ... -class UInt64Type(_BasicDataType[int]): ... -class Int64Type(_BasicDataType[int]): ... -class Float16Type(_BasicDataType[float]): ... -class Float32Type(_BasicDataType[float]): ... -class Float64Type(_BasicDataType[float]): ... -class Date32Type(_BasicDataType[dt.date]): ... -class Date64Type(_BasicDataType[dt.date]): ... -class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... -class StringType(_BasicDataType[str]): ... -class LargeStringType(_BasicDataType[str]): ... -class StringViewType(_BasicDataType[str]): ... -class BinaryType(_BasicDataType[bytes]): ... -class LargeBinaryType(_BasicDataType[bytes]): ... -class BinaryViewType(_BasicDataType[bytes]): ... - -_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) -_Tz = TypeVar("_Tz", str, None, default=None) - -class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): - @property - def unit(self) -> _Unit: ... - @property - def tz(self) -> _Tz: ... - -_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) - -class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): - @property - def unit(self) -> _Time32Unit: ... - -_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) - -class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): - @property - def unit(self) -> _Time64Unit: ... - -class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): - @property - def unit(self) -> _Unit: ... - -_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) - -class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... - -_Precision = TypeVar("_Precision", default=Any) -_Scale = TypeVar("_Scale", default=Any) -_Precision_co = TypeVar("_Precision_co", default=Any, covariant=True) -_Scale_co = TypeVar("_Scale_co", default=Any, covariant=True) - -class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): - @property - def precision(self) -> _Precision_co: ... - @property - def scale(self) -> _Scale_co: ... - -class Decimal32Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] -): ... - -class Decimal64Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] -): ... - -class Decimal128Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] -): ... - -class Decimal256Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] -): ... - -class ListType(DataType, Generic[_DataTypeT_co]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - -class LargeListType(DataType, Generic[_DataTypeT_co]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - -class ListViewType(DataType, Generic[_DataTypeT_co]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - -class LargeListViewType(DataType, Generic[_DataTypeT_co]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - -class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - @property - def list_size(self) -> int: ... - -class DictionaryMemo(_Weakrefable): ... - -_IndexT = TypeVar( - "_IndexT", - UInt8Type, - Int8Type, - UInt16Type, - Int16Type, - UInt32Type, - Int32Type, - UInt64Type, - Int64Type, -) +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias + +import numpy as np +import pandas as pd + +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit +from pyarrow.lib import ( # noqa: F401 + Array, + Buffer, + ChunkedArray, + ExtensionArray, + ExtensionScalar, + MemoryPool, + MonthDayNano, + Table, +) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field[Any]: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + + def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... + def to_pandas_dtype(self) -> np.generic: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema: Any) -> Self: ... + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): + @property + def unit(self) -> _Unit: ... + @property + def tz(self) -> _Tz: ... + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... + +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... + +_Precision = TypeVar("_Precision", default=Any) +_Scale = TypeVar("_Scale", default=Any) +_Precision_co = TypeVar("_Precision_co", default=Any, covariant=True) +_Scale_co = TypeVar("_Scale_co", default=Any, covariant=True) + +class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): + @property + def precision(self) -> _Precision_co: ... + @property + def scale(self) -> _Scale_co: ... + +class Decimal32Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal64Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal128Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class Decimal256Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class ListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class ListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + @property + def list_size(self) -> int: ... + +class DictionaryMemo(_Weakrefable): ... + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) _BasicValueT = TypeVar( "_BasicValueT", bound=_BasicDataType[Any], default=_BasicDataType[Any] ) -_ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) -_K = TypeVar("_K", bound=DataType, default=DataType) -_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) - -class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): - @property - def ordered(self) -> _Ordered: ... - @property - def index_type(self) -> _IndexT: ... - @property - def value_type(self) -> _BasicValueT: ... - -class MapType(DataType, Generic[_K, _ValueT, _Ordered]): - @property - def key_field(self) -> Field[_K]: ... - @property - def key_type(self) -> _K: ... - @property - def item_field(self) -> Field[_ValueT]: ... - @property - def item_type(self) -> _ValueT: ... - @property - def keys_sorted(self) -> _Ordered: ... - -_Size = TypeVar("_Size", default=int) - -class StructType(DataType): - def get_field_index(self, name: str) -> int: ... +_ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) +_K = TypeVar("_K", bound=DataType, default=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + +class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + @property + def index_type(self) -> _IndexT: ... + @property + def value_type(self) -> _BasicValueT: ... + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + @property + def key_type(self) -> _K: ... + @property + def item_field(self) -> Field[_ValueT]: ... + @property + def item_type(self) -> _ValueT: ... + @property + def keys_sorted(self) -> _Ordered: ... + +_Size = TypeVar("_Size", default=int) + +class StructType(DataType): + def get_field_index(self, name: str) -> int: ... def field(self, i: int | str) -> Field[Any]: ... - def get_all_field_indices(self, name: str) -> list[int]: ... - def __len__(self) -> int: ... + def get_all_field_indices(self, name: str) -> list[int]: ... + def __len__(self) -> int: ... def __iter__(self) -> Iterator[Field[Any]]: ... - - __getitem__ = field - @property - def names(self) -> list[str]: ... - @property + + __getitem__ = field + @property + def names(self) -> list[str]: ... + @property def fields(self) -> list[Field[Any]]: ... - -class UnionType(DataType): - @property - def mode(self) -> Literal["sparse", "dense"]: ... - @property - def type_codes(self) -> list[int]: ... - def __len__(self) -> int: ... + +class UnionType(DataType): + @property + def mode(self) -> Literal["sparse", "dense"]: ... + @property + def type_codes(self) -> list[int]: ... + def __len__(self) -> int: ... def __iter__(self) -> Iterator[Field[Any]]: ... def field(self, i: int) -> Field[Any]: ... - - __getitem__ = field - -class SparseUnionType(UnionType): - @property - def mode(self) -> Literal["sparse"]: ... - -class DenseUnionType(UnionType): - @property - def mode(self) -> Literal["dense"]: ... - -_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) - -class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): - @property - def run_end_type(self) -> _RunEndType: ... - @property - def value_type(self) -> _BasicValueT: ... - + + __getitem__ = field + +class SparseUnionType(UnionType): + @property + def mode(self) -> Literal["sparse"]: ... + +class DenseUnionType(UnionType): + @property + def mode(self) -> Literal["dense"]: ... + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + +class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _BasicValueT: ... + _StorageT = TypeVar("_StorageT", bound=Array[Any] | ChunkedArray[Any]) - -class BaseExtensionType(DataType): + +class BaseExtensionType(DataType): def __arrow_ext_class__(self) -> type[ExtensionArray[Any]]: ... - def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... - @property - def extension_name(self) -> str: ... - @property - def storage_type(self) -> DataType: ... - def wrap_array(self, storage: _StorageT) -> _StorageT: ... - -class ExtensionType(BaseExtensionType): - def __init__(self, storage_type: DataType, extension_name: str) -> None: ... - def __arrow_ext_serialize__(self) -> bytes: ... - @classmethod - def __arrow_ext_deserialize__( - cls, storage_type: DataType, serialized: bytes - ) -> Self: ... - -class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): - @property - def value_type(self) -> _ValueT: ... - @property - def shape(self) -> list[int]: ... - @property - def dim_names(self) -> list[str] | None: ... - @property - def permutation(self) -> list[int] | None: ... - -class Bool8Type(BaseExtensionType): ... -class UuidType(BaseExtensionType): ... -class JsonType(BaseExtensionType): ... - -class OpaqueType(BaseExtensionType): - @property - def type_name(self) -> str: ... - @property - def vendor_name(self) -> str: ... - -class UnknownExtensionType(ExtensionType): - def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... - -def register_extension_type(ext_type: ExtensionType) -> None: ... -def unregister_extension_type(type_name: str) -> None: ... - -_StrOrBytes: TypeAlias = str | bytes + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + @property + def extension_name(self) -> str: ... + @property + def storage_type(self) -> DataType: ... + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + +class ExtensionType(BaseExtensionType): + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + def __arrow_ext_serialize__(self) -> bytes: ... + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes + ) -> Self: ... + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + @property + def value_type(self) -> _ValueT: ... + @property + def shape(self) -> list[int]: ... + @property + def dim_names(self) -> list[str] | None: ... + @property + def permutation(self) -> list[int] | None: ... + +class Bool8Type(BaseExtensionType): ... +class UuidType(BaseExtensionType): ... +class JsonType(BaseExtensionType): ... + +class OpaqueType(BaseExtensionType): + @property + def type_name(self) -> str: ... + @property + def vendor_name(self) -> str: ... + +class UnknownExtensionType(ExtensionType): + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + +def register_extension_type(ext_type: ExtensionType) -> None: ... +def unregister_extension_type(type_name: str) -> None: ... + +_StrOrBytes: TypeAlias = str | bytes _MetadataMapping: TypeAlias = ( Mapping[bytes, bytes] | Mapping[str, str] | Mapping[bytes, str] | Mapping[str, bytes] ) -_MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] -_KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None -_DataTypeAlias: TypeAlias = Literal[ - "null", - "bool", - "boolean", - "i1", - "int8", - "i2", - "int16", - "i4", - "int32", - "i8", - "int64", - "u1", - "uint8", - "u2", - "uint16", - "u4", - "uint32", - "u8", - "uint64", - "f2", - "halffloat", - "float16", - "f4", - "float", - "float32", - "f8", - "double", - "float64", - "string", - "str", - "utf8", - "binary", - "large_string", - "large_str", - "large_utf8", - "large_binary", - "binary_view", - "string_view", - "date32", - "date64", - "date32[day]", - "date64[ms]", - "time32[s]", - "time32[ms]", - "time64[us]", - "time64[ns]", - "timestamp[s]", - "timestamp[ms]", - "timestamp[us]", - "timestamp[ns]", - "duration[s]", - "duration[ms]", - "duration[us]", - "duration[ns]", - "month_day_nano_interval", -] -_DataTypeAliasInput: TypeAlias = _DataTypeAlias | str -_DataTypeLike: TypeAlias = DataType | _DataTypeAliasInput -_FieldTypeInput: TypeAlias = _DataTypeLike | None - -class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): - def __init__( - self, - __arg0__: _KeyValueMetadataInput | KeyValueMetadata = None, - **kwargs: str, - ) -> None: ... - def equals(self, other: KeyValueMetadata) -> bool: ... - def __len__(self) -> int: ... - def __contains__(self, /, __key: object) -> bool: ... - def __getitem__(self, /, __key: Any) -> Any: ... - def __iter__(self) -> Iterator[bytes]: ... - def get_all(self, key: str) -> list[bytes]: ... - def to_dict(self) -> dict[bytes, bytes]: ... - -class Field(_Weakrefable, Generic[_DataTypeT_co]): +_MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] +_KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None +_DataTypeAlias: TypeAlias = Literal[ + "null", + "bool", + "boolean", + "i1", + "int8", + "i2", + "int16", + "i4", + "int32", + "i8", + "int64", + "u1", + "uint8", + "u2", + "uint16", + "u4", + "uint32", + "u8", + "uint64", + "f2", + "halffloat", + "float16", + "f4", + "float", + "float32", + "f8", + "double", + "float64", + "string", + "str", + "utf8", + "binary", + "large_string", + "large_str", + "large_utf8", + "large_binary", + "binary_view", + "string_view", + "date32", + "date64", + "date32[day]", + "date64[ms]", + "time32[s]", + "time32[ms]", + "time64[us]", + "time64[ns]", + "timestamp[s]", + "timestamp[ms]", + "timestamp[us]", + "timestamp[ns]", + "duration[s]", + "duration[ms]", + "duration[us]", + "duration[ns]", + "month_day_nano_interval", +] +_DataTypeAliasInput: TypeAlias = _DataTypeAlias | str +_DataTypeLike: TypeAlias = DataType | _DataTypeAliasInput +_FieldTypeInput: TypeAlias = _DataTypeLike | None + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + def __init__( + self, + __arg0__: _KeyValueMetadataInput | KeyValueMetadata = None, + **kwargs: str, + ) -> None: ... + def equals(self, other: KeyValueMetadata) -> bool: ... + def __len__(self) -> int: ... + def __contains__(self, /, __key: object) -> bool: ... + def __getitem__(self, /, __key: Any) -> Any: ... + def __iter__(self) -> Iterator[bytes]: ... + def get_all(self, key: str) -> list[bytes]: ... + def to_dict(self) -> dict[bytes, bytes]: ... + +class Field(_Weakrefable, Generic[_DataTypeT_co]): def equals(self, other: Field[Any], check_metadata: bool = False) -> bool: ... - def __hash__(self) -> int: ... - @property - def nullable(self) -> bool: ... - @property - def name(self) -> str: ... - @property - def metadata(self) -> dict[bytes, bytes] | None: ... - @property - def type(self) -> _DataTypeT_co: ... - def with_metadata( - self, - metadata: _MetadataMapping | Any, - ) -> Self: ... - def remove_metadata(self) -> Self: ... + def __hash__(self) -> int: ... + @property + def nullable(self) -> bool: ... + @property + def name(self) -> str: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + @property + def type(self) -> _DataTypeT_co: ... + def with_metadata( + self, + metadata: _MetadataMapping | Any, + ) -> Self: ... + def remove_metadata(self) -> Self: ... def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: ... - def with_name(self, name: str) -> Self: ... - def with_nullable(self, nullable: bool) -> Field[_DataTypeT_co]: ... + def with_name(self, name: str) -> Self: ... + def with_nullable(self, nullable: bool) -> Field[_DataTypeT_co]: ... def flatten(self) -> list[Field[Any]]: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod - def _import_from_c(cls, in_ptr: int) -> Self: ... - def __arrow_c_schema__(self) -> Any: ... - @classmethod + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod def _import_from_c_capsule(cls, schema: Any) -> Self: ... - -_StructFieldTuple: TypeAlias = ( - tuple[str, Field[Any] | None] | tuple[str, _FieldTypeInput] -) -_StructFieldsInput: TypeAlias = ( - Iterable[Field[Any] | _StructFieldTuple] - | Mapping[str, Field[Any] | _FieldTypeInput] -) - -class Schema(_Weakrefable): - def __len__(self) -> int: ... + +_StructFieldTuple: TypeAlias = ( + tuple[str, Field[Any] | None] | tuple[str, _FieldTypeInput] +) +_StructFieldsInput: TypeAlias = ( + Iterable[Field[Any] | _StructFieldTuple] + | Mapping[str, Field[Any] | _FieldTypeInput] +) + +class Schema(_Weakrefable): + def __len__(self) -> int: ... def __getitem__(self, key: str | int) -> Field[Any]: ... - - _field = __getitem__ + + _field = __getitem__ def __iter__(self) -> Iterator[Field[Any]]: ... - def __hash__(self) -> int: ... - def __sizeof__(self) -> int: ... - @property + def __hash__(self) -> int: ... + def __sizeof__(self) -> int: ... + @property def pandas_metadata(self) -> dict[bytes, bytes]: ... - @property - def names(self) -> list[str]: ... - @property - def types(self) -> list[DataType]: ... - @property - def metadata(self) -> dict[bytes, bytes]: ... - def empty_table(self) -> Table: ... - def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... - @classmethod - def from_pandas( - cls, df: pd.DataFrame, preserve_index: bool | None = None - ) -> Schema: ... + @property + def names(self) -> list[str]: ... + @property + def types(self) -> list[DataType]: ... + @property + def metadata(self) -> dict[bytes, bytes]: ... + def empty_table(self) -> Table: ... + def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... + @classmethod + def from_pandas( + cls, df: pd.DataFrame, preserve_index: bool | None = None + ) -> Schema: ... def field(self, i: int | str | bytes) -> Field[Any]: ... @deprecated("Use 'field' instead") def field_by_name(self, name: str) -> Field[Any]: ... - def get_field_index(self, name: str) -> int: ... - def get_all_field_indices(self, name: str) -> list[int]: ... + def get_field_index(self, name: str) -> int: ... + def get_all_field_indices(self, name: str) -> list[int]: ... def append(self, field: Field[Any]) -> Schema: ... def insert(self, i: int, field: Field[Any]) -> Schema: ... def remove(self, i: int) -> Schema: ... def set(self, i: int, field: Field[Any]) -> Schema: ... - @deprecated("Use 'with_metadata' instead") - def add_metadata(self, metadata: _MetadataMapping) -> Schema: ... - def with_metadata(self, metadata: _MetadataMapping) -> Schema: ... - def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... - def remove_metadata(self) -> Schema: ... - def to_string( - self, - truncate_metadata: bool = True, - show_field_metadata: bool = True, - show_schema_metadata: bool = True, - element_size_limit: int | None = None, - ) -> str: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod - def _import_from_c(cls, in_ptr: int) -> Schema: ... - def __arrow_c_schema__(self) -> Any: ... - @staticmethod - def _import_from_c_capsule(schema: Any) -> Schema: ... - -def unify_schemas( - schemas: Sequence[Schema], - *, - promote_options: Literal["default", "permissive"] = "default", -) -> Schema: ... -def field( - name: SupportsArrowSchema | str | bytes, - type: _DataTypeT | _DataTypeAliasInput | None = None, - nullable: bool = True, - metadata: _MetadataMapping | None = None, -) -> Field[_DataTypeT] | Field[Any]: ... -def null() -> NullType: ... -def bool_() -> BoolType: ... -def uint8() -> UInt8Type: ... -def int8() -> Int8Type: ... -def uint16() -> UInt16Type: ... -def int16() -> Int16Type: ... -def uint32() -> UInt32Type: ... -def int32() -> Int32Type: ... -def int64() -> Int64Type: ... -def uint64() -> UInt64Type: ... -def timestamp( - unit: _Unit | str, tz: _Tz | None = None -) -> TimestampType[_Unit, _Tz]: ... -def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... -def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... -def duration(unit: _Unit | str) -> DurationType[_Unit]: ... -def month_day_nano_interval() -> MonthDayNanoIntervalType: ... -def date32() -> Date32Type: ... -def date64() -> Date64Type: ... -def float16() -> Float16Type: ... -def float32() -> Float32Type: ... -def float64() -> Float64Type: ... -def decimal32( - precision: _Precision, scale: _Scale | Literal[0] = 0 -) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... -def decimal64( - precision: _Precision, scale: _Scale | Literal[0] = 0 -) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... -def decimal128( - precision: _Precision, scale: _Scale | Literal[0] = 0 -) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... -def decimal256( - precision: _Precision, scale: _Scale | Literal[0] = 0 -) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... -def string() -> StringType: ... - -utf8 = string - -def binary( - length: Literal[-1] | int = ..., -) -> BinaryType | FixedSizeBinaryType[bytes]: ... -def large_binary() -> LargeBinaryType: ... -def large_string() -> LargeStringType: ... - -large_utf8 = large_string - -def binary_view() -> BinaryViewType: ... -def string_view() -> StringViewType: ... + @deprecated("Use 'with_metadata' instead") + def add_metadata(self, metadata: _MetadataMapping) -> Schema: ... + def with_metadata(self, metadata: _MetadataMapping) -> Schema: ... + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + def remove_metadata(self) -> Schema: ... + def to_string( + self, + truncate_metadata: bool = True, + show_field_metadata: bool = True, + show_schema_metadata: bool = True, + element_size_limit: int | None = None, + ) -> str: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Schema: ... + def __arrow_c_schema__(self) -> Any: ... + @staticmethod + def _import_from_c_capsule(schema: Any) -> Schema: ... + +def unify_schemas( + schemas: Sequence[Schema], + *, + promote_options: Literal["default", "permissive"] = "default", +) -> Schema: ... +def field( + name: SupportsArrowSchema | str | bytes, + type: _DataTypeT | _DataTypeAliasInput | None = None, + nullable: bool = True, + metadata: _MetadataMapping | None = None, +) -> Field[_DataTypeT] | Field[Any]: ... +def null() -> NullType: ... +def bool_() -> BoolType: ... +def uint8() -> UInt8Type: ... +def int8() -> Int8Type: ... +def uint16() -> UInt16Type: ... +def int16() -> Int16Type: ... +def uint32() -> UInt32Type: ... +def int32() -> Int32Type: ... +def int64() -> Int64Type: ... +def uint64() -> UInt64Type: ... +def timestamp( + unit: _Unit | str, tz: _Tz | None = None +) -> TimestampType[_Unit, _Tz]: ... +def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... +def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... +def duration(unit: _Unit | str) -> DurationType[_Unit]: ... +def month_day_nano_interval() -> MonthDayNanoIntervalType: ... +def date32() -> Date32Type: ... +def date64() -> Date64Type: ... +def float16() -> Float16Type: ... +def float32() -> Float32Type: ... +def float64() -> Float64Type: ... +def decimal32( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... +def decimal64( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... +def decimal128( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... +def decimal256( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... +def string() -> StringType: ... + +utf8 = string + +def binary( + length: Literal[-1] | int = ..., +) -> BinaryType | FixedSizeBinaryType[bytes]: ... +def large_binary() -> LargeBinaryType: ... +def large_string() -> LargeStringType: ... + +large_utf8 = large_string + +def binary_view() -> BinaryViewType: ... +def string_view() -> StringViewType: ... from typing import overload @overload @@ -560,168 +560,168 @@ def map_( item_type: _ValueT | Field[_ValueT] | _FieldTypeInput, keys_sorted: _Ordered | None = None, ) -> MapType[_K, _ValueT, _Ordered]: ... -def dictionary( - index_type: _IndexT | _DataTypeAliasInput, - value_type: _BasicValueT | _DataTypeAliasInput, - ordered: _Ordered | None = None, -) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... -def struct( - fields: _StructFieldsInput, -) -> StructType: ... -def sparse_union( - child_fields: list[Field[Any]], type_codes: list[int] | None = None -) -> SparseUnionType: ... -def dense_union( - child_fields: list[Field[Any]], type_codes: list[int] | None = None -) -> DenseUnionType: ... -def union( - child_fields: list[Field[Any]], - mode: Literal["sparse", "dense"] | int, - type_codes: list[int] | None = None, -) -> SparseUnionType | DenseUnionType: ... -def run_end_encoded( - run_end_type: _RunEndType | _DataTypeAliasInput | None, - value_type: _BasicValueT | _DataTypeAliasInput | None, -) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... -def json_(storage_type: DataType = ...) -> JsonType: ... -def uuid() -> UuidType: ... -def fixed_shape_tensor( - value_type: _ValueT, - shape: Sequence[int], - dim_names: Sequence[str] | None = None, - permutation: Sequence[int] | None = None, -) -> FixedShapeTensorType[_ValueT]: ... -def bool8() -> Bool8Type: ... -def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... -def type_for_alias(name: Any) -> DataType: ... -def schema( - fields: ( - Iterable[Field[Any]] - | Iterable[tuple[str, _FieldTypeInput]] - | Mapping[Any, _FieldTypeInput] - ), +def dictionary( + index_type: _IndexT | _DataTypeAliasInput, + value_type: _BasicValueT | _DataTypeAliasInput, + ordered: _Ordered | None = None, +) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... +def struct( + fields: _StructFieldsInput, +) -> StructType: ... +def sparse_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> SparseUnionType: ... +def dense_union( + child_fields: list[Field[Any]], type_codes: list[int] | None = None +) -> DenseUnionType: ... +def union( + child_fields: list[Field[Any]], + mode: Literal["sparse", "dense"] | int, + type_codes: list[int] | None = None, +) -> SparseUnionType | DenseUnionType: ... +def run_end_encoded( + run_end_type: _RunEndType | _DataTypeAliasInput | None, + value_type: _BasicValueT | _DataTypeAliasInput | None, +) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... +def json_(storage_type: DataType = ...) -> JsonType: ... +def uuid() -> UuidType: ... +def fixed_shape_tensor( + value_type: _ValueT, + shape: Sequence[int], + dim_names: Sequence[str] | None = None, + permutation: Sequence[int] | None = None, +) -> FixedShapeTensorType[_ValueT]: ... +def bool8() -> Bool8Type: ... +def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... +def type_for_alias(name: Any) -> DataType: ... +def schema( + fields: ( + Iterable[Field[Any]] + | Iterable[tuple[str, _FieldTypeInput]] + | Mapping[Any, _FieldTypeInput] + ), metadata: _MetadataMapping | None = None, -) -> Schema: ... -def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... - -__all__ = [ - "_Weakrefable", - "_Metadata", - "DataType", - "_BasicDataType", - "NullType", - "BoolType", - "UInt8Type", - "Int8Type", - "UInt16Type", - "Int16Type", - "UInt32Type", - "Int32Type", - "UInt64Type", - "Int64Type", - "Float16Type", - "Float32Type", - "Float64Type", - "Date32Type", - "Date64Type", - "MonthDayNanoIntervalType", - "StringType", - "LargeStringType", - "StringViewType", - "BinaryType", - "LargeBinaryType", - "BinaryViewType", - "TimestampType", - "Time32Type", - "Time64Type", - "DurationType", - "FixedSizeBinaryType", - "Decimal32Type", - "Decimal64Type", - "Decimal128Type", - "Decimal256Type", - "ListType", - "LargeListType", - "ListViewType", - "LargeListViewType", - "FixedSizeListType", - "DictionaryMemo", - "DictionaryType", - "MapType", - "StructType", - "UnionType", - "SparseUnionType", - "DenseUnionType", - "RunEndEncodedType", - "BaseExtensionType", - "ExtensionType", - "FixedShapeTensorType", - "Bool8Type", - "UuidType", - "JsonType", - "OpaqueType", - "UnknownExtensionType", - "register_extension_type", - "unregister_extension_type", - "KeyValueMetadata", - "Field", - "Schema", - "unify_schemas", - "field", - "null", - "bool_", - "uint8", - "int8", - "uint16", - "int16", - "uint32", - "int32", - "int64", - "uint64", - "timestamp", - "time32", - "time64", - "duration", - "month_day_nano_interval", - "date32", - "date64", - "float16", - "float32", - "float64", - "decimal32", - "decimal64", - "decimal128", - "decimal256", - "string", - "utf8", - "binary", - "large_binary", - "large_string", - "large_utf8", - "binary_view", - "string_view", - "list_", - "large_list", - "list_view", - "large_list_view", - "map_", - "dictionary", - "struct", - "sparse_union", - "dense_union", - "union", - "run_end_encoded", - "json_", - "uuid", - "fixed_shape_tensor", - "bool8", - "opaque", - "type_for_alias", - "schema", - "from_numpy_dtype", - "_Unit", - "_Tz", - "_Time32Unit", - "_Time64Unit", - "_DataTypeT", -] +) -> Schema: ... +def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... + +__all__ = [ + "_Weakrefable", + "_Metadata", + "DataType", + "_BasicDataType", + "NullType", + "BoolType", + "UInt8Type", + "Int8Type", + "UInt16Type", + "Int16Type", + "UInt32Type", + "Int32Type", + "UInt64Type", + "Int64Type", + "Float16Type", + "Float32Type", + "Float64Type", + "Date32Type", + "Date64Type", + "MonthDayNanoIntervalType", + "StringType", + "LargeStringType", + "StringViewType", + "BinaryType", + "LargeBinaryType", + "BinaryViewType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "ListType", + "LargeListType", + "ListViewType", + "LargeListViewType", + "FixedSizeListType", + "DictionaryMemo", + "DictionaryType", + "MapType", + "StructType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "RunEndEncodedType", + "BaseExtensionType", + "ExtensionType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "KeyValueMetadata", + "Field", + "Schema", + "unify_schemas", + "field", + "null", + "bool_", + "uint8", + "int8", + "uint16", + "int16", + "uint32", + "int32", + "int64", + "uint64", + "timestamp", + "time32", + "time64", + "duration", + "month_day_nano_interval", + "date32", + "date64", + "float16", + "float32", + "float64", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "string", + "utf8", + "binary", + "large_binary", + "large_string", + "large_utf8", + "binary_view", + "string_view", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "dictionary", + "struct", + "sparse_union", + "dense_union", + "union", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "type_for_alias", + "schema", + "from_numpy_dtype", + "_Unit", + "_Tz", + "_Time32Unit", + "_Time64Unit", + "_DataTypeT", +] From 94fcdf4ea3d96db033ae5106fcd59db502fea42b Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sun, 14 Jun 2026 09:00:13 -0700 Subject: [PATCH 17/21] update --- python/pyarrow-stubs/pyarrow/__init__.pyi | 6 - .../pyarrow-stubs/pyarrow/_stubs_typing.pyi | 131 +- python/pyarrow-stubs/pyarrow/_types.pyi | 1476 +++++++++-------- python/pyarrow-stubs/pyarrow/error.pyi | 9 +- 4 files changed, 814 insertions(+), 808 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/__init__.pyi b/python/pyarrow-stubs/pyarrow/__init__.pyi index ccec8d5abc07..4b2a8da206c2 100644 --- a/python/pyarrow-stubs/pyarrow/__init__.pyi +++ b/python/pyarrow-stubs/pyarrow/__init__.pyi @@ -15,12 +15,6 @@ # specific language governing permissions and limitations # under the License. -"""Type stubs for PyArrow. - -This is a placeholder stub file. -Complete type annotations will be added in subsequent PRs. -""" - from typing import Any # TODO(GH-48970): remove __getattr__ before release as this diff --git a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi index b7f22480e608..7802f0330597 100644 --- a/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi +++ b/python/pyarrow-stubs/pyarrow/_stubs_typing.pyi @@ -17,9 +17,16 @@ import datetime as dt +from _typeshed import ( + SupportsDunderGE, + SupportsDunderGT, + SupportsDunderLE, + SupportsDunderLT, +) from collections.abc import Collection, Container, Iterator, Sequence, Sized from decimal import Decimal from typing import Any, Literal, Protocol, TypeAlias, TypeVar +from typing_extensions import TypeAliasType import numpy as np @@ -48,108 +55,84 @@ NullEncoding: TypeAlias = Literal["mask", "encode"] NullSelectionBehavior: TypeAlias = Literal["drop", "emit_null"] TimeUnit: TypeAlias = Literal["s", "ms", "us", "ns"] -IntegerType: TypeAlias = ( - lib.Int8Type - | lib.Int16Type - | lib.Int32Type - | lib.Int64Type - | lib.UInt8Type - | lib.UInt16Type - | lib.UInt32Type - | lib.UInt64Type -) -PyScalar: TypeAlias = ( - bool - | int - | float - | Decimal - | str - | bytes - | dt.date - | dt.datetime - | dt.time - | dt.timedelta -) -NumpyScalar: TypeAlias = "np.generic[Any]" - -PyScalarT_co = TypeVar("PyScalarT_co", bound=PyScalar, covariant=True) -NumpyScalarT_co = TypeVar("NumpyScalarT_co", bound=NumpyScalar, covariant=True) -DataTypeT_co = TypeVar("DataTypeT_co", bound=lib.DataType, covariant=True) - -IntoArray: TypeAlias = ( - Sequence[PyScalarT_co | None] - | NDArray[NumpyScalarT_co] - | lib.Array[lib.Scalar[DataTypeT_co]] - | ChunkedArray[Any] -) - -Mask: TypeAlias = IntoArray[bool, np.bool_, lib.BoolType] -Indices: TypeAlias = IntoArray[int, np.integer[Any], IntegerType] +IntegerType: TypeAlias = ( + lib.Int8Type + | lib.Int16Type + | lib.Int32Type + | lib.Int64Type + | lib.UInt8Type + | lib.UInt16Type + | lib.UInt32Type + | lib.UInt64Type +) +PyScalar: TypeAlias = ( + bool + | int + | float + | Decimal + | str + | bytes + | dt.date + | dt.datetime + | dt.time + | dt.timedelta +) +NumpyScalar: TypeAlias = np.generic[Any] + +_PyScalarT_co = TypeVar("_PyScalarT_co", bound=PyScalar, covariant=True) +_NumpyScalarT_co = TypeVar("_NumpyScalarT_co", bound=NumpyScalar, covariant=True) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=lib.DataType, covariant=True) +IntoArray = TypeAliasType( + "IntoArray", + Sequence[_PyScalarT_co | None] + | NDArray[_NumpyScalarT_co] + | lib.Array[lib.Scalar[_DataTypeT_co]] + | ChunkedArray[Any], + type_params=(_PyScalarT_co, _NumpyScalarT_co, _DataTypeT_co), +) + +Mask: TypeAlias = IntoArray[bool, np.bool_, lib.BoolType] +Indices: TypeAlias = IntoArray[int, np.integer[Any], IntegerType] _T = TypeVar("_T") _V = TypeVar("_V", covariant=True) SingleOrList: TypeAlias = list[_T] | _T - class SupportsDunderEQ(Protocol): def __eq__(self, other: object, /) -> bool: ... - -class SupportsDunderLT(Protocol): - def __lt__(self, other: object, /) -> bool: ... - - -class SupportsDunderGT(Protocol): - def __gt__(self, other: object, /) -> bool: ... - - -class SupportsDunderLE(Protocol): - def __le__(self, other: object, /) -> bool: ... - - -class SupportsDunderGE(Protocol): - def __ge__(self, other: object, /) -> bool: ... - - FilterTuple: TypeAlias = ( tuple[str, Literal["=", "==", "!="], SupportsDunderEQ] - | tuple[str, Literal["<"], SupportsDunderLT] - | tuple[str, Literal[">"], SupportsDunderGT] - | tuple[str, Literal["<="], SupportsDunderLE] - | tuple[str, Literal[">="], SupportsDunderGE] - | tuple[str, Literal["in", "not in"], Collection] + | tuple[str, Literal["<"], SupportsDunderLT[Any]] + | tuple[str, Literal[">"], SupportsDunderGT[Any]] + | tuple[str, Literal["<="], SupportsDunderLE[Any]] + | tuple[str, Literal[">="], SupportsDunderGE[Any]] + | tuple[str, Literal["in", "not in"], Collection[Any]] | tuple[str, str, Any] # Allow general str for operator to avoid type errors ) - class Buffer(Protocol): ... - - class SupportsPyBuffer(Protocol): ... - class SupportsArrowStream(Protocol): - def __arrow_c_stream__(self, requested_schema=None, /) -> Any: ... - + def __arrow_c_stream__(self, requested_schema: Any = None, /) -> Any: ... class SupportsPyArrowArray(Protocol): - def __arrow_array__(self, type=None, /) -> Any: ... - + def __arrow_array__(self, type: Any = None, /) -> Any: ... class SupportsArrowArray(Protocol): - def __arrow_c_array__(self, requested_schema=None, /) -> Any: ... - + def __arrow_c_array__(self, requested_schema: Any = None, /) -> Any: ... class SupportsArrowDeviceArray(Protocol): - def __arrow_c_device_array__(self, requested_schema=None, /, **kwargs) -> Any: ... - + def __arrow_c_device_array__( + self, requested_schema: Any = None, /, **kwargs: Any + ) -> Any: ... class SupportsArrowSchema(Protocol): def __arrow_c_schema__(self) -> Any: ... - -class NullableCollection(Sized, Container[_V], Protocol[_V]): +class NullableCollection(Sized, Container[Any], Protocol[_V]): def __iter__(self) -> Iterator[_V] | Iterator[_V | None]: ... def __len__(self) -> int: ... def __contains__(self, item: Any, /) -> bool: ... diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index efc3bd847ca5..218993886531 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -1,727 +1,749 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import datetime as dt # noqa: F401 - -from collections.abc import Iterable, Iterator, Mapping, Sequence -from decimal import Decimal # noqa: F401 -from typing import Any, Generic, Literal, Protocol, TypeAlias - -import numpy as np -import pandas as pd - -from typing_extensions import Self, TypeVar, deprecated - -from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit -from pyarrow.lib import ( # noqa: F401 - Array, - Buffer, - ChunkedArray, - ExtensionArray, - ExtensionScalar, - MemoryPool, - MonthDayNano, - Table, -) - -class _Weakrefable: ... -class _Metadata(_Weakrefable): ... - -class DataType(_Weakrefable): - def field(self, i: int) -> Field[Any]: ... - @property - def id(self) -> int: ... - @property - def bit_width(self) -> int: ... - @property - def byte_width(self) -> int: ... - @property - def num_fields(self) -> int: ... - @property - def num_buffers(self) -> int: ... - @property - def has_variadic_buffers(self) -> bool: ... - - def __hash__(self) -> int: ... - def equals( - self, other: DataType | str, *, check_metadata: bool = False - ) -> bool: ... - def to_pandas_dtype(self) -> np.generic: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod - def _import_from_c(cls, in_ptr: int) -> Self: ... - def __arrow_c_schema__(self) -> Any: ... - @classmethod - def _import_from_c_capsule(cls, schema: Any) -> Self: ... - -_AsPyType = TypeVar("_AsPyType") -_DataTypeT = TypeVar("_DataTypeT", bound=DataType) -_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) - -class _BasicDataType(DataType, Generic[_AsPyType]): ... -class NullType(_BasicDataType[None]): ... -class BoolType(_BasicDataType[bool]): ... -class UInt8Type(_BasicDataType[int]): ... -class Int8Type(_BasicDataType[int]): ... -class UInt16Type(_BasicDataType[int]): ... -class Int16Type(_BasicDataType[int]): ... -class UInt32Type(_BasicDataType[int]): ... -class Int32Type(_BasicDataType[int]): ... -class UInt64Type(_BasicDataType[int]): ... -class Int64Type(_BasicDataType[int]): ... -class Float16Type(_BasicDataType[float]): ... -class Float32Type(_BasicDataType[float]): ... -class Float64Type(_BasicDataType[float]): ... -class Date32Type(_BasicDataType[dt.date]): ... -class Date64Type(_BasicDataType[dt.date]): ... -class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... -class StringType(_BasicDataType[str]): ... -class LargeStringType(_BasicDataType[str]): ... -class StringViewType(_BasicDataType[str]): ... -class BinaryType(_BasicDataType[bytes]): ... -class LargeBinaryType(_BasicDataType[bytes]): ... -class BinaryViewType(_BasicDataType[bytes]): ... - -_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) -_Tz = TypeVar("_Tz", str, None, default=None) - -class TimestampType(_BasicDataType[int], Generic[_Unit, _Tz]): - @property - def unit(self) -> _Unit: ... - @property - def tz(self) -> _Tz: ... - -_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) - -class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): - @property - def unit(self) -> _Time32Unit: ... - -_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) - -class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): - @property - def unit(self) -> _Time64Unit: ... - -class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): - @property - def unit(self) -> _Unit: ... - -_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) - -class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... - -_Precision = TypeVar("_Precision", default=Any) -_Scale = TypeVar("_Scale", default=Any) -_Precision_co = TypeVar("_Precision_co", default=Any, covariant=True) -_Scale_co = TypeVar("_Scale_co", default=Any, covariant=True) - -class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): - @property - def precision(self) -> _Precision_co: ... - @property - def scale(self) -> _Scale_co: ... - -class Decimal32Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] -): ... - -class Decimal64Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] -): ... - -class Decimal128Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] -): ... - -class Decimal256Type( - FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] -): ... - -class ListType(DataType, Generic[_DataTypeT_co]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - -class LargeListType(DataType, Generic[_DataTypeT_co]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - -class ListViewType(DataType, Generic[_DataTypeT_co]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - -class LargeListViewType(DataType, Generic[_DataTypeT_co]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - -class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): - @property - def value_field(self) -> Field[_DataTypeT_co]: ... - @property - def value_type(self) -> _DataTypeT_co: ... - @property - def list_size(self) -> int: ... - -class DictionaryMemo(_Weakrefable): ... - -_IndexT = TypeVar( - "_IndexT", - UInt8Type, - Int8Type, - UInt16Type, - Int16Type, - UInt32Type, - Int32Type, - UInt64Type, - Int64Type, -) -_BasicValueT = TypeVar( - "_BasicValueT", bound=_BasicDataType[Any], default=_BasicDataType[Any] -) -_ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) -_K = TypeVar("_K", bound=DataType, default=DataType) -_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) - -class DictionaryType(DataType, Generic[_IndexT, _BasicValueT, _Ordered]): - @property - def ordered(self) -> _Ordered: ... - @property - def index_type(self) -> _IndexT: ... - @property - def value_type(self) -> _BasicValueT: ... - -class MapType(DataType, Generic[_K, _ValueT, _Ordered]): - @property - def key_field(self) -> Field[_K]: ... - @property - def key_type(self) -> _K: ... - @property - def item_field(self) -> Field[_ValueT]: ... - @property - def item_type(self) -> _ValueT: ... - @property - def keys_sorted(self) -> _Ordered: ... - -_Size = TypeVar("_Size", default=int) - -class StructType(DataType): - def get_field_index(self, name: str) -> int: ... - def field(self, i: int | str) -> Field[Any]: ... - def get_all_field_indices(self, name: str) -> list[int]: ... - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field[Any]]: ... - - __getitem__ = field - @property - def names(self) -> list[str]: ... - @property - def fields(self) -> list[Field[Any]]: ... - -class UnionType(DataType): - @property - def mode(self) -> Literal["sparse", "dense"]: ... - @property - def type_codes(self) -> list[int]: ... - def __len__(self) -> int: ... - def __iter__(self) -> Iterator[Field[Any]]: ... - def field(self, i: int) -> Field[Any]: ... - - __getitem__ = field - -class SparseUnionType(UnionType): - @property - def mode(self) -> Literal["sparse"]: ... - -class DenseUnionType(UnionType): - @property - def mode(self) -> Literal["dense"]: ... - -_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) - -class RunEndEncodedType(DataType, Generic[_RunEndType, _BasicValueT]): - @property - def run_end_type(self) -> _RunEndType: ... - @property - def value_type(self) -> _BasicValueT: ... - -_StorageT = TypeVar("_StorageT", bound=Array[Any] | ChunkedArray[Any]) - -class BaseExtensionType(DataType): - def __arrow_ext_class__(self) -> type[ExtensionArray[Any]]: ... - def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... - @property - def extension_name(self) -> str: ... - @property - def storage_type(self) -> DataType: ... - def wrap_array(self, storage: _StorageT) -> _StorageT: ... - -class ExtensionType(BaseExtensionType): - def __init__(self, storage_type: DataType, extension_name: str) -> None: ... - def __arrow_ext_serialize__(self) -> bytes: ... - @classmethod - def __arrow_ext_deserialize__( - cls, storage_type: DataType, serialized: bytes - ) -> Self: ... - -class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): - @property - def value_type(self) -> _ValueT: ... - @property - def shape(self) -> list[int]: ... - @property - def dim_names(self) -> list[str] | None: ... - @property - def permutation(self) -> list[int] | None: ... - -class Bool8Type(BaseExtensionType): ... -class UuidType(BaseExtensionType): ... -class JsonType(BaseExtensionType): ... - -class OpaqueType(BaseExtensionType): - @property - def type_name(self) -> str: ... - @property - def vendor_name(self) -> str: ... - -class UnknownExtensionType(ExtensionType): - def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... - -def register_extension_type(ext_type: ExtensionType) -> None: ... -def unregister_extension_type(type_name: str) -> None: ... - -_StrOrBytes: TypeAlias = str | bytes -_MetadataMapping: TypeAlias = ( - Mapping[bytes, bytes] | Mapping[str, str] | Mapping[bytes, str] | Mapping[str, bytes] -) -_MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] -_KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None -_DataTypeAlias: TypeAlias = Literal[ - "null", - "bool", - "boolean", - "i1", - "int8", - "i2", - "int16", - "i4", - "int32", - "i8", - "int64", - "u1", - "uint8", - "u2", - "uint16", - "u4", - "uint32", - "u8", - "uint64", - "f2", - "halffloat", - "float16", - "f4", - "float", - "float32", - "f8", - "double", - "float64", - "string", - "str", - "utf8", - "binary", - "large_string", - "large_str", - "large_utf8", - "large_binary", - "binary_view", - "string_view", - "date32", - "date64", - "date32[day]", - "date64[ms]", - "time32[s]", - "time32[ms]", - "time64[us]", - "time64[ns]", - "timestamp[s]", - "timestamp[ms]", - "timestamp[us]", - "timestamp[ns]", - "duration[s]", - "duration[ms]", - "duration[us]", - "duration[ns]", - "month_day_nano_interval", -] -_DataTypeAliasInput: TypeAlias = _DataTypeAlias | str -_DataTypeLike: TypeAlias = DataType | _DataTypeAliasInput -_FieldTypeInput: TypeAlias = _DataTypeLike | None - -class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): - def __init__( - self, - __arg0__: _KeyValueMetadataInput | KeyValueMetadata = None, - **kwargs: str, - ) -> None: ... - def equals(self, other: KeyValueMetadata) -> bool: ... - def __len__(self) -> int: ... - def __contains__(self, /, __key: object) -> bool: ... - def __getitem__(self, /, __key: Any) -> Any: ... - def __iter__(self) -> Iterator[bytes]: ... - def get_all(self, key: str) -> list[bytes]: ... - def to_dict(self) -> dict[bytes, bytes]: ... - -class Field(_Weakrefable, Generic[_DataTypeT_co]): - def equals(self, other: Field[Any], check_metadata: bool = False) -> bool: ... - def __hash__(self) -> int: ... - @property - def nullable(self) -> bool: ... - @property - def name(self) -> str: ... - @property - def metadata(self) -> dict[bytes, bytes] | None: ... - @property - def type(self) -> _DataTypeT_co: ... - def with_metadata( - self, - metadata: _MetadataMapping | Any, - ) -> Self: ... - def remove_metadata(self) -> Self: ... - def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: ... - def with_name(self, name: str) -> Self: ... - def with_nullable(self, nullable: bool) -> Field[_DataTypeT_co]: ... - def flatten(self) -> list[Field[Any]]: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod - def _import_from_c(cls, in_ptr: int) -> Self: ... - def __arrow_c_schema__(self) -> Any: ... - @classmethod - def _import_from_c_capsule(cls, schema: Any) -> Self: ... - -_StructFieldTuple: TypeAlias = ( - tuple[str, Field[Any] | None] | tuple[str, _FieldTypeInput] -) -_StructFieldsInput: TypeAlias = ( - Iterable[Field[Any] | _StructFieldTuple] - | Mapping[str, Field[Any] | _FieldTypeInput] -) - -class Schema(_Weakrefable): - def __len__(self) -> int: ... - def __getitem__(self, key: str | int) -> Field[Any]: ... - - _field = __getitem__ - def __iter__(self) -> Iterator[Field[Any]]: ... - def __hash__(self) -> int: ... - def __sizeof__(self) -> int: ... - @property - def pandas_metadata(self) -> dict[bytes, bytes]: ... - @property - def names(self) -> list[str]: ... - @property - def types(self) -> list[DataType]: ... - @property - def metadata(self) -> dict[bytes, bytes]: ... - def empty_table(self) -> Table: ... - def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... - @classmethod - def from_pandas( - cls, df: pd.DataFrame, preserve_index: bool | None = None - ) -> Schema: ... - def field(self, i: int | str | bytes) -> Field[Any]: ... - @deprecated("Use 'field' instead") - def field_by_name(self, name: str) -> Field[Any]: ... - def get_field_index(self, name: str) -> int: ... - def get_all_field_indices(self, name: str) -> list[int]: ... - def append(self, field: Field[Any]) -> Schema: ... - def insert(self, i: int, field: Field[Any]) -> Schema: ... - def remove(self, i: int) -> Schema: ... - def set(self, i: int, field: Field[Any]) -> Schema: ... - @deprecated("Use 'with_metadata' instead") - def add_metadata(self, metadata: _MetadataMapping) -> Schema: ... - def with_metadata(self, metadata: _MetadataMapping) -> Schema: ... - def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... - def remove_metadata(self) -> Schema: ... - def to_string( - self, - truncate_metadata: bool = True, - show_field_metadata: bool = True, - show_schema_metadata: bool = True, - element_size_limit: int | None = None, - ) -> str: ... - def _export_to_c(self, out_ptr: int) -> None: ... - @classmethod - def _import_from_c(cls, in_ptr: int) -> Schema: ... - def __arrow_c_schema__(self) -> Any: ... - @staticmethod - def _import_from_c_capsule(schema: Any) -> Schema: ... - -def unify_schemas( - schemas: Sequence[Schema], - *, - promote_options: Literal["default", "permissive"] = "default", -) -> Schema: ... -def field( - name: SupportsArrowSchema | str | bytes, - type: _DataTypeT | _DataTypeAliasInput | None = None, - nullable: bool = True, - metadata: _MetadataMapping | None = None, -) -> Field[_DataTypeT] | Field[Any]: ... -def null() -> NullType: ... -def bool_() -> BoolType: ... -def uint8() -> UInt8Type: ... -def int8() -> Int8Type: ... -def uint16() -> UInt16Type: ... -def int16() -> Int16Type: ... -def uint32() -> UInt32Type: ... -def int32() -> Int32Type: ... -def int64() -> Int64Type: ... -def uint64() -> UInt64Type: ... -def timestamp( - unit: _Unit | str, tz: _Tz | None = None -) -> TimestampType[_Unit, _Tz]: ... -def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... -def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... -def duration(unit: _Unit | str) -> DurationType[_Unit]: ... -def month_day_nano_interval() -> MonthDayNanoIntervalType: ... -def date32() -> Date32Type: ... -def date64() -> Date64Type: ... -def float16() -> Float16Type: ... -def float32() -> Float32Type: ... -def float64() -> Float64Type: ... -def decimal32( - precision: _Precision, scale: _Scale | Literal[0] = 0 -) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... -def decimal64( - precision: _Precision, scale: _Scale | Literal[0] = 0 -) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... -def decimal128( - precision: _Precision, scale: _Scale | Literal[0] = 0 -) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... -def decimal256( - precision: _Precision, scale: _Scale | Literal[0] = 0 -) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... -def string() -> StringType: ... - -utf8 = string - -def binary( - length: Literal[-1] | int = ..., -) -> BinaryType | FixedSizeBinaryType[bytes]: ... -def large_binary() -> LargeBinaryType: ... -def large_string() -> LargeStringType: ... - -large_utf8 = large_string - -def binary_view() -> BinaryViewType: ... -def string_view() -> StringViewType: ... -from typing import overload - -@overload -def list_( - value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] | None = None -) -> ListType[_DataTypeT]: ... -@overload -def list_( - value_type: _DataTypeT | Field[_DataTypeT], list_size: _Size -) -> FixedSizeListType[_DataTypeT, _Size]: ... -def large_list( - value_type: _DataTypeT | Field[_DataTypeT], -) -> LargeListType[_DataTypeT]: ... -def list_view( - value_type: _DataTypeT | Field[_DataTypeT], -) -> ListViewType[_DataTypeT]: ... -def large_list_view( - value_type: _DataTypeT | Field[_DataTypeT], -) -> LargeListViewType[_DataTypeT]: ... -def map_( - key_type: _K | Field[_K] | _FieldTypeInput, - item_type: _ValueT | Field[_ValueT] | _FieldTypeInput, - keys_sorted: _Ordered | None = None, -) -> MapType[_K, _ValueT, _Ordered]: ... -def dictionary( - index_type: _IndexT | _DataTypeAliasInput, - value_type: _BasicValueT | _DataTypeAliasInput, - ordered: _Ordered | None = None, -) -> DictionaryType[_IndexT, _BasicValueT, _Ordered]: ... -def struct( - fields: _StructFieldsInput, -) -> StructType: ... -def sparse_union( - child_fields: list[Field[Any]], type_codes: list[int] | None = None -) -> SparseUnionType: ... -def dense_union( - child_fields: list[Field[Any]], type_codes: list[int] | None = None -) -> DenseUnionType: ... -def union( - child_fields: list[Field[Any]], - mode: Literal["sparse", "dense"] | int, - type_codes: list[int] | None = None, -) -> SparseUnionType | DenseUnionType: ... -def run_end_encoded( - run_end_type: _RunEndType | _DataTypeAliasInput | None, - value_type: _BasicValueT | _DataTypeAliasInput | None, -) -> RunEndEncodedType[_RunEndType, _BasicValueT]: ... -def json_(storage_type: DataType = ...) -> JsonType: ... -def uuid() -> UuidType: ... -def fixed_shape_tensor( - value_type: _ValueT, - shape: Sequence[int], - dim_names: Sequence[str] | None = None, - permutation: Sequence[int] | None = None, -) -> FixedShapeTensorType[_ValueT]: ... -def bool8() -> Bool8Type: ... -def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... -def type_for_alias(name: Any) -> DataType: ... -def schema( - fields: ( - Iterable[Field[Any]] - | Iterable[tuple[str, _FieldTypeInput]] - | Mapping[Any, _FieldTypeInput] - ), - metadata: _MetadataMapping | None = None, -) -> Schema: ... -def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... - -__all__ = [ - "_Weakrefable", - "_Metadata", - "DataType", - "_BasicDataType", - "NullType", - "BoolType", - "UInt8Type", - "Int8Type", - "UInt16Type", - "Int16Type", - "UInt32Type", - "Int32Type", - "UInt64Type", - "Int64Type", - "Float16Type", - "Float32Type", - "Float64Type", - "Date32Type", - "Date64Type", - "MonthDayNanoIntervalType", - "StringType", - "LargeStringType", - "StringViewType", - "BinaryType", - "LargeBinaryType", - "BinaryViewType", - "TimestampType", - "Time32Type", - "Time64Type", - "DurationType", - "FixedSizeBinaryType", - "Decimal32Type", - "Decimal64Type", - "Decimal128Type", - "Decimal256Type", - "ListType", - "LargeListType", - "ListViewType", - "LargeListViewType", - "FixedSizeListType", - "DictionaryMemo", - "DictionaryType", - "MapType", - "StructType", - "UnionType", - "SparseUnionType", - "DenseUnionType", - "RunEndEncodedType", - "BaseExtensionType", - "ExtensionType", - "FixedShapeTensorType", - "Bool8Type", - "UuidType", - "JsonType", - "OpaqueType", - "UnknownExtensionType", - "register_extension_type", - "unregister_extension_type", - "KeyValueMetadata", - "Field", - "Schema", - "unify_schemas", - "field", - "null", - "bool_", - "uint8", - "int8", - "uint16", - "int16", - "uint32", - "int32", - "int64", - "uint64", - "timestamp", - "time32", - "time64", - "duration", - "month_day_nano_interval", - "date32", - "date64", - "float16", - "float32", - "float64", - "decimal32", - "decimal64", - "decimal128", - "decimal256", - "string", - "utf8", - "binary", - "large_binary", - "large_string", - "large_utf8", - "binary_view", - "string_view", - "list_", - "large_list", - "list_view", - "large_list_view", - "map_", - "dictionary", - "struct", - "sparse_union", - "dense_union", - "union", - "run_end_encoded", - "json_", - "uuid", - "fixed_shape_tensor", - "bool8", - "opaque", - "type_for_alias", - "schema", - "from_numpy_dtype", - "_Unit", - "_Tz", - "_Time32Unit", - "_Time64Unit", - "_DataTypeT", -] +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime as dt # noqa: F401 + +from collections.abc import Iterable, Iterator, Mapping, Sequence +from decimal import Decimal # noqa: F401 +from typing import Any, Generic, Literal, Protocol, TypeAlias, overload + +import numpy as np +import pandas as pd # type: ignore[import-not-found,import-untyped] +from typing_extensions import Self, TypeVar, deprecated + +from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit +from pyarrow.lib import ( # noqa: F401 + Array, + Buffer, + ChunkedArray, + ExtensionArray, + ExtensionScalar, + MemoryPool, + MonthDayNano, + Table, +) + +class _Weakrefable: ... +class _Metadata(_Weakrefable): ... + +class DataType(_Weakrefable): + def field(self, i: int) -> Field[Any]: ... + @property + def id(self) -> int: ... + @property + def bit_width(self) -> int: ... + @property + def byte_width(self) -> int: ... + @property + def num_fields(self) -> int: ... + @property + def num_buffers(self) -> int: ... + @property + def has_variadic_buffers(self) -> bool: ... + def __hash__(self) -> int: ... + def equals( + self, other: DataType | str, *, check_metadata: bool = False + ) -> bool: ... + def to_pandas_dtype(self) -> _PandasDtype: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema: Any) -> Self: ... + +_AsPyType = TypeVar("_AsPyType") +_DataTypeT = TypeVar("_DataTypeT", bound=DataType) +_DataTypeT_co = TypeVar("_DataTypeT_co", bound=DataType, covariant=True) +_Size = TypeVar("_Size", bound=int, default=int) +_PandasDtype: TypeAlias = ( + type[np.generic] | np.dtype[Any] | pd.api.extensions.ExtensionDtype +) + +class _BasicDataType(DataType, Generic[_AsPyType]): ... +class NullType(_BasicDataType[None]): ... +class BoolType(_BasicDataType[bool]): ... +class UInt8Type(_BasicDataType[int]): ... +class Int8Type(_BasicDataType[int]): ... +class UInt16Type(_BasicDataType[int]): ... +class Int16Type(_BasicDataType[int]): ... +class UInt32Type(_BasicDataType[int]): ... +class Int32Type(_BasicDataType[int]): ... +class UInt64Type(_BasicDataType[int]): ... +class Int64Type(_BasicDataType[int]): ... +class Float16Type(_BasicDataType[float]): ... +class Float32Type(_BasicDataType[float]): ... +class Float64Type(_BasicDataType[float]): ... +class Date32Type(_BasicDataType[dt.date]): ... +class Date64Type(_BasicDataType[dt.date]): ... +class MonthDayNanoIntervalType(_BasicDataType[MonthDayNano]): ... +class StringType(_BasicDataType[str]): ... +class LargeStringType(_BasicDataType[str]): ... +class StringViewType(_BasicDataType[str]): ... +class BinaryType(_BasicDataType[bytes]): ... +class LargeBinaryType(_BasicDataType[bytes]): ... +class BinaryViewType(_BasicDataType[bytes]): ... + +_Unit = TypeVar("_Unit", bound=TimeUnit, default=Literal["us"]) +_Tz = TypeVar("_Tz", str, None, default=None) + +class TimestampType(_BasicDataType[dt.datetime], Generic[_Unit, _Tz]): + @property + def unit(self) -> _Unit: ... + @property + def tz(self) -> _Tz: ... + +_Time32Unit = TypeVar("_Time32Unit", bound=Literal["s", "ms"]) + +class Time32Type(_BasicDataType[dt.time], Generic[_Time32Unit]): + @property + def unit(self) -> _Time32Unit: ... + +_Time64Unit = TypeVar("_Time64Unit", bound=Literal["us", "ns"]) + +class Time64Type(_BasicDataType[dt.time], Generic[_Time64Unit]): + @property + def unit(self) -> _Time64Unit: ... + +class DurationType(_BasicDataType[dt.timedelta], Generic[_Unit]): + @property + def unit(self) -> _Unit: ... + +_FixedSizeBinaryAsPyType = TypeVar("_FixedSizeBinaryAsPyType", default=bytes) + +class FixedSizeBinaryType(_BasicDataType[_FixedSizeBinaryAsPyType]): ... + +_Precision = TypeVar("_Precision", bound=int, default=int) +_Scale = TypeVar("_Scale", bound=int, default=int) +_Precision_co = TypeVar("_Precision_co", bound=int, default=int, covariant=True) +_Scale_co = TypeVar("_Scale_co", bound=int, default=int, covariant=True) + +class _HasPrecisionScale(Protocol[_Precision_co, _Scale_co]): + @property + def precision(self) -> _Precision_co: ... + @property + def scale(self) -> _Scale_co: ... + +class Decimal32Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... +class Decimal64Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... +class Decimal128Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... +class Decimal256Type( + FixedSizeBinaryType[Decimal], _HasPrecisionScale[_Precision_co, _Scale_co] +): ... + +class ListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class ListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class LargeListViewType(DataType, Generic[_DataTypeT_co]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + +class FixedSizeListType(DataType, Generic[_DataTypeT_co, _Size]): + @property + def value_field(self) -> Field[_DataTypeT_co]: ... + @property + def value_type(self) -> _DataTypeT_co: ... + @property + def list_size(self) -> _Size: ... + +class DictionaryMemo(_Weakrefable): ... + +_IndexT = TypeVar( + "_IndexT", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, +) +_ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) +_ValueT_co = TypeVar("_ValueT_co", bound=DataType, default=DataType, covariant=True) +_K = TypeVar("_K", bound=DataType, default=DataType) +_Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) + +class DictionaryType(DataType, Generic[_IndexT, _ValueT_co, _Ordered]): + @property + def ordered(self) -> _Ordered: ... + @property + def index_type(self) -> _IndexT: ... + @property + def value_type(self) -> _ValueT_co: ... + +class MapType(DataType, Generic[_K, _ValueT, _Ordered]): + @property + def key_field(self) -> Field[_K]: ... + @property + def key_type(self) -> _K: ... + @property + def item_field(self) -> Field[_ValueT]: ... + @property + def item_type(self) -> _ValueT: ... + @property + def keys_sorted(self) -> _Ordered: ... + +class StructType(DataType): + def get_field_index(self, name: str | bytes) -> int: ... + def field(self, i: int | str | bytes) -> Field[Any]: ... + def get_all_field_indices(self, name: str | bytes) -> list[int]: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field[Any]]: ... + + __getitem__ = field + @property + def names(self) -> list[str]: ... + @property + def fields(self) -> list[Field[Any]]: ... + +class UnionType(DataType): + @property + def mode(self) -> Literal["sparse", "dense"]: ... + @property + def type_codes(self) -> list[int]: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Field[Any]]: ... + def field(self, i: int) -> Field[Any]: ... + + __getitem__ = field + +class SparseUnionType(UnionType): + @property + def mode(self) -> Literal["sparse"]: ... + +class DenseUnionType(UnionType): + @property + def mode(self) -> Literal["dense"]: ... + +_RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) + +class RunEndEncodedType(DataType, Generic[_RunEndType, _ValueT_co]): + @property + def run_end_type(self) -> _RunEndType: ... + @property + def value_type(self) -> _ValueT_co: ... + +_StorageT = TypeVar("_StorageT", bound=Array[Any] | ChunkedArray[Any]) + +class BaseExtensionType(DataType): + def __arrow_ext_class__(self) -> type[ExtensionArray[Any]]: ... + def __arrow_ext_scalar_class__(self) -> type[ExtensionScalar]: ... + @property + def extension_name(self) -> str: ... + @property + def storage_type(self) -> DataType: ... + def wrap_array(self, storage: _StorageT) -> _StorageT: ... + +class ExtensionType(BaseExtensionType): + def __init__(self, storage_type: DataType, extension_name: str) -> None: ... + def __arrow_ext_serialize__(self) -> bytes: ... + @classmethod + def __arrow_ext_deserialize__( + cls, storage_type: DataType, serialized: bytes + ) -> Self: ... + +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): + @property + def value_type(self) -> _ValueT: ... + @property + def shape(self) -> list[int]: ... + @property + def dim_names(self) -> list[str] | None: ... + @property + def permutation(self) -> list[int] | None: ... + +class Bool8Type(BaseExtensionType): ... +class UuidType(BaseExtensionType): ... +class JsonType(BaseExtensionType): ... + +class OpaqueType(BaseExtensionType): + @property + def type_name(self) -> str: ... + @property + def vendor_name(self) -> str: ... + +class UnknownExtensionType(ExtensionType): + def __init__(self, storage_type: DataType, serialized: bytes) -> None: ... + +def register_extension_type(ext_type: BaseExtensionType) -> None: ... +def unregister_extension_type(type_name: str) -> None: ... + +_FieldName: TypeAlias = str | bytes +_StrOrBytes: TypeAlias = str | bytes +_MetadataMapping: TypeAlias = ( + Mapping[bytes, bytes] + | Mapping[str, str] + | Mapping[bytes, str] + | Mapping[str, bytes] +) +_MetadataIterable: TypeAlias = Iterable[tuple[_StrOrBytes, _StrOrBytes]] +_KeyValueMetadataInput: TypeAlias = _MetadataMapping | _MetadataIterable | None +_DataTypeAlias: TypeAlias = Literal[ + "null", + "bool", + "boolean", + "i1", + "int8", + "i2", + "int16", + "i4", + "int32", + "i8", + "int64", + "u1", + "uint8", + "u2", + "uint16", + "u4", + "uint32", + "u8", + "uint64", + "f2", + "halffloat", + "float16", + "f4", + "float", + "float32", + "f8", + "double", + "float64", + "string", + "str", + "utf8", + "binary", + "large_string", + "large_str", + "large_utf8", + "large_binary", + "binary_view", + "string_view", + "date32", + "date64", + "date32[day]", + "date64[ms]", + "time32[s]", + "time32[ms]", + "time64[us]", + "time64[ns]", + "timestamp[s]", + "timestamp[ms]", + "timestamp[us]", + "timestamp[ns]", + "duration[s]", + "duration[ms]", + "duration[us]", + "duration[ns]", + "month_day_nano_interval", +] +_DataTypeAliasInput: TypeAlias = _DataTypeAlias | str +_DataTypeLike: TypeAlias = DataType | _DataTypeAliasInput + +class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): + def __init__( + self, + __arg0__: _KeyValueMetadataInput | KeyValueMetadata = None, + **kwargs: str, + ) -> None: ... + def equals(self, other: KeyValueMetadata) -> bool: ... + def __len__(self) -> int: ... + def __contains__(self, /, __key: object) -> bool: ... + def __getitem__(self, /, __key: str | bytes) -> bytes: ... + def __iter__(self) -> Iterator[bytes]: ... + def key(self, i: int) -> bytes: ... + def value(self, i: int) -> bytes: ... + def get_all(self, key: str | bytes) -> list[bytes]: ... + def to_dict(self) -> dict[bytes, bytes]: ... + +class Field(_Weakrefable, Generic[_DataTypeT_co]): + def equals(self, other: Field[Any], check_metadata: bool = False) -> bool: ... + def __hash__(self) -> int: ... + @property + def nullable(self) -> bool: ... + @property + def name(self) -> str: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + @property + def type(self) -> _DataTypeT_co: ... + def with_metadata( + self, + metadata: _KeyValueMetadataInput | KeyValueMetadata, + ) -> Self: ... + def remove_metadata(self) -> Self: ... + def with_type(self, new_type: _DataTypeT) -> Field[_DataTypeT]: ... + def with_name(self, name: str | bytes) -> Self: ... + def with_nullable(self, nullable: bool) -> Field[_DataTypeT_co]: ... + def flatten(self) -> list[Field[Any]]: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Self: ... + def __arrow_c_schema__(self) -> Any: ... + @classmethod + def _import_from_c_capsule(cls, schema: Any) -> Self: ... + +_FieldTuple: TypeAlias = ( + tuple[_FieldName, _DataTypeLike] + | tuple[_FieldName, _DataTypeLike, bool | None] + | tuple[ + _FieldName, + _DataTypeLike, + bool | None, + _KeyValueMetadataInput | KeyValueMetadata, + ] +) +_StructFieldsInput: TypeAlias = ( + Iterable[Field[Any] | _FieldTuple] | Mapping[_FieldName, _DataTypeLike] +) +_SchemaFieldsInput: TypeAlias = _StructFieldsInput + +class Schema(_Weakrefable): + def __len__(self) -> int: ... + def __getitem__(self, key: int) -> Field[Any]: ... + + _field = __getitem__ + def __iter__(self) -> Iterator[Field[Any]]: ... + def __hash__(self) -> int: ... + def __sizeof__(self) -> int: ... + @property + def pandas_metadata(self) -> dict[str, Any] | None: ... + @property + def names(self) -> list[str]: ... + @property + def types(self) -> list[DataType]: ... + @property + def metadata(self) -> dict[bytes, bytes] | None: ... + def empty_table(self) -> Table: ... + def equals(self, other: Schema, check_metadata: bool = False) -> bool: ... + @classmethod + def from_pandas( + cls, df: pd.DataFrame, preserve_index: bool | None = None + ) -> Schema: ... + def field(self, i: int | str | bytes) -> Field[Any]: ... + @deprecated("Use 'field' instead") + def field_by_name(self, name: str | bytes) -> Field[Any] | None: ... + def get_field_index(self, name: str | bytes) -> int: ... + def get_all_field_indices(self, name: str | bytes) -> list[int]: ... + def append(self, field: Field[Any]) -> Schema: ... + def insert(self, i: int, field: Field[Any]) -> Schema: ... + def remove(self, i: int) -> Schema: ... + def set(self, i: int, field: Field[Any]) -> Schema: ... + @deprecated("Use 'with_metadata' instead") + def add_metadata( + self, metadata: _KeyValueMetadataInput | KeyValueMetadata + ) -> Schema: ... + def with_metadata( + self, metadata: _KeyValueMetadataInput | KeyValueMetadata + ) -> Schema: ... + def serialize(self, memory_pool: MemoryPool | None = None) -> Buffer: ... + def remove_metadata(self) -> Schema: ... + def to_string( + self, + truncate_metadata: bool = True, + show_field_metadata: bool = True, + show_schema_metadata: bool = True, + element_size_limit: int = 100, + ) -> str: ... + def _export_to_c(self, out_ptr: int) -> None: ... + @classmethod + def _import_from_c(cls, in_ptr: int) -> Schema: ... + def __arrow_c_schema__(self) -> Any: ... + @staticmethod + def _import_from_c_capsule(schema: Any) -> Schema: ... + +def unify_schemas( + schemas: Iterable[Schema], + *, + promote_options: Literal["default", "permissive"] = "default", +) -> Schema: ... +@overload +def field( + name: SupportsArrowSchema, + type: None = None, + nullable: bool | None = None, + metadata: _KeyValueMetadataInput | KeyValueMetadata = None, +) -> Field[Any]: ... +@overload +def field( + name: _FieldName, + type: _DataTypeT, + nullable: bool | None = None, + metadata: _KeyValueMetadataInput | KeyValueMetadata = None, +) -> Field[_DataTypeT]: ... +@overload +def field( + name: _FieldName, + type: _DataTypeAliasInput, + nullable: bool | None = None, + metadata: _KeyValueMetadataInput | KeyValueMetadata = None, +) -> Field[Any]: ... +def null() -> NullType: ... +def bool_() -> BoolType: ... +def uint8() -> UInt8Type: ... +def int8() -> Int8Type: ... +def uint16() -> UInt16Type: ... +def int16() -> Int16Type: ... +def uint32() -> UInt32Type: ... +def int32() -> Int32Type: ... +def int64() -> Int64Type: ... +def uint64() -> UInt64Type: ... +def timestamp( + unit: _Unit | str, tz: _Tz | None = None +) -> TimestampType[_Unit, _Tz]: ... +def time32(unit: _Time32Unit | str) -> Time32Type[_Time32Unit]: ... +def time64(unit: _Time64Unit | str) -> Time64Type[_Time64Unit]: ... +def duration(unit: _Unit | str) -> DurationType[_Unit]: ... +def month_day_nano_interval() -> MonthDayNanoIntervalType: ... +def date32() -> Date32Type: ... +def date64() -> Date64Type: ... +def float16() -> Float16Type: ... +def float32() -> Float32Type: ... +def float64() -> Float64Type: ... +def decimal32( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal32Type[_Precision, _Scale | Literal[0]]: ... +def decimal64( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal64Type[_Precision, _Scale | Literal[0]]: ... +def decimal128( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal128Type[_Precision, _Scale | Literal[0]]: ... +def decimal256( + precision: _Precision, scale: _Scale | Literal[0] = 0 +) -> Decimal256Type[_Precision, _Scale | Literal[0]]: ... +def string() -> StringType: ... + +utf8 = string + +def binary( + length: Literal[-1] | int = ..., +) -> BinaryType | FixedSizeBinaryType[bytes]: ... +def large_binary() -> LargeBinaryType: ... +def large_string() -> LargeStringType: ... + +large_utf8 = large_string + +def binary_view() -> BinaryViewType: ... +def string_view() -> StringViewType: ... +@overload +def list_(value_type: _DataTypeT | Field[_DataTypeT]) -> ListType[_DataTypeT]: ... +@overload +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: Literal[-1] +) -> ListType[_DataTypeT]: ... +@overload +def list_( + value_type: _DataTypeT | Field[_DataTypeT], list_size: _Size +) -> ListType[_DataTypeT] | FixedSizeListType[_DataTypeT, _Size]: ... +def large_list( + value_type: _DataTypeT | Field[_DataTypeT], +) -> LargeListType[_DataTypeT]: ... +def list_view( + value_type: _DataTypeT | Field[_DataTypeT], +) -> ListViewType[_DataTypeT]: ... +def large_list_view( + value_type: _DataTypeT | Field[_DataTypeT], +) -> LargeListViewType[_DataTypeT]: ... +def map_( + key_type: _K | Field[_K] | _DataTypeAliasInput, + item_type: _ValueT | Field[_ValueT] | _DataTypeAliasInput, + keys_sorted: _Ordered | None = None, +) -> MapType[_K, _ValueT, _Ordered]: ... +def dictionary( + index_type: _IndexT | _DataTypeAliasInput, + value_type: _ValueT | _DataTypeAliasInput, + ordered: _Ordered | None = None, +) -> DictionaryType[_IndexT, _ValueT, _Ordered]: ... +def struct( + fields: _StructFieldsInput, +) -> StructType: ... +def sparse_union( + child_fields: Sequence[Field[Any]], type_codes: Sequence[int] | None = None +) -> SparseUnionType: ... +def dense_union( + child_fields: Sequence[Field[Any]], type_codes: Sequence[int] | None = None +) -> DenseUnionType: ... +def union( + child_fields: Sequence[Field[Any]], + mode: Literal["sparse", "dense"] | int, + type_codes: Sequence[int] | None = None, +) -> SparseUnionType | DenseUnionType: ... +def run_end_encoded( + run_end_type: _RunEndType | _DataTypeAliasInput, + value_type: _ValueT | _DataTypeAliasInput, +) -> RunEndEncodedType[_RunEndType, _ValueT]: ... +def json_(storage_type: DataType = ...) -> JsonType: ... +def uuid() -> UuidType: ... +def fixed_shape_tensor( + value_type: _ValueT, + shape: Sequence[int], + dim_names: Sequence[str] | None = None, + permutation: Sequence[int] | None = None, +) -> FixedShapeTensorType[_ValueT]: ... +def bool8() -> Bool8Type: ... +def opaque(storage_type: DataType, type_name: str, vendor_name: str) -> OpaqueType: ... +def type_for_alias(name: str) -> DataType: ... +def schema( + fields: SupportsArrowSchema | _SchemaFieldsInput, + metadata: _KeyValueMetadataInput | KeyValueMetadata = None, +) -> Schema: ... +def from_numpy_dtype(dtype: np.dtype[Any] | type | str) -> DataType: ... + +__all__ = [ + "_Weakrefable", + "_Metadata", + "DataType", + "_BasicDataType", + "NullType", + "BoolType", + "UInt8Type", + "Int8Type", + "UInt16Type", + "Int16Type", + "UInt32Type", + "Int32Type", + "UInt64Type", + "Int64Type", + "Float16Type", + "Float32Type", + "Float64Type", + "Date32Type", + "Date64Type", + "MonthDayNanoIntervalType", + "StringType", + "LargeStringType", + "StringViewType", + "BinaryType", + "LargeBinaryType", + "BinaryViewType", + "TimestampType", + "Time32Type", + "Time64Type", + "DurationType", + "FixedSizeBinaryType", + "Decimal32Type", + "Decimal64Type", + "Decimal128Type", + "Decimal256Type", + "ListType", + "LargeListType", + "ListViewType", + "LargeListViewType", + "FixedSizeListType", + "DictionaryMemo", + "DictionaryType", + "MapType", + "StructType", + "UnionType", + "SparseUnionType", + "DenseUnionType", + "RunEndEncodedType", + "BaseExtensionType", + "ExtensionType", + "FixedShapeTensorType", + "Bool8Type", + "UuidType", + "JsonType", + "OpaqueType", + "UnknownExtensionType", + "register_extension_type", + "unregister_extension_type", + "KeyValueMetadata", + "Field", + "Schema", + "unify_schemas", + "field", + "null", + "bool_", + "uint8", + "int8", + "uint16", + "int16", + "uint32", + "int32", + "int64", + "uint64", + "timestamp", + "time32", + "time64", + "duration", + "month_day_nano_interval", + "date32", + "date64", + "float16", + "float32", + "float64", + "decimal32", + "decimal64", + "decimal128", + "decimal256", + "string", + "utf8", + "binary", + "large_binary", + "large_string", + "large_utf8", + "binary_view", + "string_view", + "list_", + "large_list", + "list_view", + "large_list_view", + "map_", + "dictionary", + "struct", + "sparse_union", + "dense_union", + "union", + "run_end_encoded", + "json_", + "uuid", + "fixed_shape_tensor", + "bool8", + "opaque", + "type_for_alias", + "schema", + "from_numpy_dtype", + "_Unit", + "_Tz", + "_Time32Unit", + "_Time64Unit", + "_DataTypeT", +] diff --git a/python/pyarrow-stubs/pyarrow/error.pyi b/python/pyarrow-stubs/pyarrow/error.pyi index 8ee75c3ec414..67be73eadb6d 100644 --- a/python/pyarrow-stubs/pyarrow/error.pyi +++ b/python/pyarrow-stubs/pyarrow/error.pyi @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from types import TracebackType + from typing_extensions import Self class ArrowException(Exception): ... @@ -41,7 +43,12 @@ have_signal_refcycle: bool class SignalStopHandler: def __enter__(self) -> Self: ... - def __exit__(self, exc_type, exc_value, exc_tb) -> None: ... + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + exc_tb: TracebackType | None, + ) -> None: ... @property def stop_token(self) -> StopToken: ... From 9701c2fcc650d64fa6173dd01db954f3601486ab Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sun, 14 Jun 2026 09:22:40 -0700 Subject: [PATCH 18/21] lint --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f97ed61fbbc1..3f8971cf31f2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -144,7 +144,7 @@ repos: - "--extend-select" - "Y" - "--per-file-ignores" - - "python/pyarrow-stubs/pyarrow/*.pyi:E301,E302,E305,E701" + - "python/pyarrow-stubs/pyarrow/*.pyi:E301,E302,E305,E701,F821" files: >- ^(c_glib|dev|python)/ types: From 8982d1ec6ac80d3b65bf115b81ad770b21c65dd8 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sun, 14 Jun 2026 09:44:05 -0700 Subject: [PATCH 19/21] more covariant types --- python/pyarrow-stubs/pyarrow/_types.pyi | 53 +++++++++++++++++-------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 218993886531..1e98aefc8186 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -196,30 +196,46 @@ _IndexT = TypeVar( UInt64Type, Int64Type, ) +_IndexT_co = TypeVar( + "_IndexT_co", + UInt8Type, + Int8Type, + UInt16Type, + Int16Type, + UInt32Type, + Int32Type, + UInt64Type, + Int64Type, + covariant=True, +) _ValueT = TypeVar("_ValueT", bound=DataType, default=DataType) _ValueT_co = TypeVar("_ValueT_co", bound=DataType, default=DataType, covariant=True) _K = TypeVar("_K", bound=DataType, default=DataType) +_K_co = TypeVar("_K_co", bound=DataType, default=DataType, covariant=True) _Ordered = TypeVar("_Ordered", Literal[True], Literal[False], default=Literal[False]) +_Ordered_co = TypeVar( + "_Ordered_co", Literal[True], Literal[False], default=Literal[False], covariant=True +) -class DictionaryType(DataType, Generic[_IndexT, _ValueT_co, _Ordered]): +class DictionaryType(DataType, Generic[_IndexT_co, _ValueT_co, _Ordered_co]): @property - def ordered(self) -> _Ordered: ... + def ordered(self) -> _Ordered_co: ... @property - def index_type(self) -> _IndexT: ... + def index_type(self) -> _IndexT_co: ... @property def value_type(self) -> _ValueT_co: ... -class MapType(DataType, Generic[_K, _ValueT, _Ordered]): +class MapType(DataType, Generic[_K_co, _ValueT_co, _Ordered_co]): @property - def key_field(self) -> Field[_K]: ... + def key_field(self) -> Field[_K_co]: ... @property - def key_type(self) -> _K: ... + def key_type(self) -> _K_co: ... @property - def item_field(self) -> Field[_ValueT]: ... + def item_field(self) -> Field[_ValueT_co]: ... @property - def item_type(self) -> _ValueT: ... + def item_type(self) -> _ValueT_co: ... @property - def keys_sorted(self) -> _Ordered: ... + def keys_sorted(self) -> _Ordered_co: ... class StructType(DataType): def get_field_index(self, name: str | bytes) -> int: ... @@ -254,10 +270,13 @@ class DenseUnionType(UnionType): def mode(self) -> Literal["dense"]: ... _RunEndType = TypeVar("_RunEndType", Int16Type, Int32Type, Int64Type) +_RunEndType_co = TypeVar( + "_RunEndType_co", Int16Type, Int32Type, Int64Type, covariant=True +) -class RunEndEncodedType(DataType, Generic[_RunEndType, _ValueT_co]): +class RunEndEncodedType(DataType, Generic[_RunEndType_co, _ValueT_co]): @property - def run_end_type(self) -> _RunEndType: ... + def run_end_type(self) -> _RunEndType_co: ... @property def value_type(self) -> _ValueT_co: ... @@ -280,9 +299,9 @@ class ExtensionType(BaseExtensionType): cls, storage_type: DataType, serialized: bytes ) -> Self: ... -class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT]): +class FixedShapeTensorType(BaseExtensionType, Generic[_ValueT_co]): @property - def value_type(self) -> _ValueT: ... + def value_type(self) -> _ValueT_co: ... @property def shape(self) -> list[int]: ... @property @@ -380,7 +399,7 @@ class KeyValueMetadata(_Metadata, Mapping[bytes, bytes]): def __init__( self, __arg0__: _KeyValueMetadataInput | KeyValueMetadata = None, - **kwargs: str, + **kwargs: str | bytes, ) -> None: ... def equals(self, other: KeyValueMetadata) -> bool: ... def __len__(self) -> int: ... @@ -595,13 +614,13 @@ def struct( fields: _StructFieldsInput, ) -> StructType: ... def sparse_union( - child_fields: Sequence[Field[Any]], type_codes: Sequence[int] | None = None + child_fields: Iterable[Field[Any]], type_codes: Sequence[int] | None = None ) -> SparseUnionType: ... def dense_union( - child_fields: Sequence[Field[Any]], type_codes: Sequence[int] | None = None + child_fields: Iterable[Field[Any]], type_codes: Sequence[int] | None = None ) -> DenseUnionType: ... def union( - child_fields: Sequence[Field[Any]], + child_fields: Iterable[Field[Any]], mode: Literal["sparse", "dense"] | int, type_codes: Sequence[int] | None = None, ) -> SparseUnionType | DenseUnionType: ... From 604b09b9f63342b3e5eb92400d53b467f5fda4f2 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sun, 14 Jun 2026 21:15:15 +0200 Subject: [PATCH 20/21] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- python/pyarrow-stubs/pyarrow/_types.pyi | 2 +- python/pyarrow/fs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 1e98aefc8186..8063ac399219 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -22,7 +22,7 @@ from decimal import Decimal # noqa: F401 from typing import Any, Generic, Literal, Protocol, TypeAlias, overload import numpy as np -import pandas as pd # type: ignore[import-not-found,import-untyped] +import pandas as pd # type: ignore[import-not-found,import-untyped] # pyright: ignore[reportMissingImports,reportMissingTypeStubs] from typing_extensions import Self, TypeVar, deprecated from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py index f055c5081826..482650699ea5 100644 --- a/python/pyarrow/fs.py +++ b/python/pyarrow/fs.py @@ -111,7 +111,7 @@ def _ensure_filesystem(filesystem, *, use_mmap=False): else: # handle fsspec-compatible filesystems try: - import fsspec # type: ignore[import-untyped] + import fsspec # type: ignore[import-not-found,import-untyped] except ImportError: pass else: From 3e9e8642feec0aae9237a5a4d7eeb2593d64dff9 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Sun, 14 Jun 2026 12:30:43 -0700 Subject: [PATCH 21/21] lint --- python/pyarrow-stubs/pyarrow/_types.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow-stubs/pyarrow/_types.pyi b/python/pyarrow-stubs/pyarrow/_types.pyi index 8063ac399219..941caa761446 100644 --- a/python/pyarrow-stubs/pyarrow/_types.pyi +++ b/python/pyarrow-stubs/pyarrow/_types.pyi @@ -22,7 +22,7 @@ from decimal import Decimal # noqa: F401 from typing import Any, Generic, Literal, Protocol, TypeAlias, overload import numpy as np -import pandas as pd # type: ignore[import-not-found,import-untyped] # pyright: ignore[reportMissingImports,reportMissingTypeStubs] +import pandas as pd # type: ignore[import-not-found,import-untyped] # pyright: ignore[reportMissingImports,reportMissingTypeStubs] # noqa: E501 from typing_extensions import Self, TypeVar, deprecated from pyarrow._stubs_typing import SupportsArrowSchema, TimeUnit