diff --git a/packages/zarr-metadata/changes/4083.feature.md b/packages/zarr-metadata/changes/4083.feature.md new file mode 100644 index 0000000000..c383495c1a --- /dev/null +++ b/packages/zarr-metadata/changes/4083.feature.md @@ -0,0 +1,21 @@ +Promoted a curated "front door" of names to the top-level `zarr_metadata` +namespace, so consumers can write e.g. `from zarr_metadata import +ArrayMetadataV3, ShardingIndexLocation, BLOSC_CNAME` instead of importing from +deep submodule paths. The front door covers every metadata-document TypedDict, +each codec/chunk-grid/chunk-key-encoding canonical type, the full data-type +trio for every dtype, and every constant + `Literal` pair. Deep submodule paths +continue to work unchanged. + +Several promoted names were given clearer, less ambiguous spellings than their +deep-module names, since they now appear bare at the top level: +`Endian`/`ENDIAN` → `Endianness`/`ENDIANNESS`, +`IndexLocation`/`INDEX_LOCATION` → `ShardingIndexLocation`/`SHARDING_INDEX_LOCATION`, +`RoundingMode`/`ROUNDING_MODE` → `CastRoundingMode`/`CAST_ROUNDING_MODE`, +`OutOfRangeMode`/`OUT_OF_RANGE_MODE` → `CastOutOfRangeMode`/`CAST_OUT_OF_RANGE_MODE`, +`DateTimeUnit` → `NumpyTimeUnit`, +`NamedConfig` → `NamedConfigV3`, and +`MetadataFieldV3` → `MetadataV3` (matching the name `zarrs` uses for this +`name`-or-`{name, configuration}` shape). + +Also added the `NUMPY_TIME_UNIT` runtime constant (a `Final` tuple paired with +the `NumpyTimeUnit` Literal) in `zarr_metadata.v3.data_type.numpy_timedelta64`. 
diff --git a/packages/zarr-metadata/src/zarr_metadata/__init__.py b/packages/zarr-metadata/src/zarr_metadata/__init__.py index 7c6461500e..46949570a2 100644 --- a/packages/zarr-metadata/src/zarr_metadata/__init__.py +++ b/packages/zarr-metadata/src/zarr_metadata/__init__.py @@ -1,7 +1,9 @@ from importlib.metadata import version -from zarr_metadata._common import JSONValue, NamedConfig +from zarr_metadata._common import JSONValue, NamedConfigV3 from zarr_metadata.v2.array import ( + ARRAY_DIMENSION_SEPARATOR_V2, + ARRAY_ORDER_V2, ArrayDimensionSeparatorV2, ArrayMetadataV2, ArrayMetadataV2Partial, @@ -13,35 +15,320 @@ from zarr_metadata.v2.codec import CodecMetadataV2 from zarr_metadata.v2.consolidated import ConsolidatedMetadataV2 from zarr_metadata.v2.group import GroupMetadataV2, GroupMetadataV2Partial, ZGroupMetadata -from zarr_metadata.v3._common import MetadataFieldV3 +from zarr_metadata.v3._common import MetadataV3 from zarr_metadata.v3.array import ArrayMetadataV3, ArrayMetadataV3Partial, ExtensionFieldV3 +from zarr_metadata.v3.chunk_grid.rectilinear import ( + RECTILINEAR_CHUNK_GRID_NAME, + RectilinearChunkGridMetadata, + RectilinearChunkGridName, +) +from zarr_metadata.v3.chunk_grid.regular import ( + REGULAR_CHUNK_GRID_NAME, + RegularChunkGridMetadata, + RegularChunkGridName, +) +from zarr_metadata.v3.chunk_key_encoding.default import ( + DEFAULT_CHUNK_KEY_ENCODING_NAME, + DEFAULT_CHUNK_KEY_ENCODING_SEPARATOR, + DefaultChunkKeyEncodingMetadata, + DefaultChunkKeyEncodingName, + DefaultChunkKeyEncodingSeparator, +) +from zarr_metadata.v3.chunk_key_encoding.v2 import ( + V2_CHUNK_KEY_ENCODING_NAME, + V2_CHUNK_KEY_ENCODING_SEPARATOR, + V2ChunkKeyEncodingMetadata, + V2ChunkKeyEncodingName, + V2ChunkKeyEncodingSeparator, +) +from zarr_metadata.v3.codec.blosc import ( + BLOSC_CNAME, + BLOSC_CODEC_NAME, + BLOSC_SHUFFLE, + BloscCName, + BloscCodecMetadata, + BloscCodecName, + BloscShuffle, +) +from zarr_metadata.v3.codec.bytes import ( + BYTES_CODEC_NAME, + 
ENDIANNESS, + BytesCodecMetadata, + BytesCodecName, + Endianness, +) +from zarr_metadata.v3.codec.cast_value import ( + CAST_OUT_OF_RANGE_MODE, + CAST_ROUNDING_MODE, + CAST_VALUE_CODEC_NAME, + CastOutOfRangeMode, + CastRoundingMode, + CastValueCodecMetadata, + CastValueCodecName, +) +from zarr_metadata.v3.codec.crc32c import CRC32C_CODEC_NAME, Crc32cCodecMetadata, Crc32cCodecName +from zarr_metadata.v3.codec.gzip import GZIP_CODEC_NAME, GzipCodecMetadata, GzipCodecName +from zarr_metadata.v3.codec.scale_offset import ( + SCALE_OFFSET_CODEC_NAME, + ScaleOffsetCodecMetadata, + ScaleOffsetCodecName, +) +from zarr_metadata.v3.codec.sharding_indexed import ( + SHARDING_INDEX_LOCATION, + SHARDING_INDEXED_CODEC_NAME, + ShardingIndexedCodecMetadata, + ShardingIndexedCodecName, + ShardingIndexLocation, +) +from zarr_metadata.v3.codec.transpose import ( + TRANSPOSE_CODEC_NAME, + TransposeCodecMetadata, + TransposeCodecName, +) +from zarr_metadata.v3.codec.zstd import ZSTD_CODEC_NAME, ZstdCodecMetadata, ZstdCodecName from zarr_metadata.v3.consolidated import ConsolidatedMetadataV3 +from zarr_metadata.v3.data_type.bool import ( + BOOL_DATA_TYPE_NAME, + BoolDataTypeName, + BoolFillValue, +) +from zarr_metadata.v3.data_type.bytes import ( + BYTES_DATA_TYPE_NAME, + BytesDataTypeName, + BytesFillValue, +) +from zarr_metadata.v3.data_type.complex64 import ( + COMPLEX64_DATA_TYPE_NAME, + Complex64DataTypeName, + Complex64FillValue, +) +from zarr_metadata.v3.data_type.complex128 import ( + COMPLEX128_DATA_TYPE_NAME, + Complex128DataTypeName, + Complex128FillValue, +) +from zarr_metadata.v3.data_type.float16 import ( + FLOAT16_DATA_TYPE_NAME, + Float16DataTypeName, + Float16FillValue, +) +from zarr_metadata.v3.data_type.float32 import ( + FLOAT32_DATA_TYPE_NAME, + Float32DataTypeName, + Float32FillValue, +) +from zarr_metadata.v3.data_type.float64 import ( + FLOAT64_DATA_TYPE_NAME, + Float64DataTypeName, + Float64FillValue, +) +from zarr_metadata.v3.data_type.int8 import ( + 
INT8_DATA_TYPE_NAME, + Int8DataTypeName, + Int8FillValue, +) +from zarr_metadata.v3.data_type.int16 import ( + INT16_DATA_TYPE_NAME, + Int16DataTypeName, + Int16FillValue, +) +from zarr_metadata.v3.data_type.int32 import ( + INT32_DATA_TYPE_NAME, + Int32DataTypeName, + Int32FillValue, +) +from zarr_metadata.v3.data_type.int64 import ( + INT64_DATA_TYPE_NAME, + Int64DataTypeName, + Int64FillValue, +) +from zarr_metadata.v3.data_type.numpy_datetime64 import ( + NUMPY_DATETIME64_DATA_TYPE_NAME, + NumpyDatetime64DataTypeName, + NumpyDatetime64FillValue, +) +from zarr_metadata.v3.data_type.numpy_timedelta64 import ( + NUMPY_TIME_UNIT, + NUMPY_TIMEDELTA64_DATA_TYPE_NAME, + NumpyTimedelta64DataTypeName, + NumpyTimedelta64FillValue, + NumpyTimeUnit, +) +from zarr_metadata.v3.data_type.raw import RawBytesDataTypeName, RawBytesFillValue +from zarr_metadata.v3.data_type.string import ( + STRING_DATA_TYPE_NAME, + StringDataTypeName, + StringFillValue, +) +from zarr_metadata.v3.data_type.struct import ( + STRUCT_DATA_TYPE_NAME, + StructDataTypeName, + StructFillValue, +) +from zarr_metadata.v3.data_type.uint8 import ( + UINT8_DATA_TYPE_NAME, + Uint8DataTypeName, + Uint8FillValue, +) +from zarr_metadata.v3.data_type.uint16 import ( + UINT16_DATA_TYPE_NAME, + Uint16DataTypeName, + Uint16FillValue, +) +from zarr_metadata.v3.data_type.uint32 import ( + UINT32_DATA_TYPE_NAME, + Uint32DataTypeName, + Uint32FillValue, +) +from zarr_metadata.v3.data_type.uint64 import ( + UINT64_DATA_TYPE_NAME, + Uint64DataTypeName, + Uint64FillValue, +) from zarr_metadata.v3.group import GroupMetadataV3, GroupMetadataV3Partial __version__ = version("zarr-metadata") __all__ = [ + "ARRAY_DIMENSION_SEPARATOR_V2", + "ARRAY_ORDER_V2", + "BLOSC_CNAME", + "BLOSC_CODEC_NAME", + "BLOSC_SHUFFLE", + "BOOL_DATA_TYPE_NAME", + "BYTES_CODEC_NAME", + "BYTES_DATA_TYPE_NAME", + "CAST_OUT_OF_RANGE_MODE", + "CAST_ROUNDING_MODE", + "CAST_VALUE_CODEC_NAME", + "COMPLEX64_DATA_TYPE_NAME", + "COMPLEX128_DATA_TYPE_NAME", + 
"CRC32C_CODEC_NAME", + "DEFAULT_CHUNK_KEY_ENCODING_NAME", + "DEFAULT_CHUNK_KEY_ENCODING_SEPARATOR", + "ENDIANNESS", + "FLOAT16_DATA_TYPE_NAME", + "FLOAT32_DATA_TYPE_NAME", + "FLOAT64_DATA_TYPE_NAME", + "GZIP_CODEC_NAME", + "INT8_DATA_TYPE_NAME", + "INT16_DATA_TYPE_NAME", + "INT32_DATA_TYPE_NAME", + "INT64_DATA_TYPE_NAME", + "NUMPY_DATETIME64_DATA_TYPE_NAME", + "NUMPY_TIMEDELTA64_DATA_TYPE_NAME", + "NUMPY_TIME_UNIT", + "RECTILINEAR_CHUNK_GRID_NAME", + "REGULAR_CHUNK_GRID_NAME", + "SCALE_OFFSET_CODEC_NAME", + "SHARDING_INDEXED_CODEC_NAME", + "SHARDING_INDEX_LOCATION", + "STRING_DATA_TYPE_NAME", + "STRUCT_DATA_TYPE_NAME", + "TRANSPOSE_CODEC_NAME", + "UINT8_DATA_TYPE_NAME", + "UINT16_DATA_TYPE_NAME", + "UINT32_DATA_TYPE_NAME", + "UINT64_DATA_TYPE_NAME", + "V2_CHUNK_KEY_ENCODING_NAME", + "V2_CHUNK_KEY_ENCODING_SEPARATOR", + "ZSTD_CODEC_NAME", "ArrayDimensionSeparatorV2", "ArrayMetadataV2", "ArrayMetadataV2Partial", "ArrayMetadataV3", "ArrayMetadataV3Partial", "ArrayOrderV2", + "BloscCName", + "BloscCodecMetadata", + "BloscCodecName", + "BloscShuffle", + "BoolDataTypeName", + "BoolFillValue", + "BytesCodecMetadata", + "BytesCodecName", + "BytesDataTypeName", + "BytesFillValue", + "CastOutOfRangeMode", + "CastRoundingMode", + "CastValueCodecMetadata", + "CastValueCodecName", "CodecMetadataV2", + "Complex64DataTypeName", + "Complex64FillValue", + "Complex128DataTypeName", + "Complex128FillValue", "ConsolidatedMetadataV2", "ConsolidatedMetadataV3", + "Crc32cCodecMetadata", + "Crc32cCodecName", "DataTypeMetadataV2", + "DefaultChunkKeyEncodingMetadata", + "DefaultChunkKeyEncodingName", + "DefaultChunkKeyEncodingSeparator", + "Endianness", "ExtensionFieldV3", + "Float16DataTypeName", + "Float16FillValue", + "Float32DataTypeName", + "Float32FillValue", + "Float64DataTypeName", + "Float64FillValue", "GroupMetadataV2", "GroupMetadataV2Partial", "GroupMetadataV3", "GroupMetadataV3Partial", + "GzipCodecMetadata", + "GzipCodecName", + "Int8DataTypeName", + "Int8FillValue", + 
"Int16DataTypeName", + "Int16FillValue", + "Int32DataTypeName", + "Int32FillValue", + "Int64DataTypeName", + "Int64FillValue", "JSONValue", - "MetadataFieldV3", - "NamedConfig", + "MetadataV3", + "NamedConfigV3", + "NumpyDatetime64DataTypeName", + "NumpyDatetime64FillValue", + "NumpyTimeUnit", + "NumpyTimedelta64DataTypeName", + "NumpyTimedelta64FillValue", + "RawBytesDataTypeName", + "RawBytesFillValue", + "RectilinearChunkGridMetadata", + "RectilinearChunkGridName", + "RegularChunkGridMetadata", + "RegularChunkGridName", + "ScaleOffsetCodecMetadata", + "ScaleOffsetCodecName", + "ShardingIndexLocation", + "ShardingIndexedCodecMetadata", + "ShardingIndexedCodecName", + "StringDataTypeName", + "StringFillValue", + "StructDataTypeName", + "StructFillValue", + "TransposeCodecMetadata", + "TransposeCodecName", + "Uint8DataTypeName", + "Uint8FillValue", + "Uint16DataTypeName", + "Uint16FillValue", + "Uint32DataTypeName", + "Uint32FillValue", + "Uint64DataTypeName", + "Uint64FillValue", + "V2ChunkKeyEncodingMetadata", + "V2ChunkKeyEncodingName", + "V2ChunkKeyEncodingSeparator", "ZArrayMetadata", "ZAttrsMetadata", "ZGroupMetadata", + "ZstdCodecMetadata", + "ZstdCodecName", "__version__", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/_common.py b/packages/zarr-metadata/src/zarr_metadata/_common.py index f6064d863f..598a12e80c 100644 --- a/packages/zarr-metadata/src/zarr_metadata/_common.py +++ b/packages/zarr-metadata/src/zarr_metadata/_common.py @@ -24,7 +24,7 @@ """ -class NamedConfig(TypedDict): +class NamedConfigV3(TypedDict): """ Externally-tagged union member for a metadata field. 
diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/__init__.py b/packages/zarr-metadata/src/zarr_metadata/v3/__init__.py index 7699aa744d..c897f20d52 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/__init__.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/__init__.py @@ -1,6 +1,6 @@ """Zarr v3 metadata types.""" -from zarr_metadata.v3._common import MetadataFieldV3 +from zarr_metadata.v3._common import MetadataV3 from zarr_metadata.v3.array import ArrayMetadataV3, ExtensionFieldV3 from zarr_metadata.v3.consolidated import ConsolidatedMetadataV3 from zarr_metadata.v3.group import GroupMetadataV3 @@ -10,5 +10,5 @@ "ConsolidatedMetadataV3", "ExtensionFieldV3", "GroupMetadataV3", - "MetadataFieldV3", + "MetadataV3", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/_common.py b/packages/zarr-metadata/src/zarr_metadata/v3/_common.py index 8d8e21616a..3424587a43 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/_common.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/_common.py @@ -2,12 +2,12 @@ This module is private (underscore-prefixed) and exists to avoid circular imports between leaf modules and sub-package `__init__.py` re-exports. -Public consumers should import `MetadataFieldV3` from `zarr_metadata.v3`. +Public consumers should import `MetadataV3` from `zarr_metadata.v3`. """ -from zarr_metadata._common import NamedConfig +from zarr_metadata._common import NamedConfigV3 -MetadataFieldV3 = str | NamedConfig +MetadataV3 = str | NamedConfigV3 """The JSON shape of any v3 metadata extension-point entry: either a bare short-hand name string or a `{name, configuration}` envelope. 
@@ -19,5 +19,5 @@ __all__ = [ - "MetadataFieldV3", + "MetadataV3", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/array.py b/packages/zarr-metadata/src/zarr_metadata/v3/array.py index d9cea4aef4..a8b0fa3358 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/array.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/array.py @@ -6,7 +6,7 @@ from typing_extensions import TypedDict from zarr_metadata._common import JSONValue -from zarr_metadata.v3._common import MetadataFieldV3 +from zarr_metadata.v3._common import MetadataV3 class ExtensionFieldV3(TypedDict, extra_items=JSONValue): # type: ignore[call-arg] @@ -52,14 +52,14 @@ class ArrayMetadataV3(TypedDict, extra_items=ExtensionFieldV3): # type: ignore[ zarr_format: Literal[3] node_type: Literal["array"] - data_type: MetadataFieldV3 + data_type: MetadataV3 shape: tuple[int, ...] - chunk_grid: MetadataFieldV3 - chunk_key_encoding: MetadataFieldV3 + chunk_grid: MetadataV3 + chunk_key_encoding: MetadataV3 fill_value: JSONValue - codecs: tuple[MetadataFieldV3, ...] + codecs: tuple[MetadataV3, ...] attributes: NotRequired[Mapping[str, JSONValue]] - storage_transformers: NotRequired[tuple[MetadataFieldV3, ...]] + storage_transformers: NotRequired[tuple[MetadataV3, ...]] dimension_names: NotRequired[tuple[str | None, ...]] @@ -88,14 +88,14 @@ class ArrayMetadataV3Partial(TypedDict, total=False, extra_items=ExtensionFieldV zarr_format: Literal[3] node_type: Literal["array"] - data_type: MetadataFieldV3 + data_type: MetadataV3 shape: tuple[int, ...] - chunk_grid: MetadataFieldV3 - chunk_key_encoding: MetadataFieldV3 + chunk_grid: MetadataV3 + chunk_key_encoding: MetadataV3 fill_value: JSONValue - codecs: tuple[MetadataFieldV3, ...] + codecs: tuple[MetadataV3, ...] 
attributes: NotRequired[Mapping[str, JSONValue]] - storage_transformers: NotRequired[tuple[MetadataFieldV3, ...]] + storage_transformers: NotRequired[tuple[MetadataV3, ...]] dimension_names: NotRequired[tuple[str | None, ...]] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/__init__.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/__init__.py index 8cc819496d..b4f357117f 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/__init__.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/__init__.py @@ -11,7 +11,7 @@ `CodecConfiguration`, etc., import directly from the leaf submodule. For the field-level "any codec entry" alias (used in array metadata's -`codecs` list and in sharding's inner pipelines), import `MetadataFieldV3` +`codecs` list and in sharding's inner pipelines), import `MetadataV3` from `zarr_metadata.v3`. See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/index.html diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py index 522cbe10f5..04e746f898 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py @@ -14,10 +14,10 @@ BytesCodecName = Literal["bytes"] """Literal type of the `name` field of the `bytes` codec.""" -Endian = Literal["little", "big"] +Endianness = Literal["little", "big"] """Literal type of byte order of multi-byte numeric data.""" -ENDIAN: Final = ("little", "big") +ENDIANNESS: Final = ("little", "big") """Tuple of permitted values for the `endian` field of the `bytes` codec.""" @@ -28,7 +28,7 @@ class BytesCodecConfiguration(TypedDict): The `endian` field is required for multi-byte data types. 
""" - endian: NotRequired[Endian] + endian: NotRequired[Endianness] class BytesCodecObject(TypedDict): @@ -55,10 +55,10 @@ class BytesCodecObject(TypedDict): __all__ = [ "BYTES_CODEC_NAME", - "ENDIAN", + "ENDIANNESS", "BytesCodecConfiguration", "BytesCodecMetadata", "BytesCodecName", "BytesCodecObject", - "Endian", + "Endianness", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py index fd6fb2ee4a..7e9b071669 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py @@ -9,7 +9,7 @@ from typing_extensions import TypedDict from zarr_metadata._common import JSONValue -from zarr_metadata.v3._common import MetadataFieldV3 +from zarr_metadata.v3._common import MetadataV3 CAST_VALUE_CODEC_NAME: Final = "cast_value" """The `name` field value of the `cast_value` codec.""" @@ -17,7 +17,7 @@ CastValueCodecName = Literal["cast_value"] """Literal type of the `name` field of the `cast_value` codec.""" -RoundingMode = Literal[ +CastRoundingMode = Literal[ "nearest-even", "towards-zero", "towards-positive", @@ -29,7 +29,7 @@ Defaults to `"nearest-even"` if absent. """ -ROUNDING_MODE: Final = ( +CAST_ROUNDING_MODE: Final = ( "nearest-even", "towards-zero", "towards-positive", @@ -38,13 +38,13 @@ ) """Tuple of permitted values for the `rounding` field of the `cast_value` codec.""" -OutOfRangeMode = Literal["clamp", "wrap"] +CastOutOfRangeMode = Literal["clamp", "wrap"] """Literal type of permitted values for the `out_of_range` configuration field. If absent, out-of-range values are an encoding/decoding error. 
""" -OUT_OF_RANGE_MODE: Final = ("clamp", "wrap") +CAST_OUT_OF_RANGE_MODE: Final = ("clamp", "wrap") """Tuple of permitted values for the `out_of_range` field of the `cast_value` codec.""" ScalarMapEntry = tuple[JSONValue, JSONValue] @@ -71,9 +71,9 @@ class CastValueCodecConfiguration(TypedDict): bare-string primitive name or a `{name, configuration}` envelope. """ - data_type: MetadataFieldV3 - rounding: NotRequired[RoundingMode] - out_of_range: NotRequired[OutOfRangeMode] + data_type: MetadataV3 + rounding: NotRequired[CastRoundingMode] + out_of_range: NotRequired[CastOutOfRangeMode] scalar_map: NotRequired[ScalarMap] @@ -93,15 +93,15 @@ class CastValueCodecObject(TypedDict): __all__ = [ + "CAST_OUT_OF_RANGE_MODE", + "CAST_ROUNDING_MODE", "CAST_VALUE_CODEC_NAME", - "OUT_OF_RANGE_MODE", - "ROUNDING_MODE", + "CastOutOfRangeMode", + "CastRoundingMode", "CastValueCodecConfiguration", "CastValueCodecMetadata", "CastValueCodecName", "CastValueCodecObject", - "OutOfRangeMode", - "RoundingMode", "ScalarMap", "ScalarMapEntry", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py index 93a0774e4e..a1488f7c30 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py @@ -8,7 +8,7 @@ from typing_extensions import TypedDict -from zarr_metadata.v3._common import MetadataFieldV3 +from zarr_metadata.v3._common import MetadataV3 SHARDING_INDEXED_CODEC_NAME: Final = "sharding_indexed" """The `name` field value of the `sharding_indexed` codec.""" @@ -16,10 +16,10 @@ ShardingIndexedCodecName = Literal["sharding_indexed"] """Literal type of the `name` field of the `sharding_indexed` codec.""" -IndexLocation = Literal["start", "end"] +ShardingIndexLocation = Literal["start", "end"] """Literal type of the position of the shard index within the encoded shard.""" -INDEX_LOCATION: Final = 
("start", "end") +SHARDING_INDEX_LOCATION: Final = ("start", "end") """Tuple of permitted values for the `index_location` field of the `sharding_indexed` codec.""" @@ -40,9 +40,9 @@ class ShardingIndexedCodecConfiguration(TypedDict): """ chunk_shape: tuple[int, ...] - codecs: tuple[MetadataFieldV3, ...] - index_codecs: tuple[MetadataFieldV3, ...] - index_location: NotRequired[IndexLocation] + codecs: tuple[MetadataV3, ...] + index_codecs: tuple[MetadataV3, ...] + index_location: NotRequired[ShardingIndexLocation] class ShardingIndexedCodecObject(TypedDict): @@ -61,9 +61,9 @@ class ShardingIndexedCodecObject(TypedDict): """ __all__ = [ - "INDEX_LOCATION", "SHARDING_INDEXED_CODEC_NAME", - "IndexLocation", + "SHARDING_INDEX_LOCATION", + "ShardingIndexLocation", "ShardingIndexedCodecConfiguration", "ShardingIndexedCodecMetadata", "ShardingIndexedCodecName", diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/numpy_datetime64.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/numpy_datetime64.py index 243d5fb6f6..8784160f71 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/numpy_datetime64.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/numpy_datetime64.py @@ -14,7 +14,7 @@ NumpyDatetime64DataTypeName = Literal["numpy.datetime64"] """Literal type of the `name` field of the `numpy.datetime64` data type.""" -DateTimeUnit = Literal[ +NumpyTimeUnit = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic" ] """Time unit codes used by numpy.datetime64.""" @@ -32,7 +32,7 @@ class NumpyDatetime64Configuration(TypedDict): The multiplier relative to the unit. 
""" - unit: ReadOnly[DateTimeUnit] + unit: ReadOnly[NumpyTimeUnit] scale_factor: ReadOnly[int] @@ -52,9 +52,9 @@ class NumpyDatetime64(TypedDict): __all__ = [ "NUMPY_DATETIME64_DATA_TYPE_NAME", - "DateTimeUnit", "NumpyDatetime64", "NumpyDatetime64Configuration", "NumpyDatetime64DataTypeName", "NumpyDatetime64FillValue", + "NumpyTimeUnit", ] diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/numpy_timedelta64.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/numpy_timedelta64.py index 41e35e7aae..f5c8c77bf8 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/numpy_timedelta64.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/numpy_timedelta64.py @@ -14,11 +14,30 @@ NumpyTimedelta64DataTypeName = Literal["numpy.timedelta64"] """Literal type of the `name` field of the `numpy.timedelta64` data type.""" -DateTimeUnit = Literal[ +NumpyTimeUnit = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic" ] """Time unit codes used by numpy.timedelta64.""" +NUMPY_TIME_UNIT: Final = ( + "Y", + "M", + "W", + "D", + "h", + "m", + "s", + "ms", + "us", + "μs", + "ns", + "ps", + "fs", + "as", + "generic", +) +"""Runtime tuple of the permitted `numpy.timedelta64`/`numpy.datetime64` unit strings.""" + class NumpyTimedelta64Configuration(TypedDict): """ @@ -32,7 +51,7 @@ class NumpyTimedelta64Configuration(TypedDict): The multiplier relative to the unit. 
"""
 
-    unit: ReadOnly[DateTimeUnit]
+    unit: ReadOnly[NumpyTimeUnit]
     scale_factor: ReadOnly[int]
 
 
@@ -52,7 +71,8 @@ class NumpyTimedelta64(TypedDict):
 
 __all__ = [
     "NUMPY_TIMEDELTA64_DATA_TYPE_NAME",
-    "DateTimeUnit",
+    "NUMPY_TIME_UNIT",
+    "NumpyTimeUnit",
     "NumpyTimedelta64",
     "NumpyTimedelta64Configuration",
     "NumpyTimedelta64DataTypeName",
diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/struct.py b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/struct.py
index 282bcc83d6..5291e5c309 100644
--- a/packages/zarr-metadata/src/zarr_metadata/v3/data_type/struct.py
+++ b/packages/zarr-metadata/src/zarr_metadata/v3/data_type/struct.py
@@ -10,7 +10,7 @@
 from typing_extensions import ReadOnly, TypedDict
 
 from zarr_metadata._common import JSONValue
-from zarr_metadata.v3._common import MetadataFieldV3
+from zarr_metadata.v3._common import MetadataV3
 
 STRUCT_DATA_TYPE_NAME: Final = "struct"
 """The `name` field value of the `struct` data type."""
@@ -33,7 +33,7 @@ class StructField(TypedDict):
     """
 
     name: ReadOnly[str]
-    data_type: ReadOnly[MetadataFieldV3]
+    data_type: ReadOnly[MetadataV3]
 
 
 class StructConfiguration(TypedDict):
diff --git a/packages/zarr-metadata/tests/test_public_api.py b/packages/zarr-metadata/tests/test_public_api.py
new file mode 100644
index 0000000000..d3270579c3
--- /dev/null
+++ b/packages/zarr-metadata/tests/test_public_api.py
@@ -0,0 +1,218 @@
+"""Test that the curated front-door names are accessible from the top-level zarr_metadata package."""
+
+import re
+from typing import get_args
+
+import zarr_metadata as zm
+
+
+def _group_rank(s: str) -> int:
+    """Rank a name by its RUF022 group: SCREAMING_SNAKE (0), TitleCase (1), anything else (2).
+
+    Mirrors ruff's isort-style classification, so dunders such as `__version__` and
+    any future lower-case names share group 2. Intra-group ordering is ruff's own
+    natural sort, enforced by ruff itself (pre-commit + CI), so it is not asserted here.
+    """
+    cased = re.sub(r"[\d_]", "", s)
+    if len(s) > 1 and cased.isupper():
+        return 0
+    return 1 if s[:1].isupper() else 2
+
+
+EXPECTED = [
+    # Category A — metadata-document types
+    "ArrayMetadataV2",
+    "ArrayMetadataV2Partial",
+    "ZArrayMetadata",
+    "GroupMetadataV2",
+    "GroupMetadataV2Partial",
+    "ZGroupMetadata",
+    "ConsolidatedMetadataV2",
+    "ZAttrsMetadata",
+    "CodecMetadataV2",
+    "ArrayMetadataV3",
+    "ArrayMetadataV3Partial",
+    "ExtensionFieldV3",
+    "GroupMetadataV3",
+    "GroupMetadataV3Partial",
+    "ConsolidatedMetadataV3",
+    "NamedConfigV3",
+    "MetadataV3",
+    "JSONValue",
+    # v2 data-type encoding union
+    "DataTypeMetadataV2",
+    # Category B — codec canonical unions
+    "BloscCodecMetadata",
+    "BytesCodecMetadata",
+    "CastValueCodecMetadata",
+    "Crc32cCodecMetadata",
+    "GzipCodecMetadata",
+    "ScaleOffsetCodecMetadata",
+    "ShardingIndexedCodecMetadata",
+    "TransposeCodecMetadata",
+    "ZstdCodecMetadata",
+    # Category C — grid/key canonical unions
+    "RegularChunkGridMetadata",
+    "RectilinearChunkGridMetadata",
+    "DefaultChunkKeyEncodingMetadata",
+    "V2ChunkKeyEncodingMetadata",
+    # Category D — dtype trios
+    # bool
+    "BoolDataTypeName",
+    "BOOL_DATA_TYPE_NAME",
+    "BoolFillValue",
+    # int8/16/32/64
+    "Int8DataTypeName",
+    "INT8_DATA_TYPE_NAME",
+    "Int8FillValue",
+    "Int16DataTypeName",
+    "INT16_DATA_TYPE_NAME",
+    "Int16FillValue",
+    "Int32DataTypeName",
+    "INT32_DATA_TYPE_NAME",
+    "Int32FillValue",
+    "Int64DataTypeName",
+    "INT64_DATA_TYPE_NAME",
+    "Int64FillValue",
+    # uint8/16/32/64 (actual casing is Uint, not UInt)
+    "Uint8DataTypeName",
+    "UINT8_DATA_TYPE_NAME",
+    "Uint8FillValue",
+    "Uint16DataTypeName",
+    "UINT16_DATA_TYPE_NAME",
+    "Uint16FillValue",
+    "Uint32DataTypeName",
+    "UINT32_DATA_TYPE_NAME",
+    "Uint32FillValue",
+    "Uint64DataTypeName",
+    "UINT64_DATA_TYPE_NAME",
+    "Uint64FillValue",
+    # float16/32/64
+    "Float16DataTypeName",
+    "FLOAT16_DATA_TYPE_NAME",
+    "Float16FillValue",
+    "Float32DataTypeName",
+    "FLOAT32_DATA_TYPE_NAME",
+    "Float32FillValue",
+    "Float64DataTypeName",
+    "FLOAT64_DATA_TYPE_NAME",
+    "Float64FillValue",
+    # complex64/128
+    "Complex64DataTypeName",
+    "COMPLEX64_DATA_TYPE_NAME",
+    "Complex64FillValue",
+    "Complex128DataTypeName",
+    "COMPLEX128_DATA_TYPE_NAME",
+    "Complex128FillValue",
+    # bytes
+    "BytesDataTypeName",
+    "BYTES_DATA_TYPE_NAME",
+    "BytesFillValue",
+    # string
+    "StringDataTypeName",
+    "STRING_DATA_TYPE_NAME",
+    "StringFillValue",
+    # numpy_datetime64
+    "NumpyDatetime64DataTypeName",
+    "NUMPY_DATETIME64_DATA_TYPE_NAME",
+    "NumpyDatetime64FillValue",
+    # numpy_timedelta64
+    "NumpyTimedelta64DataTypeName",
+    "NUMPY_TIMEDELTA64_DATA_TYPE_NAME",
+    "NumpyTimedelta64FillValue",
+    # struct
+    "StructDataTypeName",
+    "STRUCT_DATA_TYPE_NAME",
+    "StructFillValue",
+    # raw (no _DATA_TYPE_NAME constant)
+    "RawBytesDataTypeName",
+    "RawBytesFillValue",
+    # Category E — constant+Literal pairs
+    "ARRAY_ORDER_V2",
+    "ArrayOrderV2",
+    "ARRAY_DIMENSION_SEPARATOR_V2",
+    "ArrayDimensionSeparatorV2",
+    "ENDIANNESS",
+    "Endianness",
+    "BYTES_CODEC_NAME",
+    "BytesCodecName",
+    "BLOSC_CODEC_NAME",
+    "BloscCodecName",
+    "BLOSC_CNAME",
+    "BloscCName",
+    "BLOSC_SHUFFLE",
+    "BloscShuffle",
+    "CAST_ROUNDING_MODE",
+    "CastRoundingMode",
+    "CAST_OUT_OF_RANGE_MODE",
+    "CastOutOfRangeMode",
+    "CAST_VALUE_CODEC_NAME",
+    "CastValueCodecName",
+    "CRC32C_CODEC_NAME",
+    "Crc32cCodecName",
+    "GZIP_CODEC_NAME",
+    "GzipCodecName",
+    "SCALE_OFFSET_CODEC_NAME",
+    "ScaleOffsetCodecName",
+    "SHARDING_INDEX_LOCATION",
+    "ShardingIndexLocation",
+    "SHARDING_INDEXED_CODEC_NAME",
+    "ShardingIndexedCodecName",
+    "TRANSPOSE_CODEC_NAME",
+    "TransposeCodecName",
+    "ZSTD_CODEC_NAME",
+    "ZstdCodecName",
+    "REGULAR_CHUNK_GRID_NAME",
+    "RegularChunkGridName",
+    "RECTILINEAR_CHUNK_GRID_NAME",
+    "RectilinearChunkGridName",
+    "DEFAULT_CHUNK_KEY_ENCODING_NAME",
+    "DefaultChunkKeyEncodingName",
+    "DEFAULT_CHUNK_KEY_ENCODING_SEPARATOR",
+    "DefaultChunkKeyEncodingSeparator",
+    "V2_CHUNK_KEY_ENCODING_NAME",
+    "V2ChunkKeyEncodingName",
+    "V2_CHUNK_KEY_ENCODING_SEPARATOR",
+    "V2ChunkKeyEncodingSeparator",
+    "NUMPY_TIME_UNIT",
+    "NumpyTimeUnit",
+]
+
+
+def test_front_door_names_public() -> None:
+    """Every curated name is listed in `__all__` and resolvable as an attribute."""
+    missing = [n for n in EXPECTED if n not in zm.__all__ or not hasattr(zm, n)]
+    assert not missing, f"missing from top-level API: {missing}"
+
+
+def test_front_door_is_exactly_expected() -> None:
+    """`__all__` must contain exactly the curated names (plus `__version__`).
+
+    Guards against a name being promoted to the front door without a
+    corresponding, deliberate entry in `EXPECTED` — i.e. an accidental
+    addition to the public API surface.
+    """
+    assert set(zm.__all__) - {"__version__"} == set(EXPECTED)
+
+
+def test_all_is_grouped_and_unique() -> None:
+    """`__all__` keeps RUF022 group order and lists no name twice."""
+    ranks = [_group_rank(n) for n in zm.__all__]
+    assert ranks == sorted(ranks), "`__all__` groups out of order (SCREAMING, TitleCase, other)"
+    assert len(zm.__all__) == len(set(zm.__all__))
+
+
+def test_promoted_pairs_drift() -> None:
+    """Each promoted runtime tuple holds exactly the values of its paired `Literal`."""
+    pairs = [
+        (zm.ENDIANNESS, zm.Endianness),
+        (zm.BLOSC_CNAME, zm.BloscCName),
+        (zm.BLOSC_SHUFFLE, zm.BloscShuffle),
+        (zm.SHARDING_INDEX_LOCATION, zm.ShardingIndexLocation),
+        (zm.NUMPY_TIME_UNIT, zm.NumpyTimeUnit),
+        (zm.CAST_ROUNDING_MODE, zm.CastRoundingMode),
+        (zm.CAST_OUT_OF_RANGE_MODE, zm.CastOutOfRangeMode),
+        (zm.ARRAY_ORDER_V2, zm.ArrayOrderV2),
+    ]
+    for const, lit in pairs:
+        assert set(const) == set(get_args(lit)), f"{const!r} drifted from {lit!r}"
diff --git a/packages/zarr-metadata/tests/v3/data_type/numpy_timedelta64/test_fixtures.py b/packages/zarr-metadata/tests/v3/data_type/numpy_timedelta64/test_fixtures.py
index 1d4bd86a2d..2a6c651582 100644
--- a/packages/zarr-metadata/tests/v3/data_type/numpy_timedelta64/test_fixtures.py
+++ b/packages/zarr-metadata/tests/v3/data_type/numpy_timedelta64/test_fixtures.py
@@ -4,13 +4,16 @@
 
 import json
 from pathlib import Path
+from typing import get_args
 
 import pytest
 from pydantic import TypeAdapter
 
 from zarr_metadata.v3.data_type.numpy_timedelta64 import (
+    NUMPY_TIME_UNIT,
     NumpyTimedelta64,
     NumpyTimedelta64FillValue,
+    NumpyTimeUnit,
 )
 
 DIR = Path(__file__).parent
@@ -24,3 +27,9 @@ def test_data_type() -> None:
 @pytest.mark.parametrize("case", FILL_VALUES.values(), ids=list(FILL_VALUES))
 def test_fill_value(case: object) -> None:
     TypeAdapter(NumpyTimedelta64FillValue).validate_python(case)
+
+
+def test_time_unit_constant_matches_literal() -> None:
+    """`NUMPY_TIME_UNIT` lists each `NumpyTimeUnit` value exactly once."""
+    assert len(NUMPY_TIME_UNIT) == len(set(NUMPY_TIME_UNIT))
+    assert set(NUMPY_TIME_UNIT) == set(get_args(NumpyTimeUnit))