Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/api/v3.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
::: pydantic_zarr.v3
::: pydantic_zarr.v3
1 change: 1 addition & 0 deletions docs/api/v3/codecs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: pydantic_zarr.v3.codecs
7 changes: 6 additions & 1 deletion mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ nav:
- API:
- core: api/core.md
- v2: api/v2.md
- v3: api/v3.md
- v3:
- Core: api/v3.md
- Codecs: api/v3/codecs.md


plugins:
- mkdocstrings:
Expand All @@ -47,6 +50,8 @@ plugins:
docstring_options:
ignore_init_summary: true
merge_init_into_class: true
extensions:
- griffe_pydantic:

markdown_extensions:
- pymdownx.highlight:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ Source = "https://github.com/zarr-developers/pydantic-zarr"
test = ["coverage", "pytest<8.4", "pytest-cov", "pytest-examples"]

docs = [
"griffe-pydantic",
"mkdocs-material",
"mkdocstrings[python]",
"pytest-examples",
"pydantic==2.11",
"zarr>=3.1.0"
"zarr>=3.1.0",
]

[tool.hatch]
Expand Down
3 changes: 2 additions & 1 deletion src/pydantic_zarr/v3.py → src/pydantic_zarr/v3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
model_like,
tuplify_json,
)
from pydantic_zarr.v3.codecs import Codec

if TYPE_CHECKING:
from collections.abc import Sequence
Expand Down Expand Up @@ -95,7 +96,7 @@ class AnyNamedConfig(NamedConfig[str, Mapping[str, object]]):
"""


CodecLike = str | AnyNamedConfig
CodecLike = str | AnyNamedConfig | Codec
"""A type modelling the permissible declarations for codecs"""


Expand Down
152 changes: 152 additions & 0 deletions src/pydantic_zarr/v3/codecs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from typing import Any, Literal

from pydantic import BaseModel, Field, PositiveInt, PrivateAttr, field_validator, model_serializer


class Codec(BaseModel):
    """
    Base class for codec models.

    A codec is described by a `name` and a `configuration` model. Subclasses
    in this module narrow both fields and assign `_codec_type`, a pydantic
    private attribute whose value is one of "array-array", "array-bytes" or
    "bytes-bytes".
    """

    name: str
    configuration: BaseModel
    # No default is given here; each concrete codec subclass supplies its own.
    _codec_type: Literal["array-array", "array-bytes", "bytes-bytes"] = PrivateAttr()


class BloscConfiguration(BaseModel):
    """
    Configuration for blosc codec.

    All five fields are required. `cname`, `clevel` and `shuffle` only accept
    the literal values listed in their annotations, `typesize` must be a
    positive integer, and `blocksize` accepts any integer.
    """

    cname: Literal["lz4", "lz4hc", "blosclz", "zstd", "snappy", "zlib"]
    clevel: Literal[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    shuffle: Literal["noshuffle", "shuffle", "bitshuffle"]
    typesize: PositiveInt
    blocksize: int


class Blosc(Codec):
    """
    Blosc codec.

    `name` is fixed to "blosc" and `configuration` must be a
    `BloscConfiguration`. The codec is tagged as "bytes-bytes".
    """

    name: Literal["blosc"] = "blosc"
    configuration: BloscConfiguration
    _codec_type: Literal["bytes-bytes"] = "bytes-bytes"


class BytesConfig(BaseModel):
    """
    Configuration for bytes codec.

    `endian` is optional. When it is unset the configuration serializes to an
    empty mapping instead of ``{"endian": None}``.
    """

    endian: Literal["big", "little"] | None = None

    @model_serializer
    def ser_model(self) -> dict[str, Any]:
        """
        Serialize the configuration, omitting `endian` when it is `None`.

        Returns
        -------
        dict[str, Any]
            ``{}`` if `endian` is `None`, otherwise ``{"endian": <value>}``.
        """
        if self.endian is None:
            return {}
        # Build the mapping directly: calling ``model_dump()`` from inside a
        # plain-mode model serializer dispatches back to this same function
        # and recurses until the interpreter gives up.
        return {"endian": self.endian}


class Bytes(Codec):
    """
    Bytes codec.

    `name` is fixed to "bytes" and `configuration` must be a `BytesConfig`.
    The codec is tagged as "array-bytes".
    """

    name: Literal["bytes"] = "bytes"
    configuration: BytesConfig
    _codec_type: Literal["array-bytes"] = "array-bytes"


class CRC32CConfig(BaseModel):
    """
    Configuration for crc32c codec.

    This model declares no fields.
    """


class CRC32C(Codec):
    """
    CRC32C codec.

    `name` is fixed to "crc32c". `configuration` defaults to an empty
    `CRC32CConfig`, so the codec can be created without arguments. The codec
    is tagged as "bytes-bytes".
    """

    name: Literal["crc32c"] = "crc32c"
    configuration: CRC32CConfig = CRC32CConfig()
    _codec_type: Literal["bytes-bytes"] = "bytes-bytes"


class GzipConfig(BaseModel):
    """
    Configuration for gzip codec.

    `level` is required and must be an integer from 0 to 9 inclusive.
    """

    level: Literal[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


class Gzip(Codec):
    """
    Gzip codec.

    `name` is fixed to "gzip" and `configuration` must be a `GzipConfig`.
    The codec is tagged as "bytes-bytes".
    """

    name: Literal["gzip"] = "gzip"
    configuration: GzipConfig
    _codec_type: Literal["bytes-bytes"] = "bytes-bytes"


class ShardingConfig(BaseModel):
    """
    Configuration for sharding codec.

    `chunk_shape` and `codecs` are required. `index_codecs` defaults to a
    little-endian `Bytes` codec followed by `CRC32C`, and `index_location`
    defaults to "end". Both codec tuples are validated to contain exactly one
    codec tagged "array-bytes".
    """

    chunk_shape: tuple[int, ...]
    codecs: tuple[Codec, ...]
    # Default is recommended in the specification
    index_codecs: tuple[Codec, ...] = Field(
        default=(Bytes(configuration=BytesConfig(endian="little")), CRC32C())
    )
    index_location: Literal["start", "end"] = "end"

    @field_validator("codecs", "index_codecs")
    @classmethod
    def check_single_array_bytes_codec(cls, codecs: tuple[Codec, ...]) -> tuple[Codec, ...]:
        """
        Require exactly one "array-bytes" codec in the given codec tuple.

        Raises
        ------
        ValueError
            If the number of codecs tagged "array-bytes" is not exactly one.
        """
        array_bytes_count = sum(1 for codec in codecs if codec._codec_type == "array-bytes")
        if array_bytes_count == 1:
            return codecs
        raise ValueError("Codec list must contain exactly one array-bytes codec")


class Sharding(Codec):
    """
    Sharding codec.

    `name` is fixed to "sharding_indexed" and `configuration` must be a
    `ShardingConfig`.
    """

    name: Literal["sharding_indexed"] = "sharding_indexed"
    configuration: ShardingConfig
    # The Zarr v3 specification defines sharding_indexed as an array -> bytes
    # codec. Tagging it "bytes-bytes" would make codec-list validation reject
    # a pipeline whose only array-bytes codec is a (nested) sharding codec.
    _codec_type: Literal["array-bytes"] = "array-bytes"


class TransposeConfig(BaseModel):
    """
    Configuration for transpose codec.

    `order` is required and must contain each integer from 0 up to
    ``len(order) - 1`` exactly once.
    """

    order: tuple[int, ...]

    @field_validator("order")
    @classmethod
    def check_order(cls, order: tuple[int, ...]) -> tuple[int, ...]:
        """
        Check that `order` is a permutation of ``range(len(order))``.

        Raises
        ------
        ValueError
            If the set of values in `order` differs from
            ``{0, 1, ..., len(order) - 1}``.
        """
        expected_axes = set(range(len(order)))
        if expected_axes == set(order):
            return order
        raise ValueError("order must be a permutation of positive integers starting from 0")


class Transpose(Codec):
    """
    Transpose codec.

    `name` is fixed to "transpose" and `configuration` must be a
    `TransposeConfig`. The codec is tagged as "array-array".
    """

    name: Literal["transpose"] = "transpose"
    configuration: TransposeConfig
    _codec_type: Literal["array-array"] = "array-array"
5 changes: 5 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import zarr

from pydantic_zarr.v3 import ArraySpec

# Build an ArraySpec from the array returned by zarr.empty((1, 1, 1)); the result is discarded.
ArraySpec.from_array(zarr.empty((1, 1, 1)))
7 changes: 4 additions & 3 deletions tests/test_pydantic_zarr/test_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
RegularChunkingConfig,
auto_codecs,
)
from pydantic_zarr.v3.codecs import Bytes, BytesConfig, Gzip, GzipConfig

from .conftest import DTYPE_EXAMPLES_V3, DTypeExample

Expand All @@ -31,14 +32,14 @@ def test_serialize_deserialize() -> None:

group_attributes = {"group": True}

array_spec = ArraySpec(
array_spec: AnyArraySpec = ArraySpec(
attributes=array_attributes,
shape=[1000, 1000],
dimension_names=["rows", "columns"],
data_type="float64",
chunk_grid=NamedConfig(name="regular", configuration={"chunk_shape": [1000, 100]}),
chunk_key_encoding=NamedConfig(name="default", configuration={"separator": "/"}),
codecs=[NamedConfig(name="GZip", configuration={"level": 1})],
codecs=[Gzip(configuration=GzipConfig(level=1))],
fill_value="NaN",
storage_transformers=[],
)
Expand Down Expand Up @@ -205,7 +206,7 @@ def test_from_flat() -> None:

@staticmethod
def test_from_zarr_depth() -> None:
codecs = ({"name": "bytes", "configuration": {}},)
codecs = (Bytes(configuration=BytesConfig()),)
tree: dict[str, AnyGroupSpec | AnyArraySpec] = {
"": GroupSpec(members=None, attributes={"level": 0, "type": "group"}),
"/1": GroupSpec(members=None, attributes={"level": 1, "type": "group"}),
Expand Down
Loading