Skip to content

Commit 2d3a36c

Browse files
authored
fix: validate v3 dtypes when loading/creating v3 metadata (#2209)
1 parent fb28fa5 commit 2d3a36c

File tree

9 files changed

+291
-80
lines changed

9 files changed

+291
-80
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ filterwarnings = [
274274
"ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning",
275275
"ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning",
276276
"ignore:Creating a zarr.buffer.gpu.*:UserWarning",
277+
"ignore:Duplicate name:UserWarning", # from ZipFile
277278
]
278279
markers = [
279280
"gpu: mark a test as requiring CuPy and GPU"

src/zarr/core/array_spec.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from dataclasses import dataclass
44
from typing import TYPE_CHECKING, Any, Literal
55

6-
from zarr.core.common import parse_dtype, parse_fill_value, parse_order, parse_shapelike
6+
from zarr.core.common import parse_fill_value, parse_order, parse_shapelike
77

88
if TYPE_CHECKING:
99
import numpy as np
@@ -29,12 +29,11 @@ def __init__(
2929
prototype: BufferPrototype,
3030
) -> None:
3131
shape_parsed = parse_shapelike(shape)
32-
dtype_parsed = parse_dtype(dtype)
3332
fill_value_parsed = parse_fill_value(fill_value)
3433
order_parsed = parse_order(order)
3534

3635
object.__setattr__(self, "shape", shape_parsed)
37-
object.__setattr__(self, "dtype", dtype_parsed)
36+
object.__setattr__(self, "dtype", dtype)
3837
object.__setattr__(self, "fill_value", fill_value_parsed)
3938
object.__setattr__(self, "order", order_parsed)
4039
object.__setattr__(self, "prototype", prototype)

src/zarr/core/common.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
if TYPE_CHECKING:
2020
from collections.abc import Awaitable, Callable, Iterator
2121

22-
import numpy as np
23-
import numpy.typing as npt
2422

2523
ZARR_JSON = "zarr.json"
2624
ZARRAY_JSON = ".zarray"
@@ -155,11 +153,6 @@ def parse_shapelike(data: int | Iterable[int]) -> tuple[int, ...]:
155153
return data_tuple
156154

157155

158-
def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]:
159-
# todo: real validation
160-
return np.dtype(data)
161-
162-
163156
def parse_fill_value(data: Any) -> Any:
164157
# todo: real validation
165158
return data

src/zarr/core/metadata/v2.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from collections.abc import Iterable
4+
from enum import Enum
45
from typing import TYPE_CHECKING
56

67
if TYPE_CHECKING:
@@ -21,7 +22,7 @@
2122
from zarr.core.array_spec import ArraySpec
2223
from zarr.core.chunk_grids import RegularChunkGrid
2324
from zarr.core.chunk_key_encodings import parse_separator
24-
from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, parse_dtype, parse_shapelike
25+
from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, parse_shapelike
2526
from zarr.core.config import config, parse_indexing_order
2627
from zarr.core.metadata.common import ArrayMetadata, parse_attributes
2728

@@ -100,9 +101,24 @@ def _json_convert(
100101
else:
101102
return o.descr
102103
if np.isscalar(o):
103-
# convert numpy scalar to python type, and pass
104-
# python types through
105-
return getattr(o, "item", lambda: o)()
104+
out: Any
105+
if hasattr(o, "dtype") and o.dtype.kind == "M" and hasattr(o, "view"):
106+
# https://github.com/zarr-developers/zarr-python/issues/2119
107+
# `.item()` on a datetime type might or might not return an
108+
# integer, depending on the value.
109+
# Explicitly cast to an int first, and then grab .item()
110+
out = o.view("i8").item()
111+
else:
112+
# convert numpy scalar to python type, and pass
113+
# python types through
114+
out = getattr(o, "item", lambda: o)()
115+
if isinstance(out, complex):
116+
# python complex types are not JSON serializable, so we use the
117+
# serialization defined in the zarr v3 spec
118+
return [out.real, out.imag]
119+
return out
120+
if isinstance(o, Enum):
121+
return o.name
106122
raise TypeError
107123

108124
zarray_dict = self.to_dict()
@@ -157,6 +173,11 @@ def update_attributes(self, attributes: dict[str, JSON]) -> Self:
157173
return replace(self, attributes=attributes)
158174

159175

176+
def parse_dtype(data: npt.DTypeLike) -> np.dtype[Any]:
177+
# todo: real validation
178+
return np.dtype(data)
179+
180+
160181
def parse_zarr_format(data: object) -> Literal[2]:
161182
if data == 2:
162183
return 2

0 commit comments

Comments
 (0)