diff --git a/changes/3826.feature.md b/changes/3826.feature.md new file mode 100644 index 0000000000..41cc555a92 --- /dev/null +++ b/changes/3826.feature.md @@ -0,0 +1 @@ +Added a `subchunk_write_order` option to `ShardingCodec` to allow for `morton`, `unordered`, `lexicographic`, and `colexicographic` subchunk orderings. \ No newline at end of file diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index 6514b35a30..fa98e9466e 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -113,6 +113,13 @@ bytes within chunks of an array may improve the compression ratio, depending on the structure of the data, the compression algorithm used, and which compression filters (e.g., byte-shuffle) have been applied. +### Subchunk memory layout + +The order of chunks **within each shard** can be changed via the `subchunk_write_order` parameter of the `ShardingCodec`. That parameter is a string which must be one of `["morton", "lexicographic", "colexicographic", "unordered"]`. + +The default, [`morton`](https://en.wikipedia.org/wiki/Z-order_curve) order, provides good spatial locality; however, [`lexicographic` (i.e., row-major)](https://en.wikipedia.org/wiki/Row-_and_column-major_order) order, for example, may be better suited to "batched" workflows where some form of sequential reading through a fixed number of outer dimensions is desired. The remaining options are `colexicographic` (i.e., column-major) and `unordered` (i.e., random).
+ + ### Empty chunks It is possible to configure how Zarr handles the storage of chunks that are "empty" diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py index 756bd97ed2..9a1b47b351 100644 --- a/src/zarr/codecs/__init__.py +++ b/src/zarr/codecs/__init__.py @@ -29,7 +29,7 @@ Zstd, ) from zarr.codecs.scale_offset import ScaleOffset -from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation +from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation, SubchunkWriteOrder from zarr.codecs.transpose import TransposeCodec from zarr.codecs.vlen_utf8 import VLenBytesCodec, VLenUTF8Codec from zarr.codecs.zstd import ZstdCodec @@ -47,6 +47,7 @@ "ScaleOffset", "ShardingCodec", "ShardingCodecIndexLocation", + "SubchunkWriteOrder", "TransposeCodec", "VLenBytesCodec", "VLenUTF8Codec", diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index 41f9b7e9c6..33c8602ecb 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -4,7 +4,7 @@ from dataclasses import dataclass, replace from enum import Enum from functools import lru_cache -from typing import TYPE_CHECKING, Any, NamedTuple, cast +from typing import TYPE_CHECKING, Any, Literal, NamedTuple, cast import numpy as np import numpy.typing as npt @@ -46,8 +46,6 @@ BasicIndexer, ChunkProjection, SelectorTuple, - _morton_order, - _morton_order_keys, c_order_iter, get_indexer, morton_order_iter, @@ -64,7 +62,7 @@ if TYPE_CHECKING: from collections.abc import Iterator - from typing import Self + from typing import Final, Self from zarr.core.common import JSON from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType @@ -83,6 +81,15 @@ class ShardingCodecIndexLocation(Enum): end = "end" +SubchunkWriteOrder = Literal["morton", "unordered", "lexicographic", "colexicographic"] +SUBCHUNK_WRITE_ORDER: Final[tuple[str, str, str, str]] = ( + "morton", + "unordered", + "lexicographic", + "colexicographic", +) + + def 
parse_index_location(data: object) -> ShardingCodecIndexLocation: return parse_enum(data, ShardingCodecIndexLocation) @@ -272,14 +279,13 @@ def to_dict_vectorized( dict mapping chunk coordinate tuples to Buffer or None """ starts, ends, valid = self.index.get_chunk_slices_vectorized(chunk_coords_array) - chunk_coords_keys = _morton_order_keys(self.index.chunks_per_shard) result: dict[tuple[int, ...], Buffer | None] = {} - for i, coords in enumerate(chunk_coords_keys): + for i, coords in enumerate(chunk_coords_array): if valid[i]: - result[coords] = self.buf[int(starts[i]) : int(ends[i])] + result[tuple(coords.ravel())] = self.buf[int(starts[i]) : int(ends[i])] else: - result[coords] = None + result[tuple(coords.ravel())] = None return result @@ -293,7 +299,9 @@ class ShardingCodec( chunk_shape: tuple[int, ...] codecs: tuple[Codec, ...] index_codecs: tuple[Codec, ...] + rng: np.random.Generator | None index_location: ShardingCodecIndexLocation = ShardingCodecIndexLocation.end + subchunk_write_order: SubchunkWriteOrder = "morton" def __init__( self, @@ -302,16 +310,24 @@ def __init__( codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(),), index_codecs: Iterable[Codec | dict[str, JSON]] = (BytesCodec(), Crc32cCodec()), index_location: ShardingCodecIndexLocation | str = ShardingCodecIndexLocation.end, + subchunk_write_order: SubchunkWriteOrder = "morton", + rng: np.random.Generator | None = None, ) -> None: chunk_shape_parsed = parse_shapelike(chunk_shape) codecs_parsed = parse_codecs(codecs) index_codecs_parsed = parse_codecs(index_codecs) index_location_parsed = parse_index_location(index_location) + if subchunk_write_order not in SUBCHUNK_WRITE_ORDER: + raise ValueError( + f"Unrecognized subchunk write order: {subchunk_write_order}. Only {SUBCHUNK_WRITE_ORDER} are allowed." 
+ ) object.__setattr__(self, "chunk_shape", chunk_shape_parsed) object.__setattr__(self, "codecs", codecs_parsed) object.__setattr__(self, "index_codecs", index_codecs_parsed) object.__setattr__(self, "index_location", index_location_parsed) + object.__setattr__(self, "subchunk_write_order", subchunk_write_order) + object.__setattr__(self, "rng", rng) # Use instance-local lru_cache to avoid memory leaks @@ -324,7 +340,7 @@ def __init__( # todo: typedict return type def __getstate__(self) -> dict[str, Any]: - return self.to_dict() + return {"rng": self.rng, **self.to_dict()} def __setstate__(self, state: dict[str, Any]) -> None: config = state["configuration"] @@ -332,6 +348,7 @@ def __setstate__(self, state: dict[str, Any]) -> None: object.__setattr__(self, "codecs", parse_codecs(config["codecs"])) object.__setattr__(self, "index_codecs", parse_codecs(config["index_codecs"])) object.__setattr__(self, "index_location", parse_index_location(config["index_location"])) + object.__setattr__(self, "rng", state["rng"]) # Use instance-local lru_cache to avoid memory leaks # object.__setattr__(self, "_get_chunk_spec", lru_cache()(self._get_chunk_spec)) @@ -509,6 +526,24 @@ async def _decode_partial_single( else: return out + def _subchunk_order_iter( + self, chunks_per_shard: tuple[int, ...], subchunk_write_order: SubchunkWriteOrder + ) -> Iterable[tuple[int, ...]]: + match subchunk_write_order: + case "morton": + subchunk_iter = morton_order_iter(chunks_per_shard) + case "lexicographic": + subchunk_iter = np.ndindex(chunks_per_shard) + case "colexicographic": + subchunk_iter = (c[::-1] for c in np.ndindex(chunks_per_shard[::-1])) + case "unordered": + subchunk_list = list(np.ndindex(chunks_per_shard)) + (self.rng if self.rng is not None else np.random.default_rng()).shuffle( + subchunk_list + ) + subchunk_iter = iter(subchunk_list) + return subchunk_iter + async def _encode_single( self, shard_array: NDBuffer, @@ -526,8 +561,7 @@ async def _encode_single( 
chunk_grid=ChunkGrid.from_sizes(shard_shape, chunk_shape), ) ) - - shard_builder = dict.fromkeys(morton_order_iter(chunks_per_shard)) + shard_builder = dict.fromkeys(self._subchunk_order_iter(chunks_per_shard, "lexicographic")) await self.codec_pipeline.write( [ @@ -570,7 +604,7 @@ async def _encode_partial_single( ) if self._is_complete_shard_write(indexer, chunks_per_shard): - shard_dict = dict.fromkeys(morton_order_iter(chunks_per_shard)) + shard_dict = dict.fromkeys(self._subchunk_order_iter(chunks_per_shard, "lexicographic")) else: shard_reader = await self._load_full_shard_maybe( byte_getter=byte_setter, @@ -580,7 +614,7 @@ async def _encode_partial_single( shard_reader = shard_reader or _ShardReader.create_empty(chunks_per_shard) # Use vectorized lookup for better performance shard_dict = shard_reader.to_dict_vectorized( - np.asarray(_morton_order(chunks_per_shard)) + np.array(list(self._subchunk_order_iter(chunks_per_shard, "lexicographic"))) ) await self.codec_pipeline.write( @@ -619,7 +653,7 @@ async def _encode_shard_dict( template = buffer_prototype.buffer.create_zero_length() chunk_start = 0 - for chunk_coords in morton_order_iter(chunks_per_shard): + for chunk_coords in self._subchunk_order_iter(chunks_per_shard, self.subchunk_write_order): value = map.get(chunk_coords) if value is None: continue diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 1b0ede1148..74d9d7c683 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -13,7 +13,10 @@ import zarr from zarr.abc.store import RangeByteRequest, Store from zarr.codecs.bytes import BytesCodec -from zarr.core.array import Array +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.codecs.sharding import SUBCHUNK_WRITE_ORDER, ShardingCodec, SubchunkWriteOrder +from zarr.codecs.zstd import ZstdCodec +from zarr.core.array import Array, CompressorsLike, SerializerLike from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding from 
zarr.core.common import JSON, AccessModeLiteral, ZarrFormat from zarr.core.dtype import get_data_type_from_native_dtype @@ -127,6 +130,9 @@ def dimension_names(draw: st.DrawFn, *, ndim: int | None = None) -> list[None | return draw(st.none() | st.lists(st.none() | simple_text, min_size=ndim, max_size=ndim)) # type: ignore[arg-type] +subchunk_write_orders: st.SearchStrategy[SubchunkWriteOrder] = st.sampled_from(SUBCHUNK_WRITE_ORDER) + + @st.composite def array_metadata( draw: st.DrawFn, @@ -255,6 +261,7 @@ def arrays( arrays: st.SearchStrategy | None = None, attrs: st.SearchStrategy = attrs, zarr_formats: st.SearchStrategy = zarr_formats, + subchunk_write_orders: SearchStrategy[SubchunkWriteOrder] = subchunk_write_orders, open_mode: AccessModeLiteral = "w", ) -> AnyArray: store = draw(stores, label="store") @@ -266,20 +273,11 @@ def arrays( arrays = numpy_arrays(shapes=shapes) nparray = draw(arrays, label="array data") dim_names: None | list[str | None] = None + serializer: SerializerLike = "auto" + compressors_unsearched: CompressorsLike = "auto" # For v3 arrays, optionally use RectilinearChunkGridMetadata chunk_grid_meta: RegularChunkGridMetadata | RectilinearChunkGridMetadata | None = None - shard_shape = None - if zarr_format == 3: - chunk_grid_meta = draw(chunk_grids(shape=nparray.shape), label="chunk grid") - - # Sharding is only supported with regular chunk grids, and has complex - # divisibility constraints that don't play well with hypothesis shrinking. - # Disabled for now — sharding should be tested separately. - - dim_names = draw(dimension_names(ndim=nparray.ndim), label="dimension names") - else: - dim_names = None # test that None works too. fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)])) @@ -295,17 +293,37 @@ def arrays( # - RectilinearChunkGridMetadata -> nested list of ints (triggers rectilinear path) # - v2 -> flat tuple of ints chunks_param: tuple[int, ...] 
| list[list[int]] - if zarr_format == 3 and chunk_grid_meta is not None: + shard_shape = None + dim_names = None + if zarr_format == 3: + chunk_grid_meta = draw(st.none() | chunk_grids(shape=nparray.shape), label="chunk grid") + dim_names = draw(dimension_names(ndim=nparray.ndim), label="dimension names") if isinstance(chunk_grid_meta, RectilinearChunkGridMetadata): chunks_param = [ list(dim) if isinstance(dim, tuple) else [dim] for dim in chunk_grid_meta.chunk_shapes ] - else: + elif isinstance(chunk_grid_meta, RegularChunkGridMetadata): chunks_param = chunk_grid_meta.chunk_shape + else: + chunks_param = draw(chunk_shapes(shape=nparray.shape), label="chunk shape") + + if all(s > c and c > 1 for s, c in zip(nparray.shape, chunks_param, strict=True)): + shard_shape = draw( + st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunks_param), + label="shard shape", + ) + if shard_shape is not None: + subchunk_write_order = draw(subchunk_write_orders) + serializer = ShardingCodec( + subchunk_write_order=subchunk_write_order, + codecs=[BytesCodec(), ZstdCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + chunk_shape=chunks_param, + ) + compressors_unsearched = None else: chunks_param = draw(chunk_shapes(shape=nparray.shape), label="chunk shape") - a = root.create_array( array_path, shape=nparray.shape, @@ -313,9 +331,10 @@ def arrays( shards=shard_shape, dtype=nparray.dtype, attributes=attributes, - # compressor=compressor, # FIXME + compressors=compressors_unsearched, # FIXME fill_value=fill_value, dimension_names=dim_names, + serializer=serializer, ) assert isinstance(a, Array) @@ -329,12 +348,15 @@ def arrays( # Verify chunks — for rectilinear grids, .chunks raises if zarr_format == 3: - if isinstance(a.metadata.chunk_grid, RectilinearChunkGridMetadata): - assert shard_shape is None - else: - assert isinstance(a.metadata.chunk_grid, RegularChunkGridMetadata) - assert a.metadata.chunk_grid.chunk_shape == a.chunks + assert shard_shape == a.shards + if 
isinstance(a.metadata.chunk_grid, RegularChunkGridMetadata): + assert a.metadata.chunk_grid.chunk_shape == ( + a.shards if shard_shape is not None else a.chunks + ) assert shard_shape == a.shards + else: + assert isinstance(a.metadata.chunk_grid, RectilinearChunkGridMetadata) + assert shard_shape is None assert a.basename == name, (a.basename, name) assert dict(a.attrs) == expected_attrs diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index b31471337c..74e4a7e0d5 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -1,5 +1,5 @@ import pickle -from typing import Any +from typing import Any, get_args from unittest.mock import AsyncMock import numpy as np @@ -13,14 +13,16 @@ from zarr.abc.store import Store from zarr.codecs import ( BloscCodec, + BytesCodec, + Crc32cCodec, ShardingCodec, ShardingCodecIndexLocation, TransposeCodec, ) -from zarr.codecs.sharding import MAX_UINT_64, _ShardIndex +from zarr.codecs.sharding import MAX_UINT_64, SubchunkWriteOrder, _ShardIndex, _ShardReader from zarr.core.buffer import NDArrayLike, default_buffer_prototype from zarr.core.indexing import c_order_iter -from zarr.storage import StorePath, ZipStore +from zarr.storage import MemoryStore, StorePath, ZipStore from ..conftest import ArrayRequest from .test_codecs import _AsyncArrayProxy, order_from_dim @@ -822,6 +824,135 @@ def test_sharding_mixed_integer_list_indexing(store: Store) -> None: np.testing.assert_array_equal(c3, s3) +async def stored_data_and_get_order( + codec: ShardingCodec, chunks_per_shard: tuple[int, ...] 
+) -> list[tuple[int, ...]]: + shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, codec.chunk_shape, strict=True)) + store = MemoryStore() + arr = zarr.create_array( + StorePath(store), + shape=shard_shape, + dtype="uint8", + chunks=shard_shape, + serializer=codec, + filters=None, + compressors=None, + fill_value=0, + ) + + arr[:] = np.arange(np.prod(shard_shape), dtype="uint8").reshape(shard_shape) + + shard_buf = await store.get("c/0/0", prototype=default_buffer_prototype()) + if shard_buf is None: + raise RuntimeError("data write failed") + index = (await _ShardReader.from_bytes(shard_buf, codec, chunks_per_shard)).index + offset_to_coord: dict[int, tuple[int, ...]] = dict( + zip( + index.get_chunk_slices_vectorized(np.array(list(np.ndindex(chunks_per_shard))))[ + 0 + ], # start + list(np.ndindex(chunks_per_shard)), # coord + strict=True, + ) + ) + + # The physical write order is recovered by sorting coordinates by start offset. + return [coord for _, coord in sorted(offset_to_coord.items())] + + +@pytest.mark.parametrize( + "subchunk_write_order", + get_args(SubchunkWriteOrder), +) +async def test_encoded_subchunk_write_order(subchunk_write_order: SubchunkWriteOrder) -> None: + """Subchunks must be physically laid out in the shard in the order specified by + ``subchunk_write_order``. We verify this by decoding the shard index and sorting + the chunk coordinates by their byte offset.""" + # Use a non-square chunks_per_shard so all three orderings are distinguishable. 
+ chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + seed = 0 + codec = ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order=subchunk_write_order, + rng=np.random.default_rng(seed=seed), + ) + + actual_order = await stored_data_and_get_order(codec, chunks_per_shard) + if subchunk_write_order != "unordered": + expected_order = list(codec._subchunk_order_iter(chunks_per_shard, subchunk_write_order)) + assert actual_order == expected_order + else: + same_order_same_seed = list( + ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order=subchunk_write_order, + rng=np.random.default_rng(seed=seed), + )._subchunk_order_iter(chunks_per_shard, subchunk_write_order) + ) + assert actual_order == same_order_same_seed + + +async def test_unordered_can_be_seeded() -> None: + orders = [] + chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + seed = 0 + for _ in range(4): + codec = ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + index_codecs=[BytesCodec(), Crc32cCodec()], + index_location=ShardingCodecIndexLocation.end, + subchunk_write_order="unordered", + rng=np.random.default_rng(seed=seed), + ) + orders.append(await stored_data_and_get_order(codec, chunks_per_shard)) + assert all(orders[0] == o for o in orders) + + +@pytest.mark.parametrize( + "subchunk_write_order", + get_args(SubchunkWriteOrder), +) +@pytest.mark.parametrize("do_partial", [True, False], ids=["partial", "complete"]) +def test_subchunk_write_order_roundtrip( + subchunk_write_order: SubchunkWriteOrder, do_partial: bool +) -> None: + """Data written with any ``subchunk_write_order`` must round-trip correctly.""" + chunks_per_shard = (3, 2) + chunk_shape = (4, 4) + shard_shape = tuple(c * s for c, s in zip(chunks_per_shard, chunk_shape, 
strict=True)) + data = np.arange(np.prod(shard_shape), dtype="uint16").reshape(shard_shape) + arr = zarr.create_array( + StorePath(MemoryStore()), + shape=shard_shape, + dtype=data.dtype, + chunks=shard_shape, + serializer=ShardingCodec( + chunk_shape=chunk_shape, + codecs=[BytesCodec()], + subchunk_write_order=subchunk_write_order, + ), + filters=None, + compressors=None, + fill_value=0, + ) + if do_partial: + sub_data = data[: (shard_shape[0] // 2)] + arr[: (shard_shape[0] // 2)] = data[: (shard_shape[0] // 2)] + data = np.vstack([sub_data, np.zeros_like(sub_data)]) + else: + arr[:] = data + np.testing.assert_array_equal(arr[:], data) + + def test_sharding_zero_dimensional() -> None: """Regression test for https://github.com/zarr-developers/zarr-python/issues/3751""" arr = zarr.create_array({}, shape=(), dtype="f4", chunks=(), shards=())