From c537fca2c520d1e9c502163c9d297ee145945716 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 9 Sep 2025 11:17:36 +0100 Subject: [PATCH 1/3] Factor out code for guessing zarr version --- src/zarr/core/group.py | 89 +++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index e71c55c10f..6d6c3e2b58 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -132,6 +132,30 @@ def _parse_async_node( raise TypeError(f"Unknown node type, got {type(node)}") +async def _get_zarr_version(store_path: StorePath) -> Literal[2, 3]: + """ + Guess Zarr format from present metadata files in a Store. + """ + ( + zarr_json_bytes, + zgroup_bytes, + ) = await asyncio.gather( + (store_path / ZARR_JSON).exists(), + (store_path / ZGROUP_JSON).exists(), + ) + if zarr_json_bytes and zgroup_bytes: + # warn and favor v3 + msg = f"Both zarr.json (Zarr format 3) and .zgroup (Zarr format 2) metadata objects exist at {store_path}. Zarr format 3 will be used." + warnings.warn(msg, category=ZarrUserWarning, stacklevel=1) + if not zarr_json_bytes and not zgroup_bytes: + raise FileNotFoundError(f"could not find zarr.json or .zgroup objects in {store_path}") + # set zarr_format based on which keys were found + if zarr_json_bytes: + return 3 + else: + return 2 + + @dataclass(frozen=True) class ConsolidatedMetadata: """ @@ -513,6 +537,14 @@ async def open( to load consolidated metadata from a non-default key. 
""" store_path = await make_store_path(store) + # Guess zarr_format if not passed explicitly + if zarr_format is None: + zarr_format = await _get_zarr_version(store_path) + return await cls.open( + store=store, zarr_format=zarr_format, use_consolidated=use_consolidated + ) + assert zarr_format is not None + if not store_path.store.supports_consolidated_metadata: # Fail if consolidated metadata was requested but the Store doesn't support it if use_consolidated: @@ -524,12 +556,11 @@ async def open( # if use_consolidated was None (optional), the Store dictates it doesn't want consolidation use_consolidated = False - consolidated_key = ZMETADATA_V2_JSON - - if (zarr_format == 2 or zarr_format is None) and isinstance(use_consolidated, str): - consolidated_key = use_consolidated - if zarr_format == 2: + consolidated_key = ZMETADATA_V2_JSON + if isinstance(use_consolidated, str): + consolidated_key = use_consolidated + paths = [store_path / ZGROUP_JSON, store_path / ZATTRS_JSON] if use_consolidated or use_consolidated is None: paths.append(store_path / consolidated_key) @@ -546,43 +577,6 @@ async def open( else: maybe_consolidated_metadata_bytes = None - elif zarr_format == 3: - zarr_json_bytes = await (store_path / ZARR_JSON).get() - if zarr_json_bytes is None: - raise FileNotFoundError(store_path) - elif zarr_format is None: - ( - zarr_json_bytes, - zgroup_bytes, - zattrs_bytes, - maybe_consolidated_metadata_bytes, - ) = await asyncio.gather( - (store_path / ZARR_JSON).get(), - (store_path / ZGROUP_JSON).get(), - (store_path / ZATTRS_JSON).get(), - (store_path / str(consolidated_key)).get(), - ) - if zarr_json_bytes is not None and zgroup_bytes is not None: - # warn and favor v3 - msg = f"Both zarr.json (Zarr format 3) and .zgroup (Zarr format 2) metadata objects exist at {store_path}. Zarr format 3 will be used." 
- warnings.warn(msg, category=ZarrUserWarning, stacklevel=1) - if zarr_json_bytes is None and zgroup_bytes is None: - raise FileNotFoundError( - f"could not find zarr.json or .zgroup objects in {store_path}" - ) - # set zarr_format based on which keys were found - if zarr_json_bytes is not None: - zarr_format = 3 - else: - zarr_format = 2 - else: - msg = f"Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '{zarr_format}'." # type: ignore[unreachable] - raise MetadataValidationError(msg) - - if zarr_format == 2: - # this is checked above, asserting here for mypy - assert zgroup_bytes is not None - if use_consolidated and maybe_consolidated_metadata_bytes is None: # the user requested consolidated metadata, but it was missing raise ValueError(consolidated_key) @@ -595,9 +589,11 @@ async def open( return cls._from_bytes_v2( store_path, zgroup_bytes, zattrs_bytes, maybe_consolidated_metadata_bytes ) - else: - # V3 groups are comprised of a zarr.json object - assert zarr_json_bytes is not None + + elif zarr_format == 3: + zarr_json_bytes = await (store_path / ZARR_JSON).get() + if zarr_json_bytes is None: + raise FileNotFoundError(store_path) if not isinstance(use_consolidated, bool | None): raise TypeError("use_consolidated must be a bool or None for Zarr format 3.") @@ -606,6 +602,9 @@ async def open( zarr_json_bytes, use_consolidated=use_consolidated, ) + else: + msg = f"Invalid value for 'zarr_format'. Expected 2, 3, or None. Got '{zarr_format}'." 
# type: ignore[unreachable] + raise MetadataValidationError(msg) @classmethod def _from_bytes_v2( From d4290b35b86937e88313712c4aeffd001a45b155 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 9 Sep 2025 16:57:34 +0100 Subject: [PATCH 2/3] Reduce number of file operations --- src/zarr/core/group.py | 77 ++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index 6d6c3e2b58..ee4e1b58cf 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -132,30 +132,6 @@ def _parse_async_node( raise TypeError(f"Unknown node type, got {type(node)}") -async def _get_zarr_version(store_path: StorePath) -> Literal[2, 3]: - """ - Guess Zarr format from present metadata files in a Store. - """ - ( - zarr_json_bytes, - zgroup_bytes, - ) = await asyncio.gather( - (store_path / ZARR_JSON).exists(), - (store_path / ZGROUP_JSON).exists(), - ) - if zarr_json_bytes and zgroup_bytes: - # warn and favor v3 - msg = f"Both zarr.json (Zarr format 3) and .zgroup (Zarr format 2) metadata objects exist at {store_path}. Zarr format 3 will be used." - warnings.warn(msg, category=ZarrUserWarning, stacklevel=1) - if not zarr_json_bytes and not zgroup_bytes: - raise FileNotFoundError(f"could not find zarr.json or .zgroup objects in {store_path}") - # set zarr_format based on which keys were found - if zarr_json_bytes: - return 3 - else: - return 2 - - @dataclass(frozen=True) class ConsolidatedMetadata: """ @@ -537,12 +513,32 @@ async def open( to load consolidated metadata from a non-default key. 
""" store_path = await make_store_path(store) + + zarr_json_bytes = None + zarr_group_bytes = None + # Guess zarr_format if not passed explicitly if zarr_format is None: - zarr_format = await _get_zarr_version(store_path) - return await cls.open( - store=store, zarr_format=zarr_format, use_consolidated=use_consolidated + ( + zarr_json_bytes, + zarr_group_bytes, + ) = await asyncio.gather( + (store_path / ZARR_JSON).get(), + (store_path / ZGROUP_JSON).get(), ) + # set zarr_format based on which keys were found + if zarr_json_bytes is not None: + zarr_format = 3 + if zarr_group_bytes is not None: + msg = f"Both zarr.json (Zarr format 3) and .zgroup (Zarr format 2) metadata objects exist at {store_path}. Zarr format 3 will be used." + warnings.warn(msg, category=ZarrUserWarning, stacklevel=1) + elif zarr_group_bytes is not None: + zarr_format = 2 + else: + raise FileNotFoundError( + f"could not find zarr.json or .zgroup objects in {store_path}" + ) + assert zarr_format is not None if not store_path.store.supports_consolidated_metadata: @@ -561,37 +557,36 @@ async def open( if isinstance(use_consolidated, str): consolidated_key = use_consolidated - paths = [store_path / ZGROUP_JSON, store_path / ZATTRS_JSON] - if use_consolidated or use_consolidated is None: - paths.append(store_path / consolidated_key) + if zarr_group_bytes is None: + zarr_group_bytes = await (store_path / ZGROUP_JSON).get() + if zarr_group_bytes is None: + raise FileNotFoundError(store_path) - zgroup_bytes, zattrs_bytes, *rest = await asyncio.gather( - *[path.get() for path in paths] - ) - if zgroup_bytes is None: + zattrs_bytes = await (store_path / ZATTRS_JSON).get() + if zattrs_bytes is None: raise FileNotFoundError(store_path) if use_consolidated or use_consolidated is None: - maybe_consolidated_metadata_bytes = rest[0] - + consolidated_metadata_bytes = await (store_path / consolidated_key).get() else: - maybe_consolidated_metadata_bytes = None + consolidated_metadata_bytes = None - if 
use_consolidated and maybe_consolidated_metadata_bytes is None: + if use_consolidated and consolidated_metadata_bytes is None: # the user requested consolidated metadata, but it was missing raise ValueError(consolidated_key) elif use_consolidated is False: # the user explicitly opted out of consolidated_metadata. # Discard anything we might have read. - maybe_consolidated_metadata_bytes = None + consolidated_metadata_bytes = None return cls._from_bytes_v2( - store_path, zgroup_bytes, zattrs_bytes, maybe_consolidated_metadata_bytes + store_path, zarr_group_bytes, zattrs_bytes, consolidated_metadata_bytes ) elif zarr_format == 3: - zarr_json_bytes = await (store_path / ZARR_JSON).get() + if zarr_json_bytes is None: + zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: raise FileNotFoundError(store_path) if not isinstance(use_consolidated, bool | None): From 058bd52bee429ad3a7a4c639c2ec1f9718048c57 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Tue, 9 Sep 2025 21:31:03 +0100 Subject: [PATCH 3/3] Clarify open() docstring. --- src/zarr/core/group.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index ee4e1b58cf..d7f9e4a184 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -488,7 +488,8 @@ async def open( zarr_format: ZarrFormat | None = 3, use_consolidated: bool | str | None = None, ) -> AsyncGroup: - """Open a new AsyncGroup + """ + Create a new AsyncGroup from an existing group. Parameters ----------