From 480108f82a271ffc531de5c2762cc72e4798b088 Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Tue, 7 Jan 2025 14:51:43 -0500 Subject: [PATCH] Sync with zarr 3.0 rc-2 (#544) * Start sync with zarr 3 rc1 * More tests updated * Working through updates * More updates * Update to latest api * Fix config issue with zstd * Update xarray for now, still failing * fix warning filter * Update icechunk-python/pyproject.toml Co-authored-by: Joe Hamman * I couldn't get wildcard to work, so just added both * Add back tests for overwrite * Test overwrite for groups * Update backend test * Fix * Add pip list step * try ignoring warning --------- Co-authored-by: Joe Hamman Co-authored-by: Deepak Cherian --- .github/workflows/python-check.yaml | 9 ++ docs/docs/icechunk-python/dask.md | 2 +- .../notebooks/demo-dummy-data.ipynb | 2 +- icechunk-python/notebooks/demo-s3.ipynb | 2 +- icechunk-python/pyproject.toml | 8 +- .../tests/run_xarray_backends_tests.py | 8 +- icechunk-python/tests/test_can_read_old.py | 6 +- icechunk-python/tests/test_concurrency.py | 2 +- icechunk-python/tests/test_config.py | 21 ++-- icechunk-python/tests/test_conflicts.py | 4 +- icechunk-python/tests/test_dask.py | 2 - .../tests/test_distributed_writers.py | 2 +- icechunk-python/tests/test_regressions.py | 6 +- icechunk-python/tests/test_store.py | 2 +- icechunk-python/tests/test_timetravel.py | 2 +- icechunk-python/tests/test_virtual_ref.py | 6 +- icechunk-python/tests/test_zarr/test_array.py | 43 ++++--- icechunk-python/tests/test_zarr/test_group.py | 106 ++++++++++-------- 18 files changed, 126 insertions(+), 107 deletions(-) diff --git a/.github/workflows/python-check.yaml b/.github/workflows/python-check.yaml index 35da0bce..9fc13df4 100644 --- a/.github/workflows/python-check.yaml +++ b/.github/workflows/python-check.yaml @@ -95,6 +95,15 @@ jobs: restore-keys: | cache-hypothesis- + - name: describe environment + shell: bash + working-directory: icechunk-python + run: | + set -e + python3 -m venv .venv 
+ source .venv/bin/activate + pip list + - name: pytest shell: bash working-directory: icechunk-python diff --git a/docs/docs/icechunk-python/dask.md b/docs/docs/icechunk-python/dask.md index c83de460..ce3df9fa 100644 --- a/docs/docs/icechunk-python/dask.md +++ b/docs/docs/icechunk-python/dask.md @@ -47,7 +47,7 @@ group = zarr.group(store=icechunk_sesion.store(), overwrite=True) zarray = group.create_array( "array", shape=shape, - chunk_shape=zarr_chunks, + chunks=zarr_chunks, dtype="f8", fill_value=float("nan"), ) diff --git a/icechunk-python/notebooks/demo-dummy-data.ipynb b/icechunk-python/notebooks/demo-dummy-data.ipynb index 8929809c..31bbb80c 100644 --- a/icechunk-python/notebooks/demo-dummy-data.ipynb +++ b/icechunk-python/notebooks/demo-dummy-data.ipynb @@ -125,7 +125,7 @@ " dimension_names=dims,\n", " attributes=attrs,\n", " data=array,\n", - " exists_ok=True,\n", + " overwrite=True,\n", " )\n", "\n", " return array" diff --git a/icechunk-python/notebooks/demo-s3.ipynb b/icechunk-python/notebooks/demo-s3.ipynb index 7777eebc..3d24476c 100644 --- a/icechunk-python/notebooks/demo-s3.ipynb +++ b/icechunk-python/notebooks/demo-s3.ipynb @@ -142,7 +142,7 @@ " fill_value=-1234567,\n", " dtype=oscar[var].dtype,\n", " data=oscar[var],\n", - " exists_ok=True,\n", + " overwrite=True,\n", " )\n", " print(session.commit(f\"wrote {var}\"))\n", " print(f\"committed; {time.time() - tic} seconds\")" diff --git a/icechunk-python/pyproject.toml b/icechunk-python/pyproject.toml index 86890898..e4892fcf 100644 --- a/icechunk-python/pyproject.toml +++ b/icechunk-python/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ license = { text = "Apache-2.0" } dynamic = ["version"] -dependencies = ["zarr==3.0.0b3"] +dependencies = ["zarr==3.0.0rc2"] [tool.poetry] name = "icechunk" @@ -39,7 +39,7 @@ test = [ "ruff", "dask>=2024.11.0", "distributed>=2024.11.0", - "xarray>=2024.11.0, <2025", + "xarray@git+https://github.com/pydata/xarray.git@main", "hypothesis", "pandas-stubs", 
"boto3-stubs[s3]", @@ -63,6 +63,10 @@ filterwarnings = [ "error", "ignore:Jupyter is migrating its paths to use:DeprecationWarning:", "ignore:Port 8787 is already in use:UserWarning:", + # TODO: this is raised for vlen-utf8, consolidated metadata, U1 dtype + "ignore:The codec `vlen-utf8` is currently not part in the Zarr format 3 specification.", + "ignore:The dtype ` None: class TestIcechunkStoreFilesystem(IcechunkStoreBase): @contextlib.contextmanager def create_zarr_target(self) -> Generator[IcechunkStore]: - if zarr.config.config["default_zarr_version"] == 2: + if zarr.config.config["default_zarr_format"] == 2: pytest.skip("v2 not supported") with tempfile.TemporaryDirectory() as tmpdir: repo = Repository.create(local_filesystem_storage(tmpdir)) @@ -58,7 +58,7 @@ def create_zarr_target(self) -> Generator[IcechunkStore]: class TestIcechunkStoreMemory(IcechunkStoreBase): @contextlib.contextmanager def create_zarr_target(self) -> Generator[IcechunkStore]: - if zarr.config.config["default_zarr_version"] == 2: + if zarr.config.config["default_zarr_format"] == 2: pytest.skip("v2 not supported") repo = Repository.create(in_memory_storage()) session = repo.writable_session("main") @@ -74,7 +74,7 @@ def test_pickle_dataarray(self) -> None: class TestIcechunkStoreMinio(IcechunkStoreBase): @contextlib.contextmanager def create_zarr_target(self) -> Generator[IcechunkStore]: - if zarr.config.config["default_zarr_version"] == 2: + if zarr.config.config["default_zarr_format"] == 2: pytest.skip("v2 not supported") repo = Repository.create( s3_storage( diff --git a/icechunk-python/tests/test_can_read_old.py b/icechunk-python/tests/test_can_read_old.py index 77c1bd32..edb171cb 100644 --- a/icechunk-python/tests/test_can_read_old.py +++ b/icechunk-python/tests/test_can_read_old.py @@ -74,7 +74,7 @@ async def write_a_test_repo() -> None: big_chunks = group1.create_array( "big_chunks", shape=(10, 10), - chunk_shape=(5, 5), + chunks=(5, 5), dtype="float32", fill_value=float("nan"), 
attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0}, @@ -84,7 +84,7 @@ async def write_a_test_repo() -> None: small_chunks = group1.create_array( "small_chunks", shape=(5), - chunk_shape=(1), + chunks=(1), dtype="int8", fill_value=8, attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0}, @@ -144,7 +144,7 @@ async def write_a_test_repo() -> None: group5.create_array( "inner", shape=(10, 10), - chunk_shape=(5, 5), + chunks=(5, 5), dtype="float32", fill_value=float("nan"), attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0}, diff --git a/icechunk-python/tests/test_concurrency.py b/icechunk-python/tests/test_concurrency.py index db4ab7c4..06d0f185 100644 --- a/icechunk-python/tests/test_concurrency.py +++ b/icechunk-python/tests/test_concurrency.py @@ -50,7 +50,7 @@ async def test_concurrency() -> None: group = zarr.group(store=store, overwrite=True) array = group.create_array( - "array", shape=(N, N), chunk_shape=(1, 1), dtype="f8", fill_value=1e23 + "array", shape=(N, N), chunks=(1, 1), dtype="f8", fill_value=1e23 ) barrier = asyncio.Barrier(2 * N * N + 1) diff --git a/icechunk-python/tests/test_config.py b/icechunk-python/tests/test_config.py index 1918c8ec..91637926 100644 --- a/icechunk-python/tests/test_config.py +++ b/icechunk-python/tests/test_config.py @@ -57,13 +57,12 @@ def test_config_save() -> None: def test_no_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None: store = tmp_store[0] store_path = tmp_store[1] - array = zarr.open_array( + array = zarr.create_array( store=store, - mode="a", shape=(10), dtype="int64", zarr_format=3, - chunk_shape=(1), + chunks=(1,), fill_value=-1, ) array[:] = 42 @@ -78,15 +77,15 @@ def test_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None: store = tmp_store[0] store_path = tmp_store[1] - inline_array = zarr.open_array( + inline_array = zarr.create_array( store=store, - mode="a", - path="inline", + name="inline", shape=(10), dtype="int32", 
zarr_format=3, - chunk_shape=(1), + chunks=(1,), fill_value=-1, + compressors=None, ) inline_array[:] = 9 @@ -95,15 +94,15 @@ def test_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None: # inline_chunk_threshold is 40, we should have no chunks directory assert not os.path.isdir(f"{store_path}/chunks") - written_array = zarr.open_array( + written_array = zarr.create_array( store=store, - mode="a", - path="not_inline", + name="not_inline", shape=(10), dtype="int64", zarr_format=3, - chunk_shape=(1), + chunks=(1,), fill_value=-1, + compressors=None, ) written_array[:] = 3 diff --git a/icechunk-python/tests/test_conflicts.py b/icechunk-python/tests/test_conflicts.py index 3f31c3f9..1f7c4ded 100644 --- a/icechunk-python/tests/test_conflicts.py +++ b/icechunk-python/tests/test_conflicts.py @@ -18,9 +18,7 @@ def repo(tmpdir: Path) -> icechunk.Repository: store = session.store root = zarr.group(store=store) root.create_group("foo/bar") - root.create_array( - "foo/bar/some-array", shape=(10, 10), chunk_shape=(1, 1), dtype="i4" - ) + root.create_array("foo/bar/some-array", shape=(10, 10), chunks=(1, 1), dtype="i4") session.commit("commit 1") return repo diff --git a/icechunk-python/tests/test_dask.py b/icechunk-python/tests/test_dask.py index d2432ccc..031e32f2 100644 --- a/icechunk-python/tests/test_dask.py +++ b/icechunk-python/tests/test_dask.py @@ -10,7 +10,6 @@ def test_distributed() -> None: - pytest.xfail() with distributed.Client(): # type: ignore [no-untyped-call] ds = create_test_data().chunk(dim1=3, dim2=4) with roundtrip(ds) as actual: @@ -22,7 +21,6 @@ def test_distributed() -> None: def test_threaded() -> None: - pytest.xfail() with dask.config.set(scheduler="threads"): ds = create_test_data().chunk(dim1=3, dim2=4) with roundtrip(ds) as actual: diff --git a/icechunk-python/tests/test_distributed_writers.py b/icechunk-python/tests/test_distributed_writers.py index 1144879b..786f196a 100644 --- a/icechunk-python/tests/test_distributed_writers.py 
+++ b/icechunk-python/tests/test_distributed_writers.py @@ -60,7 +60,7 @@ async def test_distributed_writers() -> None: zarray = group.create_array( "array", shape=shape, - chunk_shape=(CHUNK_DIM_SIZE, CHUNK_DIM_SIZE), + chunks=(CHUNK_DIM_SIZE, CHUNK_DIM_SIZE), dtype="f8", fill_value=float("nan"), ) diff --git a/icechunk-python/tests/test_regressions.py b/icechunk-python/tests/test_regressions.py index e843513f..0d5927b4 100644 --- a/icechunk-python/tests/test_regressions.py +++ b/icechunk-python/tests/test_regressions.py @@ -55,8 +55,8 @@ async def test_issue_418() -> None: store = session.store root = zarr.Group.from_store(store=store, zarr_format=3) - time = root.require_array(name="time", shape=((2,)), chunk_shape=((1,)), dtype="i4") - root.require_array(name="lon", shape=((1,)), chunk_shape=((1,)), dtype="i4") + time = root.require_array(name="time", shape=((2,)), chunks=((1,)), dtype="i4") + root.require_array(name="lon", shape=((1,)), chunks=((1,)), dtype="i4") # Set longitude store.set_virtual_ref( @@ -81,7 +81,7 @@ async def test_issue_418() -> None: root = zarr.Group.open(store=store) time = cast(zarr.core.array.Array, root["time"]) - root.require_array(name="lon", shape=((1,)), chunk_shape=((1,)), dtype="i4") + root.require_array(name="lon", shape=((1,)), chunks=((1,)), dtype="i4") # resize the array and append a new chunk time.resize((3,)) diff --git a/icechunk-python/tests/test_store.py b/icechunk-python/tests/test_store.py index ee483ee7..9fc5a7ec 100644 --- a/icechunk-python/tests/test_store.py +++ b/icechunk-python/tests/test_store.py @@ -38,7 +38,7 @@ async def test_store_clear_chunk_list() -> None: await store.clear() group = zarr.group(store=store) - array = group.create_array(**array_kwargs, exists_ok=True) + array = group.create_array(**array_kwargs, overwrite=True) assert len([_ async for _ in store.list_prefix("/")]) == 2 array[:] = rng.integers( low=0, diff --git a/icechunk-python/tests/test_timetravel.py 
b/icechunk-python/tests/test_timetravel.py index 74748db0..b5720f18 100644 --- a/icechunk-python/tests/test_timetravel.py +++ b/icechunk-python/tests/test_timetravel.py @@ -19,7 +19,7 @@ def test_timetravel() -> None: group = zarr.group(store=store, overwrite=True) air_temp = group.create_array( - "air_temp", shape=(1000, 1000), chunk_shape=(100, 100), dtype="i4" + "air_temp", shape=(1000, 1000), chunks=(100, 100), dtype="i4" ) air_temp[:, :] = 42 diff --git a/icechunk-python/tests/test_virtual_ref.py b/icechunk-python/tests/test_virtual_ref.py index e1a50eb5..12934ab8 100644 --- a/icechunk-python/tests/test_virtual_ref.py +++ b/icechunk-python/tests/test_virtual_ref.py @@ -56,7 +56,9 @@ async def test_write_minio_virtual_refs() -> None: session = repo.writable_session("main") store = session.store - array = zarr.Array.create(store, shape=(5, 1, 3), chunk_shape=(1, 1, 1), dtype="i4") + array = zarr.create_array( + store, shape=(5, 1, 3), chunks=(1, 1, 1), dtype="i4", compressors=None + ) # We add the virtual chunk refs without checksum, with the right etag, and with the wrong wrong etag and datetime. 
# This way we can check retrieval operations that should fail @@ -198,7 +200,7 @@ async def test_from_s3_public_virtual_refs(tmpdir: Path) -> None: root = zarr.Group.from_store(store=store, zarr_format=3) year = root.require_array( - name="year", shape=((72,)), chunk_shape=((72,)), dtype="float32" + name="year", shape=((72,)), chunks=((72,)), dtype="float32", compressors=None ) store.set_virtual_ref( diff --git a/icechunk-python/tests/test_zarr/test_array.py b/icechunk-python/tests/test_zarr/test_array.py index 9e180fe7..69f30091 100644 --- a/icechunk-python/tests/test_zarr/test_array.py +++ b/icechunk-python/tests/test_zarr/test_array.py @@ -10,7 +10,7 @@ from icechunk import IcechunkStore from tests.conftest import parse_repo -from zarr import Array, Group +from zarr import Group, create_array from zarr.core.buffer import default_buffer_prototype from zarr.core.common import ZarrFormat from zarr.errors import ContainsArrayError, ContainsGroupError @@ -27,12 +27,12 @@ def store(request: pytest.FixtureRequest, tmpdir: Path) -> IcechunkStore: @pytest.mark.parametrize("store", ["memory"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [3]) -@pytest.mark.parametrize("exists_ok", [True, False]) +@pytest.mark.parametrize("overwrite", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_array_creation_existing_node( store: IcechunkStore, zarr_format: ZarrFormat, - exists_ok: bool, + overwrite: bool, extant_node: Literal["array", "group"], ) -> None: """ @@ -53,25 +53,24 @@ def test_array_creation_existing_node( new_shape = (2, 2) new_dtype = "float32" - if exists_ok: + if overwrite: # This is currently not supported by IcechunkStore - pytest.xfail("IcechunkStore does not support exists_ok=True") - # arr_new = Array.create( - # spath / "extant", - # shape=new_shape, - # dtype=new_dtype, - # exists_ok=exists_ok, - # zarr_format=zarr_format, - # ) - # assert arr_new.shape == new_shape - # assert arr_new.dtype == new_dtype + 
arr_new = create_array( + spath / "extant", + shape=new_shape, + dtype=new_dtype, + overwrite=overwrite, + zarr_format=zarr_format, + ) + assert arr_new.shape == new_shape + assert arr_new.dtype == new_dtype else: with pytest.raises(expected_exception): - Array.create( + create_array( spath / "extant", shape=new_shape, dtype=new_dtype, - exists_ok=exists_ok, + overwrite=overwrite, zarr_format=zarr_format, ) @@ -80,7 +79,7 @@ def test_array_creation_existing_node( @pytest.mark.parametrize("store", ["local"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [3]) def test_serializable_sync_array(store: IcechunkStore, zarr_format: ZarrFormat) -> None: - expected = Array.create( + expected = create_array( store=store, shape=(100,), chunks=(10,), zarr_format=zarr_format, dtype="i4" ) expected[:] = list(range(100)) @@ -104,12 +103,12 @@ def test_array_v3_fill_value( store: IcechunkStore, fill_value: int, dtype_str: str ) -> None: shape = (10,) - arr = Array.create( + arr = create_array( store=store, shape=shape, dtype=dtype_str, zarr_format=3, - chunk_shape=shape, + chunks=shape, fill_value=fill_value, ) @@ -120,12 +119,12 @@ def test_array_v3_fill_value( @pytest.mark.parametrize("store", ["memory"], indirect=True) async def test_array_v3_nan_fill_value(store: IcechunkStore) -> None: shape = (10,) - arr = Array.create( + arr = create_array( store=store, shape=shape, dtype=np.float64, zarr_format=3, - chunk_shape=shape, + chunks=shape, fill_value=np.nan, ) arr[:] = np.nan @@ -151,7 +150,7 @@ async def test_array_v3_nan_fill_value(store: IcechunkStore) -> None: async def test_special_complex_fill_values_roundtrip( store: IcechunkStore, fill_value: Any, expected: list[Any] ) -> None: - Array.create(store=store, shape=(1,), dtype=np.complex64, fill_value=fill_value) + create_array(store=store, shape=(1,), dtype=np.complex64, fill_value=fill_value) content = await store.get("zarr.json", prototype=default_buffer_prototype()) assert content is not None actual = 
json.loads(content.to_bytes()) diff --git a/icechunk-python/tests/test_zarr/test_group.py b/icechunk-python/tests/test_zarr/test_group.py index 793c1e36..04e0e809 100644 --- a/icechunk-python/tests/test_zarr/test_group.py +++ b/icechunk-python/tests/test_zarr/test_group.py @@ -11,7 +11,8 @@ import zarr.api.asynchronous from icechunk import IcechunkStore from tests.conftest import parse_repo -from zarr import Array, AsyncArray, AsyncGroup, Group +from zarr import Array, AsyncGroup, Group +from zarr.api.asynchronous import create_array from zarr.core.buffer import default_buffer_prototype from zarr.core.common import JSON, ZarrFormat from zarr.core.group import GroupMetadata @@ -28,7 +29,7 @@ def store(request: pytest.FixtureRequest) -> IcechunkStore: @pytest.fixture(params=[True, False]) -def exists_ok(request: pytest.FixtureRequest) -> bool: +def overwrite(request: pytest.FixtureRequest) -> bool: result = request.param if not isinstance(result, bool): raise TypeError("Wrong type returned by test fixture.") @@ -117,7 +118,7 @@ def test_group_members(store: IcechunkStore, zarr_format: ZarrFormat) -> None: subsubsubgroup = subsubgroup.create_group("subsubsubgroup") members_expected["subarray"] = group.create_array( - "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True + "subarray", shape=(100,), dtype="uint8", chunks=(10,), overwrite=True ) # This is not supported by Icechunk, so we expect an error @@ -181,7 +182,7 @@ def test_group(store: IcechunkStore, zarr_format: ZarrFormat) -> None: # create an array from the "bar" group data = np.arange(0, 4 * 4, dtype="uint16").reshape((4, 4)) arr = bar.create_array( - "baz", shape=data.shape, dtype=data.dtype, chunk_shape=(2, 2), exists_ok=True + "baz", shape=data.shape, dtype=data.dtype, chunks=(2, 2), overwrite=True ) arr[:] = data @@ -207,25 +208,25 @@ def test_group(store: IcechunkStore, zarr_format: ZarrFormat) -> None: def test_group_create( - store: IcechunkStore, exists_ok: bool, zarr_format: 
ZarrFormat + store: IcechunkStore, overwrite: bool, zarr_format: ZarrFormat ) -> None: """ Test that `Group.create` works as expected. """ attributes = {"foo": 100} group = Group.from_store( - store, attributes=attributes, zarr_format=zarr_format, exists_ok=exists_ok + store, attributes=attributes, zarr_format=zarr_format, overwrite=overwrite ) assert group.attrs == attributes - if not exists_ok: + if not overwrite: with pytest.raises(ContainsGroupError): - _ = Group.from_store(store, exists_ok=exists_ok, zarr_format=zarr_format) + _ = Group.from_store(store, overwrite=overwrite, zarr_format=zarr_format) def test_group_open( - store: IcechunkStore, zarr_format: ZarrFormat, exists_ok: bool + store: IcechunkStore, zarr_format: ZarrFormat, overwrite: bool ) -> None: """ Test the `Group.open` method. @@ -238,26 +239,26 @@ def test_group_open( # create the group attrs = {"path": "foo"} group_created = Group.from_store( - store, attributes=attrs, zarr_format=zarr_format, exists_ok=exists_ok + store, attributes=attrs, zarr_format=zarr_format, overwrite=overwrite ) assert group_created.attrs == attrs assert group_created.metadata.zarr_format == zarr_format assert group_created.store_path == spath - # attempt to create a new group in place, to test exists_ok + # attempt to create a new group in place, to test overwrite new_attrs = {"path": "bar"} - if not exists_ok: + if not overwrite: with pytest.raises(ContainsGroupError): Group.from_store( - store, attributes=attrs, zarr_format=zarr_format, exists_ok=exists_ok + store, attributes=attrs, zarr_format=zarr_format, overwrite=overwrite ) else: if not store.supports_deletes: pytest.skip( - "Store does not support deletes but `exists_ok` is True, requiring deletes to override a group" + "Store does not support deletes but `overwrite` is True, requiring deletes to override a group" ) group_created_again = Group.from_store( - store, attributes=new_attrs, zarr_format=zarr_format, exists_ok=exists_ok + store, attributes=new_attrs, 
zarr_format=zarr_format, overwrite=overwrite ) assert group_created_again.attrs == new_attrs assert group_created_again.metadata.zarr_format == zarr_format @@ -271,7 +272,9 @@ def test_group_getitem(store: IcechunkStore, zarr_format: ZarrFormat) -> None: group = Group.from_store(store, zarr_format=zarr_format) subgroup = group.create_group(name="subgroup") - subarray = group.create_array(name="subarray", shape=(10,), chunk_shape=(10,)) + subarray = group.create_array( + name="subarray", shape=(10,), chunks=(10,), dtype="uint8" + ) assert group["subgroup"] == subgroup assert group["subarray"] == subarray @@ -286,7 +289,9 @@ def test_group_delitem(store: IcechunkStore, zarr_format: ZarrFormat) -> None: group = Group.from_store(store, zarr_format=zarr_format) subgroup = group.create_group(name="subgroup") - subarray = group.create_array(name="subarray", shape=(10,), chunk_shape=(10,)) + subarray = group.create_array( + name="subarray", shape=(10,), chunks=(10,), dtype="uint8" + ) assert group["subgroup"] == subgroup assert group["subarray"] == subarray @@ -347,7 +352,7 @@ def test_group_subgroups(store: IcechunkStore, zarr_format: ZarrFormat) -> None: keys = ("foo", "bar") subgroups_expected = tuple(group.create_group(k) for k in keys) # create a sub-array as well - _ = group.create_array("array", shape=(10,)) + _ = group.create_array("array", shape=(10,), dtype="uint8") subgroups_observed = tuple(a[1] for a in group.groups()) assert set(group.group_keys()) == set(keys) assert len(subgroups_observed) == len(subgroups_expected) @@ -360,7 +365,9 @@ def test_group_subarrays(store: IcechunkStore, zarr_format: ZarrFormat) -> None: """ group = Group.from_store(store, zarr_format=zarr_format) keys = ("foo", "bar") - subarrays_expected = tuple(group.create_array(k, shape=(10,)) for k in keys) + subarrays_expected = tuple( + group.create_array(k, shape=(10,), dtype="uint8") for k in keys + ) # create a sub-group as well _ = group.create_group("group") subarrays_observed = 
tuple(a[1] for a in group.arrays()) @@ -399,7 +406,7 @@ async def test_group_update_attributes_async( def test_group_create_array( store: IcechunkStore, zarr_format: ZarrFormat, - exists_ok: bool, + overwrite: bool, method: Literal["create_array", "array"], ) -> None: """ @@ -411,20 +418,24 @@ def test_group_create_array( data = np.arange(np.prod(shape)).reshape(shape).astype(dtype) if method == "create_array": - array = group.create_array(name="array", shape=shape, dtype=dtype, data=data) + array = group.create_array(name="array", shape=shape, dtype=dtype) + array[:] = data elif method == "array": with pytest.warns(DeprecationWarning): - array = group.array(name="array", shape=shape, dtype=dtype, data=data) + array = group.array(name="array", shape=shape, dtype=dtype) + array[:] = data else: raise AssertionError - if not exists_ok: + if not overwrite: if method == "create_array": with pytest.raises(ContainsArrayError): - group.create_array(name="array", shape=shape, dtype=dtype, data=data) + array = group.create_array(name="array", shape=shape, dtype=dtype) + array[:] = data elif method == "array": with pytest.raises(ContainsArrayError), pytest.warns(DeprecationWarning): - group.array(name="array", shape=shape, dtype=dtype, data=data) + array = group.array(name="array", shape=shape, dtype=dtype) + array[:] = data assert array.shape == shape assert array.dtype == np.dtype(dtype) assert np.array_equal(array[:], data) @@ -432,12 +443,12 @@ def test_group_create_array( @pytest.mark.parametrize("store", ["memory"], indirect=["store"]) @pytest.mark.parametrize("zarr_format", [3]) -@pytest.mark.parametrize("exists_ok", [True, False]) +@pytest.mark.parametrize("overwrite", [True, False]) @pytest.mark.parametrize("extant_node", ["array", "group"]) def test_group_creation_existing_node( store: IcechunkStore, zarr_format: ZarrFormat, - exists_ok: bool, + overwrite: bool, extant_node: Literal["array", "group"], ) -> None: """ @@ -461,28 +472,27 @@ def 
test_group_creation_existing_node( new_attributes = {"new": True} - if exists_ok: - pytest.xfail("exists_ok is not implemented for Group.from_store") - # node_new = Group.from_store( - # spath / "extant", - # attributes=new_attributes, - # zarr_format=zarr_format, - # exists_ok=exists_ok, - # ) - # assert node_new.attrs == new_attributes + if overwrite: + node_new = Group.from_store( + spath / "extant", + attributes=new_attributes, + zarr_format=zarr_format, + overwrite=overwrite, + ) + assert node_new.attrs == new_attributes else: with pytest.raises(expected_exception): Group.from_store( spath / "extant", attributes=new_attributes, zarr_format=zarr_format, - exists_ok=exists_ok, + overwrite=overwrite, ) async def test_asyncgroup_create( store: IcechunkStore, - exists_ok: bool, + overwrite: bool, zarr_format: ZarrFormat, ) -> None: """ @@ -493,7 +503,7 @@ async def test_asyncgroup_create( agroup = await AsyncGroup.from_store( store, attributes=attributes, - exists_ok=exists_ok, + overwrite=overwrite, zarr_format=zarr_format, ) @@ -502,24 +512,24 @@ async def test_asyncgroup_create( ) assert agroup.store_path == await make_store_path(store) - if not exists_ok: + if not overwrite: with pytest.raises(ContainsGroupError): agroup = await AsyncGroup.from_store( spath, attributes=attributes, - exists_ok=exists_ok, + overwrite=overwrite, zarr_format=zarr_format, ) # create an array at our target path collision_name = "foo" - _ = await AsyncArray.create( + _ = await create_array( spath / collision_name, shape=(10,), dtype="uint8", zarr_format=zarr_format ) with pytest.raises(ContainsArrayError): _ = await AsyncGroup.from_store( StorePath(store=store) / collision_name, attributes=attributes, - exists_ok=exists_ok, + overwrite=overwrite, zarr_format=zarr_format, ) @@ -534,7 +544,7 @@ async def test_asyncgroup_delitem(store: IcechunkStore, zarr_format: ZarrFormat) name=array_name, shape=(10,), dtype="uint8", - chunk_shape=(2,), + chunks=(2,), attributes={"foo": 100}, ) await 
agroup.delitem(array_name) @@ -581,11 +591,11 @@ async def test_group_members_async(store: IcechunkStore) -> None: GroupMetadata(), store_path=StorePath(store=store, path="root"), ) - a0 = await group.create_array("a0", shape=(1,)) + a0 = await group.create_array("a0", shape=(1,), dtype="uint8") g0 = await group.create_group("g0") - a1 = await g0.create_array("a1", shape=(1,)) + a1 = await g0.create_array("a1", shape=(1,), dtype="uint8") g1 = await g0.create_group("g1") - a2 = await g1.create_array("a2", shape=(1,)) + a2 = await g1.create_array("a2", shape=(1,), dtype="uint8") g2 = await g1.create_group("g2") # immediate children @@ -650,7 +660,7 @@ def test_delitem_removes_children(store: IcechunkStore, zarr_format: ZarrFormat) g1 = zarr.group(store=store, zarr_format=zarr_format) g1.create_group("0") g1.create_group("0/0") - arr = g1.create_array("0/0/0", shape=(1,)) + arr = g1.create_array("0/0/0", shape=(1,), dtype="uint8") arr[:] = 1 del g1["0"]