Skip to content

Commit

Permalink
Sync with zarr 3.0 rc-2 (#544)
Browse files Browse the repository at this point in the history
* Start sync with zarr 3 rc1

* More tests updated

* Working through updates

* More updates

* Update to latest api

* Fix config issue with zstd

* Update xarray for now, still failing

* fix warning filter

* Update icechunk-python/pyproject.toml

Co-authored-by: Joe Hamman <[email protected]>

* I couldn't get wildcard to work, so just added both

* Add back tests for overwrite

* Test overwrite for groups

* Update backend test

* Fix

* Add pip list step

* try ignoring warning

---------

Co-authored-by: Joe Hamman <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
3 people authored Jan 7, 2025
1 parent 61e9e5f commit 480108f
Show file tree
Hide file tree
Showing 18 changed files with 126 additions and 107 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/python-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@ jobs:
restore-keys: |
cache-hypothesis-
- name: describe environment
shell: bash
working-directory: icechunk-python
run: |
set -e
python3 -m venv .venv
source .venv/bin/activate
pip list
- name: pytest
shell: bash
working-directory: icechunk-python
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/icechunk-python/dask.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ group = zarr.group(store=icechunk_sesion.store(), overwrite=True)
zarray = group.create_array(
"array",
shape=shape,
chunk_shape=zarr_chunks,
chunks=zarr_chunks,
dtype="f8",
fill_value=float("nan"),
)
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/notebooks/demo-dummy-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@
" dimension_names=dims,\n",
" attributes=attrs,\n",
" data=array,\n",
" exists_ok=True,\n",
" overwrite=True,\n",
" )\n",
"\n",
" return array"
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/notebooks/demo-s3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
" fill_value=-1234567,\n",
" dtype=oscar[var].dtype,\n",
" data=oscar[var],\n",
" exists_ok=True,\n",
" overwrite=True,\n",
" )\n",
" print(session.commit(f\"wrote {var}\"))\n",
" print(f\"committed; {time.time() - tic} seconds\")"
Expand Down
8 changes: 6 additions & 2 deletions icechunk-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ classifiers = [
license = { text = "Apache-2.0" }
dynamic = ["version"]

dependencies = ["zarr==3.0.0b3"]
dependencies = ["zarr==3.0.0rc2"]

[tool.poetry]
name = "icechunk"
Expand All @@ -39,7 +39,7 @@ test = [
"ruff",
"dask>=2024.11.0",
"distributed>=2024.11.0",
"xarray>=2024.11.0, <2025",
"xarray@git+https://github.com/pydata/xarray.git@main",
"hypothesis",
"pandas-stubs",
"boto3-stubs[s3]",
Expand All @@ -63,6 +63,10 @@ filterwarnings = [
"error",
"ignore:Jupyter is migrating its paths to use:DeprecationWarning:",
"ignore:Port 8787 is already in use:UserWarning:",
# TODO: this is raised for vlen-utf8, consolidated metadata, U1 dtype
"ignore:The codec `vlen-utf8` is currently not part in the Zarr format 3 specification.",
"ignore:The dtype `<U1` is currently not part in the Zarr format 3 specification.",
"ignore::ResourceWarning",
]

[tool.pyright]
Expand Down
8 changes: 4 additions & 4 deletions icechunk-python/tests/run_xarray_backends_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)
from xarray.tests.test_backends import (
ZarrBase,
default_zarr_version, # noqa: F401; needed otherwise not discovered
default_zarr_format, # noqa: F401; needed otherwise not discovered
)


Expand Down Expand Up @@ -47,7 +47,7 @@ def test_roundtrip_consolidated(self, consolidated: Any) -> None:
class TestIcechunkStoreFilesystem(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self) -> Generator[IcechunkStore]:
if zarr.config.config["default_zarr_version"] == 2:
if zarr.config.config["default_zarr_format"] == 2:
pytest.skip("v2 not supported")
with tempfile.TemporaryDirectory() as tmpdir:
repo = Repository.create(local_filesystem_storage(tmpdir))
Expand All @@ -58,7 +58,7 @@ def create_zarr_target(self) -> Generator[IcechunkStore]:
class TestIcechunkStoreMemory(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self) -> Generator[IcechunkStore]:
if zarr.config.config["default_zarr_version"] == 2:
if zarr.config.config["default_zarr_format"] == 2:
pytest.skip("v2 not supported")
repo = Repository.create(in_memory_storage())
session = repo.writable_session("main")
Expand All @@ -74,7 +74,7 @@ def test_pickle_dataarray(self) -> None:
class TestIcechunkStoreMinio(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self) -> Generator[IcechunkStore]:
if zarr.config.config["default_zarr_version"] == 2:
if zarr.config.config["default_zarr_format"] == 2:
pytest.skip("v2 not supported")
repo = Repository.create(
s3_storage(
Expand Down
6 changes: 3 additions & 3 deletions icechunk-python/tests/test_can_read_old.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def write_a_test_repo() -> None:
big_chunks = group1.create_array(
"big_chunks",
shape=(10, 10),
chunk_shape=(5, 5),
chunks=(5, 5),
dtype="float32",
fill_value=float("nan"),
attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0},
Expand All @@ -84,7 +84,7 @@ async def write_a_test_repo() -> None:
small_chunks = group1.create_array(
"small_chunks",
shape=(5),
chunk_shape=(1),
chunks=(1),
dtype="int8",
fill_value=8,
attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0},
Expand Down Expand Up @@ -144,7 +144,7 @@ async def write_a_test_repo() -> None:
group5.create_array(
"inner",
shape=(10, 10),
chunk_shape=(5, 5),
chunks=(5, 5),
dtype="float32",
fill_value=float("nan"),
attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0},
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_concurrency.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ async def test_concurrency() -> None:

group = zarr.group(store=store, overwrite=True)
array = group.create_array(
"array", shape=(N, N), chunk_shape=(1, 1), dtype="f8", fill_value=1e23
"array", shape=(N, N), chunks=(1, 1), dtype="f8", fill_value=1e23
)

barrier = asyncio.Barrier(2 * N * N + 1)
Expand Down
21 changes: 10 additions & 11 deletions icechunk-python/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,12 @@ def test_config_save() -> None:
def test_no_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None:
store = tmp_store[0]
store_path = tmp_store[1]
array = zarr.open_array(
array = zarr.create_array(
store=store,
mode="a",
shape=(10),
dtype="int64",
zarr_format=3,
chunk_shape=(1),
chunks=(1,),
fill_value=-1,
)
array[:] = 42
Expand All @@ -78,15 +77,15 @@ def test_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None:
store = tmp_store[0]
store_path = tmp_store[1]

inline_array = zarr.open_array(
inline_array = zarr.create_array(
store=store,
mode="a",
path="inline",
name="inline",
shape=(10),
dtype="int32",
zarr_format=3,
chunk_shape=(1),
chunks=(1,),
fill_value=-1,
compressors=None,
)

inline_array[:] = 9
Expand All @@ -95,15 +94,15 @@ def test_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None:
# inline_chunk_threshold is 40, we should have no chunks directory
assert not os.path.isdir(f"{store_path}/chunks")

written_array = zarr.open_array(
written_array = zarr.create_array(
store=store,
mode="a",
path="not_inline",
name="not_inline",
shape=(10),
dtype="int64",
zarr_format=3,
chunk_shape=(1),
chunks=(1,),
fill_value=-1,
compressors=None,
)

written_array[:] = 3
Expand Down
4 changes: 1 addition & 3 deletions icechunk-python/tests/test_conflicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ def repo(tmpdir: Path) -> icechunk.Repository:
store = session.store
root = zarr.group(store=store)
root.create_group("foo/bar")
root.create_array(
"foo/bar/some-array", shape=(10, 10), chunk_shape=(1, 1), dtype="i4"
)
root.create_array("foo/bar/some-array", shape=(10, 10), chunks=(1, 1), dtype="i4")
session.commit("commit 1")

return repo
Expand Down
2 changes: 0 additions & 2 deletions icechunk-python/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@


def test_distributed() -> None:
pytest.xfail()
with distributed.Client(): # type: ignore [no-untyped-call]
ds = create_test_data().chunk(dim1=3, dim2=4)
with roundtrip(ds) as actual:
Expand All @@ -22,7 +21,6 @@ def test_distributed() -> None:


def test_threaded() -> None:
pytest.xfail()
with dask.config.set(scheduler="threads"):
ds = create_test_data().chunk(dim1=3, dim2=4)
with roundtrip(ds) as actual:
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_distributed_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ async def test_distributed_writers() -> None:
zarray = group.create_array(
"array",
shape=shape,
chunk_shape=(CHUNK_DIM_SIZE, CHUNK_DIM_SIZE),
chunks=(CHUNK_DIM_SIZE, CHUNK_DIM_SIZE),
dtype="f8",
fill_value=float("nan"),
)
Expand Down
6 changes: 3 additions & 3 deletions icechunk-python/tests/test_regressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ async def test_issue_418() -> None:
store = session.store

root = zarr.Group.from_store(store=store, zarr_format=3)
time = root.require_array(name="time", shape=((2,)), chunk_shape=((1,)), dtype="i4")
root.require_array(name="lon", shape=((1,)), chunk_shape=((1,)), dtype="i4")
time = root.require_array(name="time", shape=((2,)), chunks=((1,)), dtype="i4")
root.require_array(name="lon", shape=((1,)), chunks=((1,)), dtype="i4")

# Set longitude
store.set_virtual_ref(
Expand All @@ -81,7 +81,7 @@ async def test_issue_418() -> None:

root = zarr.Group.open(store=store)
time = cast(zarr.core.array.Array, root["time"])
root.require_array(name="lon", shape=((1,)), chunk_shape=((1,)), dtype="i4")
root.require_array(name="lon", shape=((1,)), chunks=((1,)), dtype="i4")

# resize the array and append a new chunk
time.resize((3,))
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ async def test_store_clear_chunk_list() -> None:
await store.clear()

group = zarr.group(store=store)
array = group.create_array(**array_kwargs, exists_ok=True)
array = group.create_array(**array_kwargs, overwrite=True)
assert len([_ async for _ in store.list_prefix("/")]) == 2
array[:] = rng.integers(
low=0,
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_timetravel.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_timetravel() -> None:

group = zarr.group(store=store, overwrite=True)
air_temp = group.create_array(
"air_temp", shape=(1000, 1000), chunk_shape=(100, 100), dtype="i4"
"air_temp", shape=(1000, 1000), chunks=(100, 100), dtype="i4"
)

air_temp[:, :] = 42
Expand Down
6 changes: 4 additions & 2 deletions icechunk-python/tests/test_virtual_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ async def test_write_minio_virtual_refs() -> None:
session = repo.writable_session("main")
store = session.store

array = zarr.Array.create(store, shape=(5, 1, 3), chunk_shape=(1, 1, 1), dtype="i4")
array = zarr.create_array(
store, shape=(5, 1, 3), chunks=(1, 1, 1), dtype="i4", compressors=None
)

# We add the virtual chunk refs without checksum, with the right etag, and with the wrong etag and datetime.
# This way we can check retrieval operations that should fail
Expand Down Expand Up @@ -198,7 +200,7 @@ async def test_from_s3_public_virtual_refs(tmpdir: Path) -> None:

root = zarr.Group.from_store(store=store, zarr_format=3)
year = root.require_array(
name="year", shape=((72,)), chunk_shape=((72,)), dtype="float32"
name="year", shape=((72,)), chunks=((72,)), dtype="float32", compressors=None
)

store.set_virtual_ref(
Expand Down
Loading

0 comments on commit 480108f

Please sign in to comment.