Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync with zarr 3.0 rc-2 #544

Merged
merged 22 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/python-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@ jobs:
restore-keys: |
cache-hypothesis-

- name: describe environment
shell: bash
working-directory: icechunk-python
run: |
set -e
python3 -m venv .venv
source .venv/bin/activate
pip list

- name: pytest
shell: bash
working-directory: icechunk-python
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/icechunk-python/dask.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ group = zarr.group(store=icechunk_sesion.store(), overwrite=True)
zarray = group.create_array(
"array",
shape=shape,
chunk_shape=zarr_chunks,
chunks=zarr_chunks,
dtype="f8",
fill_value=float("nan"),
)
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/notebooks/demo-dummy-data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@
" dimension_names=dims,\n",
" attributes=attrs,\n",
" data=array,\n",
" exists_ok=True,\n",
" overwrite=True,\n",
" )\n",
"\n",
" return array"
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/notebooks/demo-s3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@
" fill_value=-1234567,\n",
" dtype=oscar[var].dtype,\n",
" data=oscar[var],\n",
" exists_ok=True,\n",
" overwrite=True,\n",
" )\n",
" print(session.commit(f\"wrote {var}\"))\n",
" print(f\"committed; {time.time() - tic} seconds\")"
Expand Down
8 changes: 6 additions & 2 deletions icechunk-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ classifiers = [
license = { text = "Apache-2.0" }
dynamic = ["version"]

dependencies = ["zarr==3.0.0b3"]
dependencies = ["zarr==3.0.0rc2"]

[tool.poetry]
name = "icechunk"
Expand All @@ -39,7 +39,7 @@ test = [
"ruff",
"dask>=2024.11.0",
"distributed>=2024.11.0",
"xarray>=2024.11.0, <2025",
"xarray@git+https://github.com/pydata/xarray.git@main",
"hypothesis",
"pandas-stubs",
"boto3-stubs[s3]",
Expand All @@ -63,6 +63,10 @@ filterwarnings = [
"error",
"ignore:Jupyter is migrating its paths to use:DeprecationWarning:",
"ignore:Port 8787 is already in use:UserWarning:",
# TODO: this is raised for vlen-utf8, consolidated metadata, U1 dtype
"ignore:The codec `vlen-utf8` is currently not part in the Zarr format 3 specification.",
"ignore:The dtype `<U1` is currently not part in the Zarr format 3 specification.",
"ignore::ResourceWarning",
]

[tool.pyright]
Expand Down
8 changes: 4 additions & 4 deletions icechunk-python/tests/run_xarray_backends_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)
from xarray.tests.test_backends import (
ZarrBase,
default_zarr_version, # noqa: F401; needed otherwise not discovered
default_zarr_format, # noqa: F401; needed otherwise not discovered
)


Expand Down Expand Up @@ -47,7 +47,7 @@ def test_roundtrip_consolidated(self, consolidated: Any) -> None:
class TestIcechunkStoreFilesystem(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self) -> Generator[IcechunkStore]:
if zarr.config.config["default_zarr_version"] == 2:
if zarr.config.config["default_zarr_format"] == 2:
pytest.skip("v2 not supported")
with tempfile.TemporaryDirectory() as tmpdir:
repo = Repository.create(local_filesystem_storage(tmpdir))
Expand All @@ -58,7 +58,7 @@ def create_zarr_target(self) -> Generator[IcechunkStore]:
class TestIcechunkStoreMemory(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self) -> Generator[IcechunkStore]:
if zarr.config.config["default_zarr_version"] == 2:
if zarr.config.config["default_zarr_format"] == 2:
pytest.skip("v2 not supported")
repo = Repository.create(in_memory_storage())
session = repo.writable_session("main")
Expand All @@ -74,7 +74,7 @@ def test_pickle_dataarray(self) -> None:
class TestIcechunkStoreMinio(IcechunkStoreBase):
@contextlib.contextmanager
def create_zarr_target(self) -> Generator[IcechunkStore]:
if zarr.config.config["default_zarr_version"] == 2:
if zarr.config.config["default_zarr_format"] == 2:
pytest.skip("v2 not supported")
repo = Repository.create(
s3_storage(
Expand Down
6 changes: 3 additions & 3 deletions icechunk-python/tests/test_can_read_old.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def write_a_test_repo() -> None:
big_chunks = group1.create_array(
"big_chunks",
shape=(10, 10),
chunk_shape=(5, 5),
chunks=(5, 5),
dtype="float32",
fill_value=float("nan"),
attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0},
Expand All @@ -84,7 +84,7 @@ async def write_a_test_repo() -> None:
small_chunks = group1.create_array(
"small_chunks",
shape=(5),
chunk_shape=(1),
chunks=(1),
dtype="int8",
fill_value=8,
attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0},
Expand Down Expand Up @@ -144,7 +144,7 @@ async def write_a_test_repo() -> None:
group5.create_array(
"inner",
shape=(10, 10),
chunk_shape=(5, 5),
chunks=(5, 5),
dtype="float32",
fill_value=float("nan"),
attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0},
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_concurrency.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ async def test_concurrency() -> None:

group = zarr.group(store=store, overwrite=True)
array = group.create_array(
"array", shape=(N, N), chunk_shape=(1, 1), dtype="f8", fill_value=1e23
"array", shape=(N, N), chunks=(1, 1), dtype="f8", fill_value=1e23
)

barrier = asyncio.Barrier(2 * N * N + 1)
Expand Down
21 changes: 10 additions & 11 deletions icechunk-python/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,12 @@ def test_config_save() -> None:
def test_no_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None:
store = tmp_store[0]
store_path = tmp_store[1]
array = zarr.open_array(
array = zarr.create_array(
store=store,
mode="a",
shape=(10),
dtype="int64",
zarr_format=3,
chunk_shape=(1),
chunks=(1,),
fill_value=-1,
)
array[:] = 42
Expand All @@ -78,15 +77,15 @@ def test_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None:
store = tmp_store[0]
store_path = tmp_store[1]

inline_array = zarr.open_array(
inline_array = zarr.create_array(
store=store,
mode="a",
path="inline",
name="inline",
shape=(10),
dtype="int32",
zarr_format=3,
chunk_shape=(1),
chunks=(1,),
fill_value=-1,
compressors=None,
)

inline_array[:] = 9
Expand All @@ -95,15 +94,15 @@ def test_inline_chunks(tmp_store: tuple[icechunk.IcechunkStore, str]) -> None:
# inline_chunk_threshold is 40, we should have no chunks directory
assert not os.path.isdir(f"{store_path}/chunks")

written_array = zarr.open_array(
written_array = zarr.create_array(
store=store,
mode="a",
path="not_inline",
name="not_inline",
shape=(10),
dtype="int64",
zarr_format=3,
chunk_shape=(1),
chunks=(1,),
fill_value=-1,
compressors=None,
)

written_array[:] = 3
Expand Down
4 changes: 1 addition & 3 deletions icechunk-python/tests/test_conflicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@ def repo(tmpdir: Path) -> icechunk.Repository:
store = session.store
root = zarr.group(store=store)
root.create_group("foo/bar")
root.create_array(
"foo/bar/some-array", shape=(10, 10), chunk_shape=(1, 1), dtype="i4"
)
root.create_array("foo/bar/some-array", shape=(10, 10), chunks=(1, 1), dtype="i4")
session.commit("commit 1")

return repo
Expand Down
2 changes: 0 additions & 2 deletions icechunk-python/tests/test_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@


def test_distributed() -> None:
pytest.xfail()
with distributed.Client(): # type: ignore [no-untyped-call]
ds = create_test_data().chunk(dim1=3, dim2=4)
with roundtrip(ds) as actual:
Expand All @@ -22,7 +21,6 @@ def test_distributed() -> None:


def test_threaded() -> None:
pytest.xfail()
with dask.config.set(scheduler="threads"):
ds = create_test_data().chunk(dim1=3, dim2=4)
with roundtrip(ds) as actual:
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_distributed_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ async def test_distributed_writers() -> None:
zarray = group.create_array(
"array",
shape=shape,
chunk_shape=(CHUNK_DIM_SIZE, CHUNK_DIM_SIZE),
chunks=(CHUNK_DIM_SIZE, CHUNK_DIM_SIZE),
dtype="f8",
fill_value=float("nan"),
)
Expand Down
6 changes: 3 additions & 3 deletions icechunk-python/tests/test_regressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ async def test_issue_418() -> None:
store = session.store

root = zarr.Group.from_store(store=store, zarr_format=3)
time = root.require_array(name="time", shape=((2,)), chunk_shape=((1,)), dtype="i4")
root.require_array(name="lon", shape=((1,)), chunk_shape=((1,)), dtype="i4")
time = root.require_array(name="time", shape=((2,)), chunks=((1,)), dtype="i4")
root.require_array(name="lon", shape=((1,)), chunks=((1,)), dtype="i4")

# Set longitude
store.set_virtual_ref(
Expand All @@ -81,7 +81,7 @@ async def test_issue_418() -> None:

root = zarr.Group.open(store=store)
time = cast(zarr.core.array.Array, root["time"])
root.require_array(name="lon", shape=((1,)), chunk_shape=((1,)), dtype="i4")
root.require_array(name="lon", shape=((1,)), chunks=((1,)), dtype="i4")

# resize the array and append a new chunk
time.resize((3,))
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ async def test_store_clear_chunk_list() -> None:
await store.clear()

group = zarr.group(store=store)
array = group.create_array(**array_kwargs, exists_ok=True)
array = group.create_array(**array_kwargs, overwrite=True)
assert len([_ async for _ in store.list_prefix("/")]) == 2
array[:] = rng.integers(
low=0,
Expand Down
2 changes: 1 addition & 1 deletion icechunk-python/tests/test_timetravel.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_timetravel() -> None:

group = zarr.group(store=store, overwrite=True)
air_temp = group.create_array(
"air_temp", shape=(1000, 1000), chunk_shape=(100, 100), dtype="i4"
"air_temp", shape=(1000, 1000), chunks=(100, 100), dtype="i4"
)

air_temp[:, :] = 42
Expand Down
6 changes: 4 additions & 2 deletions icechunk-python/tests/test_virtual_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ async def test_write_minio_virtual_refs() -> None:
session = repo.writable_session("main")
store = session.store

array = zarr.Array.create(store, shape=(5, 1, 3), chunk_shape=(1, 1, 1), dtype="i4")
array = zarr.create_array(
store, shape=(5, 1, 3), chunks=(1, 1, 1), dtype="i4", compressors=None
)

# We add the virtual chunk refs without checksum, with the right etag, and with the wrong etag and datetime.
# This way we can check retrieval operations that should fail
Expand Down Expand Up @@ -198,7 +200,7 @@ async def test_from_s3_public_virtual_refs(tmpdir: Path) -> None:

root = zarr.Group.from_store(store=store, zarr_format=3)
year = root.require_array(
name="year", shape=((72,)), chunk_shape=((72,)), dtype="float32"
name="year", shape=((72,)), chunks=((72,)), dtype="float32", compressors=None
)

store.set_virtual_ref(
Expand Down
Loading
Loading