Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix small open_virtual_dataset bugs #923

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions earthaccess/dmrpp_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def open_virtual_mfdataset(
import xarray as xr

if access == "direct":
fs = earthaccess.get_s3_filesystem(results=granules[0])
fs.storage_options["anon"] = False # type: ignore
fs = earthaccess.get_s3_filesystem(results=granules) # type: ignore
fs.storage_options["anon"] = False
else:
fs = earthaccess.get_fsspec_https_session()
if parallel:
Expand All @@ -114,7 +114,7 @@ def open_virtual_mfdataset(
filetype="dmrpp", # type: ignore
group=group,
indexes={},
reader_options={"storage_options": fs.storage_options}, # type: ignore
reader_options={"storage_options": fs.storage_options},
)
)
if preprocess is not None:
Expand All @@ -127,6 +127,7 @@ def open_virtual_mfdataset(
vds = xr.combine_nested(vdatasets, **xr_combine_nested_kwargs)
if load:
refs = vds.virtualize.to_kerchunk(filepath=None, format="dict")
protocol = "s3" if "s3" in fs.protocol else fs.protocol
return xr.open_dataset(
"reference://",
engine="zarr",
Expand All @@ -135,8 +136,8 @@ def open_virtual_mfdataset(
"consolidated": False,
"storage_options": {
"fo": refs, # codespell:ignore
"remote_protocol": fs.protocol,
"remote_options": fs.storage_options, # type: ignore
"remote_protocol": protocol,
"remote_options": fs.storage_options,
},
},
)
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ kerchunk = [
"xarray",
]
virtualizarr = [
"virtualizarr >=1.2.0"
"virtualizarr >=1.2.0",
"dask",
"zarr",
"h5py >=3.6.0",
]
dev = [
"bump-my-version >=0.10.0",
Expand Down
45 changes: 14 additions & 31 deletions tests/integration/test_virtualizarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,24 @@
logger.info(f"earthaccess version: {earthaccess.__version__}")


@pytest.fixture(scope="module", params=["MUR25-JPL-L4-GLOB-v04.2"])
@pytest.fixture(
scope="module",
params=[
"MUR25-JPL-L4-GLOB-v04.2",
"AVHRR_OI-NCEI-L4-GLOB-v2.1",
"M2T1NXSLV",
],
)
def granule(request):
granules = earthaccess.search_data(
count=1, temporal=("2024"), short_name=request.param
)
return granules[0]


def test_dmrpp(granule):
from virtualizarr import open_virtual_dataset # type: ignore

fs = earthaccess.get_fsspec_https_session()
data_path = granule.data_links(access="indirect")[0]
dmrpp_path = data_path + ".dmrpp"

result = open_virtual_dataset(
dmrpp_path,
filetype="dmrpp", # type: ignore
indexes={},
reader_options={"storage_options": fs.storage_options}, # type: ignore
)

expected = open_virtual_dataset(
data_path,
indexes={},
reader_options={"storage_options": fs.storage_options}, # type: ignore
)

# TODO: replace with xr.testing when virtualizarr fill_val is fixed (https://github.com/zarr-developers/VirtualiZarr/issues/287)
# and dmrpp deflateLevel (zlib compression level) is always present (https://github.com/OPENDAP/bes/issues/954)
for var in result.variables:
assert var in expected.variables
assert result[var].dims == expected[var].dims
assert result[var].shape == expected[var].shape
assert result[var].dtype == expected[var].dtype
assert result[var].data.manifest == expected[var].data.manifest
assert set(result.coords) == set(expected.coords)
assert result.attrs == expected.attrs
def test_open_virtual_dataset(granule):
# Simply check that the dmrpp can be found, parsed, and loaded. Actual parser result is checked in virtualizarr
vds = earthaccess.open_virtual_dataset(granule, load=False)
assert vds is not None
vds_load = earthaccess.open_virtual_dataset(granule, load=True)
assert vds_load is not None
7 changes: 7 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading