diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py index 4a4cad75..b0d9322d 100644 --- a/earthaccess/kerchunk.py +++ b/earthaccess/kerchunk.py @@ -23,11 +23,11 @@ def _get_chunk_metadata( def consolidate_metadata( granuales: list[earthaccess.results.DataGranule], - outfile: str, - storage_options: dict | None = None, kerchunk_options: dict | None = None, access: str = "direct", -) -> str: + outfile: str | None = None, + storage_options: dict | None = None, +) -> str | dict: try: import dask from kerchunk.combine import MultiZarrToZarr @@ -41,14 +41,16 @@ def consolidate_metadata( else: fs = earthaccess.get_fsspec_https_session() - # Write out metadata file for each granuale + # Get metadata for each granuale get_chunk_metadata = dask.delayed(_get_chunk_metadata) chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granuales]) chunks = sum(chunks, start=[]) - # Write combined metadata file + # Get combined metadata object mzz = MultiZarrToZarr(chunks, **(kerchunk_options or {})) - outfile = fsspec.utils.stringify_path(outfile) - mzz.translate(outfile, storage_options=storage_options or {}) - - return outfile + if outfile is not None: + output = fsspec.utils.stringify_path(outfile) + mzz.translate(outfile, storage_options=storage_options or {}) + return output + else: + return mzz.translate() diff --git a/tests/integration/test_kerchunk.py b/tests/integration/test_kerchunk.py index 44f55545..8652852e 100644 --- a/tests/integration/test_kerchunk.py +++ b/tests/integration/test_kerchunk.py @@ -1,12 +1,10 @@ -import logging import os -import unittest import earthaccess import pytest from fsspec.core import strip_protocol -pytest.importorskip("kerchunk") +kerchunk = pytest.importorskip("kerchunk") pytest.importorskip("dask") logger = logging.getLogger(__name__) @@ -43,17 +41,29 @@ def test_consolidate_metadata_outfile(tmp_path, granuales, protocol): assert result == outfile -def test_consolidate_metadata(tmp_path, granuales): +def test_consolidate_metadata_memory(tmp_path, granuales): + result = earthaccess.consolidate_metadata( + granuales, + access="indirect", + kerchunk_options={"concat_dims": "Time"}, + ) + assert isinstance(result, dict) + assert "refs" in result + + +@pytest.mark.parametrize("output", ["file", "memory"]) +def test_consolidate_metadata(tmp_path, granuales, output): xr = pytest.importorskip("xarray") # Open directly with `earthaccess.open` expected = xr.open_mfdataset(earthaccess.open(granuales)) # Open with kerchunk consolidated metadata file - metadata_file = earthaccess.consolidate_metadata( - granuales, - outfile=tmp_path / "metadata.json", - access="indirect", - kerchunk_options={"concat_dims": "Time"}, + if output == "file": + kwargs = {"outfile": tmp_path / "metadata.json"} + else: + kwargs = {} + metadata = earthaccess.consolidate_metadata( + granuales, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs ) fs = earthaccess.get_fsspec_https_session() @@ -64,7 +74,7 @@ def test_consolidate_metadata(tmp_path, granuales): backend_kwargs={ "consolidated": False, "storage_options": { - "fo": metadata_file, + "fo": metadata, "remote_protocol": "https", "remote_options": fs.storage_options, },