diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index b74948da..4977bf38 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -17,6 +17,7 @@ search_datasets, ) from .auth import Auth +from .kerchunk import consolidate_metadata from .search import DataCollections, DataGranules from .store import Store @@ -39,6 +40,7 @@ "Auth", "Store", "auth_environ", + "consolidate_metadata", ] __version__ = version("earthaccess") diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py new file mode 100644 index 00000000..eb3f4cae --- /dev/null +++ b/earthaccess/kerchunk.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import fsspec +import s3fs + +import earthaccess + + +def _get_chunk_metadata( + granuale: earthaccess.results.DataGranule, + fs: fsspec.AbstractFileSystem | s3fs.S3FileSystem, +) -> list[dict]: + from kerchunk.hdf import SingleHdf5ToZarr + + metadata = [] + access = "direct" if isinstance(fs, s3fs.S3FileSystem) else "indirect" + for url in granuale.data_links(access=access): + with fs.open(url) as inf: + h5chunks = SingleHdf5ToZarr(inf, url) + m = h5chunks.translate() + metadata.append(m) + return metadata + + +def consolidate_metadata( + granuales: list[earthaccess.results.DataGranule], + kerchunk_options: dict | None = None, + access: str = "direct", + outfile: str | None = None, + storage_options: dict | None = None, +) -> str | dict: + try: + import dask + + from kerchunk.combine import MultiZarrToZarr + except ImportError as e: + raise ImportError( + "`earthaccess.consolidate_metadata` requires `dask` and `kerchunk` to be be installed" + ) from e + + if access == "direct": + fs = earthaccess.get_s3fs_session(provider=granuales[0]["meta"]["provider-id"]) + else: + fs = earthaccess.get_fsspec_https_session() + + # Get metadata for each granuale + get_chunk_metadata = dask.delayed(_get_chunk_metadata) + chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granuales]) + chunks = sum(chunks, start=[]) + + # Get 
combined metadata object + mzz = MultiZarrToZarr(chunks, **(kerchunk_options or {})) + if outfile is not None: + output = fsspec.utils.stringify_path(outfile) + mzz.translate(outfile, storage_options=storage_options or {}) + return output + else: + return mzz.translate() diff --git a/poetry.lock b/poetry.lock index 137bfd15..6200911d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -259,6 +259,16 @@ files = [ [package.dependencies] python-dateutil = ">=2.7.0" +[[package]] +name = "asciitree" +version = "0.3.3" +description = "Draws ASCII trees." +optional = true +python-versions = "*" +files = [ + {file = "asciitree-0.3.3.tar.gz", hash = "sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e"}, +] + [[package]] name = "asttokens" version = "2.2.1" @@ -956,6 +966,17 @@ files = [ [package.extras] tests = ["asttokens", "littleutils", "pytest", "rich"] +[[package]] +name = "fasteners" +version = "0.18" +description = "A python package that provides useful locks" +optional = true +python-versions = ">=3.6" +files = [ + {file = "fasteners-0.18-py3-none-any.whl", hash = "sha256:1d4caf5f8db57b0e4107d94fd5a1d02510a450dced6ca77d1839064c1bacf20c"}, + {file = "fasteners-0.18.tar.gz", hash = "sha256:cb7c13ef91e0c7e4fe4af38ecaf6b904ec3f5ce0dda06d34924b6b74b869d953"}, +] + [[package]] name = "fastjsonschema" version = "2.18.0" @@ -1776,6 +1797,31 @@ toml = "*" rst2md = ["sphinx-gallery (>=0.7.0,<0.8.0)"] toml = ["toml"] +[[package]] +name = "kerchunk" +version = "0.1.2" +description = "Functions to make reference descriptions for ReferenceFileSystem" +optional = true +python-versions = ">=3.7" +files = [ + {file = "kerchunk-0.1.2-py3-none-any.whl", hash = "sha256:2504c56cd85c69e88cbb76fce45bfd5905e693060124b642221f5fcfddc7387a"}, + {file = "kerchunk-0.1.2.tar.gz", hash = "sha256:def9837a9713e49c6b4f805b5e155eee4a11d3ba5c9638f18e35f88a2d9891c9"}, +] + +[package.dependencies] +fsspec = "*" +numcodecs = "*" +numpy = "*" +ujson = "*" +zarr = "*" + +[package.extras] 
+cftime = ["cftime"] +fits = ["xarray"] +grib2 = ["cfgrib"] +hdf = ["h5py", "xarray"] +netcdf3 = ["scipy"] + [[package]] name = "kiwisolver" version = "1.4.4" @@ -2650,6 +2696,38 @@ jupyter-server = ">=1.8,<3" [package.extras] test = ["pytest", "pytest-console-scripts", "pytest-jupyter", "pytest-tornasync"] +[[package]] +name = "numcodecs" +version = "0.11.0" +description = "A Python package providing buffer compression and transformation codecs for use" +optional = true +python-versions = ">=3.8" +files = [ + {file = "numcodecs-0.11.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bc116752be45b4f9dca4315e5a2b4185e3b46f68c997dbb84aef334ceb5a1d"}, + {file = "numcodecs-0.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c27dfca402f69fbfa01c46fb572086e77f38121192160cc8ed1177dc30702c52"}, + {file = "numcodecs-0.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:0fabc7dfdf64a9555bf8a34911e05b415793c67a1377207dc79cd96342291fa1"}, + {file = "numcodecs-0.11.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7dae3f5678f247336c84e7315a0c59a4fec7c33eb7db72d78ff5c776479a812e"}, + {file = "numcodecs-0.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32697785b786bb0039d3feeaabdc10f25eda6c149700cde954653aaa47637832"}, + {file = "numcodecs-0.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:8c2f36b21162c6ebccc05d3fe896f86b91dcf8709946809f730cc23a37f8234d"}, + {file = "numcodecs-0.11.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c240858bf29e0ff254b1db60430e8b2658b8c8328b684f80033289d94807a7c"}, + {file = "numcodecs-0.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee5bda16e9d26a7a39fc20b6c1cec23b4debc314df5cfae3ed505149c2eeafc4"}, + {file = "numcodecs-0.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:bd05cdb853c7bcfde2efc809a9df2c5e205b96f70405b810e5788b45d0d81f73"}, + {file = "numcodecs-0.11.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:694dc2e80b1f169b7deb14bdd0a04b20e5f17ef32cb0f81b71ab690406ec6bd9"}, + {file = "numcodecs-0.11.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf3925eeb37aed0e6c04d7fb9614133a3c8426dc77f8bda54c99c601a44b3bd3"}, + {file = "numcodecs-0.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:11596b71267417425ea8afb407477a67d684f434c8b07b1dd59c25a97d5c3ccb"}, + {file = "numcodecs-0.11.0.tar.gz", hash = "sha256:6c058b321de84a1729299b0eae4d652b2e48ea1ca7f9df0da65cb13470e635eb"}, +] + +[package.dependencies] +entrypoints = "*" +numpy = ">=1.7" + +[package.extras] +docs = ["mock", "numpydoc", "sphinx", "sphinx-issues"] +msgpack = ["msgpack"] +test = ["coverage", "flake8", "pytest", "pytest-cov"] +zfpy = ["zfpy (>=1.0.0)"] + [[package]] name = "numpy" version = "1.24.4" @@ -4124,6 +4202,76 @@ files = [ {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, ] +[[package]] +name = "ujson" +version = "5.8.0" +description = "Ultra fast JSON encoder and decoder for Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "ujson-5.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4511560d75b15ecb367eef561554959b9d49b6ec3b8d5634212f9fed74a6df1"}, + {file = "ujson-5.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9399eaa5d1931a0ead49dce3ffacbea63f3177978588b956036bfe53cdf6af75"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4e7bb7eba0e1963f8b768f9c458ecb193e5bf6977090182e2b4f4408f35ac76"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40931d7c08c4ce99adc4b409ddb1bbb01635a950e81239c2382cfe24251b127a"}, + {file = "ujson-5.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d53039d39de65360e924b511c7ca1a67b0975c34c015dd468fca492b11caa8f7"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_aarch64.whl", 
hash = "sha256:bdf04c6af3852161be9613e458a1fb67327910391de8ffedb8332e60800147a2"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a70f776bda2e5072a086c02792c7863ba5833d565189e09fabbd04c8b4c3abba"}, + {file = "ujson-5.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f26629ac531d712f93192c233a74888bc8b8212558bd7d04c349125f10199fcf"}, + {file = "ujson-5.8.0-cp310-cp310-win32.whl", hash = "sha256:7ecc33b107ae88405aebdb8d82c13d6944be2331ebb04399134c03171509371a"}, + {file = "ujson-5.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:3b27a8da7a080add559a3b73ec9ebd52e82cc4419f7c6fb7266e62439a055ed0"}, + {file = "ujson-5.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:193349a998cd821483a25f5df30b44e8f495423840ee11b3b28df092ddfd0f7f"}, + {file = "ujson-5.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ddeabbc78b2aed531f167d1e70387b151900bc856d61e9325fcdfefb2a51ad8"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ce24909a9c25062e60653073dd6d5e6ec9d6ad7ed6e0069450d5b673c854405"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27a2a3c7620ebe43641e926a1062bc04e92dbe90d3501687957d71b4bdddaec4"}, + {file = "ujson-5.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b852bdf920fe9f84e2a2c210cc45f1b64f763b4f7d01468b33f7791698e455e"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:20768961a6a706170497129960762ded9c89fb1c10db2989c56956b162e2a8a3"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e0147d41e9fb5cd174207c4a2895c5e24813204499fd0839951d4c8784a23bf5"}, + {file = "ujson-5.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e3673053b036fd161ae7a5a33358ccae6793ee89fd499000204676baafd7b3aa"}, + {file = "ujson-5.8.0-cp311-cp311-win32.whl", hash = 
"sha256:a89cf3cd8bf33a37600431b7024a7ccf499db25f9f0b332947fbc79043aad879"}, + {file = "ujson-5.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:3659deec9ab9eb19e8646932bfe6fe22730757c4addbe9d7d5544e879dc1b721"}, + {file = "ujson-5.8.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:102bf31c56f59538cccdfec45649780ae00657e86247c07edac434cb14d5388c"}, + {file = "ujson-5.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:299a312c3e85edee1178cb6453645217ba23b4e3186412677fa48e9a7f986de6"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e385a7679b9088d7bc43a64811a7713cc7c33d032d020f757c54e7d41931ae"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad24ec130855d4430a682c7a60ca0bc158f8253ec81feed4073801f6b6cb681b"}, + {file = "ujson-5.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16fde596d5e45bdf0d7de615346a102510ac8c405098e5595625015b0d4b5296"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:6d230d870d1ce03df915e694dcfa3f4e8714369cce2346686dbe0bc8e3f135e7"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9571de0c53db5cbc265945e08f093f093af2c5a11e14772c72d8e37fceeedd08"}, + {file = "ujson-5.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7cba16b26efe774c096a5e822e4f27097b7c81ed6fb5264a2b3f5fd8784bab30"}, + {file = "ujson-5.8.0-cp312-cp312-win32.whl", hash = "sha256:48c7d373ff22366eecfa36a52b9b55b0ee5bd44c2b50e16084aa88b9de038916"}, + {file = "ujson-5.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:5ac97b1e182d81cf395ded620528c59f4177eee024b4b39a50cdd7b720fdeec6"}, + {file = "ujson-5.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2a64cc32bb4a436e5813b83f5aab0889927e5ea1788bf99b930fad853c5625cb"}, + {file = "ujson-5.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:e54578fa8838ddc722539a752adfce9372474114f8c127bb316db5392d942f8b"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9721cd112b5e4687cb4ade12a7b8af8b048d4991227ae8066d9c4b3a6642a582"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d9707e5aacf63fb919f6237d6490c4e0244c7f8d3dc2a0f84d7dec5db7cb54c"}, + {file = "ujson-5.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0be81bae295f65a6896b0c9030b55a106fb2dec69ef877253a87bc7c9c5308f7"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ae7f4725c344bf437e9b881019c558416fe84ad9c6b67426416c131ad577df67"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9ab282d67ef3097105552bf151438b551cc4bedb3f24d80fada830f2e132aeb9"}, + {file = "ujson-5.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:94c7bd9880fa33fcf7f6d7f4cc032e2371adee3c5dba2922b918987141d1bf07"}, + {file = "ujson-5.8.0-cp38-cp38-win32.whl", hash = "sha256:bf5737dbcfe0fa0ac8fa599eceafae86b376492c8f1e4b84e3adf765f03fb564"}, + {file = "ujson-5.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:11da6bed916f9bfacf13f4fc6a9594abd62b2bb115acfb17a77b0f03bee4cfd5"}, + {file = "ujson-5.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:69b3104a2603bab510497ceabc186ba40fef38ec731c0ccaa662e01ff94a985c"}, + {file = "ujson-5.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9249fdefeb021e00b46025e77feed89cd91ffe9b3a49415239103fc1d5d9c29a"}, + {file = "ujson-5.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2873d196725a8193f56dde527b322c4bc79ed97cd60f1d087826ac3290cf9207"}, + {file = "ujson-5.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a4dafa9010c366589f55afb0fd67084acd8added1a51251008f9ff2c3e44042"}, + {file = 
"ujson-5.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a42baa647a50fa8bed53d4e242be61023bd37b93577f27f90ffe521ac9dc7a3"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f3554eaadffe416c6f543af442066afa6549edbc34fe6a7719818c3e72ebfe95"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:fb87decf38cc82bcdea1d7511e73629e651bdec3a43ab40985167ab8449b769c"}, + {file = "ujson-5.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:407d60eb942c318482bbfb1e66be093308bb11617d41c613e33b4ce5be789adc"}, + {file = "ujson-5.8.0-cp39-cp39-win32.whl", hash = "sha256:0fe1b7edaf560ca6ab023f81cbeaf9946a240876a993b8c5a21a1c539171d903"}, + {file = "ujson-5.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:3f9b63530a5392eb687baff3989d0fb5f45194ae5b1ca8276282fb647f8dcdb3"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:efeddf950fb15a832376c0c01d8d7713479fbeceaed1eaecb2665aa62c305aec"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d8283ac5d03e65f488530c43d6610134309085b71db4f675e9cf5dff96a8282"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb0142f6f10f57598655340a3b2c70ed4646cbe674191da195eb0985a9813b83"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07d459aca895eb17eb463b00441986b021b9312c6c8cc1d06880925c7f51009c"}, + {file = "ujson-5.8.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d524a8c15cfc863705991d70bbec998456a42c405c291d0f84a74ad7f35c5109"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d6f84a7a175c75beecde53a624881ff618e9433045a69fcfb5e154b73cdaa377"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:b748797131ac7b29826d1524db1cc366d2722ab7afacc2ce1287cdafccddbf1f"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e72ba76313d48a1a3a42e7dc9d1db32ea93fac782ad8dde6f8b13e35c229130"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f504117a39cb98abba4153bf0b46b4954cc5d62f6351a14660201500ba31fe7f"}, + {file = "ujson-5.8.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8c91b6f4bf23f274af9002b128d133b735141e867109487d17e344d38b87d94"}, + {file = "ujson-5.8.0.tar.gz", hash = "sha256:78e318def4ade898a461b3d92a79f9441e7e0e4d2ad5419abed4336d702c7425"}, +] + [[package]] name = "uri-template" version = "1.3.0" @@ -4472,6 +4620,26 @@ files = [ idna = ">=2.0" multidict = ">=4.0" +[[package]] +name = "zarr" +version = "2.16.0" +description = "An implementation of chunked, compressed, N-dimensional arrays for Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "zarr-2.16.0-py3-none-any.whl", hash = "sha256:6cf9e6e4c58b9233262e024394e68921a438a6af5a7428bd6bdb1e4e8d05b69a"}, + {file = "zarr-2.16.0.tar.gz", hash = "sha256:84e36b695bda0ecea52af9861271984cb22a5c864679907b7b9ba3f79b684f7e"}, +] + +[package.dependencies] +asciitree = "*" +fasteners = "*" +numcodecs = ">=0.10.0" +numpy = ">=1.20" + +[package.extras] +jupyter = ["ipytree (>=0.2.2)", "ipywidgets (>=8.0.0)", "notebook"] + [[package]] name = "zipp" version = "3.16.2" @@ -4487,6 +4655,9 @@ files = [ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +[extras] +kerchunk = ["dask", "kerchunk"] + [metadata] lock-version = "2.0" 
"""Integration tests for ``earthaccess.consolidate_metadata``."""
import logging
import os
import unittest

import earthaccess
import pytest
from fsspec.core import strip_protocol

# The whole module is skipped when the optional dependencies are missing.
kerchunk = pytest.importorskip("kerchunk")
pytest.importorskip("dask")

logger = logging.getLogger(__name__)
assertions = unittest.TestCase("__init__")

# Credentials must be present in the environment before any test runs.
assertions.assertTrue("EARTHDATA_USERNAME" in os.environ)
assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ)

logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}")
logger.info(f"earthaccess version: {earthaccess.__version__}")


@pytest.fixture(scope="module")
def granuales():
    """Search once per module for two cloud-hosted granules shared by all tests."""
    granuales = earthaccess.search_data(
        count=2,
        short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205",
        cloud_hosted=True,
    )
    return granuales


@pytest.mark.parametrize("protocol", ["", "file://"])
def test_consolidate_metadata_outfile(tmp_path, granuales, protocol):
    """Writing to ``outfile`` creates the file and returns the given location."""
    outfile = f"{protocol}{tmp_path / 'metadata.json'}"
    # `os.path.exists` does not understand URLs, so check the stripped local
    # path both before and after; otherwise the precondition is vacuous for
    # the "file://" case.
    local_path = strip_protocol(outfile)
    assert not os.path.exists(local_path)
    result = earthaccess.consolidate_metadata(
        granuales,
        outfile=outfile,
        access="indirect",
        kerchunk_options={"concat_dims": "Time"},
    )
    assert os.path.exists(local_path)
    assert result == outfile


def test_consolidate_metadata_memory(tmp_path, granuales):
    """Without ``outfile`` the combined references come back as a dict."""
    result = earthaccess.consolidate_metadata(
        granuales,
        access="indirect",
        kerchunk_options={"concat_dims": "Time"},
    )
    assert isinstance(result, dict)
    assert "refs" in result


@pytest.mark.parametrize("output", ["file", "memory"])
def test_consolidate_metadata(tmp_path, granuales, output):
    """A dataset opened through the references equals one opened directly."""
    xr = pytest.importorskip("xarray")
    # Open directly with `earthaccess.open`
    expected = xr.open_mfdataset(earthaccess.open(granuales))

    # Open with kerchunk consolidated metadata file
    if output == "file":
        kwargs = {"outfile": tmp_path / "metadata.json"}
    else:
        kwargs = {}
    metadata = earthaccess.consolidate_metadata(
        granuales, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs
    )

    fs = earthaccess.get_fsspec_https_session()
    result = xr.open_dataset(
        "reference://",
        engine="zarr",
        chunks={},
        backend_kwargs={
            "consolidated": False,
            "storage_options": {
                "fo": metadata,
                "remote_protocol": "https",
                "remote_options": fs.storage_options,
            },
        },
    )

    xr.testing.assert_equal(result, expected)