From f0e53bd3c82111c77fa70664e942148ffa2c9a6c Mon Sep 17 00:00:00 2001
From: Matt Fisher
Date: Sat, 6 Jul 2024 12:45:59 -0600
Subject: [PATCH] Extract duplicated function

---
Review notes (text between "---" and the diffstat is not part of the commit):

This revision changes get_sample_granules in sample.py as follows:
  * the oversized-granule debug message now reads
    "... exceeded max size: N. Trying another random sample." (the two
    adjacent string literals previously joined without a space, and
    "exceded" was misspelled);
  * round_ndigits is annotated "int | None", matching its None default, and
    the return type is declared;
  * an empty granules list or a non-positive sample_size yields an empty
    sample instead of raising / returning one granule (the callers in this
    patch already check for an empty sample);
  * g.size() is called once per draw and random.choice performs the draw.

NOTE(review): blank context lines and indentation in the hunks below were
reconstructed from the hunk line counts; confirm with `git apply --check`.

 tests/integration/sample.py               | 41 +++++++++++++++++++++++
 tests/integration/test_cloud_download.py  | 32 ++++--------------
 tests/integration/test_cloud_open.py      | 32 ++++--------------
 tests/integration/test_onprem_download.py | 32 ++++--------------
 tests/integration/test_onprem_open.py     | 41 ++++++-----------------
 5 files changed, 69 insertions(+), 109 deletions(-)
 create mode 100644 tests/integration/sample.py

diff --git a/tests/integration/sample.py b/tests/integration/sample.py
new file mode 100644
index 00000000..cd8e1f5e
--- /dev/null
+++ b/tests/integration/sample.py
@@ -0,0 +1,41 @@
+import logging
+import random
+
+logger = logging.getLogger(__name__)
+
+
+def get_sample_granules(
+    granules: list,
+    sample_size: int,
+    max_granule_size: int | float,
+    round_ndigits: int | None = None,
+) -> tuple[list, int | float]:
+    """Return a list of randomly-sampled granules and their total size in MB.
+
+    Granules are drawn with replacement and kept only when smaller or equal to
+    max_granule_size. Sampling gives up after more than sample_size * 2 oversized
+    draws, so the sample may be smaller than sample_size (empty if granules is empty).
+    The total size is rounded to round_ndigits digits (nearest integer when None).
+    """
+    sample = []
+    total_size = 0
+    max_tries = sample_size * 2
+    tries = 0
+
+    while granules and len(sample) < sample_size and tries <= max_tries:
+        g = random.choice(granules)
+        size = g.size()
+        if size > max_granule_size:
+            logger.debug(
+                f"Granule {g['meta']['concept-id']} exceeded max size: {size}. "
+                "Trying another random sample."
+            )
+            tries += 1
+        else:
+            logger.debug(
+                f"Adding granule to random sample: {g['meta']['concept-id']} size: {size}"
+            )
+            sample.append(g)
+            total_size += size
+
+    return sample, round(total_size, round_ndigits)
diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py
index 4e8f9519..99ba4c35 100644
--- a/tests/integration/test_cloud_download.py
+++ b/tests/integration/test_cloud_download.py
@@ -1,4 +1,3 @@
-# package imports
 import logging
 import os
 import random
@@ -10,6 +9,8 @@
 import pytest
 from earthaccess import Auth, DataCollections, DataGranules, Store
 
+from .sample import get_sample_granules
+
 logger = logging.getLogger(__name__)
 
 
@@ -71,30 +72,6 @@
 store = Store(auth)
 
 
-def get_sample_granules(granules, sample_size, max_granule_size):
-    """Returns a list with sample granules and their size in MB if
-    the total size is less than the max_granule_size.
-    """
-    files_to_download = []
-    total_size = 0
-    max_tries = sample_size * 2
-    tries = 0
-
-    while tries <= max_tries:
-        g = random.sample(granules, 1)[0]
-        if g.size() > max_granule_size:
-            # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}")
-            tries += 1
-            continue
-        else:
-            # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}")
-            files_to_download.append(g)
-            total_size += g.size()
-            if len(files_to_download) >= sample_size:
-                break
-    return files_to_download, round(total_size)
-
-
 @pytest.mark.parametrize("daac", daac_list)
 def test_earthaccess_can_download_cloud_collection_granules(daac):
     """Tests that we can download cloud collections using HTTPS links."""
@@ -110,6 +87,7 @@ def test_earthaccess_can_download_cloud_collection_granules(daac):
     logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}")
     collections = collection_query.get(collections_count)
     assertions.assertGreater(len(collections), collections_sample_size)
+
     # We sample n cloud hosted collections from the results
     random_collections = random.sample(collections, collections_sample_size)
     for collection in random_collections:
@@ -121,7 +99,9 @@ def test_earthaccess_can_download_cloud_collection_granules(daac):
         assert isinstance(granules[0], earthaccess.results.DataGranule)
         local_path = f"./tests/integration/data/{concept_id}"
         granules_to_download, total_size_cmr = get_sample_granules(
-            granules, granules_sample_size, granules_max_size
+            granules,
+            granules_sample_size,
+            granules_max_size,
         )
         if len(granules_to_download) == 0:
             logger.warning(
diff --git a/tests/integration/test_cloud_open.py b/tests/integration/test_cloud_open.py
index b69eba15..ee2cdaec 100644
--- a/tests/integration/test_cloud_open.py
+++ b/tests/integration/test_cloud_open.py
@@ -1,4 +1,3 @@
-# package imports
 import logging
 import os
 import random
@@ -9,6 +8,8 @@
 import pytest
 from earthaccess import Auth, DataCollections, DataGranules, Store
 
+from .sample import get_sample_granules
+
 logger = logging.getLogger(__name__)
 
 
@@ -70,30 +71,6 @@
 store = Store(auth)
 
 
-def get_sample_granules(granules, sample_size, max_granule_size):
-    """Returns a list with sample granules and their size in MB if
-    the total size is less than the max_granule_size.
-    """
-    files_to_download = []
-    total_size = 0
-    max_tries = sample_size * 2
-    tries = 0
-
-    while tries <= max_tries:
-        g = random.sample(granules, 1)[0]
-        if g.size() > max_granule_size:
-            # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}")
-            tries += 1
-            continue
-        else:
-            # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}")
-            files_to_download.append(g)
-            total_size += g.size()
-            if len(files_to_download) >= sample_size:
-                break
-    return files_to_download, round(total_size, 2)
-
-
 def supported_collection(data_links):
     for url in data_links:
         if "podaac-tools.jpl.nasa.gov/drive" in url:
@@ -131,7 +108,10 @@ def test_earthaccess_can_open_onprem_collection_granules(daac):
             logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}")
             continue
         granules_to_open, total_size_cmr = get_sample_granules(
-            granules, granules_sample_size, granules_max_size
+            granules,
+            granules_sample_size,
+            granules_max_size,
+            round_ndigits=2,
         )
         if len(granules_to_open) == 0:
             logger.debug(
diff --git a/tests/integration/test_onprem_download.py b/tests/integration/test_onprem_download.py
index 242a3c26..3595d9d2 100644
--- a/tests/integration/test_onprem_download.py
+++ b/tests/integration/test_onprem_download.py
@@ -1,4 +1,3 @@
-# package imports
 import logging
 import os
 import random
@@ -10,6 +9,8 @@
 import pytest
 from earthaccess import Auth, DataCollections, DataGranules, Store
 
+from .sample import get_sample_granules
+
 logger = logging.getLogger(__name__)
 
 
@@ -63,30 +64,6 @@
 store = Store(auth)
 
 
-def get_sample_granules(granules, sample_size, max_granule_size):
-    """Returns a list with sample granules and their size in MB if
-    the total size is less than the max_granule_size.
-    """
-    files_to_download = []
-    total_size = 0
-    max_tries = sample_size * 2
-    tries = 0
-
-    while tries <= max_tries:
-        g = random.sample(granules, 1)[0]
-        if g.size() > max_granule_size:
-            # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}")
-            tries += 1
-            continue
-        else:
-            # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}")
-            files_to_download.append(g)
-            total_size += g.size()
-            if len(files_to_download) >= sample_size:
-                break
-    return files_to_download, round(total_size, 2)
-
-
 def supported_collection(data_links):
     for url in data_links:
         if "podaac-tools.jpl.nasa.gov/drive" in url:
@@ -125,7 +102,10 @@ def test_earthaccess_can_download_onprem_collection_granules(daac):
             continue
         local_path = f"./tests/integration/data/{concept_id}"
         granules_to_download, total_size_cmr = get_sample_granules(
-            granules, granules_sample_size, granules_max_size
+            granules,
+            granules_sample_size,
+            granules_max_size,
+            round_ndigits=2,
         )
         if len(granules_to_download) == 0:
             logger.debug(
diff --git a/tests/integration/test_onprem_open.py b/tests/integration/test_onprem_open.py
index 2a455c44..eab1409e 100644
--- a/tests/integration/test_onprem_open.py
+++ b/tests/integration/test_onprem_open.py
@@ -1,4 +1,3 @@
-# package imports
 import logging
 import os
 import random
@@ -9,8 +8,9 @@
 import pytest
 from earthaccess import Auth, DataCollections, DataGranules, Store
 
-logger = logging.getLogger(__name__)
+from .sample import get_sample_granules
 
+logger = logging.getLogger(__name__)
 
 daacs_list = [
     {
@@ -22,20 +22,20 @@
         "granules_max_size_mb": 100,
     },
     {
-        "short_name": "LPDAAC",
+        "short_name": "GES_DISC",
         "collections_count": 100,
         "collections_sample_size": 2,
         "granules_count": 100,
         "granules_sample_size": 2,
-        "granules_max_size_mb": 100,
+        "granules_max_size_mb": 130,
     },
     {
-        "short_name": "GES_DISC",
+        "short_name": "LPDAAC",
         "collections_count": 100,
         "collections_sample_size": 2,
         "granules_count": 100,
         "granules_sample_size": 2,
-        "granules_max_size_mb": 130,
+        "granules_max_size_mb": 100,
     },
     {
         "short_name": "ORNLDAAC",
@@ -62,30 +62,6 @@
 store = Store(auth)
 
 
-def get_sample_granules(granules, sample_size, max_granule_size):
-    """Returns a list with sample granules and their size in MB if
-    the total size is less than the max_granule_size.
-    """
-    files_to_download = []
-    total_size = 0
-    max_tries = sample_size * 2
-    tries = 0
-
-    while tries <= max_tries:
-        g = random.sample(granules, 1)[0]
-        if g.size() > max_granule_size:
-            # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}")
-            tries += 1
-            continue
-        else:
-            # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}")
-            files_to_download.append(g)
-            total_size += g.size()
-            if len(files_to_download) >= sample_size:
-                break
-    return files_to_download, round(total_size, 2)
-
-
 def supported_collection(data_links):
     for url in data_links:
         if "podaac-tools.jpl.nasa.gov/drive" in url:
@@ -123,7 +99,10 @@ def test_earthaccess_can_open_onprem_collection_granules(daac):
             logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}")
             continue
         granules_to_open, total_size_cmr = get_sample_granules(
-            granules, granules_sample_size, granules_max_size
+            granules,
+            granules_sample_size,
+            granules_max_size,
+            round_ndigits=2,
         )
         if len(granules_to_open) == 0:
             logger.debug(