Skip to content

Commit

Permalink
Extract duplicated function
Browse files Browse the repository at this point in the history
  • Loading branch information
mfisher87 committed Jul 6, 2024
1 parent 30ddd87 commit f0e53bd
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 109 deletions.
41 changes: 41 additions & 0 deletions tests/integration/sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import logging
import random

logger = logging.getLogger(__name__)


def get_sample_granules(
granules: list,
sample_size: int,
max_granule_size: int | float,
round_ndigits: int = None,
):
"""Return a list of randomly-sampled granules and their size in MB.
Attempt to find only granules smaller or equal to max_granule_size. May return a
sample smaller than sample_size.
"""
sample = []
total_size = 0
max_tries = sample_size * 2
tries = 0

while tries <= max_tries:
g = random.sample(granules, 1)[0]
if g.size() > max_granule_size:
logger.debug(
f"Granule {g['meta']['concept-id']} exceded max size: {g.size()}."
"Trying another random sample."
)
tries += 1
continue
else:
logger.debug(
f"Adding granule to random sample: {g['meta']['concept-id']} size: {g.size()}"
)
sample.append(g)
total_size += g.size()
if len(sample) >= sample_size:
break

return sample, round(total_size, round_ndigits)
32 changes: 6 additions & 26 deletions tests/integration/test_cloud_download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# package imports
import logging
import os
import random
Expand All @@ -10,6 +9,8 @@
import pytest
from earthaccess import Auth, DataCollections, DataGranules, Store

from .sample import get_sample_granules

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -71,30 +72,6 @@
store = Store(auth)


def get_sample_granules(granules, sample_size, max_granule_size):
    """Randomly pick up to ``sample_size`` granules, each no larger than
    ``max_granule_size`` MB, returning them with their combined size in MB
    (rounded to the nearest integer). The sample may be short if too many
    oversized granules are drawn.
    """
    picked = []
    size_mb = 0
    # Allow a bounded number of oversized draws before giving up.
    tries_remaining = sample_size * 2 + 1

    while tries_remaining > 0:
        candidate = random.sample(granules, 1)[0]
        if candidate.size() > max_granule_size:
            # Oversized draw: spend one retry and pick again.
            tries_remaining -= 1
            continue
        picked.append(candidate)
        size_mb += candidate.size()
        if len(picked) >= sample_size:
            # Full sample collected; stop early.
            break

    return picked, round(size_mb)


@pytest.mark.parametrize("daac", daac_list)
def test_earthaccess_can_download_cloud_collection_granules(daac):
"""Tests that we can download cloud collections using HTTPS links."""
Expand All @@ -110,6 +87,7 @@ def test_earthaccess_can_download_cloud_collection_granules(daac):
logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}")
collections = collection_query.get(collections_count)
assertions.assertGreater(len(collections), collections_sample_size)

# We sample n cloud hosted collections from the results
random_collections = random.sample(collections, collections_sample_size)
for collection in random_collections:
Expand All @@ -121,7 +99,9 @@ def test_earthaccess_can_download_cloud_collection_granules(daac):
assert isinstance(granules[0], earthaccess.results.DataGranule)
local_path = f"./tests/integration/data/{concept_id}"
granules_to_download, total_size_cmr = get_sample_granules(
granules, granules_sample_size, granules_max_size
granules,
granules_sample_size,
granules_max_size,
)
if len(granules_to_download) == 0:
logger.warning(
Expand Down
32 changes: 6 additions & 26 deletions tests/integration/test_cloud_open.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# package imports
import logging
import os
import random
Expand All @@ -9,6 +8,8 @@
import pytest
from earthaccess import Auth, DataCollections, DataGranules, Store

from .sample import get_sample_granules

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -70,30 +71,6 @@
store = Store(auth)


def get_sample_granules(granules, sample_size, max_granule_size):
    """Randomly pick up to ``sample_size`` granules, each no larger than
    ``max_granule_size`` MB, returning them with their combined size in MB
    (rounded to two decimal places). The sample may be short if too many
    oversized granules are drawn.
    """
    chosen = []
    total_mb = 0
    # Allow a bounded number of oversized draws before giving up.
    attempts_left = sample_size * 2 + 1

    while attempts_left > 0:
        g = random.sample(granules, 1)[0]
        if g.size() > max_granule_size:
            # Too big: spend one retry and draw again.
            attempts_left -= 1
            continue
        chosen.append(g)
        total_mb += g.size()
        if len(chosen) >= sample_size:
            # Full sample collected; stop early.
            break

    return chosen, round(total_mb, 2)


def supported_collection(data_links):
for url in data_links:
if "podaac-tools.jpl.nasa.gov/drive" in url:
Expand Down Expand Up @@ -131,7 +108,10 @@ def test_earthaccess_can_open_onprem_collection_granules(daac):
logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}")
continue
granules_to_open, total_size_cmr = get_sample_granules(
granules, granules_sample_size, granules_max_size
granules,
granules_sample_size,
granules_max_size,
round_ndigits=2,
)
if len(granules_to_open) == 0:
logger.debug(
Expand Down
32 changes: 6 additions & 26 deletions tests/integration/test_onprem_download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# package imports
import logging
import os
import random
Expand All @@ -10,6 +9,8 @@
import pytest
from earthaccess import Auth, DataCollections, DataGranules, Store

from .sample import get_sample_granules

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -63,30 +64,6 @@
store = Store(auth)


def get_sample_granules(granules, sample_size, max_granule_size):
    """Randomly pick up to ``sample_size`` granules, each no larger than
    ``max_granule_size`` MB, returning them with their combined size in MB
    (rounded to two decimal places). The sample may be short if too many
    oversized granules are drawn.
    """
    selection = []
    accumulated_mb = 0
    # Allow a bounded number of oversized draws before giving up.
    budget = sample_size * 2 + 1

    while budget > 0:
        pick = random.sample(granules, 1)[0]
        if pick.size() > max_granule_size:
            # Too big: spend one retry and draw again.
            budget -= 1
            continue
        selection.append(pick)
        accumulated_mb += pick.size()
        if len(selection) >= sample_size:
            # Full sample collected; stop early.
            break

    return selection, round(accumulated_mb, 2)


def supported_collection(data_links):
for url in data_links:
if "podaac-tools.jpl.nasa.gov/drive" in url:
Expand Down Expand Up @@ -125,7 +102,10 @@ def test_earthaccess_can_download_onprem_collection_granules(daac):
continue
local_path = f"./tests/integration/data/{concept_id}"
granules_to_download, total_size_cmr = get_sample_granules(
granules, granules_sample_size, granules_max_size
granules,
granules_sample_size,
granules_max_size,
round_ndigits=2,
)
if len(granules_to_download) == 0:
logger.debug(
Expand Down
41 changes: 10 additions & 31 deletions tests/integration/test_onprem_open.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# package imports
import logging
import os
import random
Expand All @@ -9,8 +8,9 @@
import pytest
from earthaccess import Auth, DataCollections, DataGranules, Store

logger = logging.getLogger(__name__)
from .sample import get_sample_granules

logger = logging.getLogger(__name__)

daacs_list = [
{
Expand All @@ -22,20 +22,20 @@
"granules_max_size_mb": 100,
},
{
"short_name": "LPDAAC",
"short_name": "GES_DISC",
"collections_count": 100,
"collections_sample_size": 2,
"granules_count": 100,
"granules_sample_size": 2,
"granules_max_size_mb": 100,
"granules_max_size_mb": 130,
},
{
"short_name": "GES_DISC",
"short_name": "LPDAAC",
"collections_count": 100,
"collections_sample_size": 2,
"granules_count": 100,
"granules_sample_size": 2,
"granules_max_size_mb": 130,
"granules_max_size_mb": 100,
},
{
"short_name": "ORNLDAAC",
Expand All @@ -62,30 +62,6 @@
store = Store(auth)


def get_sample_granules(granules, sample_size, max_granule_size):
    """Randomly pick up to ``sample_size`` granules, each no larger than
    ``max_granule_size`` MB, returning them with their combined size in MB
    (rounded to two decimal places). The sample may be short if too many
    oversized granules are drawn.
    """
    result = []
    size_total = 0
    # Allow a bounded number of oversized draws before giving up.
    draws_left = sample_size * 2 + 1

    while draws_left > 0:
        granule = random.sample(granules, 1)[0]
        if granule.size() > max_granule_size:
            # Too big: spend one retry and draw again.
            draws_left -= 1
            continue
        result.append(granule)
        size_total += granule.size()
        if len(result) >= sample_size:
            # Full sample collected; stop early.
            break

    return result, round(size_total, 2)


def supported_collection(data_links):
for url in data_links:
if "podaac-tools.jpl.nasa.gov/drive" in url:
Expand Down Expand Up @@ -123,7 +99,10 @@ def test_earthaccess_can_open_onprem_collection_granules(daac):
logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}")
continue
granules_to_open, total_size_cmr = get_sample_granules(
granules, granules_sample_size, granules_max_size
granules,
granules_sample_size,
granules_max_size,
round_ndigits=2,
)
if len(granules_to_open) == 0:
logger.debug(
Expand Down

0 comments on commit f0e53bd

Please sign in to comment.