Skip to content

Commit

Permalink
Corrected the bug in the url and created specific test cases (CIROH-U…
Browse files Browse the repository at this point in the history
…A#17)

* Update urlgennwm.py

Removed unused imports
Fixed the handling of urlbaseinput in the selecturlbase function so it behaves correctly (and returns the correct test result)

* move test to test folder and add pytest.ini

pytest.ini is required in order to run the test
with `pytest` instead of just `python -m pytest`.

* format files with black

* Update setup.py

Increment version number to 0.1.6

---------

Co-authored-by: James S. Halgren <[email protected]>
  • Loading branch information
RohanSunkarapalli and James S. Halgren authored Oct 31, 2023
1 parent 430a44b commit ee4da98
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 30 deletions.
67 changes: 41 additions & 26 deletions nwmurl/urlgennwm.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,9 @@
#from gevent import monkey
#monkey.patch_all()
from dateutil import rrule
from datetime import datetime, timezone
from datetime import datetime, timezone, timedelta
from itertools import product
import time
import os
from datetime import datetime, timedelta

#from concurrent.futures import ThreadPoolExecutor
#import gevent
#import requests
from functools import partial
from tqdm import tqdm

rundict = {
1: "short_range",
Expand All @@ -38,6 +30,7 @@
# Integer selector -> variable name; the kind of mapping selectvar() indexes with varinput.
vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"}
# Integer selector -> geography name (presumably consumed by selectgeo(); verify against caller).
geodict = {1: "conus", 2: "hawaii", 3: "puertorico"}


def selectvar(vardict, varinput):
    """Look up ``varinput`` in ``vardict`` and return the mapped value.

    Raises ``KeyError`` when ``varinput`` is not a key of ``vardict``.
    """
    chosen = vardict[varinput]
    return chosen

Expand Down Expand Up @@ -173,13 +166,14 @@ def select_lead_time(lead_time=None, default=None):
2: "s3://noaa-nwm-retrospective-2-1-pds/model_output/",
}


def selecturlbase(urlbasedict, urlbaseinput, defaulturlbase=""):
    """Return the base URL registered under ``urlbaseinput``.

    Args:
        urlbasedict: Mapping of selector keys to base-URL strings.
        urlbaseinput: Key to look up; may be ``None`` or an unknown key.
        defaulturlbase: Value returned when ``urlbaseinput`` is not a key
            of ``urlbasedict``.

    Returns:
        ``urlbasedict[urlbaseinput]`` when the key is present, otherwise
        ``defaulturlbase``.
    """
    # A membership-based lookup (rather than a truthiness test on the key)
    # keeps unknown selectors from raising KeyError and still honours falsy
    # keys such as 0 when they are present in the mapping.
    return urlbasedict.get(urlbaseinput, defaulturlbase)


def generate_urls_retro(
start_date=None,
end_date=None,
Expand Down Expand Up @@ -214,14 +208,15 @@ def generate_urls_retro(
file_list.extend(file_names)
else:
file_list.append(file_names)
if write_to_file == True:
if write_to_file == True:
if os.path.exists("retro_filenamelist.txt"):
os.remove("retro_filenamelist.txt")
os.remove("retro_filenamelist.txt")
with open("retro_filenamelist.txt", "wt") as file:
for item in file_list:
file.write(f"{item}\n")
return file_list



def create_file_list(
runinput,
varinput,
Expand Down Expand Up @@ -479,28 +474,48 @@ def generate_url_retro(date, file_type, urlbase_prefix, retrospective_var_types=
]

return url


def generate_urls_operational(start_date,end_date, fcst_cycle, lead_time, varinput, geoinput, runinput, urlbaseinput, meminput, write_to_file=False):


def generate_urls_operational(
start_date,
end_date,
fcst_cycle,
lead_time,
varinput,
geoinput,
runinput,
urlbaseinput,
meminput,
write_to_file=False,
):
start_date = start_date
end_date = end_date
end_date = end_date
fcst_cycle = fcst_cycle
# fcst_cycle = None # Retrieves a full day for each day within the range given.
#lead_time = [1]
# lead_time = [1]
lead_time = lead_time
varinput = varinput
#vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"}
# vardict = {1: "channel_rt", 2: "land", 3: "reservoir", 4: "terrain_rt", 5: "forcing"}
geoinput = geoinput
#geodict = {1: "conus", 2: "hawaii", 3: "puertorico"}
# geodict = {1: "conus", 2: "hawaii", 3: "puertorico"}
meminput = meminput
urlbaseinput = urlbaseinput
runinput = runinput

if runinput == 1 or runinput == 5 or runinput == 6 or runinput == 7 or runinput == 8 or runinput == 9 or runinput == 10 or runinput == 11:
meminput = None
print("no ensumble members available for the given runinput therefore, meminput set to None")

if (
runinput == 1
or runinput == 5
or runinput == 6
or runinput == 7
or runinput == 8
or runinput == 9
or runinput == 10
or runinput == 11
):
meminput = None
print(
"no ensemble members available for the given runinput therefore, meminput set to None"
)
# rundict = {
# 1: "short_range",
# 2: "medium_range",
Expand All @@ -526,7 +541,7 @@ def generate_urls_operational(start_date,end_date, fcst_cycle, lead_time, varinp
urlbaseinput,
lead_time,
)
if (write_to_file==True):
if write_to_file == True:
if os.path.exists("filenamelist.txt"):
os.remove("filenamelist.txt")
if urlbaseinput == 9:
Expand Down
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
pythonpath = nwmurl
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
"""

# Other information
VERSION = '0.1.5'
DESCRIPTION = 'URL generator tool for National Water Model data'
VERSION = "0.1.6"
DESCRIPTION = "URL generator tool for National Water Model data"

setup(
name="nwmurl",
Expand All @@ -45,13 +45,13 @@
long_description=LONG_DESCRIPTION,
long_description_content_type="text/x-rst",
packages=find_packages(),
keywords=['python', 'NWM', 'url'],
keywords=["python", "NWM", "url"],
classifiers=[
"Development Status :: 1 - Planning",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3",
"Operating System :: Unix",
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows",
]
],
)
207 changes: 207 additions & 0 deletions test/test_cases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import pytest
from datetime import datetime
from urlgennwm import (
selectvar,
selectgeo,
selectrun,
makename,
run_type,
fhprefix,
varsuffix,
run_typesuffix,
select_forecast_cycle,
select_lead_time,
selecturlbase,
create_file_list,
)


def test_selectvar():
    """selectvar returns the value stored under the requested key."""
    mapping = {1: "channel_rt"}
    picked = selectvar(mapping, 1)
    assert picked == "channel_rt"


def test_selectgeo():
    """selectgeo returns the value stored under the requested key."""
    mapping = {1: "conus"}
    picked = selectgeo(mapping, 1)
    assert picked == "conus"


def test_selectrun():
    """selectrun returns the value stored under the requested key."""
    mapping = {1: "short_range"}
    picked = selectrun(mapping, 1)
    assert picked == "short_range"


def test_makename():
    """makename builds the expected file URL from explicit components."""
    expected_url = "https://example.com/nwm.20220101/forcing_test/nwm.t00z.short_range_test.channel_rt_test.f001.conus.nc"
    built_url = makename(
        datetime(2022, 1, 1, 0, 0, 0, 0),
        "short_range",
        "channel_rt",
        0,
        1,
        "conus",
        "forcing",
        fhprefix="f",
        runsuffix="_test",
        varsuffix="_test",
        run_typesuffix="_test",
        urlbase_prefix="https://example.com/",
    )
    assert built_url == expected_url

@pytest.mark.parametrize(
    "runinput, varinput, geoinput, expected_output",
    [
        (5, 5, 2, "forcing_analysis_assim_hawaii"),
        (5, 5, 3, "forcing_analysis_assim_puertorico"),
        (2, 5, 7, "forcing_medium_range"),
        (1, 5, 7, "forcing_short_range"),
        (1, 3, 3, "short_range_puertorico"),
        (1, 5, 2, "forcing_short_range_hawaii"),
        (1, 5, 3, "forcing_short_range_puertorico"),
        (5, 5, 7, "forcing_analysis_assim"),
        (6, 5, 7, "forcing_analysis_assim_extend"),
        (5, 3, 3, "analysis_assim_puertorico"),
        (10, 3, 3, "analysis_assim_puertorico_no_da"),
        (11, 3, 3, "short_range_puertorico_no_da"),
        # Combination with no dedicated folder: the supplied default is returned.
        (2, 2, 2, "default_value"),
    ],
)
def test_run_type(runinput, varinput, geoinput, expected_output):
    """run_type maps (run, variable, geography) selectors to a folder name.

    Each case is listed once; the fourth positional argument passed to
    run_type is the fallback value expected for the final case.
    """
    assert run_type(runinput, varinput, geoinput, "default_value") == expected_output


def test_fhprefix():
    """fhprefix gives "tm" for inputs 5 and 10 and "f" for input 1."""
    cases = [(5, "tm"), (1, "f"), (10, "tm")]
    for selector, expected in cases:
        assert fhprefix(selector) == expected


def test_varsuffix():
    """varsuffix gives "_<n>" for inputs 1 and 7 and an empty string for 8."""
    cases = [(1, "_1"), (7, "_7"), (8, "")]
    for selector, expected in cases:
        assert varsuffix(selector) == expected


def test_run_typesuffix():
    """run_typesuffix gives "_mem<n>" for inputs 1 and 7 and "" for 8."""
    cases = [(1, "_mem1"), (7, "_mem7"), (8, "")]
    for selector, expected in cases:
        assert run_typesuffix(selector) == expected


def test_select_forecast_cycle():
    """select_forecast_cycle keeps a given cycle and falls back to the default for None."""
    cases = [((12, 0), 12), ((None, 0), 0)]
    for call_args, expected in cases:
        assert select_forecast_cycle(*call_args) == expected


def test_select_lead_time():
    """select_lead_time keeps a given lead time and falls back to the default for None."""
    cases = [((240, 0), 240), ((None, 0), 0)]
    for call_args, expected in cases:
        assert select_lead_time(*call_args) == expected


def test_selecturlbase():
    """selecturlbase returns the mapped URL for a known key, else the default."""
    url_table = {1: "https://example.com/"}
    known = selecturlbase(url_table, 1)
    assert known == "https://example.com/"
    unknown = selecturlbase({1: "https://example.com/"}, 2, "default")
    assert unknown == "default"

# Values 0..23, passed as the fcst_cycle argument in test_create_file_list.
fcst_cycle_values = list(range(24))

# Passed as the lead_time argument in test_create_file_list.
lead_time_values = [1, 2, 240]

# Prefixes a generated URL may start with (note the empty-string entry).
valid_base_urls = [
    "",
    "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/",
    "https://storage.googleapis.com/national-water-model/",
    "https://storage.cloud.google.com/national-water-model/",
    "gs://national-water-model/",
    "gcs://national-water-model/",
    "https://noaa-nwm-pds.s3.amazonaws.com/",
    "https://ciroh-nwm-zarr-copy.s3.amazonaws.com/national-water-model/",
]

# Folder names; each generated URL must contain at least one as a substring.
valid_folder_names = [
    "analysis_assim",
    "analysis_assim_alaska",
    "analysis_assim_alaska_no_da",
    "analysis_assim_coastal_atlgulf",
    "analysis_assim_coastal_hawaii",
    "analysis_assim_coastal_pacific",
    "analysis_assim_coastal_puertorico",
    "analysis_assim_extend",
    "analysis_assim_extend_alaska",
    "analysis_assim_extend_alaska_no_da",
    "analysis_assim_extend_coastal_atlgulf",
    "analysis_assim_extend_coastal_pacific",
    "analysis_assim_extend_no_da",
    "analysis_assim_hawaii",
    "analysis_assim_hawaii_no_da",
    "analysis_assim_long",
    "analysis_assim_long_no_da",
    "analysis_assim_no_da",
    "analysis_assim_puertorico",
    "analysis_assim_puertorico_no_da",
    "forcing_analysis_assim",
    "forcing_analysis_assim_alaska",
    "forcing_analysis_assim_extend",
    "forcing_analysis_assim_extend_alaska",
    "forcing_analysis_assim_hawaii",
    "forcing_analysis_assim_puertorico",
    "forcing_medium_range",
    "forcing_medium_range_alaska",
    "forcing_medium_range_blend",
    "forcing_medium_range_blend_alaska",
    "forcing_short_range",
    "forcing_short_range_alaska",
    "forcing_short_range_hawaii",
    "forcing_short_range_puertorico",
    "long_range_mem1",
    "long_range_mem2",
    "long_range_mem3",
    "long_range_mem4",
    "medium_range_alaska_mem1",
    "medium_range_alaska_mem2",
    "medium_range_alaska_mem3",
    "medium_range_alaska_mem4",
    "medium_range_alaska_mem5",
    "medium_range_alaska_mem6",
    "medium_range_alaska_no_da",
    "medium_range_blend",
    "medium_range_blend_alaska",
    "medium_range_blend_coastal_atlgulf",
    "medium_range_blend_coastal_pacific",
    "medium_range_coastal_atlgulf_mem1",
    "short_range",
    "medium_range",
    "long_range_mem7",
    "medium_range_no_da_mem6",
]
import requests

def is_valid_url(url, timeout=10):
    """Report whether ``url`` answers an HTTP HEAD request without an error status.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot stall the test run indefinitely.

    Returns:
        ``True`` when the response status code is below 400; ``False`` when
        the request could not be completed (connection failure, timeout,
        unsupported scheme such as ``gs://``, malformed URL).
    """
    try:
        response = requests.head(url, timeout=timeout)
    except requests.RequestException:
        # RequestException is the common base of ConnectionError, Timeout,
        # InvalidSchema and similar errors, so unreachable or non-HTTP URLs
        # report False instead of raising.
        return False
    return response.status_code < 400


@pytest.mark.parametrize(
    "runinput, varinput, geoinput, meminput, start_date, end_date, fcst_cycle, urlbaseinput, lead_time, expected_output",
    [
        (1, 1, 1, 0, "201809170000", "201809172300", fcst_cycle_values, 3, None, ["expected_file_name_1"]),
        (5, 5, 2, 1, "201809170000", "201809171200", fcst_cycle_values, 1, lead_time_values, ["expected_file_name_2"]),
        (2, 5, 3, 3, "201809170600", "201809171800", fcst_cycle_values, 2, lead_time_values, ["expected_file_name_3"]),
        (1, 1, 5, 4, "201809170200", "201809171400", fcst_cycle_values, 4, lead_time_values, ["expected_file_name_4"]),
        (2, 2, 4, 5, "201809170800", "201809172000", fcst_cycle_values, 5, lead_time_values, ["expected_file_name_5"]),
        (3, 1, 5, 6, "201809171000", "201809172200", fcst_cycle_values, 6, lead_time_values, ["expected_file_name_6"]),
        (4, 2, 5, 7, "201809171200", "201809172400", fcst_cycle_values, 7, lead_time_values, ["expected_file_name_7"]),
        (5, 5, 1, 8, "201809171400", "201809172600", fcst_cycle_values, 8, lead_time_values, ["expected_file_name_8"]),
        (6, 1, 16, 9, "201809171600", "201809172800", fcst_cycle_values, 9, lead_time_values, ["expected_file_name_9"]),
        (8, 5, 3, 12, "201809172200", "201809173400", fcst_cycle_values, 12, lead_time_values, ["expected_file_name_12"]),
        (11, 1, 3, 18, "201809173400", "201809174600", fcst_cycle_values, 18, lead_time_values, ["expected_file_name_18"]),
    ],
)
def test_create_file_list(
    runinput,
    varinput,
    geoinput,
    meminput,
    start_date,
    end_date,
    fcst_cycle,
    urlbaseinput,
    lead_time,
    expected_output,
):
    """Check the shape of the URLs produced by create_file_list.

    The result must be a list of strings; every entry must contain one of
    ``valid_folder_names`` and start with one of ``valid_base_urls``.

    NOTE(review): ``expected_output`` is accepted from the parametrization
    but never compared against the result, and because ``valid_base_urls``
    contains ``""`` the prefix check is satisfied by any string.
    """
    generated = create_file_list(
        runinput,
        varinput,
        geoinput,
        meminput,
        start_date,
        end_date,
        fcst_cycle,
        urlbaseinput,
        lead_time,
    )
    assert isinstance(generated, list)
    assert all(isinstance(entry, str) for entry in generated)

    # Reachability via is_valid_url is not asserted here; only the URL text
    # is inspected.
    for url in generated:
        has_known_folder = any(folder in url for folder in valid_folder_names)
        assert has_known_folder, f"No valid folder name found in URL: {url}"

    # Check that every URL begins with one of the predefined base URLs.
    accepted_prefixes = tuple(valid_base_urls)
    for url in generated:
        has_known_base = url.startswith(accepted_prefixes)
        assert has_known_base, f"Invalid base URL in generated URL: {url}"



if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file as a script
    # ends with a non-zero status when any test fails.
    raise SystemExit(pytest.main())

0 comments on commit ee4da98

Please sign in to comment.