From b30c059b5cf17eb36ab3060d0edfef9b2f178bec Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 12 Mar 2024 10:45:32 -0400 Subject: [PATCH 01/49] add new nesdis_viirs_aod_gridded data from AWS - remove old ftp --- monetio/sat/__init__.py | 6 +- monetio/sat/nesdis_edr_viirs.py | 110 -------- monetio/sat/nesdis_eps_viirs.py | 192 -------------- monetio/sat/nesdis_viirs_aod_gridded.py | 324 ++++++++++++++++++++++++ 4 files changed, 326 insertions(+), 306 deletions(-) delete mode 100644 monetio/sat/nesdis_edr_viirs.py delete mode 100644 monetio/sat/nesdis_eps_viirs.py create mode 100644 monetio/sat/nesdis_viirs_aod_gridded.py diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index 29f96114..beb89117 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -4,8 +4,7 @@ _omps_nadir_mm, goes, modis_ornl, - nesdis_edr_viirs, - nesdis_eps_viirs, + nesdis_viirs_aod_gridded nesdis_frp, ) @@ -13,8 +12,7 @@ "_gridded_eos_mm", "_modis_l2_mm", "_omps_nadir_mm", - "nesdis_edr_viirs", - "nesdis_eps_viirs", + "nesdis_viirs_aod_gridded "nesdis_frp", "modis_ornl", "goes", diff --git a/monetio/sat/nesdis_edr_viirs.py b/monetio/sat/nesdis_edr_viirs.py deleted file mode 100644 index 0eb1ee93..00000000 --- a/monetio/sat/nesdis_edr_viirs.py +++ /dev/null @@ -1,110 +0,0 @@ -import os - -import xarray as xr - -server = "ftp.star.nesdis.noaa.gov" -base_dir = "/pub/smcd/jhuang/npp.viirs.aerosol.data/edraot550/" - - -def open_dataset(date, resolution="high", datapath="."): - current = change_dir(datapath) # noqa: F841 - # check resolution; by default 0.1 degree data is assumed - if resolution in {"high", "h"}: - # this is the 0.1 degree data - nlat = 1800 - nlon = 3600 - lon, lat = _get_latlons(nlat, nlon) - fname, date = download_data(date, resolution="high") - else: - nlat = 720 - nlon = 1440 - lon, lat = _get_latlons(nlat, nlon) - fname, date = download_data(date, resolution=0.25) - # unzip fname - fname = _unzip_file(fname) - # read the data - data = 
read_data(fname, lat, lon, date) - return data - - -def open_mfdataset(dates, resolution="high", datapath="."): - das = [] - for i in dates: - das.append(open_dataset(i, resolution=resolution, datapath=datapath)) - ds = xr.concat(das, dim="time") - return ds - - -def read_data(fname, lat, lon, date): - from numpy import float32, fromfile, nan - from pandas import to_datetime - - f = fromfile(fname, dtype=float32) - nlat, nlon = lon.shape - aot = f.reshape(2, nlat, nlon)[0, :, :].reshape(1, nlat, nlon) - aot[aot < -999] = nan - datearr = to_datetime([date]) - da = xr.DataArray(aot, coords=[datearr, range(nlat), range(nlon)], dims=["time", "y", "x"]) - da["latitude"] = (("y", "x"), lat) - da["longitude"] = (("y", "x"), lon) - da.attrs["units"] = "" - da.name = "VIIRS EDR AOD" - da.attrs["long_name"] = "Aerosol Optical Depth" - da.attrs[ - "source" - ] = "ftp://ftp.star.nesdis.noaa.gov/pub/smcd/jhuang/npp.viirs.aerosol.data/edraot550" - return da - - -def _unzip_file(fname): - import subprocess - - subprocess.run(["gunzip", "-f", fname]) - return fname[:-3] - - -def change_dir(to_path): - current = os.getcwd() - os.chdir(to_path) - return current - - -def download_data(date, resolution="high"): - import ftplib - from datetime import datetime - - if isinstance(date, datetime): - year = date.strftime("%Y") - yyyymmdd = date.strftime("%Y%m%d") - else: - from pandas import Timestamp - - date = Timestamp(date) - year = date.strftime("%Y") - yyyymmdd = date.strftime("%Y%m%d") - if resolution == "high": - file = f"npp_aot550_edr_gridded_0.10_{yyyymmdd}.high.bin.gz" - else: - file = f"npp_aot550_edr_gridded_0.25_{yyyymmdd}.high.bin.gz" - ftp = ftplib.FTP(server) - ftp.login() - # print(base_dir) - # print(year) - # print(base_dir + year) - ftp.cwd(base_dir + year) - # print(file) - ftp.retrbinary("RETR " + file, open(file, "wb").write) - return file, date - - -def _get_latlons(nlat, nlon): - from numpy import linspace, meshgrid - - lon_min = -179.875 - lon_max = -1 * lon_min 
- lat_min = -89.875 - lat_max = -1.0 * lat_min - lons = linspace(lon_min, lon_max, nlon) - lats = linspace(lat_min, lat_max, nlat) - lon, lat = meshgrid(lons, lats) - return lon, lat diff --git a/monetio/sat/nesdis_eps_viirs.py b/monetio/sat/nesdis_eps_viirs.py deleted file mode 100644 index c59902c6..00000000 --- a/monetio/sat/nesdis_eps_viirs.py +++ /dev/null @@ -1,192 +0,0 @@ -import os - -import xarray as xr - -server = "ftp.star.nesdis.noaa.gov" -base_dir = "/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550/" - - -def open_dataset(date, datapath="."): - """Short summary. - - Parameters - ---------- - date : type - Description of parameter `date`. - datapath : type - Description of parameter `datapath`. - - Returns - ------- - type - Description of returned object. - - """ - current = change_dir(datapath) - nlat = 720 - nlon = 1440 - lon, lat = _get_latlons(nlat, nlon) - if isinstance(date, str): - fname, date = download_data(date) - else: - fname, date = download_data(date) - print(fname) - data = read_data(fname, lat, lon, date) - change_dir(current) - return data.where(data > 0) - - -def open_mfdataset(dates, datapath="."): - """Short summary. - - Parameters - ---------- - dates : type - Description of parameter `dates`. - datapath : type - Description of parameter `datapath`. - - Returns - ------- - type - Description of returned object. - - """ - from xarray import concat - - das = [] - for i in dates: - print(i) - das.append(open_dataset(i, datapath=datapath)) - ds = concat(das, dim="time") - return ds - - -def read_data(fname, lat, lon, date): - """Short summary. - - Parameters - ---------- - fname : type - Description of parameter `fname`. - lat : type - Description of parameter `lat`. - lon : type - Description of parameter `lon`. - date : type - Description of parameter `date`. - - Returns - ------- - type - Description of returned object. 
- - """ - from pandas import to_datetime - - f = xr.open_dataset(fname) - datearr = to_datetime([date]) - da = f["aot_ip_out"] - da = da.rename({"nlat": "y", "nlon": "x"}) - da["latitude"] = (("y", "x"), lat) - da["longitude"] = (("y", "x"), lon) - da = da.expand_dims("time") - da["time"] = datearr - da.attrs["units"] = "" - da.name = "VIIRS EPS AOT" - da.attrs["long_name"] = "Aerosol Optical Thickness" - da.attrs[ - "source" - ] = "ftp://ftp.star.nesdis.noaa.gov/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550" - return da - - -def change_dir(to_path): - """Short summary. - - Parameters - ---------- - to_path : type - Description of parameter `to_path`. - - Returns - ------- - type - Description of returned object. - - """ - current = os.getcwd() - os.chdir(to_path) - return current - - -def download_data(date, resolution="high"): - """Short summary. - - Parameters - ---------- - date : type - Description of parameter `date`. - resolution : type - Description of parameter `resolution`. - - Returns - ------- - type - Description of returned object. - - """ - import ftplib - from datetime import datetime - - from pandas import DatetimeIndex - - if isinstance(date, datetime) or isinstance(date, DatetimeIndex): - year = date.strftime("%Y") - yyyymmdd = date.strftime("%Y%m%d") - else: - from pandas import Timestamp - - date = Timestamp(date) - year = date.strftime("%Y") - yyyymmdd = date.strftime("%Y%m%d") - # npp_eaot_ip_gridded_0.25_20181222.high.nc - # print(year, yyyymmdd) - file = f"npp_eaot_ip_gridded_0.25_{yyyymmdd}.high.nc" - exists = os.path.isfile(file) - if ~exists: - ftp = ftplib.FTP(server) - ftp.login() - ftp.cwd(base_dir + year) - ftp.retrbinary("RETR " + file, open(file, "wb").write) - else: - print(f"File Already Exists! Reading: {file}") - return file, date - - -def _get_latlons(nlat, nlon): - """Short summary. - - Parameters - ---------- - nlat : type - Description of parameter `nlat`. - nlon : type - Description of parameter `nlon`. 
- - Returns - ------- - type - Description of returned object. - - """ - from numpy import linspace, meshgrid - - lon_min = -179.875 - lon_max = -1 * lon_min - lat_min = -89.875 - lat_max = -1.0 * lat_min - lons = linspace(lon_min, lon_max, nlon) - lats = linspace(lat_max, lat_min, nlat) - lon, lat = meshgrid(lons, lats) - return lon, lat diff --git a/monetio/sat/nesdis_viirs_aod_gridded.py b/monetio/sat/nesdis_viirs_aod_gridded.py new file mode 100644 index 00000000..ddd19abd --- /dev/null +++ b/monetio/sat/nesdis_viirs_aod_gridded.py @@ -0,0 +1,324 @@ +import s3fs + + +# Create list of available daily data file paths & total size of files +def create_daily_aod_list(data_resolution, satellite, date_generated, fs): + """ + Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. + + Parameters: + - data_resolution (str): The resolution of the AOD data. + - satellite (str): The satellite name. Can be 'both', 'SNPP', or 'NOAA20'. + - date_generated (list): A list of dates for which to check the existence of AOD files. + - fs (FileSystem): The file system object used to check file existence and size. + + Returns: + - nodd_file_list (list): A list of paths to the existing AOD files. + - nodd_total_size (int): The total size of the existing AOD files in bytes. 
+ """ + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year = file_date[:4] + if satellite == "both": + sat_list = ["npp", "noaa20"] + for sat_name in sat_list: + file_name = ( + "viirs_eps_" + + sat_name + + "_aod_" + + data_resolution + + "_deg_" + + file_date + + ".nc" + ) + if sat_name == "npp": + prod_path = ( + "noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" + + data_resolution[:4] + + "_Degrees_Daily/" + + year + + "/" + ) + elif sat_name == "noaa20": + prod_path = ( + "noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" + + data_resolution[:4] + + "_Degrees_Daily/" + + year + + "/" + ) + # If file exists, add path to list and add file size to total + if fs.exists(prod_path + file_name) == True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + else: + if satellite == "SNPP": + sat_name = "npp" + elif satellite == "NOAA20": + sat_name = "noaa20" + file_name = ( + "viirs_eps_" + sat_name + "_aod_" + data_resolution + "_deg_" + file_date + ".nc" + ) + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" + + data_resolution[:4] + + "_Degrees_Daily/" + + year + + "/" + ) + # If file exists, add path to list and add file size to total + if fs.exists(prod_path + file_name) == True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + + return nodd_file_list, nodd_total_size + + +# Create list of available monthly data file paths & total size of files +def create_monthly_aod_list(satellite, date_generated, fs): + """ + Creates a list of monthly AOD (Aerosol Optical Depth) files for a given satellite and date range. + + Args: + satellite (str): The satellite name. 
Can be 'both', 'SNPP', or 'NOAA20'. + date_generated (list): A list of datetime objects representing the observation dates. + fs: The file system object used to check for file existence and retrieve file information. + + Returns: + tuple: A tuple containing the list of file paths and the total size of the files. + + """ + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + year_month_list = [] + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year_month = file_date[:6] + if year_month not in year_month_list: + year_month_list.append(year_month) + if satellite == "both": + sat_list = ["snpp", "noaa20"] + for sat_name in sat_list: + file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" + if sat_name == "snpp": + prod_path = "noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" + elif sat_name == "noaa20": + prod_path = "noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" + # If file exists, add path to list and add file size to total + if fs.exists(prod_path + file_name) == True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + else: + if satellite == "SNPP": + sat_name = "snpp" + elif satellite == "NOAA20": + sat_name = "noaa20" + file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" + ) + # If file exists, add path to list and add file size to total + if fs.exists(prod_path + file_name) == True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + + return nodd_file_list, nodd_total_size + + +# Create list of available weekly data file paths & total size of files 
+def create_weekly_aod_list(satellite, date_generated, fs): + """ + Creates a list of files and calculates the total size of files for a given satellite, observation dates, and file system. + + Parameters: + satellite (str): The satellite name. Can be 'both', 'SNPP', or 'NOAA20'. + date_generated (list): A list of observation dates. + fs (FileSystem): The file system object. + + Returns: + tuple: A tuple containing the list of files and the total size of files. + """ + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year = file_date[:4] + if satellite == "both": + sat_list = ["SNPP", "NOAA20"] + for sat_name in sat_list: + prod_path = ( + "noaa-jpss/" + + sat_name + + "/VIIRS/" + + sat_name + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" + + year + + "/" + ) + # Get list of all files in given year on NODD + all_files = fs.ls(prod_path) + # Loop through files, check if file date falls within observation date range + for file in all_files: + file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] + file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] + # If file within observation range, add path to list and add file size to total + if file_date >= file_start and file_date <= file_end: + if file not in nodd_file_list: + nodd_file_list.append(file) + nodd_total_size = nodd_total_size + fs.size(file) + else: + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" + + year + + "/" + ) + # Get list of all files in given year on NODD + all_files = fs.ls(prod_path) + # Loop through files, check if file date falls within observation date range + for file in all_files: + file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] + file_end = 
file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] + # If file within observation range, add path to list and add file size to total + if file_date >= file_start and file_date <= file_end: + if file not in nodd_file_list: + nodd_file_list.append(file) + nodd_total_size = nodd_total_size + fs.size(file) + + return nodd_file_list, nodd_total_size + + +def open_dataset( + date, satellite, data_resolution="0.1", averaging_time="daily", download=False, save_path="./" +): + """ + Opens a dataset for the given date, satellite, data resolution, and averaging time. + + Parameters: + date (str or datetime.datetime): The date for which to open the dataset. + satellite (str): The satellite to retrieve data from. Valid values are 'SNPP', 'NOAA20', or 'both'. + data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. + averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + + Returns: + xarray.Dataset: The opened dataset. + + Raises: + ValueError: If the input values are invalid. + """ + import xarray as xr + import pandas as pd + + if satellite not in ("SNPP", "NOAA20", "both"): + print( + "Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP" + ) + satellite = "SNPP" + + if data_resolution not in ("0.050", "0.100", "0.250"): + print( + "Invalid input data_resolution. Valid values are '0.050', '0.100', '0.250'. 
Setting default to 0.1" + ) + data_resolution = str(0.1) + else: + str(data_resolution) + + if isinstance(date, str): + date_generated = [pd.Timestamp(date)] + else: + date_generated = [date] + + # Access AWS using anonymous credentials + fs = s3fs.S3FileSystem(anon=True) + + if averaging_time == "monthly": + file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) + elif averaging_time == "weekly": + file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) + else: + file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) + + aws_file = fs.open(file_list[0]) + + dset = xr.open_dataset(aws_file) + + # add datetime + dset = dset.expand_dims(time=date_generated) + + return dset + + +def open_mfdataset( + dates, satellite, data_resolution="0.1", averaging_time="daily", download=False, save_path="./" +): + """ + Opens and combines multiple NetCDF files into a single xarray dataset. + + Parameters: + dates (pandas.DatetimeIndex): The dates for which to retrieve the data. + satellite (str): The satellite name. Valid values are 'SNPP', 'NOAA20', or 'both'. + data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. + averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + download (bool, optional): Whether to download the data from AWS. Defaults to False. + save_path (str, optional): The path to save the downloaded data. Defaults to './'. + + Returns: + xarray.Dataset: The combined dataset containing the data for the specified dates. + + Raises: + ValueError: If the input parameters are invalid. + + """ + import xarray as xr + import pandas as pd + import s3fs + + if satellite not in ("SNPP", "NOAA20", "both"): + print( + "Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. 
Setting default to SNPP" + ) + satellite = "SNPP" + + if data_resolution not in ("0.050", "0.100", "0.250"): + print( + "Invalid input for data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1" + ) + data_resolution = "0.1" + + if not isinstance(dates, pd.DatetimeIndex): + raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + + # Access AWS using anonymous credentials + fs = s3fs.S3FileSystem(anon=True) + + if averaging_time == "monthly": + file_list, total_size = create_monthly_aod_list(satellite, dates, fs) + elif averaging_time == "weekly": + file_list, total_size = create_weekly_aod_list(satellite, dates, fs) + else: + file_list, total_size = create_daily_aod_list(data_resolution, satellite, dates, fs) + + print(file_list) + aws_files = [fs.open(f) for f in file_list] + + dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") + + dset["time"] = dates + + return dset From b0ada7150032911485eba7a72a484a998dd421f4 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 12 Mar 2024 10:45:52 -0400 Subject: [PATCH 02/49] update --- monetio/sat/nesdis_viirs_aod_gridded.py | 185 ++++++++---------------- 1 file changed, 58 insertions(+), 127 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_gridded.py b/monetio/sat/nesdis_viirs_aod_gridded.py index ddd19abd..8dfc508e 100644 --- a/monetio/sat/nesdis_viirs_aod_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_gridded.py @@ -1,7 +1,8 @@ import s3fs +import xarray as xr +import pandas as pd -# Create list of available daily data file paths & total size of files def create_daily_aod_list(data_resolution, satellite, date_generated, fs): """ Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. 
@@ -20,59 +21,27 @@ def create_daily_aod_list(data_resolution, satellite, date_generated, fs): nodd_file_list = [] nodd_total_size = 0 for date in date_generated: - file_date = date.strftime("%Y%m%d") + file_date = date.strftime('%Y%m%d') year = file_date[:4] - if satellite == "both": - sat_list = ["npp", "noaa20"] + if satellite == 'both': + sat_list = ['npp', 'noaa20'] for sat_name in sat_list: - file_name = ( - "viirs_eps_" - + sat_name - + "_aod_" - + data_resolution - + "_deg_" - + file_date - + ".nc" - ) - if sat_name == "npp": - prod_path = ( - "noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" - + data_resolution[:4] - + "_Degrees_Daily/" - + year - + "/" - ) - elif sat_name == "noaa20": - prod_path = ( - "noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" - + data_resolution[:4] - + "_Degrees_Daily/" - + year - + "/" - ) + file_name = 'viirs_eps_' + sat_name + '_aod_' + data_resolution + '_deg_' + file_date + '.nc' + if sat_name == 'npp': + prod_path = 'noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/' + data_resolution[:4] + '_Degrees_Daily/' + year + '/' + elif sat_name == 'noaa20': + prod_path = 'noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/' + data_resolution[:4] + '_Degrees_Daily/' + year + '/' # If file exists, add path to list and add file size to total if fs.exists(prod_path + file_name) == True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) else: - if satellite == "SNPP": - sat_name = "npp" - elif satellite == "NOAA20": - sat_name = "noaa20" - file_name = ( - "viirs_eps_" + sat_name + "_aod_" + data_resolution + "_deg_" + file_date + ".nc" - ) - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" - + data_resolution[:4] - + "_Degrees_Daily/" - + year - + "/" - ) + if satellite == 'SNPP': + 
sat_name = 'npp' + elif satellite == 'NOAA20': + sat_name = 'noaa20' + file_name = 'viirs_eps_' + sat_name + '_aod_' + data_resolution + '_deg_' + file_date + '.nc' + prod_path = 'noaa-jpss/' + satellite + '/VIIRS/' + satellite + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/' + data_resolution[:4] + '_Degrees_Daily/' + year + '/' # If file exists, add path to list and add file size to total if fs.exists(prod_path + file_name) == True: nodd_file_list.extend(fs.ls(prod_path + file_name)) @@ -100,35 +69,29 @@ def create_monthly_aod_list(satellite, date_generated, fs): nodd_total_size = 0 year_month_list = [] for date in date_generated: - file_date = date.strftime("%Y%m%d") + file_date = date.strftime('%Y%m%d') year_month = file_date[:6] if year_month not in year_month_list: year_month_list.append(year_month) - if satellite == "both": - sat_list = ["snpp", "noaa20"] + if satellite == 'both': + sat_list = ['snpp', 'noaa20'] for sat_name in sat_list: - file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" - if sat_name == "snpp": - prod_path = "noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" - elif sat_name == "noaa20": - prod_path = "noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" + file_name = 'viirs_aod_monthly_' + sat_name + '_0.250_deg_' + year_month + '.nc' + if sat_name == 'snpp': + prod_path = 'noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/' + elif sat_name == 'noaa20': + prod_path = 'noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/' # If file exists, add path to list and add file size to total if fs.exists(prod_path + file_name) == True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) else: - if satellite == "SNPP": - sat_name = "snpp" - elif satellite == 
"NOAA20": - sat_name = "noaa20" - file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" - ) + if satellite == 'SNPP': + sat_name = 'snpp' + elif satellite == 'NOAA20': + sat_name = 'noaa20' + file_name = 'viirs_aod_monthly_' + sat_name + '_0.250_deg_' + year_month + '.nc' + prod_path = 'noaa-jpss/' + satellite + '/VIIRS/' + satellite + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/' # If file exists, add path to list and add file size to total if fs.exists(prod_path + file_name) == True: nodd_file_list.extend(fs.ls(prod_path + file_name)) @@ -154,47 +117,31 @@ def create_weekly_aod_list(satellite, date_generated, fs): nodd_file_list = [] nodd_total_size = 0 for date in date_generated: - file_date = date.strftime("%Y%m%d") + file_date = date.strftime('%Y%m%d') year = file_date[:4] - if satellite == "both": - sat_list = ["SNPP", "NOAA20"] + if satellite == 'both': + sat_list = ['SNPP', 'NOAA20'] for sat_name in sat_list: - prod_path = ( - "noaa-jpss/" - + sat_name - + "/VIIRS/" - + sat_name - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" - + year - + "/" - ) + prod_path = 'noaa-jpss/' + sat_name + '/VIIRS/' + sat_name + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/' + year + '/' # Get list of all files in given year on NODD all_files = fs.ls(prod_path) # Loop through files, check if file date falls within observation date range for file in all_files: - file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] - file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] + file_start = file.split('/')[-1].split('_')[7].split('.')[0].split('-')[0] + file_end = file.split('/')[-1].split('_')[7].split('.')[0].split('-')[1] # If file within observation range, add path to list and add file 
size to total if file_date >= file_start and file_date <= file_end: if file not in nodd_file_list: nodd_file_list.append(file) nodd_total_size = nodd_total_size + fs.size(file) else: - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" - + year - + "/" - ) + prod_path = 'noaa-jpss/' + satellite + '/VIIRS/' + satellite + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/' + year + '/' # Get list of all files in given year on NODD all_files = fs.ls(prod_path) # Loop through files, check if file date falls within observation date range for file in all_files: - file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] - file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] + file_start = file.split('/')[-1].split('_')[7].split('.')[0].split('-')[0] + file_end = file.split('/')[-1].split('_')[7].split('.')[0].split('-')[1] # If file within observation range, add path to list and add file size to total if file_date >= file_start and file_date <= file_end: if file not in nodd_file_list: @@ -204,9 +151,7 @@ def create_weekly_aod_list(satellite, date_generated, fs): return nodd_file_list, nodd_total_size -def open_dataset( - date, satellite, data_resolution="0.1", averaging_time="daily", download=False, save_path="./" -): +def open_dataset(date, satellite, data_resolution='0.1', averaging_time='daily', download=False, save_path='./'): """ Opens a dataset for the given date, satellite, data resolution, and averaging time. @@ -224,17 +169,12 @@ def open_dataset( """ import xarray as xr import pandas as pd + if satellite not in ('SNPP', 'NOAA20', 'both'): + print("Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP") + satellite = 'SNPP' - if satellite not in ("SNPP", "NOAA20", "both"): - print( - "Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. 
Setting default to SNPP" - ) - satellite = "SNPP" - - if data_resolution not in ("0.050", "0.100", "0.250"): - print( - "Invalid input data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1" - ) + if data_resolution not in ('0.050', '0.100', '0.250'): + print("Invalid input data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1") data_resolution = str(0.1) else: str(data_resolution) @@ -247,9 +187,9 @@ def open_dataset( # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time == "monthly": + if averaging_time == 'monthly': file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) - elif averaging_time == "weekly": + elif averaging_time == 'weekly': file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) else: file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) @@ -264,9 +204,7 @@ def open_dataset( return dset -def open_mfdataset( - dates, satellite, data_resolution="0.1", averaging_time="daily", download=False, save_path="./" -): +def open_mfdataset(dates, satellite, data_resolution='0.1', averaging_time='daily', download=False, save_path='./'): """ Opens and combines multiple NetCDF files into a single xarray dataset. @@ -285,21 +223,14 @@ def open_mfdataset( ValueError: If the input parameters are invalid. """ - import xarray as xr - import pandas as pd - import s3fs - if satellite not in ("SNPP", "NOAA20", "both"): - print( - "Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP" - ) - satellite = "SNPP" + if satellite not in ('SNPP', 'NOAA20', 'both'): + print("Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP") + satellite = 'SNPP' - if data_resolution not in ("0.050", "0.100", "0.250"): - print( - "Invalid input for data_resolution. Valid values are '0.050', '0.100', '0.250'. 
Setting default to 0.1" - ) - data_resolution = "0.1" + if data_resolution not in ('0.050', '0.100', '0.250'): + print("Invalid input for data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1") + data_resolution = '0.1' if not isinstance(dates, pd.DatetimeIndex): raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") @@ -307,9 +238,9 @@ def open_mfdataset( # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time == "monthly": + if averaging_time == 'monthly': file_list, total_size = create_monthly_aod_list(satellite, dates, fs) - elif averaging_time == "weekly": + elif averaging_time == 'weekly': file_list, total_size = create_weekly_aod_list(satellite, dates, fs) else: file_list, total_size = create_daily_aod_list(data_resolution, satellite, dates, fs) @@ -317,8 +248,8 @@ def open_mfdataset( print(file_list) aws_files = [fs.open(f) for f in file_list] - dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") + dset = xr.open_mfdataset(aws_files, concat_dim={'time': dates}, combine='nested') - dset["time"] = dates + dset['time'] = dates return dset From 7510ea06309322d11804885cc0d141e211ac8762 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 12 Mar 2024 10:50:07 -0400 Subject: [PATCH 03/49] fix formatting --- monetio/sat/__init__.py | 4 ++-- monetio/sat/nesdis_viirs_aod_gridded.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index beb89117..1a310389 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -4,7 +4,7 @@ _omps_nadir_mm, goes, modis_ornl, - nesdis_viirs_aod_gridded + nesdis_viirs_aod_gridded, nesdis_frp, ) @@ -12,7 +12,7 @@ "_gridded_eos_mm", "_modis_l2_mm", "_omps_nadir_mm", - "nesdis_viirs_aod_gridded + "nesdis_viirs_aod_gridded", "nesdis_frp", "modis_ornl", "goes", diff --git a/monetio/sat/nesdis_viirs_aod_gridded.py 
b/monetio/sat/nesdis_viirs_aod_gridded.py index 8dfc508e..bd615b46 100644 --- a/monetio/sat/nesdis_viirs_aod_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_gridded.py @@ -32,7 +32,7 @@ def create_daily_aod_list(data_resolution, satellite, date_generated, fs): elif sat_name == 'noaa20': prod_path = 'noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/' + data_resolution[:4] + '_Degrees_Daily/' + year + '/' # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) == True: + if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) else: @@ -43,7 +43,7 @@ def create_daily_aod_list(data_resolution, satellite, date_generated, fs): file_name = 'viirs_eps_' + sat_name + '_aod_' + data_resolution + '_deg_' + file_date + '.nc' prod_path = 'noaa-jpss/' + satellite + '/VIIRS/' + satellite + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/' + data_resolution[:4] + '_Degrees_Daily/' + year + '/' # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) == True: + if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) @@ -82,7 +82,7 @@ def create_monthly_aod_list(satellite, date_generated, fs): elif sat_name == 'noaa20': prod_path = 'noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/' # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) == True: + if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) else: @@ -93,7 +93,7 @@ def create_monthly_aod_list(satellite, date_generated, fs): file_name = 'viirs_aod_monthly_' + sat_name + '_0.250_deg_' + year_month + 
'.nc' prod_path = 'noaa-jpss/' + satellite + '/VIIRS/' + satellite + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/' # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) == True: + if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) From 9f8769cffe57459b85090684ad478bf0698e9891 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 12 Mar 2024 15:19:51 -0400 Subject: [PATCH 04/49] move imports to module --- monetio/sat/nesdis_viirs_aod_gridded.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_gridded.py b/monetio/sat/nesdis_viirs_aod_gridded.py index bd615b46..297350f3 100644 --- a/monetio/sat/nesdis_viirs_aod_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_gridded.py @@ -1,6 +1,4 @@ -import s3fs -import xarray as xr -import pandas as pd + def create_daily_aod_list(data_resolution, satellite, date_generated, fs): @@ -169,6 +167,8 @@ def open_dataset(date, satellite, data_resolution='0.1', averaging_time='daily', """ import xarray as xr import pandas as pd + import s3fs + if satellite not in ('SNPP', 'NOAA20', 'both'): print("Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP") satellite = 'SNPP' @@ -223,6 +223,9 @@ def open_mfdataset(dates, satellite, data_resolution='0.1', averaging_time='dail ValueError: If the input parameters are invalid. """ + import xarray as xr + import pandas as pd + import s3fs if satellite not in ('SNPP', 'NOAA20', 'both'): print("Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. 
Setting default to SNPP") From 4f21f1cf154fee8149a82b72fecc0689c5e018ca Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 12 Mar 2024 15:20:30 -0400 Subject: [PATCH 05/49] remove blank lines at beginning --- monetio/sat/nesdis_viirs_aod_gridded.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_gridded.py b/monetio/sat/nesdis_viirs_aod_gridded.py index 297350f3..5c9174d5 100644 --- a/monetio/sat/nesdis_viirs_aod_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_gridded.py @@ -1,6 +1,3 @@ - - - def create_daily_aod_list(data_resolution, satellite, date_generated, fs): """ Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. From 22e32904a8320d0ebc8e51d1eeda8b7a0d920ac6 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 12 Mar 2024 15:26:36 -0400 Subject: [PATCH 06/49] remove erronious code --- monetio/sat/nesdis_viirs_aod_gridded.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_gridded.py b/monetio/sat/nesdis_viirs_aod_gridded.py index 5c9174d5..d978773f 100644 --- a/monetio/sat/nesdis_viirs_aod_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_gridded.py @@ -172,12 +172,11 @@ def open_dataset(date, satellite, data_resolution='0.1', averaging_time='daily', if data_resolution not in ('0.050', '0.100', '0.250'): print("Invalid input data_resolution. Valid values are '0.050', '0.100', '0.250'. 
Setting default to 0.1") - data_resolution = str(0.1) - else: - str(data_resolution) + data_resolution = '0.100' + if isinstance(date, str): - date_generated = [pd.Timestamp(date)] + date_generated = [pd.Timestampdate] else: date_generated = [date] From 80d9131e47e74cc37c47c2b9d647d89ad377fb65 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 12 Mar 2024 15:27:19 -0400 Subject: [PATCH 07/49] fixes --- monetio/sat/nesdis_viirs_aod_gridded.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monetio/sat/nesdis_viirs_aod_gridded.py b/monetio/sat/nesdis_viirs_aod_gridded.py index d978773f..345277fc 100644 --- a/monetio/sat/nesdis_viirs_aod_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_gridded.py @@ -176,7 +176,7 @@ def open_dataset(date, satellite, data_resolution='0.1', averaging_time='daily', if isinstance(date, str): - date_generated = [pd.Timestampdate] + date_generated = [pd.Timestamp(date)] else: date_generated = [date] From 0862863c41d471476ecc2e4e36ff09b8afe113bf Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Wed, 13 Mar 2024 10:12:50 -0400 Subject: [PATCH 08/49] update for NRT products from NESDIS STAR https server --- monetio/__init__.py | 2 + monetio/sat/__init__.py | 6 +- monetio/sat/nesdis_eps_viirs_aod_nrt.py | 144 ++++++++++++++++++ ...ded.py => nesdis_viirs_aod_aws_gridded.py} | 1 - 4 files changed, 150 insertions(+), 3 deletions(-) create mode 100644 monetio/sat/nesdis_eps_viirs_aod_nrt.py rename monetio/sat/{nesdis_viirs_aod_gridded.py => nesdis_viirs_aod_aws_gridded.py} (97%) diff --git a/monetio/__init__.py b/monetio/__init__.py index 9c60c152..e6356f4f 100644 --- a/monetio/__init__.py +++ b/monetio/__init__.py @@ -38,6 +38,8 @@ # # satellite obs "goes", + "nesdis_eps_viirs_aod_nrt", + "nesdis_viirs_aod_aws_gridded", # # models "camx", diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index 1a310389..704cd63b 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -4,7 +4,8 @@ _omps_nadir_mm, goes, 
modis_ornl, - nesdis_viirs_aod_gridded, + nesdis_viirs_aod_aws_gridded, + nesdis_viirs_aod_nrt, nesdis_frp, ) @@ -12,7 +13,8 @@ "_gridded_eos_mm", "_modis_l2_mm", "_omps_nadir_mm", - "nesdis_viirs_aod_gridded", + "nesdis_viirs_aod_aws_gridded", + "nesdis_viirs_aod_nrt", "nesdis_frp", "modis_ornl", "goes", diff --git a/monetio/sat/nesdis_eps_viirs_aod_nrt.py b/monetio/sat/nesdis_eps_viirs_aod_nrt.py new file mode 100644 index 00000000..b4e9a93f --- /dev/null +++ b/monetio/sat/nesdis_eps_viirs_aod_nrt.py @@ -0,0 +1,144 @@ +import pandas as pd + +server = "ftp.star.nesdis.noaa.gov" +base_dir = "/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550/" + + +def build_urls(dates, *, daily=True, res=0.1, sat='noaa20'): + """Construct URLs for downloading NEPS data. + + Parameters + ---------- + dates : pd.DatetimeIndex or iterable of datetime + Dates to download data for. + daily : bool, optional + Whether to download daily (default) or sub-daily data. + res : float, optional + Resolution of data in km, only used for sub-daily data. + sat : str, optional + Satellite platform, only used for sub-daily data. + + Returns + ------- + pd.Series + Series with URLs and corresponding file names. + + Notes + ----- + The `res` and `sat` parameters are only used for sub-daily data. 
+ """ + + from collections.abc import Iterable + if isinstance(dates,Iterable): + dates = pd.DatetimeIndex(dates) + else: + dates = pd.DatetimeIndex([dates]) + if daily: + dates = dates.floor("D").unique() + else: # monthly + dates = dates.floor("m").unique() + sat = sat.lower() + urls = [] + fnames = [] + print("Building VIIRS URLs...") + base_url = "https://www.star.nesdis.noaa.gov/pub/smcd/VIIRS_Aerosol/viirs_aerosol_gridded_data/{}/aod/eps/".format(sat) + if sat == 'snpp': + sat = 'npp' + for dt in dates: + if daily: + fname = "viirs_eps_{}_aod_{}_deg_{}_nrt.nc".format(sat,str(res).ljust(5,'0'), dt.strftime('%Y%m%d')) + url = base_url + dt.strftime(r"%Y/") + fname + urls.append(url) + fnames.append(fname) + + # Note: files needed for comparison + urls = pd.Series(urls, index=None) + fnames = pd.Series(fnames, index=None) + return urls, fnames + + +def retrieve(url, fname): + """Download files from the airnowtech S3 server. + + Parameters + ---------- + url : string + Description of parameter `url`. + fname : string + Description of parameter `fname`. 
+ + Returns + ------- + None + + """ + import requests + import os + + if not os.path.isfile(fname): + print("\n Retrieving: " + fname) + print(url) + print("\n") + r = requests.get(url) + r.raise_for_status() + with open(fname, "wb") as f: + f.write(r.content) + else: + print("\n File Exists: " + fname) + + +def open_dataset(datestr, sat='noaa20', res=0.1, daily=True, add_timestamp=True): + import xarray as xr + import pandas as pd + if isinstance(datestr,pd.Timestamp) == False: + d = pd.to_datetime(datestr) + else: + d = datestr + if sat.lower() == 'noaa20': + sat = 'noaa20' + else: + sat = 'snpp' + + #if (res != 0.1) or (res != 0.25): + # res = 0.1 # assume resolution is 0.1 if wrong value supplied + + urls, fnames = build_urls(d, sat=sat,res=res, daily=daily) + + url = urls.values[0] + fname = fnames.values[0] + + retrieve(url,fname) + + dset = xr.open_dataset(fname) + + if add_timestamp: + dset['time'] = d + dset = dset.expand_dims('time') + dset = dset.set_coords(['time']) + return dset + + +def open_mfdataset(datestr, sat='noaa20', res=0.1, daily=True, add_timestamp=True): + import xarray as xr + import pandas as pd + + if isinstance(datestr,pd.DatetimeIndex) == False: + print('Please provide a pandas.DatetimeIndex') + exit + else: + d = datestr + + if sat.lower() == 'noaa20': + sat = 'noaa20' + else: + sat = 'snpp' + + urls, fnames = build_urls(d, sat=sat,res=res, daily=daily) + + for url, fname in zip(urls, fnames): + retrieve(url, fname) + + dset = xr.open_mfdataset(fnames,combine='nested', concat_dim={'time':d}) + dset['time'] = d + + return dset diff --git a/monetio/sat/nesdis_viirs_aod_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py similarity index 97% rename from monetio/sat/nesdis_viirs_aod_gridded.py rename to monetio/sat/nesdis_viirs_aod_aws_gridded.py index 345277fc..466dee70 100644 --- a/monetio/sat/nesdis_viirs_aod_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -174,7 +174,6 @@ def open_dataset(date, satellite, 
data_resolution='0.1', averaging_time='daily', print("Invalid input data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1") data_resolution = '0.100' - if isinstance(date, str): date_generated = [pd.Timestamp(date)] else: From 7a57d217d01e29de655e658a196269e8118af8fb Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Wed, 13 Mar 2024 13:06:33 -0400 Subject: [PATCH 09/49] changes for precommit --- monetio/sat/__init__.py | 2 +- monetio/sat/nesdis_eps_viirs_aod_nrt.py | 103 +++++------ monetio/sat/nesdis_viirs_aod_aws_gridded.py | 186 +++++++++++++------- 3 files changed, 181 insertions(+), 110 deletions(-) diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index 704cd63b..195280c6 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -4,9 +4,9 @@ _omps_nadir_mm, goes, modis_ornl, + nesdis_frp, nesdis_viirs_aod_aws_gridded, nesdis_viirs_aod_nrt, - nesdis_frp, ) __all__ = [ diff --git a/monetio/sat/nesdis_eps_viirs_aod_nrt.py b/monetio/sat/nesdis_eps_viirs_aod_nrt.py index b4e9a93f..c3cee0e9 100644 --- a/monetio/sat/nesdis_eps_viirs_aod_nrt.py +++ b/monetio/sat/nesdis_eps_viirs_aod_nrt.py @@ -4,32 +4,33 @@ base_dir = "/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550/" -def build_urls(dates, *, daily=True, res=0.1, sat='noaa20'): +def build_urls(dates, *, daily=True, res=0.1, sat="noaa20"): """Construct URLs for downloading NEPS data. - Parameters - ---------- - dates : pd.DatetimeIndex or iterable of datetime - Dates to download data for. - daily : bool, optional - Whether to download daily (default) or sub-daily data. - res : float, optional - Resolution of data in km, only used for sub-daily data. - sat : str, optional - Satellite platform, only used for sub-daily data. - - Returns - ------- - pd.Series - Series with URLs and corresponding file names. - - Notes - ----- - The `res` and `sat` parameters are only used for sub-daily data. 
+ Parameters + ---------- + dates : pd.DatetimeIndex or iterable of datetime + Dates to download data for. + daily : bool, optional + Whether to download daily (default) or sub-daily data. + res : float, optional + Resolution of data in km, only used for sub-daily data. + sat : str, optional + Satellite platform, only used for sub-daily data. + + Returns + ------- + pd.Series + Series with URLs and corresponding file names. + + Notes + ----- + The `res` and `sat` parameters are only used for sub-daily data. """ from collections.abc import Iterable - if isinstance(dates,Iterable): + + if isinstance(dates, Iterable): dates = pd.DatetimeIndex(dates) else: dates = pd.DatetimeIndex([dates]) @@ -41,12 +42,14 @@ def build_urls(dates, *, daily=True, res=0.1, sat='noaa20'): urls = [] fnames = [] print("Building VIIRS URLs...") - base_url = "https://www.star.nesdis.noaa.gov/pub/smcd/VIIRS_Aerosol/viirs_aerosol_gridded_data/{}/aod/eps/".format(sat) - if sat == 'snpp': - sat = 'npp' + base_url = f"https://www.star.nesdis.noaa.gov/pub/smcd/VIIRS_Aerosol/viirs_aerosol_gridded_data/{sat}/aod/eps/" + if sat == "snpp": + sat = "npp" for dt in dates: if daily: - fname = "viirs_eps_{}_aod_{}_deg_{}_nrt.nc".format(sat,str(res).ljust(5,'0'), dt.strftime('%Y%m%d')) + fname = "viirs_eps_{}_aod_{}_deg_{}_nrt.nc".format( + sat, str(res).ljust(5, "0"), dt.strftime("%Y%m%d") + ) url = base_url + dt.strftime(r"%Y/") + fname urls.append(url) fnames.append(fname) @@ -72,9 +75,10 @@ def retrieve(url, fname): None """ - import requests import os + import requests + if not os.path.isfile(fname): print("\n Retrieving: " + fname) print(url) @@ -87,58 +91,59 @@ def retrieve(url, fname): print("\n File Exists: " + fname) -def open_dataset(datestr, sat='noaa20', res=0.1, daily=True, add_timestamp=True): - import xarray as xr +def open_dataset(datestr, sat="noaa20", res=0.1, daily=True, add_timestamp=True): import pandas as pd - if isinstance(datestr,pd.Timestamp) == False: + import xarray as xr + + if 
~isinstance(datestr, pd.Timestamp): d = pd.to_datetime(datestr) else: d = datestr - if sat.lower() == 'noaa20': - sat = 'noaa20' + if sat.lower() == "noaa20": + sat = "noaa20" else: - sat = 'snpp' + sat = "snpp" - #if (res != 0.1) or (res != 0.25): + # if (res != 0.1) or (res != 0.25): # res = 0.1 # assume resolution is 0.1 if wrong value supplied - urls, fnames = build_urls(d, sat=sat,res=res, daily=daily) + urls, fnames = build_urls(d, sat=sat, res=res, daily=daily) url = urls.values[0] fname = fnames.values[0] - retrieve(url,fname) + retrieve(url, fname) dset = xr.open_dataset(fname) if add_timestamp: - dset['time'] = d - dset = dset.expand_dims('time') - dset = dset.set_coords(['time']) - return dset + dset["time"] = d + dset = dset.expand_dims("time") + dset = dset.set_coords(["time"]) + return dset -def open_mfdataset(datestr, sat='noaa20', res=0.1, daily=True, add_timestamp=True): - import xarray as xr +def open_mfdataset(datestr, sat="noaa20", res=0.1, daily=True, add_timestamp=True): import pandas as pd + import xarray as xr - if isinstance(datestr,pd.DatetimeIndex) == False: - print('Please provide a pandas.DatetimeIndex') + if isinstance(datestr, pd.DatetimeIndex) is False: + print("Please provide a pandas.DatetimeIndex") exit else: d = datestr - if sat.lower() == 'noaa20': - sat = 'noaa20' + if sat.lower() == "noaa20": + sat = "noaa20" else: - sat = 'snpp' + sat = "snpp" - urls, fnames = build_urls(d, sat=sat,res=res, daily=daily) + urls, fnames = build_urls(d, sat=sat, res=res, daily=daily) for url, fname in zip(urls, fnames): retrieve(url, fname) - dset = xr.open_mfdataset(fnames,combine='nested', concat_dim={'time':d}) - dset['time'] = d + dset = xr.open_mfdataset(fnames, combine="nested", concat_dim={"time": d}) + dset["time"] = d return dset diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 466dee70..c0f0914d 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ 
b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -16,27 +16,59 @@ def create_daily_aod_list(data_resolution, satellite, date_generated, fs): nodd_file_list = [] nodd_total_size = 0 for date in date_generated: - file_date = date.strftime('%Y%m%d') + file_date = date.strftime("%Y%m%d") year = file_date[:4] - if satellite == 'both': - sat_list = ['npp', 'noaa20'] + if satellite == "both": + sat_list = ["npp", "noaa20"] for sat_name in sat_list: - file_name = 'viirs_eps_' + sat_name + '_aod_' + data_resolution + '_deg_' + file_date + '.nc' - if sat_name == 'npp': - prod_path = 'noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/' + data_resolution[:4] + '_Degrees_Daily/' + year + '/' - elif sat_name == 'noaa20': - prod_path = 'noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/' + data_resolution[:4] + '_Degrees_Daily/' + year + '/' + file_name = ( + "viirs_eps_" + + sat_name + + "_aod_" + + data_resolution + + "_deg_" + + file_date + + ".nc" + ) + if sat_name == "npp": + prod_path = ( + "noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" + + data_resolution[:4] + + "_Degrees_Daily/" + + year + + "/" + ) + elif sat_name == "noaa20": + prod_path = ( + "noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" + + data_resolution[:4] + + "_Degrees_Daily/" + + year + + "/" + ) # If file exists, add path to list and add file size to total if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) else: - if satellite == 'SNPP': - sat_name = 'npp' - elif satellite == 'NOAA20': - sat_name = 'noaa20' - file_name = 'viirs_eps_' + sat_name + '_aod_' + data_resolution + '_deg_' + file_date + '.nc' - prod_path = 'noaa-jpss/' + satellite + '/VIIRS/' + satellite + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/' + data_resolution[:4] + '_Degrees_Daily/' + year + '/' + if satellite 
== "SNPP": + sat_name = "npp" + elif satellite == "NOAA20": + sat_name = "noaa20" + file_name = ( + "viirs_eps_" + sat_name + "_aod_" + data_resolution + "_deg_" + file_date + ".nc" + ) + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" + + data_resolution[:4] + + "_Degrees_Daily/" + + year + + "/" + ) # If file exists, add path to list and add file size to total if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) @@ -64,29 +96,35 @@ def create_monthly_aod_list(satellite, date_generated, fs): nodd_total_size = 0 year_month_list = [] for date in date_generated: - file_date = date.strftime('%Y%m%d') + file_date = date.strftime("%Y%m%d") year_month = file_date[:6] if year_month not in year_month_list: year_month_list.append(year_month) - if satellite == 'both': - sat_list = ['snpp', 'noaa20'] + if satellite == "both": + sat_list = ["snpp", "noaa20"] for sat_name in sat_list: - file_name = 'viirs_aod_monthly_' + sat_name + '_0.250_deg_' + year_month + '.nc' - if sat_name == 'snpp': - prod_path = 'noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/' - elif sat_name == 'noaa20': - prod_path = 'noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/' + file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" + if sat_name == "snpp": + prod_path = "noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" + elif sat_name == "noaa20": + prod_path = "noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" # If file exists, add path to list and add file size to total if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) else: - if satellite == 'SNPP': - 
sat_name = 'snpp' - elif satellite == 'NOAA20': - sat_name = 'noaa20' - file_name = 'viirs_aod_monthly_' + sat_name + '_0.250_deg_' + year_month + '.nc' - prod_path = 'noaa-jpss/' + satellite + '/VIIRS/' + satellite + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/' + if satellite == "SNPP": + sat_name = "snpp" + elif satellite == "NOAA20": + sat_name = "noaa20" + file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" + ) # If file exists, add path to list and add file size to total if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) @@ -112,31 +150,47 @@ def create_weekly_aod_list(satellite, date_generated, fs): nodd_file_list = [] nodd_total_size = 0 for date in date_generated: - file_date = date.strftime('%Y%m%d') + file_date = date.strftime("%Y%m%d") year = file_date[:4] - if satellite == 'both': - sat_list = ['SNPP', 'NOAA20'] + if satellite == "both": + sat_list = ["SNPP", "NOAA20"] for sat_name in sat_list: - prod_path = 'noaa-jpss/' + sat_name + '/VIIRS/' + sat_name + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/' + year + '/' + prod_path = ( + "noaa-jpss/" + + sat_name + + "/VIIRS/" + + sat_name + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" + + year + + "/" + ) # Get list of all files in given year on NODD all_files = fs.ls(prod_path) # Loop through files, check if file date falls within observation date range for file in all_files: - file_start = file.split('/')[-1].split('_')[7].split('.')[0].split('-')[0] - file_end = file.split('/')[-1].split('_')[7].split('.')[0].split('-')[1] + file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] + file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] # If file within observation 
range, add path to list and add file size to total if file_date >= file_start and file_date <= file_end: if file not in nodd_file_list: nodd_file_list.append(file) nodd_total_size = nodd_total_size + fs.size(file) else: - prod_path = 'noaa-jpss/' + satellite + '/VIIRS/' + satellite + '_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/' + year + '/' + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" + + year + + "/" + ) # Get list of all files in given year on NODD all_files = fs.ls(prod_path) # Loop through files, check if file date falls within observation date range for file in all_files: - file_start = file.split('/')[-1].split('_')[7].split('.')[0].split('-')[0] - file_end = file.split('/')[-1].split('_')[7].split('.')[0].split('-')[1] + file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] + file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] # If file within observation range, add path to list and add file size to total if file_date >= file_start and file_date <= file_end: if file not in nodd_file_list: @@ -146,7 +200,9 @@ def create_weekly_aod_list(satellite, date_generated, fs): return nodd_file_list, nodd_total_size -def open_dataset(date, satellite, data_resolution='0.1', averaging_time='daily', download=False, save_path='./'): +def open_dataset( + date, satellite, data_resolution="0.1", averaging_time="daily", download=False, save_path="./" +): """ Opens a dataset for the given date, satellite, data resolution, and averaging time. @@ -162,18 +218,22 @@ def open_dataset(date, satellite, data_resolution='0.1', averaging_time='daily', Raises: ValueError: If the input values are invalid. """ - import xarray as xr import pandas as pd import s3fs + import xarray as xr + + if satellite not in ("SNPP", "NOAA20", "both"): + print( + "Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. 
Setting default to SNPP" + ) + satellite = "SNPP" - if satellite not in ('SNPP', 'NOAA20', 'both'): - print("Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP") - satellite = 'SNPP' + if data_resolution not in ("0.050", "0.100", "0.250"): + print( + "Invalid input data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1" + ) + data_resolution = "0.100" - if data_resolution not in ('0.050', '0.100', '0.250'): - print("Invalid input data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1") - data_resolution = '0.100' - if isinstance(date, str): date_generated = [pd.Timestamp(date)] else: @@ -182,9 +242,9 @@ def open_dataset(date, satellite, data_resolution='0.1', averaging_time='daily', # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time == 'monthly': + if averaging_time == "monthly": file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) - elif averaging_time == 'weekly': + elif averaging_time == "weekly": file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) else: file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) @@ -199,7 +259,9 @@ def open_dataset(date, satellite, data_resolution='0.1', averaging_time='daily', return dset -def open_mfdataset(dates, satellite, data_resolution='0.1', averaging_time='daily', download=False, save_path='./'): +def open_mfdataset( + dates, satellite, data_resolution="0.1", averaging_time="daily", download=False, save_path="./" +): """ Opens and combines multiple NetCDF files into a single xarray dataset. @@ -218,17 +280,21 @@ def open_mfdataset(dates, satellite, data_resolution='0.1', averaging_time='dail ValueError: If the input parameters are invalid. 
""" - import xarray as xr import pandas as pd import s3fs + import xarray as xr - if satellite not in ('SNPP', 'NOAA20', 'both'): - print("Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP") - satellite = 'SNPP' + if satellite not in ("SNPP", "NOAA20", "both"): + print( + "Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP" + ) + satellite = "SNPP" - if data_resolution not in ('0.050', '0.100', '0.250'): - print("Invalid input for data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1") - data_resolution = '0.1' + if data_resolution not in ("0.050", "0.100", "0.250"): + print( + "Invalid input for data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1" + ) + data_resolution = "0.1" if not isinstance(dates, pd.DatetimeIndex): raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") @@ -236,9 +302,9 @@ def open_mfdataset(dates, satellite, data_resolution='0.1', averaging_time='dail # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time == 'monthly': + if averaging_time == "monthly": file_list, total_size = create_monthly_aod_list(satellite, dates, fs) - elif averaging_time == 'weekly': + elif averaging_time == "weekly": file_list, total_size = create_weekly_aod_list(satellite, dates, fs) else: file_list, total_size = create_daily_aod_list(data_resolution, satellite, dates, fs) @@ -246,8 +312,8 @@ def open_mfdataset(dates, satellite, data_resolution='0.1', averaging_time='dail print(file_list) aws_files = [fs.open(f) for f in file_list] - dset = xr.open_mfdataset(aws_files, concat_dim={'time': dates}, combine='nested') + dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") - dset['time'] = dates + dset["time"] = dates return dset From 29c6a1cbb267d96d8f9697061721c1c161cfc18b Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Wed, 13 
Mar 2024 13:12:39 -0400 Subject: [PATCH 10/49] update __init__.py --- monetio/sat/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index 195280c6..a8606abc 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -4,9 +4,9 @@ _omps_nadir_mm, goes, modis_ornl, + nesdis_eps_viirs_aod_nrt, nesdis_frp, nesdis_viirs_aod_aws_gridded, - nesdis_viirs_aod_nrt, ) __all__ = [ @@ -14,7 +14,7 @@ "_modis_l2_mm", "_omps_nadir_mm", "nesdis_viirs_aod_aws_gridded", - "nesdis_viirs_aod_nrt", + "nesdis_eps_viirs_aod_nrt", "nesdis_frp", "modis_ornl", "goes", From c53bc29b6cf59d4a7cb0622727f16c731ff4c9d8 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Wed, 13 Mar 2024 13:31:44 -0400 Subject: [PATCH 11/49] format attempt fix --- monetio/sat/nesdis_eps_viirs_aod_nrt.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/monetio/sat/nesdis_eps_viirs_aod_nrt.py b/monetio/sat/nesdis_eps_viirs_aod_nrt.py index c3cee0e9..074523ca 100644 --- a/monetio/sat/nesdis_eps_viirs_aod_nrt.py +++ b/monetio/sat/nesdis_eps_viirs_aod_nrt.py @@ -9,10 +9,8 @@ def build_urls(dates, *, daily=True, res=0.1, sat="noaa20"): Parameters ---------- - dates : pd.DatetimeIndex or iterable of datetime - Dates to download data for. - daily : bool, optional - Whether to download daily (default) or sub-daily data. + dates : pd.DatetimeIndex or iterable of datetime Dates to download data for. + daily : bool, optional Whether to download daily (default) or sub-daily data. res : float, optional Resolution of data in km, only used for sub-daily data. 
sat : str, optional From c7216c70f4af32ccb5a1835cbb0fd9c133f2d2e6 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Wed, 13 Mar 2024 13:33:54 -0400 Subject: [PATCH 12/49] format fix --- monetio/sat/nesdis_eps_viirs_aod_nrt.py | 34 +++++++++++++------------ 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/monetio/sat/nesdis_eps_viirs_aod_nrt.py b/monetio/sat/nesdis_eps_viirs_aod_nrt.py index 074523ca..f1747d00 100644 --- a/monetio/sat/nesdis_eps_viirs_aod_nrt.py +++ b/monetio/sat/nesdis_eps_viirs_aod_nrt.py @@ -7,23 +7,25 @@ def build_urls(dates, *, daily=True, res=0.1, sat="noaa20"): """Construct URLs for downloading NEPS data. - Parameters - ---------- - dates : pd.DatetimeIndex or iterable of datetime Dates to download data for. - daily : bool, optional Whether to download daily (default) or sub-daily data. + Parameters + ---------- + dates : pd.DatetimeIndex or iterable of datetime + Dates to download data for. + daily : bool, optional + Whether to download daily (default) or sub-daily data. res : float, optional - Resolution of data in km, only used for sub-daily data. - sat : str, optional - Satellite platform, only used for sub-daily data. - - Returns - ------- - pd.Series - Series with URLs and corresponding file names. - - Notes - ----- - The `res` and `sat` parameters are only used for sub-daily data. + Resolution of data in km, only used for sub-daily data. + sat : str, optional + Satellite platform, only used for sub-daily data. + + Returns + ------- + pd.Series + Series with URLs and corresponding file names. + + Notes + ----- + The `res` and `sat` parameters are only used for sub-daily data. 
""" from collections.abc import Iterable From 1674be176e5b2cab9b9392193c4b861f83182808 Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 14 Mar 2024 16:34:22 -0600 Subject: [PATCH 13/49] Add h5netcdf to dev env --- environment-dev.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment-dev.yml b/environment-dev.yml index 9f03be24..f9fc97c5 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -15,6 +15,7 @@ dependencies: - xarray # # optional + - h5netcdf - joblib - lxml - pyhdf <0.11 From fee81d658bb76cf216053433a6d3b39d8b6d0a9c Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 14 Mar 2024 16:36:32 -0600 Subject: [PATCH 14/49] Add initial test for gridded VIIRS AOD --- tests/test_viirs_aod.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 tests/test_viirs_aod.py diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py new file mode 100644 index 00000000..32fe3273 --- /dev/null +++ b/tests/test_viirs_aod.py @@ -0,0 +1,10 @@ +from monetio.sat.nesdis_viirs_aod_aws_gridded import open_dataset + + +def test_open_dataset(): + date = "2020-01-01" # a date when we have both + ds = open_dataset(date, "SNPP") + assert tuple(ds.dims) == ("time", "lat", "lon") + assert ds.sizes["time"] == 1 + assert ds.attrs["satellite_name"] == "NPP" + assert ds.attrs["spatial_resolution"].strip().startswith("0.1") From de7f76c19f10b7d5761a9f0c928d5b76b09dff1b Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 14 Mar 2024 16:51:40 -0600 Subject: [PATCH 15/49] Satellite and resolution cases --- tests/test_viirs_aod.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index 32fe3273..7c6b3a1d 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -1,10 +1,18 @@ +import pytest + from monetio.sat.nesdis_viirs_aod_aws_gridded import open_dataset -def test_open_dataset(): +@pytest.mark.parametrize("sat", ["SNPP", "NOAA20"]) +@pytest.mark.parametrize("res", 
[0.05, 0.1, 0.25]) +def test_open_dataset(sat, res): date = "2020-01-01" # a date when we have both - ds = open_dataset(date, "SNPP") - assert tuple(ds.dims) == ("time", "lat", "lon") + s_res = f"{res:.3f}" + + ds = open_dataset(date, sat, s_res) + assert set(ds.dims) == {"time", "lat", "lon"} assert ds.sizes["time"] == 1 - assert ds.attrs["satellite_name"] == "NPP" - assert ds.attrs["spatial_resolution"].strip().startswith("0.1") + assert ds.sizes["lat"] == int(180 / res) + assert ds.sizes["lon"] == int(360 / res) + assert ds.attrs["satellite_name"] == "NPP" if sat == "NPP" else "NOAA 20" + assert ds.attrs["spatial_resolution"].strip().startswith(str(res)) From 57e74492aa9f7c612e28cbbd90915ab11280a121 Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 14 Mar 2024 16:59:52 -0600 Subject: [PATCH 16/49] Fix satellite name check --- tests/test_viirs_aod.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index 7c6b3a1d..454e2a8e 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -6,7 +6,7 @@ @pytest.mark.parametrize("sat", ["SNPP", "NOAA20"]) @pytest.mark.parametrize("res", [0.05, 0.1, 0.25]) def test_open_dataset(sat, res): - date = "2020-01-01" # a date when we have both + date = "2020-01-01" # a date when we have both SNPP and NOAA-20 data available s_res = f"{res:.3f}" ds = open_dataset(date, sat, s_res) @@ -14,5 +14,5 @@ def test_open_dataset(sat, res): assert ds.sizes["time"] == 1 assert ds.sizes["lat"] == int(180 / res) assert ds.sizes["lon"] == int(360 / res) - assert ds.attrs["satellite_name"] == "NPP" if sat == "NPP" else "NOAA 20" + assert ds.attrs["satellite_name"] == ("NPP" if sat == "SNPP" else "NOAA 20") assert ds.attrs["spatial_resolution"].strip().startswith(str(res)) From 20da100951948c2f1e6eabada2aa728ed97606a4 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Mon, 18 Mar 2024 13:35:18 -0400 Subject: [PATCH 17/49] Update monetio/sat/nesdis_viirs_aod_aws_gridded.py 
Co-authored-by: Zachary Moon --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index c0f0914d..4cffbdbe 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -207,7 +207,7 @@ def open_dataset( Opens a dataset for the given date, satellite, data resolution, and averaging time. Parameters: - date (str or datetime.datetime): The date for which to open the dataset. + date (str or datetime-like): The date for which to open the dataset. satellite (str): The satellite to retrieve data from. Valid values are 'SNPP', 'NOAA20', or 'both'. data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. From e8d651508066ab170a80e5d1504ba71b07de273b Mon Sep 17 00:00:00 2001 From: zmoon Date: Mon, 18 Mar 2024 14:15:15 -0600 Subject: [PATCH 18/49] Skip VIIRS test on 3.6 since s3fs fails to import --- tests/test_viirs_aod.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index 454e2a8e..d6bbc8ac 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -1,7 +1,12 @@ +import sys + import pytest from monetio.sat.nesdis_viirs_aod_aws_gridded import open_dataset +if sys.version_info < (3, 7): + pytest.skip("s3fs requires Python 3.7+", allow_module_level=True) + @pytest.mark.parametrize("sat", ["SNPP", "NOAA20"]) @pytest.mark.parametrize("res", [0.05, 0.1, 0.25]) From d2c6fa40e0b67e6083bbc25352621134e1ff5f7f Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 19 Mar 2024 13:44:18 -0400 Subject: [PATCH 19/49] adding error checking and other recommended fixes --- monetio/__init__.py | 1 + monetio/sat/__init__.py | 2 + 
monetio/sat/nesdis_avhrr_aot_aws_gridded.py | 139 ++++++++ monetio/sat/nesdis_eps_viirs_aod_nrt.py | 70 +++- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 334 +++++++++---------- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 104 ++++++ 6 files changed, 460 insertions(+), 190 deletions(-) create mode 100644 monetio/sat/nesdis_avhrr_aot_aws_gridded.py create mode 100644 monetio/sat/nesdis_viirs_ndvi_aws_gridded.py diff --git a/monetio/__init__.py b/monetio/__init__.py index e6356f4f..5023ddca 100644 --- a/monetio/__init__.py +++ b/monetio/__init__.py @@ -40,6 +40,7 @@ "goes", "nesdis_eps_viirs_aod_nrt", "nesdis_viirs_aod_aws_gridded", + "nesdis_avhrr_aot_aws_gridded", # # models "camx", diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index a8606abc..e050a041 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -4,6 +4,7 @@ _omps_nadir_mm, goes, modis_ornl, + nesdis_avhrr_aot_aws_gridded, nesdis_eps_viirs_aod_nrt, nesdis_frp, nesdis_viirs_aod_aws_gridded, @@ -14,6 +15,7 @@ "_modis_l2_mm", "_omps_nadir_mm", "nesdis_viirs_aod_aws_gridded", + "nesdis_avhrr_aot_aws_gridded", "nesdis_eps_viirs_aod_nrt", "nesdis_frp", "modis_ornl", diff --git a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py new file mode 100644 index 00000000..09194c8b --- /dev/null +++ b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py @@ -0,0 +1,139 @@ +def create_daily_aod_list(date_generated, fs, fail_on_error=True): + """ + Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. + + Parameters: + - date_generated (list): A list of dates for which to check the existence of AOD files. + - fs (FileSystem): The file system object used to check file existence and size. + + Returns: + - nodd_file_list (list): A list of paths to the existing AOD files. + - nodd_total_size (int): The total size of the existing AOD files in bytes. 
+ """ + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year = file_date[:4] + prod_path = "noaa-cdr-aerosol-optical-thickness-pds/data/daily/" + year + "/" + file_name = fs.glob(prod_path + "AOT_AVHRR_*_daily-avg_" + file_date + "_*.nc") + # If file exists, add path to list and add file size to total + print(file_name) + if fs.exists(file_name[0]) is True: + nodd_file_list.append(file_name[0]) + nodd_total_size = nodd_total_size + fs.size(file_name[0]) + return nodd_file_list, nodd_total_size + + +def create_monthly_aod_list(date_generated, fs): + """ + Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. + + Parameters: + - data_resolution (str): The resolution of the AOD data. + - satellite (str): The satellite name. Can be 'both', 'SNPP', or 'NOAA20'. + - date_generated (list): A list of dates for which to check the existence of AOD files. + - fs (FileSystem): The file system object used to check file existence and size. + + Returns: + - nodd_file_list (list): A list of paths to the existing AOD files. + - nodd_total_size (int): The total size of the existing AOD files in bytes. 
+ """ + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year = file_date[:4] + prod_path = "noaa-cdr-aerosol-optical-thickness-pds/data/daily/" + year + "/" + file_name = fs.glob(prod_path + "AOT_AVHRR_*_daily-avg_" + file_date + "_*.nc") + # If file exists, add path to list and add file size to total + if fs.exists(file_name[0]) is True: + nodd_file_list.append(file_name[0]) + nodd_total_size = nodd_total_size + fs.size(file_name[0]) + return nodd_file_list, nodd_total_size + + +def open_dataset(date, averaging_time="daily", download=False, save_path="./"): + """ + Opens a dataset for the given date, satellite, data resolution, and averaging time. + + Parameters: + date (str or datetime.datetime): The date for which to open the dataset. + averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + + Returns: + xarray.Dataset: The opened dataset. + + Raises: + ValueError: If the input values are invalid. + """ + import pandas as pd + import s3fs + import xarray as xr + + if isinstance(date, str): + date_generated = [pd.Timestamp(date)] + else: + date_generated = [date] + + # Access AWS using anonymous credentials + fs = s3fs.S3FileSystem(anon=True) + + if averaging_time == "monthly": + file_list, _ = create_monthly_aod_list(date_generated, fs) + else: + file_list, _ = create_daily_aod_list(date_generated, fs) + + aws_file = fs.open(file_list[0]) + + dset = xr.open_dataset(aws_file) + + # add datetime + # dset = dset.expand_dims(time=date_generated) + + return dset + + +def open_mfdataset(dates, averaging_time="daily", download=False, save_path="./"): + """ + Opens and combines multiple NetCDF files into a single xarray dataset. + + Parameters: + dates (pandas.DatetimeIndex): The dates for which to retrieve the data. + satellite (str): The satellite name. 
Valid values are 'SNPP', 'NOAA20', or 'both'. + data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. + averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + download (bool, optional): Whether to download the data from AWS. Defaults to False. + save_path (str, optional): The path to save the downloaded data. Defaults to './'. + + Returns: + xarray.Dataset: The combined dataset containing the data for the specified dates. + + Raises: + ValueError: If the input parameters are invalid. + + """ + import pandas as pd + import s3fs + import xarray as xr + + if not isinstance(dates, pd.DatetimeIndex): + raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + + # Access AWS using anonymous credentials + fs = s3fs.S3FileSystem(anon=True) + + if averaging_time == "monthly": + file_list, total_size = create_monthly_aod_list(dates, fs) + else: + file_list, total_size = create_daily_aod_list(dates, fs) + + aws_files = [fs.open(f) for f in file_list] + + dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") + + dset["time"] = dates + + return dset diff --git a/monetio/sat/nesdis_eps_viirs_aod_nrt.py b/monetio/sat/nesdis_eps_viirs_aod_nrt.py index f1747d00..ee0631a6 100644 --- a/monetio/sat/nesdis_eps_viirs_aod_nrt.py +++ b/monetio/sat/nesdis_eps_viirs_aod_nrt.py @@ -60,6 +60,19 @@ def build_urls(dates, *, daily=True, res=0.1, sat="noaa20"): return urls, fnames +def check_remote_file_exists(file_url): + import requests + + r = requests.head(file_url, stream=True, verify=False) + + if r.status_code == 200: + _ = next(r.iter_content(10)) + return True + else: + print(f"HTTP Error {r.status_code} - {r.reason}") + return False + + def retrieve(url, fname): """Download files from the airnowtech S3 server. 
@@ -91,7 +104,7 @@ def retrieve(url, fname): print("\n File Exists: " + fname) -def open_dataset(datestr, sat="noaa20", res=0.1, daily=True, add_timestamp=True): +def open_dataset(datestr, satellite="noaa20", res=0.1, daily=True, add_timestamp=True): import pandas as pd import xarray as xr @@ -99,19 +112,30 @@ def open_dataset(datestr, sat="noaa20", res=0.1, daily=True, add_timestamp=True) d = pd.to_datetime(datestr) else: d = datestr - if sat.lower() == "noaa20": - sat = "noaa20" - else: - sat = "snpp" + + try: + if satellite.lower() not in ("noaa20", "snpp"): + raise ValueError + elif satellite.lower() == "noaa20": + sat = "noaa20" + else: + sat = "snpp" + except ValueError: + print("Invalid input for 'sat': Valid values are 'noaa20' or 'snpp'") # if (res != 0.1) or (res != 0.25): # res = 0.1 # assume resolution is 0.1 if wrong value supplied urls, fnames = build_urls(d, sat=sat, res=res, daily=daily) - url = urls.values[0] fname = fnames.values[0] + try: + if check_remote_file_exists(url) is False: + raise ValueError + except ValueError: + print("File does not exist on NOAA HTTPS server.", url) + return ValueError retrieve(url, fname) dset = xr.open_dataset(fname) @@ -123,24 +147,38 @@ def open_dataset(datestr, sat="noaa20", res=0.1, daily=True, add_timestamp=True) return dset -def open_mfdataset(datestr, sat="noaa20", res=0.1, daily=True, add_timestamp=True): +def open_mfdataset(dates, satellite="noaa20", res=0.1, daily=True): import pandas as pd import xarray as xr - if isinstance(datestr, pd.DatetimeIndex) is False: + try: + if isinstance(dates, pd.DatetimeIndex): + d = dates + else: + raise TypeError + except TypeError: print("Please provide a pandas.DatetimeIndex") - exit - else: - d = datestr - - if sat.lower() == "noaa20": - sat = "noaa20" - else: - sat = "snpp" + return + + try: + if satellite.lower() not in ("noaa20", "snpp"): + raise ValueError + elif satellite.lower() == "noaa20": + sat = "noaa20" + else: + sat = "snpp" + except ValueError: + 
print("Invalid input for 'sat': Valid values are 'noaa20' or 'snpp'") urls, fnames = build_urls(d, sat=sat, res=res, daily=daily) for url, fname in zip(urls, fnames): + try: + if check_remote_file_exists(url) is False: + raise ValueError + except ValueError: + print("File does not exist on NOAA HTTPS server.", url) + return retrieve(url, fname) dset = xr.open_mfdataset(fnames, combine="nested", concat_dim={"time": d}) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index c0f0914d..5c8e71c2 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -1,4 +1,4 @@ -def create_daily_aod_list(data_resolution, satellite, date_generated, fs): +def create_daily_aod_list(data_resolution, satellite, date_generated, fs, warning=False): """ Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. @@ -12,73 +12,51 @@ def create_daily_aod_list(data_resolution, satellite, date_generated, fs): - nodd_file_list (list): A list of paths to the existing AOD files. - nodd_total_size (int): The total size of the existing AOD files in bytes. 
""" + import warnings + # Loop through observation dates & check for files nodd_file_list = [] nodd_total_size = 0 for date in date_generated: file_date = date.strftime("%Y%m%d") year = file_date[:4] - if satellite == "both": - sat_list = ["npp", "noaa20"] - for sat_name in sat_list: - file_name = ( - "viirs_eps_" - + sat_name - + "_aod_" - + data_resolution - + "_deg_" - + file_date - + ".nc" - ) - if sat_name == "npp": - prod_path = ( - "noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" - + data_resolution[:4] - + "_Degrees_Daily/" - + year - + "/" - ) - elif sat_name == "noaa20": - prod_path = ( - "noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" - + data_resolution[:4] - + "_Degrees_Daily/" - + year - + "/" - ) - # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) - else: - if satellite == "SNPP": - sat_name = "npp" - elif satellite == "NOAA20": - sat_name = "noaa20" - file_name = ( - "viirs_eps_" + sat_name + "_aod_" + data_resolution + "_deg_" + file_date + ".nc" - ) - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" - + data_resolution[:4] - + "_Degrees_Daily/" - + year - + "/" - ) - # If file exists, add path to list and add file size to total + + if satellite == "SNPP": + sat_name = "npp" + elif satellite == "NOAA20": + sat_name = "noaa20" + file_name = ( + "viirs_eps_" + sat_name + "_aod_" + data_resolution + "_deg_" + file_date + ".nc" + ) + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" + + data_resolution[:4] + + "_Degrees_Daily/" + + year + + "/" + ) + # If file exists, add path to list and add file size to total + try: if fs.exists(prod_path + file_name) is True: 
nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + elif warning: + warnings.warn("File does not exist on AWS: " + prod_path + file_name) + else: + raise ValueError + except ValueError: + print("File does not exist on AWS: " + prod_path + file_name) + return [], 0 return nodd_file_list, nodd_total_size # Create list of available monthly data file paths & total size of files -def create_monthly_aod_list(satellite, date_generated, fs): +def create_monthly_aod_list(satellite, date_generated, fs, warning=False): """ Creates a list of monthly AOD (Aerosol Optical Depth) files for a given satellite and date range. @@ -91,6 +69,8 @@ def create_monthly_aod_list(satellite, date_generated, fs): tuple: A tuple containing the list of file paths and the total size of the files. """ + import warnings + # Loop through observation dates & check for files nodd_file_list = [] nodd_total_size = 0 @@ -100,41 +80,36 @@ def create_monthly_aod_list(satellite, date_generated, fs): year_month = file_date[:6] if year_month not in year_month_list: year_month_list.append(year_month) - if satellite == "both": - sat_list = ["snpp", "noaa20"] - for sat_name in sat_list: - file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" - if sat_name == "snpp": - prod_path = "noaa-jpss/SNPP/VIIRS/SNPP_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" - elif sat_name == "noaa20": - prod_path = "noaa-jpss/NOAA20/VIIRS/NOAA20_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" - # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) - else: - if satellite == "SNPP": - sat_name = "snpp" - elif satellite == "NOAA20": - sat_name = "noaa20" - file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + 
year_month + ".nc" - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" - ) - # If file exists, add path to list and add file size to total + + if satellite == "SNPP": + sat_name = "snpp" + elif satellite == "NOAA20": + sat_name = "noaa20" + file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" + ) + # If file exists, add path to list and add file size to total + try: if fs.exists(prod_path + file_name) is True: nodd_file_list.extend(fs.ls(prod_path + file_name)) nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + elif warning: + warnings.warn("File does not exist on AWS: " + prod_path + file_name) + else: + raise ValueError("File does not exist on AWS: " + prod_path + file_name) + except ValueError: + print("A error has occured:") return nodd_file_list, nodd_total_size # Create list of available weekly data file paths & total size of files -def create_weekly_aod_list(satellite, date_generated, fs): +def create_weekly_aod_list(satellite, date_generated, fs, warning=False): """ Creates a list of files and calculates the total size of files for a given satellite, observation dates, and file system. 
@@ -152,62 +127,37 @@ def create_weekly_aod_list(satellite, date_generated, fs): for date in date_generated: file_date = date.strftime("%Y%m%d") year = file_date[:4] - if satellite == "both": - sat_list = ["SNPP", "NOAA20"] - for sat_name in sat_list: - prod_path = ( - "noaa-jpss/" - + sat_name - + "/VIIRS/" - + sat_name - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" - + year - + "/" - ) - # Get list of all files in given year on NODD - all_files = fs.ls(prod_path) - # Loop through files, check if file date falls within observation date range - for file in all_files: - file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] - file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] - # If file within observation range, add path to list and add file size to total - if file_date >= file_start and file_date <= file_end: - if file not in nodd_file_list: - nodd_file_list.append(file) - nodd_total_size = nodd_total_size + fs.size(file) - else: - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" - + year - + "/" - ) - # Get list of all files in given year on NODD - all_files = fs.ls(prod_path) - # Loop through files, check if file date falls within observation date range - for file in all_files: - file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] - file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] - # If file within observation range, add path to list and add file size to total - if file_date >= file_start and file_date <= file_end: - if file not in nodd_file_list: - nodd_file_list.append(file) - nodd_total_size = nodd_total_size + fs.size(file) + + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" + + year + + "/" + ) + # Get list of all files in given year on NODD + 
all_files = fs.ls(prod_path) + # Loop through files, check if file date falls within observation date range + for file in all_files: + file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] + file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] + # If file within observation range, add path to list and add file size to total + if file_date >= file_start and file_date <= file_end: + if file not in nodd_file_list: + nodd_file_list.append(file) + nodd_total_size = nodd_total_size + fs.size(file) return nodd_file_list, nodd_total_size -def open_dataset( - date, satellite, data_resolution="0.1", averaging_time="daily", download=False, save_path="./" -): +def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="daily"): """ Opens a dataset for the given date, satellite, data resolution, and averaging time. Parameters: - date (str or datetime.datetime): The date for which to open the dataset. + date (str or datetime-like): The date for which to open the dataset. satellite (str): The satellite to retrieve data from. Valid values are 'SNPP', 'NOAA20', or 'both'. data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. @@ -222,17 +172,20 @@ def open_dataset( import s3fs import xarray as xr - if satellite not in ("SNPP", "NOAA20", "both"): - print( - "Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP" - ) - satellite = "SNPP" - - if data_resolution not in ("0.050", "0.100", "0.250"): - print( - "Invalid input data_resolution. Valid values are '0.050', '0.100', '0.250'. 
Setting default to 0.1" - ) - data_resolution = "0.100" + try: + if satellite not in ("SNPP", "NOAA20"): + raise ValueError + except ValueError: + print('Invalid input for "satellite": Valid values are "SNPP" or "NOAA20"') + return + + data_resolution = str(data_resolution).ljust(5, "0") + try: + if data_resolution not in ("0.050", "0.100", "0.250"): + raise ValueError + except ValueError: + print('Invalid input for "data_resolution": Valid values are "0.050", "0.100", or "0.250"') + return if isinstance(date, str): date_generated = [pd.Timestamp(date)] @@ -242,14 +195,28 @@ def open_dataset( # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time == "monthly": - file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) - elif averaging_time == "weekly": - file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) - else: - file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) - - aws_file = fs.open(file_list[0]) + try: + if averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) + elif averaging_time.lower() == "weekly": + file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) + else: + raise ValueError + except ValueError: + print( + "Invalid input for 'averaging_time': Valid values are 'daily', 'weekly', or 'monthly'" + ) + return + try: + if len(file_list) == 0: + raise ValueError + else: + aws_file = fs.open(file_list[0]) + except ValueError: + print("Files not available for product and date:", date_generated[0]) + return dset = xr.open_dataset(aws_file) @@ -259,9 +226,7 @@ def open_dataset( return dset -def open_mfdataset( - dates, satellite, data_resolution="0.1", averaging_time="daily", download=False, save_path="./" -): +def open_mfdataset(dates, satellite="SNPP", 
data_resolution=0.1, averaging_time="daily"): """ Opens and combines multiple NetCDF files into a single xarray dataset. @@ -284,33 +249,54 @@ def open_mfdataset( import s3fs import xarray as xr - if satellite not in ("SNPP", "NOAA20", "both"): - print( - "Invalid input for 'satellite': Valid values are 'SNPP', 'NOAA20', 'both'. Setting default to SNPP" - ) - satellite = "SNPP" - - if data_resolution not in ("0.050", "0.100", "0.250"): - print( - "Invalid input for data_resolution. Valid values are '0.050', '0.100', '0.250'. Setting default to 0.1" - ) - data_resolution = "0.1" - - if not isinstance(dates, pd.DatetimeIndex): - raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + try: + if satellite not in ("SNPP", "NOAA20"): + raise ValueError + except ValueError: + print('Invalid input for "satellite": Valid values are "SNPP" or "NOAA20"') + + data_resolution = str(data_resolution).ljust(5, "0") + try: + if data_resolution not in ("0.050", "0.100", "0.250"): + raise ValueError + except ValueError: + print('Invalid input for "data_resolution": Valid values are "0.050", "0.100", or "0.250"') + return + + try: + if not isinstance(dates, pd.DatetimeIndex): + raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + except ValueError: + print("Invalid input for 'dates': Expecting pandas.DatetimeIndex") + return # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time == "monthly": - file_list, total_size = create_monthly_aod_list(satellite, dates, fs) - elif averaging_time == "weekly": - file_list, total_size = create_weekly_aod_list(satellite, dates, fs) - else: - file_list, total_size = create_daily_aod_list(data_resolution, satellite, dates, fs) - - print(file_list) - aws_files = [fs.open(f) for f in file_list] + try: + if averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(satellite, dates, fs) + elif averaging_time.lower() == "weekly": + file_list, _ = 
create_weekly_aod_list(satellite, dates, fs) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list(data_resolution, satellite, dates, fs) + else: + raise ValueError + except ValueError: + print( + "Invalid input for 'averaging_time': Valid values are 'daily', 'weekly', or 'monthly'" + ) + return + + try: + if not file_list: + raise ValueError + aws_files = [] + for f in file_list: + aws_files.append(fs.open(f)) + except ValueError: + print("File not available for product and date") + return dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py new file mode 100644 index 00000000..02a33f8f --- /dev/null +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -0,0 +1,104 @@ +def create_daily_vhi_list(date_generated, fs, fail_on_error=True): + """ + Creates a list of daily vhi (Vegetative Health Index) files and calculates the total size of the files. + + Parameters: + - date_generated (list): A list of dates for which to check the existence of AOD files. + - fs (FileSystem): The file system object used to check file existence and size. + + Returns: + - nodd_file_list (list): A list of paths to the existing AOD files. + - nodd_total_size (int): The total size of the existing AOD files in bytes. 
+ """ + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year = file_date[:4] + prod_path = "noaa-cdr-ndvi-pds/data/" + year + "/" + file_name = fs.glob(prod_path + "VIIRS-Land_*_" + file_date + "_*.nc") + # If file exists, add path to list and add file size to total + print(file_name) + if fs.exists(file_name[0]) is True: + nodd_file_list.append(file_name[0]) + nodd_total_size = nodd_total_size + fs.size(file_name[0]) + return nodd_file_list, nodd_total_size + + +def open_dataset(date, download=False, save_path="./"): + """ + Opens a dataset for the given date, satellite, data resolution, and averaging time. + + Parameters: + date (str or datetime.datetime): The date for which to open the dataset. + averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + + Returns: + xarray.Dataset: The opened dataset. + + Raises: + ValueError: If the input values are invalid. + """ + import pandas as pd + import s3fs + import xarray as xr + + if isinstance(date, str): + date_generated = [pd.Timestamp(date)] + else: + date_generated = [date] + + # Access AWS using anonymous credentials + fs = s3fs.S3FileSystem(anon=True) + + file_list, _ = create_daily_vhi_list(date_generated, fs) + + aws_file = fs.open(file_list[0]) + + dset = xr.open_dataset(aws_file) + + # add datetime + # dset = dset.expand_dims(time=date_generated) + + return dset + + +def open_mfdataset(dates, download=False, save_path="./"): + """ + Opens and combines multiple NetCDF files into a single xarray dataset. + + Parameters: + dates (pandas.DatetimeIndex): The dates for which to retrieve the data. + satellite (str): The satellite name. Valid values are 'SNPP', 'NOAA20', or 'both'. + data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. 
+ averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + download (bool, optional): Whether to download the data from AWS. Defaults to False. + save_path (str, optional): The path to save the downloaded data. Defaults to './'. + + Returns: + xarray.Dataset: The combined dataset containing the data for the specified dates. + + Raises: + ValueError: If the input parameters are invalid. + + """ + import pandas as pd + import s3fs + import xarray as xr + + if not isinstance(dates, pd.DatetimeIndex): + raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + + # Access AWS using anonymous credentials + fs = s3fs.S3FileSystem(anon=True) + + file_list, total_size = create_daily_vhi_list(dates, fs) + + aws_files = [fs.open(f) for f in file_list] + + dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") + + dset["time"] = dates + + return dset From 95ec96594e254eca6f4b174f1109b3dc3f25234e Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 19 Mar 2024 13:48:27 -0400 Subject: [PATCH 20/49] remove untracked files - accident --- monetio/__init__.py | 2 +- monetio/sat/__init__.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/monetio/__init__.py b/monetio/__init__.py index 5023ddca..08158d74 100644 --- a/monetio/__init__.py +++ b/monetio/__init__.py @@ -40,7 +40,7 @@ "goes", "nesdis_eps_viirs_aod_nrt", "nesdis_viirs_aod_aws_gridded", - "nesdis_avhrr_aot_aws_gridded", + # "nesdis_avhrr_aot_aws_gridded", # # models "camx", diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index e050a041..7692fb2b 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -1,10 +1,9 @@ -from . import ( +from . 
import ( # nesdis_avhrr_aot_aws_gridded, _gridded_eos_mm, _modis_l2_mm, _omps_nadir_mm, goes, modis_ornl, - nesdis_avhrr_aot_aws_gridded, nesdis_eps_viirs_aod_nrt, nesdis_frp, nesdis_viirs_aod_aws_gridded, @@ -15,7 +14,7 @@ "_modis_l2_mm", "_omps_nadir_mm", "nesdis_viirs_aod_aws_gridded", - "nesdis_avhrr_aot_aws_gridded", + # "nesdis_avhrr_aot_aws_gridded", "nesdis_eps_viirs_aod_nrt", "nesdis_frp", "modis_ornl", From 0e4f23ee040243427fff0e9dc0dc937a310b2225 Mon Sep 17 00:00:00 2001 From: Barry Baker Date: Tue, 19 Mar 2024 13:59:49 -0400 Subject: [PATCH 21/49] fix avhrr and ndvi --- monetio/__init__.py | 3 +- monetio/sat/__init__.py | 4 +- monetio/sat/nesdis_avhrr_aot_aws_gridded.py | 59 +++++++++++++++----- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 44 +++++++++++---- 4 files changed, 84 insertions(+), 26 deletions(-) diff --git a/monetio/__init__.py b/monetio/__init__.py index 08158d74..0e68e10e 100644 --- a/monetio/__init__.py +++ b/monetio/__init__.py @@ -40,7 +40,8 @@ "goes", "nesdis_eps_viirs_aod_nrt", "nesdis_viirs_aod_aws_gridded", - # "nesdis_avhrr_aot_aws_gridded", + "nesdis_avhrr_aot_aws_gridded", + "nesdis_viirs_ndvi_aws_gridded", # # models "camx", diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index 7692fb2b..786a0d34 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -7,6 +7,7 @@ nesdis_eps_viirs_aod_nrt, nesdis_frp, nesdis_viirs_aod_aws_gridded, + nesdis_viirs_ndvi_aws_gridded, ) __all__ = [ @@ -14,7 +15,8 @@ "_modis_l2_mm", "_omps_nadir_mm", "nesdis_viirs_aod_aws_gridded", - # "nesdis_avhrr_aot_aws_gridded", + "nesdis_avhrr_aot_aws_gridded", + "nesdis_viirs_ndvi_aws_gridded", "nesdis_eps_viirs_aod_nrt", "nesdis_frp", "modis_ornl", diff --git a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py index 09194c8b..a511cdc8 100644 --- a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py +++ b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py @@ -81,12 +81,25 @@ def open_dataset(date, 
averaging_time="daily", download=False, save_path="./"): # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time == "monthly": - file_list, _ = create_monthly_aod_list(date_generated, fs) - else: - file_list, _ = create_daily_aod_list(date_generated, fs) - - aws_file = fs.open(file_list[0]) + try: + if averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(date_generated, fs) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list(date_generated, fs) + else: + raise ValueError + except ValueError: + print("Invalid input for 'averaging_time': Valid values are 'daily' or 'monthly'") + return + + try: + if len(file_list) == 0: + raise ValueError + else: + aws_file = fs.open(file_list[0]) + except ValueError: + print("Files not available for product and date:", date_generated[0]) + return dset = xr.open_dataset(aws_file) @@ -119,18 +132,36 @@ def open_mfdataset(dates, averaging_time="daily", download=False, save_path="./" import s3fs import xarray as xr - if not isinstance(dates, pd.DatetimeIndex): - raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + try: + if not isinstance(dates, pd.DatetimeIndex): + raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + except ValueError: + print("Invalid input for 'dates': Expecting pandas.DatetimeIndex") + return # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time == "monthly": - file_list, total_size = create_monthly_aod_list(dates, fs) - else: - file_list, total_size = create_daily_aod_list(dates, fs) - - aws_files = [fs.open(f) for f in file_list] + try: + if averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(dates, fs) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list(dates, fs) + else: + raise ValueError + except ValueError: + print("Invalid input for 'averaging_time': Valid values are 'daily' or 
'monthly'") + return + + try: + if not file_list: + raise ValueError + aws_files = [] + for f in file_list: + aws_files.append(fs.open(f)) + except ValueError: + print("File not available for product and date") + return dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index 02a33f8f..5d0bc90b 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -1,4 +1,4 @@ -def create_daily_vhi_list(date_generated, fs, fail_on_error=True): +def create_daily_vhi_list(date_generated, fs): """ Creates a list of daily vhi (Vegetative Health Index) files and calculates the total size of the files. @@ -19,14 +19,19 @@ def create_daily_vhi_list(date_generated, fs, fail_on_error=True): prod_path = "noaa-cdr-ndvi-pds/data/" + year + "/" file_name = fs.glob(prod_path + "VIIRS-Land_*_" + file_date + "_*.nc") # If file exists, add path to list and add file size to total - print(file_name) - if fs.exists(file_name[0]) is True: - nodd_file_list.append(file_name[0]) - nodd_total_size = nodd_total_size + fs.size(file_name[0]) + try: + if fs.exists(prod_path + file_name) is True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + else: + raise ValueError + except ValueError: + print("File does not exist on AWS: " + prod_path + file_name) + return [], 0 return nodd_file_list, nodd_total_size -def open_dataset(date, download=False, save_path="./"): +def open_dataset(date): """ Opens a dataset for the given date, satellite, data resolution, and averaging time. 
@@ -54,7 +59,14 @@ def open_dataset(date, download=False, save_path="./"): file_list, _ = create_daily_vhi_list(date_generated, fs) - aws_file = fs.open(file_list[0]) + try: + if len(file_list) == 0: + raise ValueError + else: + aws_file = fs.open(file_list[0]) + except ValueError: + print("Files not available for product and date:", date_generated[0]) + return dset = xr.open_dataset(aws_file) @@ -87,15 +99,27 @@ def open_mfdataset(dates, download=False, save_path="./"): import s3fs import xarray as xr - if not isinstance(dates, pd.DatetimeIndex): - raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + try: + if not isinstance(dates, pd.DatetimeIndex): + raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") + except ValueError: + print("Invalid input for 'dates': Expecting pandas.DatetimeIndex") + return # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) file_list, total_size = create_daily_vhi_list(dates, fs) - aws_files = [fs.open(f) for f in file_list] + try: + if not file_list: + raise ValueError + aws_files = [] + for f in file_list: + aws_files.append(fs.open(f)) + except ValueError: + print("File not available for product and date") + return dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") From 28572b3c64ae302c5f4f2f6c1640447e6f7829b3 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 13:07:55 -0600 Subject: [PATCH 22/49] Test input validation right now these don't pass --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 1 - tests/test_viirs_aod.py | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 5c8e71c2..03cfc1f6 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -243,7 +243,6 @@ def open_mfdataset(dates, satellite="SNPP", data_resolution=0.1, averaging_time= 
Raises: ValueError: If the input parameters are invalid. - """ import pandas as pd import s3fs diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index d6bbc8ac..ff7c4912 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -21,3 +21,22 @@ def test_open_dataset(sat, res): assert ds.sizes["lon"] == int(360 / res) assert ds.attrs["satellite_name"] == ("NPP" if sat == "SNPP" else "NOAA 20") assert ds.attrs["spatial_resolution"].strip().startswith(str(res)) + + +def test_open_dataset_bad_input(): + with pytest.raises(ValueError, match="Invalid input"): + open_dataset("2020-01-01", satellite="GOES-16") + + with pytest.raises(ValueError, match="Invalid input"): + open_dataset("2020-01-01", data_resolution=100) + + with pytest.raises(ValueError, match="Invalid input"): + open_dataset("2020-01-01", averaging_time="asdf") + + +def test_open_dataset_no_data(): + with ( + pytest.raises(ValueError, match="Invalid date"), + # pytest.warns(UserWarning, match="not available"), + ): + open_dataset("1900-01-01") From 20f307f230d79e3fa4e0faaad5d6b858a5030be1 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 13:34:04 -0600 Subject: [PATCH 23/49] Raise --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 62 +++++++++------------ tests/test_viirs_aod.py | 6 +- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 03cfc1f6..9f0bb8da 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -158,7 +158,7 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da Parameters: date (str or datetime-like): The date for which to open the dataset. - satellite (str): The satellite to retrieve data from. Valid values are 'SNPP', 'NOAA20', or 'both'. + satellite (str): The satellite to retrieve data from. Valid values are 'SNPP', or 'NOAA20'. 
data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. @@ -172,20 +172,18 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da import s3fs import xarray as xr - try: - if satellite not in ("SNPP", "NOAA20"): - raise ValueError - except ValueError: - print('Invalid input for "satellite": Valid values are "SNPP" or "NOAA20"') - return + if satellite not in {"SNPP", "NOAA20"}: + raise ValueError( + f'Invalid input for "satellite" {satellite!r}: Valid values are "SNPP" or "NOAA20"' + ) + data_resolution_in = data_resolution data_resolution = str(data_resolution).ljust(5, "0") - try: - if data_resolution not in ("0.050", "0.100", "0.250"): - raise ValueError - except ValueError: - print('Invalid input for "data_resolution": Valid values are "0.050", "0.100", or "0.250"') - return + if data_resolution not in ("0.050", "0.100", "0.250"): + raise ValueError( + f'Invalid input for "data_resolution" {data_resolution_in!r}: ' + 'Valid values are "0.050", "0.100", or "0.250"' + ) if isinstance(date, str): date_generated = [pd.Timestamp(date)] @@ -195,32 +193,26 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - try: - if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) - elif averaging_time.lower() == "weekly": - file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) - elif averaging_time.lower() == "daily": - file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) - else: - raise ValueError - except ValueError: - print( - "Invalid input for 'averaging_time': Valid values are 'daily', 'weekly', or 'monthly'" + if averaging_time.lower() == "monthly": + 
file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) + elif averaging_time.lower() == "weekly": + file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) + else: + raise ValueError( + f"Invalid input for 'averaging_time' {averaging_time!r}: " + "Valid values are 'daily', 'weekly', or 'monthly'" ) - return - try: - if len(file_list) == 0: - raise ValueError - else: - aws_file = fs.open(file_list[0]) - except ValueError: - print("Files not available for product and date:", date_generated[0]) - return + + if len(file_list) == 0: + raise ValueError(f"Files not available for product and date: {date_generated[0]}") + + aws_file = fs.open(file_list[0]) dset = xr.open_dataset(aws_file) - # add datetime + # Add datetime dset = dset.expand_dims(time=date_generated) return dset diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index ff7c4912..1900868d 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -26,6 +26,7 @@ def test_open_dataset(sat, res): def test_open_dataset_bad_input(): with pytest.raises(ValueError, match="Invalid input"): open_dataset("2020-01-01", satellite="GOES-16") + open_dataset("2020-01-01", satellite="both") with pytest.raises(ValueError, match="Invalid input"): open_dataset("2020-01-01", data_resolution=100) @@ -35,8 +36,5 @@ def test_open_dataset_bad_input(): def test_open_dataset_no_data(): - with ( - pytest.raises(ValueError, match="Invalid date"), - # pytest.warns(UserWarning, match="not available"), - ): + with pytest.raises(ValueError, match="Files not available"): open_dataset("1900-01-01") From f54b0a69a002d1acc69c4e2c554dd233e5aa1004 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 13:36:43 -0600 Subject: [PATCH 24/49] Clean up docstring a bit --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 
deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 9f0bb8da..39ed61d3 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -153,20 +153,23 @@ def create_weekly_aod_list(satellite, date_generated, fs, warning=False): def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="daily"): - """ - Opens a dataset for the given date, satellite, data resolution, and averaging time. + """Load VIIRS AOD data + for the given date, satellite, data resolution, and averaging time. Parameters: date (str or datetime-like): The date for which to open the dataset. - satellite (str): The satellite to retrieve data from. Valid values are 'SNPP', or 'NOAA20'. - data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. - averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + satellite (str): The satellite to retrieve data from. + Valid values are 'SNPP', or 'NOAA20'. + data_resolution (str, optional): The data resolution. + Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. + averaging_time (str, optional): The averaging time. + Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. Returns: xarray.Dataset: The opened dataset. Raises: - ValueError: If the input values are invalid. + ValueError: If the input parameters are invalid. 
""" import pandas as pd import s3fs From 8e402ccad6b837137e255aeba7f56c75a6a7383b Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 13:41:00 -0600 Subject: [PATCH 25/49] consistency --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 39ed61d3..caee0893 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -177,15 +177,15 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da if satellite not in {"SNPP", "NOAA20"}: raise ValueError( - f'Invalid input for "satellite" {satellite!r}: Valid values are "SNPP" or "NOAA20"' + f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" ) data_resolution_in = data_resolution data_resolution = str(data_resolution).ljust(5, "0") - if data_resolution not in ("0.050", "0.100", "0.250"): + if data_resolution not in {"0.050", "0.100", "0.250"}: raise ValueError( - f'Invalid input for "data_resolution" {data_resolution_in!r}: ' - 'Valid values are "0.050", "0.100", or "0.250"' + f"Invalid input for 'data_resolution' {data_resolution_in!r}: " + "Valid values are '0.050', '0.100', or '0.250'" ) if isinstance(date, str): From 1d1f55a522bcd496cd511bc738061d82fae86290 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 13:43:26 -0600 Subject: [PATCH 26/49] More notes --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index caee0893..65180c71 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -153,7 +153,7 @@ def create_weekly_aod_list(satellite, date_generated, fs, warning=False): def open_dataset(date, satellite="SNPP", 
data_resolution=0.1, averaging_time="daily"): - """Load VIIRS AOD data + """Load VIIRS AOD data from AWS for the given date, satellite, data resolution, and averaging time. Parameters: @@ -162,6 +162,7 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da Valid values are 'SNPP', or 'NOAA20'. data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. + Only has an effect when `averaging_time` is 'daily'. averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. From ce1b64470190affd6ee861af34b4373a2608e2e0 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 14:08:54 -0600 Subject: [PATCH 27/49] Add date info to open_dataset docstrings --- monetio/sat/nesdis_eps_viirs_aod_nrt.py | 15 +++++++++++---- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 2 ++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/monetio/sat/nesdis_eps_viirs_aod_nrt.py b/monetio/sat/nesdis_eps_viirs_aod_nrt.py index ee0631a6..9138f3f3 100644 --- a/monetio/sat/nesdis_eps_viirs_aod_nrt.py +++ b/monetio/sat/nesdis_eps_viirs_aod_nrt.py @@ -104,14 +104,21 @@ def retrieve(url, fname): print("\n File Exists: " + fname) -def open_dataset(datestr, satellite="noaa20", res=0.1, daily=True, add_timestamp=True): +def open_dataset(date, satellite="noaa20", res=0.1, daily=True, add_timestamp=True): + """ + Parameters + ---------- + datestr : str or datetime-like + The date for which to open the dataset. + 2022-10-29 to current is available. 
+ """ import pandas as pd import xarray as xr - if ~isinstance(datestr, pd.Timestamp): - d = pd.to_datetime(datestr) + if not isinstance(date, pd.Timestamp): + d = pd.to_datetime(date) else: - d = datestr + d = date try: if satellite.lower() not in ("noaa20", "snpp"): diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 65180c71..bdae4ebf 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -158,6 +158,8 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da Parameters: date (str or datetime-like): The date for which to open the dataset. + SNPP has data from 2012-01-19 to 2020-12-31. + NOAA20 has data from 2018-01-01 to 2020-12-31. satellite (str): The satellite to retrieve data from. Valid values are 'SNPP', or 'NOAA20'. data_resolution (str, optional): The data resolution. From 9212c80049f00ae2f3e9e8c8430557f4f66c1c40 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 14:09:43 -0600 Subject: [PATCH 28/49] sp --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index bdae4ebf..c2a60118 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -103,7 +103,7 @@ def create_monthly_aod_list(satellite, date_generated, fs, warning=False): else: raise ValueError("File does not exist on AWS: " + prod_path + file_name) except ValueError: - print("A error has occured:") + print("A error has occurred:") return nodd_file_list, nodd_total_size From efa245b0d971c5a6220ed0fc49348f6a98cdf18d Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 14:22:30 -0600 Subject: [PATCH 29/49] Raise in mf version too --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 77 +++++++++------------ tests/test_viirs_aod.py | 22 +++++- 2 files 
changed, 55 insertions(+), 44 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index c2a60118..f27a6fd8 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -242,60 +242,51 @@ def open_mfdataset(dates, satellite="SNPP", data_resolution=0.1, averaging_time= Raises: ValueError: If the input parameters are invalid. """ + from collections.abc import Iterable + import pandas as pd import s3fs import xarray as xr - try: - if satellite not in ("SNPP", "NOAA20"): - raise ValueError - except ValueError: - print('Invalid input for "satellite": Valid values are "SNPP" or "NOAA20"') + if satellite not in {"SNPP", "NOAA20", "both"}: + raise ValueError( + f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" + ) + data_resolution_in = data_resolution data_resolution = str(data_resolution).ljust(5, "0") - try: - if data_resolution not in ("0.050", "0.100", "0.250"): - raise ValueError - except ValueError: - print('Invalid input for "data_resolution": Valid values are "0.050", "0.100", or "0.250"') - return - - try: - if not isinstance(dates, pd.DatetimeIndex): - raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") - except ValueError: - print("Invalid input for 'dates': Expecting pandas.DatetimeIndex") - return + if data_resolution not in {"0.050", "0.100", "0.250"}: + raise ValueError( + f"Invalid input for 'data_resolution' {data_resolution_in!r}: " + "Valid values are '0.050', '0.100', or '0.250'" + ) + + if isinstance(dates, Iterable): + dates = pd.DatetimeIndex(dates) + else: + dates = pd.DatetimeIndex([dates]) # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - try: - if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(satellite, dates, fs) - elif averaging_time.lower() == "weekly": - file_list, _ = create_weekly_aod_list(satellite, dates, fs) - 
elif averaging_time.lower() == "daily": - file_list, _ = create_daily_aod_list(data_resolution, satellite, dates, fs) - else: - raise ValueError - except ValueError: - print( - "Invalid input for 'averaging_time': Valid values are 'daily', 'weekly', or 'monthly'" + if averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(satellite, dates, fs) + elif averaging_time.lower() == "weekly": + file_list, _ = create_weekly_aod_list(satellite, dates, fs) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list(data_resolution, satellite, dates, fs) + else: + raise ValueError( + f"Invalid input for 'averaging_time' {averaging_time!r}: " + "Valid values are 'daily', 'weekly', or 'monthly'" ) - return - - try: - if not file_list: - raise ValueError - aws_files = [] - for f in file_list: - aws_files.append(fs.open(f)) - except ValueError: - print("File not available for product and date") - return - - dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") + + if len(file_list) == 0: + raise ValueError(f"Files not available for product and dates: {dates}") + + aws_files = [fs.open(f) for f in file_list] + + dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested") dset["time"] = dates diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index 1900868d..dfed7f04 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -2,7 +2,7 @@ import pytest -from monetio.sat.nesdis_viirs_aod_aws_gridded import open_dataset +from monetio.sat.nesdis_viirs_aod_aws_gridded import open_dataset, open_mfdataset if sys.version_info < (3, 7): pytest.skip("s3fs requires Python 3.7+", allow_module_level=True) @@ -26,6 +26,8 @@ def test_open_dataset(sat, res): def test_open_dataset_bad_input(): with pytest.raises(ValueError, match="Invalid input"): open_dataset("2020-01-01", satellite="GOES-16") + + with pytest.raises(ValueError, match="Invalid input"): open_dataset("2020-01-01", 
satellite="both") with pytest.raises(ValueError, match="Invalid input"): @@ -38,3 +40,21 @@ def test_open_dataset_bad_input(): def test_open_dataset_no_data(): with pytest.raises(ValueError, match="Files not available"): open_dataset("1900-01-01") + + +def test_open_mfdataset_bad_input(): + cases = [ + {"satellite": "GOES-16"}, + # {"satellite": "both"}, + {"data_resolution": 100}, + {"averaging_time": "asdf"}, + ] + for case in cases: + with pytest.raises(ValueError, match="Invalid input"): + open_mfdataset(["2020-01-01"], **case) + + +def test_open_mfdataset(): + ds = open_mfdataset(["2020-01-01", "2020-01-02"], satellite="both", data_resolution=0.25) + assert set(ds.dims) == {"time", "lat", "lon"} + assert ds.sizes["time"] == 1 From 915e9fe4e0c5dec5e2e0f82e93f93f5d0784b36f Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 14:24:08 -0600 Subject: [PATCH 30/49] 'both' option doesn't work currently since sat_name doesn't get set --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 12 ++++++------ tests/test_viirs_aod.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index f27a6fd8..e88b2cbd 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -4,7 +4,7 @@ def create_daily_aod_list(data_resolution, satellite, date_generated, fs, warnin Parameters: - data_resolution (str): The resolution of the AOD data. - - satellite (str): The satellite name. Can be 'both', 'SNPP', or 'NOAA20'. + - satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. - date_generated (list): A list of dates for which to check the existence of AOD files. - fs (FileSystem): The file system object used to check file existence and size. 
@@ -61,7 +61,7 @@ def create_monthly_aod_list(satellite, date_generated, fs, warning=False): Creates a list of monthly AOD (Aerosol Optical Depth) files for a given satellite and date range. Args: - satellite (str): The satellite name. Can be 'both', 'SNPP', or 'NOAA20'. + satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. date_generated (list): A list of datetime objects representing the observation dates. fs: The file system object used to check for file existence and retrieve file information. @@ -114,7 +114,7 @@ def create_weekly_aod_list(satellite, date_generated, fs, warning=False): Creates a list of files and calculates the total size of files for a given satellite, observation dates, and file system. Parameters: - satellite (str): The satellite name. Can be 'both', 'SNPP', or 'NOAA20'. + satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. date_generated (list): A list of observation dates. fs (FileSystem): The file system object. @@ -161,7 +161,7 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da SNPP has data from 2012-01-19 to 2020-12-31. NOAA20 has data from 2018-01-01 to 2020-12-31. satellite (str): The satellite to retrieve data from. - Valid values are 'SNPP', or 'NOAA20'. + Valid values are 'SNPP' or 'NOAA20'. data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. Only has an effect when `averaging_time` is 'daily'. @@ -230,7 +230,7 @@ def open_mfdataset(dates, satellite="SNPP", data_resolution=0.1, averaging_time= Parameters: dates (pandas.DatetimeIndex): The dates for which to retrieve the data. - satellite (str): The satellite name. Valid values are 'SNPP', 'NOAA20', or 'both'. + satellite (str): The satellite name. Valid values are 'SNPP' or 'NOAA20'. data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. averaging_time (str, optional): The averaging time. 
Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. download (bool, optional): Whether to download the data from AWS. Defaults to False. @@ -248,7 +248,7 @@ def open_mfdataset(dates, satellite="SNPP", data_resolution=0.1, averaging_time= import s3fs import xarray as xr - if satellite not in {"SNPP", "NOAA20", "both"}: + if satellite not in {"SNPP", "NOAA20"}: raise ValueError( f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" ) diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index dfed7f04..7a327240 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -45,7 +45,7 @@ def test_open_dataset_no_data(): def test_open_mfdataset_bad_input(): cases = [ {"satellite": "GOES-16"}, - # {"satellite": "both"}, + {"satellite": "both"}, {"data_resolution": 100}, {"averaging_time": "asdf"}, ] From 535dc1290d3d1ef267cf9ab7354b7ab03a91351f Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 14:31:57 -0600 Subject: [PATCH 31/49] Initial mf test --- tests/test_viirs_aod.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index 7a327240..f500affa 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -1,5 +1,6 @@ import sys +import pandas as pd import pytest from monetio.sat.nesdis_viirs_aod_aws_gridded import open_dataset, open_mfdataset @@ -21,6 +22,7 @@ def test_open_dataset(sat, res): assert ds.sizes["lon"] == int(360 / res) assert ds.attrs["satellite_name"] == ("NPP" if sat == "SNPP" else "NOAA 20") assert ds.attrs["spatial_resolution"].strip().startswith(str(res)) + assert (ds.time == pd.DatetimeIndex([date])).all() def test_open_dataset_bad_input(): @@ -55,6 +57,8 @@ def test_open_mfdataset_bad_input(): def test_open_mfdataset(): - ds = open_mfdataset(["2020-01-01", "2020-01-02"], satellite="both", data_resolution=0.25) + ds = open_mfdataset(["2020-01-01", "2020-01-02"], satellite="SNPP", 
data_resolution=0.25) assert set(ds.dims) == {"time", "lat", "lon"} - assert ds.sizes["time"] == 1 + assert ds.sizes["time"] == 2 + assert ds.attrs["spatial_resolution"].strip().startswith("0.25") + assert (ds.time == pd.DatetimeIndex(["2020-01-01", "2020-01-02"])).all() From 0d4a4100021a82b50241ea4ce1880b9e5325717d Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 14:34:27 -0600 Subject: [PATCH 32/49] Update mf docstring --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index e88b2cbd..d4e81323 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -162,9 +162,10 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da NOAA20 has data from 2018-01-01 to 2020-12-31. satellite (str): The satellite to retrieve data from. Valid values are 'SNPP' or 'NOAA20'. - data_resolution (str, optional): The data resolution. - Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. + data_resolution (float or str, optional): The data resolution. + Valid values are '0.050', '0.100', or '0.250'. Defaults to 0.1°. Only has an effect when `averaging_time` is 'daily'. + For 'weekly' and 'monthly' data, the resolution is always 0.25. averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. @@ -230,11 +231,16 @@ def open_mfdataset(dates, satellite="SNPP", data_resolution=0.1, averaging_time= Parameters: dates (pandas.DatetimeIndex): The dates for which to retrieve the data. - satellite (str): The satellite name. Valid values are 'SNPP' or 'NOAA20'. - data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. - averaging_time (str, optional): The averaging time. 
Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. - download (bool, optional): Whether to download the data from AWS. Defaults to False. - save_path (str, optional): The path to save the downloaded data. Defaults to './'. + SNPP has data from 2012-01-19 to 2020-12-31. + NOAA20 has data from 2018-01-01 to 2020-12-31. + satellite (str): The satellite name. + Valid values are 'SNPP' or 'NOAA20'. + data_resolution (float or str, optional): The data resolution. + Valid values are '0.050', '0.100', or '0.250'. Defaults to 0.1°. + Only has an effect when `averaging_time` is 'daily'. + For 'weekly' and 'monthly' data, the resolution is always 0.25. + averaging_time (str, optional): The averaging time. + Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. Returns: xarray.Dataset: The combined dataset containing the data for the specified dates. From 4a6a1bec112bdccd44562ebda497501bf1e11da9 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 15:08:36 -0600 Subject: [PATCH 33/49] Skip missing daily file with warning by default as long as have at least one --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 40 ++++++++++++-------- tests/test_viirs_aod.py | 41 ++++++++++++++++++++- 2 files changed, 64 insertions(+), 17 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index d4e81323..3410a0eb 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -40,17 +40,16 @@ def create_daily_aod_list(data_resolution, satellite, date_generated, fs, warnin + "/" ) # If file exists, add path to list and add file size to total - try: - if fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) - elif warning: - warnings.warn("File does not exist on AWS: " + prod_path + file_name) + if fs.exists(prod_path + file_name) is 
True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + else: + msg = "File does not exist on AWS: " + prod_path + file_name + if warning: + warnings.warn(msg, stacklevel=2) + nodd_file_list.append(None) else: - raise ValueError - except ValueError: - print("File does not exist on AWS: " + prod_path + file_name) - return [], 0 + raise ValueError(msg) return nodd_file_list, nodd_total_size @@ -225,7 +224,9 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da return dset -def open_mfdataset(dates, satellite="SNPP", data_resolution=0.1, averaging_time="daily"): +def open_mfdataset( + dates, satellite="SNPP", data_resolution=0.1, averaging_time="daily", error_missing=False +): """ Opens and combines multiple NetCDF files into a single xarray dataset. @@ -241,6 +242,8 @@ def open_mfdataset(dates, satellite="SNPP", data_resolution=0.1, averaging_time= For 'weekly' and 'monthly' data, the resolution is always 0.25. averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + error_missing (bool, optional): If False, skip missing files with warning + and continue processing. Otherwise, raise an error. Returns: xarray.Dataset: The combined dataset containing the data for the specified dates. 
@@ -280,20 +283,27 @@ def open_mfdataset(dates, satellite="SNPP", data_resolution=0.1, averaging_time= elif averaging_time.lower() == "weekly": file_list, _ = create_weekly_aod_list(satellite, dates, fs) elif averaging_time.lower() == "daily": - file_list, _ = create_daily_aod_list(data_resolution, satellite, dates, fs) + file_list, _ = create_daily_aod_list( + data_resolution, satellite, dates, fs, warning=not error_missing + ) else: raise ValueError( f"Invalid input for 'averaging_time' {averaging_time!r}: " "Valid values are 'daily', 'weekly', or 'monthly'" ) - if len(file_list) == 0: + if len(file_list) == 0 or all(f is None for f in file_list): raise ValueError(f"Files not available for product and dates: {dates}") - aws_files = [fs.open(f) for f in file_list] + dates_good = [] + aws_files = [] + for d, f in zip(dates, file_list): + if f is not None: + aws_files.append(fs.open(f)) + dates_good.append(d) dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested") - dset["time"] = dates + dset["time"] = dates_good return dset diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index f500affa..840395de 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -11,7 +11,7 @@ @pytest.mark.parametrize("sat", ["SNPP", "NOAA20"]) @pytest.mark.parametrize("res", [0.05, 0.1, 0.25]) -def test_open_dataset(sat, res): +def test_open_dataset_daily(sat, res): date = "2020-01-01" # a date when we have both SNPP and NOAA-20 data available s_res = f"{res:.3f}" @@ -56,9 +56,46 @@ def test_open_mfdataset_bad_input(): open_mfdataset(["2020-01-01"], **case) -def test_open_mfdataset(): +def test_open_mfdataset_daily(): ds = open_mfdataset(["2020-01-01", "2020-01-02"], satellite="SNPP", data_resolution=0.25) assert set(ds.dims) == {"time", "lat", "lon"} assert ds.sizes["time"] == 2 assert ds.attrs["spatial_resolution"].strip().startswith("0.25") assert (ds.time == pd.DatetimeIndex(["2020-01-01", "2020-01-02"])).all() + + +def 
test_open_mfdataset_monthly(): + with pytest.raises(ValueError, match="conflicting sizes for dimension 'time'"): + open_mfdataset(["2020-01-01", "2020-01-02"], averaging_time="monthly") + + months = pd.date_range(start="2020-01-01", freq="MS", periods=2) + ds = open_mfdataset(months, averaging_time="monthly") + assert ds.sizes["time"] == 2 + + +def test_open_mfdataset_daily_warning(): + dates = ["2012-01-18", "2012-01-19"] # 2012-01-19 is the first available date + + # Warn and skip by default + with pytest.warns(match="File does not exist on AWS:"): + ds = open_mfdataset(dates, satellite="SNPP", data_resolution=0.25, averaging_time="daily") + assert ds.sizes["time"] == 1 + assert (ds.time == pd.DatetimeIndex([dates[1]])).all() + + # Error optionally + with pytest.raises(ValueError, match="File does not exist on AWS:"): + ds = open_mfdataset( + dates, + satellite="SNPP", + data_resolution=0.25, + averaging_time="daily", + error_missing=True, + ) + + +def test_open_mfdataset_no_data(): + with ( + pytest.raises(ValueError, match="Files not available"), + pytest.warns(match="File does not exist on AWS:"), + ): + open_mfdataset(["1900-01-01"]) From efc3b3338ddd68ca02b61f9f1f697e24afbb9320 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 15:35:49 -0600 Subject: [PATCH 34/49] Skip for monthly as well --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 30 ++++++++++------- tests/test_viirs_aod.py | 36 +++++++++++++++++++-- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 3410a0eb..4333e240 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -93,16 +93,16 @@ def create_monthly_aod_list(satellite, date_generated, fs, warning=False): + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" ) # If file exists, add path to list and add file size to total - try: - if 
fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) - elif warning: - warnings.warn("File does not exist on AWS: " + prod_path + file_name) + if fs.exists(prod_path + file_name) is True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + else: + msg = "File does not exist on AWS: " + prod_path + file_name + if warning: + warnings.warn(msg, stacklevel=2) + nodd_file_list.append(None) else: - raise ValueError("File does not exist on AWS: " + prod_path + file_name) - except ValueError: - print("A error has occurred:") + raise ValueError(msg) return nodd_file_list, nodd_total_size @@ -211,7 +211,7 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da "Valid values are 'daily', 'weekly', or 'monthly'" ) - if len(file_list) == 0: + if len(file_list) == 0 or all(f is None for f in file_list): raise ValueError(f"Files not available for product and date: {date_generated[0]}") aws_file = fs.open(file_list[0]) @@ -279,9 +279,9 @@ def open_mfdataset( fs = s3fs.S3FileSystem(anon=True) if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(satellite, dates, fs) + file_list, _ = create_monthly_aod_list(satellite, dates, fs, warning=not error_missing) elif averaging_time.lower() == "weekly": - file_list, _ = create_weekly_aod_list(satellite, dates, fs) + file_list, _ = create_weekly_aod_list(satellite, dates, fs, warning=not error_missing) elif averaging_time.lower() == "daily": file_list, _ = create_daily_aod_list( data_resolution, satellite, dates, fs, warning=not error_missing @@ -295,6 +295,12 @@ def open_mfdataset( if len(file_list) == 0 or all(f is None for f in file_list): raise ValueError(f"Files not available for product and dates: {dates}") + if not len(file_list) == len(dates): + raise ValueError( + "'dates' and discovered file 
list are not the same length. " + "Consider the time frequency ('averaging_time') when constructing your dates input." + ) + dates_good = [] aws_files = [] for d, f in zip(dates, file_list): diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index 840395de..f5505e0f 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -40,7 +40,7 @@ def test_open_dataset_bad_input(): def test_open_dataset_no_data(): - with pytest.raises(ValueError, match="Files not available"): + with pytest.raises(ValueError, match="File does not exist on AWS:"): open_dataset("1900-01-01") @@ -65,7 +65,7 @@ def test_open_mfdataset_daily(): def test_open_mfdataset_monthly(): - with pytest.raises(ValueError, match="conflicting sizes for dimension 'time'"): + with pytest.raises(ValueError, match="not the same length"): open_mfdataset(["2020-01-01", "2020-01-02"], averaging_time="monthly") months = pd.date_range(start="2020-01-01", freq="MS", periods=2) @@ -78,7 +78,12 @@ def test_open_mfdataset_daily_warning(): # Warn and skip by default with pytest.warns(match="File does not exist on AWS:"): - ds = open_mfdataset(dates, satellite="SNPP", data_resolution=0.25, averaging_time="daily") + ds = open_mfdataset( + dates, + satellite="SNPP", + data_resolution=0.25, + averaging_time="daily", + ) assert ds.sizes["time"] == 1 assert (ds.time == pd.DatetimeIndex([dates[1]])).all() @@ -93,6 +98,31 @@ def test_open_mfdataset_daily_warning(): ) +def test_open_mfdataset_monthly_warning(): + dates = ["2011-12-01", "2012-01-01"] + + # Warn and skip by default + with pytest.warns(match="File does not exist on AWS:"): + ds = open_mfdataset( + dates, + satellite="SNPP", + data_resolution=0.25, + averaging_time="monthly", + ) + assert ds.sizes["time"] == 1 + assert (ds.time == pd.DatetimeIndex([dates[1]])).all() + + # Error optionally + with pytest.raises(ValueError, match="File does not exist on AWS:"): + ds = open_mfdataset( + dates, + satellite="SNPP", + data_resolution=0.25, + 
averaging_time="monthly", + error_missing=True, + ) + + def test_open_mfdataset_no_data(): with ( pytest.raises(ValueError, match="Files not available"), From cec582337bbb75b861dc11c69f072c8e21b42037 Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 15:37:02 -0600 Subject: [PATCH 35/49] Always error for not found file in open_dataset since nothing to return, unlike mfdataset, where we still have something as long as there is at least one file that does exist --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 4333e240..8fdc5d1a 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -200,11 +200,13 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da fs = s3fs.S3FileSystem(anon=True) if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) + file_list, _ = create_monthly_aod_list(satellite, date_generated, fs, warning=False) elif averaging_time.lower() == "weekly": - file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) + file_list, _ = create_weekly_aod_list(satellite, date_generated, fs, warning=False) elif averaging_time.lower() == "daily": - file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) + file_list, _ = create_daily_aod_list( + data_resolution, satellite, date_generated, fs, warning=False + ) else: raise ValueError( f"Invalid input for 'averaging_time' {averaging_time!r}: " From 62ea6486c06981ca3b1815e2327eba300810a95b Mon Sep 17 00:00:00 2001 From: zmoon Date: Tue, 19 Mar 2024 15:45:53 -0600 Subject: [PATCH 36/49] Fix test for pre 3.10 --- tests/test_viirs_aod.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_viirs_aod.py b/tests/test_viirs_aod.py index 
f5505e0f..2a841d72 100644 --- a/tests/test_viirs_aod.py +++ b/tests/test_viirs_aod.py @@ -124,8 +124,7 @@ def test_open_mfdataset_monthly_warning(): def test_open_mfdataset_no_data(): - with ( - pytest.raises(ValueError, match="Files not available"), - pytest.warns(match="File does not exist on AWS:"), + with pytest.raises(ValueError, match="Files not available"), pytest.warns( + match="File does not exist on AWS:" ): open_mfdataset(["1900-01-01"]) From 192c0e3106a83166ac3d4b96a58f70f2f6d3278f Mon Sep 17 00:00:00 2001 From: zmoon Date: Wed, 14 Aug 2024 15:11:53 -0500 Subject: [PATCH 37/49] NDVI --- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 632 +++++++++---------- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 94 ++- tests/test_viirs_ndvi.py | 49 ++ 3 files changed, 403 insertions(+), 372 deletions(-) create mode 100644 tests/test_viirs_ndvi.py diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 8fdc5d1a..1b25500c 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -1,317 +1,315 @@ -def create_daily_aod_list(data_resolution, satellite, date_generated, fs, warning=False): - """ - Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. - - Parameters: - - data_resolution (str): The resolution of the AOD data. - - satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. - - date_generated (list): A list of dates for which to check the existence of AOD files. - - fs (FileSystem): The file system object used to check file existence and size. - - Returns: - - nodd_file_list (list): A list of paths to the existing AOD files. - - nodd_total_size (int): The total size of the existing AOD files in bytes. 
- """ - import warnings - - # Loop through observation dates & check for files - nodd_file_list = [] - nodd_total_size = 0 - for date in date_generated: - file_date = date.strftime("%Y%m%d") - year = file_date[:4] - - if satellite == "SNPP": - sat_name = "npp" - elif satellite == "NOAA20": - sat_name = "noaa20" - file_name = ( - "viirs_eps_" + sat_name + "_aod_" + data_resolution + "_deg_" + file_date + ".nc" - ) - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" - + data_resolution[:4] - + "_Degrees_Daily/" - + year - + "/" - ) - # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) - else: - msg = "File does not exist on AWS: " + prod_path + file_name - if warning: - warnings.warn(msg, stacklevel=2) - nodd_file_list.append(None) - else: - raise ValueError(msg) - - return nodd_file_list, nodd_total_size - - -# Create list of available monthly data file paths & total size of files -def create_monthly_aod_list(satellite, date_generated, fs, warning=False): - """ - Creates a list of monthly AOD (Aerosol Optical Depth) files for a given satellite and date range. - - Args: - satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. - date_generated (list): A list of datetime objects representing the observation dates. - fs: The file system object used to check for file existence and retrieve file information. - - Returns: - tuple: A tuple containing the list of file paths and the total size of the files. 
- - """ - import warnings - - # Loop through observation dates & check for files - nodd_file_list = [] - nodd_total_size = 0 - year_month_list = [] - for date in date_generated: - file_date = date.strftime("%Y%m%d") - year_month = file_date[:6] - if year_month not in year_month_list: - year_month_list.append(year_month) - - if satellite == "SNPP": - sat_name = "snpp" - elif satellite == "NOAA20": - sat_name = "noaa20" - file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" - ) - # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) - else: - msg = "File does not exist on AWS: " + prod_path + file_name - if warning: - warnings.warn(msg, stacklevel=2) - nodd_file_list.append(None) - else: - raise ValueError(msg) - - return nodd_file_list, nodd_total_size - - -# Create list of available weekly data file paths & total size of files -def create_weekly_aod_list(satellite, date_generated, fs, warning=False): - """ - Creates a list of files and calculates the total size of files for a given satellite, observation dates, and file system. - - Parameters: - satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. - date_generated (list): A list of observation dates. - fs (FileSystem): The file system object. - - Returns: - tuple: A tuple containing the list of files and the total size of files. 
- """ - # Loop through observation dates & check for files - nodd_file_list = [] - nodd_total_size = 0 - for date in date_generated: - file_date = date.strftime("%Y%m%d") - year = file_date[:4] - - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" - + year - + "/" - ) - # Get list of all files in given year on NODD - all_files = fs.ls(prod_path) - # Loop through files, check if file date falls within observation date range - for file in all_files: - file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] - file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] - # If file within observation range, add path to list and add file size to total - if file_date >= file_start and file_date <= file_end: - if file not in nodd_file_list: - nodd_file_list.append(file) - nodd_total_size = nodd_total_size + fs.size(file) - - return nodd_file_list, nodd_total_size - - -def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="daily"): - """Load VIIRS AOD data from AWS - for the given date, satellite, data resolution, and averaging time. - - Parameters: - date (str or datetime-like): The date for which to open the dataset. - SNPP has data from 2012-01-19 to 2020-12-31. - NOAA20 has data from 2018-01-01 to 2020-12-31. - satellite (str): The satellite to retrieve data from. - Valid values are 'SNPP' or 'NOAA20'. - data_resolution (float or str, optional): The data resolution. - Valid values are '0.050', '0.100', or '0.250'. Defaults to 0.1°. - Only has an effect when `averaging_time` is 'daily'. - For 'weekly' and 'monthly' data, the resolution is always 0.25. - averaging_time (str, optional): The averaging time. - Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. - - Returns: - xarray.Dataset: The opened dataset. - - Raises: - ValueError: If the input parameters are invalid. 
- """ - import pandas as pd - import s3fs - import xarray as xr - - if satellite not in {"SNPP", "NOAA20"}: - raise ValueError( - f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" - ) - - data_resolution_in = data_resolution - data_resolution = str(data_resolution).ljust(5, "0") - if data_resolution not in {"0.050", "0.100", "0.250"}: - raise ValueError( - f"Invalid input for 'data_resolution' {data_resolution_in!r}: " - "Valid values are '0.050', '0.100', or '0.250'" - ) - - if isinstance(date, str): - date_generated = [pd.Timestamp(date)] - else: - date_generated = [date] - - # Access AWS using anonymous credentials - fs = s3fs.S3FileSystem(anon=True) - - if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(satellite, date_generated, fs, warning=False) - elif averaging_time.lower() == "weekly": - file_list, _ = create_weekly_aod_list(satellite, date_generated, fs, warning=False) - elif averaging_time.lower() == "daily": - file_list, _ = create_daily_aod_list( - data_resolution, satellite, date_generated, fs, warning=False - ) - else: - raise ValueError( - f"Invalid input for 'averaging_time' {averaging_time!r}: " - "Valid values are 'daily', 'weekly', or 'monthly'" - ) - - if len(file_list) == 0 or all(f is None for f in file_list): - raise ValueError(f"Files not available for product and date: {date_generated[0]}") - - aws_file = fs.open(file_list[0]) - - dset = xr.open_dataset(aws_file) - - # Add datetime - dset = dset.expand_dims(time=date_generated) - - return dset - - -def open_mfdataset( - dates, satellite="SNPP", data_resolution=0.1, averaging_time="daily", error_missing=False -): - """ - Opens and combines multiple NetCDF files into a single xarray dataset. - - Parameters: - dates (pandas.DatetimeIndex): The dates for which to retrieve the data. - SNPP has data from 2012-01-19 to 2020-12-31. - NOAA20 has data from 2018-01-01 to 2020-12-31. - satellite (str): The satellite name. 
- Valid values are 'SNPP' or 'NOAA20'. - data_resolution (float or str, optional): The data resolution. - Valid values are '0.050', '0.100', or '0.250'. Defaults to 0.1°. - Only has an effect when `averaging_time` is 'daily'. - For 'weekly' and 'monthly' data, the resolution is always 0.25. - averaging_time (str, optional): The averaging time. - Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. - error_missing (bool, optional): If False, skip missing files with warning - and continue processing. Otherwise, raise an error. - - Returns: - xarray.Dataset: The combined dataset containing the data for the specified dates. - - Raises: - ValueError: If the input parameters are invalid. - """ - from collections.abc import Iterable - - import pandas as pd - import s3fs - import xarray as xr - - if satellite not in {"SNPP", "NOAA20"}: - raise ValueError( - f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" - ) - - data_resolution_in = data_resolution - data_resolution = str(data_resolution).ljust(5, "0") - if data_resolution not in {"0.050", "0.100", "0.250"}: - raise ValueError( - f"Invalid input for 'data_resolution' {data_resolution_in!r}: " - "Valid values are '0.050', '0.100', or '0.250'" - ) - - if isinstance(dates, Iterable): - dates = pd.DatetimeIndex(dates) - else: - dates = pd.DatetimeIndex([dates]) - - # Access AWS using anonymous credentials - fs = s3fs.S3FileSystem(anon=True) - - if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(satellite, dates, fs, warning=not error_missing) - elif averaging_time.lower() == "weekly": - file_list, _ = create_weekly_aod_list(satellite, dates, fs, warning=not error_missing) - elif averaging_time.lower() == "daily": - file_list, _ = create_daily_aod_list( - data_resolution, satellite, dates, fs, warning=not error_missing - ) - else: - raise ValueError( - f"Invalid input for 'averaging_time' {averaging_time!r}: " - "Valid values are 'daily', 
'weekly', or 'monthly'" - ) - - if len(file_list) == 0 or all(f is None for f in file_list): - raise ValueError(f"Files not available for product and dates: {dates}") - - if not len(file_list) == len(dates): - raise ValueError( - "'dates' and discovered file list are not the same length. " - "Consider the time frequency ('averaging_time') when constructing your dates input." - ) - - dates_good = [] - aws_files = [] - for d, f in zip(dates, file_list): - if f is not None: - aws_files.append(fs.open(f)) - dates_good.append(d) - - dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested") - - dset["time"] = dates_good - - return dset +def create_daily_aod_list(data_resolution, satellite, date_generated, fs, warning=False): + """ + Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. + + Parameters: + data_resolution (str): The resolution of the AOD data. + satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. + date_generated (list): A list of dates for which to check the existence of AOD files. + fs (FileSystem): The file system object used to check file existence and size. + + Returns: + tuple: A tuple containing the list of file paths and the total size of the files. 
+ """ + import warnings + + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year = file_date[:4] + + if satellite == "SNPP": + sat_name = "npp" + elif satellite == "NOAA20": + sat_name = "noaa20" + file_name = ( + "viirs_eps_" + sat_name + "_aod_" + data_resolution + "_deg_" + file_date + ".nc" + ) + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" + + data_resolution[:4] + + "_Degrees_Daily/" + + year + + "/" + ) + # If file exists, add path to list and add file size to total + if fs.exists(prod_path + file_name) is True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + else: + msg = "File does not exist on AWS: " + prod_path + file_name + if warning: + warnings.warn(msg, stacklevel=2) + nodd_file_list.append(None) + else: + raise ValueError(msg) + + return nodd_file_list, nodd_total_size + + +# Create list of available monthly data file paths & total size of files +def create_monthly_aod_list(satellite, date_generated, fs, warning=False): + """ + Creates a list of monthly AOD (Aerosol Optical Depth) files for a given satellite and date range. + + Args: + satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. + date_generated (list): A list of datetime objects representing the observation dates. + fs: The file system object used to check for file existence and retrieve file information. + + Returns: + tuple: A tuple containing the list of file paths and the total size of the files. 
+ """ + import warnings + + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + year_month_list = [] + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year_month = file_date[:6] + if year_month not in year_month_list: + year_month_list.append(year_month) + + if satellite == "SNPP": + sat_name = "snpp" + elif satellite == "NOAA20": + sat_name = "noaa20" + file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" + ) + # If file exists, add path to list and add file size to total + if fs.exists(prod_path + file_name) is True: + nodd_file_list.extend(fs.ls(prod_path + file_name)) + nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + else: + msg = "File does not exist on AWS: " + prod_path + file_name + if warning: + warnings.warn(msg, stacklevel=2) + nodd_file_list.append(None) + else: + raise ValueError(msg) + + return nodd_file_list, nodd_total_size + + +# Create list of available weekly data file paths & total size of files +def create_weekly_aod_list(satellite, date_generated, fs, warning=False): + """ + Creates a list of files and calculates the total size of files for a given satellite, observation dates, and file system. + + Parameters: + satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. + date_generated (list): A list of observation dates. + fs (FileSystem): The file system object. + + Returns: + tuple: A tuple containing the list of files and the total size of files. 
+ """ + # Loop through observation dates & check for files + nodd_file_list = [] + nodd_total_size = 0 + for date in date_generated: + file_date = date.strftime("%Y%m%d") + year = file_date[:4] + + prod_path = ( + "noaa-jpss/" + + satellite + + "/VIIRS/" + + satellite + + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" + + year + + "/" + ) + # Get list of all files in given year on NODD + all_files = fs.ls(prod_path) + # Loop through files, check if file date falls within observation date range + for file in all_files: + file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] + file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] + # If file within observation range, add path to list and add file size to total + if file_date >= file_start and file_date <= file_end: + if file not in nodd_file_list: + nodd_file_list.append(file) + nodd_total_size = nodd_total_size + fs.size(file) + + return nodd_file_list, nodd_total_size + + +def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="daily"): + """Load VIIRS AOD data from AWS + for the given date, satellite, data resolution, and averaging time. + + Parameters: + date (str or datetime-like): The date for which to open the dataset. + SNPP has data from 2012-01-19 to 2020-12-31. + NOAA20 has data from 2018-01-01 to 2020-12-31. + satellite (str): The satellite to retrieve data from. + Valid values are 'SNPP' or 'NOAA20'. + data_resolution (float or str, optional): The data resolution. + Valid values are '0.050', '0.100', or '0.250'. Defaults to 0.1°. + Only has an effect when `averaging_time` is 'daily'. + For 'weekly' and 'monthly' data, the resolution is always 0.25. + averaging_time (str, optional): The averaging time. + Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + + Returns: + xarray.Dataset: The opened dataset. + + Raises: + ValueError: If the input parameters are invalid. 
+ """ + import pandas as pd + import s3fs + import xarray as xr + + if satellite not in {"SNPP", "NOAA20"}: + raise ValueError( + f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" + ) + + data_resolution_in = data_resolution + data_resolution = str(data_resolution).ljust(5, "0") + if data_resolution not in {"0.050", "0.100", "0.250"}: + raise ValueError( + f"Invalid input for 'data_resolution' {data_resolution_in!r}: " + "Valid values are '0.050', '0.100', or '0.250'" + ) + + if isinstance(date, str): + date_generated = [pd.Timestamp(date)] + else: + date_generated = [date] + + # Access AWS using anonymous credentials + fs = s3fs.S3FileSystem(anon=True) + + if averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(satellite, date_generated, fs, warning=False) + elif averaging_time.lower() == "weekly": + file_list, _ = create_weekly_aod_list(satellite, date_generated, fs, warning=False) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list( + data_resolution, satellite, date_generated, fs, warning=False + ) + else: + raise ValueError( + f"Invalid input for 'averaging_time' {averaging_time!r}: " + "Valid values are 'daily', 'weekly', or 'monthly'" + ) + + if len(file_list) == 0 or all(f is None for f in file_list): + raise ValueError(f"Files not available for product and date: {date_generated[0]}") + + aws_file = fs.open(file_list[0]) + + dset = xr.open_dataset(aws_file) + + # Add datetime + dset = dset.expand_dims(time=date_generated) + + return dset + + +def open_mfdataset( + dates, satellite="SNPP", data_resolution=0.1, averaging_time="daily", error_missing=False +): + """ + Opens and combines multiple NetCDF files into a single xarray dataset. + + Parameters: + dates (pandas.DatetimeIndex): The dates for which to retrieve the data. + SNPP has data from 2012-01-19 to 2020-12-31. + NOAA20 has data from 2018-01-01 to 2020-12-31. + satellite (str): The satellite name. 
+ Valid values are 'SNPP' or 'NOAA20'. + data_resolution (float or str, optional): The data resolution. + Valid values are '0.050', '0.100', or '0.250'. Defaults to 0.1°. + Only has an effect when `averaging_time` is 'daily'. + For 'weekly' and 'monthly' data, the resolution is always 0.25. + averaging_time (str, optional): The averaging time. + Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + error_missing (bool, optional): If False (default), skip missing files with warning + and continue processing. Otherwise, raise an error. + + Returns: + xarray.Dataset: The combined dataset containing the data for the specified dates. + + Raises: + ValueError: If the input parameters are invalid. + """ + from collections.abc import Iterable + + import pandas as pd + import s3fs + import xarray as xr + + if satellite not in {"SNPP", "NOAA20"}: + raise ValueError( + f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" + ) + + data_resolution_in = data_resolution + data_resolution = str(data_resolution).ljust(5, "0") + if data_resolution not in {"0.050", "0.100", "0.250"}: + raise ValueError( + f"Invalid input for 'data_resolution' {data_resolution_in!r}: " + "Valid values are '0.050', '0.100', or '0.250'" + ) + + if isinstance(dates, Iterable) and not isinstance(dates, str): + dates = pd.DatetimeIndex(dates) + else: + dates = pd.DatetimeIndex([dates]) + + # Access AWS using anonymous credentials + fs = s3fs.S3FileSystem(anon=True) + + if averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(satellite, dates, fs, warning=not error_missing) + elif averaging_time.lower() == "weekly": + file_list, _ = create_weekly_aod_list(satellite, dates, fs, warning=not error_missing) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list( + data_resolution, satellite, dates, fs, warning=not error_missing + ) + else: + raise ValueError( + f"Invalid input for 'averaging_time' 
{averaging_time!r}: " + "Valid values are 'daily', 'weekly', or 'monthly'" + ) + + if len(file_list) == 0 or all(f is None for f in file_list): + raise ValueError(f"Files not available for product and dates: {dates}") + + if not len(file_list) == len(dates): + raise ValueError( + "'dates' and discovered file list are not the same length. " + "Consider the time frequency ('averaging_time') when constructing your dates input." + ) + + dates_good = [] + aws_files = [] + for d, f in zip(dates, file_list): + if f is not None: + aws_files.append(fs.open(f)) + dates_good.append(d) + + dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested") + + dset["time"] = dates_good + + return dset diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index 5d0bc90b..096b200b 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -1,15 +1,16 @@ -def create_daily_vhi_list(date_generated, fs): +def create_daily_vhi_list(date_generated, fs, warning=False): """ Creates a list of daily vhi (Vegetative Health Index) files and calculates the total size of the files. Parameters: - - date_generated (list): A list of dates for which to check the existence of AOD files. - - fs (FileSystem): The file system object used to check file existence and size. + date_generated (list): A list of dates for which to check the existence of AOD files. + fs (FileSystem): The file system object used to check file existence and size. Returns: - - nodd_file_list (list): A list of paths to the existing AOD files. - - nodd_total_size (int): The total size of the existing AOD files in bytes. + tuple: A tuple containing the list of file paths and the total size of the files. 
""" + import warnings + # Loop through observation dates & check for files nodd_file_list = [] nodd_total_size = 0 @@ -17,33 +18,35 @@ def create_daily_vhi_list(date_generated, fs): file_date = date.strftime("%Y%m%d") year = file_date[:4] prod_path = "noaa-cdr-ndvi-pds/data/" + year + "/" - file_name = fs.glob(prod_path + "VIIRS-Land_*_" + file_date + "_*.nc") + file_names = fs.glob(prod_path + "VIIRS-Land_*_" + file_date + "_*.nc") # If file exists, add path to list and add file size to total - try: - if fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + if file_names: + nodd_file_list.extend(file_names) + nodd_total_size = nodd_total_size + sum(fs.size(f) for f in file_names) + else: + msg = "File does not exist on AWS: " + prod_path + "VIIRS-Land_*_" + file_date + "_*.nc" + if warning: + warnings.warn(msg) + nodd_file_list.append(None) else: - raise ValueError - except ValueError: - print("File does not exist on AWS: " + prod_path + file_name) - return [], 0 + raise ValueError(msg) + return nodd_file_list, nodd_total_size def open_dataset(date): """ - Opens a dataset for the given date, satellite, data resolution, and averaging time. + Opens a dataset for the given date. Parameters: - date (str or datetime.datetime): The date for which to open the dataset. - averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + date (str or datetime-like): The date for which to open the dataset. + 1981--present are available. Returns: xarray.Dataset: The opened dataset. Raises: - ValueError: If the input values are invalid. + ValueError: If the input parameters are invalid. 
""" import pandas as pd import s3fs @@ -59,70 +62,51 @@ def open_dataset(date): file_list, _ = create_daily_vhi_list(date_generated, fs) - try: - if len(file_list) == 0: - raise ValueError - else: - aws_file = fs.open(file_list[0]) - except ValueError: - print("Files not available for product and date:", date_generated[0]) - return + if len(file_list) == 0 or all(f is None for f in file_list): + raise ValueError(f"Files not available for product and date: {date_generated[0]}") - dset = xr.open_dataset(aws_file) + aws_file = fs.open(file_list[0]) - # add datetime - # dset = dset.expand_dims(time=date_generated) + dset = xr.open_dataset(aws_file) return dset -def open_mfdataset(dates, download=False, save_path="./"): +def open_mfdataset(dates, error_missing=False): """ Opens and combines multiple NetCDF files into a single xarray dataset. Parameters: dates (pandas.DatetimeIndex): The dates for which to retrieve the data. - satellite (str): The satellite name. Valid values are 'SNPP', 'NOAA20', or 'both'. - data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'. - averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. - download (bool, optional): Whether to download the data from AWS. Defaults to False. - save_path (str, optional): The path to save the downloaded data. Defaults to './'. - + error_missing (bool, optional): If False (default), skip missing files with warning + and continue processing. Otherwise, raise an error. Returns: xarray.Dataset: The combined dataset containing the data for the specified dates. Raises: ValueError: If the input parameters are invalid. 
- """ + from collections.abc import Iterable + import pandas as pd import s3fs import xarray as xr - try: - if not isinstance(dates, pd.DatetimeIndex): - raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.") - except ValueError: - print("Invalid input for 'dates': Expecting pandas.DatetimeIndex") - return + if isinstance(dates, Iterable) and not isinstance(dates, str): + dates = pd.DatetimeIndex(dates) + else: + dates = pd.DatetimeIndex([dates]) # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - file_list, total_size = create_daily_vhi_list(dates, fs) + file_list, _ = create_daily_vhi_list(dates, fs, warning=not error_missing) - try: - if not file_list: - raise ValueError - aws_files = [] - for f in file_list: - aws_files.append(fs.open(f)) - except ValueError: - print("File not available for product and date") - return + if len(file_list) == 0 or all(f is None for f in file_list): + raise ValueError(f"Files not available for product and dates: {dates}") - dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested") + aws_files = [fs.open(f) for f in file_list if f is not None] - dset["time"] = dates + dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested") return dset diff --git a/tests/test_viirs_ndvi.py b/tests/test_viirs_ndvi.py new file mode 100644 index 00000000..5975b146 --- /dev/null +++ b/tests/test_viirs_ndvi.py @@ -0,0 +1,49 @@ +import sys + +import pandas as pd +import pytest + +from monetio.sat.nesdis_viirs_ndvi_aws_gridded import open_dataset, open_mfdataset + +if sys.version_info < (3, 7): + pytest.skip("s3fs requires Python 3.7+", allow_module_level=True) + + +def test_open_dataset_no_data(): + with pytest.raises(ValueError, match="File does not exist on AWS:"): + open_dataset("1900-01-01") + + +def test_open_dataset(): + date = "2023-01-01" + ds = open_dataset(date) + assert set(ds.dims) >= {"time", "latitude", "longitude"} + assert ds.sizes["time"] == 1 + assert 
ds.sizes["latitude"] == 3600 + assert ds.sizes["longitude"] == 7200 + assert ds["time"] == pd.to_datetime(date) + assert "NDVI" in ds.data_vars + assert ds["NDVI"].dims == ("time", "latitude", "longitude") + + +def test_open_mfdataset(): + dates = ["2023-01-01", "2023-01-02"] + ds = open_mfdataset(dates) + assert (ds["time"] == pd.DatetimeIndex(dates)).all() + + +def test_open_mfdataset_error(): + dates = ["1900-01-01", "2023-01-01"] + + with pytest.warns(UserWarning, match="File does not exist on AWS:"): + ds = open_mfdataset(dates) + assert ds.sizes["time"] == 1 + assert ds["time"] == pd.to_datetime(dates[-1]) + + with pytest.raises(ValueError, match="File does not exist on AWS:"): + _ = open_mfdataset(dates, error_missing=True) + + with pytest.raises(ValueError, match="Files not available for product and dates"), pytest.warns( + UserWarning, match="File does not exist on AWS:" + ): + _ = open_mfdataset(dates[:1], error_missing=False) From 402067b2a19cb75fcecbe5d2745498749ca7be46 Mon Sep 17 00:00:00 2001 From: zmoon Date: Wed, 14 Aug 2024 15:50:04 -0500 Subject: [PATCH 38/49] AVHRR AOT --- monetio/sat/nesdis_avhrr_aot_aws_gridded.py | 163 +++++++++---------- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 5 +- 2 files changed, 84 insertions(+), 84 deletions(-) diff --git a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py index a511cdc8..f7a71eb3 100644 --- a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py +++ b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py @@ -1,15 +1,16 @@ -def create_daily_aod_list(date_generated, fs, fail_on_error=True): +def create_daily_aod_list(date_generated, fs, warning=False): """ Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. Parameters: - - date_generated (list): A list of dates for which to check the existence of AOD files. - - fs (FileSystem): The file system object used to check file existence and size. 
+ date_generated (list): A list of dates for which to check the existence of AOD files. + fs (FileSystem): The file system object used to check file existence and size. Returns: - - nodd_file_list (list): A list of paths to the existing AOD files. - - nodd_total_size (int): The total size of the existing AOD files in bytes. + tuple: A tuple containing the list of file paths and the total size of the files. """ + import warnings + # Loop through observation dates & check for files nodd_file_list = [] nodd_total_size = 0 @@ -17,51 +18,68 @@ def create_daily_aod_list(date_generated, fs, fail_on_error=True): file_date = date.strftime("%Y%m%d") year = file_date[:4] prod_path = "noaa-cdr-aerosol-optical-thickness-pds/data/daily/" + year + "/" - file_name = fs.glob(prod_path + "AOT_AVHRR_*_daily-avg_" + file_date + "_*.nc") + patt = "AOT_AVHRR_*_daily-avg_" + file_names = fs.glob(prod_path + patt + file_date + "_*.nc") # If file exists, add path to list and add file size to total - print(file_name) - if fs.exists(file_name[0]) is True: - nodd_file_list.append(file_name[0]) - nodd_total_size = nodd_total_size + fs.size(file_name[0]) + if file_names: + nodd_file_list.extend(file_names) + nodd_total_size = nodd_total_size + sum(fs.size(f) for f in file_names) + else: + msg = "File does not exist on AWS: " + prod_path + patt + file_date + "_*.nc" + if warning: + warnings.warn(msg) + nodd_file_list.append(None) + else: + raise ValueError(msg) + return nodd_file_list, nodd_total_size -def create_monthly_aod_list(date_generated, fs): +def create_monthly_aod_list(date_generated, fs, warning=False): """ Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. Parameters: - - data_resolution (str): The resolution of the AOD data. - - satellite (str): The satellite name. Can be 'both', 'SNPP', or 'NOAA20'. - - date_generated (list): A list of dates for which to check the existence of AOD files. 
-    - fs (FileSystem): The file system object used to check file existence and size.
+        date_generated (list): A list of dates for which to check the existence of AOD files.
+        fs (FileSystem): The file system object used to check file existence and size.

     Returns:
-    - nodd_file_list (list): A list of paths to the existing AOD files.
-    - nodd_total_size (int): The total size of the existing AOD files in bytes.
+        tuple: A tuple containing the list of file paths and the total size of the files.
     """
+    import warnings
+
     # Loop through observation dates & check for files
     nodd_file_list = []
     nodd_total_size = 0
     for date in date_generated:
         file_date = date.strftime("%Y%m%d")
         year = file_date[:4]
-        prod_path = "noaa-cdr-aerosol-optical-thickness-pds/data/daily/" + year + "/"
-        file_name = fs.glob(prod_path + "AOT_AVHRR_*_daily-avg_" + file_date + "_*.nc")
+        prod_path = "noaa-cdr-aerosol-optical-thickness-pds/data/monthly/" + year + "/"
+        patt = "AOT_AVHRR_*_monthly-avg_"
+        file_names = fs.glob(prod_path + patt + file_date[:6] + "_*.nc")
         # If file exists, add path to list and add file size to total
-        if fs.exists(file_name[0]) is True:
-            nodd_file_list.append(file_name[0])
-            nodd_total_size = nodd_total_size + fs.size(file_name[0])
+        if file_names:
+            nodd_file_list.extend(file_names)
+            nodd_total_size = nodd_total_size + sum(fs.size(f) for f in file_names)
+        else:
+            msg = "File does not exist on AWS: " + prod_path + patt + file_date[:6] + "_*.nc"
+            if warning:
+                warnings.warn(msg)
+                nodd_file_list.append(None)
+            else:
+                raise ValueError(msg)
+
     return nodd_file_list, nodd_total_size


-def open_dataset(date, averaging_time="daily", download=False, save_path="./"):
+def open_dataset(date, averaging_time="daily"):
     """
     Opens a dataset for the given date, satellite, data resolution, and averaging time.

     Parameters:
         date (str or datetime.datetime): The date for which to open the dataset.
-        averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. 
Defaults to 'daily'. + averaging_time (str, optional): The averaging time. + Valid values are 'daily', or 'monthly'. Defaults to 'daily'. Returns: xarray.Dataset: The opened dataset. @@ -81,45 +99,34 @@ def open_dataset(date, averaging_time="daily", download=False, save_path="./"): # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - try: - if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(date_generated, fs) - elif averaging_time.lower() == "daily": - file_list, _ = create_daily_aod_list(date_generated, fs) - else: - raise ValueError - except ValueError: - print("Invalid input for 'averaging_time': Valid values are 'daily' or 'monthly'") - return - - try: - if len(file_list) == 0: - raise ValueError - else: - aws_file = fs.open(file_list[0]) - except ValueError: - print("Files not available for product and date:", date_generated[0]) - return + if averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(date_generated, fs) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list(date_generated, fs) + else: + raise ValueError( + f"Invalid input for 'averaging_time' {averaging_time!r}: " + "Valid values are 'daily' or 'monthly'" + ) - dset = xr.open_dataset(aws_file) + if len(file_list) == 0 or all(f is None for f in file_list): + raise ValueError(f"Files not available for product and date: {date_generated[0]}") + + aws_file = fs.open(file_list[0]) - # add datetime - # dset = dset.expand_dims(time=date_generated) + dset = xr.open_dataset(aws_file) return dset -def open_mfdataset(dates, averaging_time="daily", download=False, save_path="./"): +def open_mfdataset(dates, averaging_time="daily", error_missing=False): """ Opens and combines multiple NetCDF files into a single xarray dataset. Parameters: dates (pandas.DatetimeIndex): The dates for which to retrieve the data. - satellite (str): The satellite name. Valid values are 'SNPP', 'NOAA20', or 'both'. 
-        data_resolution (str, optional): The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to '0.1'.
-        averaging_time (str, optional): The averaging time. Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'.
-        download (bool, optional): Whether to download the data from AWS. Defaults to False.
-        save_path (str, optional): The path to save the downloaded data. Defaults to './'.
+        averaging_time (str, optional): The averaging time.
+            Valid values are 'daily' or 'monthly'. Defaults to 'daily'.

     Returns:
         xarray.Dataset: The combined dataset containing the data for the specified dates.

@@ -128,43 +135,35 @@ def open_mfdataset(dates, averaging_time="daily", download=False, save_path="./"
         ValueError: If the input parameters are invalid.
     """
+    from collections.abc import Iterable
+
     import pandas as pd
     import s3fs
     import xarray as xr

-    try:
-        if not isinstance(dates, pd.DatetimeIndex):
-            raise ValueError("Expecting pandas.DatetimeIndex for 'dates' parameter.")
-    except ValueError:
-        print("Invalid input for 'dates': Expecting pandas.DatetimeIndex")
-        return
+    if isinstance(dates, Iterable) and not isinstance(dates, str):
+        dates = pd.DatetimeIndex(dates)
+    else:
+        dates = pd.DatetimeIndex([dates])

     # Access AWS using anonymous credentials
     fs = s3fs.S3FileSystem(anon=True)

-    try:
-        if averaging_time.lower() == "monthly":
-            file_list, _ = create_monthly_aod_list(dates, fs)
-        elif averaging_time.lower() == "daily":
-            file_list, _ = create_daily_aod_list(dates, fs)
-        else:
-            raise ValueError
-    except ValueError:
-        print("Invalid input for 'averaging_time': Valid values are 'daily' or 'monthly'")
-        return
-
-    try:
-        if not file_list:
-            raise ValueError
-        aws_files = []
-        for f in file_list:
-            aws_files.append(fs.open(f))
-    except ValueError:
-        print("File not available for product and date")
-        return
-
-    dset = xr.open_mfdataset(aws_files, concat_dim={"time": dates}, combine="nested")
-
-    dset["time"] = dates
+    if 
averaging_time.lower() == "monthly": + file_list, _ = create_monthly_aod_list(dates, fs, warning=not error_missing) + elif averaging_time.lower() == "daily": + file_list, _ = create_daily_aod_list(dates, fs, warning=not error_missing) + else: + raise ValueError( + f"Invalid input for 'averaging_time' {averaging_time!r}: " + "Valid values are 'daily' or 'monthly'" + ) + + if len(file_list) == 0 or all(f is None for f in file_list): + raise ValueError(f"Files not available for product and dates: {dates}") + + aws_files = [fs.open(f) for f in file_list if f is not None] + + dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested") return dset diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index 096b200b..79140a94 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -18,13 +18,14 @@ def create_daily_vhi_list(date_generated, fs, warning=False): file_date = date.strftime("%Y%m%d") year = file_date[:4] prod_path = "noaa-cdr-ndvi-pds/data/" + year + "/" - file_names = fs.glob(prod_path + "VIIRS-Land_*_" + file_date + "_*.nc") + patt = "VIIRS-Land_*_" + file_names = fs.glob(prod_path + patt + file_date + "_*.nc") # If file exists, add path to list and add file size to total if file_names: nodd_file_list.extend(file_names) nodd_total_size = nodd_total_size + sum(fs.size(f) for f in file_names) else: - msg = "File does not exist on AWS: " + prod_path + "VIIRS-Land_*_" + file_date + "_*.nc" + msg = "File does not exist on AWS: " + prod_path + patt + file_date + "_*.nc" if warning: warnings.warn(msg) nodd_file_list.append(None) From 45679c42e11caa11a84aaac88323b0e5dec245f6 Mon Sep 17 00:00:00 2001 From: zmoon Date: Wed, 14 Aug 2024 15:57:23 -0500 Subject: [PATCH 39/49] Drop decode error culprit ds.TIMEOFDAY [25920000 values with dtype=datetime64[ns]] Coordinates: * latitude (latitude) float32 89.97 89.93 89.88 ... 
-89.88 -89.93 -89.97 * longitude (longitude) float32 -180.0 -179.9 -179.9 ... 179.9 179.9 180.0 * time (time) datetime64[ns] 2023-01-01 Attributes: scale_factor: 0.01 add_offset: 0.0 long_name: Time since Start of Data Day valid_range: [ 0 2399] _FillValue: -9999 grid_mapping: crs --- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index 79140a94..50895a55 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -68,7 +68,7 @@ def open_dataset(date): aws_file = fs.open(file_list[0]) - dset = xr.open_dataset(aws_file) + dset = xr.open_dataset(aws_file, drop_variables=["TIMEOFDAY"]) return dset @@ -108,6 +108,11 @@ def open_mfdataset(dates, error_missing=False): aws_files = [fs.open(f) for f in file_list if f is not None] - dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested") + dset = xr.open_mfdataset( + aws_files, + concat_dim="time", + combine="nested", + drop_variables=["TIMEOFDAY"], + ) return dset From f88a7fec5cc42e80505cba1f3924eef72b0a2ad9 Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 15 Aug 2024 10:11:12 -0500 Subject: [PATCH 40/49] Hopefully avoid TIMEOFDAY warnings --- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 26 ++++++++++++++++++-- tests/test_viirs_ndvi.py | 12 +++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index 50895a55..746a363e 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -68,7 +68,18 @@ def open_dataset(date): aws_file = fs.open(file_list[0]) - dset = xr.open_dataset(aws_file, drop_variables=["TIMEOFDAY"]) + dset = xr.open_dataset(aws_file, decode_cf=False) + + # Deal with TIMEOFDAY variable manually to avoid warnings + m = 
dset["TIMEOFDAY"].attrs.pop("scale_factor") # 0.01 + b = dset["TIMEOFDAY"].attrs.pop("add_offset") # 0 + fv = dset["TIMEOFDAY"].attrs.pop("_FillValue") # -9999 + dset["TIMEOFDAY"] = dset["TIMEOFDAY"] * m + b + dset["TIMEOFDAY"].attrs.update(units="hours") # -> auto timedelta conversion + dset = xr.decode_cf(dset) + dset["TIMEOFDAY"] = dset["TIMEOFDAY"].where( + dset["TIMEOFDAY"] != pd.Timedelta(fv * m + b, unit="hours") + ) return dset @@ -112,7 +123,18 @@ def open_mfdataset(dates, error_missing=False): aws_files, concat_dim="time", combine="nested", - drop_variables=["TIMEOFDAY"], + decode_cf=False, + ) + + # Deal with TIMEOFDAY variable manually to avoid warnings + m = dset["TIMEOFDAY"].attrs.pop("scale_factor") # 0.01 + b = dset["TIMEOFDAY"].attrs.pop("add_offset") # 0 + fv = dset["TIMEOFDAY"].attrs.pop("_FillValue") # -9999 + dset["TIMEOFDAY"] = dset["TIMEOFDAY"] * m + b + dset["TIMEOFDAY"].attrs.update(units="hours") # -> auto timedelta conversion + dset = xr.decode_cf(dset) + dset["TIMEOFDAY"] = dset["TIMEOFDAY"].where( + dset["TIMEOFDAY"] != pd.Timedelta(fv * m + b, unit="hours") ) return dset diff --git a/tests/test_viirs_ndvi.py b/tests/test_viirs_ndvi.py index 5975b146..b599f53c 100644 --- a/tests/test_viirs_ndvi.py +++ b/tests/test_viirs_ndvi.py @@ -1,5 +1,6 @@ import sys +import numpy as np import pandas as pd import pytest @@ -17,14 +18,25 @@ def test_open_dataset_no_data(): def test_open_dataset(): date = "2023-01-01" ds = open_dataset(date) + assert set(ds.dims) >= {"time", "latitude", "longitude"} assert ds.sizes["time"] == 1 assert ds.sizes["latitude"] == 3600 assert ds.sizes["longitude"] == 7200 assert ds["time"] == pd.to_datetime(date) + assert "NDVI" in ds.data_vars assert ds["NDVI"].dims == ("time", "latitude", "longitude") + assert np.abs(ds["NDVI"]).max().item() < 5 + q = 0.02 + a, b = ds["NDVI"].quantile((q, 1 - q)).values + assert -1 < a < b < 1 + + assert ds["TIMEOFDAY"].isnull().sum() > 0 + assert ds["TIMEOFDAY"].to_series().min() >= 
pd.Timedelta("0h") + assert ds["TIMEOFDAY"].to_series().max() < pd.Timedelta("24h") + def test_open_mfdataset(): dates = ["2023-01-01", "2023-01-02"] From 957aa4a7da53685de3e8a93fba1f1f2ab2ae9c19 Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 15 Aug 2024 10:12:45 -0500 Subject: [PATCH 41/49] AVHRR AOT test --- tests/test_avhrr_aot.py | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/test_avhrr_aot.py diff --git a/tests/test_avhrr_aot.py b/tests/test_avhrr_aot.py new file mode 100644 index 00000000..3ced3050 --- /dev/null +++ b/tests/test_avhrr_aot.py @@ -0,0 +1,49 @@ +import sys + +import pandas as pd +import pytest + +from monetio.sat.nesdis_avhrr_aot_aws_gridded import open_dataset, open_mfdataset + +if sys.version_info < (3, 7): + pytest.skip("s3fs requires Python 3.7+", allow_module_level=True) + + +def test_open_dataset_no_data(): + with pytest.raises(ValueError, match="File does not exist on AWS:"): + open_dataset("1900-01-01") + + +def test_open_dataset(): + date = "2023-01-01" + ds = open_dataset(date) + assert set(ds.dims) >= {"time", "latitude", "longitude"} + assert ds.sizes["time"] == 1 + assert ds.sizes["latitude"] == 1800 + assert ds.sizes["longitude"] == 3600 + assert ds["time"] == pd.to_datetime(date) + assert "aot1" in ds.data_vars + assert ds["aot1"].dims == ("time", "latitude", "longitude") + + +def test_open_mfdataset(): + dates = ["2023-01-01", "2023-01-02"] + ds = open_mfdataset(dates) + assert (ds["time"] == pd.DatetimeIndex(dates)).all() + + +def test_open_mfdataset_error(): + dates = ["1900-01-01", "2023-01-01"] + + with pytest.warns(UserWarning, match="File does not exist on AWS:"): + ds = open_mfdataset(dates) + assert ds.sizes["time"] == 1 + assert ds["time"] == pd.to_datetime(dates[-1]) + + with pytest.raises(ValueError, match="File does not exist on AWS:"): + _ = open_mfdataset(dates, error_missing=True) + + with pytest.raises(ValueError, match="Files not available for product and 
dates"), pytest.warns( + UserWarning, match="File does not exist on AWS:" + ): + _ = open_mfdataset(dates[:1], error_missing=False) From ddd6fca9efe32491a01b9d21050ce6af914c4357 Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 15 Aug 2024 12:08:03 -0500 Subject: [PATCH 42/49] NESDIS VIIRS AOD NRT --- monetio/sat/nesdis_eps_viirs_aod_nrt.py | 239 +++++++++----------- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 2 +- 2 files changed, 110 insertions(+), 131 deletions(-) diff --git a/monetio/sat/nesdis_eps_viirs_aod_nrt.py b/monetio/sat/nesdis_eps_viirs_aod_nrt.py index 9138f3f3..fe094e2f 100644 --- a/monetio/sat/nesdis_eps_viirs_aod_nrt.py +++ b/monetio/sat/nesdis_eps_viirs_aod_nrt.py @@ -1,10 +1,7 @@ import pandas as pd -server = "ftp.star.nesdis.noaa.gov" -base_dir = "/pub/smcd/VIIRS_Aerosol/npp.viirs.aerosol.data/epsaot550/" - -def build_urls(dates, *, daily=True, res=0.1, sat="noaa20"): +def build_urls(dates, *, daily=True, data_resolution=0.1, satellite="NOAA20"): """Construct URLs for downloading NEPS data. Parameters @@ -12,11 +9,11 @@ def build_urls(dates, *, daily=True, res=0.1, sat="noaa20"): dates : pd.DatetimeIndex or iterable of datetime Dates to download data for. daily : bool, optional - Whether to download daily (default) or sub-daily data. - res : float, optional - Resolution of data in km, only used for sub-daily data. - sat : str, optional - Satellite platform, only used for sub-daily data. + Whether to download daily (default) or monthly data. + data_resolution : float, optional + Resolution of data in degrees (0.1 or 0.25). + satellite : str, optional + Satellite platform, 'SNPP' or 'NOAA20'. Returns ------- @@ -27,92 +24,78 @@ def build_urls(dates, *, daily=True, res=0.1, sat="noaa20"): ----- The `res` and `sat` parameters are only used for sub-daily data. 
""" - + import warnings from collections.abc import Iterable - if isinstance(dates, Iterable): + if isinstance(dates, Iterable) and not isinstance(dates, str): dates = pd.DatetimeIndex(dates) else: dates = pd.DatetimeIndex([dates]) + if daily: dates = dates.floor("D").unique() else: # monthly - dates = dates.floor("m").unique() - sat = sat.lower() + dates = dates.to_period("M").to_timestamp().unique() + + if data_resolution != 0.25 and not daily: + warnings.warn( + "Monthly data is only available at 0.25 deg resolution, " + f"got 'data_resolution' {data_resolution!r}" + ) + + sat_dirname = satellite.lower() + if satellite.upper() == "SNPP": + sat = "npp" if daily else "snpp" + elif satellite.upper() == "NOAA20": + sat = "noaa20" + res = str(data_resolution).ljust(5, "0") + aod_dirname = "aod/eps" if daily else "aod_monthly" + urls = [] fnames = [] + print("Building VIIRS URLs...") - base_url = f"https://www.star.nesdis.noaa.gov/pub/smcd/VIIRS_Aerosol/viirs_aerosol_gridded_data/{sat}/aod/eps/" - if sat == "snpp": - sat = "npp" - for dt in dates: + base_url = ( + "https://www.star.nesdis.noaa.gov/pub/smcd/VIIRS_Aerosol/viirs_aerosol_gridded_data/" + f"{sat_dirname}/{aod_dirname}/" + ) + + for date in dates: if daily: - fname = "viirs_eps_{}_aod_{}_deg_{}_nrt.nc".format( - sat, str(res).ljust(5, "0"), dt.strftime("%Y%m%d") + fname = "{}/viirs_eps_{}_aod_{}_deg_{}_nrt.nc".format( + date.strftime("%Y"), + sat, + res, + date.strftime("%Y%m%d"), ) - url = base_url + dt.strftime(r"%Y/") + fname + else: + fname = "viirs_aod_monthly_{}_{}_deg_{}_nrt.nc".format( + sat, + res, + date.strftime("%Y%m"), + ) + url = base_url + fname urls.append(url) fnames.append(fname) # Note: files needed for comparison urls = pd.Series(urls, index=None) fnames = pd.Series(fnames, index=None) - return urls, fnames - - -def check_remote_file_exists(file_url): - import requests - r = requests.head(file_url, stream=True, verify=False) - - if r.status_code == 200: - _ = next(r.iter_content(10)) - 
return True - else: - print(f"HTTP Error {r.status_code} - {r.reason}") - return False - - -def retrieve(url, fname): - """Download files from the airnowtech S3 server. - - Parameters - ---------- - url : string - Description of parameter `url`. - fname : string - Description of parameter `fname`. - - Returns - ------- - None - - """ - import os - - import requests - - if not os.path.isfile(fname): - print("\n Retrieving: " + fname) - print(url) - print("\n") - r = requests.get(url) - r.raise_for_status() - with open(fname, "wb") as f: - f.write(r.content) - else: - print("\n File Exists: " + fname) + return urls, fnames -def open_dataset(date, satellite="noaa20", res=0.1, daily=True, add_timestamp=True): +def open_dataset(date, *, satellite="NOAA20", data_resolution=0.1, daily=True): """ Parameters ---------- - datestr : str or datetime-like + date : str or datetime-like The date for which to open the dataset. - 2022-10-29 to current is available. """ + from io import BytesIO + import pandas as pd + import requests import xarray as xr if not isinstance(date, pd.Timestamp): @@ -120,75 +103,71 @@ def open_dataset(date, satellite="noaa20", res=0.1, daily=True, add_timestamp=Tr else: d = date - try: - if satellite.lower() not in ("noaa20", "snpp"): - raise ValueError - elif satellite.lower() == "noaa20": - sat = "noaa20" - else: - sat = "snpp" - except ValueError: - print("Invalid input for 'sat': Valid values are 'noaa20' or 'snpp'") - - # if (res != 0.1) or (res != 0.25): - # res = 0.1 # assume resolution is 0.1 if wrong value supplied - - urls, fnames = build_urls(d, sat=sat, res=res, daily=daily) - url = urls.values[0] - fname = fnames.values[0] - - try: - if check_remote_file_exists(url) is False: - raise ValueError - except ValueError: - print("File does not exist on NOAA HTTPS server.", url) - return ValueError - retrieve(url, fname) - - dset = xr.open_dataset(fname) - - if add_timestamp: - dset["time"] = d - dset = dset.expand_dims("time") - dset = 
dset.set_coords(["time"]) + if satellite.lower() not in ("noaa20", "snpp"): + raise ValueError( + f"Invalid input for 'satellite' {satellite!r}: " "Valid values are 'NOAA20' or 'SNPP'" + ) + + if data_resolution not in {0.1, 0.25}: + raise ValueError( + f"Invalid input for 'data_resolution' {data_resolution!r}: " + "Valid values are 0.1 or 0.25" + ) + + urls, _ = build_urls(d, satellite=satellite, data_resolution=data_resolution, daily=daily) + + r = requests.get(urls[0], stream=True) + r.raise_for_status() + dset = xr.open_dataset(BytesIO(r.content)) + + dset = dset.expand_dims(time=[d]).set_coords(["time"]) + return dset -def open_mfdataset(dates, satellite="noaa20", res=0.1, daily=True): +def open_mfdataset(dates, satellite="NOAA20", data_resolution=0.1, daily=True, error_missing=False): + import warnings + from collections.abc import Iterable + from io import BytesIO + import pandas as pd + import requests import xarray as xr - try: - if isinstance(dates, pd.DatetimeIndex): - d = dates - else: - raise TypeError - except TypeError: - print("Please provide a pandas.DatetimeIndex") - return - - try: - if satellite.lower() not in ("noaa20", "snpp"): - raise ValueError - elif satellite.lower() == "noaa20": - sat = "noaa20" + if isinstance(dates, Iterable) and not isinstance(dates, str): + dates = pd.DatetimeIndex(dates) + else: + dates = pd.DatetimeIndex([dates]) + + if satellite.lower() not in ("noaa20", "snpp"): + raise ValueError( + f"Invalid input for 'satellite' {satellite!r}: " "Valid values are 'NOAA20' or 'SNPP'" + ) + + if data_resolution not in {0.1, 0.25}: + raise ValueError( + f"Invalid input for 'data_resolution' {data_resolution!r}: " + "Valid values are 0.1 or 0.25" + ) + + urls, _ = build_urls(dates, satellite=satellite, data_resolution=data_resolution, daily=daily) + + dsets = [] + for url, date in zip(urls, dates): + r = requests.get(url, stream=True) + if r.status_code != 200: + msg = f"Failed to access file on NESDIS FTP server: {url}" + if 
error_missing: + raise RuntimeError(msg) + else: + warnings.warn(msg) else: - sat = "snpp" - except ValueError: - print("Invalid input for 'sat': Valid values are 'noaa20' or 'snpp'") - - urls, fnames = build_urls(d, sat=sat, res=res, daily=daily) - - for url, fname in zip(urls, fnames): - try: - if check_remote_file_exists(url) is False: - raise ValueError - except ValueError: - print("File does not exist on NOAA HTTPS server.", url) - return - retrieve(url, fname) - - dset = xr.open_mfdataset(fnames, combine="nested", concat_dim={"time": d}) - dset["time"] = d + ds = xr.open_dataset(BytesIO(r.content)).expand_dims(time=[date]).set_coords(["time"]) + dsets.append(ds) + + if len(dsets) == 0: + raise ValueError(f"Files not available for product and dates: {dates}") + + dset = xr.concat(dsets, dim="time") return dset diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index 1b25500c..23e74a7a 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -219,7 +219,7 @@ def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="da dset = xr.open_dataset(aws_file) # Add datetime - dset = dset.expand_dims(time=date_generated) + dset = dset.expand_dims(time=date_generated).set_coords(["time"]) return dset From 1d3fd7ce1355bcf2202ec819edb31cefa6159ee7 Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 15 Aug 2024 12:16:32 -0500 Subject: [PATCH 43/49] Add NESDIS VIIRS AOD NRT tests --- tests/test_viirs_aod_nrt.py | 86 +++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 tests/test_viirs_aod_nrt.py diff --git a/tests/test_viirs_aod_nrt.py b/tests/test_viirs_aod_nrt.py new file mode 100644 index 00000000..4fb6a189 --- /dev/null +++ b/tests/test_viirs_aod_nrt.py @@ -0,0 +1,86 @@ +import warnings + +import pandas as pd +import pytest + +from monetio.sat.nesdis_eps_viirs_aod_nrt import open_dataset, open_mfdataset + +NOW = 
pd.Timestamp.now("UTC") +TODAY = NOW.floor("D") + +with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="Converting to Period representation will drop timezone information.", + ) + THIS_MONTH = TODAY.to_period("M").to_timestamp() + +LAST_MONTH = THIS_MONTH - pd.DateOffset(months=1) +LAST_LAST_MONTH = LAST_MONTH - pd.DateOffset(months=1) + + +@pytest.mark.parametrize("res", [0.25, 0.1]) +@pytest.mark.parametrize("sat", ["NOAA20", "SNPP"]) +def test_open_dataset_daily(sat, res): + # Note: only NRT + date = (TODAY - pd.Timedelta(days=2)).tz_localize(None) + ds = open_dataset(date, satellite=sat, data_resolution=res) + + assert date.strftime(r"%Y%m%d") in ds.attrs["dataset_name"] + assert ds.attrs["spatial_resolution"].strip() == f"{res:.2f} degree" + assert ds.attrs["satellite_name"] == ("Suomi NPP" if sat == "SNPP" else "NOAA 20") + + assert set(ds.dims) == {"time", "lat", "lon"} + assert ds.sizes["time"] == 1 + assert ds.sizes["lat"] == int(180 / res) + assert ds.sizes["lon"] == int(360 / res) + assert (ds.time == pd.DatetimeIndex([date])).all() + assert "AOD550" in ds.data_vars + + +@pytest.mark.parametrize("sat", ["NOAA20", "SNPP"]) +def test_open_dataset_monthly(sat): + # Seems like only one is stored + if NOW - THIS_MONTH.tz_localize("UTC") > pd.Timedelta(hours=12): + date = LAST_MONTH + else: + date = LAST_LAST_MONTH + + ds = open_dataset(date, satellite=sat, daily=False, data_resolution=0.25) + assert ds.sizes["time"] == 1 + + +def test_open_mfdataset(): + today = TODAY.tz_localize(None) + dates = [today - pd.Timedelta(days=2), today - pd.Timedelta(days=3)] + ds = open_mfdataset(dates) + assert ds.sizes["time"] == len(dates) + + +def test_missing_date(): + from requests.exceptions import HTTPError + + with pytest.raises(HTTPError): + open_dataset("1900-01-01") + + +def test_missing_date_mf(): + # No dsets collected + with pytest.raises(ValueError, match="Files not available for product and dates"), pytest.warns( + UserWarning, 
match="Failed to access file" + ): + open_mfdataset("1900-01-01") + + # Error during dsets collection + with pytest.raises(RuntimeError, match="Failed to access file"): + open_mfdataset("1900-01-01", error_missing=True) + + one_good = ["1900-01-01", TODAY.tz_localize(None) - pd.Timedelta(days=2)] + with pytest.warns(UserWarning, match="Failed to access file"): + ds = open_mfdataset(one_good) + assert ds.sizes["time"] == 1 + + with pytest.raises(RuntimeError, match="Failed to access file"), pytest.warns( + UserWarning, match="Failed to access file" + ): + open_mfdataset(one_good, error_missing=True) From 2dbf8d6e489e6e6c7436c3494c34b13c55e961cb Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 15 Aug 2024 12:25:54 -0500 Subject: [PATCH 44/49] Remove warns --- tests/test_viirs_aod_nrt.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_viirs_aod_nrt.py b/tests/test_viirs_aod_nrt.py index 4fb6a189..d07e8608 100644 --- a/tests/test_viirs_aod_nrt.py +++ b/tests/test_viirs_aod_nrt.py @@ -80,7 +80,5 @@ def test_missing_date_mf(): ds = open_mfdataset(one_good) assert ds.sizes["time"] == 1 - with pytest.raises(RuntimeError, match="Failed to access file"), pytest.warns( - UserWarning, match="Failed to access file" - ): + with pytest.raises(RuntimeError, match="Failed to access file"): open_mfdataset(one_good, error_missing=True) From b754cad71bb31b5cb993a9f7fab4f6d3dce6bf3b Mon Sep 17 00:00:00 2001 From: zmoon Date: Thu, 15 Aug 2024 12:43:55 -0500 Subject: [PATCH 45/49] Sort / most sat aren't in top-level --- monetio/__init__.py | 4 ---- monetio/sat/__init__.py | 13 ++++++------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/monetio/__init__.py b/monetio/__init__.py index 23e157ac..cb14bf8b 100644 --- a/monetio/__init__.py +++ b/monetio/__init__.py @@ -39,10 +39,6 @@ # # satellite obs "goes", - "nesdis_eps_viirs_aod_nrt", - "nesdis_viirs_aod_aws_gridded", - "nesdis_avhrr_aot_aws_gridded", - "nesdis_viirs_ndvi_aws_gridded", # # 
models "camx", diff --git a/monetio/sat/__init__.py b/monetio/sat/__init__.py index 111ba8cd..660a4059 100644 --- a/monetio/sat/__init__.py +++ b/monetio/sat/__init__.py @@ -1,4 +1,4 @@ -from . import ( # nesdis_avhrr_aot_aws_gridded, +from . import ( _gridded_eos_mm, _modis_l2_mm, _mopitt_l3_mm, @@ -7,6 +7,7 @@ _tropomi_l2_no2_mm, goes, modis_ornl, + nesdis_avhrr_aot_aws_gridded, nesdis_eps_viirs_aod_nrt, nesdis_frp, nesdis_viirs_aod_aws_gridded, @@ -19,16 +20,14 @@ "_mopitt_l3_mm", "_omps_l3_mm", "_omps_nadir_mm", - "nesdis_viirs_aod_aws_gridded", - "nesdis_avhrr_aot_aws_gridded", - "nesdis_viirs_ndvi_aws_gridded", - "nesdis_eps_viirs_aod_nrt", "_tropomi_l2_no2_mm", "goes", "modis_ornl", - "nesdis_edr_viirs", - "nesdis_eps_viirs", + "nesdis_avhrr_aot_aws_gridded", + "nesdis_eps_viirs_aod_nrt", "nesdis_frp", + "nesdis_viirs_aod_aws_gridded", + "nesdis_viirs_ndvi_aws_gridded", ] __name__ = "sat" From 79dd79c60fd5f6a32b71039553f5013ff3677ad8 Mon Sep 17 00:00:00 2001 From: bbakernoaa Date: Tue, 19 Nov 2024 10:08:26 -0500 Subject: [PATCH 46/49] updates --- monetio/sat/nesdis_avhrr_aot_aws_gridded.py | 132 +++-- monetio/sat/nesdis_viirs_aod_aws_gridded.py | 512 +++++++++++-------- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 317 ++++++++---- 3 files changed, 617 insertions(+), 344 deletions(-) diff --git a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py index f7a71eb3..75da18cc 100644 --- a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py +++ b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py @@ -1,31 +1,105 @@ -def create_daily_aod_list(date_generated, fs, warning=False): +""" +NOAA Climate Data Record (CDR) Aerosol Optical Depth (AOD) Dataset Access Module + +This module provides access to NOAA's satellite-derived Aerosol Optical Depth data: + +Aerosol Optical Depth (AOD): + - Source: NOAA CDR AVHRR AOT (Aerosol Optical Thickness) + - Period: 1981-present + - Sensor: Advanced Very High Resolution Radiometer (AVHRR) + - Resolution: 
0.1° x 0.1° (approximately 11km at equator)
+    - Coverage: Global over ocean
+    - Temporal Resolution:
+        * Daily averages
+        * Monthly averages
+    - Key Variables:
+        * aot_550: Aerosol Optical Thickness at 550nm
+        * number_of_retrievals: Number of valid retrievals
+        * quality_flags: Quality assurance flags
+    - AWS Path: noaa-cdr-aerosol-optical-thickness-pds/
+
+Dataset Description:
+    The AVHRR AOT CDR provides a consistent, long-term record of aerosol optical
+    thickness over global oceans. This parameter is crucial for:
+    - Climate change studies
+    - Atmospheric correction
+    - Air quality monitoring
+    - Radiative forcing calculations
+
+Data Access:
+    Files are stored in NetCDF format on AWS S3, organized by:
+    - Daily data: /data/daily/YYYY/
+    - Monthly data: /data/monthly/YYYY/
+
+Usage:
+    >>> # Single date access (daily)
+    >>> dataset = open_dataset("2023-01-01")
+
+    >>> # Monthly data
+    >>> dataset = open_dataset("2023-01-01", averaging_time=AveragingTime.MONTHLY)
+
+    >>> # Multiple dates
+    >>> dates = pd.date_range("2023-01-01", "2023-01-10")
+    >>> dataset_multi = open_mfdataset(dates)
+
+References:
+    - Dataset Documentation: https://www.ncdc.noaa.gov/cdr/atmospheric/aerosol-optical-thickness
+    - Algorithm Theoretical Basis Document (ATBD):
+        https://www.ncdc.noaa.gov/cdr/atmospheric/aerosol-optical-thickness/documentation
+
+Notes:
+    - Data is only available over ocean surfaces
+    - Quality flags should be consulted for optimal data usage
+    - Monthly averages are computed from daily data
+"""
+from typing import List, Tuple, Union
+from enum import Enum
+from datetime import datetime
+from pathlib import Path
+import warnings
+import pandas as pd
+import s3fs
+import xarray as xr
+
+AOD_BASE_PATH = "noaa-cdr-aerosol-optical-thickness-pds/data/daily"
+AOD_FILE_PATTERN = "AOT_AVHRR_*_daily-avg_"
+
+class AveragingTime(Enum):
+    DAILY = "daily"
+    MONTHLY = "monthly"
+
+def create_daily_aod_list(
+    date_generated: List[datetime],
+    fs: s3fs.S3FileSystem,
+    warning: bool = False
+) -> Tuple[List[str], int]: """ Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. Parameters: date_generated (list): A list of dates for which to check the existence of AOD files. fs (FileSystem): The file system object used to check file existence and size. + warning (bool, optional): If True, warns instead of raising error when file not found. Defaults to False. Returns: - tuple: A tuple containing the list of file paths and the total size of the files. + tuple[list[str | None], int]: A tuple containing: + - List of file paths (str) or None for missing files if warning=True + - Total size of the files in bytes """ - import warnings - # Loop through observation dates & check for files nodd_file_list = [] nodd_total_size = 0 for date in date_generated: file_date = date.strftime("%Y%m%d") year = file_date[:4] - prod_path = "noaa-cdr-aerosol-optical-thickness-pds/data/daily/" + year + "/" - patt = "AOT_AVHRR_*_daily-avg_" - file_names = fs.glob(prod_path + patt + file_date + "_*.nc") + prod_path = Path(AOD_BASE_PATH) / year + file_names = fs.glob(str(prod_path / f"{AOD_FILE_PATTERN}{file_date}_*.nc")) # If file exists, add path to list and add file size to total if file_names: nodd_file_list.extend(file_names) - nodd_total_size = nodd_total_size + sum(fs.size(f) for f in file_names) + nodd_total_size += sum(fs.size(f) for f in file_names) else: - msg = "File does not exist on AWS: " + prod_path + patt + file_date + "_*.nc" + msg = f"File does not exist on AWS: {prod_path}/{AOD_FILE_PATTERN}{file_date}_*.nc" if warning: warnings.warn(msg) nodd_file_list.append(None) @@ -34,7 +108,6 @@ def create_daily_aod_list(date_generated, fs, warning=False): return nodd_file_list, nodd_total_size - def create_monthly_aod_list(date_generated, fs, warning=False): """ Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. 
@@ -46,8 +119,6 @@ def create_monthly_aod_list(date_generated, fs, warning=False): Returns: tuple: A tuple containing the list of file paths and the total size of the files. """ - import warnings - # Loop through observation dates & check for files nodd_file_list = [] nodd_total_size = 0 @@ -60,7 +131,7 @@ def create_monthly_aod_list(date_generated, fs, warning=False): # If file exists, add path to list and add file size to total if file_names: nodd_file_list.extend(file_names) - nodd_total_size = nodd_total_size + sum(fs.size(f) for f in file_names) + nodd_total_size += sum(fs.size(f) for f in file_names) else: msg = "File does not exist on AWS: " + prod_path + patt + file_date + "_*.nc" if warning: @@ -71,8 +142,10 @@ def create_monthly_aod_list(date_generated, fs, warning=False): return nodd_file_list, nodd_total_size - -def open_dataset(date, averaging_time="daily"): +def open_dataset( + date: Union[str, datetime], + averaging_time: AveragingTime = AveragingTime.DAILY +) -> xr.Dataset: """ Opens a dataset for the given date, satellite, data resolution, and averaging time. @@ -87,10 +160,6 @@ def open_dataset(date, averaging_time="daily"): Raises: ValueError: If the input values are invalid. 
""" - import pandas as pd - import s3fs - import xarray as xr - if isinstance(date, str): date_generated = [pd.Timestamp(date)] else: @@ -99,9 +168,9 @@ def open_dataset(date, averaging_time="daily"): # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time.lower() == "monthly": + if averaging_time == AveragingTime.MONTHLY: file_list, _ = create_monthly_aod_list(date_generated, fs) - elif averaging_time.lower() == "daily": + elif averaging_time == AveragingTime.DAILY: file_list, _ = create_daily_aod_list(date_generated, fs) else: raise ValueError( @@ -112,14 +181,12 @@ def open_dataset(date, averaging_time="daily"): if len(file_list) == 0 or all(f is None for f in file_list): raise ValueError(f"Files not available for product and date: {date_generated[0]}") - aws_file = fs.open(file_list[0]) - - dset = xr.open_dataset(aws_file) + with fs.open(file_list[0]) as aws_file: + dset = xr.open_dataset(aws_file) return dset - -def open_mfdataset(dates, averaging_time="daily", error_missing=False): +def open_mfdataset(dates, averaging_time: AveragingTime = AveragingTime.DAILY, error_missing=False): """ Opens and combines multiple NetCDF files into a single xarray dataset. 
@@ -137,10 +204,6 @@ def open_mfdataset(dates, averaging_time="daily", error_missing=False):
     """
     from collections.abc import Iterable
 
-    import pandas as pd
-    import s3fs
-    import xarray as xr
-
     if isinstance(dates, Iterable) and not isinstance(dates, str):
         dates = pd.DatetimeIndex(dates)
     else:
@@ -149,9 +212,9 @@
     # Access AWS using anonymous credentials
     fs = s3fs.S3FileSystem(anon=True)
 
-    if averaging_time.lower() == "monthly":
+    if averaging_time == AveragingTime.MONTHLY:
         file_list, _ = create_monthly_aod_list(dates, fs, warning=not error_missing)
-    elif averaging_time.lower() == "daily":
+    elif averaging_time == AveragingTime.DAILY:
         file_list, _ = create_daily_aod_list(dates, fs, warning=not error_missing)
     else:
         raise ValueError(
@@ -164,6 +227,5 @@
     aws_files = [fs.open(f) for f in file_list if f is not None]
 
-    dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested")
-
-    return dset
+    dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested")
+    return dset
diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py b/monetio/sat/nesdis_viirs_aod_aws_gridded.py
index 23e74a7a..ec13ab1e 100644
--- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py
+++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py
@@ -1,306 +1,388 @@
-def create_daily_aod_list(data_resolution, satellite, date_generated, fs, warning=False):
+"""
+NOAA VIIRS Aerosol Optical Depth (AOD) Dataset Access Module
+
+This module provides access to NOAA's VIIRS-derived Aerosol Optical Depth data:
+
+Data Products:
+    1. Daily AOD:
+       - Resolution options: 0.05°, 0.10°, 0.25°
+       - Coverage: Global over ocean
+       - Variables: AOD at 550nm, quality flags
+       - Path: noaa-jpss/{satellite}/VIIRS/{resolution}_Degrees_Daily/
+
+    2. 
Weekly AOD: + - Fixed resolution: 0.25° + - Coverage: Global over ocean + - Variables: Weekly averaged AOD + - Path: noaa-jpss/{satellite}/VIIRS/0.25_Degrees_Weekly/ + + 3. Monthly AOD: + - Fixed resolution: 0.25° + - Coverage: Global over ocean + - Variables: Monthly averaged AOD + - Path: noaa-jpss/{satellite}/VIIRS/0.25_Degrees_Monthly/ + +Satellites: + - SNPP: Data available from 2012-01-19 to present + - NOAA20: Data available from 2018-01-01 to present + +References: + - VIIRS AOD Algorithm: https://www.star.nesdis.noaa.gov/jpss/documents/ATBD/ATBD_EPS_Aerosol_AOD_v3.0.1.pdf + - Data Access: https://www.avl.class.noaa.gov/saa/products/welcome +""" + +from typing import List, Tuple, Union +from datetime import datetime +import warnings +import pandas as pd +import s3fs +import xarray as xr +from enum import Enum +from functools import lru_cache +from pathlib import Path + +class AveragingTime(str, Enum): + """Enumeration of valid averaging time periods.""" + DAILY = "daily" + WEEKLY = "weekly" + MONTHLY = "monthly" + +class Satellite(str, Enum): + """Enumeration of valid satellites.""" + SNPP = "SNPP" + NOAA20 = "NOAA20" + +# Configuration dictionary for data products +PRODUCT_CONFIG = { + AveragingTime.DAILY: { + "path_template": "noaa-jpss/{satellite}/VIIRS/{satellite}_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/{resolution}_Degrees_Daily/{year}/", + "file_template": "viirs_eps_{sat_name}_aod_{resolution}_deg_{date}.nc", + "resolutions": {"0.050", "0.100", "0.250"}, + }, + AveragingTime.WEEKLY: { + "path_template": "noaa-jpss/{satellite}/VIIRS/{satellite}_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/{year}/", + "resolutions": {"0.250"}, + }, + AveragingTime.MONTHLY: { + "path_template": "noaa-jpss/{satellite}/VIIRS/{satellite}_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/", + "file_template": "viirs_aod_monthly_{sat_name}_0.250_deg_{date}.nc", + "resolutions": {"0.250"}, + } +} + +@lru_cache(maxsize=128) 
+def _get_satellite_name(satellite: str) -> str: + """Get the lowercase satellite name used in file paths.""" + return "npp" if satellite == "SNPP" else "noaa20" + +def validate_inputs(satellite: str, data_resolution: str, averaging_time: str) -> None: """ - Creates a list of daily AOD (Aerosol Optical Depth) files and calculates the total size of the files. + Validate input parameters. - Parameters: - data_resolution (str): The resolution of the AOD data. - satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. - date_generated (list): A list of dates for which to check the existence of AOD files. - fs (FileSystem): The file system object used to check file existence and size. + Args: + satellite: Satellite name + data_resolution: Data resolution + averaging_time: Averaging period + + Raises: + ValueError: If inputs are invalid + """ + if satellite not in {s.value for s in Satellite}: + raise ValueError(f"Invalid satellite: {satellite}. Must be one of {list(Satellite)}") + + if averaging_time not in {t.value for t in AveragingTime}: + raise ValueError(f"Invalid averaging_time: {averaging_time}. Must be one of {list(AveragingTime)}") + + if data_resolution not in PRODUCT_CONFIG[averaging_time]["resolutions"]: + raise ValueError( + f"Invalid resolution {data_resolution} for {averaging_time} data. " + f"Valid resolutions: {PRODUCT_CONFIG[averaging_time]['resolutions']}" + ) + +def create_daily_aod_list( + data_resolution: str, + satellite: str, + date_generated: List[datetime], + fs: s3fs.S3FileSystem, + warning: bool = False +) -> Tuple[List[str], int]: + """ + Creates a list of daily AOD files and calculates their total size. + + Args: + data_resolution: Data resolution + satellite: Satellite name + date_generated: List of dates to process + fs: S3 filesystem object + warning: Whether to warn instead of raise errors Returns: - tuple: A tuple containing the list of file paths and the total size of the files. 
+ Tuple of (file_list, total_size) """ - import warnings + validate_inputs(satellite, data_resolution, AveragingTime.DAILY) + + file_list = [] + total_size = 0 + sat_name = _get_satellite_name(satellite) + config = PRODUCT_CONFIG[AveragingTime.DAILY] - # Loop through observation dates & check for files - nodd_file_list = [] - nodd_total_size = 0 for date in date_generated: file_date = date.strftime("%Y%m%d") year = file_date[:4] - if satellite == "SNPP": - sat_name = "npp" - elif satellite == "NOAA20": - sat_name = "noaa20" - file_name = ( - "viirs_eps_" + sat_name + "_aod_" + data_resolution + "_deg_" + file_date + ".nc" + file_name = config["file_template"].format( + sat_name=sat_name, + resolution=data_resolution, + date=file_date ) - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/" - + data_resolution[:4] - + "_Degrees_Daily/" - + year - + "/" + + prod_path = config["path_template"].format( + satellite=satellite, + resolution=data_resolution[:4], + year=year ) - # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) + + full_path = prod_path + file_name + + if fs.exists(full_path): + file_list.extend(fs.ls(full_path)) + total_size += fs.size(full_path) else: - msg = "File does not exist on AWS: " + prod_path + file_name + msg = f"File does not exist: {full_path}" if warning: warnings.warn(msg, stacklevel=2) - nodd_file_list.append(None) + file_list.append(None) else: raise ValueError(msg) - return nodd_file_list, nodd_total_size + return file_list, total_size - -# Create list of available monthly data file paths & total size of files -def create_monthly_aod_list(satellite, date_generated, fs, warning=False): +def create_monthly_aod_list( + satellite: str, + date_generated: List[datetime], + fs: 
s3fs.S3FileSystem, + warning: bool = False +) -> Tuple[List[str], int]: """ - Creates a list of monthly AOD (Aerosol Optical Depth) files for a given satellite and date range. + Creates a list of monthly AOD files and calculates their total size. Args: - satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. - date_generated (list): A list of datetime objects representing the observation dates. - fs: The file system object used to check for file existence and retrieve file information. + satellite: Satellite name + date_generated: List of dates to process + fs: S3 filesystem object + warning: Whether to warn instead of raise errors Returns: - tuple: A tuple containing the list of file paths and the total size of the files. + Tuple of (file_list, total_size) """ - import warnings + validate_inputs(satellite, "0.250", AveragingTime.MONTHLY) + + file_list = [] + total_size = 0 + processed_months = set() + sat_name = _get_satellite_name(satellite) + config = PRODUCT_CONFIG[AveragingTime.MONTHLY] - # Loop through observation dates & check for files - nodd_file_list = [] - nodd_total_size = 0 - year_month_list = [] for date in date_generated: - file_date = date.strftime("%Y%m%d") - year_month = file_date[:6] - if year_month not in year_month_list: - year_month_list.append(year_month) - - if satellite == "SNPP": - sat_name = "snpp" - elif satellite == "NOAA20": - sat_name = "noaa20" - file_name = "viirs_aod_monthly_" + sat_name + "_0.250_deg_" + year_month + ".nc" - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Monthly/" - ) - # If file exists, add path to list and add file size to total - if fs.exists(prod_path + file_name) is True: - nodd_file_list.extend(fs.ls(prod_path + file_name)) - nodd_total_size = nodd_total_size + fs.size(prod_path + file_name) - else: - msg = "File does not exist on AWS: " + prod_path + file_name - if warning: - warnings.warn(msg, stacklevel=2) 
- nodd_file_list.append(None) - else: - raise ValueError(msg) + year_month = date.strftime("%Y%m") + if year_month in processed_months: + continue + + processed_months.add(year_month) + file_name = config["file_template"].format( + sat_name=sat_name, + date=year_month + ) - return nodd_file_list, nodd_total_size + prod_path = config["path_template"].format(satellite=satellite) + full_path = prod_path + file_name + + if fs.exists(full_path): + file_list.extend(fs.ls(full_path)) + total_size += fs.size(full_path) + else: + msg = f"File does not exist: {full_path}" + if warning: + warnings.warn(msg, stacklevel=2) + file_list.append(None) + else: + raise ValueError(msg) + return file_list, total_size -# Create list of available weekly data file paths & total size of files -def create_weekly_aod_list(satellite, date_generated, fs, warning=False): +def create_weekly_aod_list( + satellite: str, + date_generated: List[datetime], + fs: s3fs.S3FileSystem, + warning: bool = False +) -> Tuple[List[str], int]: """ - Creates a list of files and calculates the total size of files for a given satellite, observation dates, and file system. + Creates a list of weekly AOD files and calculates their total size. - Parameters: - satellite (str): The satellite name. Can be 'SNPP' or 'NOAA20'. - date_generated (list): A list of observation dates. - fs (FileSystem): The file system object. + Args: + satellite: Satellite name + date_generated: List of dates to process + fs: S3 filesystem object + warning: Whether to warn instead of raise errors Returns: - tuple: A tuple containing the list of files and the total size of files. 
+ Tuple of (file_list, total_size) """ - # Loop through observation dates & check for files - nodd_file_list = [] - nodd_total_size = 0 + validate_inputs(satellite, "0.250", AveragingTime.WEEKLY) + + file_list = [] + total_size = 0 + config = PRODUCT_CONFIG[AveragingTime.WEEKLY] + for date in date_generated: file_date = date.strftime("%Y%m%d") year = file_date[:4] - prod_path = ( - "noaa-jpss/" - + satellite - + "/VIIRS/" - + satellite - + "_VIIRS_Aerosol_Optical_Depth_Gridded_Reprocessed/0.25_Degrees_Weekly/" - + year - + "/" + prod_path = config["path_template"].format( + satellite=satellite, + year=year ) - # Get list of all files in given year on NODD - all_files = fs.ls(prod_path) - # Loop through files, check if file date falls within observation date range - for file in all_files: - file_start = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[0] - file_end = file.split("/")[-1].split("_")[7].split(".")[0].split("-")[1] - # If file within observation range, add path to list and add file size to total - if file_date >= file_start and file_date <= file_end: - if file not in nodd_file_list: - nodd_file_list.append(file) - nodd_total_size = nodd_total_size + fs.size(file) - - return nodd_file_list, nodd_total_size - - -def open_dataset(date, satellite="SNPP", data_resolution=0.1, averaging_time="daily"): - """Load VIIRS AOD data from AWS - for the given date, satellite, data resolution, and averaging time. - - Parameters: - date (str or datetime-like): The date for which to open the dataset. - SNPP has data from 2012-01-19 to 2020-12-31. - NOAA20 has data from 2018-01-01 to 2020-12-31. - satellite (str): The satellite to retrieve data from. 
+ + try: + all_files = fs.ls(prod_path) + for file in all_files: + file_name = Path(file).name + date_range = file_name.split("_")[7].split(".")[0] + file_start, file_end = date_range.split("-") + + if file_start <= file_date <= file_end and file not in file_list: + file_list.append(file) + total_size += fs.size(file) + except Exception as e: + if warning: + warnings.warn(str(e), stacklevel=2) + else: + raise ValueError(str(e)) + + return file_list, total_size + +def open_dataset( + date: Union[str, datetime], + satellite: str = "SNPP", + data_resolution: Union[float, str] = 0.1, + averaging_time: str = "daily" +) -> xr.Dataset: + """ + Load VIIRS AOD data from AWS for the given parameters. + + Args: + date: The date for which to open the dataset. + SNPP has data from 2012-01-19 to present. + NOAA20 has data from 2018-01-01 to present. + satellite: The satellite to retrieve data from. Valid values are 'SNPP' or 'NOAA20'. - data_resolution (float or str, optional): The data resolution. + data_resolution: The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to 0.1°. - Only has an effect when `averaging_time` is 'daily'. - For 'weekly' and 'monthly' data, the resolution is always 0.25. - averaging_time (str, optional): The averaging time. - Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. + Only has effect when averaging_time is 'daily'. + For 'weekly' and 'monthly' data, resolution is always 0.25. + averaging_time: The averaging time period. + Valid values are 'daily', 'weekly', or 'monthly'. Returns: xarray.Dataset: The opened dataset. Raises: - ValueError: If the input parameters are invalid. + ValueError: If input parameters are invalid. 
""" - import pandas as pd - import s3fs - import xarray as xr - - if satellite not in {"SNPP", "NOAA20"}: - raise ValueError( - f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" - ) - - data_resolution_in = data_resolution - data_resolution = str(data_resolution).ljust(5, "0") - if data_resolution not in {"0.050", "0.100", "0.250"}: - raise ValueError( - f"Invalid input for 'data_resolution' {data_resolution_in!r}: " - "Valid values are '0.050', '0.100', or '0.250'" - ) + validate_inputs(satellite, str(data_resolution).ljust(5, "0"), averaging_time) if isinstance(date, str): date_generated = [pd.Timestamp(date)] else: date_generated = [date] - # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(satellite, date_generated, fs, warning=False) - elif averaging_time.lower() == "weekly": - file_list, _ = create_weekly_aod_list(satellite, date_generated, fs, warning=False) - elif averaging_time.lower() == "daily": - file_list, _ = create_daily_aod_list( - data_resolution, satellite, date_generated, fs, warning=False - ) - else: - raise ValueError( - f"Invalid input for 'averaging_time' {averaging_time!r}: " - "Valid values are 'daily', 'weekly', or 'monthly'" - ) + # Get file list based on averaging time + if averaging_time == AveragingTime.MONTHLY: + file_list, _ = create_monthly_aod_list(satellite, date_generated, fs) + elif averaging_time == AveragingTime.WEEKLY: + file_list, _ = create_weekly_aod_list(satellite, date_generated, fs) + else: # daily + data_resolution = str(data_resolution).ljust(5, "0") + file_list, _ = create_daily_aod_list(data_resolution, satellite, date_generated, fs) if len(file_list) == 0 or all(f is None for f in file_list): - raise ValueError(f"Files not available for product and date: {date_generated[0]}") + raise ValueError(f"Files not available for {averaging_time} data and date: {date_generated[0]}") - 
aws_file = fs.open(file_list[0]) - - dset = xr.open_dataset(aws_file) - - # Add datetime + # Open and process dataset + dset = xr.open_dataset(fs.open(file_list[0])) dset = dset.expand_dims(time=date_generated).set_coords(["time"]) return dset - def open_mfdataset( - dates, satellite="SNPP", data_resolution=0.1, averaging_time="daily", error_missing=False -): + dates: Union[pd.DatetimeIndex, datetime, str], + satellite: str = "SNPP", + data_resolution: Union[float, str] = 0.1, + averaging_time: str = "daily", + error_missing: bool = False +) -> xr.Dataset: """ - Opens and combines multiple NetCDF files into a single xarray dataset. + Opens and combines multiple NetCDF files into a single dataset. - Parameters: - dates (pandas.DatetimeIndex): The dates for which to retrieve the data. - SNPP has data from 2012-01-19 to 2020-12-31. - NOAA20 has data from 2018-01-01 to 2020-12-31. - satellite (str): The satellite name. + Args: + dates: The dates for which to retrieve the data. + SNPP has data from 2012-01-19 to present. + NOAA20 has data from 2018-01-01 to present. + satellite: The satellite name. Valid values are 'SNPP' or 'NOAA20'. - data_resolution (float or str, optional): The data resolution. + data_resolution: The data resolution. Valid values are '0.050', '0.100', or '0.250'. Defaults to 0.1°. - Only has an effect when `averaging_time` is 'daily'. - For 'weekly' and 'monthly' data, the resolution is always 0.25. - averaging_time (str, optional): The averaging time. - Valid values are 'daily', 'weekly', or 'monthly'. Defaults to 'daily'. - error_missing (bool, optional): If False (default), skip missing files with warning + Only has effect when averaging_time is 'daily'. + For 'weekly' and 'monthly' data, resolution is always 0.25. + averaging_time: The averaging time period. + Valid values are 'daily', 'weekly', or 'monthly'. + error_missing: If False (default), skip missing files with warning and continue processing. Otherwise, raise an error. 
Returns: - xarray.Dataset: The combined dataset containing the data for the specified dates. + xarray.Dataset: The combined dataset for specified dates. Raises: - ValueError: If the input parameters are invalid. + ValueError: If input parameters are invalid. """ - from collections.abc import Iterable + # Validate inputs + validate_inputs(satellite, str(data_resolution).ljust(5, "0"), averaging_time) - import pandas as pd - import s3fs - import xarray as xr - - if satellite not in {"SNPP", "NOAA20"}: - raise ValueError( - f"Invalid input for 'satellite' {satellite!r}: Valid values are 'SNPP' or 'NOAA20'" - ) - - data_resolution_in = data_resolution - data_resolution = str(data_resolution).ljust(5, "0") - if data_resolution not in {"0.050", "0.100", "0.250"}: - raise ValueError( - f"Invalid input for 'data_resolution' {data_resolution_in!r}: " - "Valid values are '0.050', '0.100', or '0.250'" - ) - - if isinstance(dates, Iterable) and not isinstance(dates, str): - dates = pd.DatetimeIndex(dates) - else: + # Convert dates to DatetimeIndex + if isinstance(dates, (str, datetime)): dates = pd.DatetimeIndex([dates]) + elif not isinstance(dates, pd.DatetimeIndex): + dates = pd.DatetimeIndex(dates) - # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - if averaging_time.lower() == "monthly": - file_list, _ = create_monthly_aod_list(satellite, dates, fs, warning=not error_missing) - elif averaging_time.lower() == "weekly": - file_list, _ = create_weekly_aod_list(satellite, dates, fs, warning=not error_missing) - elif averaging_time.lower() == "daily": + # Get file list based on averaging time + if averaging_time == AveragingTime.MONTHLY: + file_list, _ = create_monthly_aod_list( + satellite, dates, fs, warning=not error_missing + ) + elif averaging_time == AveragingTime.WEEKLY: + file_list, _ = create_weekly_aod_list( + satellite, dates, fs, warning=not error_missing + ) + else: # daily + data_resolution = str(data_resolution).ljust(5, "0") file_list, 
_ = create_daily_aod_list( data_resolution, satellite, dates, fs, warning=not error_missing ) - else: - raise ValueError( - f"Invalid input for 'averaging_time' {averaging_time!r}: " - "Valid values are 'daily', 'weekly', or 'monthly'" - ) if len(file_list) == 0 or all(f is None for f in file_list): - raise ValueError(f"Files not available for product and dates: {dates}") + raise ValueError(f"Files not available for {averaging_time} data and dates: {dates}") if not len(file_list) == len(dates): raise ValueError( "'dates' and discovered file list are not the same length. " - "Consider the time frequency ('averaging_time') when constructing your dates input." + f"Check your dates input for {averaging_time} frequency." ) + # Process valid files and dates dates_good = [] aws_files = [] for d, f in zip(dates, file_list): @@ -308,8 +390,8 @@ def open_mfdataset( aws_files.append(fs.open(f)) dates_good.append(d) + # Combine datasets dset = xr.open_mfdataset(aws_files, concat_dim="time", combine="nested") - dset["time"] = dates_good - return dset + return dset \ No newline at end of file diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index 746a363e..a03cc8c1 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -1,124 +1,264 @@ -def create_daily_vhi_list(date_generated, fs, warning=False): +""" +NOAA Climate Data Record (CDR) and Near Real-Time (NRT) Dataset Access Module + +This module provides access to various NOAA satellite-derived environmental datasets: + +1. Vegetation Health Index (VHI): + - Available from both VIIRS (2012-present) and AVHRR (1981-2012) sensors + - Source: NOAA CDR + - Resolution: 4km global + - Frequency: Daily + - Variables: NDVI, VCI, TCI, VHI + - AWS Path: noaa-cdr-vegetation-health-pds/ + +2. 
Leaf Area Index (LAI) and Fraction of Photosynthetically Active Radiation (FPAR): + - Available from VIIRS sensor (2012-present) + - Source: NOAA VIIRS + - Resolution: 500m global + - Frequency: Daily + - Variables: LAI, FPAR + - AWS Path: noaa-viirs-lai-fpar/ + +3. Snow Cover Extent: + - Available from Interactive Multisensor Snow and Ice Mapping System (IMS) + - Source: NOAA CDR + - Resolution: 4km Northern Hemisphere + - Frequency: Daily + - Variables: Snow Cover, Sea Ice + - AWS Path: noaa-cdr-snow-cover-extent-ims-nrt/ + +Data Access: + All datasets are accessed through AWS S3 buckets in NetCDF format. + Files are organized by year and contain daily observations. + +Usage: + >>> # Single date access + >>> dataset = open_dataset("2023-01-01", data_type="vhi", sensor="viirs") + + >>> # Multiple dates + >>> dates = pd.date_range("2023-01-01", "2023-01-10") + >>> dataset_multi = open_mfdataset(dates, data_type="snow", sensor="ims") + + >>> # Historical AVHRR data + >>> dataset_avhrr = open_dataset("2000-01-01", data_type="vhi", sensor="avhrr") + +References: + - VHI: https://www.ncdc.noaa.gov/cdr/terrestrial/vegetation-health + - LAI/FPAR: https://www.star.nesdis.noaa.gov/jpss/EDRs/products_Vegetation.php + - Snow Cover: https://www.ncdc.noaa.gov/snow-and-ice/snow-cover + +Note: + This module requires active internet connection and access to AWS S3 buckets. + Some datasets might have temporal gaps or missing data. 
+""" +from typing import List, Tuple, Union +from datetime import datetime +import warnings +import pandas as pd +import s3fs +import xarray as xr +from functools import lru_cache + +# Configuration dictionary for different data products +DATA_CONFIGS = { + "vhi": { + "viirs": { + "path": "noaa-cdr-ndvi-pds/data/", + "pattern": "VIIRS-Land_*_" + }, + "avhrr": { + "path": "noaa-cdr-vegetation-health-pds/data/", + "pattern": "AVHRR-Land_*_" + } + }, + "lai_fpar": { + "viirs": { + "path": "noaa-viirs-lai-fpar/data/", + "pattern": "VIIRS-Land-LAI-FPAR_*_" + } + }, + "snow": { + "ims": { + "path": "noaa-cdr-snow-cover-extent-ims-nrt/", + "pattern": "snow_cover_extent_*_" + } + } +} + +def validate_inputs(date_generated: List[datetime], data_type: str, sensor: str) -> None: + """ + Validates input parameters. + + Args: + date_generated: List of dates to process + data_type: Type of data product + sensor: Sensor type + + Raises: + ValueError: If inputs are invalid + """ + if data_type not in DATA_CONFIGS: + raise ValueError(f"Unsupported data type: {data_type}. Available types: {list(DATA_CONFIGS.keys())}") + + if sensor not in DATA_CONFIGS[data_type]: + raise ValueError( + f"Unsupported sensor '{sensor}' for data type '{data_type}'. " + f"Available sensors: {list(DATA_CONFIGS[data_type].keys())}" + ) + +@lru_cache(maxsize=128) +def _get_cached_file_list(year: str, prod_path: str, pattern: str, file_date: str) -> List[str]: + """ + Cached version of file listing to improve performance for repeated requests. + """ + fs = s3fs.S3FileSystem(anon=True) + return fs.glob(f"{prod_path}{year}/{pattern}{file_date}_*.nc") + +def create_daily_data_list( + date_generated: List[datetime], + fs: s3fs.S3FileSystem, + data_type: str = "vhi", + sensor: str = "viirs", + warning: bool = False +) -> Tuple[List[str], int]: """ - Creates a list of daily vhi (Vegetative Health Index) files and calculates the total size of the files. 
+ Creates a list of daily data files and calculates the total size of the files. - Parameters: - date_generated (list): A list of dates for which to check the existence of AOD files. - fs (FileSystem): The file system object used to check file existence and size. + Args: + date_generated: List of dates to process + fs: S3 filesystem object + data_type: Type of data product + sensor: Sensor type + warning: Whether to warn instead of raising an error for missing files Returns: - tuple: A tuple containing the list of file paths and the total size of the files. + Tuple containing list of file paths and total size """ - import warnings + validate_inputs(date_generated, data_type, sensor) + + file_list = [] + total_size = 0 + config = DATA_CONFIGS[data_type][sensor] - # Loop through observation dates & check for files - nodd_file_list = [] - nodd_total_size = 0 for date in date_generated: file_date = date.strftime("%Y%m%d") year = file_date[:4] - prod_path = "noaa-cdr-ndvi-pds/data/" + year + "/" - patt = "VIIRS-Land_*_" - file_names = fs.glob(prod_path + patt + file_date + "_*.nc") - # If file exists, add path to list and add file size to total - if file_names: - nodd_file_list.extend(file_names) - nodd_total_size = nodd_total_size + sum(fs.size(f) for f in file_names) - else: - msg = "File does not exist on AWS: " + prod_path + patt + file_date + "_*.nc" - if warning: - warnings.warn(msg) - nodd_file_list.append(None) + + try: + file_names = _get_cached_file_list( + year, + config["path"], + config["pattern"], + file_date + ) + + if file_names: + file_list.extend(file_names) + total_size += sum(fs.size(f) for f in file_names) else: - raise ValueError(msg) + raise FileNotFoundError( + f"No files found for {data_type} ({sensor}) on {file_date}" + ) - return nodd_file_list, nodd_total_size + except Exception as e: + if warning: + warnings.warn(str(e)) + file_list.append(None) + else: + raise ValueError(str(e)) + return file_list, total_size -def open_dataset(date): +def 
process_timeofday(dataset: xr.Dataset) -> xr.Dataset: """ - Opens a dataset for the given date. + Process TIMEOFDAY variable in dataset. - Parameters: - date (str or datetime-like): The date for which to open the dataset. - 1981--present are available. + Args: + dataset: Input xarray dataset Returns: - xarray.Dataset: The opened dataset. - - Raises: - ValueError: If the input parameters are invalid. + Processed dataset """ - import pandas as pd - import s3fs - import xarray as xr + if "TIMEOFDAY" in dataset: + m = dataset["TIMEOFDAY"].attrs.pop("scale_factor") + b = dataset["TIMEOFDAY"].attrs.pop("add_offset") + fv = dataset["TIMEOFDAY"].attrs.pop("_FillValue") - if isinstance(date, str): - date_generated = [pd.Timestamp(date)] - else: - date_generated = [date] + dataset["TIMEOFDAY"] = dataset["TIMEOFDAY"] * m + b + dataset["TIMEOFDAY"].attrs.update(units="hours") + dataset = xr.decode_cf(dataset) - # Access AWS using anonymous credentials - fs = s3fs.S3FileSystem(anon=True) + dataset["TIMEOFDAY"] = dataset["TIMEOFDAY"].where( + dataset["TIMEOFDAY"] != pd.Timedelta(fv * m + b, unit="hours") + ) + else: + dataset = xr.decode_cf(dataset) - file_list, _ = create_daily_vhi_list(date_generated, fs) + return dataset - if len(file_list) == 0 or all(f is None for f in file_list): - raise ValueError(f"Files not available for product and date: {date_generated[0]}") +def open_dataset( + date: Union[str, datetime], + data_type: str = "vhi", + sensor: str = "viirs" +) -> xr.Dataset: + """ + Opens a dataset for the given date. 
- aws_file = fs.open(file_list[0]) + Args: + date: Date to process + data_type: Type of data product + sensor: Sensor type - dset = xr.open_dataset(aws_file, decode_cf=False) + Returns: + Opened xarray dataset + """ + date_generated = [pd.Timestamp(date)] if isinstance(date, str) else [date] - # Deal with TIMEOFDAY variable manually to avoid warnings - m = dset["TIMEOFDAY"].attrs.pop("scale_factor") # 0.01 - b = dset["TIMEOFDAY"].attrs.pop("add_offset") # 0 - fv = dset["TIMEOFDAY"].attrs.pop("_FillValue") # -9999 - dset["TIMEOFDAY"] = dset["TIMEOFDAY"] * m + b - dset["TIMEOFDAY"].attrs.update(units="hours") # -> auto timedelta conversion - dset = xr.decode_cf(dset) - dset["TIMEOFDAY"] = dset["TIMEOFDAY"].where( - dset["TIMEOFDAY"] != pd.Timedelta(fv * m + b, unit="hours") - ) + fs = s3fs.S3FileSystem(anon=True) + file_list, _ = create_daily_data_list(date_generated, fs, data_type, sensor) - return dset + if len(file_list) == 0 or all(f is None for f in file_list): + raise ValueError(f"Files not available for {data_type} ({sensor}) and date: {date_generated[0]}") + dset = xr.open_dataset(fs.open(file_list[0]), decode_cf=False) + return process_timeofday(dset) -def open_mfdataset(dates, error_missing=False): +def open_mfdataset( + dates: Union[pd.DatetimeIndex, datetime, str], + data_type: str = "vhi", + sensor: str = "viirs", + error_missing: bool = False +) -> xr.Dataset: """ - Opens and combines multiple NetCDF files into a single xarray dataset. + Opens and combines multiple NetCDF files into a single dataset. - Parameters: - dates (pandas.DatetimeIndex): The dates for which to retrieve the data. - error_missing (bool, optional): If False (default), skip missing files with warning - and continue processing. Otherwise, raise an error. - Returns: - xarray.Dataset: The combined dataset containing the data for the specified dates. 
+ Args: + dates: Dates to process + data_type: Type of data product + sensor: Sensor type + error_missing: Whether to raise error on missing files - Raises: - ValueError: If the input parameters are invalid. + Returns: + Combined xarray dataset """ - from collections.abc import Iterable - - import pandas as pd - import s3fs - import xarray as xr - - if isinstance(dates, Iterable) and not isinstance(dates, str): - dates = pd.DatetimeIndex(dates) - else: + if isinstance(dates, (str, datetime)): dates = pd.DatetimeIndex([dates]) + elif not isinstance(dates, pd.DatetimeIndex): + dates = pd.DatetimeIndex(dates) - # Access AWS using anonymous credentials fs = s3fs.S3FileSystem(anon=True) - - file_list, _ = create_daily_vhi_list(dates, fs, warning=not error_missing) + file_list, _ = create_daily_data_list( + dates, + fs, + data_type=data_type, + sensor=sensor, + warning=not error_missing + ) if len(file_list) == 0 or all(f is None for f in file_list): - raise ValueError(f"Files not available for product and dates: {dates}") + raise ValueError(f"Files not available for {data_type} ({sensor}) and dates: {dates}") aws_files = [fs.open(f) for f in file_list if f is not None] - dset = xr.open_mfdataset( aws_files, concat_dim="time", @@ -126,15 +266,4 @@ def open_mfdataset(dates, error_missing=False): decode_cf=False, ) - # Deal with TIMEOFDAY variable manually to avoid warnings - m = dset["TIMEOFDAY"].attrs.pop("scale_factor") # 0.01 - b = dset["TIMEOFDAY"].attrs.pop("add_offset") # 0 - fv = dset["TIMEOFDAY"].attrs.pop("_FillValue") # -9999 - dset["TIMEOFDAY"] = dset["TIMEOFDAY"] * m + b - dset["TIMEOFDAY"].attrs.update(units="hours") # -> auto timedelta conversion - dset = xr.decode_cf(dset) - dset["TIMEOFDAY"] = dset["TIMEOFDAY"].where( - dset["TIMEOFDAY"] != pd.Timedelta(fv * m + b, unit="hours") - ) - - return dset + return process_timeofday(dset) \ No newline at end of file From ceae7cb3aad1fb9d65e6630e0e7aed85cdd2b24f Mon Sep 17 00:00:00 2001 From: bbakernoaa Date: Tue, 
19 Nov 2024 10:40:02 -0500 Subject: [PATCH 47/49] update lai bucket --- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index a03cc8c1..8ebcc74d 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -64,23 +64,27 @@ "vhi": { "viirs": { "path": "noaa-cdr-ndvi-pds/data/", - "pattern": "VIIRS-Land_*_" + "pattern": "VIIRS-Land_*" }, "avhrr": { "path": "noaa-cdr-vegetation-health-pds/data/", - "pattern": "AVHRR-Land_*_" + "pattern": "AVHRR-Land_*" } }, "lai_fpar": { "viirs": { - "path": "noaa-viirs-lai-fpar/data/", - "pattern": "VIIRS-Land-LAI-FPAR_*_" + "path": "noaa-cdr-leaf-area-index-fapar-pds/data/", + "pattern": "VIIRS-Land_*" + }, + 'avhrr': { + "path": "noaa-cdr-leaf-area-index-fapar-pds/data/", + "pattern": "AVHRR-Land_*" } }, "snow": { "ims": { "path": "noaa-cdr-snow-cover-extent-ims-nrt/", - "pattern": "snow_cover_extent_*_" + "pattern": "snow_cover_extent_*" } } } From 129b644f8b56d7e7c3cc895d8edeee016584d106 Mon Sep 17 00:00:00 2001 From: bbakernoaa Date: Tue, 19 Nov 2024 10:47:57 -0500 Subject: [PATCH 48/49] imports --- monetio/sat/nesdis_avhrr_aot_aws_gridded.py | 1 + monetio/sat/nesdis_viirs_aod_aws_gridded.py | 12 +++++++----- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 9 +++++++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py index 75da18cc..dae9b0ee 100644 --- a/monetio/sat/nesdis_avhrr_aot_aws_gridded.py +++ b/monetio/sat/nesdis_avhrr_aot_aws_gridded.py @@ -56,6 +56,7 @@ from enum import Enum from datetime import datetime from pathlib import Path +from typing import List, Tuple, Union import warnings import pandas as pd import s3fs diff --git a/monetio/sat/nesdis_viirs_aod_aws_gridded.py 
b/monetio/sat/nesdis_viirs_aod_aws_gridded.py index ec13ab1e..7acbb937 100644 --- a/monetio/sat/nesdis_viirs_aod_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_aod_aws_gridded.py @@ -30,16 +30,18 @@ - VIIRS AOD Algorithm: https://www.star.nesdis.noaa.gov/jpss/documents/ATBD/ATBD_EPS_Aerosol_AOD_v3.0.1.pdf - Data Access: https://www.avl.class.noaa.gov/saa/products/welcome """ - -from typing import List, Tuple, Union +# Standard library imports from datetime import datetime +from enum import Enum +from functools import lru_cache +from pathlib import Path +from typing import List, Tuple, Union import warnings + +# Third-party imports import pandas as pd import s3fs import xarray as xr -from enum import Enum -from functools import lru_cache -from pathlib import Path class AveragingTime(str, Enum): """Enumeration of valid averaging time periods.""" diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index 8ebcc74d..60c8ddb4 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -51,13 +51,18 @@ This module requires active internet connection and access to AWS S3 buckets. Some datasets might have temporal gaps or missing data. 
""" -from typing import List, Tuple, Union +# Standard library imports from datetime import datetime +from enum import Enum +from functools import lru_cache +from pathlib import Path +from typing import List, Tuple, Union import warnings + +# Third-party imports import pandas as pd import s3fs import xarray as xr -from functools import lru_cache # Configuration dictionary for different data products DATA_CONFIGS = { From 561b61637bbeda2625bc2d06db8efe91faa4c8aa Mon Sep 17 00:00:00 2001 From: bbakernoaa Date: Tue, 19 Nov 2024 10:54:33 -0500 Subject: [PATCH 49/49] pass kwarg to xaray --- monetio/sat/nesdis_viirs_ndvi_aws_gridded.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py index 60c8ddb4..02365839 100644 --- a/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py +++ b/monetio/sat/nesdis_viirs_ndvi_aws_gridded.py @@ -236,7 +236,8 @@ def open_mfdataset( dates: Union[pd.DatetimeIndex, datetime, str], data_type: str = "vhi", sensor: str = "viirs", - error_missing: bool = False + error_missing: bool = False, + **kwargs ) -> xr.Dataset: """ Opens and combines multiple NetCDF files into a single dataset. @@ -273,6 +274,7 @@ def open_mfdataset( concat_dim="time", combine="nested", decode_cf=False, + **kwargs ) return process_timeofday(dset) \ No newline at end of file