You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Currently when bias_correct is flagged for get_seas_forecast_da the function loads all files from the mars.seas data folder. This data might be incomplete (uneven amount of data temporally) and also include the validation and test years for a particular model.
Propose to:
Update the code so start and end dates for bias correction period can be input by the user. This would vary depending on what the model was trained on, e.g. in my example validation/test was 2015-2022, so for bias correction only want to use data from 01/01/2005 - 31/12/2014 (10 years).
Perhaps also add a check to see what data is in the store, to make sure it's complete for that period?
I did a quick manual fix for point 1, but it isn't well integrated with the other functions. E.g. not sure the best approach for making the date ranges user-defined only when bias_correct=True. We wouldn't want default values like i have here - just pasting for demo!
def get_seas_forecast_da(
hemisphere: str,
date: str,
bias_correct: bool = True,
bias_start: str = "2005-01-01",
bias_end: str = "2014-12-31",
source_path: object = os.path.join(".", "data", "mars.seas"),
) -> tuple:
"""
Atmospheric model Ensemble 15-day forecast (Set III - ENS)
Coordinates:
* time (time) datetime64[ns] 2022-04-01 ... 2022-0...
* yc (yc) float64 5.388e+06 ... -5.388e+06
* xc (xc) float64 -5.388e+06 ... 5.388e+06
:param hemisphere: string, typically either 'north' or 'south'
:param date:
:param bias_correct:
:param source_path:
"""
seas_file = os.path.join(
source_path, hemisphere, "siconca",
"{}.nc".format(date.replace(day=1).strftime("%Y%m%d")))
if os.path.exists(seas_file):
seas_da = xr.open_dataset(seas_file).siconc
else:
logging.warning("No SEAS data available at {}".format(seas_file))
return None
if bias_correct:
# Let's have some maximum, though it's quite high
# (start_date, end_date) = (date - dt.timedelta(days=10 * 365),
# date + dt.timedelta(days=10 * 365))
# obs_da = get_obs_da(hemisphere, start_date, end_date)
obs_da = get_obs_da(hemisphere, bias_start, bias_end)
seas_hist_files = dict(
sorted({
os.path.abspath(el):
dt.datetime.strptime(os.path.basename(el)[0:8], "%Y%m%d")
for el in glob.glob(
os.path.join(source_path, hemisphere, "siconca", "*.nc"))
if re.search(r'^\d{8}\.nc$', os.path.basename(el)) and
el != seas_file
}.items()))
def strip_overlapping_time(ds):
data_file = os.path.abspath(ds.encoding["source"])
try:
idx = list(seas_hist_files.keys()).index(data_file)
except ValueError:
logging.exception("\n{} not in \n\n{}".format(
data_file, seas_hist_files))
return None
if idx < len(seas_hist_files) - 1:
max_date = seas_hist_files[
list(seas_hist_files.keys())[idx + 1]] \
- dt.timedelta(days=1)
logging.debug("Stripping {} to {}".format(data_file, max_date))
return ds.sel(time=slice(None, max_date))
else:
logging.debug("Not stripping {}".format(data_file))
return ds
hist_da = xr.open_mfdataset(seas_hist_files,
preprocess=strip_overlapping_time).siconc
## restrict to user-specified date range
hist_da = hist_da.sel(time=slice(bias_start, bias_end))
debiaser = LinearScaling(delta_type="additive",
variable="siconc",
reasonable_physical_range=[0., 1.])
logging.info("Debiaser input ranges: obs {:.2f} - {:.2f}, "
"hist {:.2f} - {:.2f}, fut {:.2f} - {:.2f}".format(
float(obs_da.min()), float(obs_da.max()),
float(hist_da.min()), float(hist_da.max()),
float(seas_da.min()), float(seas_da.max())))
logging.info("Debiaser date ranges: obs {} - {}, "
"hist {} - {}, fut {} - {}".format(
np.datetime_as_string(obs_da.time.values.min(), unit="D"),
np.datetime_as_string(obs_da.time.values.max(), unit="D"),
np.datetime_as_string(hist_da.time.values.min(), unit="D"),
np.datetime_as_string(hist_da.time.values.max(), unit="D"),
np.datetime_as_string(seas_da.time.values.min(), unit="D"),
np.datetime_as_string(seas_da.time.values.max(), unit="D")))
seas_array = debiaser.apply(obs_da.values, hist_da.values,
seas_da.values)
seas_da.values = seas_array
logging.info("Debiaser output range: {:.2f} - {:.2f}".format(
float(seas_da.min()), float(seas_da.max())))
logging.info("Returning SEAS data from {} from {}".format(seas_file, date))
# This isn't great looking, but we know we're not dealing with huge
# indexes in here
date_location = list(seas_da.time.values).index(pd.Timestamp(date))
if date_location > 0:
logging.warning("SEAS forecast started {} day before the requested "
"date {}, make sure you account for this!".format(
date_location, date))
seas_da = seas_da.sel(time=slice(date, None))
logging.debug("SEAS data range: {} - {}, {} dates".format(
pd.to_datetime(min(seas_da.time.values)).strftime("%Y-%m-%d"),
pd.to_datetime(max(seas_da.time.values)).strftime("%Y-%m-%d"),
len(seas_da.time)))
return seas_da
Linking in @JimCircadian as might be useful for the southern ocean analysis.
The text was updated successfully, but these errors were encountered:
Currently when bias_correct is flagged for
get_seas_forecast_da
the function loads all files from the mars.seas data folder. This data might be incomplete (uneven amount of data temporally) and also include the validation and test years for a particular model.Propose to:
I did a quick manual fix for point 1, but it isn't well integrated with the other functions. E.g. not sure the best approach for making the date ranges user-defined only when
bias_correct=True
. We wouldn't want default values like i have here - just pasting for demo!Linking in @JimCircadian as might be useful for the southern ocean analysis.
The text was updated successfully, but these errors were encountered: