Specify date range for bias correction #290

EllieBowler · 2024-08-05T10:49:30Z

Currently when bias_correct is flagged for get_seas_forecast_da the function loads all files from the mars.seas data folder. This data might be incomplete (uneven amount of data temporally) and also include the validation and test years for a particular model.

Propose to:

Update the code so start and end dates for bias correction period can be input by the user. This would vary depending on what the model was trained on, e.g. in my example validation/test was 2015-2022, so for bias correction only want to use data from 01/01/2005 - 31/12/2014 (10 years).
Perhaps also add a check to see what data is in the store, to make sure it's complete for that period?

I did a quick manual fix for point 1, but it isn't well integrated with the other functions. E.g. not sure the best approach for making the date ranges user-defined only when bias_correct=True. We wouldn't want default values like i have here - just pasting for demo!

def get_seas_forecast_da(
        hemisphere: str,
        date: str,
        bias_correct: bool = True,
        bias_start: str = "2005-01-01",
        bias_end: str = "2014-12-31", 
        source_path: object = os.path.join(".", "data", "mars.seas"),
) -> tuple:
    """
    Atmospheric model Ensemble 15-day forecast (Set III - ENS)

Coordinates:
  * time                          (time) datetime64[ns] 2022-04-01 ... 2022-0...
  * yc                            (yc) float64 5.388e+06 ... -5.388e+06
  * xc                            (xc) float64 -5.388e+06 ... 5.388e+06

    :param hemisphere: string, typically either 'north' or 'south'
    :param date:
    :param bias_correct:
    :param source_path:
    """

    seas_file = os.path.join(
        source_path, hemisphere, "siconca",
        "{}.nc".format(date.replace(day=1).strftime("%Y%m%d")))

    if os.path.exists(seas_file):
        seas_da = xr.open_dataset(seas_file).siconc
    else:
        logging.warning("No SEAS data available at {}".format(seas_file))
        return None

    if bias_correct:
        # Let's have some maximum, though it's quite high
#         (start_date, end_date) = (date - dt.timedelta(days=10 * 365),
#                                   date + dt.timedelta(days=10 * 365))
#         obs_da = get_obs_da(hemisphere, start_date, end_date)
    
        obs_da = get_obs_da(hemisphere, bias_start, bias_end)
        seas_hist_files = dict(
            sorted({
                os.path.abspath(el):
                    dt.datetime.strptime(os.path.basename(el)[0:8], "%Y%m%d")
                for el in glob.glob(
                    os.path.join(source_path, hemisphere, "siconca", "*.nc"))
                if re.search(r'^\d{8}\.nc$', os.path.basename(el)) and
                el != seas_file
            }.items()))

        def strip_overlapping_time(ds):
            data_file = os.path.abspath(ds.encoding["source"])

            try:
                idx = list(seas_hist_files.keys()).index(data_file)
            except ValueError:
                logging.exception("\n{} not in \n\n{}".format(
                    data_file, seas_hist_files))
                return None

            if idx < len(seas_hist_files) - 1:
                max_date = seas_hist_files[
                               list(seas_hist_files.keys())[idx + 1]] \
                           - dt.timedelta(days=1)
                logging.debug("Stripping {} to {}".format(data_file, max_date))
                return ds.sel(time=slice(None, max_date))
            else:
                logging.debug("Not stripping {}".format(data_file))
                return ds

        hist_da = xr.open_mfdataset(seas_hist_files,
                                    preprocess=strip_overlapping_time).siconc
        ## restrict to user-specified date range
        hist_da = hist_da.sel(time=slice(bias_start, bias_end))
        
        debiaser = LinearScaling(delta_type="additive",
                                 variable="siconc",
                                 reasonable_physical_range=[0., 1.])

        logging.info("Debiaser input ranges: obs {:.2f} - {:.2f}, "
                     "hist {:.2f} - {:.2f}, fut {:.2f} - {:.2f}".format(
                         float(obs_da.min()), float(obs_da.max()),
                         float(hist_da.min()), float(hist_da.max()),
                         float(seas_da.min()), float(seas_da.max())))
        
        logging.info("Debiaser date ranges: obs {} - {}, "
                     "hist {} - {}, fut {} - {}".format(
                         np.datetime_as_string(obs_da.time.values.min(), unit="D"), 
                         np.datetime_as_string(obs_da.time.values.max(), unit="D"),
                         np.datetime_as_string(hist_da.time.values.min(), unit="D"), 
                         np.datetime_as_string(hist_da.time.values.max(), unit="D"),
                         np.datetime_as_string(seas_da.time.values.min(), unit="D"), 
                         np.datetime_as_string(seas_da.time.values.max(), unit="D")))

        seas_array = debiaser.apply(obs_da.values, hist_da.values,
                                    seas_da.values)
        seas_da.values = seas_array
        logging.info("Debiaser output range: {:.2f} - {:.2f}".format(
            float(seas_da.min()), float(seas_da.max())))

    logging.info("Returning SEAS data from {} from {}".format(seas_file, date))

    # This isn't great looking, but we know we're not dealing with huge
    # indexes in here
    date_location = list(seas_da.time.values).index(pd.Timestamp(date))
    if date_location > 0:
        logging.warning("SEAS forecast started {} day before the requested "
                        "date {}, make sure you account for this!".format(
                            date_location, date))

    seas_da = seas_da.sel(time=slice(date, None))
    logging.debug("SEAS data range: {} - {}, {} dates".format(
        pd.to_datetime(min(seas_da.time.values)).strftime("%Y-%m-%d"),
        pd.to_datetime(max(seas_da.time.values)).strftime("%Y-%m-%d"),
        len(seas_da.time)))

    return seas_da

Linking in @JimCircadian as might be useful for the southern ocean analysis.

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Specify date range for bias correction #290

Specify date range for bias correction #290

EllieBowler commented Aug 5, 2024 •

edited

Loading

Specify date range for bias correction #290

Specify date range for bias correction #290

Comments

EllieBowler commented Aug 5, 2024 • edited Loading

EllieBowler commented Aug 5, 2024 •

edited

Loading