diff --git a/README.md b/README.md index 0f2697a..f3d5d25 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ $ python evaluate_xaj.py --exp expcamels001 Run the following code to see the results of the evaluation: ```Shell -$ python post_process.py --exp expcamels001 +$ python visualize.py --exp expcamels001 ``` You will see the results in the `example` directory. diff --git a/hydromodel/datasets/data_preprocess.py b/hydromodel/datasets/data_preprocess.py index 14ff720..210164b 100644 --- a/hydromodel/datasets/data_preprocess.py +++ b/hydromodel/datasets/data_preprocess.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-27 16:31:55 +LastEditTime: 2024-03-27 18:17:14 LastEditors: Wenyu Ouyang Description: preprocess data for models in hydro-model-xaj FilePath: \hydro-model-xaj\hydromodel\datasets\data_preprocess.py @@ -13,10 +13,12 @@ from hydrodataset import Camels import numpy as np import pandas as pd +from pint import UnitRegistry from sklearn.model_selection import KFold import xarray as xr from hydrodata.utils.utils import streamflow_unit_conv +from hydrodata.cleaner.dmca_esr import rainfall_runoff_event_identify from hydromodel import CACHE_DIR, SETTING from hydromodel.datasets import * @@ -432,7 +434,7 @@ def get_ts_from_diffsource(data_type, data_dir, periods, basin_ids): data_dir The directory of the data source periods - The periods of the time series data + The periods of the time series data, [start_date, end_date] basin_ids The ids of the basins @@ -473,6 +475,7 @@ def get_ts_from_diffsource(data_type, data_dir, periods, basin_ids): r_mmd = streamflow_unit_conv(qobs_, basin_area, target_unit=target_unit) ts_data[flow_name] = r_mmd[flow_name] ts_data[flow_name].attrs["units"] = target_unit + ts_data = ts_data.sel(time=slice(periods[0], periods[1])) else: raise NotImplementedError( "You should set the data type as 'camels' or 'owndata'" @@ -519,3 +522,23 @@ def cross_val_split_tsdata( # cross validation train_and_test_data = cross_valid_data(ts_data, periods, warmup, cv_fold) return train_and_test_data + + +def get_rr_events(rain, flow, basin_area): + ureg = UnitRegistry() + # trans unit to mm/day + flow_threshold = streamflow_unit_conv( + np.array([100]) * ureg.m**3 / ureg.s, + basin_area.isel(basin=0).to_array().to_numpy() * ureg.km**2, + target_unit="mm/h", + ) + rr_events = {} + for basin in basin_area.basin.values: + rr_event = rainfall_runoff_event_identify( + rain.sel(basin=basin).to_series(), + flow.sel(basin=basin).to_series(), + multiple=1, + flow_threshold=flow_threshold[0], + ) + rr_events[basin] = rr_event + return rr_events diff --git a/hydromodel/datasets/data_postprocess.py b/hydromodel/datasets/data_visualize.py similarity index 91% rename from hydromodel/datasets/data_postprocess.py rename to hydromodel/datasets/data_visualize.py index 1871fc0..8232f0b 100644 --- a/hydromodel/datasets/data_postprocess.py +++ b/hydromodel/datasets/data_visualize.py @@ -1,11 +1,11 @@ """Show results of calibration and validation.""" import os -from matplotlib import pyplot as plt +from matplotlib import dates, pyplot as plt import numpy as np import pandas as pd -from hydroutils import hydro_file, hydro_stat +from hydroutils import hydro_file, hydro_stat, hydro_plot def plot_sim_and_obs( @@ -54,6 +54,32 @@ def plot_train_iteration(likelihood, save_fig): plt.close() +def plot_rr_events(rr_events, rain, flow, save_dir=None): + for i in range(len(rr_events)): + beginning_time = rr_events["BEGINNING_RAIN"].iloc[i] + end_time = rr_events["END_FLOW"].iloc[i] # Ensure this column exists + + # Filter data for the specific time period + filtered_rain_data = rain.sel(time=slice(beginning_time, end_time)) + filter_flow_data = flow.sel(time=slice(beginning_time, end_time)) + + # Plotting + hydro_plot.plot_rainfall_runoff( + filtered_rain_data.time.values, + filtered_rain_data.values, + [filter_flow_data.values], + title=f"Rainfall-Runoff Event {i}", + leg_lst=["Flow"], + xlabel="Time", + ylabel="Flow (mm/h)", + ) + if save_dir: + if not os.path.exists(save_dir): + os.makedirs(save_dir) + save_fig = os.path.join(save_dir, f"rr_event_{i}.png") + plt.savefig(save_fig, bbox_inches="tight") + + def show_events_result( warmup_length, save_dir, diff --git a/hydromodel/trainers/calibrate_ga.py b/hydromodel/trainers/calibrate_ga.py index 800c18f..0c55e38 100644 --- a/hydromodel/trainers/calibrate_ga.py +++ b/hydromodel/trainers/calibrate_ga.py @@ -20,7 +20,7 @@ from hydroutils import hydro_file, hydro_stat -from hydromodel.datasets.data_postprocess import plot_sim_and_obs, plot_train_iteration +from datasets.data_visualize import plot_sim_and_obs, plot_train_iteration from hydromodel.models.model_config import read_model_param_dict from hydromodel.models.model_dict import MODEL_DICT, rmse43darr diff --git a/hydromodel/trainers/train.py b/hydromodel/trainers/train.py deleted file mode 100644 index 66bf12f..0000000 --- a/hydromodel/trainers/train.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Author: Wenyu Ouyang -Date: 2022-08-06 18:39:15 -LastEditTime: 2024-03-27 09:41:06 -LastEditors: Wenyu Ouyang -Description: We want to build a trainer class for calibration but not done yet. -FilePath: \hydro-model-xaj\hydromodel\trainers\train.py -Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved. -""" - -import matplotlib -import os -import sys -from pathlib import Path - -sys.path.append(os.path.dirname(Path(os.path.abspath(__file__)).parent)) - -matplotlib.use("Agg") -exp = "exp61561" -reps = [5000, 10000] -ngs = [1000, 2000] -book = "HF" -source = "sources" - - -class Trainer: - # TODO: build a trainer class for calibration - def __init__(self, exp, book, source, rep, ngs): - self.exp = exp - self.warmup_length = 365 - self.model = { - "name": "xaj_mz", - "source_type": source, - "source_book": book, - } - self.algorithm = { - "name": "SCE_UA", - "random_seed": 1234, - "rep": rep, - "ngs": ngs, - "kstop": int(rep / 5), - "peps": 0.001, - "pcento": 0.001, - } - self.comment = "{}{}rep{}ngs{}".format(book, source, rep, ngs) - - -for i in range(len(reps)): - for j in range(len(ngs)): - rep = reps[i] - ng = ngs[j] - xaj_calibrate = Trainer(exp, book, source, rep, ng) - # evaluate(xaj_calibrate) diff --git a/scripts/preprocess.py b/scripts/preprocess.py new file mode 100644 index 0000000..02c5eb6 --- /dev/null +++ b/scripts/preprocess.py @@ -0,0 +1,94 @@ +""" +Author: Wenyu Ouyang +Date: 2024-03-25 09:21:56 +LastEditTime: 2024-03-27 18:20:38 +LastEditors: Wenyu Ouyang +Description: preprocess data in an exp before training +FilePath: \hydro-model-xaj\scripts\preprocess.py +Copyright (c) 2023-2024 Wenyu Ouyang. All rights reserved. +""" + +from pathlib import Path +import sys +import os +import argparse + +current_script_path = Path(os.path.realpath(__file__)) +repo_path = current_script_path.parent.parent +sys.path.append(str(repo_path)) +from hydromodel.datasets.data_visualize import plot_rr_events +from hydromodel.datasets.data_preprocess import ( + get_basin_area, + get_ts_from_diffsource, + get_rr_events, +) + + +def main(args): + data_path = args.data_dir + data_type = args.data_type + basin_ids = args.basin_id + periods = args.period + exp = args.exp + where_save = Path(os.path.join(repo_path, "result", exp)) + if os.path.exists(where_save) is False: + os.makedirs(where_save) + ts_data = get_ts_from_diffsource(data_type, data_path, periods, basin_ids) + basin_area = get_basin_area(data_type, data_path, basin_ids) + rr_events = get_rr_events(ts_data["prcp"], ts_data["flow"], basin_area) + for basin, event in rr_events.items(): + basin_rr_dir = os.path.join(where_save, f"{basin}_rr_events") + plot_rr_events( + event, + ts_data["prcp"].sel(basin=basin), + ts_data["flow"].sel(basin=basin), + basin_rr_dir, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Prepare data.") + parser.add_argument( + "--data_type", + dest="data_type", + help="CAMELS dataset or your own data, such as 'camels' or 'owndata'", + # default="camels", + default="owndata", + type=str, + ) + parser.add_argument( + "--data_dir", + dest="data_dir", + help="The directory of the CAMELS dataset or your own data, for CAMELS," + + " as we use SETTING to set the data path, you can directly choose camels_us;" + + " for your own data, you should set the absolute path of your data directory", + # default="camels_us", + default="C:\\Users\\wenyu\\OneDrive\\data\\biliuhe", + type=str, + ) + parser.add_argument( + "--exp", + dest="exp", + help="An exp is corresponding to one data setting", + # default="expcamels001", + default="expbiliuhe001", + type=str, + ) + parser.add_argument( + "--basin_id", + dest="basin_id", + help="The basins' ids", + # default=["01439500", "06885500", "08104900", "09510200"], + default=["21401550"], + nargs="+", + ) + parser.add_argument( + "--period", + dest="period", + help="The whole period", + # default=["2007-01-01", "2014-01-01"], + default=["2012-06-10 00:00", "2022-08-31 23:00"], + nargs="+", + ) + args = parser.parse_args() + main(args) diff --git a/scripts/post_process.py b/scripts/visualize.py similarity index 95% rename from scripts/post_process.py rename to scripts/visualize.py index c748cdd..f781e19 100644 --- a/scripts/post_process.py +++ b/scripts/visualize.py @@ -1,10 +1,10 @@ """ Author: Wenyu Ouyang Date: 2022-11-19 17:27:05 -LastEditTime: 2024-03-27 16:27:03 +LastEditTime: 2024-03-27 17:54:11 LastEditors: Wenyu Ouyang Description: the script to postprocess results -FilePath: \hydro-model-xaj\scripts\post_process.py +FilePath: \hydro-model-xaj\scripts\visualize.py Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved. """ @@ -15,7 +15,7 @@ repo_dir = os.path.dirname(Path(os.path.abspath(__file__)).parent) sys.path.append(repo_dir) -from hydromodel.datasets.data_postprocess import plot_sim_and_obs +from hydromodel.datasets.data_visualize import plot_sim_and_obs from hydromodel.trainers.evaluate import Evaluator, read_yaml_config diff --git a/test/test_data_postprocess.py b/test/test_data_postprocess.py index 7c82f27..a3b7afd 100644 --- a/test/test_data_postprocess.py +++ b/test/test_data_postprocess.py @@ -14,7 +14,7 @@ from hydroutils import hydro_time -from hydromodel.datasets.data_postprocess import show_events_result, show_ts_result +from hydromodel.datasets.data_visualize import show_events_result, show_ts_result from hydromodel.models.xaj import xaj from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua from hydromodel.trainers.evaluate import _read_save_sceua_calibrated_params