From 57084e95afdfbed85456028cc741515aab96a642 Mon Sep 17 00:00:00 2001 From: "ouyang,wenyu" Date: Wed, 27 Mar 2024 11:28:08 +0800 Subject: [PATCH] plot cv results --- hydromodel/datasets/data_postprocess.py | 75 ++----------------- hydromodel/trainers/evaluate.py | 16 ++++- scripts/post_process.py | 49 ++++++++++--- test/test_data_postprocess.py | 95 ++++++++++++++++--------- test/test_show_results.py | 74 ------------------- 5 files changed, 119 insertions(+), 190 deletions(-) diff --git a/hydromodel/datasets/data_postprocess.py b/hydromodel/datasets/data_postprocess.py index 00412d2..1871fc0 100644 --- a/hydromodel/datasets/data_postprocess.py +++ b/hydromodel/datasets/data_postprocess.py @@ -1,9 +1,9 @@ """Show results of calibration and validation.""" + import os from matplotlib import pyplot as plt import numpy as np import pandas as pd -import spotpy from hydroutils import hydro_file, hydro_stat @@ -54,18 +54,15 @@ def plot_train_iteration(likelihood, save_fig): plt.close() -def show_sceua_cali_result( - sceua_calibrated_file, +def show_events_result( warmup_length, save_dir, - basin_id, train_period, - result_unit="mm/hour", basin_area=None, prcp=None, ): """ - Plot all year result to see the effect of optimized parameters + Plot all events result to see the effect of optimized parameters Parameters ---------- @@ -84,65 +81,7 @@ def show_sceua_cali_result( ------- None """ - # Load the results gained with the sceua sampler, stored in SCEUA_xaj.csv - # results = [] - # for chunk in pd.read_csv(sceua_calibrated_file, chunksize=100000 ): - # results.append(chunk) - # results = pd.concat(results) - results = spotpy.analyser.load_csv_results(sceua_calibrated_file) # 读取结果 - # Plot how the objective function was minimized during sampling - if not os.path.exists(save_dir): # 绘制采样过程中目标函数的最小化情况 - os.makedirs(save_dir) - plot_train_iteration( - results["like1"], - os.path.join(save_dir, "train_iteration.png"), # 绘制迭代中的RMSE - ) - # Plot the best model run - # Find the run_id with the minimal objective function value - bestindex, bestobjf = spotpy.analyser.get_minlikeindex( - results - ) # 绘制最佳模型图并找到run—id - # Select best model run - best_model_run = results[bestindex] # 选择最佳模型结果 - # Filter results for simulation results #最佳模型模拟结果 - fields = [word for word in best_model_run.dtype.names if word.startswith("sim")] - best_simulation = list(best_model_run[fields]) - convert_unit_sim = units.convert_unit( - np.array(best_simulation).reshape(1, -1), - # np.array(list(map(float, best_simulation)), dtype=float).reshape(1, -1), - result_unit, - units.unit["streamflow"], - basin_area=basin_area, - ) - convert_unit_obs = units.convert_unit( - np.array(spot_setup.evaluation()).reshape(1, -1), - result_unit, - units.unit["streamflow"], - basin_area=basin_area, - ) - - # save calibrated results of calibration period #保存率定的结果 - train_result_file = os.path.join( - save_dir, - "train_qsim_" + spot_setup.model["name"] + "_" + str(basin_id) + ".csv", - ) - pd.DataFrame(convert_unit_sim.reshape(-1, 1)).to_csv( - train_result_file, - sep=",", - index=False, - header=False, - ) - # calculation rmse、nashsutcliffe and bias for training period - stat_error = hydro_stat.stat_error( - convert_unit_obs, - convert_unit_sim, - ) - print("Training Metrics:", basin_id, stat_error) - hydro_file.serialize_json_np( - stat_error, os.path.join(save_dir, "train_metrics.json") - ) - - # 循还画图 + # TODO: not finished time = pd.read_excel( "D:/研究生/毕业论文/new毕业论文/预答辩/碧流河水库/站点信息/洪水率定时间.xlsx" ) @@ -253,7 +192,7 @@ def show_sceua_cali_result( plot_sim_and_obs(t_range_train, best_simulation, obs, prcp[:], save_fig) -def show_test_result(basin_id, test_date, qsim, obs, save_dir): +def show_ts_result(basin_id, test_date, qsim, obs, save_dir): stat_error = hydro_stat.stat_error(obs.reshape(1, -1), qsim.reshape(1, -1)) print("Test Metrics:", basin_id, stat_error) hydro_file.serialize_json_np( @@ -371,7 +310,3 @@ def show_test_result(basin_id, test_date, qsim, obs, save_dir): prcp[:], save_fig, ) - # summarize_parameters(one_model_one_hyperparam_setting_dir, {"name": "xaj_mz"}) - # renormalize_params(one_model_one_hyperparam_setting_dir, {"name":"xaj_mz"}) - # summarize_metrics(one_model_one_hyperparam_setting_dir,{"name":"xaj_mz"}) - # save_streamflow(one_model_one_hyperparam_setting_dir,{"name":"xaj_mz"}) diff --git a/hydromodel/trainers/evaluate.py b/hydromodel/trainers/evaluate.py index 3ad2844..63d02c3 100644 --- a/hydromodel/trainers/evaluate.py +++ b/hydromodel/trainers/evaluate.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-27 10:42:57 +LastEditTime: 2024-03-27 11:18:09 LastEditors: Wenyu Ouyang Description: Plots for calibration and testing results FilePath: \hydro-model-xaj\hydromodel\trainers\evaluate.py @@ -197,9 +197,13 @@ def _summarize_metrics(self, basin_ids): model_name = self.model_info["name"] file_path = os.path.join(result_dir, f"{model_name}_evaluation_results.nc") ds = xr.open_dataset(file_path) + # for metrics, warmup_length should be considered + warmup_length = self.config["warmup"] + qobs = ds["qobs"].transpose("basin", "time").to_numpy()[:, warmup_length:] + qsim = ds["qsim"].transpose("basin", "time").to_numpy()[:, warmup_length:] test_metrics = hydro_stat.stat_error( - ds["qobs"].transpose("basin", "time").to_numpy(), - ds["qsim"].transpose("basin", "time").to_numpy(), + qobs, + qsim, ) metric_dfs_test = pd.DataFrame(test_metrics, index=basin_ids) metric_file_test = os.path.join(result_dir, "basins_metrics.csv") @@ -224,6 +228,12 @@ def _save_evaluate_results(self, qsim, qobs, obs_ds): print(f"Results saved to: {file_path}") + def load_results(self): + result_dir = self.save_dir + model_name = self.model_info["name"] + file_path = os.path.join(result_dir, f"{model_name}_evaluation_results.nc") + return xr.open_dataset(file_path) + def _read_save_sceua_calibrated_params(basin_id, save_dir, sceua_calibrated_file_name): """ diff --git a/scripts/post_process.py b/scripts/post_process.py index 8b19b27..e838797 100644 --- a/scripts/post_process.py +++ b/scripts/post_process.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-11-19 17:27:05 -LastEditTime: 2024-03-27 08:37:58 +LastEditTime: 2024-03-27 11:23:02 LastEditors: Wenyu Ouyang Description: the script to postprocess results FilePath: \hydro-model-xaj\scripts\post_process.py @@ -15,20 +15,47 @@ repo_dir = os.path.dirname(Path(os.path.abspath(__file__)).parent) sys.path.append(repo_dir) -from hydromodel.trainers.evaluate import read_yaml_config +from hydromodel.datasets.data_postprocess import plot_sim_and_obs +from hydromodel.trainers.evaluate import Evaluator, read_yaml_config -def statistics(args): +def visualize(args): exp = args.exp cali_dir = Path(os.path.join(repo_dir, "result", exp)) cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml")) - if os.path.exists(cali_dir) is False: - raise NotImplementedError( - "You should run prepare_data and calibrate scripts at first." - ) - print( - "Compare evaluation results of different calibrated models in an exp directory" - ) + kfold = cali_config["cv_fold"] + basins = cali_config["basin_id"] + warmup = cali_config["warmup"] + for fold in range(kfold): + print(f"Start to evaluate the {fold+1}-th fold") + fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}") + # evaluate both train and test period for all basins + eval_train_dir = os.path.join(fold_dir, "train") + eval_test_dir = os.path.join(fold_dir, "test") + train_eval = Evaluator(cali_dir, fold_dir, eval_train_dir) + test_eval = Evaluator(cali_dir, fold_dir, eval_test_dir) + ds_train = train_eval.load_results() + ds_test = test_eval.load_results() + for basin in basins: + save_fig_train = os.path.join(eval_train_dir, f"train_sim_obs_{basin}.png") + plot_sim_and_obs( + ds_train["time"].isel(time=slice(warmup, None)), + ds_train["qsim"].sel(basin=basin).isel(time=slice(warmup, None)), + ds_train["qobs"].sel(basin=basin).isel(time=slice(warmup, None)), + save_fig_train, + xlabel="Date", + ylabel=None, + ) + save_fig_test = os.path.join(eval_test_dir, f"test_sim_obs_{basin}.png") + plot_sim_and_obs( + ds_test["time"].isel(time=slice(warmup, None)), + ds_test["qsim"].sel(basin=basin).isel(time=slice(warmup, None)), + ds_test["qobs"].sel(basin=basin).isel(time=slice(warmup, None)), + save_fig_test, + xlabel="Date", + ylabel=None, + ) + print(f"Finish visualizing the {fold}-th fold") if __name__ == "__main__": @@ -43,4 +70,4 @@ def statistics(args): type=str, ) the_args = parser.parse_args() - statistics(the_args) + visualize(the_args) diff --git a/test/test_data_postprocess.py b/test/test_data_postprocess.py index 121ea70..7c82f27 100644 --- a/test/test_data_postprocess.py +++ b/test/test_data_postprocess.py @@ -1,20 +1,23 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-26 17:01:09 +LastEditTime: 2024-03-27 10:59:29 LastEditors: Wenyu Ouyang Description: Test for data preprocess FilePath: \hydro-model-xaj\test\test_data_postprocess.py Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved. """ -import os -import pandas as pd -import numpy as np import matplotlib.pyplot as plt import spotpy from spotpy.examples.spot_setup_hymod_python import spot_setup as hymod_setup -from trainers.evaluate import _read_save_sceua_calibrated_params + +from hydroutils import hydro_time + +from hydromodel.datasets.data_postprocess import show_events_result, show_ts_result +from hydromodel.models.xaj import xaj +from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua +from hydromodel.trainers.evaluate import _read_save_sceua_calibrated_params def test_run_hymod_calibration(): @@ -62,35 +65,63 @@ def test_run_hymod_calibration(): plt.show() -def test_read_save_sceua_calibrated_params(tmpdir): - # Create a temporary directory for testing - temp_dir = tmpdir.mkdir("test_data") - - # Generate some dummy data - results = np.array( - [(1, 2, 3), (4, 5, 6), (7, 8, 9)], - dtype=[("par1", int), ("par2", int), ("par3", int)], +def test_show_calibrate_sceua_result(p_and_e, qobs, warmup_length, db_name, basin_area): + sampler = calibrate_by_sceua( + p_and_e, + qobs, + db_name, + warmup_length, + model={ + "name": "xaj_mz", + "source_type": "sources", + "source_book": "HF", + }, + algorithm={ + "name": "SCE_UA", + "random_seed": 1234, + "rep": 5, + "ngs": 7, + "kstop": 3, + "peps": 0.1, + "pcento": 0.1, + }, ) - spotpy.analyser.load_csv_results = lambda _: results - spotpy.analyser.get_minlikeindex = lambda _: (0, 0) - - # Call the function - basin_id = "test_basin" - save_dir = temp_dir - sceua_calibrated_file_name = "test_results.csv" - result = _read_save_sceua_calibrated_params( - basin_id, save_dir, sceua_calibrated_file_name + train_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"]) + show_events_result( + sampler.setup, + db_name, + warmup_length=warmup_length, + save_dir=db_name, + basin_id="basin_id", + train_period=train_period, + basin_area=basin_area, ) - # Check if the file is saved correctly - expected_file_path = os.path.join(save_dir, basin_id + "_calibrate_params.txt") - assert os.path.exists(expected_file_path) - # Check if the saved file contains the expected data - expected_data = pd.DataFrame([(1, 2, 3)], columns=["par1", "par2", "par3"]) - saved_data = pd.read_csv(expected_file_path) - pd.testing.assert_frame_equal(saved_data, expected_data) +def test_show_test_result(p_and_e, qobs, warmup_length, db_name, basin_area): + params = _read_save_sceua_calibrated_params("basin_id", db_name, db_name) + qsim, _ = xaj( + p_and_e, + params, + warmup_length=warmup_length, + name="xaj_mz", + source_type="sources", + source_book="HF", + ) - # Check if the returned result is correct - expected_result = np.array([(1, 2, 3)]) - np.testing.assert_array_equal(result, expected_result) + qsim = units.convert_unit( + qsim, + unit_now="mm/day", + unit_final=units.unit["streamflow"], + basin_area=basin_area, + ) + qobs = units.convert_unit( + qobs[warmup_length:, :, :], + unit_now="mm/day", + unit_final=units.unit["streamflow"], + basin_area=basin_area, + ) + test_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"]) + show_ts_result( + "basin_id", test_period[warmup_length:], qsim, qobs, save_dir=db_name + ) diff --git a/test/test_show_results.py b/test/test_show_results.py index 8c706bc..26092fa 100644 --- a/test/test_show_results.py +++ b/test/test_show_results.py @@ -7,77 +7,3 @@ FilePath: \hydro-model-xaj\test\test_show_results.py Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved. """ - -from hydromodel.datasets.data_postprocess import ( - show_sceua_cali_result, -) -from hydromodel.models.xaj import xaj -from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua -from datasets.data_postprocess import show_test_result - - -from hydroutils import hydro_time - -from trainers.evaluate import _read_save_sceua_calibrated_params - - -def test_show_calibrate_sceua_result(p_and_e, qobs, warmup_length, db_name, basin_area): - sampler = calibrate_by_sceua( - p_and_e, - qobs, - db_name, - warmup_length, - model={ - "name": "xaj_mz", - "source_type": "sources", - "source_book": "HF", - }, - algorithm={ - "name": "SCE_UA", - "random_seed": 1234, - "rep": 5, - "ngs": 7, - "kstop": 3, - "peps": 0.1, - "pcento": 0.1, - }, - ) - train_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"]) - show_sceua_cali_result( - sampler.setup, - db_name, - warmup_length=warmup_length, - save_dir=db_name, - basin_id="basin_id", - train_period=train_period, - basin_area=basin_area, - ) - - -def test_show_test_result(p_and_e, qobs, warmup_length, db_name, basin_area): - params = _read_save_sceua_calibrated_params("basin_id", db_name, db_name) - qsim, _ = xaj( - p_and_e, - params, - warmup_length=warmup_length, - name="xaj_mz", - source_type="sources", - source_book="HF", - ) - - qsim = units.convert_unit( - qsim, - unit_now="mm/day", - unit_final=units.unit["streamflow"], - basin_area=basin_area, - ) - qobs = units.convert_unit( - qobs[warmup_length:, :, :], - unit_now="mm/day", - unit_final=units.unit["streamflow"], - basin_area=basin_area, - ) - test_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"]) - show_test_result( - "basin_id", test_period[warmup_length:], qsim, qobs, save_dir=db_name - )