Skip to content

Commit

Permalink
plot cv results
Browse files Browse the repository at this point in the history
  • Loading branch information
OuyangWenyu committed Mar 27, 2024
1 parent f3c1d38 commit 57084e9
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 190 deletions.
75 changes: 5 additions & 70 deletions hydromodel/datasets/data_postprocess.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Show results of calibration and validation."""

import os
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import spotpy

from hydroutils import hydro_file, hydro_stat

Expand Down Expand Up @@ -54,18 +54,15 @@ def plot_train_iteration(likelihood, save_fig):
plt.close()


def show_sceua_cali_result(
sceua_calibrated_file,
def show_events_result(
warmup_length,
save_dir,
basin_id,
train_period,
result_unit="mm/hour",
basin_area=None,
prcp=None,
):
"""
Plot all year result to see the effect of optimized parameters
Plot all events result to see the effect of optimized parameters
Parameters
----------
Expand All @@ -84,65 +81,7 @@ def show_sceua_cali_result(
-------
None
"""
# Load the results gained with the sceua sampler, stored in SCEUA_xaj.csv
# results = []
# for chunk in pd.read_csv(sceua_calibrated_file, chunksize=100000 ):
# results.append(chunk)
# results = pd.concat(results)
results = spotpy.analyser.load_csv_results(sceua_calibrated_file) # 读取结果
# Plot how the objective function was minimized during sampling
if not os.path.exists(save_dir): # 绘制采样过程中目标函数的最小化情况
os.makedirs(save_dir)
plot_train_iteration(
results["like1"],
os.path.join(save_dir, "train_iteration.png"), # 绘制迭代中的RMSE
)
# Plot the best model run
# Find the run_id with the minimal objective function value
bestindex, bestobjf = spotpy.analyser.get_minlikeindex(
results
) # 绘制最佳模型图并找到run—id
# Select best model run
best_model_run = results[bestindex] # 选择最佳模型结果
# Filter results for simulation results #最佳模型模拟结果
fields = [word for word in best_model_run.dtype.names if word.startswith("sim")]
best_simulation = list(best_model_run[fields])
convert_unit_sim = units.convert_unit(
np.array(best_simulation).reshape(1, -1),
# np.array(list(map(float, best_simulation)), dtype=float).reshape(1, -1),
result_unit,
units.unit["streamflow"],
basin_area=basin_area,
)
convert_unit_obs = units.convert_unit(
np.array(spot_setup.evaluation()).reshape(1, -1),
result_unit,
units.unit["streamflow"],
basin_area=basin_area,
)

# save calibrated results of calibration period #保存率定的结果
train_result_file = os.path.join(
save_dir,
"train_qsim_" + spot_setup.model["name"] + "_" + str(basin_id) + ".csv",
)
pd.DataFrame(convert_unit_sim.reshape(-1, 1)).to_csv(
train_result_file,
sep=",",
index=False,
header=False,
)
# calculation rmse、nashsutcliffe and bias for training period
stat_error = hydro_stat.stat_error(
convert_unit_obs,
convert_unit_sim,
)
print("Training Metrics:", basin_id, stat_error)
hydro_file.serialize_json_np(
stat_error, os.path.join(save_dir, "train_metrics.json")
)

# 循还画图
# TODO: not finished
time = pd.read_excel(
"D:/研究生/毕业论文/new毕业论文/预答辩/碧流河水库/站点信息/洪水率定时间.xlsx"
)
Expand Down Expand Up @@ -253,7 +192,7 @@ def show_sceua_cali_result(
plot_sim_and_obs(t_range_train, best_simulation, obs, prcp[:], save_fig)


def show_test_result(basin_id, test_date, qsim, obs, save_dir):
def show_ts_result(basin_id, test_date, qsim, obs, save_dir):
stat_error = hydro_stat.stat_error(obs.reshape(1, -1), qsim.reshape(1, -1))
print("Test Metrics:", basin_id, stat_error)
hydro_file.serialize_json_np(
Expand Down Expand Up @@ -371,7 +310,3 @@ def show_test_result(basin_id, test_date, qsim, obs, save_dir):
prcp[:],
save_fig,
)
# summarize_parameters(one_model_one_hyperparam_setting_dir, {"name": "xaj_mz"})
# renormalize_params(one_model_one_hyperparam_setting_dir, {"name":"xaj_mz"})
# summarize_metrics(one_model_one_hyperparam_setting_dir,{"name":"xaj_mz"})
# save_streamflow(one_model_one_hyperparam_setting_dir,{"name":"xaj_mz"})
16 changes: 13 additions & 3 deletions hydromodel/trainers/evaluate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2022-10-25 21:16:22
LastEditTime: 2024-03-27 10:42:57
LastEditTime: 2024-03-27 11:18:09
LastEditors: Wenyu Ouyang
Description: Plots for calibration and testing results
FilePath: \hydro-model-xaj\hydromodel\trainers\evaluate.py
Expand Down Expand Up @@ -197,9 +197,13 @@ def _summarize_metrics(self, basin_ids):
model_name = self.model_info["name"]
file_path = os.path.join(result_dir, f"{model_name}_evaluation_results.nc")
ds = xr.open_dataset(file_path)
# for metrics, warmup_length should be considered
warmup_length = self.config["warmup"]
qobs = ds["qobs"].transpose("basin", "time").to_numpy()[:, warmup_length:]
qsim = ds["qsim"].transpose("basin", "time").to_numpy()[:, warmup_length:]
test_metrics = hydro_stat.stat_error(
ds["qobs"].transpose("basin", "time").to_numpy(),
ds["qsim"].transpose("basin", "time").to_numpy(),
qobs,
qsim,
)
metric_dfs_test = pd.DataFrame(test_metrics, index=basin_ids)
metric_file_test = os.path.join(result_dir, "basins_metrics.csv")
Expand All @@ -224,6 +228,12 @@ def _save_evaluate_results(self, qsim, qobs, obs_ds):

print(f"Results saved to: {file_path}")

def load_results(self):
result_dir = self.save_dir
model_name = self.model_info["name"]
file_path = os.path.join(result_dir, f"{model_name}_evaluation_results.nc")
return xr.open_dataset(file_path)


def _read_save_sceua_calibrated_params(basin_id, save_dir, sceua_calibrated_file_name):
"""
Expand Down
49 changes: 38 additions & 11 deletions scripts/post_process.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2022-11-19 17:27:05
LastEditTime: 2024-03-27 08:37:58
LastEditTime: 2024-03-27 11:23:02
LastEditors: Wenyu Ouyang
Description: the script to postprocess results
FilePath: \hydro-model-xaj\scripts\post_process.py
Expand All @@ -15,20 +15,47 @@

repo_dir = os.path.dirname(Path(os.path.abspath(__file__)).parent)
sys.path.append(repo_dir)
from hydromodel.trainers.evaluate import read_yaml_config
from hydromodel.datasets.data_postprocess import plot_sim_and_obs
from hydromodel.trainers.evaluate import Evaluator, read_yaml_config


def statistics(args):
def visualize(args):
exp = args.exp
cali_dir = Path(os.path.join(repo_dir, "result", exp))
cali_config = read_yaml_config(os.path.join(cali_dir, "config.yaml"))
if os.path.exists(cali_dir) is False:
raise NotImplementedError(
"You should run prepare_data and calibrate scripts at first."
)
print(
"Compare evaluation results of different calibrated models in an exp directory"
)
kfold = cali_config["cv_fold"]
basins = cali_config["basin_id"]
warmup = cali_config["warmup"]
for fold in range(kfold):
print(f"Start to evaluate the {fold+1}-th fold")
fold_dir = os.path.join(cali_dir, f"sceua_xaj_cv{fold+1}")
# evaluate both train and test period for all basins
eval_train_dir = os.path.join(fold_dir, "train")
eval_test_dir = os.path.join(fold_dir, "test")
train_eval = Evaluator(cali_dir, fold_dir, eval_train_dir)
test_eval = Evaluator(cali_dir, fold_dir, eval_test_dir)
ds_train = train_eval.load_results()
ds_test = test_eval.load_results()
for basin in basins:
save_fig_train = os.path.join(eval_train_dir, f"train_sim_obs_{basin}.png")
plot_sim_and_obs(
ds_train["time"].isel(time=slice(warmup, None)),
ds_train["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
ds_train["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
save_fig_train,
xlabel="Date",
ylabel=None,
)
save_fig_test = os.path.join(eval_test_dir, f"test_sim_obs_{basin}.png")
plot_sim_and_obs(
ds_test["time"].isel(time=slice(warmup, None)),
ds_test["qsim"].sel(basin=basin).isel(time=slice(warmup, None)),
ds_test["qobs"].sel(basin=basin).isel(time=slice(warmup, None)),
save_fig_test,
xlabel="Date",
ylabel=None,
)
print(f"Finish visualizing the {fold}-th fold")


if __name__ == "__main__":
Expand All @@ -43,4 +70,4 @@ def statistics(args):
type=str,
)
the_args = parser.parse_args()
statistics(the_args)
visualize(the_args)
95 changes: 63 additions & 32 deletions test/test_data_postprocess.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
"""
Author: Wenyu Ouyang
Date: 2022-10-25 21:16:22
LastEditTime: 2024-03-26 17:01:09
LastEditTime: 2024-03-27 10:59:29
LastEditors: Wenyu Ouyang
Description: Test for data preprocess
FilePath: \hydro-model-xaj\test\test_data_postprocess.py
Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved.
"""

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import spotpy
from spotpy.examples.spot_setup_hymod_python import spot_setup as hymod_setup
from trainers.evaluate import _read_save_sceua_calibrated_params

from hydroutils import hydro_time

from hydromodel.datasets.data_postprocess import show_events_result, show_ts_result
from hydromodel.models.xaj import xaj
from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua
from hydromodel.trainers.evaluate import _read_save_sceua_calibrated_params


def test_run_hymod_calibration():
Expand Down Expand Up @@ -62,35 +65,63 @@ def test_run_hymod_calibration():
plt.show()


def test_read_save_sceua_calibrated_params(tmpdir):
# Create a temporary directory for testing
temp_dir = tmpdir.mkdir("test_data")

# Generate some dummy data
results = np.array(
[(1, 2, 3), (4, 5, 6), (7, 8, 9)],
dtype=[("par1", int), ("par2", int), ("par3", int)],
def test_show_calibrate_sceua_result(p_and_e, qobs, warmup_length, db_name, basin_area):
sampler = calibrate_by_sceua(
p_and_e,
qobs,
db_name,
warmup_length,
model={
"name": "xaj_mz",
"source_type": "sources",
"source_book": "HF",
},
algorithm={
"name": "SCE_UA",
"random_seed": 1234,
"rep": 5,
"ngs": 7,
"kstop": 3,
"peps": 0.1,
"pcento": 0.1,
},
)
spotpy.analyser.load_csv_results = lambda _: results
spotpy.analyser.get_minlikeindex = lambda _: (0, 0)

# Call the function
basin_id = "test_basin"
save_dir = temp_dir
sceua_calibrated_file_name = "test_results.csv"
result = _read_save_sceua_calibrated_params(
basin_id, save_dir, sceua_calibrated_file_name
train_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"])
show_events_result(
sampler.setup,
db_name,
warmup_length=warmup_length,
save_dir=db_name,
basin_id="basin_id",
train_period=train_period,
basin_area=basin_area,
)

# Check if the file is saved correctly
expected_file_path = os.path.join(save_dir, basin_id + "_calibrate_params.txt")
assert os.path.exists(expected_file_path)

# Check if the saved file contains the expected data
expected_data = pd.DataFrame([(1, 2, 3)], columns=["par1", "par2", "par3"])
saved_data = pd.read_csv(expected_file_path)
pd.testing.assert_frame_equal(saved_data, expected_data)
def test_show_test_result(p_and_e, qobs, warmup_length, db_name, basin_area):
params = _read_save_sceua_calibrated_params("basin_id", db_name, db_name)
qsim, _ = xaj(
p_and_e,
params,
warmup_length=warmup_length,
name="xaj_mz",
source_type="sources",
source_book="HF",
)

# Check if the returned result is correct
expected_result = np.array([(1, 2, 3)])
np.testing.assert_array_equal(result, expected_result)
qsim = units.convert_unit(
qsim,
unit_now="mm/day",
unit_final=units.unit["streamflow"],
basin_area=basin_area,
)
qobs = units.convert_unit(
qobs[warmup_length:, :, :],
unit_now="mm/day",
unit_final=units.unit["streamflow"],
basin_area=basin_area,
)
test_period = hydro_time.t_range_days(["2012-01-01", "2017-01-01"])
show_ts_result(
"basin_id", test_period[warmup_length:], qsim, qobs, save_dir=db_name
)
Loading

0 comments on commit 57084e9

Please sign in to comment.