diff --git a/env-dev.yml b/env-dev.yml index 2fea46a..a0bfd98 100644 --- a/env-dev.yml +++ b/env-dev.yml @@ -9,7 +9,7 @@ dependencies: - numba - scikit-learn - deap - - spotpy=1.5.14 + - spotpy - yaml - bmipy - pyarrow diff --git a/hydromodel/models/model_config.py b/hydromodel/models/model_config.py index f307600..0679534 100644 --- a/hydromodel/models/model_config.py +++ b/hydromodel/models/model_config.py @@ -1,12 +1,13 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-21 20:12:31 +LastEditTime: 2024-03-26 11:13:08 LastEditors: Wenyu Ouyang Description: some basic config for hydro-model-xaj models FilePath: \hydro-model-xaj\hydromodel\models\model_config.py Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved. """ + from collections import OrderedDict # NOTE: Don't change the parameter settings @@ -42,7 +43,8 @@ "LM": [60.0, 90.0], "DM": [60.0, 120.0], "C": [0.0, 0.2], - "SM": [50, 100.0], + "SM": [1, 100.0], + # "SM": [50, 100.0], "EX": [1.0, 1.5], "KI": [0.0, 0.7], "KG": [0.0, 0.7], @@ -77,14 +79,18 @@ ], "param_range": OrderedDict( { - "K": [0.5, 1.0], - "B": [0.2, 0.4], - "IM": [0.07, 0.1], + "K": [0.1, 1.0], + # "K": [0.5, 1.0], + "B": [0.1, 0.4], + # "B": [0.2, 0.4], + "IM": [0.01, 0.1], + # "IM": [0.07, 0.1], "UM": [0.0, 20.0], "LM": [60.0, 90.0], "DM": [60.0, 120.0], "C": [0.0, 0.2], - "SM": [5, 10], + "SM": [1.0, 100.0], + # "SM": [5, 10], "EX": [1.0, 1.5], "KI": [0.0, 0.7], "KG": [0.0, 0.7], @@ -93,23 +99,6 @@ "CI": [0.0, 0.9], "CG": [0.98, 0.998], "KERNEL": [1, 15], - - # "K": [0.1, 1.0], - # "B": [0.1, 0.4], - # "IM": [0.01, 0.1], - # "UM": [0.0, 20.0], - # "LM": [60.0, 90.0], - # "DM": [60.0, 120.0], - # "C": [0.0, 0.2], - # "SM": [5, 10], - # "EX": [1.0, 1.5], - # "KI": [0.0, 0.7], - # "KG": [0.0, 0.7], - # "A": [0.0, 2.9], - # "THETA": [0.0, 6.5], - # "CI": [0.0, 0.9], - # "CG": [0.98, 0.998], - # "KERNEL": [1, 15], } ), }, diff --git a/hydromodel/models/model_dict.py b/hydromodel/models/model_dict.py index 1ea2139..a9361d0 100644 --- a/hydromodel/models/model_dict.py +++ b/hydromodel/models/model_dict.py @@ -1,10 +1,30 @@ +""" +Author: Wenyu Ouyang +Date: 2024-03-23 08:25:49 +LastEditTime: 2024-03-26 11:44:04 +LastEditors: Wenyu Ouyang +Description: CRITERION_DICT and MODEL_DICT +FilePath: \hydro-model-xaj\hydromodel\models\model_dict.py +Copyright (c) 2023-2024 Wenyu Ouyang. All rights reserved. +""" +import numpy as np from spotpy.objectivefunctions import rmse from hydromodel.models.xaj import xaj from hydromodel.models.gr4j import gr4j from hydromodel.models.hymod import hymod + +def rmse43darr(obs, sim): + rmses = np.sqrt(np.nanmean((sim - obs) ** 2, axis=0)) + rmse = rmses.mean(axis=0) + if rmse is np.nan: + raise ValueError("RMSE is nan, please check the input data.") + return rmse + + CRITERION_DICT = { - "RMSE": rmse, + "RMSE": rmse43darr, + "spotpy_rmse": rmse, } MODEL_DICT = { diff --git a/hydromodel/models/xaj.py b/hydromodel/models/xaj.py index a6a5062..2a5965a 100644 --- a/hydromodel/models/xaj.py +++ b/hydromodel/models/xaj.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2021-12-10 23:01:02 -LastEditTime: 2024-03-22 20:51:39 +LastEditTime: 2024-03-26 11:06:12 LastEditors: Wenyu Ouyang Description: Core code for XinAnJiang model FilePath: /hydro-model-xaj/hydromodel/models/xaj.py @@ -20,7 +20,7 @@ # @jit -@jit(nopython=True) +# @jit(nopython=True) def calculate_evap(lm, c, wu0, wl0, prcp, pet) -> tuple[np.array, np.array, np.array]: """ Three-layers evaporation model from "Watershed Hydrologic Simulation" written by Prof. RenJun Zhao. @@ -64,7 +64,7 @@ def calculate_evap(lm, c, wu0, wl0, prcp, pet) -> tuple[np.array, np.array, np.a # @jit -@jit(nopython=True) +# @jit(nopython=True) def calculate_prcp_runoff(b, im, wm, w0, pe) -> tuple[np.array, np.array]: """ Calculates the amount of runoff generated from rainfall after entering the underlying surface. @@ -609,7 +609,7 @@ def sources5mm( # @jit -@jit(nopython=True) +# @jit(nopython=True) def linear_reservoir(x, weight, last_y=None) -> np.array: """ Linear reservoir's release function @@ -702,7 +702,6 @@ def uh_gamma(a, theta, len_uh=15): return w - def xaj( p_and_e, params: Union[np.array, list], @@ -743,7 +742,7 @@ def xaj( """ # default values for some function parameters model_name = kwargs["name"] if "name" in kwargs else "xaj" - source_type = kwargs["source_type"] if "source_type" in kwargs else "sources5mm" + source_type = kwargs["source_type"] if "source_type" in kwargs else "sources" source_book = kwargs["source_book"] if "source_book" in kwargs else "HF" # params param_ranges = MODEL_PARAM_DICT[model_name]["param_range"] @@ -753,13 +752,16 @@ def xaj( route_method = "MZ" else: raise NotImplementedError( - "We don't provide this route method now! Please use 'CS' or 'MZ'!" + "We don't provide this route method now! Please use 'CSL' or 'MZ'!" + ) + if np.isnan(params).any(): + raise ValueError( + "Parameters contain NaN values. Please check your opt algorithm" ) xaj_params = [ (value[1] - value[0]) * params[:, i] + value[0] for i, (key, value) in enumerate(param_ranges.items()) ] - # xaj_params = [param_ranges[key] for key in param_ranges] k = xaj_params[0] b = xaj_params[1] @@ -775,7 +777,6 @@ def xaj( # ki+kg should be smaller than 1; if not, we scale them ki = np.where(ki_ + kg_ < 1.0, ki_, (1.0 - PRECISION) / (ki_ + kg_) * ki_) kg = np.where(ki_ + kg_ < 1.0, kg_, (1.0 - PRECISION) / (ki_ + kg_) * kg_) - if route_method == "CSL": cs = xaj_params[11] l = xaj_params[12] diff --git a/hydromodel/trainers/calibrate_ga.py b/hydromodel/trainers/calibrate_ga.py index 1a07c78..9facfcd 100644 --- a/hydromodel/trainers/calibrate_ga.py +++ b/hydromodel/trainers/calibrate_ga.py @@ -23,6 +23,7 @@ from hydromodel.models.model_config import MODEL_PARAM_DICT from hydromodel.models.model_dict import MODEL_DICT from hydromodel.trainers.train_utils import plot_sim_and_obs, plot_train_iteration +from models.model_dict import rmse43darr def evaluate(individual, x_input, y_true, warmup_length, model): @@ -56,8 +57,7 @@ def evaluate(individual, x_input, y_true, warmup_length, model): x_input, params, warmup_length=warmup_length, **model ) # Calculate RMSE for multi-dim arrays - rmses = np.sqrt(np.nanmean((sim - y_true[warmup_length:, :, :]) ** 2, axis=0)) - return rmses.mean(axis=0) + return rmse43darr(y_true[warmup_length:, :, :], sim) def checkBounds(min, max): diff --git a/hydromodel/trainers/calibrate_sceua.py b/hydromodel/trainers/calibrate_sceua.py index 893b2bb..6656a91 100644 --- a/hydromodel/trainers/calibrate_sceua.py +++ b/hydromodel/trainers/calibrate_sceua.py @@ -1,3 +1,4 @@ +import os from typing import Union import numpy as np import spotpy @@ -11,6 +12,8 @@ class SpotSetup(object): def __init__(self, p_and_e, qobs, warmup_length=365, model=None, metric=None): """ Set up for Spotpy + NOTE: once for a basin in one sampler or + for all basins in one sampler with one parameter combination Parameters ---------- @@ -78,7 +81,7 @@ def simulation(self, x: ParameterSet) -> Union[list, np.array]: sim, _ = MODEL_DICT[self.model["name"]]( self.p_and_e, params, warmup_length=self.warmup_length, **self.model ) - return sim[:, 0, 0] + return sim def evaluation(self) -> Union[list, np.array]: """ @@ -89,8 +92,7 @@ def evaluation(self) -> Union[list, np.array]: Union[list, np.array] observation """ - # TODO: we only support one basin's calibration now - return self.true_obs[:, 0, 0] + return self.true_obs def objectivefunction( self, @@ -135,7 +137,7 @@ def objectivefunction( ) / pd.Timedelta(hours=1) start_num = int(start_num) end_num = int(end_num) - like_ = self.obj_func( + like_ = CRITERION_DICT[self.metric["obj_func"]]( evaluation[start_num:end_num,], simulation[start_num:end_num,] ) count += 1 @@ -145,7 +147,14 @@ def objectivefunction( def calibrate_by_sceua( - p_and_e, qobs, dbname, warmup_length=365, model=None, algorithm=None, metric=None + basins, + p_and_e, + qobs, + dbname, + warmup_length=365, + model=None, + algorithm=None, + metric=None, ): """ Function for calibrating model by SCE-UA @@ -154,6 +163,8 @@ def calibrate_by_sceua( Parameters ---------- + basins + basin ids p_and_e inputs of model qobs @@ -206,25 +217,28 @@ def calibrate_by_sceua( peps = algorithm["peps"] pcento = algorithm["pcento"] np.random.seed(random_seed) # Makes the results reproduceable - - # Initialize the xaj example - # In this case, we tell the setup which algorithm we want to use, so - # we can use this exmaple for different algorithms - spot_setup = SpotSetup( - p_and_e, - qobs, - warmup_length=warmup_length, - model=model, - metric=metric, - ) - # Select number of maximum allowed repetitions # 选择允许的最大重复次数 - sampler = spotpy.algorithms.sceua( - spot_setup, - dbname=dbname, - dbformat="csv", - random_state=random_seed, - ) - # Start the sampler, one can specify ngs, kstop, peps and pcento id desired - sampler.sample(rep, ngs=ngs, kstop=kstop, peps=peps, pcento=pcento) - print("Calibrate Finished!") + for i in range(len(basins)): + # Initialize the xaj example + # In this case, we tell the setup which algorithm we want to use, so + # we can use this exmaple for different algorithms + spot_setup = SpotSetup( + p_and_e[:, i : i + 1, :], + qobs[:, i : i + 1, :], + warmup_length=warmup_length, + model=model, + metric=metric, + ) + db_basin = os.path.join(dbname, basins[i]) + if not os.path.exists(db_basin): + os.makedirs(db_basin) + # Select number of maximum allowed repetitions + sampler = spotpy.algorithms.sceua( + spot_setup, + dbname=db_basin, + dbformat="csv", + random_state=random_seed, + ) + # Start the sampler, one can specify ngs, kstop, peps and pcento id desired + sampler.sample(rep, ngs=ngs, kstop=kstop, peps=peps, pcento=pcento) + print("Calibrate Finished!") return sampler diff --git a/requirements.txt b/requirements.txt index c031e65..1b47891 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ ipykernel numba scikit-learn deap -spotpy==1.5.14 +spotpy pytest bmipy pyyaml diff --git a/requirements_dev.txt b/requirements_dev.txt index d9c0900..140b3ba 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -18,7 +18,7 @@ ipykernel numba scikit-learn deap -spotpy==1.5.14 +spotpy pytest bmipy pyyaml diff --git a/scripts/calibrate_xaj_camels.py b/scripts/calibrate_xaj_camels.py index 579a86f..18bd949 100644 --- a/scripts/calibrate_xaj_camels.py +++ b/scripts/calibrate_xaj_camels.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-11-19 17:27:05 -LastEditTime: 2024-03-25 20:52:08 +LastEditTime: 2024-03-26 10:16:49 LastEditors: Wenyu Ouyang Description: the script to calibrate a model for CAMELS basin FilePath: \hydro-model-xaj\scripts\calibrate_xaj_camels.py @@ -65,6 +65,7 @@ def main(args): r_mmd = streamflow_unit_conv(qobs_, basin_area, target_unit="mm/d") qobs = np.expand_dims(r_mmd["streamflow"].to_numpy().transpose(1, 0), axis=2) calibrate_by_sceua( + basin_ids, p_and_e, qobs, os.path.join(where_save, "sceua_xaj"), diff --git a/test/conftest.py b/test/conftest.py index e544187..b8d90ca 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -19,25 +19,30 @@ def camels(): @pytest.fixture() -def basin_area(camels): - return camels.read_area(["01013500"]) +def basins(): + return ["01013500"] @pytest.fixture() -def p_and_e(camels): +def basin_area(camels, basins): + return camels.read_area(basins) + + +@pytest.fixture() +def p_and_e(camels, basins): p_and_e = camels.read_ts_xrdataset( - ["01013500"], ["2010-01-01", "2014-01-01"], ["prcp", "PET"] + basins, ["2010-01-01", "2014-01-01"], ["prcp", "PET"] ) # three dims: sequence (time), batch (basin), feature (variable) return p_and_e.to_array().to_numpy().transpose(2, 1, 0) @pytest.fixture() -def qobs(basin_area, camels): +def qobs(basin_area, camels, basins): import pint_xarray # noqa qobs_ = camels.read_ts_xrdataset( - ["01013500"], ["2010-01-01", "2014-01-01"], ["streamflow"] + basins, ["2010-01-01", "2014-01-01"], ["streamflow"] ) # we use pint package to handle the unit conversion # trans unit to mm/day diff --git a/test/test_calibrate.py b/test/test_calibrate.py index de2451b..bb2f698 100644 --- a/test/test_calibrate.py +++ b/test/test_calibrate.py @@ -24,9 +24,10 @@ def db_dir(): return the_dir -def test_calibrate_xaj_sceua(p_and_e, qobs, warmup_length, db_dir): +def test_calibrate_xaj_sceua(basins, p_and_e, qobs, warmup_length, db_dir): # just for testing, so the hyper-param is chosen for quick running calibrate_by_sceua( + basins, p_and_e, qobs, os.path.join(db_dir, "sceua_xaj"), diff --git a/test/test_data_postprocess.py b/test/test_data_postprocess.py index 3c6aa85..2db5253 100644 --- a/test/test_data_postprocess.py +++ b/test/test_data_postprocess.py @@ -1,9 +1,58 @@ """ Author: Wenyu Ouyang Date: 2022-10-25 21:16:22 -LastEditTime: 2024-03-25 14:59:43 +LastEditTime: 2024-03-26 11:56:33 LastEditors: Wenyu Ouyang Description: Test for data preprocess FilePath: \hydro-model-xaj\test\test_data_postprocess.py Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved. """ + +import spotpy +from spotpy.examples.spot_setup_hymod_python import spot_setup as hymod_setup +import matplotlib.pyplot as plt + + +def test_run_hymod_calibration(): + # a case from spotpy example + setup = hymod_setup(spotpy.objectivefunctions.rmse) + + # 创建SCE-UA算法的sampler + sampler = spotpy.algorithms.sceua(setup, dbname="test/SCEUA_hymod", dbformat="csv") + + # 设置校准参数 + repetitions = 5000 # 最大迭代次数 + + # 运行sampler + sampler.sample(repetitions, ngs=7, kstop=3, peps=0.1, pcento=0.1) + + # 从CSV文件加载结果 + results = spotpy.analyser.load_csv_results("test/SCEUA_hymod") + + # 绘制目标函数的变化 + plt.figure(figsize=(9, 5)) + plt.plot(results["like1"]) + plt.ylabel("RMSE") + plt.xlabel("Iteration") + plt.show() + + # 寻找最佳模型运行结果 + bestindex, bestobjf = spotpy.analyser.get_minlikeindex(results) + best_model_run = results[bestindex] + + # 提取并绘制最佳模型运行的结果 + fields = [word for word in best_model_run.dtype.names if word.startswith("sim")] + best_simulation = list(best_model_run[fields]) + + plt.figure(figsize=(16, 9)) + plt.plot( + best_simulation, + color="black", + linestyle="solid", + label="Best objf.=" + str(bestobjf), + ) + plt.plot(setup.evaluation(), "r.", markersize=3, label="Observation data") + plt.xlabel("Number of Observation Points") + plt.ylabel("Discharge [l s-1]") + plt.legend(loc="upper right") + plt.show()