for loop for basins when calibrating with SCEUA; use new version of s…

…potpy
OuyangWenyu · Mar 26, 2024 · a522194 · a522194
1 parent 9fe2ba2
commit a522194
Show file tree

Hide file tree

Showing 12 changed files with 153 additions and 73 deletions.
diff --git a/env-dev.yml b/env-dev.yml
@@ -9,7 +9,7 @@ dependencies:
   - numba
   - scikit-learn
   - deap
-  - spotpy=1.5.14
+  - spotpy
   - yaml
   - bmipy
   - pyarrow

diff --git a/hydromodel/models/model_config.py b/hydromodel/models/model_config.py
@@ -1,12 +1,13 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-10-25 21:16:22
-LastEditTime: 2024-03-21 20:12:31
+LastEditTime: 2024-03-26 11:13:08
 LastEditors: Wenyu Ouyang
 Description: some basic config for hydro-model-xaj models
 FilePath: \hydro-model-xaj\hydromodel\models\model_config.py
 Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved.
 """
+
 from collections import OrderedDict
 
 # NOTE: Don't change the parameter settings
@@ -42,7 +43,8 @@
                 "LM": [60.0, 90.0],
                 "DM": [60.0, 120.0],
                 "C": [0.0, 0.2],
-                "SM": [50, 100.0],
+                "SM": [1, 100.0],
+                # "SM": [50, 100.0],
                 "EX": [1.0, 1.5],
                 "KI": [0.0, 0.7],
                 "KG": [0.0, 0.7],
@@ -77,14 +79,18 @@
         ],
         "param_range": OrderedDict(
             {
-                "K": [0.5, 1.0],
-                "B": [0.2, 0.4],
-                "IM": [0.07, 0.1],
+                "K": [0.1, 1.0],
+                # "K": [0.5, 1.0],
+                "B": [0.1, 0.4],
+                # "B": [0.2, 0.4],
+                "IM": [0.01, 0.1],
+                # "IM": [0.07, 0.1],
                 "UM": [0.0, 20.0],
                 "LM": [60.0, 90.0],
                 "DM": [60.0, 120.0],
                 "C": [0.0, 0.2],
-                "SM": [5, 10],
+                "SM": [1.0, 100.0],
+                # "SM": [5, 10],
                 "EX": [1.0, 1.5],
                 "KI": [0.0, 0.7],
                 "KG": [0.0, 0.7],
@@ -93,23 +99,6 @@
                 "CI": [0.0, 0.9],
                 "CG": [0.98, 0.998],
                 "KERNEL": [1, 15],
-
-                # "K": [0.1, 1.0],
-                # "B": [0.1, 0.4],
-                # "IM": [0.01, 0.1],
-                # "UM": [0.0, 20.0],
-                # "LM": [60.0, 90.0],
-                # "DM": [60.0, 120.0],
-                # "C": [0.0, 0.2],
-                # "SM": [5, 10],
-                # "EX": [1.0, 1.5],
-                # "KI": [0.0, 0.7],
-                # "KG": [0.0, 0.7],
-                # "A": [0.0, 2.9],
-                # "THETA": [0.0, 6.5],
-                # "CI": [0.0, 0.9],
-                # "CG": [0.98, 0.998],
-                # "KERNEL": [1, 15],
             }
         ),
     },

diff --git a/hydromodel/models/model_dict.py b/hydromodel/models/model_dict.py
@@ -1,10 +1,30 @@
+"""
+Author: Wenyu Ouyang
+Date: 2024-03-23 08:25:49
+LastEditTime: 2024-03-26 11:44:04
+LastEditors: Wenyu Ouyang
+Description: CRITERION_DICT and MODEL_DICT
+FilePath: \hydro-model-xaj\hydromodel\models\model_dict.py
+Copyright (c) 2023-2024 Wenyu Ouyang. All rights reserved.
+"""
+import numpy as np
 from spotpy.objectivefunctions import rmse
 from hydromodel.models.xaj import xaj
 from hydromodel.models.gr4j import gr4j
 from hydromodel.models.hymod import hymod
 
+
+def rmse43darr(obs, sim):
+    rmses = np.sqrt(np.nanmean((sim - obs) ** 2, axis=0))
+    rmse = rmses.mean(axis=0)
+    if rmse is np.nan:
+        raise ValueError("RMSE is nan, please check the input data.")
+    return rmse
+
+
 CRITERION_DICT = {
-    "RMSE": rmse,
+    "RMSE": rmse43darr,
+    "spotpy_rmse": rmse,
 }
 
 MODEL_DICT = {

diff --git a/hydromodel/models/xaj.py b/hydromodel/models/xaj.py
@@ -1,7 +1,7 @@
 """
 Author: Wenyu Ouyang
 Date: 2021-12-10 23:01:02
-LastEditTime: 2024-03-22 20:51:39
+LastEditTime: 2024-03-26 11:06:12
 LastEditors: Wenyu Ouyang
 Description: Core code for XinAnJiang model
 FilePath: /hydro-model-xaj/hydromodel/models/xaj.py
@@ -20,7 +20,7 @@
 
 
 # @jit
-@jit(nopython=True)
+# @jit(nopython=True)
 def calculate_evap(lm, c, wu0, wl0, prcp, pet) -> tuple[np.array, np.array, np.array]:
     """
     Three-layers evaporation model from "Watershed Hydrologic Simulation" written by Prof. RenJun Zhao.
@@ -64,7 +64,7 @@ def calculate_evap(lm, c, wu0, wl0, prcp, pet) -> tuple[np.array, np.array, np.a
 
 
 # @jit
-@jit(nopython=True)
+# @jit(nopython=True)
 def calculate_prcp_runoff(b, im, wm, w0, pe) -> tuple[np.array, np.array]:
     """
     Calculates the amount of runoff generated from rainfall after entering the underlying surface.
@@ -609,7 +609,7 @@ def sources5mm(
 
 
 # @jit
-@jit(nopython=True)
+# @jit(nopython=True)
 def linear_reservoir(x, weight, last_y=None) -> np.array:
     """
     Linear reservoir's release function
@@ -702,7 +702,6 @@ def uh_gamma(a, theta, len_uh=15):
     return w
 
 
-
 def xaj(
     p_and_e,
     params: Union[np.array, list],
@@ -743,7 +742,7 @@ def xaj(
     """
     # default values for some function parameters
     model_name = kwargs["name"] if "name" in kwargs else "xaj"
-    source_type = kwargs["source_type"] if "source_type" in kwargs else "sources5mm"
+    source_type = kwargs["source_type"] if "source_type" in kwargs else "sources"
     source_book = kwargs["source_book"] if "source_book" in kwargs else "HF"
     # params
     param_ranges = MODEL_PARAM_DICT[model_name]["param_range"]
@@ -753,13 +752,16 @@ def xaj(
         route_method = "MZ"
     else:
         raise NotImplementedError(
-            "We don't provide this route method now! Please use 'CS' or 'MZ'!"
+            "We don't provide this route method now! Please use 'CSL' or 'MZ'!"
+        )
+    if np.isnan(params).any():
+        raise ValueError(
+            "Parameters contain NaN values. Please check your opt algorithm"
         )
     xaj_params = [
         (value[1] - value[0]) * params[:, i] + value[0]
         for i, (key, value) in enumerate(param_ranges.items())
     ]
-    # xaj_params = [param_ranges[key] for key in param_ranges]
 
     k = xaj_params[0]
     b = xaj_params[1]
@@ -775,7 +777,6 @@ def xaj(
     # ki+kg should be smaller than 1; if not, we scale them
     ki = np.where(ki_ + kg_ < 1.0, ki_, (1.0 - PRECISION) / (ki_ + kg_) * ki_)
     kg = np.where(ki_ + kg_ < 1.0, kg_, (1.0 - PRECISION) / (ki_ + kg_) * kg_)
-
     if route_method == "CSL":
         cs = xaj_params[11]
         l = xaj_params[12]

diff --git a/hydromodel/trainers/calibrate_ga.py b/hydromodel/trainers/calibrate_ga.py
@@ -23,6 +23,7 @@
 from hydromodel.models.model_config import MODEL_PARAM_DICT
 from hydromodel.models.model_dict import MODEL_DICT
 from hydromodel.trainers.train_utils import plot_sim_and_obs, plot_train_iteration
+from models.model_dict import rmse43darr
 
 
 def evaluate(individual, x_input, y_true, warmup_length, model):
@@ -56,8 +57,7 @@ def evaluate(individual, x_input, y_true, warmup_length, model):
         x_input, params, warmup_length=warmup_length, **model
     )
     # Calculate RMSE for multi-dim arrays
-    rmses = np.sqrt(np.nanmean((sim - y_true[warmup_length:, :, :]) ** 2, axis=0))
-    return rmses.mean(axis=0)
+    return rmse43darr(y_true[warmup_length:, :, :], sim)
 
 
 def checkBounds(min, max):

diff --git a/hydromodel/trainers/calibrate_sceua.py b/hydromodel/trainers/calibrate_sceua.py
@@ -1,3 +1,4 @@
+import os
 from typing import Union
 import numpy as np
 import spotpy
@@ -11,6 +12,8 @@ class SpotSetup(object):
     def __init__(self, p_and_e, qobs, warmup_length=365, model=None, metric=None):
         """
         Set up for Spotpy
+        NOTE: once for a basin in one sampler or
+        for all basins in one sampler with one parameter combination
 
         Parameters
         ----------
@@ -78,7 +81,7 @@ def simulation(self, x: ParameterSet) -> Union[list, np.array]:
         sim, _ = MODEL_DICT[self.model["name"]](
             self.p_and_e, params, warmup_length=self.warmup_length, **self.model
         )
-        return sim[:, 0, 0]
+        return sim
 
     def evaluation(self) -> Union[list, np.array]:
         """
@@ -89,8 +92,7 @@ def evaluation(self) -> Union[list, np.array]:
         Union[list, np.array]
             observation
         """
-        # TODO: we only support one basin's calibration now
-        return self.true_obs[:, 0, 0]
+        return self.true_obs
 
     def objectivefunction(
         self,
@@ -135,7 +137,7 @@ def objectivefunction(
                 ) / pd.Timedelta(hours=1)
                 start_num = int(start_num)
                 end_num = int(end_num)
-                like_ = self.obj_func(
+                like_ = CRITERION_DICT[self.metric["obj_func"]](
                     evaluation[start_num:end_num,], simulation[start_num:end_num,]
                 )
                 count += 1
@@ -145,7 +147,14 @@ def objectivefunction(
 
 
 def calibrate_by_sceua(
-    p_and_e, qobs, dbname, warmup_length=365, model=None, algorithm=None, metric=None
+    basins,
+    p_and_e,
+    qobs,
+    dbname,
+    warmup_length=365,
+    model=None,
+    algorithm=None,
+    metric=None,
 ):
     """
     Function for calibrating model by SCE-UA
@@ -154,6 +163,8 @@ def calibrate_by_sceua(
 
     Parameters
     ----------
+    basins
+        basin ids
     p_and_e
         inputs of model
     qobs
@@ -206,25 +217,28 @@ def calibrate_by_sceua(
     peps = algorithm["peps"]
     pcento = algorithm["pcento"]
     np.random.seed(random_seed)  # Makes the results reproduceable
-
-    # Initialize the xaj example
-    # In this case, we tell the setup which algorithm we want to use, so
-    # we can use this exmaple for different algorithms
-    spot_setup = SpotSetup(
-        p_and_e,
-        qobs,
-        warmup_length=warmup_length,
-        model=model,
-        metric=metric,
-    )
-    # Select number of maximum allowed repetitions # 选择允许的最大重复次数
-    sampler = spotpy.algorithms.sceua(
-        spot_setup,
-        dbname=dbname,
-        dbformat="csv",
-        random_state=random_seed,
-    )
-    # Start the sampler, one can specify ngs, kstop, peps and pcento id desired
-    sampler.sample(rep, ngs=ngs, kstop=kstop, peps=peps, pcento=pcento)
-    print("Calibrate Finished!")
+    for i in range(len(basins)):
+        # Initialize the xaj example
+        # In this case, we tell the setup which algorithm we want to use, so
+        # we can use this exmaple for different algorithms
+        spot_setup = SpotSetup(
+            p_and_e[:, i : i + 1, :],
+            qobs[:, i : i + 1, :],
+            warmup_length=warmup_length,
+            model=model,
+            metric=metric,
+        )
+        db_basin = os.path.join(dbname, basins[i])
+        if not os.path.exists(db_basin):
+            os.makedirs(db_basin)
+        # Select number of maximum allowed repetitions
+        sampler = spotpy.algorithms.sceua(
+            spot_setup,
+            dbname=db_basin,
+            dbformat="csv",
+            random_state=random_seed,
+        )
+        # Start the sampler, one can specify ngs, kstop, peps and pcento id desired
+        sampler.sample(rep, ngs=ngs, kstop=kstop, peps=peps, pcento=pcento)
+        print("Calibrate Finished!")
     return sampler
diff --git a/requirements.txt b/requirements.txt
@@ -2,7 +2,7 @@ ipykernel
 numba
 scikit-learn
 deap
-spotpy==1.5.14
+spotpy
 pytest
 bmipy
 pyyaml

diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -18,7 +18,7 @@ ipykernel
 numba
 scikit-learn
 deap
-spotpy==1.5.14
+spotpy
 pytest
 bmipy
 pyyaml

diff --git a/scripts/calibrate_xaj_camels.py b/scripts/calibrate_xaj_camels.py
@@ -1,7 +1,7 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-11-19 17:27:05
-LastEditTime: 2024-03-25 20:52:08
+LastEditTime: 2024-03-26 10:16:49
 LastEditors: Wenyu Ouyang
 Description: the script to calibrate a model for CAMELS basin
 FilePath: \hydro-model-xaj\scripts\calibrate_xaj_camels.py
@@ -65,6 +65,7 @@ def main(args):
     r_mmd = streamflow_unit_conv(qobs_, basin_area, target_unit="mm/d")
     qobs = np.expand_dims(r_mmd["streamflow"].to_numpy().transpose(1, 0), axis=2)
     calibrate_by_sceua(
+        basin_ids,
         p_and_e,
         qobs,
         os.path.join(where_save, "sceua_xaj"),

diff --git a/test/conftest.py b/test/conftest.py
@@ -19,25 +19,30 @@ def camels():
 
 
 @pytest.fixture()
-def basin_area(camels):
-    return camels.read_area(["01013500"])
+def basins():
+    return ["01013500"]
 
 
 @pytest.fixture()
-def p_and_e(camels):
+def basin_area(camels, basins):
+    return camels.read_area(basins)
+
+
+@pytest.fixture()
+def p_and_e(camels, basins):
     p_and_e = camels.read_ts_xrdataset(
-        ["01013500"], ["2010-01-01", "2014-01-01"], ["prcp", "PET"]
+        basins, ["2010-01-01", "2014-01-01"], ["prcp", "PET"]
     )
     # three dims: sequence (time), batch (basin), feature (variable)
     return p_and_e.to_array().to_numpy().transpose(2, 1, 0)
 
 
 @pytest.fixture()
-def qobs(basin_area, camels):
+def qobs(basin_area, camels, basins):
     import pint_xarray  # noqa
 
     qobs_ = camels.read_ts_xrdataset(
-        ["01013500"], ["2010-01-01", "2014-01-01"], ["streamflow"]
+        basins, ["2010-01-01", "2014-01-01"], ["streamflow"]
     )
     # we use pint package to handle the unit conversion
     # trans unit to mm/day
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,7 +9,7 @@ dependencies: @@
       - numba
       - scikit-learn
       - deap
-      - spotpy=1.5.14
+      - spotpy
       - yaml
       - bmipy
       - pyarrow
@@ Expand Down @@