Merge branch 'xiaonig-hourly' into dev

OuyangWenyu · Mar 21, 2024 · e439362 · e439362
2 parents 56bc492 + b22b501
commit e439362
Show file tree

Hide file tree

Showing 23 changed files with 1,046 additions and 23,950 deletions.
diff --git a/definitions.py b/definitions.py
@@ -10,7 +10,7 @@
 import os
 from pathlib import Path
 
-ROOT_DIR = os.path.dirname(os.path.abspath(__file__))  # This is your Project Root
+ROOT_DIR = os.path.dirname(os.path.abspath('/home/ldaning/code/biye/hydro-model-xaj/definitions.py'))  # This is your Project Root
 path = Path(ROOT_DIR)
 DATASET_DIR = os.path.join(path.parent.parent.absolute(), "data")
 print("Please Check your directory:")

diff --git a/hydromodel/calibrate/calibrate_sceua.py b/hydromodel/calibrate/calibrate_sceua.py
@@ -1,6 +1,7 @@
 from typing import Union
 import numpy as np
 import spotpy
+import pandas as pd
 from spotpy.parameter import Uniform, ParameterSet
 from spotpy.objectivefunctions import rmse
 from hydromodel.models.model_config import MODEL_PARAM_DICT
@@ -14,7 +15,7 @@ def __init__(
         self,
         p_and_e,
         qobs,
-        warmup_length=30,
+        warmup_length=365,
         model={
             "name": "xaj_mz",
             "source_type": "sources",
@@ -127,24 +128,52 @@ def objectivefunction(
         float
             likelihood
         """
-        # SPOTPY expects to get one or multiple values back,
-        # that define the performance of the model run
+        # 切片
+        #    time = pd.read_excel('/home/ldaning/code/biye/hydro-model-xaj/hydromodel/example/zhandian/洪水率定时间1.xlsx')
+        #     calibrate_starttime = pd.to_datetime("2014-7-10 0:00")
+        #     calibrate_endtime = pd.to_datetime("2020-6-24 0:00")
+        #     total = 0
+        #     count = 0
+        #     for i in range(len(time)):
+        #        if(time.iloc[i,0]<calibrate_endtime):
+        #            start_num = (time.iloc[i,0]-calibrate_starttime-pd.Timedelta(hours=365))/pd.Timedelta(hours=1)
+        #           end_num = (time.iloc[i,1]-calibrate_starttime-pd.Timedelta(hours=365))/pd.Timedelta(hours=1)
+        #           start_num = int(start_num)
+        #           end_num = int(end_num)
+        #           if not self.obj_func:
+        #      # This is used if not overwritten by user
+        #             like_ = rmse(evaluation[start_num:end_num,], simulation[start_num:end_num,])
+        #            total += like_
+        #            count += 1
+
+        #        else:
+        # Way to ensure flexible spot setup class
+        #          like_ = self.obj_func(evaluation[start_num:end_num,], simulation[start_num:end_num,])
+        #          total += like_
+        #         count += 1
+
+        #    like=total/count
+        #   return like
         if not self.obj_func:
             # This is used if not overwritten by user
             like = rmse(evaluation, simulation)
+
         else:
             # Way to ensure flexible spot setup class
             like = self.obj_func(evaluation, simulation)
         return like
 
+        # SPOTPY expects to get one or multiple values back,
+        # that define the performance of the model run
+
 
 def calibrate_by_sceua(
     p_and_e,
     qobs,
     dbname,
-    warmup_length=30,
+    warmup_length=365,
     model={
-        "name": "xaj_mz",
+        "name": "xaj_mz",  # 模型
         "source_type": "sources",
         "source_book": "HF",
     },
@@ -154,8 +183,8 @@ def calibrate_by_sceua(
         "rep": 1000,
         "ngs": 1000,
         "kstop": 500,
-        "peps": 0.001,
-        "pcento": 0.001,
+        "peps": 0.1,
+        "pcento": 0.1,
     },
 ):
     """
@@ -199,9 +228,9 @@ def calibrate_by_sceua(
         qobs,
         warmup_length=warmup_length,
         model=model,
-        obj_func=spotpy.objectivefunctions.rmse,
+        obj_func=spotpy.objectivefunctions.rmse,  # 均方根误差
     )
-    # Select number of maximum allowed repetitions
+    # Select number of maximum allowed repetitions # 选择允许的最大重复次数
     sampler = spotpy.algorithms.sceua(
         spot_setup,
         dbname=dbname,

diff --git a/hydromodel/data/data_postprocess.py b/hydromodel/data/data_postprocess.py
@@ -34,13 +34,15 @@ def read_save_sceua_calibrated_params(basin_id, save_dir, sceua_calibrated_file_
 
     """
     results = spotpy.analyser.load_csv_results(sceua_calibrated_file_name)
-    bestindex, bestobjf = spotpy.analyser.get_minlikeindex(results)
+    bestindex, bestobjf = spotpy.analyser.get_minlikeindex(
+        results
+    )  # 结果数组中具有最小目标函数的位置的索引
     best_model_run = results[bestindex]
     fields = [word for word in best_model_run.dtype.names if word.startswith("par")]
     best_calibrate_params = pd.DataFrame(list(best_model_run[fields]))
     save_file = os.path.join(save_dir, basin_id + "_calibrate_params.txt")
     best_calibrate_params.to_csv(save_file, sep=",", index=False, header=True)
-    return np.array(best_calibrate_params).reshape(1, -1)
+    return np.array(best_calibrate_params).reshape(1, -1)  # 返回一列最佳的结果
 
 
 def summarize_parameters(result_dir, model_info: dict):
@@ -136,9 +138,15 @@ def summarize_metrics(result_dir, model_info: dict):
         test_metric = hydro_file.unserialize_json(test_metric_file)
 
         for key, value in train_metric.items():
-            train_metrics[key] = value if count == 0 else train_metrics[key] + value
+            if count == 0:
+                train_metrics[key] = value
+            else:
+                train_metrics[key] = train_metrics[key] + value
         for key, value in test_metric.items():
-            test_metrics[key] = value if count == 0 else test_metrics[key] + value
+            if count == 0:
+                test_metrics[key] = value
+            else:
+                test_metrics[key] = test_metrics[key] + value
         count = count + 1
     metric_dfs_train = pd.DataFrame(train_metrics, index=basin_ids).transpose()
     metric_dfs_test = pd.DataFrame(test_metrics, index=basin_ids).transpose()
@@ -206,21 +214,21 @@ def read_and_save_et_ouputs(result_dir, fold: int):
     )
     train_period = data_info_train["time"]
     test_period = data_info_test["time"]
-    train_np_file = os.path.join(exp_dir, f"basins_lump_p_pe_q_fold{fold}_train.npy")
-    test_np_file = os.path.join(exp_dir, f"basins_lump_p_pe_q_fold{fold}_test.npy")
+    train_np_file = os.path.join(exp_dir, "data_info_fold" + str(fold) + "_train.npy")
+    test_np_file = os.path.join(exp_dir, "data_info_fold" + str(fold) + "_test.npy")
     train_data = np.load(train_np_file)
     test_data = np.load(test_np_file)
     es_test = []
     es_train = []
     for i in range(len(basins_id)):
         _, e_train = xaj(
-            train_data[:, i : i + 1, 0:2],
+            train_data[:, :, 0:2],
             param_values[basins_id[i]].values.reshape(1, -1),
             warmup_length=warmup_length,
             **model_func_param,
         )
         _, e_test = xaj(
-            test_data[:, i : i + 1, 0:2],
+            test_data[:, :, 0:2],
             param_values[basins_id[i]].values.reshape(1, -1),
             warmup_length=warmup_length,
             **model_func_param,
@@ -241,11 +249,12 @@ def read_and_save_et_ouputs(result_dir, fold: int):
 
 if __name__ == "__main__":
     one_model_one_hyperparam_setting_dir = os.path.join(
-        definitions.ROOT_DIR,
-        "hydromodel",
-        "example",
-        "exp61561",
-        "Dec08_11-38-48_LAPTOP-DNQOPPMS_fold1_HFsourcesrep1000ngs1000",
+        "/home/ldaning/code/biye/hydro-model-xaj/hydromodel/example/model_run_wuxi7"
+        # definitions.ROOT_DIR,
+        # "hydromodel",
+        # "example",
+        # "exp61561",
+        # "Dec08_11-38-48_LAPTOP-DNQOPPMS_fold1_HFsourcesrep1000ngs1000",
     )
     read_and_save_et_ouputs(one_model_one_hyperparam_setting_dir, fold=1)
     # summarize_parameters(one_model_one_hyperparam_setting_dir, {"name": "xaj_mz"})

diff --git a/hydromodel/data/data_preprocess.py b/hydromodel/data/data_preprocess.py
@@ -1,12 +1,13 @@
 """
 Author: Wenyu Ouyang
 Date: 2022-10-25 21:16:22
-LastEditTime: 2022-12-04 14:55:55
+LastEditTime: 2024-03-21 18:36:25
 LastEditors: Wenyu Ouyang
 Description: preprocess data for models in hydro-model-xaj
 FilePath: \hydro-model-xaj\hydromodel\data\data_preprocess.py
 Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved.
 """
+
 import numpy as np
 import pandas as pd
 from sklearn.model_selection import KFold
@@ -135,6 +136,8 @@ def split_train_test(json_file, npy_file, train_period, test_period):
     data_info_train = OrderedDict(
         {
             "time": [str(t)[:10] for t in hydro_time.t_range_days(train_period)],
+            # TODO: for time, more detailed time is needed, so we need to change the format of time
+            # "time": [str(t)[:16] for t in hydro_time.t_range_days(train_period)],
             "basin": data_info["basin"],
             "variable": data_info["variable"],
             "area": data_info["area"],
@@ -143,6 +146,8 @@ def split_train_test(json_file, npy_file, train_period, test_period):
     data_info_test = OrderedDict(
         {
             "time": [str(t)[:10] for t in hydro_time.t_range_days(test_period)],
+            # TODO: for time, more detailed time is needed, so we need to change the format of time
+            # "time": [str(t)[:16] for t in hydro_time.t_range_days(test_period)],
             "basin": data_info["basin"],
             "variable": data_info["variable"],
             "area": data_info["area"],
@@ -159,7 +164,7 @@ def split_train_test(json_file, npy_file, train_period, test_period):
     hydro_file.serialize_numpy(data[ind3, :, :], test_npy_file)
 
 
-def cross_valid_data(json_file, npy_file, period, warmup, cv_fold, time_unit="D"):
+def cross_valid_data(json_file, npy_file, period, warmup, cv_fold, time_unit="h"):
     """
     Split all data to train and test parts with same format