Skip to content

Commit

Permalink
Merge branch 'xiaonig-hourly' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
OuyangWenyu committed Mar 21, 2024
2 parents 56bc492 + b22b501 commit e439362
Show file tree
Hide file tree
Showing 23 changed files with 1,046 additions and 23,950 deletions.
2 changes: 1 addition & 1 deletion definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import os
from pathlib import Path

ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) # This is your Project Root
ROOT_DIR = os.path.dirname(os.path.abspath('/home/ldaning/code/biye/hydro-model-xaj/definitions.py')) # This is your Project Root
path = Path(ROOT_DIR)
DATASET_DIR = os.path.join(path.parent.parent.absolute(), "data")
print("Please Check your directory:")
Expand Down
47 changes: 38 additions & 9 deletions hydromodel/calibrate/calibrate_sceua.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Union
import numpy as np
import spotpy
import pandas as pd
from spotpy.parameter import Uniform, ParameterSet
from spotpy.objectivefunctions import rmse
from hydromodel.models.model_config import MODEL_PARAM_DICT
Expand All @@ -14,7 +15,7 @@ def __init__(
self,
p_and_e,
qobs,
warmup_length=30,
warmup_length=365,
model={
"name": "xaj_mz",
"source_type": "sources",
Expand Down Expand Up @@ -127,24 +128,52 @@ def objectivefunction(
float
likelihood
"""
# SPOTPY expects to get one or multiple values back,
# that define the performance of the model run
# 切片
# time = pd.read_excel('/home/ldaning/code/biye/hydro-model-xaj/hydromodel/example/zhandian/洪水率定时间1.xlsx')
# calibrate_starttime = pd.to_datetime("2014-7-10 0:00")
# calibrate_endtime = pd.to_datetime("2020-6-24 0:00")
# total = 0
# count = 0
# for i in range(len(time)):
# if(time.iloc[i,0]<calibrate_endtime):
# start_num = (time.iloc[i,0]-calibrate_starttime-pd.Timedelta(hours=365))/pd.Timedelta(hours=1)
# end_num = (time.iloc[i,1]-calibrate_starttime-pd.Timedelta(hours=365))/pd.Timedelta(hours=1)
# start_num = int(start_num)
# end_num = int(end_num)
# if not self.obj_func:
# # This is used if not overwritten by user
# like_ = rmse(evaluation[start_num:end_num,], simulation[start_num:end_num,])
# total += like_
# count += 1

# else:
# Way to ensure flexible spot setup class
# like_ = self.obj_func(evaluation[start_num:end_num,], simulation[start_num:end_num,])
# total += like_
# count += 1

# like=total/count
# return like
if not self.obj_func:
# This is used if not overwritten by user
like = rmse(evaluation, simulation)

else:
# Way to ensure flexible spot setup class
like = self.obj_func(evaluation, simulation)
return like

# SPOTPY expects to get one or multiple values back,
# that define the performance of the model run


def calibrate_by_sceua(
p_and_e,
qobs,
dbname,
warmup_length=30,
warmup_length=365,
model={
"name": "xaj_mz",
"name": "xaj_mz", # 模型
"source_type": "sources",
"source_book": "HF",
},
Expand All @@ -154,8 +183,8 @@ def calibrate_by_sceua(
"rep": 1000,
"ngs": 1000,
"kstop": 500,
"peps": 0.001,
"pcento": 0.001,
"peps": 0.1,
"pcento": 0.1,
},
):
"""
Expand Down Expand Up @@ -199,9 +228,9 @@ def calibrate_by_sceua(
qobs,
warmup_length=warmup_length,
model=model,
obj_func=spotpy.objectivefunctions.rmse,
obj_func=spotpy.objectivefunctions.rmse, # 均方根误差
)
# Select number of maximum allowed repetitions
# Select number of maximum allowed repetitions # 选择允许的最大重复次数
sampler = spotpy.algorithms.sceua(
spot_setup,
dbname=dbname,
Expand Down
35 changes: 22 additions & 13 deletions hydromodel/data/data_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,15 @@ def read_save_sceua_calibrated_params(basin_id, save_dir, sceua_calibrated_file_
"""
results = spotpy.analyser.load_csv_results(sceua_calibrated_file_name)
bestindex, bestobjf = spotpy.analyser.get_minlikeindex(results)
bestindex, bestobjf = spotpy.analyser.get_minlikeindex(
results
) # 结果数组中具有最小目标函数的位置的索引
best_model_run = results[bestindex]
fields = [word for word in best_model_run.dtype.names if word.startswith("par")]
best_calibrate_params = pd.DataFrame(list(best_model_run[fields]))
save_file = os.path.join(save_dir, basin_id + "_calibrate_params.txt")
best_calibrate_params.to_csv(save_file, sep=",", index=False, header=True)
return np.array(best_calibrate_params).reshape(1, -1)
return np.array(best_calibrate_params).reshape(1, -1) # 返回一列最佳的结果


def summarize_parameters(result_dir, model_info: dict):
Expand Down Expand Up @@ -136,9 +138,15 @@ def summarize_metrics(result_dir, model_info: dict):
test_metric = hydro_file.unserialize_json(test_metric_file)

for key, value in train_metric.items():
train_metrics[key] = value if count == 0 else train_metrics[key] + value
if count == 0:
train_metrics[key] = value
else:
train_metrics[key] = train_metrics[key] + value
for key, value in test_metric.items():
test_metrics[key] = value if count == 0 else test_metrics[key] + value
if count == 0:
test_metrics[key] = value
else:
test_metrics[key] = test_metrics[key] + value
count = count + 1
metric_dfs_train = pd.DataFrame(train_metrics, index=basin_ids).transpose()
metric_dfs_test = pd.DataFrame(test_metrics, index=basin_ids).transpose()
Expand Down Expand Up @@ -206,21 +214,21 @@ def read_and_save_et_ouputs(result_dir, fold: int):
)
train_period = data_info_train["time"]
test_period = data_info_test["time"]
train_np_file = os.path.join(exp_dir, f"basins_lump_p_pe_q_fold{fold}_train.npy")
test_np_file = os.path.join(exp_dir, f"basins_lump_p_pe_q_fold{fold}_test.npy")
train_np_file = os.path.join(exp_dir, "data_info_fold" + str(fold) + "_train.npy")
test_np_file = os.path.join(exp_dir, "data_info_fold" + str(fold) + "_test.npy")
train_data = np.load(train_np_file)
test_data = np.load(test_np_file)
es_test = []
es_train = []
for i in range(len(basins_id)):
_, e_train = xaj(
train_data[:, i : i + 1, 0:2],
train_data[:, :, 0:2],
param_values[basins_id[i]].values.reshape(1, -1),
warmup_length=warmup_length,
**model_func_param,
)
_, e_test = xaj(
test_data[:, i : i + 1, 0:2],
test_data[:, :, 0:2],
param_values[basins_id[i]].values.reshape(1, -1),
warmup_length=warmup_length,
**model_func_param,
Expand All @@ -241,11 +249,12 @@ def read_and_save_et_ouputs(result_dir, fold: int):

if __name__ == "__main__":
one_model_one_hyperparam_setting_dir = os.path.join(
definitions.ROOT_DIR,
"hydromodel",
"example",
"exp61561",
"Dec08_11-38-48_LAPTOP-DNQOPPMS_fold1_HFsourcesrep1000ngs1000",
"/home/ldaning/code/biye/hydro-model-xaj/hydromodel/example/model_run_wuxi7"
# definitions.ROOT_DIR,
# "hydromodel",
# "example",
# "exp61561",
# "Dec08_11-38-48_LAPTOP-DNQOPPMS_fold1_HFsourcesrep1000ngs1000",
)
read_and_save_et_ouputs(one_model_one_hyperparam_setting_dir, fold=1)
# summarize_parameters(one_model_one_hyperparam_setting_dir, {"name": "xaj_mz"})
Expand Down
9 changes: 7 additions & 2 deletions hydromodel/data/data_preprocess.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""
Author: Wenyu Ouyang
Date: 2022-10-25 21:16:22
LastEditTime: 2022-12-04 14:55:55
LastEditTime: 2024-03-21 18:36:25
LastEditors: Wenyu Ouyang
Description: preprocess data for models in hydro-model-xaj
FilePath: \hydro-model-xaj\hydromodel\data\data_preprocess.py
Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved.
"""

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
Expand Down Expand Up @@ -135,6 +136,8 @@ def split_train_test(json_file, npy_file, train_period, test_period):
data_info_train = OrderedDict(
{
"time": [str(t)[:10] for t in hydro_time.t_range_days(train_period)],
# TODO: for time, more detailed time is needed, so we need to change the format of time
# "time": [str(t)[:16] for t in hydro_time.t_range_days(train_period)],
"basin": data_info["basin"],
"variable": data_info["variable"],
"area": data_info["area"],
Expand All @@ -143,6 +146,8 @@ def split_train_test(json_file, npy_file, train_period, test_period):
data_info_test = OrderedDict(
{
"time": [str(t)[:10] for t in hydro_time.t_range_days(test_period)],
# TODO: for time, more detailed time is needed, so we need to change the format of time
# "time": [str(t)[:16] for t in hydro_time.t_range_days(test_period)],
"basin": data_info["basin"],
"variable": data_info["variable"],
"area": data_info["area"],
Expand All @@ -159,7 +164,7 @@ def split_train_test(json_file, npy_file, train_period, test_period):
hydro_file.serialize_numpy(data[ind3, :, :], test_npy_file)


def cross_valid_data(json_file, npy_file, period, warmup, cv_fold, time_unit="D"):
def cross_valid_data(json_file, npy_file, period, warmup, cv_fold, time_unit="h"):
"""
Split all data to train and test parts with same format
Expand Down
Loading

0 comments on commit e439362

Please sign in to comment.