Skip to content

Commit

Permalink
refactor utils func; unify read_area interface
Browse files Browse the repository at this point in the history
  • Loading branch information
OuyangWenyu committed Dec 18, 2023
1 parent a2d49ac commit 8a86006
Show file tree
Hide file tree
Showing 17 changed files with 164 additions and 848 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/publish-to-pypi.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: Publish Python distributions to PyPI

on: push
on:
push:
branches:
- master

jobs:
build-n-publish:
Expand All @@ -9,10 +12,10 @@ jobs:

steps:
- uses: actions/checkout@master
- name: Set up Python 3.9
- name: Set up Python 3.10
uses: actions/setup-python@v1
with:
python-version: 3.9
python-version: 3.10

- name: Install pypa/build
run: >-
Expand Down
21 changes: 0 additions & 21 deletions docs/source/hydromodel.utils.rst

This file was deleted.

24 changes: 12 additions & 12 deletions hydromodel/app/calibrate_xaj.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@
import sys
from pathlib import Path

from hydroutils import hydro_file

sys.path.append(os.path.dirname(Path(os.path.abspath(__file__)).parent.parent))
import definitions
from hydromodel.calibrate.calibrate_sceua import calibrate_by_sceua
from hydromodel.utils import hydro_utils
from hydromodel.data.data_postprocess import (
renormalize_params,
read_save_sceua_calibrated_params,
save_streamflow,
summarize_metrics,
summarize_parameters,
)
from hydromodel.visual.pyspot_plots import show_calibrate_result, show_test_result
from hydromodel.utils.plots import show_calibrate_result, show_test_result
from hydromodel.models.xaj import xaj
from hydromodel.calibrate.calibrate_ga import calibrate_by_ga, show_ga_result
from hydromodel.utils import hydro_constant
from hydromodel.utils import units


def calibrate(args):
Expand Down Expand Up @@ -63,10 +63,10 @@ def calibrate(args):
raise FileNotFoundError(
"The data files are not found, please run datapreprocess4calibrate.py first."
)
data_train = hydro_utils.unserialize_numpy(train_data_file)
data_test = hydro_utils.unserialize_numpy(test_data_file)
data_info_train = hydro_utils.unserialize_json_ordered(train_data_info_file)
data_info_test = hydro_utils.unserialize_json_ordered(test_data_info_file)
data_train = hydro_file.unserialize_numpy(train_data_file)
data_test = hydro_file.unserialize_numpy(test_data_file)
data_info_train = hydro_file.unserialize_json_ordered(train_data_info_file)
data_info_test = hydro_file.unserialize_json_ordered(test_data_info_file)
current_time = datetime.now().strftime("%b%d_%H-%M-%S")
save_dir = os.path.join(
data_dir,
Expand All @@ -80,7 +80,7 @@ def calibrate(args):
)
if os.path.exists(save_dir) is False:
os.makedirs(save_dir)
hydro_utils.serialize_json(vars(args), os.path.join(save_dir, "args.json"))
hydro_file.serialize_json(vars(args), os.path.join(save_dir, "args.json"))
if algo_info["name"] == "SCE_UA":
for i in range(len(data_info_train["basin"])):
basin_id = data_info_train["basin"][i]
Expand Down Expand Up @@ -123,16 +123,16 @@ def calibrate(args):
**model_info,
)

qsim = hydro_constant.convert_unit(
qsim = units.convert_unit(
qsim,
unit_now="mm/day",
unit_final=hydro_constant.unit["streamflow"],
unit_final=units.unit["streamflow"],
basin_area=basin_area,
)
qobs = hydro_constant.convert_unit(
qobs = units.convert_unit(
data_test[warmup:, i : i + 1, -1:],
unit_now="mm/day",
unit_final=hydro_constant.unit["streamflow"],
unit_final=units.unit["streamflow"],
basin_area=basin_area,
)
test_result_file = os.path.join(
Expand Down
37 changes: 23 additions & 14 deletions hydromodel/calibrate/calibrate_ga.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
"""Calibrate XAJ model using DEAP"""
"""
Author: Wenyu Ouyang
Date: 2021-12-10 23:01:02
LastEditTime: 2023-12-17 21:10:45
LastEditors: Wenyu Ouyang
Description: Calibrate XAJ model using DEAP
FilePath: \hydro-model-xaj\hydromodel\calibrate\calibrate_ga.py
Copyright (c) 2023-2024 Wenyu Ouyang. All rights reserved.
"""
import os
import pickle
from deap import base, creator
Expand All @@ -10,17 +18,17 @@
import sys
from pathlib import Path

from hydroutils import hydro_file, hydro_stat


sys.path.append(os.path.dirname(Path(os.path.abspath(__file__)).parent.parent))
import definitions
from hydromodel.models.model_config import MODEL_PARAM_DICT
from hydromodel.utils import hydro_constant, hydro_utils
from hydromodel.utils import stat
from hydromodel.utils.stat import statRmse
from hydromodel.utils import units
from hydromodel.utils.plots import plot_sim_and_obs, plot_train_iteration
from hydromodel.models.gr4j import gr4j
from hydromodel.models.hymod import hymod
from hydromodel.models.xaj import xaj
from hydromodel.visual.hydro_plot import plot_sim_and_obs, plot_train_iteration


def evaluate(individual, x_input, y_true, warmup_length, model):
Expand Down Expand Up @@ -58,7 +66,8 @@ def evaluate(individual, x_input, y_true, warmup_length, model):
sim = hymod(x_input, params, warmup_length=warmup_length, **model)
else:
raise NotImplementedError("We don't provide this model now")
rmses = statRmse(y_true[warmup_length:, :, :], sim)
# Calculate RMSE for multi-dim arrays
rmses = np.sqrt(np.nanmean((sim - y_true[warmup_length:, :, :]) ** 2, axis=0))
rmse = rmses.mean(axis=0)
# print(f"-----------------RMSE: {str(rmse)}------------------------")
return rmse
Expand Down Expand Up @@ -291,16 +300,16 @@ def show_ga_result(
warmup_length=warmup_length,
**model_info,
)
convert_unit_sim = hydro_constant.convert_unit(
convert_unit_sim = units.convert_unit(
np.array(best_simulation).reshape(1, -1),
result_unit,
hydro_constant.unit["streamflow"],
units.unit["streamflow"],
basin_area=basin_area,
)
convert_unit_obs = hydro_constant.convert_unit(
convert_unit_obs = units.convert_unit(
np.array(the_data[warmup_length:, :, -1:]).reshape(1, -1),
result_unit,
hydro_constant.unit["streamflow"],
units.unit["streamflow"],
basin_area=basin_area,
)
# save calibrated results of calibration period
Expand All @@ -315,12 +324,12 @@ def show_ga_result(
header=False,
)
# calculate RMSE, Nash-Sutcliffe and bias for training period
stat_error = stat.statError(
stat_error = hydro_stat.stat_error(
convert_unit_obs,
convert_unit_sim,
)
print(f"{train_test_flag}ing metrics:", basin_id, stat_error)
hydro_utils.serialize_json_np(
hydro_file.serialize_json_np(
stat_error, os.path.join(deap_dir, f"{train_test_flag}_metrics.json")
)
t_range = pd.to_datetime(the_period[warmup_length:]).values.astype("datetime64[D]")
Expand Down Expand Up @@ -354,8 +363,8 @@ def show_ga_result(
)
train_data_info_file = os.path.join(data_dir, "data_info_fold0_train.json")
train_data_file = os.path.join(data_dir, "basins_lump_p_pe_q_fold0_train.npy")
data_train = hydro_utils.unserialize_numpy(train_data_file)
data_info_train = hydro_utils.unserialize_json_ordered(train_data_info_file)
data_train = hydro_file.unserialize_numpy(train_data_file)
data_info_train = hydro_file.unserialize_json_ordered(train_data_info_file)
model_info = {
"name": "xaj_mz",
"source_type": "sources",
Expand Down
15 changes: 8 additions & 7 deletions hydromodel/data/camels_format_data.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import collections
import hydrodataset
from hydrodataset import hydro_utils
import os
from typing import Union
import pandas as pd
import numpy as np
from pandas.core.dtypes.common import is_string_dtype, is_numeric_dtype
from tqdm import tqdm

from hydroutils import hydro_time
import hydrodataset


class MyCamels(hydrodataset.Camels):
def __init__(self, data_path, download=False, region: str = "CC"):
Expand Down Expand Up @@ -176,7 +177,7 @@ def read_target_cols(
return np.array([])
else:
nf = len(target_cols)
t_range_list = hydro_utils.t_range_days(t_range)
t_range_list = hydro_time.t_range_days(t_range)
nt = t_range_list.shape[0]
y = np.full([len(gage_id_lst), nt, nf], np.nan)
for j in tqdm(range(len(target_cols)), desc="Read Q/SSM/ET data of CAMELS-CC"):
Expand Down Expand Up @@ -215,8 +216,8 @@ def read_target_cols(
)
else:
final_date = date[-1] + np.timedelta64(8, "D")
date_all = hydro_utils.t_range_days(
hydro_utils.t_days_lst2range([date[0], final_date])
date_all = hydro_time.t_range_days(
hydro_time.t_days_lst2range([date[0], final_date])
)
t_range_final = np.intersect1d(date_all, t_range_list)
[_, ind3, ind4] = np.intersect1d(
Expand Down Expand Up @@ -291,7 +292,7 @@ def read_relevant_cols(
np.array
forcing data
"""
t_range_list = hydro_utils.t_range_days(t_range)
t_range_list = hydro_time.t_range_days(t_range)
nt = t_range_list.shape[0]
x = np.full([len(gage_id_lst), nt, len(var_lst)], np.nan)
for k in tqdm(range(len(gage_id_lst)), desc="Read forcing data of CAMELS-CC"):
Expand Down Expand Up @@ -384,7 +385,7 @@ def read_constant_cols(
else:
return out

def read_basin_area(self, object_ids) -> np.array:
def read_area(self, object_ids) -> np.array:
return self.read_constant_cols(object_ids, ["Area"], is_return_dict=False)

def read_mean_prep(self, object_ids) -> np.array:
Expand Down
16 changes: 8 additions & 8 deletions hydromodel/data/data_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from pathlib import Path
import sys

from hydroutils import hydro_file

sys.path.append(os.path.dirname(Path(os.path.abspath(__file__)).parent.parent))
import definitions
from hydromodel.utils import hydro_utils
from hydromodel.models.model_config import MODEL_PARAM_DICT
from hydromodel.models.xaj import xaj

Expand Down Expand Up @@ -132,8 +132,8 @@ def summarize_metrics(result_dir, model_info: dict):
basin_ids.append(basin_id)
train_metric_file = os.path.join(basin_dir, "train_metrics.json")
test_metric_file = os.path.join(basin_dir, "test_metrics.json")
train_metric = hydro_utils.unserialize_json(train_metric_file)
test_metric = hydro_utils.unserialize_json(test_metric_file)
train_metric = hydro_file.unserialize_json(train_metric_file)
test_metric = hydro_file.unserialize_json(test_metric_file)

for key, value in train_metric.items():
train_metrics[key] = value if count == 0 else train_metrics[key] + value
Expand Down Expand Up @@ -176,11 +176,11 @@ def save_streamflow(result_dir, model_info: dict, fold: int):
streamflow_dfs_test.columns = basin_ids
streamflow_dfs_train.columns = basin_ids
test_info_file = path.parent.joinpath("data_info_fold" + str(fold) + "_test.json")
test_info = hydro_utils.unserialize_json(test_info_file)
test_info = hydro_file.unserialize_json(test_info_file)
date_test = test_info["time"][-streamflow_dfs_test.shape[0] :]
streamflow_dfs_test.index = date_test
train_info_file = path.parent.joinpath("data_info_fold" + str(fold) + "_train.json")
train_info = hydro_utils.unserialize_json(train_info_file)
train_info = hydro_file.unserialize_json(train_info_file)
date_train = train_info["time"][-streamflow_dfs_train.shape[0] :]
streamflow_dfs_train.index = date_train
eva_csv_file_test = os.path.join(result_dir, "basin_qsim_test.csv")
Expand All @@ -194,14 +194,14 @@ def read_and_save_et_ouputs(result_dir, fold: int):
param_values = pd.read_csv(prameter_file, index_col=0)
basins_id = param_values.columns.values
args_file = os.path.join(result_dir, "args.json")
args = hydro_utils.unserialize_json(args_file)
args = hydro_file.unserialize_json(args_file)
warmup_length = args["warmup_length"]
model_func_param = args["model"]
exp_dir = pathlib.Path(result_dir).parent
data_info_train = hydro_utils.unserialize_json(
data_info_train = hydro_file.unserialize_json(
exp_dir.joinpath(f"data_info_fold{fold}_train.json")
)
data_info_test = hydro_utils.unserialize_json(
data_info_test = hydro_file.unserialize_json(
exp_dir.joinpath(f"data_info_fold{fold}_test.json")
)
train_period = data_info_train["time"]
Expand Down
Loading

0 comments on commit 8a86006

Please sign in to comment.