diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..d4a2c44
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,21 @@
+# http://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+charset = utf-8
+end_of_line = lf
+
+[*.bat]
+indent_style = tab
+end_of_line = crlf
+
+[LICENSE]
+insert_final_newline = false
+
+[Makefile]
+indent_style = tab
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..c89fc62
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,25 @@
+---
+name: Bug Report
+about: Create a bug report to help us improve
+labels: bug
+---
+
+
+### Environment Information
+
+- hydromodel version:
+- Python version:
+- Operating System:
+
+### Description
+
+Describe what you were trying to get done.
+Tell us what happened, what went wrong, and what you expected to happen.
+
+### What I Did
+
+```
+Paste the command(s) you ran and the output.
+If there was a crash, please include the traceback here.
+```
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..de46af5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,10 @@
+contact_links:
+  - name: Ask questions
+    url: https://github.com/OuyangWenyu/hydro-model-xaj/discussions/categories/q-a
+    about: Please ask and answer questions here.
+  - name: Ideas
+    url: https://github.com/OuyangWenyu/hydro-model-xaj/discussions/categories/ideas
+    about: Please share your ideas here.
+  - name: Ask questions of the GIS community
+    url: https://gis.stackexchange.com
+    about: To get answers from the GIS community, please ask and answer questions here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..cf7acb9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,18 @@
+---
+name: Feature Request
+about: Submit a feature request to help us improve
+labels: Feature Request
+---
+
+
+### Description
+
+Describe the feature (e.g., new functions/tutorials) you would like to propose.
+Tell us what can be achieved with this new feature and what the expected outcome would be.
+
+### Source code
+
+```
+Paste your source code here if you have sample code to share.
+```
diff --git a/.github/workflows/docs-build.yml b/.github/workflows/docs-build.yml
new file mode 100644
index 0000000..876561c
--- /dev/null
+++ b/.github/workflows/docs-build.yml
@@ -0,0 +1,51 @@
+name: docs-build
+on:
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: Install GDAL
+        run: |
+          python -m pip install --upgrade pip
+          pip install --find-links=https://girder.github.io/large_image_wheels --no-cache GDAL pyproj
+      - name: Test GDAL installation
+        run: |
+          python -c "from osgeo import gdal"
+          gdalinfo --version
+      - name: Install dependencies
+        run: |
+          pip install --no-cache-dir Cython
+          pip install -r requirements.txt -r requirements_dev.txt -r requirements_docs.txt
+          pip install .
+ - name: Discover typos with codespell + run: codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,*.pdf,./.git" --ignore-words-list="aci,acount,acounts,fallow,hart,hist,nd,ned,ois,wqs" + - name: PKG-TEST + run: | + python -m unittest discover tests/ + - name: Build docs + run: | + mkdocs build + # - name: Deploy to Netlify + # uses: nwtgck/actions-netlify@v2.0 + # with: + # publish-dir: "./site" + # production-branch: master + # github-token: ${{ secrets.GITHUB_TOKEN }} + # deploy-message: "Deploy from GitHub Actions" + # enable-pull-request-comment: true + # enable-commit-comment: false + # overwrites-pull-request-comment: true + # env: + # NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} + # NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} + # timeout-minutes: 10 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..6ea88e5 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,28 @@ +name: docs +on: + push: + branches: + - master +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --user --no-cache-dir Cython + pip install --user -r requirements.txt + pip install . + - name: Discover typos with codespell + run: | + pip install codespell + codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,./.git" --ignore-words-list="aci,acount,acounts,fallow,hart,hist,nd,ned,ois,wqs,watermask" + - name: PKG-TEST + run: | + python -m unittest discover tests/ + - run: pip install -r requirements_docs.txt + - run: mkdocs gh-deploy --force diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml new file mode 100644 index 0000000..4b3ed55 --- /dev/null +++ b/.github/workflows/macos.yml @@ -0,0 +1,38 @@ +on: + push: + branches: + - master + pull_request: + branches: + - master + +name: macOS build +jobs: + test-macOS: + runs-on: ${{ matrix.os }} + name: ${{ matrix.os }} (${{ matrix.python-version}}) + strategy: + fail-fast: false + matrix: + os: ["macOS-latest"] + python-version: ["3.10"] + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version}} + - name: Install GDAL + run: | + brew install gdal + - name: Test GDAL installation + run: | + gdalinfo --version + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install --no-cache-dir Cython + pip install -r requirements.txt + pip install . 
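Every workflow above smoke-tests the installed package with `python -m unittest discover tests/`. For reference, a minimal sketch of the kind of test module that discovery would pick up; the file name `tests/test_import.py` and the assertions are illustrative, not part of this PR:

```python
# tests/test_import.py -- hypothetical smoke test picked up by
# `python -m unittest discover tests/` in the CI workflows.
import unittest


class TestPackageImport(unittest.TestCase):
    def test_import_and_version(self):
        # `pip install .` in the workflow must leave the package importable
        import hydromodel

        # setup.cfg's bump2version section tracks __version__ in
        # hydromodel/__init__.py, so the attribute should exist after install
        self.assertTrue(hasattr(hydromodel, "__version__"))


if __name__ == "__main__":
    unittest.main()
```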
diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
new file mode 100644
index 0000000..2bbb0c0
--- /dev/null
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,30 @@
+# This workflow will upload a Python package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: pypi
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.x"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install setuptools wheel twine
+      - name: Build and publish
+        env:
+          TWINE_USERNAME: ${{ secrets.PYPI_USERS }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          python setup.py sdist bdist_wheel
+          twine upload dist/*
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
new file mode 100644
index 0000000..1c07b9b
--- /dev/null
+++ b/.github/workflows/ubuntu.yml
@@ -0,0 +1,47 @@
+on:
+  push:
+    branches:
+      - master
+      - dev
+  pull_request:
+    branches:
+      - master
+      - dev
+
+name: Linux build
+jobs:
+  py-check:
+    runs-on: ${{ matrix.config.os }}
+    name: ${{ matrix.config.os }} (${{ matrix.config.py }})
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - { os: ubuntu-latest, py: "3.9" }
+          - { os: ubuntu-latest, py: "3.10" }
+          - { os: ubuntu-latest, py: "3.11" }
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v3
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.config.py }}
+      - name: Install GDAL
+        run: |
+          python -m pip install --upgrade pip
+          pip install --no-cache-dir Cython
+          pip install --find-links=https://girder.github.io/large_image_wheels --no-cache GDAL
+      - name: Test GDAL installation
+        run: |
+          python -c "from osgeo import gdal"
+          gdalinfo --version
+      - name: Install dependencies
+        run: |
+          pip install --user -r requirements.txt
+          pip install --user -r requirements_dev.txt
+          pip install .
+      - name: PKG-TEST
+        run: |
+          python -m unittest discover tests/
+
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
new file mode 100644
index 0000000..9e9deaa
--- /dev/null
+++ b/.github/workflows/windows.yml
@@ -0,0 +1,33 @@
+on:
+  push:
+    branches:
+      - master
+      - dev
+  pull_request:
+    branches:
+      - master
+      - dev
+
+name: Windows build
+jobs:
+  test-windows:
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install miniconda
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          auto-activate-base: true
+          python-version: "3.10"
+      - name: Install GDAL
+        run: conda install -c conda-forge gdal --yes
+      - name: Test GDAL installation
+        run: |
+          python -c "from osgeo import gdal"
+          gdalinfo --version
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install --no-cache-dir Cython
+          pip install -r requirements.txt
+          pip install .
diff --git a/AUTHORS.rst b/AUTHORS.rst
new file mode 100644
index 0000000..a199192
--- /dev/null
+++ b/AUTHORS.rst
@@ -0,0 +1,13 @@
+=======
+Credits
+=======
+
+Development Lead
+----------------
+
+* Wenyu Ouyang
+
+Contributors
+------------
+
+None yet. Why not be the first?
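The pypi.yml workflow above builds the distribution with `python setup.py sdist bdist_wheel` and uploads it with Twine whenever a release is created. A sketch of the same build as a local dry run before tagging a release; `twine check` only validates the archives' metadata, nothing is uploaded:

```python
# Hypothetical local dry run of the build step in pypi.yml.
# Assumes setuptools, wheel, and twine are installed, as in the workflow.
import glob
import subprocess

subprocess.run(["python", "setup.py", "sdist", "bdist_wheel"], check=True)
# validate the generated archives' metadata without uploading anything
subprocess.run(["twine", "check", *glob.glob("dist/*")], check=True)
```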
diff --git a/hydromodel/app/dockerfile b/hydromodel/app/dockerfile
new file mode 100644
index 0000000..af94fa3
--- /dev/null
+++ b/hydromodel/app/dockerfile
@@ -0,0 +1,13 @@
+FROM ewatercycle/wflow-grpc4bmi
+LABEL maintainer="wangjingyi <1160527180@qq.com>"
+
+# Install grpc4bmi
+RUN pip install grpc4bmi==0.2.8
+
+# Set environment
+WORKDIR /home
+ENV BMI_MODULE=bmixaj
+ENV BMI_CLASS=Bmixaj
+ENV BMI_PORT=55555
+ENTRYPOINT ["run-bmi-server", "--name", "xaj.xaj_bmi.xajBmi", "--path", "/home/wangjingyi/code/hydro-model-xaj/xaj"]
+EXPOSE 55555
\ No newline at end of file
diff --git a/hydromodel/calibrate/calibrate_ga_xaj_bmi.py b/hydromodel/calibrate/calibrate_ga_xaj_bmi.py
new file mode 100644
index 0000000..71dd54a
--- /dev/null
+++ b/hydromodel/calibrate/calibrate_ga_xaj_bmi.py
@@ -0,0 +1,381 @@
+"""Calibrate XAJ model using DEAP"""
+import os
+import pickle
+import random
+import sys
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+from deap import base, creator
+from deap import tools
+from tqdm import tqdm
+
+sys.path.append(os.path.dirname(Path(os.path.abspath(__file__)).parent.parent))
+import definitions
+from hydromodel.models.model_config import MODEL_PARAM_DICT
+from hydromodel.utils import hydro_constant, hydro_utils
+from hydromodel.utils import stat
+from hydromodel.utils.stat import statRmse
+from hydromodel.visual.hydro_plot import plot_sim_and_obs, plot_train_iteration
+from hydromodel.models.xaj_bmi import xajBmi
+
+
+def evaluate(individual, x_input, y_true, warmup_length, model):
+    """
+    Calculate fitness for optimization
+
+    Parameters
+    ----------
+    individual
+        individual is the params of XAJ (see details in xaj.py); we initialize all parameters in range [0,1]
+    x_input
+        input of XAJ
+    y_true
+        observation data; we use the part after the warmup period
+    warmup_length
+        the length of the warmup period
+    model
+        model configuration; model["name"] can be "xaj", "xaj_mz", "gr4j", or "hymod"
+
+    Returns
+    -------
+    float
+        fitness
+    """
+    # print("Calculate fitness:")
+    # NOTE: only one basin's calibration is supported at a time for now
+    params = np.array(individual).reshape(1, -1)
+    if model["name"] in ["xaj", "xaj_mz"]:
+        # the xaj model's output includes streamflow and evaporation,
+        # but we only calibrate the model with streamflow
+        bmi_model = xajBmi()  # do not shadow the model-config dict
+        bmi_model.initialize(os.path.relpath("runxaj.yaml"), params, x_input)
+        while bmi_model.get_current_time() <= bmi_model.get_end_time("train"):
+            bmi_model.update()
+        sim = bmi_model.get_value("discharge")
+        sim = np.expand_dims(sim, 0)
+        sim = np.expand_dims(sim, 1)
+        sim = np.transpose(sim, [2, 1, 0])
+    else:
+        raise NotImplementedError("We don't provide this model now")
+    rmses = statRmse(y_true[warmup_length:, :, :], sim)
+    rmse = rmses.mean(axis=0)
+    # print(f"-----------------RMSE: {str(rmse)}------------------------")
+    return rmse
+
+
+def checkBounds(min, max):
+    """
+    A decorator to set bounds for individuals in a population
+
+    Parameters
+    ----------
+    min
+        the lower bound of individuals
+    max
+        the upper bound of individuals
+
+    Returns
+    -------
+    Function
+        a wrapper for clipping data into a given bound
+    """
+
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            offspring = func(*args, **kwargs)
+            for child in offspring:
+                for i in range(len(child)):
+                    if child[i] > max:
+                        child[i] = max
+                    elif child[i] < min:
+                        child[i] = min
+            return offspring

+        return wrapper
+
+    return decorator
+
+
+MIN = 0
+MAX = 1
+
+
+def calibrate_by_ga(
+    input_data, observed_output, deap_dir, warmup_length=30, model=None, ga_param=None
+):
+    """
+    Use a genetic algorithm (GA) to find optimal parameters for hydrologic models
+
+    Parameters
+    ----------
+    input_data
+        the input data for the model
+    observed_output
+        the "true" values, i.e. observations
+    deap_dir
+        the directory to save the results
+    warmup_length
+        the length of the warmup period
+    model
+        the model setting
+    ga_param
+        random_seed: 1234
+        run_counts: int = 40, the number of generations to run
+        pop_num: int = 50, the number of individuals in the population
+        cross_prob: float = 0.5, the probability with which two individuals are crossed
+        mut_prob: float = 0.5, the probability for mutating an individual
+
+    Returns
+    -------
+    list
+        the final population; the best individual is kept in the hall of fame
+    """
+    if model is None:
+        model = {
+            "name": "xaj_mz",
+            "source_type": "sources",
+            "source_book": "HF",
+        }
+    if ga_param is None:
+        ga_param = {
+            "random_seed": 1234,
+            "run_counts": 5,
+            "pop_num": 50,
+            "cross_prob": 0.5,
+            "mut_prob": 0.5,
+            "save_freq": 1,
+        }
+    np.random.seed(ga_param["random_seed"])
+    param_num = len(MODEL_PARAM_DICT[model["name"]]["param_name"])
+    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
+    creator.create("Individual", list, fitness=creator.FitnessMin)
+    toolbox = base.Toolbox()
+    toolbox.register("attribute", random.random)
+    toolbox.register(
+        "individual",
+        tools.initRepeat,
+        creator.Individual,
+        toolbox.attribute,
+        n=param_num,
+    )
+    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
+
+    toolbox.register("mate", tools.cxTwoPoint)
+    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1)
+    toolbox.register("select", tools.selTournament, tournsize=3)
+    toolbox.register(
+        "evaluate",
+        evaluate,
+        x_input=input_data,
+        y_true=observed_output,
+        warmup_length=warmup_length,
+        model=model,
+    )
+
+    toolbox.decorate("mate", checkBounds(MIN, MAX))
+    toolbox.decorate("mutate", checkBounds(MIN, MAX))
+    pop = toolbox.population(n=ga_param["pop_num"])
+    # cxpb is the probability with which two individuals are crossed
+    # mutpb is the probability for mutating an individual
+    cxpb, mutpb = ga_param["cross_prob"], ga_param["mut_prob"]
+
+    # save the best individual
+    halloffame = tools.HallOfFame(maxsize=1)
+    logbook = tools.Logbook()
+    stats = tools.Statistics(lambda ind: ind.fitness.values)
+    stats.register("avg", np.mean)
+    stats.register("min", np.min)
+
+    # Evaluate the entire population for the first time
+    print("Initializing population...")
+    fitnesses = map(toolbox.evaluate, pop)
+    for ind, fit in zip(pop, fitnesses):
+        ind.fitness.values = fit
+    halloffame.update(pop)
+    record = stats.compile(pop)
+    logbook.record(gen=0, evals=len(pop), **record)
+    cp = dict(
+        population=pop,
+        generation=0,
+        halloffame=halloffame,
+        logbook=logbook,
+        rndstate=random.getstate(),
+    )
+    with open(os.path.join(deap_dir, "epoch0.pkl"), "wb") as cp_file:
+        pickle.dump(cp, cp_file)
+
+    for gen in tqdm(range(ga_param["run_counts"]), desc="GA calibrating"):
+
+        print(f"Generation {gen} started...")
+        # Select the next generation individuals
+        offspring = toolbox.select(pop, len(pop))
+        # Clone the selected individuals
+        offspring = list(map(toolbox.clone, offspring))
+
+        # Apply crossover and mutation on the offspring
+        for child1, child2 in zip(offspring[::2], offspring[1::2]):
+            if random.random() < cxpb:
+                toolbox.mate(child1, child2)
+                del child1.fitness.values
+                del child2.fitness.values
+
+        for mutant in offspring:
+            if random.random() < mutpb:
+                toolbox.mutate(mutant)
+                del mutant.fitness.values
+
+        # Evaluate the individuals with an invalid fitness
+        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
+        fitnesses = map(toolbox.evaluate, invalid_ind)
+        for ind, fit in tqdm(
+            zip(invalid_ind, fitnesses),
+            desc=f"Generation {gen + 1} fitness calculating",
+        ):
+            ind.fitness.values = fit
+
+        halloffame.update(offspring)
+        record = stats.compile(offspring)
+        # gen + 1 because generation 0 is the initial population
+        logbook.record(gen=gen + 1, evals=len(invalid_ind), **record)
+        # The population is entirely replaced by the offspring
+        pop[:] = offspring
+        print(
+            f"Best individual of generation {str(gen + 1)}"
+            + f" is: {halloffame[0]}, {halloffame[0].fitness.values}"
+        )
+        if gen % ga_param["save_freq"] == 0:
+            # Fill the dictionary using the dict(key=value[, ...]) constructor
+            cp = dict(
+                population=pop,
+                generation=gen + 1,
+                halloffame=halloffame,
+                logbook=logbook,
+                rndstate=random.getstate(),
+            )
+
+            with open(
+                os.path.join(deap_dir, f"epoch{str(gen + 1)}.pkl"), "wb"
+            ) as cp_file:
+                pickle.dump(cp, cp_file)
+            print(f"Files of generation {gen} saved.")
+    top10 = tools.selBest(pop, k=10)
+    return pop
+
+
+def show_ga_result(
+    deap_dir,
+    warmup_length,
+    basin_id,
+    the_data,
+    the_period,
+    basin_area,
+    model_info,
+    result_unit="mm/day",
+    train_mode=True,
+):
+    """
+    show the result of GA
+    """
+    # https://stackoverflow.com/questions/61065222/python-deap-and-multiprocessing-on-windows-attributeerror
+    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
+    creator.create("Individual", list, fitness=creator.FitnessMin)
+    # NOTE: hard-coded checkpoint file; pick the epoch you want to inspect
+    with open(os.path.join(deap_dir, "epoch2.pkl"), "rb") as cp_file:
+        cp = pickle.load(cp_file)
+    pop = cp["population"]
+    logbook = cp["logbook"]
+    halloffame = cp["halloffame"]
+    print(f"Best individual is: {halloffame[0]}, {halloffame[0].fitness.values}")
+    train_test_flag = "train" if train_mode else "test"
+
+    model = xajBmi()
+    model.initialize(
+        "runxaj.yaml", np.array(list(halloffame[0])).reshape(1, -1), the_data[:, :, 0:2]
+    )
+    while model.get_current_time() <= model.get_end_time("train"):
+        model.update()
+    best_simulation = model.get_value("discharge")
+
+    convert_unit_sim = hydro_constant.convert_unit(
+        np.array(best_simulation).reshape(1, -1),
+        # best_simulation,
+        result_unit,
+        hydro_constant.unit["streamflow"],
+        basin_area=basin_area,
+    )
+    convert_unit_obs = hydro_constant.convert_unit(
+        np.array(the_data[warmup_length:, :, -1:]).reshape(1, -1),
+        result_unit,
+        hydro_constant.unit["streamflow"],
+        basin_area=basin_area,
+    )
+    # save calibrated results of the calibration period
+    the_result_file = os.path.join(
+        deap_dir,
+        f"{train_test_flag}_qsim_" + model_info["name"] + "_" + str(basin_id) + ".csv",
+    )
+    pd.DataFrame(convert_unit_sim.reshape(-1, 1)).to_csv(
+        the_result_file,
+        sep=",",
+        index=False,
+        header=False,
+    )
+    # calculate RMSE, Nash-Sutcliffe efficiency, and bias for this period
+    stat_error = stat.statError(
+        convert_unit_obs,
+        convert_unit_sim,
+    )
+    print(f"{train_test_flag}ing metrics:", basin_id, stat_error)
+    hydro_utils.serialize_json_np(
+        stat_error, os.path.join(deap_dir, f"{train_test_flag}_metrics.json")
+    )
+    t_range = pd.to_datetime(the_period[warmup_length:]).values.astype("datetime64[D]")
+    save_fig = os.path.join(deap_dir, f"{train_test_flag}_results.png")
+    if train_mode:
+        save_param_file = os.path.join(deap_dir, basin_id + "_calibrate_params.txt")
+        pd.DataFrame(list(halloffame[0])).to_csv(
+            save_param_file, sep=",", index=False, header=True
+        )
+        fit_mins = logbook.select("min")
+        plot_train_iteration(fit_mins, os.path.join(deap_dir, "train_iteration.png"))
+    # plot the simulated vs. observed hydrograph for this period
+    plot_sim_and_obs(
+        t_range,
+        convert_unit_sim.flatten(),
+        convert_unit_obs.flatten(),
+        save_fig,
+    )
+
+
+if __name__ == "__main__":
+    data_dir = os.path.join(
+        definitions.ROOT_DIR,
+        "hydromodel",
+        "example",
+        "exp004",
+    )
+    deap_dir = os.path.join(
+        data_dir,
+        "Dec25_16-33-56_LAPTOP-DNQOPPMS_fold0_HFsources",
+        "60668",
+    )
+    train_data_info_file = os.path.join(data_dir, "data_info_fold0_train.json")
+    train_data_file = os.path.join(data_dir, "basins_lump_p_pe_q_fold0_train.npy")
+    data_train = hydro_utils.unserialize_numpy(train_data_file)
+    data_info_train = hydro_utils.unserialize_json_ordered(train_data_info_file)
+    model_info = {
+        "name": "xaj_mz",
+        "source_type": "sources",
+        "source_book": "HF",
+    }
+    train_period = data_info_train["time"]
+    basin_area = data_info_train["area"][0]
+
+    show_ga_result(
+        deap_dir,
+        365,
+        "60668",
+        data_train[:, 0:1, :],
+        train_period,
+        basin_area,
+        model_info,
+        result_unit="mm/day",
+    )
diff --git a/hydromodel/calibrate/calibrate_sceua_xaj_bmi.py b/hydromodel/calibrate/calibrate_sceua_xaj_bmi.py
new file mode 100644
index 0000000..404cd4b
--- /dev/null
+++ b/hydromodel/calibrate/calibrate_sceua_xaj_bmi.py
@@ -0,0 +1,209 @@
+from typing import Union
+import numpy as np
+import spotpy
+from spotpy.parameter import Uniform, ParameterSet
+from spotpy.objectivefunctions import rmse
+from hydromodel.models.model_config import MODEL_PARAM_DICT
+from hydromodel.models.xaj_bmi import xajBmi
+
+
+class SpotSetup(object):
+    def __init__(
+        self,
+        p_and_e,
+        qobs,
+        warmup_length=30,
+        model={
+            "name": "xaj_mz",
+            "source_type": "sources",
+            "source_book": "HF",
+        },
+        obj_func=None,
+    ):
+        """
+        Set up for Spotpy
+
+        Parameters
+        ----------
+        p_and_e
+            inputs of the model
+        qobs
+            observation data
+        warmup_length
+            the length of the warmup period
+        model
+            we support "gr4j", "hymod", and "xaj"
+        obj_func
+            objective function, typically RMSE
+        """
+        self.parameter_names = MODEL_PARAM_DICT[model["name"]]["param_name"]
+        self.model = model
+        self.params = []
+        for par_name in self.parameter_names:
+            # All parameters' ranges are [0,1]; we transform them to their normal ranges in the model
+            self.params.append(Uniform(par_name, low=0.0, high=1.0))
+        # Just a way to keep this example flexible and applicable to various examples
+        self.obj_func = obj_func
+        # Load observation data from file
+        self.p_and_e = p_and_e
+        # choose observation data after the warmup period
+        self.true_obs = qobs[warmup_length:, :, :]
+        self.warmup_length = warmup_length
+
+    def parameters(self):
+        return spotpy.parameter.generate(self.params)
+
+    def simulation(self, x: ParameterSet) -> Union[list, np.array]:
+        """
+        run xaj model
+
+        Parameters
+        ----------
+        x
+            the parameters of xaj; this is the function's only argument
+
+        Returns
+        -------
+        Union[list, np.array]
+            simulated result from xaj
+        """
+        # Here the model is started with one parameter combination
+        # parameter, 2-dim variable: [basin=1, parameter]
+        params = np.array(x).reshape(1, -1)
+        model = xajBmi()
+        # the xaj model's output includes streamflow and evaporation,
+        # but we only calibrate the model with streamflow
+
+        model.initialize("runxaj.yaml", params, self.p_and_e)
+        while model.get_current_time() <= model.get_end_time("train"):
+            model.update()
+        sim = model.get_value("discharge")
+        sim = np.expand_dims(sim, 0)
+        sim = np.expand_dims(sim, 1)
+        sim = np.transpose(sim, [2, 1, 0])
+        return sim[:, 0, 0]
+
+    def evaluation(self) -> Union[list, np.array]:
+        """
+        read observation values
+
+        Returns
+        -------
+        Union[list, np.array]
+            observation
+        """
+        # TODO: we only support one basin's calibration now
+
+        return self.true_obs[:, 0, 0]
+
+    def objective_function(
+        self,
+        simulation: Union[list, np.array],
+        evaluation: Union[list, np.array],
+        params=None,
+    ) -> float:
+        """
+        A user-defined objective function to calculate fitness.
+
+        Parameters
+        ----------
+        simulation:
+            simulation results
+        evaluation:
+            evaluation results
+        params:
+            parameters leading to the simulation
+
+        Returns
+        -------
+        float
+            likelihood
+        """
+        # SPOTPY expects to get one or multiple values back,
+        # which define the performance of the model run
+        if not self.obj_func:
+            # This is used if not overwritten by the user
+            like = rmse(evaluation, simulation)
+        else:
+            # Way to ensure a flexible spot setup class
+            like = self.obj_func(evaluation, simulation)
+        return like
+
+
+def calibrate_by_sceua(
+    p_and_e,
+    qobs,
+    dbname,
+    warmup_length=30,
+    model={
+        "name": "xaj_mz",
+        "source_type": "sources",
+        "source_book": "HF",
+    },
+    algorithm={
+        "name": "SCE_UA",
+        "random_seed": 1234,
+        "rep": 1000,
+        "ngs": 1000,
+        "kstop": 500,
+        "peps": 0.001,
+        "pcento": 0.001,
+    },
+):
+    """
+    Function for calibrating a model by SCE-UA
+
+    Now we only support one basin's calibration in one sampler
+
+    Parameters
+    ----------
+    p_and_e
+        inputs of the model
+    qobs
+        observation data
+    dbname
+        where to save the result file of the sampler
+    warmup_length
+        the length of the warmup period
+    model
+        we support "gr4j", "hymod", and "xaj"; parameters for the hydro model
+    algorithm
+        the calibration algorithm settings. For example, to calibrate the xaj model
+        with the SCE-UA algorithm: random_seed=2000, rep=5000, ngs=7, kstop=3, peps=0.1, pcento=0.1
+
+    Returns
+    -------
+    spotpy.algorithms.sceua
+        the sampler used for calibration
+    """
+    random_seed = algorithm["random_seed"]
+    rep = algorithm["rep"]
+    ngs = algorithm["ngs"]
+    kstop = algorithm["kstop"]
+    peps = algorithm["peps"]
+    pcento = algorithm["pcento"]
+    np.random.seed(random_seed)  # Makes the results reproducible
+
+    # Initialize the xaj example
+    # In this case, we tell the setup which algorithm we want to use, so
+    # we can use this example for different algorithms
+    spot_setup = SpotSetup(
+        p_and_e,
+        qobs,
+        warmup_length=warmup_length,
+        model=model,
+        obj_func=spotpy.objectivefunctions.rmse,
+    )
+    # Select the number of maximum allowed repetitions
+    sampler = spotpy.algorithms.sceua(
+        spot_setup,
+        dbname=dbname,
+        dbformat="csv",
+        random_state=random_seed,
+    )
+    # Start the sampler; one can specify ngs, kstop, peps and pcento if desired
+    sampler.sample(rep, ngs=ngs, kstop=kstop, peps=peps, pcento=pcento)
+    print("Calibrate Finished!")
+    return sampler
diff --git a/hydromodel/models/configuration.py b/hydromodel/models/configuration.py
new file mode 100644
index 0000000..219abd8
--- /dev/null
+++ b/hydromodel/models/configuration.py
@@ -0,0 +1,27 @@
+import yaml
+import numpy as np
+from hydromodel.models.xaj import xaj_state
+
+
+def read_config(config_file):
+    with open(config_file, encoding='utf8') as f:
+        config = yaml.safe_load(f)
+    return config
+
+
+def extract_forcing(forcing_data):
+    # p_and_e_df = forcing_data[["rainfall[mm]", "TURC [mm d-1]"]]
+    p_and_e_df = forcing_data[["prcp(mm/day)", "petfao56(mm/day)"]]
+    p_and_e = np.expand_dims(p_and_e_df.values, axis=1)
+    return p_and_e_df, p_and_e
+
+
+def warmup(p_and_e_warmup, params_state, warmup_length, model_info):
+    q_sim, es, *w0, w1, w2, s0, fr0, qi0, qg0 = xaj_state(
+        p_and_e_warmup,
+        params_state,
+        warmup_length=warmup_length,
+        model_info=model_info,
+        return_state=True,
+    )
+    return q_sim, es, *w0, w1, w2, s0, fr0, qi0, qg0
diff --git a/hydromodel/models/xaj_bmi.py b/hydromodel/models/xaj_bmi.py
new file mode 100644
index 0000000..e94f620
--- /dev/null
+++ b/hydromodel/models/xaj_bmi.py
@@ -0,0 +1,319 @@
+from typing import Tuple
+from bmipy import Bmi
+import numpy as np
+
+from hydromodel.models import configuration
+import datetime
+import logging
+
+from hydromodel.models.xaj import xaj_route, xaj_runoff
+
+logger = logging.getLogger(__name__)
+
+PRECISION = 1e-5
+
+
+class xajBmi(Bmi):
+    """XAJ model wrapped in a BMI interface."""
+
+    name = "hydro-model-xaj"
+    input_var_names = ("precipitation", "ETp")
+    output_var_names = ("ET", "discharge")
+    var_units = {
+        "precipitation": "mm/day",
+        "ETp": "mm/day",
+        "discharge": "mm/day",
+        "ET": "mm/day",
+    }
+
+    def __init__(self):
+        """Create a BMI model that is ready for initialization."""
+        self.time_step = 0
+
+    def initialize(self, config_file, params, p_and_e):
+        try:
+            logger.info("xaj: initialize_model")
+            config = configuration.read_config(config_file)
+            model_name = config["model_name"]
+            source_type = config["source_type"]
+            source_book = config["source_book"]
+            # forcing_data = pd.read_csv(config['forcing_data'])
+            model_info = {
+                "model_name": model_name,
+                "source_type": source_type,
+                "source_book": source_book,
+            }
+            # p_and_e_df, p_and_e = configuration.extract_forcing(forcing_data)
+            p_and_e_warmup = p_and_e[0 : config["warmup_length"], :, :]
+            (
+                self.q_sim_state,
+                self.es_state,
+                self.w0,
+                self.w1,
+                self.w2,
+                self.s0,
+                self.fr0,
+                self.qi0,
+                self.qg0,
+            ) = configuration.warmup(
+                p_and_e_warmup, params, config["warmup_length"], model_info
+            )
+
+            self.params = params
+            self.warmup_length = config["warmup_length"]
+            self._start_time_str = config["start_time_str"]
+            self._end_time_str = config["end_time_str"]
+            self._time_units = config["time_units"]
+            # self.p_and_e_df = p_and_e_df
+            self.p_and_e = p_and_e
+            self.config = config
+            self.model_info = model_info
+
+            train_period = config["train_period"]
+            test_period = config["test_period"]
+            self.train_start_time_str = train_period[0]
+            self.train_end_time_str = train_period[1]
+            self.test_start_time_str = test_period[0]
+            self.test_end_time_str = test_period[1]
+
+        except Exception:
+            import traceback
+
+            traceback.print_exc()
+            raise
+
+    def update(self):
+        """Update the model for a single time step."""
+
+        self.time_step += 1
+        p_and_e_sim = self.p_and_e[
+            self.warmup_length : self.time_step + self.warmup_length
+        ]
+        # p_and_e_sim = self.p_and_e[0:self.time_step]
+        (
+            self.runoff_im,
+            self.rss_,
+            self.ris_,
+            self.rgs_,
+            self.es_runoff,
+            self.rss,
+        ) = xaj_runoff(
+            p_and_e_sim,
+            w0=self.w0,
+            s0=self.s0,
+            fr0=self.fr0,
+            params_runoff=self.params,
+            return_state=False,
+            model_info=self.model_info,
+        )
+        # routing is only run once, after the last runoff-generation step
+        if self.time_step + self.warmup_length >= self.p_and_e.shape[0]:
+            q_sim, es = xaj_route(
+                p_and_e_sim,
+                params_route=self.params,
+                model_info=self.model_info,
+                runoff_im=self.runoff_im,
+                rss_=self.rss_,
+                ris_=self.ris_,
+                rgs_=self.rgs_,
+                rss=self.rss,
+                qi0=self.qi0,
+                qg0=self.qg0,
+                es=self.es_runoff,
+            )
+            self.p_sim = p_and_e_sim[:, :, 0]
+            self.e_sim = p_and_e_sim[:, :, 1]
+            # q_sim = convert_unit(
+            #     q_sim,
+            #     unit_now="mm/day",
+            #     unit_final=unit["streamflow"],
+            #     basin_area=float(self.basin_area),
+            # )
+            self.q_sim = q_sim[:, :, :]
+            self.es = es[:, :, :]
+
+    def update_until(self, time):
+        while self.get_current_time() + 0.001 < time:
+            self.update()
+
+    def finalize(self) -> None:
+        """Finalize model."""
+        self.model = None
+
+    def get_component_name(self) -> str:
+        return "xaj"
+
+    def get_input_item_count(self) -> int:
+        return len(self.input_var_names)
+
+    def get_output_item_count(self) -> int:
+        return len(self.output_var_names)
+
+    def get_input_var_names(self) -> Tuple[str]:
+        return self.input_var_names
+
+    def get_output_var_names(self) -> Tuple[str]:
+        return self.output_var_names
+
+    def get_var_grid(self, name: str) -> int:
+        raise NotImplementedError()
+
+    def get_var_type(self, name: str) -> str:
+        return "float64"
+
+    def get_var_units(self, name: str) -> str:
+        return self.var_units[name]
+
+    def get_var_itemsize(self, name: str) -> int:
+        return np.dtype(self.get_var_type(name)).itemsize
+
+    def get_var_nbytes(self, name: str) -> int:
+        return self.get_value_ptr(name).nbytes
+
+    def get_var_location(self, name: str) -> str:
+        raise NotImplementedError()
+
+    def get_start_time(self, period):
+        if period == "train":
+            return self.start_Time(self.train_start_time_str)
+        if period == "test":
+            return self.start_Time(self.test_start_time_str)
+        # return self.start_Time(self._start_time_str)
+
+    def get_current_time(self):
+        # return self.start_Time(self._start_time_str) + datetime.timedelta(self.time_step+self.warmup_length)
+        if self._time_units == "hours":
+            time_step = datetime.timedelta(hours=self.time_step)
+        elif self._time_units == "days":
+            time_step = datetime.timedelta(days=self.time_step)
+        return self.start_Time(self._start_time_str) + time_step
+
+    def get_end_time(self, period):
+        if period == "train":
+            return self.end_Time(self.train_end_time_str)
+        if period == "test":
+            return self.end_Time(self.test_end_time_str)
+        # return self.end_Time(self._end_time_str)
+
+    def get_time_units(self) -> str:
+        return self._time_units
+
+    def get_time_step(self) -> float:
+        return 1
+
+    def get_value(self, name: str) -> np.ndarray:
+        logger.info("getting value for var %s", name)
+        return self.get_value_ptr(name).flatten()
+
+    def get_value_ptr(self, name: str) -> np.ndarray:
+        if name == "discharge":
+            return self.q_sim
+        elif name == "ET":
+            return self.es
+
+    def get_value_at_indices(self, name: str, inds: np.ndarray) -> np.ndarray:
+        return self.get_value_ptr(name).take(inds)
+
+    def set_value(self, name: str, src: np.ndarray):
+        val = self.get_value_ptr(name)
+        val[:] = src.reshape(val.shape)
+
+    def set_value_at_indices(
+        self, name: str, inds: np.ndarray, src: np.ndarray
+    ) -> None:
+        val = self.get_value_ptr(name)
+        val.flat[inds] = src
+
+    # Grid information
+    def get_grid_rank(self, grid: int) -> int:
+        raise NotImplementedError()
+
+    def get_grid_size(self, grid: int) -> int:
+        raise NotImplementedError()
+
+    def get_grid_type(self, grid: int) -> str:
+        raise NotImplementedError()
+
+    # Uniform rectilinear
+    def get_grid_shape(self, grid: int, shape: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    def get_grid_spacing(self, grid: int, spacing: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    def get_grid_origin(self, grid: int, origin: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    # Non-uniform rectilinear, curvilinear
+    def get_grid_x(self, grid: int, x: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    def get_grid_y(self, grid: int, y: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    def get_grid_z(self, grid: int, z: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    def get_grid_node_count(self, grid: int) -> int:
+        raise NotImplementedError()
+
+    def get_grid_edge_count(self, grid: int) -> int:
+        raise NotImplementedError()
+
+    def get_grid_face_count(self, grid: int) -> int:
+        raise NotImplementedError()
+
+    def get_grid_edge_nodes(self, grid: int, edge_nodes: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    def get_grid_face_edges(self, grid: int, face_edges: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    def get_grid_face_nodes(self, grid: int, face_nodes: np.ndarray) -> np.ndarray:
+        raise NotImplementedError()
+
+    def get_grid_nodes_per_face(
+        self, grid: int, nodes_per_face: np.ndarray
+    ) -> np.ndarray:
+        raise NotImplementedError()
+
+    def start_Time(self, _start_time_str):
+        try:
+            if " " in _start_time_str:
+                date, time = _start_time_str.split(" ")
+            else:
+                date = _start_time_str
+                time = None
+            year, month, day = date.split("-")
+            self._startTime = datetime.date(int(year), int(month), int(day))
+
+            if time:
+                hour, minute, second = time.split(":")
+                # self._startTime = self._startTime.replace(hour=int(hour),
+                #                                           minute=int(minute),
+                #                                           second=int(second))
+                self._startTime = datetime.datetime(
+                    int(year), int(month), int(day), int(hour), int(minute), int(second)
+                )
+        except ValueError as e:
+            raise ValueError("Invalid start date format!") from e
+
+        return self._startTime
+
+    def end_Time(self, _end_time_str):
+        try:
+            if " " in _end_time_str:
+                date, time = _end_time_str.split(" ")
+            else:
+                date = _end_time_str
+                time = None
+            year, month, day = date.split("-")
+            self._endTime = datetime.date(int(year), int(month), int(day))
+
+            if time:
+                hour, minute, second = time.split(":")
+                self._endTime = datetime.datetime(
+                    int(year), int(month), int(day), int(hour), int(minute), int(second)
+                )
+        except ValueError as e:
+            raise ValueError("Invalid end date format!") from e
+        return self._endTime
diff --git a/hydromodel/utils/constant_unit.py b/hydromodel/utils/constant_unit.py
new file mode 100644
index 0000000..f09eef7
--- /dev/null
+++ b/hydromodel/utils/constant_unit.py
@@ -0,0 +1,63 @@
+"""
+Author: Wenyu Ouyang
+Date: 2022-12-08 09:24:54
+LastEditTime: 2022-12-08 09:51:54
+LastEditors: Wenyu Ouyang
+Description: some constants for hydro models
+FilePath: /hydro-model-xaj/hydromodel/utils/constant_unit.py
+Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved.
+"""
+# unify the unit of each variable
+unit = {"streamflow": "m3/s"}
+
+
+def convert_unit(data, unit_now, unit_final, **kwargs):
+    """
+    convert the unit of a variable
+
+    Parameters
+    ----------
+    data
+        data to be converted
+    unit_now
+        unit of the variable now
+    unit_final
+        unit of the variable after conversion
+    **kwargs
+        other parameters required for the conversion
+
+    Returns
+    -------
+    data
+        data after conversion
+    """
+    if unit_now == "mm/day" and unit_final == "m3/s":
+        result = mm_per_day_to_m3_per_sec(basin_area=kwargs["basin_area"], q=data)
+    else:
+        raise ValueError("unit conversion not supported")
+    return result
+
+
+def mm_per_day_to_m3_per_sec(basin_area, q):
+    """
+    transform mm/day to m3/s for the xaj models
+
+    Parameters
+    ----------
+    basin_area
+        we need to know the area of a basin so that we can perform this transformation
+    q
+        original streamflow data
+
+    Returns
+    -------
+    np.ndarray
+        streamflow in m3/s
+    """
+    # 1 ft3 = 0.02831685 m3
+    # ft3tom3 = 2.831685e-2
+    # 1 km2 = 10^6 m2
+    km2tom2 = 1e6
+    # 1 m = 1000 mm
+    mtomm = 1000
+    # 1 day = 24 * 3600 s
+    daytos = 24 * 3600
+    q_trans = q * basin_area * km2tom2 / (mtomm * daytos)
+    return q_trans
diff --git a/hydromodel/utils/dmca_esr.py b/hydromodel/utils/dmca_esr.py
new file mode 100644
index 0000000..397fd77
--- /dev/null
+++ b/hydromodel/utils/dmca_esr.py
@@ -0,0 +1,223 @@
+import numpy as np
+
+
+def movmean(X, n):
+    ones = np.ones(X.shape)
+    kernel = np.ones(n)
+    return np.convolve(X, kernel, mode='same') / np.convolve(ones, kernel, mode='same')
+
+
+def step1_step2_tr_and_fluctuations_timeseries(rain, flow, rain_min, max_window):
+    """
+    :param rain: rainfall series in mm/h; mind the unit conversion between mm/h and mm/day
+    :param flow: streamflow series in m³/h; mind the unit conversion between m³/h and m³/day
+    :param rain_min: minimum rainfall threshold
+    :param max_window: maximum window for event separation, which bounds the event length
+    """
+    rain = rain.T
+    flow = flow.T
+    rain_int = np.nancumsum(rain)
+    flow_int = np.nancumsum(flow)
+    T = rain.size
+    rain_mean = np.empty(((max_window - 1) // 2, T))
+    flow_mean = np.empty(((max_window - 1) // 2, T))
+    fluct_rain = np.empty(((max_window - 1) // 2, T))
+    fluct_flow = np.empty(((max_window - 1) // 2, T))
+    F_rain = np.empty((max_window - 1) // 2)
+    F_flow = np.empty((max_window - 1) // 2)
+    F_rain_flow = np.empty((max_window - 1) // 2)
+    rho = np.empty((max_window - 1) // 2)
+    for window in np.arange(3, max_window + 1, 2):
+        int_index = int((window - 1) / 2 - 1)
+        start_slice = int(window - 0.5 * (window - 1))
+        dst_slice = int(T - 0.5 * (window - 1))
+        # fill one row (per window size) of the (num windows) x (series length) arrays
+        rain_mean[int_index] = movmean(rain_int, window)
+        flow_mean[int_index] = movmean(flow_int, window)
+        fluct_rain[int_index] = rain_int - rain_mean[int_index, :]
+        F_rain[int_index] = (1 / (T - window + 1)) * np.nansum(
+            (fluct_rain[int_index, start_slice:dst_slice]) ** 2)
+        fluct_flow[int_index, np.newaxis] = flow_int - flow_mean[int_index, :]
+        F_flow[int_index] = (1 / (T - window + 1)) * np.nansum(
+            (fluct_flow[int_index, start_slice:dst_slice]) ** 2)
+        F_rain_flow[int_index] = (1 / (T - window + 1)) * np.nansum(
+            (fluct_rain[int_index, start_slice:dst_slice]) * (
+                fluct_flow[int_index, start_slice:dst_slice]))
+        rho[int_index] = F_rain_flow[int_index] / (
+            np.sqrt(F_rain[int_index]) * np.sqrt(F_flow[int_index]))
+    pos_min = np.argmin(rho)
+    Tr = pos_min + 1
+    tol_fluct_rain = (rain_min / (2 * Tr + 1)) * Tr
+    tol_fluct_flow = flow_int[-1] / 1e15
+    fluct_rain[pos_min, np.fabs(fluct_rain[pos_min, :]) < tol_fluct_rain] = 0
+    fluct_flow[pos_min, np.fabs(fluct_flow[pos_min, :]) < tol_fluct_flow] = 0
+    fluct_rain_Tr = fluct_rain[pos_min, :]
+    fluct_flow_Tr = fluct_flow[pos_min, :]
+    fluct_bivariate_Tr = fluct_rain_Tr * fluct_flow_Tr
+    fluct_bivariate_Tr[np.fabs(fluct_bivariate_Tr) < np.finfo(np.float64).eps] = 0  # zero out tiny values for robust comparisons
+    return Tr, fluct_rain_Tr, fluct_flow_Tr, fluct_bivariate_Tr
+
+
+def step3_core_identification(fluct_bivariate_Tr):
+    d = np.diff(fluct_bivariate_Tr, prepend=[0], append=[0])  # difference of adjacent values; 0 means both endpoints lie in a zero interval
+    d[np.fabs(d) < np.finfo(np.float64).eps] = 0  # guard against floating-point noise
+    d = np.logical_not(d)  # boolean array; True marks a zero interval
+    d0 = np.logical_not(np.convolve(d, [1, 1], 'valid'))  # OR adjacent elements to flag values lying in a zero interval, then negate to keep valid values
+    valid = np.logical_or(fluct_bivariate_Tr, d0)  # valid cores
+    d_ = np.diff(valid, prepend=[0], append=[0])  # difference again to pick the rising and falling edges
+    beginning_core = np.argwhere(d_ == 1)  # rising edge marks a beginning
+    end_core = np.argwhere(d_ == -1) - 1  # falling edge marks an end
+    return beginning_core, end_core
+
+
+def step4_end_rain_events(beginning_core, end_core, rain, fluct_rain_Tr, rain_min):
+    end_rain = end_core.copy()
+    rain = rain.T
+    for g in range(end_core.size):
+        if end_core[g] + 2 < fluct_rain_Tr.size and \
+                (np.fabs(fluct_rain_Tr[end_core[g] + 1]) < np.finfo(np.float64).eps and np.fabs(fluct_rain_Tr[end_core[g] + 2]) < np.finfo(np.float64).eps):
+            # case 1&2
+            if np.fabs(rain[end_core[g]]) < np.finfo(np.float64).eps:
+                # case 1
+                while end_rain[g] > beginning_core[g] and np.fabs(rain[end_rain[g]]) < np.finfo(np.float64).eps:
+                    end_rain[g] = end_rain[g] - 1
+            else:
+                # case 2
+                bound = beginning_core[g + 1] if g + 1 < beginning_core.size else rain.size
+                while end_rain[g] < bound and rain[end_rain[g]] > rain_min:
+                    end_rain[g] = end_rain[g] + 1
+                end_rain[g] = end_rain[g] - 1  # step back to the last qualifying point
+        else:
+            # case 3
+            # if it is still raining, skip that stretch first
+            while end_rain[g] >= beginning_core[g] and rain[end_rain[g]] > rain_min:
+                end_rain[g] = end_rain[g] - 1
+            while end_rain[g] >= beginning_core[g] and rain[end_rain[g]] < rain_min:
+                end_rain[g] = end_rain[g] - 1
+    return end_rain
+
+
+def step5_beginning_rain_events(beginning_core, end_rain, rain, fluct_rain_Tr, rain_min):
+    beginning_rain = beginning_core.copy()
+    rain = rain.T
+    for g in range(beginning_core.size):
+        if beginning_core[g] - 2 >= 0 \
+                and (np.fabs(fluct_rain_Tr[beginning_core[g] - 1]) < np.finfo(np.float64).eps and np.fabs(fluct_rain_Tr[beginning_core[g] - 2]) < np.finfo(
+                    np.float64).eps) \
+                and np.fabs(rain[beginning_core[g]]) < np.finfo(np.float64).eps:
+            # case 1
+            while beginning_rain[g] < end_rain[g] and np.fabs(rain[beginning_rain[g]]) < np.finfo(np.float64).eps:
+                beginning_rain[g] = beginning_rain[g] + 1
+        else:
+            # case 2&3
+            bound = end_rain[g - 1] if g - 1 >= 0 else -1
+            while beginning_rain[g] > bound and rain[beginning_rain[g]] > rain_min:
+                beginning_rain[g] = beginning_rain[g] - 1
+            beginning_rain[g] = beginning_rain[g] + 1  # step forward to the first qualifying point
+    return beginning_rain
+
+
+def step6_checks_on_rain_events(beginning_rain, end_rain, rain, rain_min, beginning_core, end_core):
+    rain = rain.T
+    beginning_rain = beginning_rain.copy()
+    end_rain = end_rain.copy()
+    if beginning_rain[0] == 0:  # trim the head
+        beginning_rain = beginning_rain[1:]
+        end_rain = end_rain[1:]
+        beginning_core = beginning_core[1:]
+        end_core = end_core[1:]
+    if end_rain[-1] == rain.size - 1:  # trim the tail
+        beginning_rain = beginning_rain[:-2]
+        end_rain = end_rain[:-2]
+        beginning_core = beginning_core[:-2]
+        end_core = end_core[:-2]
+    error_time_reversed = beginning_rain > end_rain
+    error_wrong_delimiter = np.logical_or(rain[beginning_rain - 1] > rain_min, rain[end_rain + 1] > rain_min)
+    beginning_rain[error_time_reversed] = -2
+    beginning_rain[error_wrong_delimiter] = -2
+    end_rain[error_time_reversed] = -2
+    end_rain[error_wrong_delimiter] = -2
+    beginning_core[error_time_reversed] = -2
+    beginning_core[error_wrong_delimiter] = -2
+    end_core[error_time_reversed] = -2
+    end_core[error_wrong_delimiter] = -2
+    beginning_rain = beginning_rain[beginning_rain != -2]
+    end_rain = end_rain[end_rain != -2]
+    beginning_core = beginning_core[beginning_core != -2]
+    end_core = end_core[end_core != -2]
+    return beginning_rain, end_rain, beginning_core, end_core
+
+
+def step7_end_flow_events(end_rain_checked, beginning_core, end_core, rain, fluct_rain_Tr, fluct_flow_Tr, Tr):
+    end_flow = np.empty(end_core.size, dtype=int)
+    for g in range(end_rain_checked.size):
+        if end_core[g] + 2 < fluct_rain_Tr.size and \
+                (np.fabs(fluct_rain_Tr[end_core[g] + 1]) < np.finfo(np.float64).eps and np.fabs(fluct_rain_Tr[end_core[g] + 2]) < np.finfo(np.float64).eps):
+            # case 1
+            end_flow[g] = end_rain_checked[g]
+            bound = beginning_core[g + 1] + Tr if g + 1 < beginning_core.size else rain.size
+            bound = min(bound, rain.size)  # guard against running past the end of the series
+            # if the flow fluctuation is negative, skip that stretch first
+            while end_flow[g] < bound and fluct_flow_Tr[end_flow[g]] <= 0:
+                end_flow[g] = end_flow[g] + 1
+            while end_flow[g] < bound and fluct_flow_Tr[end_flow[g]] > 0:
+                end_flow[g] = end_flow[g] + 1
+            end_flow[g] = end_flow[g] - 1  # step back to the last qualifying point
+        else:
+            # case 2
+            end_flow[g] = end_core[g]
+            while end_flow[g] >= beginning_core[g] and fluct_flow_Tr[end_flow[g]] <= 0:
+                end_flow[g] = end_flow[g] - 1
+    return end_flow
+
+
+def step8_beginning_flow_events(beginning_rain_checked, end_rain_checked, rain, beginning_core, fluct_rain_Tr, fluct_flow_Tr):
+    beginning_flow = np.empty(beginning_rain_checked.size, dtype=int)
+    for g in range(beginning_rain_checked.size):
+        if beginning_core[g] - 2 >= 0 \
+                and (np.fabs(fluct_rain_Tr[beginning_core[g] - 1]) < np.finfo(np.float64).eps and np.fabs(fluct_rain_Tr[beginning_core[g] - 2]) < np.finfo(
+                    np.float64).eps):
+            beginning_flow[g] = beginning_rain_checked[g]  # case 1
+        else:
+            beginning_flow[g] = beginning_core[g]  # case 2
+        while beginning_flow[g] < end_rain_checked[g] and fluct_flow_Tr[beginning_flow[g]] >= 0:
+            beginning_flow[g] = beginning_flow[g] + 1
+    return beginning_flow
+
+
+def step9_checks_on_flow_events(beginning_rain_checked, end_rain_checked, beginning_flow, end_flow, fluct_flow_Tr):
+    error_time_reversed = beginning_flow > end_flow
+    error_wrong_fluct = np.logical_or(np.logical_or(fluct_flow_Tr[beginning_flow] > 0, fluct_flow_Tr[end_flow] < 0), np.logical_or(beginning_flow <
+                                                                                                                                   beginning_rain_checked, end_flow < end_rain_checked))
+    beginning_flow[error_time_reversed] = -3
+    beginning_flow[error_wrong_fluct] = -3
+    end_flow[error_time_reversed] = -3
+    end_flow[error_wrong_fluct] = -3
+    beginning_flow = beginning_flow[beginning_flow != -3]
+    end_flow = end_flow[end_flow != -3]
+    return beginning_flow, end_flow
+
+
+def 
step10_checks_on_overlapping_events(beginning_rain_ungrouped, end_rain_ungrouped, beginning_flow_ungrouped, end_flow_ungrouped, time): + # rain + order1 = np.reshape(np.hstack((np.reshape(beginning_rain_ungrouped, (-1, 1)), + np.reshape(end_rain_ungrouped, (-1, 1)))), (1, -1)) + reversed1 = np.diff(order1) <= 0 + order1[np.hstack((reversed1, [[False]]))] = -2 + order1[np.hstack(([[False]], reversed1))] = -2 + order1 = order1[order1 != -2] + # flow + order2 = np.reshape(np.hstack((np.reshape(beginning_flow_ungrouped, (-1, 1)), + np.reshape(end_flow_ungrouped, (-1, 1)))), (1, -1)) + reversed2 = np.diff(order2) <= 0 + order2[np.hstack((reversed2, [[False]]))] = -3 + order2[np.hstack(([[False]], reversed2))] = -3 + order2 = order2[order2 != -3] + # group + rain_grouped = np.reshape(order1, (-1, 2)).T + beginning_rain_grouped = rain_grouped[0] + end_rain_grouped = rain_grouped[1] + flow_grouped = np.reshape(order2, (-1, 2)).T + beginning_flow_grouped = flow_grouped[0] + end_flow_grouped = flow_grouped[1] + return time[beginning_rain_grouped], time[end_rain_grouped], time[beginning_flow_grouped], time[end_flow_grouped] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..245c186 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +ipykernel +numpy +numba +pandas +scikit-learn +deap +spotpy==1.5.14 +seaborn +tqdm +pytest +hydrodataset +bmipy +pyyaml +requests \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100644 index 0000000..c4e98f7 --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,14 @@ +black +black[jupyter] +pip +bump2version +wheel +watchdog +flake8 +tox +coverage +Sphinx +twine + +pytest +pytest-runner \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..2e09fd3 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,26 @@ +[bumpversion] +current_version = 0.0.1 +commit = True +tag = True + +[bumpversion:file:setup.py] +search = version='{current_version}' +replace = version='{new_version}' + +[bumpversion:file:hydromodel/__init__.py] +search = __version__ = '{current_version}' +replace = __version__ = '{new_version}' + +[bdist_wheel] +universal = 1 + +[flake8] +exclude = docs + +[aliases] +# Define setup.py command aliases here +test = pytest + +[tool:pytest] +collect_ignore = ['setup.py'] + diff --git a/test/runxaj.yaml b/test/runxaj.yaml new file mode 100644 index 0000000..ae57a87 --- /dev/null +++ b/test/runxaj.yaml @@ -0,0 +1,46 @@ +# xaj model configuration +# The current model runs on the daily time step + start_time_str: "1990-01-01" + end_time_str: "2009-12-31" + time_units: "days" + # The current model runs on the hourly time step + # basin_area: "66400" + # start_time_str: "2021-06-01 00:00:00" + # end_time_str: "2021-08-31 23:00:00" + # time_units: "hours" +# initial condition + forcing_data: "/hydromodel/example/01013500_lump_p_pe_q.txt" + json_file: "/hydromodel/example/data_info.json" + npy_file: "/hydromodel/example/basins_lump_p_pe_q.npy" + # forcing_file: "/home/wangjingyi/code/hydro-model-xaj/scripts/尼尔基降雨.csv" + warmup_length: 360 + basin_area: 2252.7 + + + train_period: ["1990-01-01", "2000-12-31"] + test_period: ["2001-01-01", "2009-12-31"] + period: ["1990-01-01", "2009-12-31"] + cv_fold: 1 + algorithm: "SCE_UA" + +#model_info + model_name: "xaj_mz" + source_type: "sources" + source_book: "HF" + +#algorithm_SCE_UA + # algorithm_name: "SCE_UA" + # random_seed: 1234 + # rep: 2 + # ngs: 2 + # kstop: 1 + # peps: 0.001 + # 
pcento: 0.001 +#algorithm_GA + algorithm_name: "GA" + random_seed: 1234 + run_counts: 3 + pop_num: 50 + cross_prob: 0.5 + mut_prob: 0.5 + save_freq: 1 \ No newline at end of file diff --git a/test/test_rr_event_iden.py b/test/test_rr_event_iden.py new file mode 100644 index 0000000..0034e38 --- /dev/null +++ b/test/test_rr_event_iden.py @@ -0,0 +1,96 @@ +""" +Author: Wenyu Ouyang +Date: 2023-10-28 09:23:22 +LastEditTime: 2023-10-28 16:19:22 +LastEditors: Wenyu Ouyang +Description: Test for rainfall-runoff event identification +FilePath: \hydro-model-xaj\test\test_rr_event_iden.py +Copyright (c) 2023-2024 Wenyu Ouyang. All rights reserved. +""" +import os +import pandas as pd +import definitions +from hydromodel.utils.dmca_esr import ( + step1_step2_tr_and_fluctuations_timeseries, + step3_core_identification, + step4_end_rain_events, + step5_beginning_rain_events, + step6_checks_on_rain_events, + step7_end_flow_events, + step8_beginning_flow_events, + step9_checks_on_flow_events, + step10_checks_on_overlapping_events, +) + + +def test_rainfall_runoff_event_identify(): + rain = pd.read_csv( + os.path.join(definitions.ROOT_DIR, "hydromodel/example/division_rain.csv") + )["rain"] + flow = pd.read_csv( + os.path.join(definitions.ROOT_DIR, "hydromodel/example/division_flow.csv") + )["flow"] + time = rain.index.to_numpy() + rain = rain.to_numpy() / 24 + flow = flow.to_numpy() / 24 + rain_min = 0.02 + max_window = 100 + ( + Tr, + fluct_rain_Tr, + fluct_flow_Tr, + fluct_bivariate_Tr, + ) = step1_step2_tr_and_fluctuations_timeseries(rain, flow, rain_min, max_window) + beginning_core, end_core = step3_core_identification(fluct_bivariate_Tr) + end_rain = step4_end_rain_events( + beginning_core, end_core, rain, fluct_rain_Tr, rain_min + ) + beginning_rain = step5_beginning_rain_events( + beginning_core, end_rain, rain, fluct_rain_Tr, rain_min + ) + ( + beginning_rain_checked, + end_rain_checked, + beginning_core, + end_core, + ) = step6_checks_on_rain_events( + beginning_rain, end_rain, rain, rain_min, beginning_core, end_core + ) + end_flow = step7_end_flow_events( + end_rain_checked, + beginning_core, + end_core, + rain, + fluct_rain_Tr, + fluct_flow_Tr, + Tr, + ) + beginning_flow = step8_beginning_flow_events( + beginning_rain_checked, + end_rain_checked, + rain, + beginning_core, + fluct_rain_Tr, + fluct_flow_Tr, + ) + beginning_flow_checked, end_flow_checked = step9_checks_on_flow_events( + beginning_rain_checked, + end_rain_checked, + beginning_flow, + end_flow, + fluct_flow_Tr, + ) + ( + BEGINNING_RAIN, + END_RAIN, + BEGINNING_FLOW, + END_FLOW, + ) = step10_checks_on_overlapping_events( + beginning_rain_checked, + end_rain_checked, + beginning_flow_checked, + end_flow_checked, + time, + ) + print(BEGINNING_RAIN, END_RAIN, BEGINNING_FLOW, END_FLOW) + print(len(BEGINNING_RAIN), len(END_RAIN), len(BEGINNING_FLOW), len(END_FLOW)) diff --git a/test/test_xaj_bmi.py b/test/test_xaj_bmi.py new file mode 100644 index 0000000..875a9d7 --- /dev/null +++ b/test/test_xaj_bmi.py @@ -0,0 +1,326 @@ +import logging + +import definitions +from hydromodel.models.configuration import read_config +from hydromodel.models.xaj_bmi import xajBmi +import pandas as pd +import os +from pathlib import Path +import numpy as np +import fnmatch +import socket +from datetime import datetime + +from hydromodel.utils import hydro_utils +from hydromodel.data.data_preprocess import ( + cross_valid_data, + split_train_test, +) +from hydromodel.calibrate.calibrate_sceua_xaj_bmi import calibrate_by_sceua +from 
hydromodel.calibrate.calibrate_ga_xaj_bmi import (
+    calibrate_by_ga,
+    show_ga_result,
+)
+from hydromodel.visual.pyspot_plots import show_calibrate_result, show_test_result
+from hydromodel.data.data_postprocess import (
+    renormalize_params,
+    read_save_sceua_calibrated_params,
+    save_streamflow,
+    summarize_metrics,
+    summarize_parameters,
+)
+from hydromodel.utils import hydro_constant
+
+logging.basicConfig(level=logging.INFO)
+
+
+def test_bmi():
+    """
+    model = xajBmi()
+    print(model.get_component_name())
+    model.initialize("runxaj.yaml")
+    print("Start time:", model.get_start_time())
+    print("End time:", model.get_end_time())
+    print("Current time:", model.get_current_time())
+    print("Time step:", model.get_time_step())
+    print("Time units:", model.get_time_units())
+    print(model.get_input_var_names())
+    print(model.get_output_var_names())
+
+    discharge = []
+    ET = []
+    time = []
+    while model.get_current_time() <= model.get_end_time():
+        time.append(model.get_current_time())
+        model.update()
+
+        discharge = model.get_value("discharge")
+        ET = model.get_value("ET")
+
+    results = pd.DataFrame({
+        'discharge': discharge.flatten(),
+        'ET': ET.flatten(),
+    })
+    results.to_csv('/home/wangjingyi/code/hydro-model-xaj/scripts/xaj.csv')
+    model.finalize()
+    """
+    # model calibration
+    config = read_config(os.path.relpath("runxaj.yaml"))
+    forcing_data = Path(str(definitions.ROOT_DIR) + str(config["forcing_data"]))
+    train_period = config["train_period"]
+    test_period = config["test_period"]
+    # period = config['period']
+    json_file = Path(str(definitions.ROOT_DIR) + str(config["json_file"]))
+    npy_file = Path(str(definitions.ROOT_DIR) + str(config["npy_file"]))
+    cv_fold = config["cv_fold"]
+    warmup_length = config["warmup_length"]
+    # model_info
+    model_name = config["model_name"]
+    source_type = config["source_type"]
+    source_book = config["source_book"]
+    # algorithm
+    algorithm_name = config["algorithm_name"]
+
+    # periods covering both the train and test ranges, needed for cross validation
+    periods = np.sort(
+        [train_period[0], train_period[1], test_period[0], test_period[1]]
+    )
+    if cv_fold <= 1:
+        # no cross validation
+        split_train_test(json_file, npy_file, train_period, test_period)
+    else:
+        cross_valid_data(json_file, npy_file, periods, warmup_length, cv_fold)
+
+    kfold = [
+        int(f_name[len("data_info_fold") : -len("_test.json")])
+        for f_name in os.listdir(os.path.dirname(forcing_data))
+        if fnmatch.fnmatch(f_name, "*_fold*_test.json")
+    ]
+    kfold = np.sort(kfold)
+    for fold in kfold:
+        print(f"Start to calibrate the {fold}-th fold")
+        train_data_info_file = os.path.join(
+            os.path.dirname(forcing_data), f"data_info_fold{str(fold)}_train.json"
+        )
+        train_data_file = os.path.join(
+            os.path.dirname(forcing_data),
+            f"basins_lump_p_pe_q_fold{str(fold)}_train.npy",
+        )
+        test_data_info_file = os.path.join(
+            os.path.dirname(forcing_data), f"data_info_fold{str(fold)}_test.json"
+        )
+        test_data_file = os.path.join(
+            os.path.dirname(forcing_data),
+            f"basins_lump_p_pe_q_fold{str(fold)}_test.npy",
+        )
+        if (
+            os.path.exists(train_data_info_file) is False
+            or os.path.exists(train_data_file) is False
+            or os.path.exists(test_data_info_file) is False
+            or os.path.exists(test_data_file) is False
+        ):
+            raise FileNotFoundError(
+                "The data files are not found, please run datapreprocess4calibrate.py first."
+ ) + data_train = hydro_utils.unserialize_numpy(train_data_file) + data_test = hydro_utils.unserialize_numpy(test_data_file) + data_info_train = hydro_utils.unserialize_json_ordered(train_data_info_file) + data_info_test = hydro_utils.unserialize_json_ordered(test_data_info_file) + current_time = datetime.now().strftime("%b%d_%H-%M-%S") + # one directory for one model + one hyperparam setting and one basin + save_dir = os.path.join( + os.path.dirname(forcing_data), + current_time + "_" + socket.gethostname() + "_fold" + str(fold), + ) + if algorithm_name == "SCE_UA": + random_seed = config["random_seed"] + rep = config["rep"] + ngs = config["ngs"] + kstop = config["kstop"] + peps = config["peps"] + pcento = config["pcento"] + for i in range(len(data_info_train["basin"])): + basin_id = data_info_train["basin"][i] + basin_area = data_info_train["area"][i] + # one directory for one model + one hyperparam setting and one basin + spotpy_db_dir = os.path.join( + save_dir, + basin_id, + ) + + if not os.path.exists(spotpy_db_dir): + os.makedirs(spotpy_db_dir) + db_name = os.path.join(spotpy_db_dir, "SCEUA_" + model_name) + sampler = calibrate_by_sceua( + data_train[:, i : i + 1, 0:2], + data_train[:, i : i + 1, -1:], + db_name, + warmup_length=warmup_length, + model={ + "name": model_name, + "source_type": source_type, + "source_book": source_book, + }, + algorithm={ + "name": algorithm_name, + "random_seed": random_seed, + "rep": rep, + "ngs": ngs, + "kstop": kstop, + "peps": peps, + "pcento": pcento, + }, + ) + + show_calibrate_result( + sampler.setup, + db_name, + warmup_length=warmup_length, + save_dir=spotpy_db_dir, + basin_id=basin_id, + train_period=data_info_train["time"], + basin_area=basin_area, + ) + params = read_save_sceua_calibrated_params( + basin_id, spotpy_db_dir, db_name + ) + + model = xajBmi() + model.initialize( + os.path.relpath("runxaj.yaml"), params, data_test[:, i : i + 1, 0:2] + ) + j = 0 + while j <= len(data_info_test["time"]): + model.update() + j += 1 + q_sim = model.get_value("discharge") + + qsim = hydro_constant.convert_unit( + q_sim, + unit_now="mm/day", + unit_final=hydro_constant.unit["streamflow"], + basin_area=basin_area, + ) + + qobs = hydro_constant.convert_unit( + data_test[warmup_length:, i : i + 1, -1:], + unit_now="mm/day", + unit_final=hydro_constant.unit["streamflow"], + basin_area=basin_area, + ) + test_result_file = os.path.join( + spotpy_db_dir, + "test_qsim_" + model_name + "_" + str(basin_id) + ".csv", + ) + pd.DataFrame(qsim.reshape(-1, 1)).to_csv( + test_result_file, + sep=",", + index=False, + header=False, + ) + test_date = pd.to_datetime( + data_info_test["time"][warmup_length:] + ).values.astype("datetime64[D]") + show_test_result( + basin_id, test_date, qsim, qobs, save_dir=spotpy_db_dir + ) + elif algorithm_name == "GA": + random_seed = config["random_seed"] + run_counts = config["run_counts"] + pop_num = config["pop_num"] + cross_prob = config["cross_prob"] + mut_prob = config["mut_prob"] + save_freq = config["save_freq"] + for i in range(len(data_info_train["basin"])): + basin_id = data_info_train["basin"][i] + basin_area = data_info_train["area"][i] + # one directory for one model + one hyperparam setting and one basin + deap_db_dir = os.path.join(save_dir, basin_id) + + if not os.path.exists(deap_db_dir): + os.makedirs(deap_db_dir) + calibrate_by_ga( + data_train[:, i : i + 1, 0:2], + data_train[:, i : i + 1, -1:], + deap_db_dir, + warmup_length=warmup_length, + model={ + "name": model_name, + "source_type": source_type, + 
"source_book": source_book, + }, + ga_param={ + "name": algorithm_name, + "random_seed": random_seed, + "run_counts": run_counts, + "pop_num": pop_num, + "cross_prob": cross_prob, + "mut_prob": mut_prob, + "save_freq": save_freq, + }, + ) + show_ga_result( + deap_db_dir, + warmup_length=warmup_length, + basin_id=basin_id, + the_data=data_train[:, i : i + 1, :], + the_period=data_info_train["time"], + basin_area=basin_area, + model_info={ + "name": model_name, + "source_type": source_type, + "source_book": source_book, + }, + train_mode=True, + ) + show_ga_result( + deap_db_dir, + warmup_length=warmup_length, + basin_id=basin_id, + the_data=data_test[:, i : i + 1, :], + the_period=data_info_test["time"], + basin_area=basin_area, + model_info={ + "name": model_name, + "source_type": source_type, + "source_book": source_book, + }, + train_mode=False, + ) + else: + raise NotImplementedError( + "We don't provide this calibrate method! Choose from 'SCE_UA' or 'GA'!" + ) + summarize_parameters( + save_dir, + model_info={ + "name": model_name, + "source_type": source_type, + "source_book": source_book, + }, + ) + renormalize_params( + save_dir, + model_info={ + "name": model_name, + "source_type": source_type, + "source_book": source_book, + }, + ) + summarize_metrics( + save_dir, + model_info={ + "name": model_name, + "source_type": source_type, + "source_book": source_book, + }, + ) + save_streamflow( + save_dir, + model_info={ + "name": model_name, + "source_type": source_type, + "source_book": source_book, + }, + fold=fold, + ) + print(f"Finish calibrating the {fold}-th fold")