Skip to content

Commit

Permalink
add preprocess.py for rainfall-runoff events
Browse files Browse the repository at this point in the history
  • Loading branch information
OuyangWenyu committed Mar 27, 2024
1 parent 236de11 commit 7b8e430
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 63 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ $ python evaluate_xaj.py --exp expcamels001
Run the following code to see the results of the evaluation:

```Shell
$ python post_process.py --exp expcamels001
$ python visualize.py --exp expcamels001
```

You will see the results in the `example` directory.
Expand Down
27 changes: 25 additions & 2 deletions hydromodel/datasets/data_preprocess.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2022-10-25 21:16:22
LastEditTime: 2024-03-27 16:31:55
LastEditTime: 2024-03-27 18:17:14
LastEditors: Wenyu Ouyang
Description: preprocess data for models in hydro-model-xaj
FilePath: \hydro-model-xaj\hydromodel\datasets\data_preprocess.py
Expand All @@ -13,10 +13,12 @@
from hydrodataset import Camels
import numpy as np
import pandas as pd
from pint import UnitRegistry
from sklearn.model_selection import KFold
import xarray as xr

from hydrodata.utils.utils import streamflow_unit_conv
from hydrodata.cleaner.dmca_esr import rainfall_runoff_event_identify

from hydromodel import CACHE_DIR, SETTING
from hydromodel.datasets import *
Expand Down Expand Up @@ -432,7 +434,7 @@ def get_ts_from_diffsource(data_type, data_dir, periods, basin_ids):
data_dir
The directory of the data source
periods
The periods of the time series data
The periods of the time series data, [start_date, end_date]
basin_ids
The ids of the basins
Expand Down Expand Up @@ -473,6 +475,7 @@ def get_ts_from_diffsource(data_type, data_dir, periods, basin_ids):
r_mmd = streamflow_unit_conv(qobs_, basin_area, target_unit=target_unit)
ts_data[flow_name] = r_mmd[flow_name]
ts_data[flow_name].attrs["units"] = target_unit
ts_data = ts_data.sel(time=slice(periods[0], periods[1]))
else:
raise NotImplementedError(
"You should set the data type as 'camels' or 'owndata'"
Expand Down Expand Up @@ -519,3 +522,23 @@ def cross_val_split_tsdata(
# cross validation
train_and_test_data = cross_valid_data(ts_data, periods, warmup, cv_fold)
return train_and_test_data


def get_rr_events(rain, flow, basin_area):
    """Identify rainfall-runoff events for each basin.

    Parameters
    ----------
    rain
        Rainfall data with a ``basin`` coordinate; the series of one basin is
        taken with ``rain.sel(basin=...).to_series()``
    flow
        Streamflow data, selected and converted the same way as ``rain``
    basin_area
        Dataset of basin areas with a ``basin`` dimension; the values are
        interpreted as km^2

    Returns
    -------
    dict
        Maps each basin id in ``basin_area.basin`` to the result returned by
        ``rainfall_runoff_event_identify`` for that basin
    """
    ureg = UnitRegistry()
    rr_events = {}
    for idx, basin in enumerate(basin_area.basin.values):
        # Convert the fixed 100 m^3/s discharge threshold to mm/h using the
        # area of THIS basin: the equivalent depth depends on basin area, so
        # one value derived from the first basin cannot be shared by all.
        flow_threshold = streamflow_unit_conv(
            np.array([100]) * ureg.m**3 / ureg.s,
            basin_area.isel(basin=idx).to_array().to_numpy() * ureg.km**2,
            target_unit="mm/h",
        )
        rr_events[basin] = rainfall_runoff_event_identify(
            rain.sel(basin=basin).to_series(),
            flow.sel(basin=basin).to_series(),
            multiple=1,
            flow_threshold=flow_threshold[0],
        )
    return rr_events
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Show results of calibration and validation."""

import os
from matplotlib import pyplot as plt
from matplotlib import dates, pyplot as plt
import numpy as np
import pandas as pd

from hydroutils import hydro_file, hydro_stat
from hydroutils import hydro_file, hydro_stat, hydro_plot


def plot_sim_and_obs(
Expand Down Expand Up @@ -54,6 +54,32 @@ def plot_train_iteration(likelihood, save_fig):
plt.close()


def plot_rr_events(rr_events, rain, flow, save_dir=None):
    """Plot each rainfall-runoff event and optionally save the figures.

    Parameters
    ----------
    rr_events
        Table of events with "BEGINNING_RAIN" and "END_FLOW" columns that give
        the start and the end time of every event
    rain
        Rainfall data supporting ``.sel(time=slice(start, end))``
    flow
        Streamflow data supporting ``.sel(time=slice(start, end))``
    save_dir
        If given, the figure of event ``i`` is saved as ``rr_event_{i}.png`` in
        this directory (created when missing) and then closed; by default
        nothing is saved
    """
    if len(rr_events) == 0:
        return
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    event_windows = zip(rr_events["BEGINNING_RAIN"], rr_events["END_FLOW"])
    for i, (beginning_time, end_time) in enumerate(event_windows):
        # Restrict both series to the time window of this event
        event_rain = rain.sel(time=slice(beginning_time, end_time))
        event_flow = flow.sel(time=slice(beginning_time, end_time))

        hydro_plot.plot_rainfall_runoff(
            event_rain.time.values,
            event_rain.values,
            [event_flow.values],
            title=f"Rainfall-Runoff Event {i}",
            leg_lst=["Flow"],
            xlabel="Time",
            ylabel="Flow (mm/h)",
        )
        if save_dir:
            save_fig = os.path.join(save_dir, f"rr_event_{i}.png")
            plt.savefig(save_fig, bbox_inches="tight")
            # Release the figure once it is on disk; otherwise one open
            # figure per event accumulates in memory. This assumes the plot
            # is pyplot's current figure, which savefig above relies on too.
            plt.close()


def show_events_result(
warmup_length,
save_dir,
Expand Down
2 changes: 1 addition & 1 deletion hydromodel/trainers/calibrate_ga.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from hydroutils import hydro_file, hydro_stat


from hydromodel.datasets.data_postprocess import plot_sim_and_obs, plot_train_iteration
from hydromodel.datasets.data_visualize import plot_sim_and_obs, plot_train_iteration
from hydromodel.models.model_config import read_model_param_dict
from hydromodel.models.model_dict import MODEL_DICT, rmse43darr

Expand Down
53 changes: 0 additions & 53 deletions hydromodel/trainers/train.py

This file was deleted.

94 changes: 94 additions & 0 deletions scripts/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
Author: Wenyu Ouyang
Date: 2024-03-25 09:21:56
LastEditTime: 2024-03-27 18:20:38
LastEditors: Wenyu Ouyang
Description: preprocess data in an exp before training
FilePath: \hydro-model-xaj\scripts\preprocess.py
Copyright (c) 2023-2024 Wenyu Ouyang. All rights reserved.
"""

from pathlib import Path
import sys
import os
import argparse

current_script_path = Path(os.path.realpath(__file__))
repo_path = current_script_path.parent.parent
sys.path.append(str(repo_path))
from hydromodel.datasets.data_visualize import plot_rr_events
from hydromodel.datasets.data_preprocess import (
get_basin_area,
get_ts_from_diffsource,
get_rr_events,
)


def main(args):
    """Identify the rainfall-runoff events of an experiment and plot them.

    Parameters
    ----------
    args
        Parsed command-line arguments providing ``data_dir``, ``data_type``,
        ``basin_id``, ``period`` ([start_date, end_date]) and ``exp``

    Raises
    ------
    ValueError
        If ``period`` does not contain exactly two values
    """
    periods = args.period
    # --period accepts any number of values on the command line, but only a
    # [start_date, end_date] pair is meaningful; fail early with a clear
    # message instead of an IndexError (or silently ignored values) later.
    if len(periods) != 2:
        raise ValueError(
            f"period must be [start_date, end_date], but got {periods}"
        )
    data_path = args.data_dir
    data_type = args.data_type
    basin_ids = args.basin_id
    exp = args.exp

    # Results of an experiment are stored in <repo>/result/<exp>
    where_save = Path(repo_path) / "result" / exp
    where_save.mkdir(parents=True, exist_ok=True)

    ts_data = get_ts_from_diffsource(data_type, data_path, periods, basin_ids)
    basin_area = get_basin_area(data_type, data_path, basin_ids)
    rr_events = get_rr_events(ts_data["prcp"], ts_data["flow"], basin_area)
    for basin, event in rr_events.items():
        # One sub-directory of figures per basin
        basin_rr_dir = os.path.join(where_save, f"{basin}_rr_events")
        plot_rr_events(
            event,
            ts_data["prcp"].sel(basin=basin),
            ts_data["flow"].sel(basin=basin),
            basin_rr_dir,
        )


if __name__ == "__main__":
    # Command-line entry point: collect the settings of one experiment and
    # hand them to main(). The active defaults describe an "owndata"
    # experiment; the CAMELS counterpart is noted next to each option.
    arg_parser = argparse.ArgumentParser(description="Prepare data.")
    arg_parser.add_argument(
        "--data_type",
        type=str,
        default="owndata",  # CAMELS setting: "camels"
        help="CAMELS dataset or your own data, such as 'camels' or 'owndata'",
    )
    arg_parser.add_argument(
        "--data_dir",
        type=str,
        # CAMELS setting: "camels_us"
        # NOTE(review): the default below is a machine-specific absolute path;
        # pass --data_dir explicitly on any other machine.
        default="C:\\Users\\wenyu\\OneDrive\\data\\biliuhe",
        help=(
            "The directory of the CAMELS dataset or your own data, for CAMELS,"
            " as we use SETTING to set the data path, you can directly choose camels_us;"
            " for your own data, you should set the absolute path of your data directory"
        ),
    )
    arg_parser.add_argument(
        "--exp",
        type=str,
        default="expbiliuhe001",  # CAMELS setting: "expcamels001"
        help="An exp is corresponding to one data setting",
    )
    arg_parser.add_argument(
        "--basin_id",
        nargs="+",
        # CAMELS setting: ["01439500", "06885500", "08104900", "09510200"]
        default=["21401550"],
        help="The basins' ids",
    )
    arg_parser.add_argument(
        "--period",
        nargs="+",
        # CAMELS setting: ["2007-01-01", "2014-01-01"]
        default=["2012-06-10 00:00", "2022-08-31 23:00"],
        help="The whole period",
    )
    args = arg_parser.parse_args()
    main(args)
6 changes: 3 additions & 3 deletions scripts/post_process.py → scripts/visualize.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
"""
Author: Wenyu Ouyang
Date: 2022-11-19 17:27:05
LastEditTime: 2024-03-27 16:27:03
LastEditTime: 2024-03-27 17:54:11
LastEditors: Wenyu Ouyang
Description: the script to postprocess results
FilePath: \hydro-model-xaj\scripts\post_process.py
FilePath: \hydro-model-xaj\scripts\visualize.py
Copyright (c) 2021-2022 Wenyu Ouyang. All rights reserved.
"""

Expand All @@ -15,7 +15,7 @@

repo_dir = os.path.dirname(Path(os.path.abspath(__file__)).parent)
sys.path.append(repo_dir)
from hydromodel.datasets.data_postprocess import plot_sim_and_obs
from hydromodel.datasets.data_visualize import plot_sim_and_obs
from hydromodel.trainers.evaluate import Evaluator, read_yaml_config


Expand Down
2 changes: 1 addition & 1 deletion test/test_data_postprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from hydroutils import hydro_time

from hydromodel.datasets.data_postprocess import show_events_result, show_ts_result
from hydromodel.datasets.data_visualize import show_events_result, show_ts_result
from hydromodel.models.xaj import xaj
from hydromodel.trainers.calibrate_sceua import calibrate_by_sceua
from hydromodel.trainers.evaluate import _read_save_sceua_calibrated_params
Expand Down

0 comments on commit 7b8e430

Please sign in to comment.