Skip to content

Commit

Permalink
Added CAMELS_AUS_v2.0.0 data; Modify the calculation of the function …
Browse files Browse the repository at this point in the history
…unit of read_target_cols() --only streamflow_MLd; Added unit selection to read_mean_prcp().
  • Loading branch information
EstelleMing committed Nov 11, 2024
1 parent fbbdbef commit 528d0a5
Show file tree
Hide file tree
Showing 5 changed files with 286 additions and 14 deletions.
62 changes: 62 additions & 0 deletions examples/aus_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from hydrodataset import Camels

# Export one CSV per CAMELS-AUS gauge into `directory`: forcing columns are
# read from the "AUS" dataset and streamflow from the "AUS_v2" dataset.
directory = 'basin_flow'
# exist_ok=True replaces the exists()-then-makedirs() pair, which could race
# with another process creating the same directory.
os.makedirs(directory, exist_ok=True)

camels_aus_path = "/ftproot/camels/camels_aus/"
camels_aus_v2_path = "/home/estelle/data/waterism/datasets-origin/camels/camels_aus_v2/"

aus_region = "AUS"
aus_v2_region = "AUS_v2"
# ------------------------------ AUS --------------------------------
camels_aus = Camels(camels_aus_path, download=False, region=aus_region)
camels_aus_v2 = Camels(camels_aus_v2_path, download=False, region=aus_v2_region)
gage_ids = camels_aus.read_object_ids()

# Single source for the period requested from both readers.
time_range = ["2015-01-01", "2022-02-15"]

# Return -> np.array: forcing data, indexed below as [station, time, variable]
hydro_info = camels_aus.read_relevant_cols(
    gage_ids[:],
    time_range,
    ["et_morton_point_SILO", "precipitation_SILO", "et_morton_actual_SILO"],
)

# Gauges whose streamflow column is written as NaN instead of being read.
gages_to_nan = ['403213A', '224213A', '224214A', '227225A']

# 1 Megaliters Per Day = 0.011574074074074 Cubic Meters Per Second
# ML_to_m3_per_s = 0.011574074074074

# 1 m^3 = 35.314666721489 ft^3, so dividing by this factor turns ft^3/s into
# m^3/s.
# NOTE(review): this presumes read_target_cols() hands back "streamflow_MLd"
# already converted to ft^3/s. If it returns raw ML/d, the values must be
# multiplied by ML_to_m3_per_s instead -- confirm against the library.
ft3_per_m3 = 35.314666721489

# Daily timestamps for the output rows. The last day is 2022-02-14, one day
# before the end of `time_range` -- presumably the readers treat the end date
# as exclusive; TODO confirm.
t_info = pd.date_range(start="2015-01-01", end="2022-02-14", freq='D')
formatted_time = t_info.strftime('%Y-%m-%d %H:%M:%S')

# time  pet(mm/day)  prcp(mm/day)  flow(m^3/s)  et(mm/day)  node1_flow(m^3/s)
columns = ["time", "pet(mm/day)", "prcp(mm/day)", "flow(m^3/s)", "et(mm/day)", "node1_flow(m^3/s)"]

for i, gage_id in enumerate(gage_ids):
    hydro_data = hydro_info[i]

    if gage_id in gages_to_nan:
        # One NaN per time step, same length as the forcing series.
        streamflow_data_m3_per_s = np.full(hydro_data.shape[0], np.nan)
    else:
        # Return -> np.array: streamflow data, 3-dim [station, time, variable]
        streamflow_info = camels_aus_v2.read_target_cols(
            gage_ids[i:i + 1],
            time_range,
            target_cols=["streamflow_MLd"],
        )
        streamflow_data_m3_per_s = streamflow_info[0, :, 0] / ft3_per_m3

    # Column order matches the variable list passed to read_relevant_cols().
    pet = hydro_data[:, 0]
    prcp = hydro_data[:, 1]
    flow = streamflow_data_m3_per_s
    et = hydro_data[:, 2]
    node1_flow = np.full(flow.shape[0], np.nan)  # NA for node1_flow

    # column_stack with the string timestamps yields an all-string table, so
    # missing values are written to the CSV as the text "nan".
    merged_row = np.column_stack([formatted_time, pet, prcp, flow, et, node1_flow])
    df = pd.DataFrame(merged_row, columns=columns)
    filename = f'basin_{gage_id}.csv'
    file_path = os.path.join(directory, filename)
    df.to_csv(file_path, index=False)

41 changes: 41 additions & 0 deletions examples/aus_v2_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os
import numpy as np

from hydrodataset import Camels

# Smoke-run of the CAMELS-AUS v2 reader: load the dataset from a local path
# and print mean precipitation, attributes, forcings, streamflow and the
# available column names for the first five gauges.
camels_aus_v2_path = "/home/estelle/data/waterism/datasets-origin/camels/camels_aus_v2/"
aus_v2_region = "AUS_v2"

# ---------------------------- AUS-V2 -------------------------------
camels_aus_v2 = Camels(
    camels_aus_v2_path,
    download=False,
    region=aus_v2_region,
)

gage_ids = camels_aus_v2.read_object_ids()
# Every read below uses the same five-gauge subset.
sample_ids = gage_ids[:5]

# Mean precipitation, requested in mm/h.
p_mean_info = camels_aus_v2.read_mean_prcp(sample_ids, unit="mm/h")
print(p_mean_info)

# Static catchment attributes.
attribute_names = ["catchment_area", "geol_sec", "metamorph"]
attrs = camels_aus_v2.read_constant_cols(sample_ids, var_lst=attribute_names)
print(attrs)

# Forcing time series; only the array shape is printed.
forcing_names = ["precipitation_AGCD", "et_morton_actual_SILO", "tmin_SILO"]
forcings = camels_aus_v2.read_relevant_cols(
    sample_ids,
    ["1990-01-01", "2010-01-01"],
    var_lst=forcing_names,
)
print(forcings.shape)

# Streamflow in both available columns; the full array is printed.
flow_names = ["streamflow_MLd", "streamflow_mmd"]
flows = camels_aus_v2.read_target_cols(
    sample_ids,
    ["2015-01-01", "2022-01-01"],
    target_cols=flow_names,
)
print(flows)

# Column names the dataset exposes for each kind of variable.
streamflow_types = camels_aus_v2.get_target_cols()
print(streamflow_types)
forcing_types = camels_aus_v2.get_relevant_cols()
print(forcing_types)
attr_types = camels_aus_v2.get_constant_cols()
print(attr_types)
29 changes: 29 additions & 0 deletions examples/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
# Dataset folders, each a sub-directory of the shared "camels" root.
_camels_root = "camels"
camels_cl_path = os.path.join(_camels_root, "camels_cl")
camels_gb_path = os.path.join(_camels_root, "camels_gb")
camels_us_path = os.path.join(_camels_root, "camels_us")
camels_aus_v2_path = os.path.join(_camels_root, "camels_aus_v2")

# Region codes passed to Camels(..., region=...).
aus_region = "AUS"
aus_v2_region = "AUS_v2"
br_region = "BR"
cl_region = "CL"
Expand Down Expand Up @@ -171,6 +173,33 @@
attr_types[:3], np.array(["station_name", "drainage_division", "river_region"])
)

# ---------------------------- AUS-V2 -------------------------------
# Load CAMELS-AUS v2, check the gauge count, then read attributes, forcings
# and streamflow for the first five gauges and print what comes back.
camels_aus_v2 = Camels(
    camels_aus_v2_path,
    download=False,
    region=aus_v2_region,
)
gage_ids = camels_aus_v2.read_object_ids()
assert gage_ids.size == 561

# The three reads below share the same gauge subset and period.
aus_v2_sample_ids = gage_ids[:5]

attrs = camels_aus_v2.read_constant_cols(
    aus_v2_sample_ids,
    var_lst=["catchment_area", "geol_sec", "metamorph"],
)
print(attrs)

forcings = camels_aus_v2.read_relevant_cols(
    aus_v2_sample_ids,
    ["1990-01-01", "2010-01-01"],
    var_lst=["precipitation_AGCD", "et_morton_actual_SILO", "tmin_SILO"],
)
print(forcings.shape)

flows = camels_aus_v2.read_target_cols(
    aus_v2_sample_ids,
    ["1990-01-01", "2010-01-01"],
    target_cols=["streamflow_MLd", "streamflow_mmd"],
)
print(flows.shape)

# Column names the dataset exposes for each kind of variable.
streamflow_types = camels_aus_v2.get_target_cols()
print(streamflow_types)
focing_types = camels_aus_v2.get_relevant_cols()
print(focing_types)
attr_types = camels_aus_v2.get_constant_cols()
print(attr_types)

# ------------------------------ BR --------------------------------
camels_br = Camels(camels_br_path, download=False, region=br_region)
gage_ids = camels_br.read_object_ids()
Expand Down
2 changes: 1 addition & 1 deletion hydrodataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def read_setting(setting_path):

# set some constants for datasets
# Names of the dataset families listed by this package.
DATASETS = ["CAMELS", "Caravan", "GRDC", "HYSETS", "LamaH", "MOPEX"]
# NOTE(review): the next assignment is overwritten immediately by the line
# after it; it appears to be the removed side of this diff hunk (the page
# reports "1 addition & 1 deletion" for this file).
CAMELS_REGIONS = ["AUS", "BR", "CL", "GB", "US"]
# Effective value: the previous list plus the new "AUS_v2" region code.
CAMELS_REGIONS = ["AUS", "BR", "CL", "GB", "US", "AUS_v2"]
LAMAH_REGIONS = ["CE"]
# CANOPEX is not treated as a dataset of its own but as a special case of
# MOPEX; it is the only MOPEX region available for now.
MOPEX_REGIONS = ["CA"]
Expand Down
Loading

0 comments on commit 528d0a5

Please sign in to comment.