Added CAMELS_AUS_v2.0.0 data; modified the unit calculation in read_target_cols() (streamflow_MLd only); added unit selection to read_mean_prcp().
OuyangWenyu · Nov 11, 2024 · 528d0a5 · 528d0a5
1 parent fbbdbef
commit 528d0a5
Show file tree

Hide file tree

Showing 5 changed files with 286 additions and 14 deletions.
diff --git a/examples/aus_test.py b/examples/aus_test.py
@@ -0,0 +1,62 @@
+import os
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+from hydrodataset import Camels
+
+directory = 'basin_flow'
+if not os.path.exists(directory):
+    os.makedirs(directory)
+
+camels_aus_path = "/ftproot/camels/camels_aus/"
+camels_aus_v2_path="/home/estelle/data/waterism/datasets-origin/camels/camels_aus_v2/"
+
+aus_region = "AUS"
+aus_v2_region = "AUS_v2"
+# ------------------------------ AUS --------------------------------
+camels_aus = Camels(camels_aus_path, download=False, region=aus_region)
+camels_aus_v2=Camels(camels_aus_v2_path, download=False, region=aus_v2_region)
+gage_ids = camels_aus.read_object_ids()
+
+# Return -> np.array: forcing data
+hydro_info = camels_aus.read_relevant_cols(
+    gage_ids[:],
+    ["2015-01-01", "2022-02-15"],
+    ["et_morton_point_SILO", "precipitation_SILO", "et_morton_actual_SILO"]
+)
+
+gages_to_nan = ['403213A', '224213A', '224214A', '227225A']
+
+# 1 megalitre per day (ML/d) = 0.011574074074074 cubic metres per second (m^3/s)
+# ML_to_m3_per_s = 0.011574074074074
+
+t_info = pd.date_range(start="2015-01-01", end="2022-02-14", freq='D')
+formatted_time = t_info.strftime('%Y-%m-%d %H:%M:%S')
+
+for i, gage_id in enumerate(gage_ids):
+    hydro_data = hydro_info[i]
+
+    if gage_id in gages_to_nan:
+        streamflow_data_m3_per_s = np.nan * np.ones_like(hydro_data[:, 0])
+    else:
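+        # Return -> np.array: streamflow data, 3-dim [station, time, streamflow]; NOTE(review): requested as streamflow_MLd (ML/d), but the divisor below (35.314666721489 = ft^3 per m^3) implies the values arrive in ft^3/s -- confirm the unit read_target_cols() returns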
+        streamflow_info = camels_aus_v2.read_target_cols(
+            gage_ids[i:i+1],
+            ["2015-01-01", "2022-02-15"],
+            target_cols=["streamflow_MLd"],
+        )
+        streamflow_data_m3_per_s = (streamflow_info[0,:,0]/35.314666721489)
+
+    pet = hydro_data[:, 0]
+    prcp = hydro_data[:, 1]
+    flow = streamflow_data_m3_per_s
+    et = hydro_data[:, 2]
+    node1_flow = np.nan * np.ones_like(flow)  # NA for node1_flow
+    merged_row = np.column_stack([formatted_time, pet, prcp, flow, et, node1_flow])
+    # columns: time pet(mm/day) prcp(mm/day) flow(m^3/s) et(mm/day) node1_flow(m^3/s)
+    columns = ["time", "pet(mm/day)", "prcp(mm/day)", "flow(m^3/s)", "et(mm/day)", "node1_flow(m^3/s)"]
+    df = pd.DataFrame(merged_row, columns=columns)
+    filename = f'basin_{gage_id}.csv'
+    file_path = os.path.join(directory, filename)
+    df.to_csv(file_path, index=False)
+
diff --git a/examples/aus_v2_test.py b/examples/aus_v2_test.py
@@ -0,0 +1,41 @@
+import os
+import numpy as np
+
+from hydrodataset import Camels
+
+camels_aus_v2_path="/home/estelle/data/waterism/datasets-origin/camels/camels_aus_v2/"
+
+aus_v2_region = "AUS_v2"
+
+# ---------------------------- AUS-V2 -------------------------------
+camels_aus_v2=Camels(camels_aus_v2_path, download=False, region=aus_v2_region)
+
+gage_ids = camels_aus_v2.read_object_ids()
+
+p_mean_info=camels_aus_v2.read_mean_prcp(
+    gage_ids[:5],unit="mm/h"
+)
+print(p_mean_info)
+
+attrs = camels_aus_v2.read_constant_cols(
+    gage_ids[:5], var_lst=["catchment_area", "geol_sec", "metamorph"]
+)
+print(attrs)
+forcings = camels_aus_v2.read_relevant_cols(
+    gage_ids[:5],
+    ["1990-01-01", "2010-01-01"],
+    var_lst=["precipitation_AGCD", "et_morton_actual_SILO", "tmin_SILO"],
+)
+print(forcings.shape)
+flows = camels_aus_v2.read_target_cols(
+    gage_ids[:5],
+    ["2015-01-01", "2022-01-01"],
+    target_cols=["streamflow_MLd", "streamflow_mmd"],
+)
+print(flows)
+streamflow_types = camels_aus_v2.get_target_cols()
+print(streamflow_types)
+focing_types = camels_aus_v2.get_relevant_cols()
+print(focing_types)
+attr_types = camels_aus_v2.get_constant_cols()
+print(attr_types)
diff --git a/examples/scripts.py b/examples/scripts.py
@@ -18,7 +18,9 @@
 camels_cl_path = os.path.join("camels", "camels_cl")
 camels_gb_path = os.path.join("camels", "camels_gb")
 camels_us_path = os.path.join("camels", "camels_us")
+camels_aus_v2_path=os.path.join("camels","camels_aus_v2")
 
+aus_v2_region = "AUS_v2"
 aus_region = "AUS"
 br_region = "BR"
 cl_region = "CL"
@@ -171,6 +173,33 @@
     attr_types[:3], np.array(["station_name", "drainage_division", "river_region"])
 )
 
+# ---------------------------- AUS-V2 -------------------------------
+camels_aus_v2=Camels(camels_aus_v2_path, download=False, region=aus_v2_region)
+gage_ids = camels_aus_v2.read_object_ids()
+assert gage_ids.size == 561
+attrs = camels_aus_v2.read_constant_cols(
+    gage_ids[:5], var_lst=["catchment_area", "geol_sec", "metamorph"]
+)
+print(attrs)
+forcings = camels_aus_v2.read_relevant_cols(
+    gage_ids[:5],
+    ["1990-01-01", "2010-01-01"],
+    var_lst=["precipitation_AGCD", "et_morton_actual_SILO", "tmin_SILO"],
+)
+print(forcings.shape)
+flows = camels_aus_v2.read_target_cols(
+    gage_ids[:5],
+    ["1990-01-01", "2010-01-01"],
+    target_cols=["streamflow_MLd", "streamflow_mmd"],
+)
+print(flows.shape)
+streamflow_types = camels_aus_v2.get_target_cols()
+print(streamflow_types)
+focing_types = camels_aus_v2.get_relevant_cols()
+print(focing_types)
+attr_types = camels_aus_v2.get_constant_cols()
+print(attr_types)
+
 # ------------------------------ BR --------------------------------
 camels_br = Camels(camels_br_path, download=False, region=br_region)
 gage_ids = camels_br.read_object_ids()

diff --git a/hydrodataset/__init__.py b/hydrodataset/__init__.py
@@ -76,7 +76,7 @@ def read_setting(setting_path):
 
 # set some constants for datasets
 DATASETS = ["CAMELS", "Caravan", "GRDC", "HYSETS", "LamaH", "MOPEX"]
-CAMELS_REGIONS = ["AUS", "BR", "CL", "GB", "US"]
+CAMELS_REGIONS = ["AUS", "BR", "CL", "GB", "US", "AUS_v2"]
 LAMAH_REGIONS = ["CE"]
 # For CANOPEX, We don't treat it as a dataset, but a special case for MOPEX. We only have CANOPEX now.
 MOPEX_REGIONS = ["CA"]