diff --git a/examples/aus_test.py b/examples/aus_test.py deleted file mode 100644 index 7b51b42..0000000 --- a/examples/aus_test.py +++ /dev/null @@ -1,62 +0,0 @@ -import os -import numpy as np -import pandas as pd -from tqdm import tqdm -from hydrodataset import Camels - -directory = 'basin_flow' -if not os.path.exists(directory): - os.makedirs(directory) - -camels_aus_path = "/ftproot/camels/camels_aus/" -camels_aus_v2_path="/home/estelle/data/waterism/datasets-origin/camels/camels_aus_v2/" - -aus_region = "AUS" -aus_v2_region = "AUS_v2" -# ------------------------------ AUS -------------------------------- -camels_aus = Camels(camels_aus_path, download=False, region=aus_region) -camels_aus_v2=Camels(camels_aus_v2_path, download=False, region=aus_v2_region) -gage_ids = camels_aus.read_object_ids() - -# Return -> np.array: forcing data -hydro_info = camels_aus.read_relevant_cols( - gage_ids[:], - ["2015-01-01", "2022-02-15"], - ["et_morton_point_SILO", "precipitation_SILO", "et_morton_actual_SILO"] -) - -gages_to_nan = ['403213A', '224213A', '224214A', '227225A'] - -# 1 Megaliters Per Day = 0.011574074074074 Cubic Meters Per Second -# ML_to_m3_per_s = 0.011574074074074 - -t_info = pd.date_range(start="2015-01-01", end="2022-02-14", freq='D') -formatted_time = t_info.strftime('%Y-%m-%d %H:%M:%S') - -for i, gage_id in enumerate(gage_ids): - hydro_data = hydro_info[i] - - if gage_id in gages_to_nan: - streamflow_data_m3_per_s = np.nan * np.ones_like(hydro_data[:, 0]) - else: - # Return -> np.array: streamflow data, 3-dim [station, time, streamflow(ML/d)] - streamflow_info = camels_aus_v2.read_target_cols( - gage_ids[i:i+1], - ["2015-01-01", "2022-02-15"], - target_cols=["streamflow_MLd"], - ) - streamflow_data_m3_per_s = (streamflow_info[0,:,0]/35.314666721489) - - pet = hydro_data[:, 0] - prcp = hydro_data[:, 1] - flow = streamflow_data_m3_per_s - et = hydro_data[:, 2] - node1_flow = np.nan * np.ones_like(flow) # NA for node1_flow - merged_row = np.column_stack([formatted_time, pet, prcp, flow, et, node1_flow]) - # tiem pet(mm/day) prcp(mm/day) flow(m^3/s) et(mm/day) node1_flow(m^3/s) - columns = ["time", "pet(mm/day)", "prcp(mm/day)", "flow(m^3/s)", "et(mm/day)", "node1_flow(m^3/s)"] - df = pd.DataFrame(merged_row, columns=columns) - filename = f'basin_{gage_id}.csv' - file_path = os.path.join(directory, filename) - df.to_csv(file_path, index=False) - diff --git a/examples/aus_v2_test.py b/examples/aus_v2_test.py deleted file mode 100644 index 6b0f720..0000000 --- a/examples/aus_v2_test.py +++ /dev/null @@ -1,41 +0,0 @@ -import os -import numpy as np - -from hydrodataset import Camels - -camels_aus_v2_path="/home/estelle/data/waterism/datasets-origin/camels/camels_aus_v2/" - -aus_v2_region = "AUS_v2" - -# ---------------------------- AUS-V2 ------------------------------- -camels_aus_v2=Camels(camels_aus_v2_path, download=False, region=aus_v2_region) - -gage_ids = camels_aus_v2.read_object_ids() - -p_mean_info=camels_aus_v2.read_mean_prcp( - gage_ids[:5],unit="mm/h" -) -print(p_mean_info) - -attrs = camels_aus_v2.read_constant_cols( - gage_ids[:5], var_lst=["catchment_area", "geol_sec", "metamorph"] -) -print(attrs) -forcings = camels_aus_v2.read_relevant_cols( - gage_ids[:5], - ["1990-01-01", "2010-01-01"], - var_lst=["precipitation_AGCD", "et_morton_actual_SILO", "tmin_SILO"], -) -print(forcings.shape) -flows = camels_aus_v2.read_target_cols( - gage_ids[:5], - ["2015-01-01", "2022-01-01"], - target_cols=["streamflow_MLd", "streamflow_mmd"], -) -print(flows) -streamflow_types = camels_aus_v2.get_target_cols() -print(streamflow_types) -focing_types = camels_aus_v2.get_relevant_cols() -print(focing_types) -attr_types = camels_aus_v2.get_constant_cols() -print(attr_types) \ No newline at end of file diff --git a/examples/scripts.py b/examples/scripts.py index ba96649..72b17ba 100644 --- a/examples/scripts.py +++ b/examples/scripts.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-09-06 23:42:46 -LastEditTime: 2024-09-14 13:27:17 +LastEditTime: 2024-11-11 16:44:31 LastEditors: Wenyu Ouyang Description: examples for using hydrodataset FilePath: \hydrodataset\examples\scripts.py @@ -18,7 +18,7 @@ camels_cl_path = os.path.join("camels", "camels_cl") camels_gb_path = os.path.join("camels", "camels_gb") camels_us_path = os.path.join("camels", "camels_us") -camels_aus_v2_path=os.path.join("camels","camels_aus_v2") +camels_aus_v2_path = os.path.join("camels", "camels_aus_v2") aus_v2_region = "AUS_v2" aus_region = "AUS" @@ -174,7 +174,7 @@ ) # # ---------------------------- AUS-V2 ------------------------------- -camels_aus_v2=Camels(camels_aus_v2_path, download=False, region=aus_v2_region) +camels_aus_v2 = Camels(camels_aus_v2_path, download=False, region=aus_v2_region) gage_ids = camels_aus_v2.read_object_ids() assert gage_ids.size == 561 attrs = camels_aus_v2.read_constant_cols( diff --git a/hydrodataset/camels.py b/hydrodataset/camels.py index 6cdd1c8..bc2d697 100644 --- a/hydrodataset/camels.py +++ b/hydrodataset/camels.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-01-05 18:01:11 -LastEditTime: 2024-09-10 19:15:33 +LastEditTime: 2024-11-11 17:26:42 LastEditors: Wenyu Ouyang Description: Read Camels Series ("AUStralia", "BRazil", "ChiLe", "GreatBritain", "UnitedStates") datasets FilePath: \hydrodataset\hydrodataset\camels.py @@ -595,7 +595,9 @@ def get_relevant_cols(self) -> np.ndarray: if root == self.data_source_description["CAMELS_FORCING_DIR"]: continue forcing_types.extend( - file[:-4] for file in files if file not in ["ClimaticIndices.csv", "desktop.ini"] + file[:-4] + for file in files + if file not in ["ClimaticIndices.csv", "desktop.ini"] ) return np.array(forcing_types) elif self.region == "BR": @@ -1295,7 +1297,9 @@ def read_relevant_cols( chosen_data = forcing_data[gage_id_lst].values[ind1, :] x[:, ind2, k] = chosen_data.T elif self.region == "AUS_v2": - for k in tqdm(range(len(var_lst)), desc="Read forcing data of CAMELS-AUS-V2"): + for k in tqdm( + range(len(var_lst)), desc="Read forcing data of CAMELS-AUS-V2" + ): if "precipitation_" in var_lst[k]: forcing_dir = os.path.join( self.data_source_description["CAMELS_FORCING_DIR"], @@ -1559,13 +1563,37 @@ def read_area(self, gage_id_lst) -> np.ndarray: else: raise NotImplementedError(CAMELS_NO_DATASET_ERROR_LOG) - def read_mean_prcp(self, gage_id_lst, unit="mm/d") -> np.ndarray: + def read_mean_prcp(self, gage_id_lst, unit="mm/d") -> xr.Dataset: + """Read mean precipitation data + + Parameters + ---------- + gage_id_lst : list + station ids + unit : str, optional + the unit of mean_prcp, by default "mm/d" + + Returns + ------- + xr.Dataset + TODO: now only support CAMELS-US + + Raises + ------ + NotImplementedError + some regions are not supported + ValueError + unit must be one of ['mm/d', 'mm/day', 'mm/h', 'mm/hour', 'mm/3h', 'mm/3hour', 'mm/8d', 'mm/8day'] + """ if self.region in ["US", "AUS", "AUS_v2", "BR", "GB"]: if self.region == "US": data = self.read_attr_xrdataset(gage_id_lst, ["p_mean"]) - data = self.read_constant_cols( - gage_id_lst, ["p_mean"], is_return_dict=False, - ) + else: + data = self.read_constant_cols( + gage_id_lst, + ["p_mean"], + is_return_dict=False, + ) elif self.region == "CL": # there are different p_mean values for different forcings, here we chose p_mean_cr2met now data = self.read_constant_cols( @@ -1579,9 +1607,11 @@ def read_mean_prcp(self, gage_id_lst, unit="mm/d") -> np.ndarray: converted_data = data / 24 elif unit in ["mm/3h", "mm/3hour"]: converted_data = data / 8 + elif unit in ["mm/8d", "mm/8day"]: + converted_data = data * 8 else: raise ValueError( - "unit must be one of ['mm/d', 'mm/day', 'mm/h', 'mm/hour', 'mm/3h', 'mm/3hour']" + "unit must be one of ['mm/d', 'mm/day', 'mm/h', 'mm/hour', 'mm/3h', 'mm/3hour', 'mm/8d', 'mm/8day']" ) return converted_data @@ -1897,4 +1927,4 @@ def read_attr_xrdataset(self, gage_id_lst=None, var_lst=None, **kwargs): @property def streamflow_unit(self): - return "foot^3/s" \ No newline at end of file + return "foot^3/s" diff --git a/tests/test_camels.py b/tests/test_camels.py index 957a476..f1ccd40 100644 --- a/tests/test_camels.py +++ b/tests/test_camels.py @@ -1,7 +1,7 @@ """ Author: Wenyu Ouyang Date: 2022-09-05 23:20:24 -LastEditTime: 2024-11-04 20:15:11 +LastEditTime: 2024-11-11 17:29:13 LastEditors: Wenyu Ouyang Description: Tests for `hydrodataset` package FilePath: \hydrodataset\tests\test_camels.py @@ -16,6 +16,7 @@ from hydrodataset import Camels import numpy as np import pandas as pd +import xarray as xr from unittest.mock import patch, MagicMock @@ -76,6 +77,7 @@ def test_read_mean_prcp(): gage_ids = camels.read_object_ids() mean_prcp = camels.read_mean_prcp(gage_ids[:5]) print(mean_prcp) + assert isinstance(mean_prcp, xr.Dataset) def test_read_target_cols_us(): @@ -281,16 +283,6 @@ def test_read_camels_us_model_output_data_no_data(): assert np.all(np.isnan(result)) -def test_read_mean_prcp_us(): - camels = Camels() - camels.region = "US" - camels.read_attr_xrdataset = MagicMock(return_value=np.array([1.0, 2.0, 3.0])) - - gage_id_lst = ["01013500", "01013501", "01013502"] - result = camels.read_mean_prcp(gage_id_lst) - assert np.array_equal(result, np.array([1.0, 2.0, 3.0])) - - def test_read_mean_prcp_aus(): camels = Camels() camels.region = "AUS"