From a3ce3c0eced235bd64844794969c62f43e022e41 Mon Sep 17 00:00:00 2001
From: Tadd Bindas
Date: Tue, 17 Dec 2024 09:45:07 -0600
Subject: [PATCH] adding s3 paths to config

Point the generated zarr outputs (edges, gage COO indices, sparse TMs and
the streamflow store) at s3://mhpi-spatial through a new `s3_path` config
key; raw inputs keep using the local `data_path`.

- marquette/__main__.py: re-enable create_N; comment out map_lake_points
  and run_extensions.
- marquette/conf/config.yaml: add s3_path, switch to zone 73 and the
  all-gages inputs, comment out the map_lake_points section.
- marquette/conf/saved_configs/v1.0config.yaml: copy of the previous
  local-path configuration (with zone 74).
- pyproject.toml: add [project] metadata, pinned dependencies and extras;
  exclude notebooks from ruff.
- scripts/icechunk_quickstart.py: scratch script that copies the first 18
  time steps of the zone-74 streamflow store to S3.

---
 marquette/__main__.py                        | 14 ++---
 marquette/conf/config.yaml                   | 29 ++++-----
 marquette/conf/saved_configs/v1.0config.yaml | 63 ++++++++++++++++++++
 pyproject.toml                               | 30 ++++++++++
 scripts/icechunk_quickstart.py               | 31 ++++++++++
 5 files changed, 146 insertions(+), 21 deletions(-)
 create mode 100644 marquette/conf/saved_configs/v1.0config.yaml
 create mode 100644 scripts/icechunk_quickstart.py

diff --git a/marquette/__main__.py b/marquette/__main__.py
index a0ac19e..ef85161 100644
--- a/marquette/__main__.py
+++ b/marquette/__main__.py
@@ -31,20 +31,20 @@ def main(cfg: DictConfig) -> None:
     log.info(f"Creating MERIT {cfg.zone} River Graph")
     edges = create_edges(cfg)
 
-    # log.info(f"Creating MERIT {cfg.zone} Connectivity Matrix (N) for gages")
-    # create_N(cfg, edges)
+    log.info(f"Creating MERIT {cfg.zone} Connectivity Matrix (N) for gages")
+    create_N(cfg, edges)
 
     log.info(f"Mapping {cfg.zone} Streamflow to TMs")
     create_TMs(cfg, edges)
 
-    log.info("Mapping Lake Pour Points to Edges")
-    map_lake_points(cfg, edges)
+    # log.info("Mapping Lake Pour Points to Edges")
+    # map_lake_points(cfg, edges)
 
     log.info("Converting Streamflow to zarr")
     write_streamflow(cfg, edges)
 
-    log.info("Running Data Post-Processing Extensions")
-    run_extensions(cfg, edges)
+    # log.info("Running Data Post-Processing Extensions")
+    # run_extensions(cfg, edges)
 
     end = time.perf_counter()
     log.info(f"Extracting data took : {(end - start):.6f} seconds")
@@ -142,4 +142,4 @@ def run_extensions(cfg: DictConfig, edges: zarr.Group) -> None:
 
 
 if __name__ == "__main__":
-    main()
+    main()  # type: ignore
diff --git a/marquette/conf/config.yaml b/marquette/conf/config.yaml
index 9a15178..fa355d9 100644
--- a/marquette/conf/config.yaml
+++ b/marquette/conf/config.yaml
@@ -1,37 +1,38 @@
-name: MERIT
+name: merit
+s3_path: s3://mhpi-spatial/marquette/${name}
 data_path: /projects/mhpi/data/${name}
-zone: 71
+zone: 73
 gpu: 6
 create_edges:
   buffer: 0.3334
   dx: 2000
-  edges: ${data_path}/zarr/graph/CONUS/edges/
+  edges: ${s3_path}/edges/
   flowlines: ${data_path}/raw/flowlines
 create_N:
   run_whole_zone: False
   drainage_area_treshold: 0.1
   filter_based_on_dataset: True
-  gage_buffered_flowline_intersections: ${data_path}/gage_information/gage_flowline_intersections/gnn_dataset_v1_2.shp
-  gage_coo_indices: ${data_path}/zarr/gage_coo_indices
-  pad_gage_id: False
-  obs_dataset: ${data_path}/gage_information/obs_csvs/GRDC_point_data.csv
-  obs_dataset_output: ${data_path}/gage_information/formatted_gage_csvs/gnn_formatted_basins.csv
-  zone_obs_dataset: ${data_path}/gage_information/formatted_gage_csvs/subzones.csv
+  gage_buffered_flowline_intersections: ${data_path}/gage_information/gage_flowline_intersections/gage_9322_intersection.shp
+  gage_coo_indices: ${s3_path}/gages/coo_pair_intersections
+  pad_gage_id: True
+  obs_dataset: ${data_path}/gage_information/obs_csvs/all_gages_info.csv
+  obs_dataset_output: ${data_path}/gage_information/formatted_gage_csvs/all_gages_info_combined.csv
+  zone_obs_dataset: ${data_path}/gage_information/formatted_gage_csvs/${zone}_all.csv
 create_TMs:
   MERIT:
     save_sparse: True
-    TM: ${data_path}/zarr/TMs/sparse_MERIT_FLOWLINES_${zone}
+    TM: ${s3_path}/TMs/sparse_MERIT_FLOWLINES_${zone}
     shp_files: ${data_path}/raw/basins/cat_pfaf_${zone}_MERIT_Hydro_v07_Basins_v01_bugfix1.shp
 create_streamflow:
   version: merit_conus_v6.18_snow
-  data_store: ${data_path}/streamflow/zarr/${create_streamflow.version}/${zone}
+  data_store: ${s3_path}/streamflow/${create_streamflow.version}/${zone}
   obs_attributes: ${data_path}/gage_information/MERIT_basin_area_info
   predictions: /projects/mhpi/yxs275/DM_output/water_loss_model/dPL_local_daymet_new_attr_RMSEloss_with_log_2800
   start_date: 01-01-1980
   end_date: 12-31-2020
-map_lake_points:
-  lake_points: /projects/mhpi/data/hydroLakes/merit_intersected_data/RIV_lake_intersection_${zone}.shp
-  zarr: /projects/mhpi/data/hydroLakes/hydrolakes.zarr
+# map_lake_points:
+#   lake_points: /projects/mhpi/data/hydroLakes/merit_intersected_data/RIV_lake_intersection_${zone}.shp
+#   zarr: /projects/mhpi/data/hydroLakes/hydrolakes.zarr
 extensions:
   - soils_data
   - pet_forcing
diff --git a/marquette/conf/saved_configs/v1.0config.yaml b/marquette/conf/saved_configs/v1.0config.yaml
new file mode 100644
index 0000000..26d85aa
--- /dev/null
+++ b/marquette/conf/saved_configs/v1.0config.yaml
@@ -0,0 +1,63 @@
+name: MERIT
+data_path: /projects/mhpi/data/${name}
+zone: 74
+gpu: 6
+create_edges:
+  buffer: 0.3334
+  dx: 2000
+  edges: ${data_path}/zarr/graph/CONUS/edges/
+  flowlines: ${data_path}/raw/flowlines
+create_N:
+  run_whole_zone: False
+  drainage_area_treshold: 0.1
+  filter_based_on_dataset: True
+  gage_buffered_flowline_intersections: ${data_path}/gage_information/gage_flowline_intersections/gnn_dataset_v1_2.shp
+  gage_coo_indices: ${data_path}/zarr/gage_coo_indices
+  pad_gage_id: False
+  obs_dataset: ${data_path}/gage_information/obs_csvs/GRDC_point_data.csv
+  obs_dataset_output: ${data_path}/gage_information/formatted_gage_csvs/gnn_formatted_basins.csv
+  zone_obs_dataset: ${data_path}/gage_information/formatted_gage_csvs/subzones.csv
+create_TMs:
+  MERIT:
+    save_sparse: True
+    TM: ${data_path}/zarr/TMs/sparse_MERIT_FLOWLINES_${zone}
+    shp_files: ${data_path}/raw/basins/cat_pfaf_${zone}_MERIT_Hydro_v07_Basins_v01_bugfix1.shp
+create_streamflow:
+  version: merit_conus_v6.18_snow
+  data_store: ${data_path}/streamflow/zarr/${create_streamflow.version}/${zone}
+  obs_attributes: ${data_path}/gage_information/MERIT_basin_area_info
+  predictions: /projects/mhpi/yxs275/DM_output/water_loss_model/dPL_local_daymet_new_attr_RMSEloss_with_log_2800
+  start_date: 01-01-1980
+  end_date: 12-31-2020
+map_lake_points:
+  lake_points: /projects/mhpi/data/hydroLakes/merit_intersected_data/RIV_lake_intersection_${zone}.shp
+  zarr: /projects/mhpi/data/hydroLakes/hydrolakes.zarr
+extensions:
+  - soils_data
+  - pet_forcing
+  - global_dhbv_static_inputs
+  - incremental_drainage_area
+  - q_prime_sum
+  - upstream_basin_avg_mean_p
+  - q_prime_sum_stats
+  - lstm_stats
+  - temp_mean
+# Hydra Config ------------------------------------------------------------------------#
+hydra:
+  help:
+    app_name: marquette
+    header: == ${hydra.help.app_name} ==
+    template: |-
+      ${hydra.help.header}
+
+      A data pipeline tool used to generate inputs to dMC river routing
+      By Tadd Bindas
+
+      ${hydra.help.footer}
+    footer: |-
+      Powered by Hydra (https://hydra.cc)
+      Use --hydra-help to view Hydra specific help
+  job:
+    name: ${name}
+  run:
+    dir: ../runs/${hydra.job.name}_${zone}/${now:%Y-%m-%d_%H-%M-%S}
diff --git a/pyproject.toml b/pyproject.toml
index 6309b55..55ef9a2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,35 @@
+[project]
+name = "marquette"
+authors = [
+    {name = "Tadd Bindas", email = "taddbindas@gmail.com"},
+]
+dynamic = ["version"]
+
+# Static runtime dependencies ([tool.setuptools.dynamic] only accepts {file = ...}).
+dependencies = [
+    "icechunk==0.1.0a7",
+    "zarr==3.0.0b2",
+    "packaging==24.2",
+    "xarray @ git+https://github.com/pydata/xarray",
+    "s3fs==2024.10.0",
+]
+
+[project.optional-dependencies]
+test = [
+    "pytest==8.3.2",
+]
+jupyter = [
+    "contextily==1.6.0",
+    "matplotlib>=3.7.0,<3.8.0",
+    "ipykernel>=6.29.0,<7.0.0",
+    "jupyterlab>=3.6.7,<4.0.0",
+    "xarray>=2024.1.1",
+    "matplotlib-inline>=0.1.6"
+]
+
 [tool.ruff]
 exclude = [
     "./tests*",
     "./scripts*",
+    "./notebooks*",
 ]
diff --git a/scripts/icechunk_quickstart.py b/scripts/icechunk_quickstart.py
new file mode 100644
index 0000000..382fe33
--- /dev/null
+++ b/scripts/icechunk_quickstart.py
@@ -0,0 +1,31 @@
+# import icechunk
+import xarray as xr
+import zarr
+import s3fs
+
+def main():
+    # storage_config = icechunk.StorageConfig.s3_from_env(
+    #     bucket="mhpi-spatial",
+    #     prefix="marquette/merit/quickstart",
+    #     region="us-east-2",
+    #     endpoint_url=None
+    # )
+    # store = icechunk.IcechunkStore.create(storage_config)
+    ds = xr.open_zarr("/projects/mhpi/data/MERIT/streamflow/zarr/merit_conus_v6.18_snow/74")
+
+    ds1 = ds.isel(time=slice(None, 18))  # part 1
+    ds1.to_zarr('s3://mhpi-spatial/marquette/merit/test1/', mode='w')
+    # storage_config = icechunk.StorageConfig.s3_from_env(
+    #     bucket="mhpi-spatial",
+    #     prefix="marquette/merit/quickstart",
+    #     region="us-east-2",
+    #     endpoint_url=None,
+    # )
+    # store = icechunk.IcechunkStore.create(storage_config)
+    # group = zarr.group(store)
+    # array = group.create("my_array", shape=10, dtype=int)
+    # array[:] = 1
+    # store.commit("first commit")
+
+if __name__ == "__main__":
+    main()