Commit
adding s3 paths to config
taddyb committed Dec 17, 2024
1 parent fa1e6e1 commit a3ce3c0
Showing 5 changed files with 146 additions and 21 deletions.
14 changes: 7 additions & 7 deletions marquette/__main__.py
@@ -31,20 +31,20 @@ def main(cfg: DictConfig) -> None:
log.info(f"Creating MERIT {cfg.zone} River Graph")
edges = create_edges(cfg)

# log.info(f"Creating MERIT {cfg.zone} Connectivity Matrix (N) for gages")
# create_N(cfg, edges)
log.info(f"Creating MERIT {cfg.zone} Connectivity Matrix (N) for gages")
create_N(cfg, edges)

log.info(f"Mapping {cfg.zone} Streamflow to TMs")
create_TMs(cfg, edges)

log.info("Mapping Lake Pour Points to Edges")
map_lake_points(cfg, edges)
# log.info("Mapping Lake Pour Points to Edges")
# map_lake_points(cfg, edges)

log.info("Converting Streamflow to zarr")
write_streamflow(cfg, edges)

log.info("Running Data Post-Processing Extensions")
run_extensions(cfg, edges)
# log.info("Running Data Post-Processing Extensions")
# run_extensions(cfg, edges)

end = time.perf_counter()
log.info(f"Extracting data took : {(end - start):.6f} seconds")
@@ -142,4 +142,4 @@ def run_extensions(cfg: DictConfig, edges: zarr.Group) -> None:


 if __name__ == "__main__":
-    main()
+    main()  # type: ignore
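With this change, `create_edges` and the other pipeline steps write their zarr stores under the S3 prefix configured in config.yaml below, so a quick post-run sanity check is to list that prefix. The snippet below is a minimal sketch, not part of the repository; it assumes the resolved prefix lives under s3://mhpi-spatial/marquette/merit/ and that AWS credentials are available in the environment (for example via AWS_PROFILE or environment variables).

import s3fs

# Hypothetical post-run check: list what the pipeline wrote under the
# configured S3 prefix.
fs = s3fs.S3FileSystem()  # picks up credentials from the environment
for key in fs.ls("mhpi-spatial/marquette/merit/"):
    print(key)

Opening one of the stores read-only (for example the edges group) would be the next step, but the exact call depends on the zarr version pinned in pyproject.toml, so it is left out here.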
29 changes: 15 additions & 14 deletions marquette/conf/config.yaml
@@ -1,37 +1,38 @@
-name: MERIT
+name: merit
+s3_path: s3://mhpi-spatial/marquette/${name}/
 data_path: /projects/mhpi/data/${name}
-zone: 71
+zone: 73
 gpu: 6
 create_edges:
   buffer: 0.3334
   dx: 2000
-  edges: ${data_path}/zarr/graph/CONUS/edges/
+  edges: ${s3_path}/edges/
   flowlines: ${data_path}/raw/flowlines
 create_N:
   run_whole_zone: False
   drainage_area_treshold: 0.1
   filter_based_on_dataset: True
-  gage_buffered_flowline_intersections: ${data_path}/gage_information/gage_flowline_intersections/gnn_dataset_v1_2.shp
-  gage_coo_indices: ${data_path}/zarr/gage_coo_indices
-  pad_gage_id: False
-  obs_dataset: ${data_path}/gage_information/obs_csvs/GRDC_point_data.csv
-  obs_dataset_output: ${data_path}/gage_information/formatted_gage_csvs/gnn_formatted_basins.csv
-  zone_obs_dataset: ${data_path}/gage_information/formatted_gage_csvs/subzones.csv
+  gage_buffered_flowline_intersections: ${data_path}/gage_information/gage_flowline_intersections/gage_9322_intersection.shp
+  gage_coo_indices: ${s3_path}/gages/coo_pair_intersections
+  pad_gage_id: True
+  obs_dataset: ${data_path}/gage_information/obs_csvs/all_gages_info.csv
+  obs_dataset_output: ${data_path}/gage_information/formatted_gage_csvs/all_gages_info_combined.csv
+  zone_obs_dataset: ${data_path}/gage_information/formatted_gage_csvs/${zone}_all.csv
 create_TMs:
   MERIT:
     save_sparse: True
-    TM: ${data_path}/zarr/TMs/sparse_MERIT_FLOWLINES_${zone}
+    TM: ${s3_path}/TMs/sparse_MERIT_FLOWLINES_${zone}
     shp_files: ${data_path}/raw/basins/cat_pfaf_${zone}_MERIT_Hydro_v07_Basins_v01_bugfix1.shp
 create_streamflow:
   version: merit_conus_v6.18_snow
-  data_store: ${data_path}/streamflow/zarr/${create_streamflow.version}/${zone}
+  data_store: ${s3_path}/streamflow/${create_streamflow.version}/${zone}
   obs_attributes: ${data_path}/gage_information/MERIT_basin_area_info
   predictions: /projects/mhpi/yxs275/DM_output/water_loss_model/dPL_local_daymet_new_attr_RMSEloss_with_log_2800
   start_date: 01-01-1980
   end_date: 12-31-2020
-map_lake_points:
-  lake_points: /projects/mhpi/data/hydroLakes/merit_intersected_data/RIV_lake_intersection_${zone}.shp
-  zarr: /projects/mhpi/data/hydroLakes/hydrolakes.zarr
+# map_lake_points:
+#   lake_points: /projects/mhpi/data/hydroLakes/merit_intersected_data/RIV_lake_intersection_${zone}.shp
+#   zarr: /projects/mhpi/data/hydroLakes/hydrolakes.zarr
 extensions:
   - soils_data
   - pet_forcing
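One thing worth noting about the interpolations above: OmegaConf (which Hydra uses) substitutes `${...}` references literally, so the trailing slash on `s3_path` combined with the `/` in values such as `${s3_path}/edges/` produces a doubled slash in the resolved key. The sketch below reproduces the resolution with values copied from this config; the behavior shown is standard OmegaConf, not anything marquette-specific.

from omegaconf import OmegaConf

# Reproduce the interpolation above with a trimmed-down copy of the config.
cfg = OmegaConf.create(
    {
        "name": "merit",
        "s3_path": "s3://mhpi-spatial/marquette/${name}/",
        "create_edges": {"edges": "${s3_path}/edges/"},
    }
)
resolved = OmegaConf.to_container(cfg, resolve=True)
print(resolved["create_edges"]["edges"])
# s3://mhpi-spatial/marquette/merit//edges/  <- note the "//" from the trailing
# slash in s3_path; S3 treats it as part of the object key.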
63 changes: 63 additions & 0 deletions marquette/conf/saved_configs/v1.0config.yaml
@@ -0,0 +1,63 @@
name: MERIT
data_path: /projects/mhpi/data/${name}
zone: 74
gpu: 6
create_edges:
  buffer: 0.3334
  dx: 2000
  edges: ${data_path}/zarr/graph/CONUS/edges/
  flowlines: ${data_path}/raw/flowlines
create_N:
  run_whole_zone: False
  drainage_area_treshold: 0.1
  filter_based_on_dataset: True
  gage_buffered_flowline_intersections: ${data_path}/gage_information/gage_flowline_intersections/gnn_dataset_v1_2.shp
  gage_coo_indices: ${data_path}/zarr/gage_coo_indices
  pad_gage_id: False
  obs_dataset: ${data_path}/gage_information/obs_csvs/GRDC_point_data.csv
  obs_dataset_output: ${data_path}/gage_information/formatted_gage_csvs/gnn_formatted_basins.csv
  zone_obs_dataset: ${data_path}/gage_information/formatted_gage_csvs/subzones.csv
create_TMs:
  MERIT:
    save_sparse: True
    TM: ${data_path}/zarr/TMs/sparse_MERIT_FLOWLINES_${zone}
    shp_files: ${data_path}/raw/basins/cat_pfaf_${zone}_MERIT_Hydro_v07_Basins_v01_bugfix1.shp
create_streamflow:
  version: merit_conus_v6.18_snow
  data_store: ${data_path}/streamflow/zarr/${create_streamflow.version}/${zone}
  obs_attributes: ${data_path}/gage_information/MERIT_basin_area_info
  predictions: /projects/mhpi/yxs275/DM_output/water_loss_model/dPL_local_daymet_new_attr_RMSEloss_with_log_2800
  start_date: 01-01-1980
  end_date: 12-31-2020
map_lake_points:
  lake_points: /projects/mhpi/data/hydroLakes/merit_intersected_data/RIV_lake_intersection_${zone}.shp
  zarr: /projects/mhpi/data/hydroLakes/hydrolakes.zarr
extensions:
  - soils_data
  - pet_forcing
  - global_dhbv_static_inputs
  - incremental_drainage_area
  - q_prime_sum
  - upstream_basin_avg_mean_p
  - q_prime_sum_stats
  - lstm_stats
  - temp_mean
# Hydra Config ------------------------------------------------------------------------#
hydra:
  help:
    app_name: marquette
    header: == ${hydra.help.app_name} ==
    template: |-
      ${hydra.help.header}
      A data pipeline tool used to generate inputs to dMC river routing
      By Tadd Bindas

      ${hydra.help.footer}
    footer: |-
      Powered by Hydra (https://hydra.cc)
      Use --hydra-help to view Hydra specific help
  job:
    name: ${name}
  run:
    dir: ../runs/${hydra.job.name}_${zone}/${now:%Y-%m-%d_%H-%M-%S}
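The file above is an archived copy of the pre-S3 configuration, so the quickest way to see what this commit changed is to load both configs and compare the store locations. A small sketch, assuming both files are read from the repository root; the key list is illustrative, not exhaustive.

from omegaconf import OmegaConf

# Compare the archived v1.0 config against the updated config.yaml to see
# which stores moved from local zarr paths to the new s3_path prefix.
old = OmegaConf.load("marquette/conf/saved_configs/v1.0config.yaml")
new = OmegaConf.load("marquette/conf/config.yaml")
for key in ("create_edges.edges", "create_TMs.MERIT.TM", "create_streamflow.data_store"):
    print(f"{key}:\n  {OmegaConf.select(old, key)}\n  -> {OmegaConf.select(new, key)}")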
30 changes: 30 additions & 0 deletions pyproject.toml
@@ -1,5 +1,35 @@
[project]
name = "marquette"
authors = [
    {name = "Tadd Bindas", email = "[email protected]"},
]
dynamic = ["version"]
dependencies = [
    "icechunk==0.1.0a7",
    "zarr==3.0.0b2",
    "packaging==24.2",
    "xarray @ git+https://github.com/pydata/xarray",
    "s3fs==2024.10.0",
]

[project.optional-dependencies]
test = [
"pytest==8.3.2",
]
jupyter = [
"contextily==1.6.0",
"matplotlib>=3.7.0,<3.8.0",
"ipykernel>=6.29.0,<7.0.0",
"jupyterlab>=3.6.7,<4.0.0",
"xarray>=2024.1.1",
"matplotlib-inline>=0.1.6"
]

[tool.ruff]
exclude = [
"./tests*",
"./scripts*",
"./notebooks*",
]
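Because several of the pins above are pre-releases (icechunk 0.1.0a7, zarr 3.0.0b2) and xarray comes from a git source, it can be worth verifying the resolved environment before running the pipeline; something like `pip install -e ".[test,jupyter]"` would pull in the optional groups as well. The check below is an assumed convenience snippet, not part of the repository.

from importlib import metadata

# Print the installed versions of the pinned pipeline dependencies.
for pkg in ("icechunk", "zarr", "s3fs", "xarray", "packaging"):
    try:
        print(f"{pkg}=={metadata.version(pkg)}")
    except metadata.PackageNotFoundError:
        print(f"{pkg} is not installed")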
31 changes: 31 additions & 0 deletions scripts/icechunk_quickstart.py
@@ -0,0 +1,31 @@
# import icechunk
import xarray as xr
import zarr  # only used by the commented-out icechunk example below
import s3fs  # not referenced directly, but required for the s3:// write


def main():
    # storage_config = icechunk.StorageConfig.s3_from_env(
    #     bucket="mhpi-spatial",
    #     prefix="marquette/merit/quickstart",
    #     region="us-east-2",
    #     endpoint_url=None,
    # )
    # store = icechunk.IcechunkStore.create(storage_config)

    # Open the local zone 74 streamflow store and write a small slice to S3
    ds = xr.open_zarr("/projects/mhpi/data/MERIT/streamflow/zarr/merit_conus_v6.18_snow/74")

    ds1 = ds.isel(time=slice(None, 18))  # part 1: the first 18 time steps
    ds1.to_zarr("s3://mhpi-spatial/marquette/merit/test1/", mode="w")

    # storage_config = icechunk.StorageConfig.s3_from_env(
    #     bucket="mhpi-spatial",
    #     prefix="marquette/merit/quickstart",
    #     region="us-east-2",
    #     endpoint_url=None,
    # )
    # store = icechunk.IcechunkStore.create(storage_config)
    # group = zarr.group(store)
    # array = group.create("my_array", shape=10, dtype=int)
    # array[:] = 1
    # store.commit("first commit")


if __name__ == "__main__":
    main()
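A natural follow-up to the write above is reading the slice back from S3 to confirm it round-trips. The snippet below is a hedged sketch assuming the same bucket, prefix, and credentials as the write; `storage_options` is forwarded by xarray to s3fs.

import xarray as xr

# Read the freshly written slice back from S3 and confirm its dimensions.
ds_check = xr.open_zarr(
    "s3://mhpi-spatial/marquette/merit/test1/",
    storage_options={"anon": False},
)
print(ds_check.sizes)  # expect a time dimension of length 18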
