From c9e85cdcbdfa7a9f22ce3e79e30c0bec1da32141 Mon Sep 17 00:00:00 2001 From: B Steele Date: Wed, 14 Aug 2024 15:13:09 -0600 Subject: [PATCH 1/3] a few small bug fixes after running only polycenter from polygon --- _targets.R | 6 +-- config.yml | 4 +- data_acquisition/py/runGEEperTile.py | 61 ++++++++++++++++------------ data_acquisition/src/add_metadata.R | 26 +++++++----- data_acquisition/src/calc_center.R | 4 ++ data_acquisition/src/get_NHD.R | 4 +- 6 files changed, 61 insertions(+), 44 deletions(-) diff --git a/_targets.R b/_targets.R index 62ddade..4056618 100644 --- a/_targets.R +++ b/_targets.R @@ -74,7 +74,8 @@ list( name = locs, command = locs_save, read = read_csv(!!.x), - packages = "readr" + packages = "readr", + error = "null" ), # use location shapefile and configurations to get polygons from NHDPlusv2 @@ -182,8 +183,7 @@ list( make_collated_data_files add_metadata(yaml = yml, file_prefix = yml$proj, - version_identifier = yml$run_date, - collation_identifier = "2024-08-01") + version_identifier = yml$run_date) }, packages = c("tidyverse", "feather") ) diff --git a/config.yml b/config.yml index 3500ba6..103a2c4 100644 --- a/config.yml +++ b/config.yml @@ -9,7 +9,7 @@ local_settings: # information is stored - this path must end with a '/' - location_file: "" # name of the *.csv* file that contains the location information - unique_id: "" # this is the column that stores the unique identifier for each -# site, should not contain any special characters +# site or polygon and should not contain any special characters - latitude: "" # this is the column that stores the latitude of the site, must # be in decimal degrees - longitude: "" # this is the column that stores the longitude of the site, must @@ -43,7 +43,7 @@ temporal_settings: # is used, the date will be set to the current date spatial_settings: -- extent: "site" # options: "site", "polygon", "polycenter", "site+poly", +- extent: "site" # options: "site", "polygon", "polycenter", "site+polygon", # "site+polygon+polycenter", "polygon+polycenter" - at this time lake and lake # center can only be calculated for lakes in the US - site_buffer: 120 # buffer distance in meters around the site or poly center diff --git a/data_acquisition/py/runGEEperTile.py b/data_acquisition/py/runGEEperTile.py index 7f0e1e8..69718d3 100644 --- a/data_acquisition/py/runGEEperTile.py +++ b/data_acquisition/py/runGEEperTile.py @@ -1,6 +1,7 @@ #import modules import ee import time +import fiona from datetime import date, datetime import os from pandas import read_csv @@ -1080,7 +1081,7 @@ def ref_pull_89_DSWE3(image): Returns: summaries for band data within any given geometry area where the DSWE value is 3 """ -# process image with the radsat mask + # process image with the radsat mask r = add_rad_mask(image).select('radsat') # process image with cfmask f = cf_mask(image).select('cfmask') @@ -1092,7 +1093,7 @@ def ref_pull_89_DSWE3(image): h = calc_hill_shades(image, wrs.geometry()).select('hillShade') #calculate hillshadow hs = calc_hill_shadows(image, wrs.geometry()).select('hillShadow') - + #apply dswe function d = DSWE(image).select('dswe') gt0 = (d.gt(0).rename('dswe_gt0') @@ -1265,7 +1266,7 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): locs_feature = csv_to_eeFeat(locations, yml['location_crs'][0]) -if 'poly' in extent: +if 'polygon' in extent: #if polygon is in extent, check for shapefile shapefile = yml['polygon'][0] # if shapefile provided by user @@ -1287,13 +1288,19 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): poly_feat 
= ee.FeatureCollection(features) -if 'center' in extent: +if 'polycenter' in extent: if yml['polygon'][0] == True: centers_csv = read_csv('data_acquisition/out/user_polygon_centers.csv') + centers_csv = (centers_csv.rename(columns={'poi_latitude': 'Latitude', + 'poi_longitude': 'Longitude', + 'r_id': 'id'})) # load the shapefile into a Fiona object - centers = csv_to_eeFeat(centers_csv, yml['poly_crs'][0]) + centers = csv_to_eeFeat(centers_csv, 'EPSG:4326') else: #otherwise use the NHDPlus file centers_csv = read_csv('data_acquisition/out/NHDPlus_polygon_centers.csv') + centers_csv = (centers_csv.rename(columns={'poi_latitude': 'Latitude', + 'poi_longitude': 'Longitude', + 'r_id': 'id'})) centers = csv_to_eeFeat(centers_csv, 'EPSG:4326') # Create an ee.FeatureCollection from the ee.Features ee_centers = ee.FeatureCollection(centers) @@ -1388,16 +1395,17 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): feat = (locs_feature .filterBounds(geo) .map(dp_buff)) - if e == 'poly': - ## get the polygon stack ## - feat = (poly_feat - .filterBounds(geo)) - if e == 'center': - ## get centers feature and buffer ## - feat = (ee_centers - .filterBounds(geo) - .map(dp_buff)) - else: print('Extent not identified. Check configuration file.') + elif e == 'polygon': + ## get the polygon stack ## + feat = (poly_feat + .filterBounds(geo)) + elif e == 'polycenter': + ## get centers feature and buffer ## + feat = (ee_centers + .filterBounds(geo) + .map(dp_buff)) + else: + print('Extent not identified. Check configuration file.') ## process 457 stack #snip the ls data by the geometry of the location points @@ -1541,16 +1549,17 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): feat = (locs_feature .filterBounds(geo) .map(dp_buff)) - if e == 'poly': - ## get the polygon stack ## - feat = (poly_feat - .filterBounds(geo)) - if e == 'center': - ## get centers feature and buffer ## - feat = (ee_centers - .filterBounds(geo) - .map(dp_buff)) - else: print('Extent not identified. Check configuration file.') + elif e == 'polygon': + ## get the polygon stack ## + feat = (poly_feat + .filterBounds(geo)) + elif e == 'polycenter': + ## get centers feature and buffer ## + feat = (ee_centers + .filterBounds(geo) + .map(dp_buff)) + else: + print('Extent not identified. 
Check configuration file.') # snip the ls data by the geometry of the location points locs_stack_ls89 = (ls89 @@ -1615,7 +1624,7 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): locs_dataOut_89_D1a.start() print('Completed Landsat 8, 9 DSWE 1a stack acquisitions for ' + e + ' configuration at tile ' + str(tiles)) else: - print("Not configured to acquire DSWE 1a stack for Landsat 8, 9 for ' + e + ' configuration") + print('Not configured to acquire DSWE 1a stack for Landsat 8, 9 for ' + e + ' configuration') print('Starting Landsat 8, 9 DSWE1 acquisition for ' + e + ' configuration at tile ' + str(tiles)) locs_out_89_D1 = locs_stack_ls89.map(ref_pull_89_DSWE1).flatten() locs_out_89_D1 = locs_out_89_D1.filter(ee.Filter.notNull(['med_Blue'])) diff --git a/data_acquisition/src/add_metadata.R b/data_acquisition/src/add_metadata.R index ba367f7..fd44307 100644 --- a/data_acquisition/src/add_metadata.R +++ b/data_acquisition/src/add_metadata.R @@ -8,16 +8,13 @@ #' @param file_prefix specified string that matches the file group to collate #' @param version_identifier user-specified string to identify the RS pull these #' data are associated with -#' @param collation_identifier user-specified string to identify the output of this -#' target #' @returns silently creates collated .feather files from 'mid' folder and #' dumps into 'data' #' #' add_metadata <- function(yaml, file_prefix, - version_identifier, - collation_identifier) { + version_identifier) { files <- list.files(file.path("data_acquisition/mid/"), pattern = file_prefix, @@ -76,8 +73,13 @@ add_metadata <- function(yaml, rename(r_id = yaml$unique_id)%>% mutate(r_id = as.character(r_id)) } else if (e == "polycenter") { - spatial_info <- read_csv("data_acquisition/out/NHDPlus_polygon_centers.csv") %>% - mutate(r_id = as.character(r_id)) + if (yaml$polygon) { + spatial_info <- read_csv("data_acquisition/out/user_polygon_withrowid.csv") %>% + mutate(r_id = as.character(r_id)) + } else { + spatial_info <- read_csv("data_acquisition/out/NHDPlus_polygon_centers.csv") %>% + mutate(r_id = as.character(r_id)) + } } else if (e == "polygon") { if (yaml$polygon) { spatial_info <- read_sf(file.path(yaml$poly_dir, @@ -105,10 +107,12 @@ add_metadata <- function(yaml, parsed_sub <- unlist(parsed)[1:(str_len-1)] str_flatten(parsed_sub, collapse = '_') }) + dswe_location <- str_locate(df$source[1], "DSWE") df <- df %>% select(-`system:index`) %>% left_join(., metadata_light) %>% - mutate(DSWE = str_split(source, "_")[[1]][7], .by = source) %>% + mutate(DSWE = str_sub(source, dswe_location[1], dswe_location[2]+2)) %>% + mutate(DSWE = str_remove(DSWE, "_")) %>% left_join(., spatial_info) # break out the DSWE 1 data @@ -121,7 +125,7 @@ add_metadata <- function(yaml, "_collated_DSWE1_", ext, "_meta_v", - collation_identifier, + version_identifier, ".feather"))) } @@ -135,7 +139,7 @@ add_metadata <- function(yaml, "_collated_DSWE1a_", ext, "_meta_v", - collation_identifier, + version_identifier, ".feather"))) } @@ -149,7 +153,7 @@ add_metadata <- function(yaml, "_collated_DSWE3_", ext, "_meta_v", - collation_identifier, + version_identifier, ".feather"))) } }) @@ -159,7 +163,7 @@ add_metadata <- function(yaml, pattern = file_prefix, full.names = TRUE) %>% #but make sure they are the specified version - .[grepl(collation_identifier, .)] %>% + .[grepl(version_identifier, .)] %>% .[!grepl('filtered', .)] } diff --git a/data_acquisition/src/calc_center.R b/data_acquisition/src/calc_center.R index 202769a..b466075 100644 --- a/data_acquisition/src/calc_center.R 
+++ b/data_acquisition/src/calc_center.R @@ -35,6 +35,10 @@ calc_center <- function(poly, yaml) { # Xiao Yang's code in EE - Yang, Xiao. (2020). Deepest point calculation # for any given polygon using Google Earth Engine JavaScript API # (Version v1). Zenodo. https://doi.org/10.5281/zenodo.4136755 + # this needs to be in WGS for best results + if (yaml$poly_crs != "EPSG:4326" & !is.na(yaml$poly_crs)) { + one_wbd <- st_transform(one_wbd, "EPSG:4326") + } coord_for_UTM <- one_wbd %>% st_coordinates() mean_x <- mean(coord_for_UTM[ ,1]) mean_y <- mean(coord_for_UTM[ ,2]) diff --git a/data_acquisition/src/get_NHD.R b/data_acquisition/src/get_NHD.R index b3d923f..f80f844 100644 --- a/data_acquisition/src/get_NHD.R +++ b/data_acquisition/src/get_NHD.R @@ -49,9 +49,9 @@ get_NHD <- function(locations, yaml) { } else { # otherwise read in specified file polygons <- read_sf(file.path(yaml$poly_dir[1], yaml$poly_file[1])) polygons <- st_zm(polygons)#drop z or m if present - polygons <- st_make_valid(polygons) + polygons <- st_make_valid(polygons) %>% + rename(r_id = yaml$unique_id) st_drop_geometry(polygons) %>% - rename(r_id = yaml$unique_id) %>% mutate(py_id = r_id - 1) %>% #subtract 1 so that it matches with Py output write_csv(., "data_acquisition/out/user_polygon_withrowid.csv") st_write(polygons, "data_acquisition/out/user_polygon.shp", append = F) From 0f857b1bd903a0f8c486075d32ff8ce40d4fd7e1 Mon Sep 17 00:00:00 2001 From: B Steele Date: Thu, 15 Aug 2024 10:14:42 -0600 Subject: [PATCH 2/3] a few more bug fixes --- data_acquisition/src/add_metadata.R | 55 +++++++++++++------ .../src/collate_csvs_from_drive.R | 54 +++++++++--------- 2 files changed, 64 insertions(+), 45 deletions(-) diff --git a/data_acquisition/src/add_metadata.R b/data_acquisition/src/add_metadata.R index fd44307..902c623 100644 --- a/data_acquisition/src/add_metadata.R +++ b/data_acquisition/src/add_metadata.R @@ -17,8 +17,8 @@ add_metadata <- function(yaml, version_identifier) { files <- list.files(file.path("data_acquisition/mid/"), - pattern = file_prefix, - full.names = TRUE) %>% + pattern = file_prefix, + full.names = TRUE) %>% # and grab the right version .[grepl(version_identifier, .)] @@ -61,11 +61,17 @@ add_metadata <- function(yaml, } else { ext <- e } - + # get file using ext file <- files[grepl(ext, files)] # load file - df <- read_feather(file) + df <- read_feather(file) %>% + mutate(mission = case_when(grepl("LT04", `system:index`) ~ "LANDSAT_4", + grepl("LT05", `system:index`) ~ "LANDSAT_5", + grepl("LE07", `system:index`) ~ "LANDSAT_7", + grepl("LC08", `system:index`) ~ "LANDSAT_8", + grepl("LC09", `system:index`) ~ "LANDSAT_9", + TRUE ~ NA_character_)) if (e == "site") { spatial_info <- read_csv(file.path(yaml$data_dir, @@ -91,27 +97,40 @@ add_metadata <- function(yaml, mutate(r_id = as.character(r_id)) } } + # format system index for join - right now it has a rowid and the unique LS id # could also do this rowwise, but this method is a little faster df$r_id <- map_chr(.x = df$`system:index`, - function(.x) { - parsed <- str_split(.x, '_') - str_len <- length(unlist(parsed)) - unlist(parsed)[str_len] - }) + function(.x) { + parsed <- str_split(.x, '_') + last(unlist(parsed)) + }) df$system.index <- map_chr(.x = df$`system:index`, - #function to grab the system index - function(.x) { - parsed <- str_split(.x, '_') - str_len <- length(unlist(parsed)) - parsed_sub <- unlist(parsed)[1:(str_len-1)] - str_flatten(parsed_sub, collapse = '_') - }) - dswe_location <- str_locate(df$source[1], "DSWE") + #function to grab 
the system index + function(.x) { + parsed <- str_split(.x, '_') + str_len <- length(unlist(parsed)) + parsed_sub <- unlist(parsed)[1:(str_len-1)] + str_flatten(parsed_sub, collapse = '_') + }) + + # dswe info is stored differently in each mission group because of character length + # so grab out mission-specific dswe info and use that to define dswe + mission_dswe <- df %>% + group_by(mission) %>% + slice(1) %>% + ungroup() + dswe_loc <- as_tibble(str_locate(mission_dswe$source, "DSWE")) %>% + rowid_to_column() %>% + left_join(., mission_dswe %>% rowid_to_column()) %>% + select(rowid, mission, start, end) %>% + mutate(end = end + 2) + df <- df %>% select(-`system:index`) %>% left_join(., metadata_light) %>% - mutate(DSWE = str_sub(source, dswe_location[1], dswe_location[2]+2)) %>% + left_join(., dswe_loc) %>% + mutate(DSWE = str_sub(source, start, end), .by = mission) %>% mutate(DSWE = str_remove(DSWE, "_")) %>% left_join(., spatial_info) diff --git a/data_acquisition/src/collate_csvs_from_drive.R b/data_acquisition/src/collate_csvs_from_drive.R index a69f531..fdf0f86 100644 --- a/data_acquisition/src/collate_csvs_from_drive.R +++ b/data_acquisition/src/collate_csvs_from_drive.R @@ -15,11 +15,11 @@ #' #' collate_csvs_from_drive <- function(file_prefix, version_identifier) { - # get the list of files in the `in` directory + # get the list of files in the `in` directory files <- list.files(file.path("data_acquisition/down/", version_identifier), - pattern = file_prefix, - full.names = TRUE) + pattern = file_prefix, + full.names = TRUE) # make sure directory exists, create it if not if(!dir.exists(file.path("data_acquisition/mid/"))) { @@ -29,28 +29,28 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { meta_files <- files[grepl("meta", files)] all_meta <- map_dfr(meta_files, read_csv) write_feather(all_meta, file.path("data_acquisition/mid/", - paste0(file_prefix, "_collated_metadata_", - version_identifier, ".feather"))) + paste0(file_prefix, "_collated_metadata_", + version_identifier, ".feather"))) # if point data are present, subset those, collate, and save if (any(grepl("site", files))) { point_files <- files[grepl("site", files)] # collate files, but add the filename, since this *could be* is DSWE 1 + 3 all_points <- map_dfr(.x = point_files, - .f = function(.x) { - file_name = str_split(.x, '/')[[1]][5] - df <- read_csv(.x) - # grab all column names except system:index - df_names <- colnames(df)[2:length(colnames(df))] - # and coerce them to numeric for joining later - df %>% - mutate(across(all_of(df_names), - ~ as.numeric(.)))%>% - mutate(source = file_name) - }) + .f = function(.x) { + file_name = last(str_split(.x, '/')[[1]]) + df <- read_csv(.x) + # grab all column names except system:index + df_names <- colnames(df)[2:length(colnames(df))] + # and coerce them to numeric for joining later + df %>% + mutate(across(all_of(df_names), + ~ as.numeric(.)))%>% + mutate(source = file_name) + }) write_feather(all_points, file.path("data_acquisition/mid/", - paste0(file_prefix, "_collated_points_", - version_identifier, ".feather"))) + paste0(file_prefix, "_collated_points_", + version_identifier, ".feather"))) } # if centers data are present, subset those, collate, and save @@ -59,7 +59,7 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { # collate files, but add the filename, since this *could be* is DSWE 1 + 3 all_centers <- map_dfr(.x = center_files, .f = function(.x) { - file_name = str_split(.x, '/')[[1]][5] + file_name = 
last(str_split(.x, '/')[[1]]) df <- read_csv(.x) # grab all column names except system:index df_names <- colnames(df)[2:length(colnames(df))] @@ -70,8 +70,8 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { mutate(source = file_name) }) write_feather(all_centers, file.path("data_acquisition/mid/", - paste0(file_prefix, "_collated_centers_", - version_identifier, ".feather"))) + paste0(file_prefix, "_collated_centers_", + version_identifier, ".feather"))) } #if polygon data are present, subset those, collate, and save @@ -80,7 +80,7 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { # collate files, but add the filename, since this *could be* is DSWE 1 + 3 all_polys <- map_dfr(.x = poly_files, .f = function(.x) { - file_name = str_split(.x, '/')[[1]][5] + file_name = last(str_split(.x, '/')[[1]]) df <- read_csv(.x) # grab all column names except system:index df_names <- colnames(df)[2:length(colnames(df))] @@ -90,16 +90,16 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { ~ as.numeric(.)))%>% mutate(source = file_name) }) - + write_feather(all_polys, file.path("data_acquisition/mid/", - paste0(file_prefix, "_collated_polygons_", - version_identifier, ".feather"))) + paste0(file_prefix, "_collated_polygons_", + version_identifier, ".feather"))) } # return the list of files from this process list.files("data_acquisition/mid/", - pattern = file_prefix, - full.names = TRUE) %>% + pattern = file_prefix, + full.names = TRUE) %>% #but make sure they are the specified version .[grepl(version_identifier, .)] } \ No newline at end of file From 6fa146d28e7c50e26b40db6b7f6b9c5462bb03ba Mon Sep 17 00:00:00 2001 From: B Steele <32140074+steeleb@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:52:44 -0600 Subject: [PATCH 3/3] Update data_acquisition/src/add_metadata.R Co-authored-by: Matt Brousil <37380883+mbrousil@users.noreply.github.com> --- data_acquisition/src/add_metadata.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_acquisition/src/add_metadata.R b/data_acquisition/src/add_metadata.R index 902c623..3a82b4d 100644 --- a/data_acquisition/src/add_metadata.R +++ b/data_acquisition/src/add_metadata.R @@ -130,8 +130,8 @@ add_metadata <- function(yaml, select(-`system:index`) %>% left_join(., metadata_light) %>% left_join(., dswe_loc) %>% - mutate(DSWE = str_sub(source, start, end), .by = mission) %>% - mutate(DSWE = str_remove(DSWE, "_")) %>% + mutate(DSWE = str_sub(source, start, end), .by = mission, + DSWE = str_remove(DSWE, "_")) %>% left_join(., spatial_info) # break out the DSWE 1 data
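
Note on the DSWE parsing that the last two commits converge on: per the comment added in the second commit, the `source` filename length differs between mission groups, so a fixed `str_split()` index isn't reliable and the code instead locates the "DSWE" token with `str_locate()`. A minimal standalone sketch of that idea follows — the filenames are invented for illustration, and this version computes the location per row, where the pipeline itself computes one location per mission group and joins it back:

library(tidyverse)

# invented filenames that mimic the exporter's naming pattern
sources <- tibble(
  source = c("proj_LE07_points_DSWE1_v2024-08-14.csv",
             "proj_LC08_points_DSWE1a_v2024-08-14.csv",
             "proj_LC08_points_DSWE3_v2024-08-14.csv")
)

# find where "DSWE" starts in each name; take two extra characters so
# "DSWE1a" is captured whole, then strip the trailing underscore that
# rides along with the one-character labels ("DSWE1_", "DSWE3_")
loc <- str_locate(sources$source, "DSWE")

sources %>%
  mutate(DSWE = str_sub(source, loc[, "start"], loc[, "end"] + 2),
         DSWE = str_remove(DSWE, "_"))
# yields "DSWE1", "DSWE1a", "DSWE3"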