From c9e85cdcbdfa7a9f22ce3e79e30c0bec1da32141 Mon Sep 17 00:00:00 2001 From: B Steele Date: Wed, 14 Aug 2024 15:13:09 -0600 Subject: [PATCH 1/3] a few small bug fixes after running only polycenter from polygon --- _targets.R | 6 +-- config.yml | 4 +- data_acquisition/py/runGEEperTile.py | 61 ++++++++++++++++------------ data_acquisition/src/add_metadata.R | 26 +++++++----- data_acquisition/src/calc_center.R | 4 ++ data_acquisition/src/get_NHD.R | 4 +- 6 files changed, 61 insertions(+), 44 deletions(-) diff --git a/_targets.R b/_targets.R index 62ddade..4056618 100644 --- a/_targets.R +++ b/_targets.R @@ -74,7 +74,8 @@ list( name = locs, command = locs_save, read = read_csv(!!.x), - packages = "readr" + packages = "readr", + error = "null" ), # use location shapefile and configurations to get polygons from NHDPlusv2 @@ -182,8 +183,7 @@ list( make_collated_data_files add_metadata(yaml = yml, file_prefix = yml$proj, - version_identifier = yml$run_date, - collation_identifier = "2024-08-01") + version_identifier = yml$run_date) }, packages = c("tidyverse", "feather") ) diff --git a/config.yml b/config.yml index 3500ba6..103a2c4 100644 --- a/config.yml +++ b/config.yml @@ -9,7 +9,7 @@ local_settings: # information is stored - this path must end with a '/' - location_file: "" # name of the *.csv* file that contains the location information - unique_id: "" # this is the column that stores the unique identifier for each -# site, should not contain any special characters +# site or polygon and should not contain any special characters - latitude: "" # this is the column that stores the latitude of the site, must # be in decimal degrees - longitude: "" # this is the column that stores the longitude of the site, must @@ -43,7 +43,7 @@ temporal_settings: # is used, the date will be set to the current date spatial_settings: -- extent: "site" # options: "site", "polygon", "polycenter", "site+poly", +- extent: "site" # options: "site", "polygon", "polycenter", "site+polygon", # "site+polygon+polycenter", "polygon+polycenter" - at this time lake and lake # center can only be calculated for lakes in the US - site_buffer: 120 # buffer distance in meters around the site or poly center diff --git a/data_acquisition/py/runGEEperTile.py b/data_acquisition/py/runGEEperTile.py index 7f0e1e8..69718d3 100644 --- a/data_acquisition/py/runGEEperTile.py +++ b/data_acquisition/py/runGEEperTile.py @@ -1,6 +1,7 @@ #import modules import ee import time +import fiona from datetime import date, datetime import os from pandas import read_csv @@ -1080,7 +1081,7 @@ def ref_pull_89_DSWE3(image): Returns: summaries for band data within any given geometry area where the DSWE value is 3 """ -# process image with the radsat mask + # process image with the radsat mask r = add_rad_mask(image).select('radsat') # process image with cfmask f = cf_mask(image).select('cfmask') @@ -1092,7 +1093,7 @@ def ref_pull_89_DSWE3(image): h = calc_hill_shades(image, wrs.geometry()).select('hillShade') #calculate hillshadow hs = calc_hill_shadows(image, wrs.geometry()).select('hillShadow') - + #apply dswe function d = DSWE(image).select('dswe') gt0 = (d.gt(0).rename('dswe_gt0') @@ -1265,7 +1266,7 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): locs_feature = csv_to_eeFeat(locations, yml['location_crs'][0]) -if 'poly' in extent: +if 'polygon' in extent: #if polygon is in extent, check for shapefile shapefile = yml['polygon'][0] # if shapefile provided by user @@ -1287,13 +1288,19 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): poly_feat 
= ee.FeatureCollection(features) -if 'center' in extent: +if 'polycenter' in extent: if yml['polygon'][0] == True: centers_csv = read_csv('data_acquisition/out/user_polygon_centers.csv') + centers_csv = (centers_csv.rename(columns={'poi_latitude': 'Latitude', + 'poi_longitude': 'Longitude', + 'r_id': 'id'})) # load the shapefile into a Fiona object - centers = csv_to_eeFeat(centers_csv, yml['poly_crs'][0]) + centers = csv_to_eeFeat(centers_csv, 'EPSG:4326') else: #otherwise use the NHDPlus file centers_csv = read_csv('data_acquisition/out/NHDPlus_polygon_centers.csv') + centers_csv = (centers_csv.rename(columns={'poi_latitude': 'Latitude', + 'poi_longitude': 'Longitude', + 'r_id': 'id'})) centers = csv_to_eeFeat(centers_csv, 'EPSG:4326') # Create an ee.FeatureCollection from the ee.Features ee_centers = ee.FeatureCollection(centers) @@ -1388,16 +1395,17 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): feat = (locs_feature .filterBounds(geo) .map(dp_buff)) - if e == 'poly': - ## get the polygon stack ## - feat = (poly_feat - .filterBounds(geo)) - if e == 'center': - ## get centers feature and buffer ## - feat = (ee_centers - .filterBounds(geo) - .map(dp_buff)) - else: print('Extent not identified. Check configuration file.') + elif e == 'polygon': + ## get the polygon stack ## + feat = (poly_feat + .filterBounds(geo)) + elif e == 'polycenter': + ## get centers feature and buffer ## + feat = (ee_centers + .filterBounds(geo) + .map(dp_buff)) + else: + print('Extent not identified. Check configuration file.') ## process 457 stack #snip the ls data by the geometry of the location points @@ -1541,16 +1549,17 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): feat = (locs_feature .filterBounds(geo) .map(dp_buff)) - if e == 'poly': - ## get the polygon stack ## - feat = (poly_feat - .filterBounds(geo)) - if e == 'center': - ## get centers feature and buffer ## - feat = (ee_centers - .filterBounds(geo) - .map(dp_buff)) - else: print('Extent not identified. Check configuration file.') + elif e == 'polygon': + ## get the polygon stack ## + feat = (poly_feat + .filterBounds(geo)) + elif e == 'polycenter': + ## get centers feature and buffer ## + feat = (ee_centers + .filterBounds(geo) + .map(dp_buff)) + else: + print('Extent not identified. 
Check configuration file.') # snip the ls data by the geometry of the location points locs_stack_ls89 = (ls89 @@ -1615,7 +1624,7 @@ def maximum_no_of_tasks(MaxNActive, waitingPeriod): locs_dataOut_89_D1a.start() print('Completed Landsat 8, 9 DSWE 1a stack acquisitions for ' + e + ' configuration at tile ' + str(tiles)) else: - print("Not configured to acquire DSWE 1a stack for Landsat 8, 9 for ' + e + ' configuration") + print('Not configured to acquire DSWE 1a stack for Landsat 8, 9 for ' + e + ' configuration') print('Starting Landsat 8, 9 DSWE1 acquisition for ' + e + ' configuration at tile ' + str(tiles)) locs_out_89_D1 = locs_stack_ls89.map(ref_pull_89_DSWE1).flatten() locs_out_89_D1 = locs_out_89_D1.filter(ee.Filter.notNull(['med_Blue'])) diff --git a/data_acquisition/src/add_metadata.R b/data_acquisition/src/add_metadata.R index ba367f7..fd44307 100644 --- a/data_acquisition/src/add_metadata.R +++ b/data_acquisition/src/add_metadata.R @@ -8,16 +8,13 @@ #' @param file_prefix specified string that matches the file group to collate #' @param version_identifier user-specified string to identify the RS pull these #' data are associated with -#' @param collation_identifier user-specified string to identify the output of this -#' target #' @returns silently creates collated .feather files from 'mid' folder and #' dumps into 'data' #' #' add_metadata <- function(yaml, file_prefix, - version_identifier, - collation_identifier) { + version_identifier) { files <- list.files(file.path("data_acquisition/mid/"), pattern = file_prefix, @@ -76,8 +73,13 @@ add_metadata <- function(yaml, rename(r_id = yaml$unique_id)%>% mutate(r_id = as.character(r_id)) } else if (e == "polycenter") { - spatial_info <- read_csv("data_acquisition/out/NHDPlus_polygon_centers.csv") %>% - mutate(r_id = as.character(r_id)) + if (yaml$polygon) { + spatial_info <- read_csv("data_acquisition/out/user_polygon_withrowid.csv") %>% + mutate(r_id = as.character(r_id)) + } else { + spatial_info <- read_csv("data_acquisition/out/NHDPlus_polygon_centers.csv") %>% + mutate(r_id = as.character(r_id)) + } } else if (e == "polygon") { if (yaml$polygon) { spatial_info <- read_sf(file.path(yaml$poly_dir, @@ -105,10 +107,12 @@ add_metadata <- function(yaml, parsed_sub <- unlist(parsed)[1:(str_len-1)] str_flatten(parsed_sub, collapse = '_') }) + dswe_location <- str_locate(df$source[1], "DSWE") df <- df %>% select(-`system:index`) %>% left_join(., metadata_light) %>% - mutate(DSWE = str_split(source, "_")[[1]][7], .by = source) %>% + mutate(DSWE = str_sub(source, dswe_location[1], dswe_location[2]+2)) %>% + mutate(DSWE = str_remove(DSWE, "_")) %>% left_join(., spatial_info) # break out the DSWE 1 data @@ -121,7 +125,7 @@ add_metadata <- function(yaml, "_collated_DSWE1_", ext, "_meta_v", - collation_identifier, + version_identifier, ".feather"))) } @@ -135,7 +139,7 @@ add_metadata <- function(yaml, "_collated_DSWE1a_", ext, "_meta_v", - collation_identifier, + version_identifier, ".feather"))) } @@ -149,7 +153,7 @@ add_metadata <- function(yaml, "_collated_DSWE3_", ext, "_meta_v", - collation_identifier, + version_identifier, ".feather"))) } }) @@ -159,7 +163,7 @@ add_metadata <- function(yaml, pattern = file_prefix, full.names = TRUE) %>% #but make sure they are the specified version - .[grepl(collation_identifier, .)] %>% + .[grepl(version_identifier, .)] %>% .[!grepl('filtered', .)] } diff --git a/data_acquisition/src/calc_center.R b/data_acquisition/src/calc_center.R index 202769a..b466075 100644 --- a/data_acquisition/src/calc_center.R 
+++ b/data_acquisition/src/calc_center.R @@ -35,6 +35,10 @@ calc_center <- function(poly, yaml) { # Xiao Yang's code in EE - Yang, Xiao. (2020). Deepest point calculation # for any given polygon using Google Earth Engine JavaScript API # (Version v1). Zenodo. https://doi.org/10.5281/zenodo.4136755 + # this needs to be in WGS for best results + if (yaml$poly_crs != "EPSG:4326" & !is.na(yaml$poly_crs)) { + one_wbd <- st_transform(one_wbd, "EPSG:4326") + } coord_for_UTM <- one_wbd %>% st_coordinates() mean_x <- mean(coord_for_UTM[ ,1]) mean_y <- mean(coord_for_UTM[ ,2]) diff --git a/data_acquisition/src/get_NHD.R b/data_acquisition/src/get_NHD.R index b3d923f..f80f844 100644 --- a/data_acquisition/src/get_NHD.R +++ b/data_acquisition/src/get_NHD.R @@ -49,9 +49,9 @@ get_NHD <- function(locations, yaml) { } else { # otherwise read in specified file polygons <- read_sf(file.path(yaml$poly_dir[1], yaml$poly_file[1])) polygons <- st_zm(polygons)#drop z or m if present - polygons <- st_make_valid(polygons) + polygons <- st_make_valid(polygons) %>% + rename(r_id = yaml$unique_id) st_drop_geometry(polygons) %>% - rename(r_id = yaml$unique_id) %>% mutate(py_id = r_id - 1) %>% #subtract 1 so that it matches with Py output write_csv(., "data_acquisition/out/user_polygon_withrowid.csv") st_write(polygons, "data_acquisition/out/user_polygon.shp", append = F) From 0f857b1bd903a0f8c486075d32ff8ce40d4fd7e1 Mon Sep 17 00:00:00 2001 From: B Steele Date: Thu, 15 Aug 2024 10:14:42 -0600 Subject: [PATCH 2/3] a few more bug fixes --- data_acquisition/src/add_metadata.R | 55 +++++++++++++------ .../src/collate_csvs_from_drive.R | 54 +++++++++--------- 2 files changed, 64 insertions(+), 45 deletions(-) diff --git a/data_acquisition/src/add_metadata.R b/data_acquisition/src/add_metadata.R index fd44307..902c623 100644 --- a/data_acquisition/src/add_metadata.R +++ b/data_acquisition/src/add_metadata.R @@ -17,8 +17,8 @@ add_metadata <- function(yaml, version_identifier) { files <- list.files(file.path("data_acquisition/mid/"), - pattern = file_prefix, - full.names = TRUE) %>% + pattern = file_prefix, + full.names = TRUE) %>% # and grab the right version .[grepl(version_identifier, .)] @@ -61,11 +61,17 @@ add_metadata <- function(yaml, } else { ext <- e } - + # get file using ext file <- files[grepl(ext, files)] # load file - df <- read_feather(file) + df <- read_feather(file) %>% + mutate(mission = case_when(grepl("LT04", `system:index`) ~ "LANDSAT_4", + grepl("LT05", `system:index`) ~ "LANDSAT_5", + grepl("LE07", `system:index`) ~ "LANDSAT_7", + grepl("LC08", `system:index`) ~ "LANDSAT_8", + grepl("LC09", `system:index`) ~ "LANDSAT_9", + TRUE ~ NA_character_)) if (e == "site") { spatial_info <- read_csv(file.path(yaml$data_dir, @@ -91,27 +97,40 @@ add_metadata <- function(yaml, mutate(r_id = as.character(r_id)) } } + # format system index for join - right now it has a rowid and the unique LS id # could also do this rowwise, but this method is a little faster df$r_id <- map_chr(.x = df$`system:index`, - function(.x) { - parsed <- str_split(.x, '_') - str_len <- length(unlist(parsed)) - unlist(parsed)[str_len] - }) + function(.x) { + parsed <- str_split(.x, '_') + last(unlist(parsed)) + }) df$system.index <- map_chr(.x = df$`system:index`, - #function to grab the system index - function(.x) { - parsed <- str_split(.x, '_') - str_len <- length(unlist(parsed)) - parsed_sub <- unlist(parsed)[1:(str_len-1)] - str_flatten(parsed_sub, collapse = '_') - }) - dswe_location <- str_locate(df$source[1], "DSWE") + #function to grab 
the system index + function(.x) { + parsed <- str_split(.x, '_') + str_len <- length(unlist(parsed)) + parsed_sub <- unlist(parsed)[1:(str_len-1)] + str_flatten(parsed_sub, collapse = '_') + }) + + # dswe info is stored differently in each mission group because of character length + # so grab out mission-specific dswe info and use that to define dswe + mission_dswe <- df %>% + group_by(mission) %>% + slice(1) %>% + ungroup() + dswe_loc <- as_tibble(str_locate(mission_dswe$source, "DSWE")) %>% + rowid_to_column() %>% + left_join(., mission_dswe %>% rowid_to_column()) %>% + select(rowid, mission, start, end) %>% + mutate(end = end + 2) + df <- df %>% select(-`system:index`) %>% left_join(., metadata_light) %>% - mutate(DSWE = str_sub(source, dswe_location[1], dswe_location[2]+2)) %>% + left_join(., dswe_loc) %>% + mutate(DSWE = str_sub(source, start, end), .by = mission) %>% mutate(DSWE = str_remove(DSWE, "_")) %>% left_join(., spatial_info) diff --git a/data_acquisition/src/collate_csvs_from_drive.R b/data_acquisition/src/collate_csvs_from_drive.R index a69f531..fdf0f86 100644 --- a/data_acquisition/src/collate_csvs_from_drive.R +++ b/data_acquisition/src/collate_csvs_from_drive.R @@ -15,11 +15,11 @@ #' #' collate_csvs_from_drive <- function(file_prefix, version_identifier) { - # get the list of files in the `in` directory + # get the list of files in the `in` directory files <- list.files(file.path("data_acquisition/down/", version_identifier), - pattern = file_prefix, - full.names = TRUE) + pattern = file_prefix, + full.names = TRUE) # make sure directory exists, create it if not if(!dir.exists(file.path("data_acquisition/mid/"))) { @@ -29,28 +29,28 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { meta_files <- files[grepl("meta", files)] all_meta <- map_dfr(meta_files, read_csv) write_feather(all_meta, file.path("data_acquisition/mid/", - paste0(file_prefix, "_collated_metadata_", - version_identifier, ".feather"))) + paste0(file_prefix, "_collated_metadata_", + version_identifier, ".feather"))) # if point data are present, subset those, collate, and save if (any(grepl("site", files))) { point_files <- files[grepl("site", files)] # collate files, but add the filename, since this *could be* is DSWE 1 + 3 all_points <- map_dfr(.x = point_files, - .f = function(.x) { - file_name = str_split(.x, '/')[[1]][5] - df <- read_csv(.x) - # grab all column names except system:index - df_names <- colnames(df)[2:length(colnames(df))] - # and coerce them to numeric for joining later - df %>% - mutate(across(all_of(df_names), - ~ as.numeric(.)))%>% - mutate(source = file_name) - }) + .f = function(.x) { + file_name = last(str_split(.x, '/')[[1]]) + df <- read_csv(.x) + # grab all column names except system:index + df_names <- colnames(df)[2:length(colnames(df))] + # and coerce them to numeric for joining later + df %>% + mutate(across(all_of(df_names), + ~ as.numeric(.)))%>% + mutate(source = file_name) + }) write_feather(all_points, file.path("data_acquisition/mid/", - paste0(file_prefix, "_collated_points_", - version_identifier, ".feather"))) + paste0(file_prefix, "_collated_points_", + version_identifier, ".feather"))) } # if centers data are present, subset those, collate, and save @@ -59,7 +59,7 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { # collate files, but add the filename, since this *could be* is DSWE 1 + 3 all_centers <- map_dfr(.x = center_files, .f = function(.x) { - file_name = str_split(.x, '/')[[1]][5] + file_name = 
last(str_split(.x, '/')[[1]]) df <- read_csv(.x) # grab all column names except system:index df_names <- colnames(df)[2:length(colnames(df))] @@ -70,8 +70,8 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { mutate(source = file_name) }) write_feather(all_centers, file.path("data_acquisition/mid/", - paste0(file_prefix, "_collated_centers_", - version_identifier, ".feather"))) + paste0(file_prefix, "_collated_centers_", + version_identifier, ".feather"))) } #if polygon data are present, subset those, collate, and save @@ -80,7 +80,7 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { # collate files, but add the filename, since this *could be* is DSWE 1 + 3 all_polys <- map_dfr(.x = poly_files, .f = function(.x) { - file_name = str_split(.x, '/')[[1]][5] + file_name = last(str_split(.x, '/')[[1]]) df <- read_csv(.x) # grab all column names except system:index df_names <- colnames(df)[2:length(colnames(df))] @@ -90,16 +90,16 @@ collate_csvs_from_drive <- function(file_prefix, version_identifier) { ~ as.numeric(.)))%>% mutate(source = file_name) }) - + write_feather(all_polys, file.path("data_acquisition/mid/", - paste0(file_prefix, "_collated_polygons_", - version_identifier, ".feather"))) + paste0(file_prefix, "_collated_polygons_", + version_identifier, ".feather"))) } # return the list of files from this process list.files("data_acquisition/mid/", - pattern = file_prefix, - full.names = TRUE) %>% + pattern = file_prefix, + full.names = TRUE) %>% #but make sure they are the specified version .[grepl(version_identifier, .)] } \ No newline at end of file From 6fa146d28e7c50e26b40db6b7f6b9c5462bb03ba Mon Sep 17 00:00:00 2001 From: B Steele <32140074+steeleb@users.noreply.github.com> Date: Thu, 15 Aug 2024 17:52:44 -0600 Subject: [PATCH 3/3] Update data_acquisition/src/add_metadata.R Co-authored-by: Matt Brousil <37380883+mbrousil@users.noreply.github.com> --- data_acquisition/src/add_metadata.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data_acquisition/src/add_metadata.R b/data_acquisition/src/add_metadata.R index 902c623..3a82b4d 100644 --- a/data_acquisition/src/add_metadata.R +++ b/data_acquisition/src/add_metadata.R @@ -130,8 +130,8 @@ add_metadata <- function(yaml, select(-`system:index`) %>% left_join(., metadata_light) %>% left_join(., dswe_loc) %>% - mutate(DSWE = str_sub(source, start, end), .by = mission) %>% - mutate(DSWE = str_remove(DSWE, "_")) %>% + mutate(DSWE = str_sub(source, start, end), .by = mission, + DSWE = str_remove(DSWE, "_")) %>% left_join(., spatial_info) # break out the DSWE 1 data
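
Note on the DSWE parsing that the last two commits converge on: per the comment added in the second commit, the `source` filename length differs between mission groups, so a fixed `str_split()` index isn't reliable and the code instead locates the "DSWE" token with `str_locate()`. A minimal standalone sketch of that idea follows — the filenames are invented for illustration, and this version computes the location per row, where the pipeline itself computes one location per mission group and joins it back:

library(tidyverse)

# invented filenames that mimic the exporter's naming pattern
sources <- tibble(
  source = c("proj_LE07_points_DSWE1_v2024-08-14.csv",
             "proj_LC08_points_DSWE1a_v2024-08-14.csv",
             "proj_LC08_points_DSWE3_v2024-08-14.csv")
)

# find where "DSWE" starts in each name; take two extra characters so
# "DSWE1a" is captured whole, then strip the trailing underscore that
# rides along with the one-character labels ("DSWE1_", "DSWE3_")
loc <- str_locate(sources$source, "DSWE")

sources %>%
  mutate(DSWE = str_sub(source, loc[, "start"], loc[, "end"] + 2),
         DSWE = str_remove(DSWE, "_"))
# yields "DSWE1", "DSWE1a", "DSWE3"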