From c82b36ea8bf1de5df2367137f4a5ed3c36a8cad2 Mon Sep 17 00:00:00 2001
From: Glenda Yenni
Date: Mon, 11 Sep 2023 13:51:40 -0600
Subject: [PATCH] spatial packages fu [patch]

---
 .github/workflows/main.yml            |   7 +-
 DataCleaningScripts/download_eden.R   | 136 ++++++++++++
 DataCleaningScripts/eden_covariates.R | 295 ++++++++++++++++++++++++++
 DataCleaningScripts/get_water_data.R  |  26 +--
 install-packages.R                    |   7 +-
 5 files changed, 451 insertions(+), 20 deletions(-)
 create mode 100644 DataCleaningScripts/download_eden.R
 create mode 100644 DataCleaningScripts/eden_covariates.R

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 523d33af..0ae16dcc 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -49,9 +49,10 @@ jobs:
 
     - name: Install system dependencies
       run: |
-        sudo apt remove libudunits2-dev libgdal-dev libgeos-dev libproj-dev
-        sudo apt-get install libgit2-dev libcurl4-openssl-dev libudunits2-dev libgdal-dev libnetcdf-dev libgeos-dev libproj-dev
-
+        sudo apt-get -y update && sudo apt-get install -y \
+          libgit2-dev libicu-dev gdal-bin proj-data proj-bin libv8-dev libprotobuf-dev protobuf-compiler \
+          libudunits2-dev libgdal-dev libgeos-dev libproj-dev libfontconfig1-dev libjq-dev libmysqlclient-dev libpng-dev
+        sudo apt-get update
 
     - name: Install packages
       run: Rscript install-packages.R

diff --git a/DataCleaningScripts/download_eden.R b/DataCleaningScripts/download_eden.R
new file mode 100644
index 00000000..9c0b91ec
--- /dev/null
+++ b/DataCleaningScripts/download_eden.R
@@ -0,0 +1,136 @@
# Functions to find and download EDEN water depth data

#' @name get_metadata
#'
#' @title Get EDEN metadata
#'
#' @export
#'
get_metadata <- function() {
  url <- "https://sflthredds.er.usgs.gov/thredds/catalog/eden/depths/catalog.html"
  metadata <- url %>%
    rvest::read_html() %>%
    rvest::html_table()
  metadata <- metadata[[1]] %>%
    dplyr::filter(Dataset != "depths") %>% # Drop directory name from first row
    dplyr::rename(dataset = Dataset, size = Size, last_modified = `Last Modified`) %>%
    dplyr::mutate(last_modified = as.POSIXct(last_modified,
                                             format = "%Y-%m-%dT%H:%M:%S"),
                  year = as.integer(substr(dataset, start = 1, stop = 4)))
  return(metadata)
}

#' @name get_data_urls
#'
#' @title Get EDEN depths data URLs for download
#'
#' @param file_names file names to download from metadata
#'
#' @return list of file urls
#'
#' @export
#'
get_data_urls <- function(file_names) {
  base_url <- "https://sflthredds.er.usgs.gov/thredds/fileServer/eden/depths"
  urls <- file.path(base_url, file_names)
  return(list(file_names = file_names, urls = urls))
}

#' @name get_last_download
#'
#' @title Get list of EDEN depths data already downloaded
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @return table of files already downloaded
#'
#' @export
#'
get_last_download <- function(eden_path = file.path("Water"),
                              metadata, force_update = FALSE) {
  if ("last_download.csv" %in% list.files(eden_path) && !force_update) {
    last_download <- read.csv(file.path(eden_path, "last_download.csv"))
  } else {
    # No download record yet (or forced refresh): report everything as old
    # and zero-sized so that all remote files look new
    last_download <- data.frame(dataset = metadata$dataset, size = "0 Mbytes",
                                last_modified = as.POSIXct("1900-01-01 00:00:01",
                                                           format = "%Y-%m-%d %H:%M:%S"))
  }
  return(last_download)
}
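# Interactive sketch of the helpers above (not run on source; assumes network
# access to the USGS THREDDS server, and the file name shown is illustrative,
# not verified):
#
#   meta <- get_metadata()
#   meta$dataset[1]                        # e.g. "1991_q1_depth.nc"
#   get_data_urls(meta$dataset[1])$urls    # full download URL for that file
#   get_last_download("Water", meta)       # placeholder table on first run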
#' @name get_files_to_update
#'
#' @title Determine list of new EDEN files to download
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @export
#'
get_files_to_update <- function(eden_path = file.path("Water"),
                                metadata, force_update = FALSE) {
  # Find files that have been updated since last download
  last_download <- get_last_download(eden_path, metadata, force_update = force_update)
  new <- metadata %>%
    dplyr::left_join(last_download, by = "dataset", suffix = c(".curr", ".last")) %>%
    dplyr::filter(last_modified.curr > last_modified.last |
                    size.curr != size.last |
                    is.na(last_modified.last))
  # Also refresh files within two years of any updated file
  metadata %>%
    dplyr::filter(dplyr::between(year, min(new$year) - 2, max(new$year) + 2))
}

#' @name update_last_download
#'
#' @title Write new metadata file for files already downloaded
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#'
#' @export
#'
update_last_download <- function(eden_path = file.path("Water"),
                                 metadata) {
  write.csv(metadata, file.path(eden_path, "last_download.csv"), row.names = FALSE)
}

#' @name download_eden_depths
#'
#' @title Download the EDEN depths data
#'
#' @param eden_path path where the EDEN data should be stored
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @return char vector of downloaded/updated files
#'
#' @export
#'
download_eden_depths <- function(eden_path = file.path("Water"),
                                 force_update = FALSE) {

  if (!dir.exists(eden_path)) {
    dir.create(eden_path, recursive = TRUE)
  }

  metadata <- get_metadata()
  to_update <- get_files_to_update(eden_path, metadata,
                                   force_update = force_update)
  data_urls <- get_data_urls(to_update$dataset)
  # Large netCDF files can exceed the default 60 s download timeout
  options(timeout = 226)

  mapply(download.file,
         data_urls$urls,
         file.path(eden_path, data_urls$file_names))

  update_last_download(eden_path, metadata)

  return(file.path(eden_path, data_urls$file_names))
}
diff --git a/DataCleaningScripts/eden_covariates.R b/DataCleaningScripts/eden_covariates.R
new file mode 100644
index 00000000..861a4a45
--- /dev/null
+++ b/DataCleaningScripts/eden_covariates.R
@@ -0,0 +1,295 @@
# Calculate standard water depth covariates from EDEN data following the WADEM model

#' @name load_boundaries
#'
#' @title Load region boundary polygons
#'
#' @param path path to regions shapefile
#' @param level region level to load (all, wcas, or subregions)
#'
#' @export
#'
load_boundaries <- function(path = file.path("SiteandMethods/regions"),
                            level = "subregions") {
  level <- tolower(level)
  boundaries <- sf::st_read(file.path(path, paste0(level, ".shp")))
  return(boundaries)
}

#' @name calc_dry_days
#'
#' @title Calculate dry days from everwader
#'
#' @param depth_data depth .nc files
#'
#' @export
#'
calc_dry_days <- function(depth_data) {
  dry_days <- depth_data %>%
    dplyr::mutate(dry_days = dplyr::case_when(
      depth <= units::set_units(0, cm) ~ units::set_units(1, d),
      depth > units::set_units(0, cm) ~ units::set_units(0, d),
      is.na(depth) ~ units::set_units(NA, d)),
      .keep = "none") %>%
    stars::st_apply(c(1, 2), sum)
  return(dry_days)
}
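# Example of how calc_dry_days is meant to be called (a sketch; assumes depth
# files for one season have already been downloaded into "Water"):
#
#   season <- stars::read_stars(list.files("Water", "2022_.*_depth.nc",
#                                          full.names = TRUE), along = "time") %>%
#     setNames("depth")
#   calc_dry_days(season)   # 2-D grid: count of days with depth <= 0 cm per cell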
#' @name calc_recession
#'
#' @title Calculate recession from everwader
#'
#' @param depth_data depth .nc files
#'
#' @export
#'
calc_recession <- function(depth_data) {
  times <- stars::st_get_dimension_values(depth_data, "time")
  start_depth <- depth_data |>
    dplyr::filter(time == min(times)) |>
    stars::st_set_dimensions("time", values = NULL)
  end_depth <- depth_data |>
    dplyr::filter(time == max(times)) |>
    stars::st_set_dimensions("time", values = NULL)
  recession <- start_depth - end_depth
  days <- as.integer(max(times) - min(times))
  recession_rate <- recession / days
  return(recession_rate)
}

#' @name calc_reversals
#'
#' @title Calculate reversals following Peterson 2017
#'
#' @param depth_data depth .nc files
#'
#' @export
#'
calc_reversals <- function(depth_data) {
  end_date_position <- stars::st_dimensions(depth_data)$time$to
  depth_t <- depth_data[, , , 2:end_date_position] |>
    stars::st_set_dimensions("time", values = seq(1, end_date_position - 1))
  depth_t_minus_1 <- depth_data[, , , 1:(end_date_position - 1)] |>
    stars::st_set_dimensions("time", values = seq(1, end_date_position - 1))
  # A reversal is any day-over-day rise in depth
  depth_deltas <- depth_t - depth_t_minus_1
  reversals <- depth_deltas %>%
    dplyr::mutate(reversal = dplyr::case_when(
      depth > units::set_units(0, cm) ~ units::set_units(1, d),
      depth <= units::set_units(0, cm) ~ units::set_units(0, d),
      is.na(depth) ~ units::set_units(NA, d)),
      .keep = "none") %>%
    stars::st_apply(c(1, 2), sum)
  return(reversals)
}


#' @name extract_region_means
#'
#' @title Calculate region means from raster data
#'
#' @param raster variable raster
#' @param regions regions polygons
#'
#' @return region means spdf
#'
#' @export
#'
extract_region_means <- function(raster, regions) {
  var_name <- names(raster)
  # The base aggregate generic dispatches to the stars method, which averages
  # cells within each polygon
  region_means <- aggregate(raster, regions, mean, na.rm = TRUE) %>%
    setNames("value")
  region_means_spdf <- regions %>%
    dplyr::mutate(variable = var_name, value = region_means$value)
  return(region_means_spdf)
}

#' @name available_years
#'
#' @title Get list of years available for covariate calculation
#'
#' @param eden_path path where the EDEN data should be stored
#'
#' @return vector of years
#'
#' @export
#'
available_years <- function(eden_path = file.path("Water")) {
  eden_data_files <- list.files(eden_path, pattern = "_depth.nc")
  years <- eden_data_files %>%
    stringr::str_split("_", simplify = TRUE) %>%
    .[, 1] %>%
    unique()
  return(years)
}
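# Recession and reversal sketch (illustrative; reuses the season object from
# the calc_dry_days example above):
#
#   calc_recession(season)   # cm/day drop from first to last day, per cell
#   calc_reversals(season)   # count of day-over-day depth rises, per cell
#   available_years()        # e.g. c("1991", "1992", ...)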
#' @name get_eden_covariates
#'
#' @title Generate annual scale water covariates using EDEN data
#'
#' @param level region level to load (all, wcas, or subregions)
#' @param eden_path path where the EDEN data should be stored
#' @param years numeric vector of years to generate covariates for,
#'   defaults to all available years
#' @param boundaries_path name of a shape file holding the boundaries
#'   within which to calculate covariates
#'
#' @return data.frame covariate data including columns for region, year,
#'   covariate, value, and the geometry of the region
#'
#' @export
#'
get_eden_covariates <- function(level = "subregions",
                                eden_path = file.path("Water"),
                                years = available_years(eden_path),
                                boundaries_path = file.path("SiteandMethods/regions")) {

  eden_data_files <- list.files(eden_path, pattern = "_depth.nc")
  boundaries <- load_boundaries(boundaries_path, level)
  examp_eden_file <- stars::read_stars(file.path(eden_path, eden_data_files[1]))
  boundaries_utm <- sf::st_transform(boundaries, sf::st_crs(examp_eden_file))

  covariates <- c()
  for (year in years) {
    print(paste0("Processing ", year, "..."))
    pattern <- paste0(year, "_.*_depth.nc")
    nc_files <- list.files(eden_path, pattern, full.names = TRUE)
    # Truncate negative (below-ground) water levels to 0 cm
    year_data <- stars::read_stars(nc_files, along = "time") %>%
      setNames("depth") %>%
      dplyr::mutate(depth = dplyr::case_when(
        depth < units::set_units(0, cm) ~ units::set_units(0, cm),
        depth >= units::set_units(0, cm) ~ depth,
        is.na(depth) ~ units::set_units(NA, cm)))

    breed_start <- as.POSIXct(paste0(year, "-01-01"))
    breed_end <- as.POSIXct(paste0(year, "-06-30"))
    breed_season_data <- year_data %>%
      dplyr::filter(time >= breed_start, time <= breed_end)

    # Do a pre-breed/post-breed split to allow pre-breeding recession
    # calculations following Peterson 2017. Peterson does this on a
    # per-species basis. To start, just pick the mid-point for the different
    # species to split on.
    pre_breed_end <- as.POSIXct(paste0(year, "-03-01"))
    pre_breed_season_data <- year_data %>%
      dplyr::filter(time >= breed_start, time <= pre_breed_end)
    post_breed_season_data <- year_data %>%
      dplyr::filter(time >= pre_breed_end, time <= breed_end)

    # Calculate depth_breed from everwader
    breed_season_depth <- breed_season_data %>%
      stars::st_apply(c(1, 2), mean) %>%
      setNames("breed_season_depth")
    init_depth <- breed_season_data[, , , 1] %>%
      setNames("init_depth")

    # Calculate recession from everwader
    recession <- calc_recession(breed_season_data) %>%
      setNames("recession")
    pre_recession <- calc_recession(pre_breed_season_data) %>%
      setNames("pre_recession")
    post_recession <- calc_recession(post_breed_season_data) %>%
      setNames("post_recession")

    # Calculate dryindex from everwader
    # TODO: USGS code calculates this from t-3 3/31 to t 6/30 and converts
    # the first value to NA. To replicate, we would need to load three years
    # of data and be careful with other applications using min/max, or
    # replace this with a different predictor since it's unclear why the lag
    # would be important here. We could also fit the lag rather than building
    # it into the feature.
    dry_days <- calc_dry_days(breed_season_data) %>%
      setNames("dry_days")

    # Calculate reversals following Peterson 2017
    reversals <- calc_reversals(breed_season_data) %>%
      setNames("reversals")

    predictors <- list(init_depth, breed_season_depth, recession, pre_recession,
                       post_recession, dry_days, reversals)
    for (predictor in predictors) {
      year_covariates <- extract_region_means(predictor, boundaries_utm) %>%
        dplyr::mutate(year = year)
      covariates <- rbind(covariates, year_covariates)
    }
  }
  return(covariates)
}
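# Typical call, mirroring the call sites in get_water_data.R (a sketch; the
# Name column comes from the regions shapefile):
#
#   covs <- get_eden_covariates(level = "subregions")
#   dplyr::select(as.data.frame(covs), Name, year, variable, value)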
#' @name get_eden_depths
#'
#' @title Generate regional daily mean depth using EDEN data
#'
#' @param level region level to load (all, wcas, or subregions)
#' @param eden_path path where the EDEN data should be stored
#' @param years numeric vector of years to collect, defaults to all available years
#' @param boundaries_path name of a shape file holding the boundaries
#'   within which to calculate depth
#'
#' @return data.frame covariate data including columns for region, date,
#'   depth value, and the geometry of the region
#'
#' @export
#'
get_eden_depths <- function(level = "subregions",
                            eden_path = file.path("Water"),
                            years = available_years(eden_path),
                            boundaries_path = file.path("SiteandMethods/regions")) {

  eden_data_files <- list.files(eden_path, pattern = "_depth.nc", full.names = TRUE)
  boundaries <- load_boundaries(boundaries_path, level)
  examp_eden_file <- stars::read_stars(eden_data_files[1])
  boundaries_utm <- sf::st_transform(boundaries, sf::st_crs(examp_eden_file))

  new_data <- c()
  for (year in years) {
    print(paste0("Processing ", year, "..."))
    pattern <- paste0(year, "_.*_depth.nc")
    nc_files <- list.files(eden_path, pattern, full.names = TRUE)
    # Unlike get_eden_covariates, keep raw depths (negative values included)
    year_data <- stars::read_stars(nc_files, along = "time") %>%
      setNames("depth")

    region_means <- aggregate(year_data, boundaries_utm, mean, na.rm = TRUE)
    region_sd <- aggregate(year_data, boundaries_utm, sd, na.rm = TRUE)
    region_max <- aggregate(year_data, boundaries_utm, max, na.rm = TRUE)
    region_min <- aggregate(year_data, boundaries_utm, min, na.rm = TRUE)

    new_year <- reshape_star(region_means, variable = "depth_mean", year = year, boundaries = boundaries_utm) %>%
      merge(reshape_star(region_sd, variable = "depth_sd", year = year, boundaries = boundaries_utm)) %>%
      merge(reshape_star(region_max, variable = "depth_max", year = year, boundaries = boundaries_utm)) %>%
      merge(reshape_star(region_min, variable = "depth_min", year = year, boundaries = boundaries_utm))

    new_data <- rbind(new_data, new_year)
  }
  return(new_data)
}

#' @name reshape_star
#'
#' @title Reshape stars object to data frame
#'
#' @param data data to reshape
#' @param variable value to reshape on
#' @param year data year
#' @param boundaries boundaries object
#'
#' @return data.frame depth data including columns for region, date, depth type value
#'
#' @export
#'
reshape_star <- function(data, variable = "depth", year, boundaries) {

  region_spdf <- boundaries %>% dplyr::mutate(value = data$depth)
  new_region <- as.data.frame(region_spdf$value) %>%
    dplyr::mutate_all(as.double)
  colnames(new_region) <- as.character(as.Date(stars::st_get_dimension_values(data, "time")))
  new_data <- new_region %>%
    dplyr::mutate(region = region_spdf$Name) %>%
    tidyr::pivot_longer(starts_with(as.character(year)),
                        names_to = "date", values_to = "value") %>%
    dplyr::select(date, region, value) %>%
    dplyr::rename(!!variable := value)
  return(new_data)
}
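# Daily depth summary sketch, as used in get_water_data.R (illustrative):
#
#   depths <- get_eden_depths(level = "all")
#   head(depths)   # columns: date, region, depth_mean, depth_sd, depth_max, depth_min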
diff --git a/DataCleaningScripts/get_water_data.R b/DataCleaningScripts/get_water_data.R
index 6cef7f5c..b467cbfb 100644
--- a/DataCleaningScripts/get_water_data.R
+++ b/DataCleaningScripts/get_water_data.R
@@ -2,17 +2,19 @@ #'
 `%>%` <- magrittr::`%>%`
 
+source("DataCleaningScripts/download_eden.R")
+source("DataCleaningScripts/eden_covariates.R")
 
 # Downloads new EDEN depth data, calculates covariates, appends to covariate file
 
 get_eden_data <- function() {
 
-wader::download_eden_depths()
+download_eden_depths()
 
-covariate_data <- read.table("../Water/eden_covariates.csv", header = TRUE, sep = ",")
-new_data <- wader::get_eden_covariates()
-new_data2 <- wader::get_eden_covariates(level="all")
-new_data3 <- wader::get_eden_covariates(level="wcas")
+covariate_data <- read.table("Water/eden_covariates.csv", header = TRUE, sep = ",")
+new_data <- get_eden_covariates()
+new_data2 <- get_eden_covariates(level="all")
+new_data3 <- get_eden_covariates(level="wcas")
 
 all_data <- dplyr::bind_rows(new_data,new_data2,new_data3) %>%
             dplyr::select(year, region=Name, variable, value) %>%
             as.data.frame() %>%
@@ -23,17 +25,17 @@ new_covariates <- all_data %>%
   merge(dplyr::filter(covariate_data, !date %in% all_data$year)) %>%
   dplyr::arrange("year", "region")
 
-depth_data <- read.table("../Water/eden_depth.csv", header = TRUE, sep = ",") %>%
+depth_data <- read.table("Water/eden_depth.csv", header = TRUE, sep = ",") %>%
   dplyr::mutate(date=as.Date(date))
-new_depths <- wader::get_eden_depths() %>%
-  dplyr::bind_rows(wader::get_eden_depths(level="all")) %>%
-  dplyr::bind_rows(wader::get_eden_depths(level="wcas")) %>%
+new_depths <- get_eden_depths() %>%
+  dplyr::bind_rows(get_eden_depths(level="all")) %>%
+  dplyr::bind_rows(get_eden_depths(level="wcas")) %>%
   dplyr::mutate(date=as.Date(date))
 new_depths <- new_depths %>%
   merge(dplyr::filter(depth_data, !date %in% new_depths$date)) %>%
   dplyr::arrange("date", "region")
 
-file.remove(dir(path=file.path(get_default_data_path(), 'EvergladesWadingBird/Water'), pattern="_.*_depth.nc"))
+file.remove(dir(path=file.path('Water'), pattern="_.*_depth.nc"))
 
 return(list(new_covariates=new_covariates, new_depths=new_depths))
 }
@@ -46,10 +48,10 @@ update_water <- function() {
 
   data <- get_eden_data()
 
-  write.table(data$new_covariates, "../Water/eden_covariates.csv", row.names = FALSE, col.names = TRUE,
+  write.table(data$new_covariates, "Water/eden_covariates.csv", row.names = FALSE, col.names = TRUE,
               na="", sep = ",", quote = FALSE)
 
-  write.table(data$new_depths, file = "../Water/eden_depth.csv",
+  write.table(data$new_depths, file = "Water/eden_depth.csv",
               row.names = FALSE, col.names = TRUE, na = "", sep = ",", quote = FALSE)
 }
diff --git a/install-packages.R b/install-packages.R
index d2ae6b58..4a7e0d2f 100644
--- a/install-packages.R
+++ b/install-packages.R
@@ -2,8 +2,5 @@
 if ("pacman" %in% rownames(installed.packages()) == FALSE) install.packages("pacman")
 
 # Install packages required for analysis
-
-pacman::p_load(git2r, httr, semver, sf, testthat, yaml,
-               dplyr, lubridate, remotes)
-
-remotes::install_github("weecology/wader")
+pacman::p_load(git2r, httr, semver, testthat, yaml,
+               dplyr, lubridate, sf)
\ No newline at end of file