From c82b36ea8bf1de5df2367137f4a5ed3c36a8cad2 Mon Sep 17 00:00:00 2001
From: Glenda Yenni
Date: Mon, 11 Sep 2023 13:51:40 -0600
Subject: [PATCH] spatial packages fu [patch]

---
 .github/workflows/main.yml            |   7 +-
 DataCleaningScripts/download_eden.R   | 136 ++++++++++++
 DataCleaningScripts/eden_covariates.R | 295 ++++++++++++++++++++++++++
 DataCleaningScripts/get_water_data.R  |  26 +--
 install-packages.R                    |   7 +-
 5 files changed, 451 insertions(+), 20 deletions(-)
 create mode 100644 DataCleaningScripts/download_eden.R
 create mode 100644 DataCleaningScripts/eden_covariates.R

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 523d33af..0ae16dcc 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -49,9 +49,10 @@ jobs:
 
     - name: Install system dependencies
       run: |
-        sudo apt remove libudunits2-dev libgdal-dev libgeos-dev libproj-dev
-        sudo apt-get install libgit2-dev libcurl4-openssl-dev libudunits2-dev libgdal-dev libnetcdf-dev libgeos-dev libproj-dev
-
+        sudo apt-get -y update && sudo apt-get install -y \
+          libgit2-dev libicu-dev gdal-bin proj-data proj-bin libv8-dev libprotobuf-dev protobuf-compiler \
+          libudunits2-dev libgdal-dev libgeos-dev libproj-dev libfontconfig1-dev libjq-dev libmysqlclient-dev libpng-dev
+        sudo apt-get update
 
     - name: Install packages
       run: Rscript install-packages.R

diff --git a/DataCleaningScripts/download_eden.R b/DataCleaningScripts/download_eden.R
new file mode 100644
index 00000000..9c0b91ec
--- /dev/null
+++ b/DataCleaningScripts/download_eden.R
@@ -0,0 +1,136 @@
# Functions to find and download EDEN water depth data

#' @name get_metadata
#'
#' @title Get EDEN metadata
#'
#' @export
#'
get_metadata <- function() {
  url <- "https://sflthredds.er.usgs.gov/thredds/catalog/eden/depths/catalog.html"
  metadata <- url %>%
    rvest::read_html() %>%
    rvest::html_table()
  metadata <- metadata[[1]] %>%
    dplyr::filter(Dataset != "depths") %>% # Drop directory name from first row
    dplyr::rename(dataset = Dataset, size = Size, last_modified = `Last Modified`) %>%
    dplyr::mutate(last_modified = as.POSIXct(last_modified,
                                             format = "%Y-%m-%dT%H:%M:%S"),
                  year = as.integer(substr(dataset, start = 1, stop = 4)))
  return(metadata)
}

#' @name get_data_urls
#'
#' @title Get EDEN depths data URLs for download
#'
#' @param file_names file names to download from metadata
#'
#' @return list of file urls
#'
#' @export
#'
get_data_urls <- function(file_names) {
  base_url <- "https://sflthredds.er.usgs.gov/thredds/fileServer/eden/depths"
  urls <- file.path(base_url, file_names)
  return(list(file_names = file_names, urls = urls))
}

#' @name get_last_download
#'
#' @title Get list of EDEN depths data already downloaded
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @return table of files already downloaded
#'
#' @export
#'
get_last_download <- function(eden_path = file.path("Water"),
                              metadata, force_update = FALSE) {
  if ("last_download.csv" %in% list.files(eden_path) && !force_update) {
    last_download <- read.csv(file.path(eden_path, "last_download.csv"))
  } else {
    # No download record yet (or forced refresh): report everything as old
    # and zero-sized so that all remote files look new
    last_download <- data.frame(dataset = metadata$dataset, size = "0 Mbytes",
                                last_modified = as.POSIXct("1900-01-01 00:00:01",
                                                           format = "%Y-%m-%d %H:%M:%S"))
  }
  return(last_download)
}
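# Interactive sketch of the helpers above (not run on source; assumes network
# access to the USGS THREDDS server, and the file name shown is illustrative,
# not verified):
#
#   meta <- get_metadata()
#   meta$dataset[1]                        # e.g. "1991_q1_depth.nc"
#   get_data_urls(meta$dataset[1])$urls    # full download URL for that file
#   get_last_download("Water", meta)       # placeholder table on first run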
#' @name get_files_to_update
#'
#' @title Determine list of new EDEN files to download
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @export
#'
get_files_to_update <- function(eden_path = file.path("Water"),
                                metadata, force_update = FALSE) {
  # Find files that have been updated since last download
  last_download <- get_last_download(eden_path, metadata, force_update = force_update)
  new <- metadata %>%
    dplyr::left_join(last_download, by = "dataset", suffix = c(".curr", ".last")) %>%
    dplyr::filter(last_modified.curr > last_modified.last |
                    size.curr != size.last |
                    is.na(last_modified.last))
  # Also refresh files within two years of any updated file
  metadata %>%
    dplyr::filter(dplyr::between(year, min(new$year) - 2, max(new$year) + 2))
}

#' @name update_last_download
#'
#' @title Write new metadata file for files already downloaded
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#'
#' @export
#'
update_last_download <- function(eden_path = file.path("Water"),
                                 metadata) {
  write.csv(metadata, file.path(eden_path, "last_download.csv"), row.names = FALSE)
}

#' @name download_eden_depths
#'
#' @title Download the EDEN depths data
#'
#' @param eden_path path where the EDEN data should be stored
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @return char vector of downloaded/updated files
#'
#' @export
#'
download_eden_depths <- function(eden_path = file.path("Water"),
                                 force_update = FALSE) {

  if (!dir.exists(eden_path)) {
    dir.create(eden_path, recursive = TRUE)
  }

  metadata <- get_metadata()
  to_update <- get_files_to_update(eden_path, metadata,
                                   force_update = force_update)
  data_urls <- get_data_urls(to_update$dataset)
  # Large netCDF files can exceed the default 60 s download timeout
  options(timeout = 226)

  mapply(download.file,
         data_urls$urls,
         file.path(eden_path, data_urls$file_names))

  update_last_download(eden_path, metadata)

  return(file.path(eden_path, data_urls$file_names))
}
diff --git a/DataCleaningScripts/eden_covariates.R b/DataCleaningScripts/eden_covariates.R
new file mode 100644
index 00000000..861a4a45
--- /dev/null
+++ b/DataCleaningScripts/eden_covariates.R
@@ -0,0 +1,295 @@
# Calculate standard water depth covariates from EDEN data following the WADEM model

#' @name load_boundaries
#'
#' @title Load region boundary polygons
#'
#' @param path path to regions shapefile
#' @param level region level to load (all, wcas, or subregions)
#'
#' @export
#'
load_boundaries <- function(path = file.path("SiteandMethods/regions"),
                            level = "subregions") {
  level <- tolower(level)
  boundaries <- sf::st_read(file.path(path, paste0(level, ".shp")))
  return(boundaries)
}

#' @name calc_dry_days
#'
#' @title Calculate dry days from everwader
#'
#' @param depth_data depth .nc files
#'
#' @export
#'
calc_dry_days <- function(depth_data) {
  dry_days <- depth_data %>%
    dplyr::mutate(dry_days = dplyr::case_when(
      depth <= units::set_units(0, cm) ~ units::set_units(1, d),
      depth > units::set_units(0, cm) ~ units::set_units(0, d),
      is.na(depth) ~ units::set_units(NA, d)),
      .keep = "none") %>%
    stars::st_apply(c(1, 2), sum)
  return(dry_days)
}
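# Example of how calc_dry_days is meant to be called (a sketch; assumes depth
# files for one season have already been downloaded into "Water"):
#
#   season <- stars::read_stars(list.files("Water", "2022_.*_depth.nc",
#                                          full.names = TRUE), along = "time") %>%
#     setNames("depth")
#   calc_dry_days(season)   # 2-D grid: count of days with depth <= 0 cm per cell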
#' @name calc_recession
#'
#' @title Calculate recession from everwader
#'
#' @param depth_data depth .nc files
#'
#' @export
#'
calc_recession <- function(depth_data) {
  times <- stars::st_get_dimension_values(depth_data, "time")
  start_depth <- depth_data |>
    dplyr::filter(time == min(times)) |>
    stars::st_set_dimensions("time", values = NULL)
  end_depth <- depth_data |>
    dplyr::filter(time == max(times)) |>
    stars::st_set_dimensions("time", values = NULL)
  recession <- start_depth - end_depth
  days <- as.integer(max(times) - min(times))
  recession_rate <- recession / days
  return(recession_rate)
}

#' @name calc_reversals
#'
#' @title Calculate reversals following Peterson 2017
#'
#' @param depth_data depth .nc files
#'
#' @export
#'
calc_reversals <- function(depth_data) {
  end_date_position <- stars::st_dimensions(depth_data)$time$to
  depth_t <- depth_data[, , , 2:end_date_position] |>
    stars::st_set_dimensions("time", values = seq(1, end_date_position - 1))
  depth_t_minus_1 <- depth_data[, , , 1:(end_date_position - 1)] |>
    stars::st_set_dimensions("time", values = seq(1, end_date_position - 1))
  # A reversal is any day-over-day rise in depth
  depth_deltas <- depth_t - depth_t_minus_1
  reversals <- depth_deltas %>%
    dplyr::mutate(reversal = dplyr::case_when(
      depth > units::set_units(0, cm) ~ units::set_units(1, d),
      depth <= units::set_units(0, cm) ~ units::set_units(0, d),
      is.na(depth) ~ units::set_units(NA, d)),
      .keep = "none") %>%
    stars::st_apply(c(1, 2), sum)
  return(reversals)
}


#' @name extract_region_means
#'
#' @title Calculate region means from raster data
#'
#' @param raster variable raster
#' @param regions regions polygons
#'
#' @return region means spdf
#'
#' @export
#'
extract_region_means <- function(raster, regions) {
  var_name <- names(raster)
  # The base aggregate generic dispatches to the stars method, which averages
  # cells within each polygon
  region_means <- aggregate(raster, regions, mean, na.rm = TRUE) %>%
    setNames("value")
  region_means_spdf <- regions %>%
    dplyr::mutate(variable = var_name, value = region_means$value)
  return(region_means_spdf)
}

#' @name available_years
#'
#' @title Get list of years available for covariate calculation
#'
#' @param eden_path path where the EDEN data should be stored
#'
#' @return vector of years
#'
#' @export
#'
available_years <- function(eden_path = file.path("Water")) {
  eden_data_files <- list.files(eden_path, pattern = "_depth.nc")
  years <- eden_data_files %>%
    stringr::str_split("_", simplify = TRUE) %>%
    .[, 1] %>%
    unique()
  return(years)
}
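# Recession and reversal sketch (illustrative; reuses the season object from
# the calc_dry_days example above):
#
#   calc_recession(season)   # cm/day drop from first to last day, per cell
#   calc_reversals(season)   # count of day-over-day depth rises, per cell
#   available_years()        # e.g. c("1991", "1992", ...)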
#' @name get_eden_covariates
#'
#' @title Generate annual scale water covariates using EDEN data
#'
#' @param level region level to load (all, wcas, or subregions)
#' @param eden_path path where the EDEN data should be stored
#' @param years numeric vector of years to generate covariates for,
#'   defaults to all available years
#' @param boundaries_path name of a shape file holding the boundaries
#'   within which to calculate covariates
#'
#' @return data.frame covariate data including columns for region, year,
#'   covariate, value, and the geometry of the region
#'
#' @export
#'
get_eden_covariates <- function(level = "subregions",
                                eden_path = file.path("Water"),
                                years = available_years(eden_path),
                                boundaries_path = file.path("SiteandMethods/regions")) {

  eden_data_files <- list.files(eden_path, pattern = "_depth.nc")
  boundaries <- load_boundaries(boundaries_path, level)
  examp_eden_file <- stars::read_stars(file.path(eden_path, eden_data_files[1]))
  boundaries_utm <- sf::st_transform(boundaries, sf::st_crs(examp_eden_file))

  covariates <- c()
  for (year in years) {
    print(paste0("Processing ", year, "..."))
    pattern <- paste0(year, "_.*_depth.nc")
    nc_files <- list.files(eden_path, pattern, full.names = TRUE)
    # Truncate negative (below-ground) water levels to 0 cm
    year_data <- stars::read_stars(nc_files, along = "time") %>%
      setNames("depth") %>%
      dplyr::mutate(depth = dplyr::case_when(
        depth < units::set_units(0, cm) ~ units::set_units(0, cm),
        depth >= units::set_units(0, cm) ~ depth,
        is.na(depth) ~ units::set_units(NA, cm)))

    breed_start <- as.POSIXct(paste0(year, "-01-01"))
    breed_end <- as.POSIXct(paste0(year, "-06-30"))
    breed_season_data <- year_data %>%
      dplyr::filter(time >= breed_start, time <= breed_end)

    # Do a pre-breed/post-breed split to allow pre-breeding recession
    # calculations following Peterson 2017. Peterson does this on a
    # per-species basis. To start, just pick the mid-point for the different
    # species to split on.
    pre_breed_end <- as.POSIXct(paste0(year, "-03-01"))
    pre_breed_season_data <- year_data %>%
      dplyr::filter(time >= breed_start, time <= pre_breed_end)
    post_breed_season_data <- year_data %>%
      dplyr::filter(time >= pre_breed_end, time <= breed_end)

    # Calculate depth_breed from everwader
    breed_season_depth <- breed_season_data %>%
      stars::st_apply(c(1, 2), mean) %>%
      setNames("breed_season_depth")
    init_depth <- breed_season_data[, , , 1] %>%
      setNames("init_depth")

    # Calculate recession from everwader
    recession <- calc_recession(breed_season_data) %>%
      setNames("recession")
    pre_recession <- calc_recession(pre_breed_season_data) %>%
      setNames("pre_recession")
    post_recession <- calc_recession(post_breed_season_data) %>%
      setNames("post_recession")

    # Calculate dryindex from everwader
    # TODO: USGS code calculates this from t-3 3/31 to t 6/30 and converts
    # the first value to NA. To replicate, we would need to load three years
    # of data and be careful with other applications using min/max, or
    # replace this with a different predictor since it's unclear why the lag
    # would be important here. We could also fit the lag rather than building
    # it into the feature.
    dry_days <- calc_dry_days(breed_season_data) %>%
      setNames("dry_days")

    # Calculate reversals following Peterson 2017
    reversals <- calc_reversals(breed_season_data) %>%
      setNames("reversals")

    predictors <- list(init_depth, breed_season_depth, recession, pre_recession,
                       post_recession, dry_days, reversals)
    for (predictor in predictors) {
      year_covariates <- extract_region_means(predictor, boundaries_utm) %>%
        dplyr::mutate(year = year)
      covariates <- rbind(covariates, year_covariates)
    }
  }
  return(covariates)
}
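# Typical call, mirroring the call sites in get_water_data.R (a sketch; the
# Name column comes from the regions shapefile):
#
#   covs <- get_eden_covariates(level = "subregions")
#   dplyr::select(as.data.frame(covs), Name, year, variable, value)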
#' @name get_eden_depths
#'
#' @title Generate regional daily mean depth using EDEN data
#'
#' @param level region level to load (all, wcas, or subregions)
#' @param eden_path path where the EDEN data should be stored
#' @param years numeric vector of years to collect, defaults to all available years
#' @param boundaries_path name of a shape file holding the boundaries
#'   within which to calculate depth
#'
#' @return data.frame covariate data including columns for region, date,
#'   depth value, and the geometry of the region
#'
#' @export
#'
get_eden_depths <- function(level = "subregions",
                            eden_path = file.path("Water"),
                            years = available_years(eden_path),
                            boundaries_path = file.path("SiteandMethods/regions")) {

  eden_data_files <- list.files(eden_path, pattern = "_depth.nc", full.names = TRUE)
  boundaries <- load_boundaries(boundaries_path, level)
  examp_eden_file <- stars::read_stars(eden_data_files[1])
  boundaries_utm <- sf::st_transform(boundaries, sf::st_crs(examp_eden_file))

  new_data <- c()
  for (year in years) {
    print(paste0("Processing ", year, "..."))
    pattern <- paste0(year, "_.*_depth.nc")
    nc_files <- list.files(eden_path, pattern, full.names = TRUE)
    # Unlike get_eden_covariates, keep raw depths (negative values included)
    year_data <- stars::read_stars(nc_files, along = "time") %>%
      setNames("depth")

    region_means <- aggregate(year_data, boundaries_utm, mean, na.rm = TRUE)
    region_sd <- aggregate(year_data, boundaries_utm, sd, na.rm = TRUE)
    region_max <- aggregate(year_data, boundaries_utm, max, na.rm = TRUE)
    region_min <- aggregate(year_data, boundaries_utm, min, na.rm = TRUE)

    new_year <- reshape_star(region_means, variable = "depth_mean", year = year, boundaries = boundaries_utm) %>%
      merge(reshape_star(region_sd, variable = "depth_sd", year = year, boundaries = boundaries_utm)) %>%
      merge(reshape_star(region_max, variable = "depth_max", year = year, boundaries = boundaries_utm)) %>%
      merge(reshape_star(region_min, variable = "depth_min", year = year, boundaries = boundaries_utm))

    new_data <- rbind(new_data, new_year)
  }
  return(new_data)
}

#' @name reshape_star
#'
#' @title Reshape stars object to data frame
#'
#' @param data data to reshape
#' @param variable value to reshape on
#' @param year data year
#' @param boundaries boundaries object
#'
#' @return data.frame depth data including columns for region, date, depth type value
#'
#' @export
#'
reshape_star <- function(data, variable = "depth", year, boundaries) {

  region_spdf <- boundaries %>% dplyr::mutate(value = data$depth)
  new_region <- as.data.frame(region_spdf$value) %>%
    dplyr::mutate_all(as.double)
  colnames(new_region) <- as.character(as.Date(stars::st_get_dimension_values(data, "time")))
  new_data <- new_region %>%
    dplyr::mutate(region = region_spdf$Name) %>%
    tidyr::pivot_longer(starts_with(as.character(year)),
                        names_to = "date", values_to = "value") %>%
    dplyr::select(date, region, value) %>%
    dplyr::rename(!!variable := value)
  return(new_data)
}
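# Daily depth summary sketch, as used in get_water_data.R (illustrative):
#
#   depths <- get_eden_depths(level = "all")
#   head(depths)   # columns: date, region, depth_mean, depth_sd, depth_max, depth_min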
diff --git a/DataCleaningScripts/get_water_data.R b/DataCleaningScripts/get_water_data.R
index 6cef7f5c..b467cbfb 100644
--- a/DataCleaningScripts/get_water_data.R
+++ b/DataCleaningScripts/get_water_data.R
@@ -2,17 +2,19 @@ #'
 `%>%` <- magrittr::`%>%`
 
+source("DataCleaningScripts/download_eden.R")
+source("DataCleaningScripts/eden_covariates.R")
 
 # Downloads new EDEN depth data, calculates covariates, appends to covariate file
 
 get_eden_data <- function() {
 
-wader::download_eden_depths()
+download_eden_depths()
 
-covariate_data <- read.table("../Water/eden_covariates.csv", header = TRUE, sep = ",")
-new_data <- wader::get_eden_covariates()
-new_data2 <- wader::get_eden_covariates(level="all")
-new_data3 <- wader::get_eden_covariates(level="wcas")
+covariate_data <- read.table("Water/eden_covariates.csv", header = TRUE, sep = ",")
+new_data <- get_eden_covariates()
+new_data2 <- get_eden_covariates(level="all")
+new_data3 <- get_eden_covariates(level="wcas")
 
 all_data <- dplyr::bind_rows(new_data,new_data2,new_data3) %>%
             dplyr::select(year, region=Name, variable, value) %>%
             as.data.frame() %>%
@@ -23,17 +25,17 @@ new_covariates <- all_data %>%
   merge(dplyr::filter(covariate_data, !date %in% all_data$year)) %>%
   dplyr::arrange("year", "region")
 
-depth_data <- read.table("../Water/eden_depth.csv", header = TRUE, sep = ",") %>%
+depth_data <- read.table("Water/eden_depth.csv", header = TRUE, sep = ",") %>%
   dplyr::mutate(date=as.Date(date))
-new_depths <- wader::get_eden_depths() %>%
-  dplyr::bind_rows(wader::get_eden_depths(level="all")) %>%
-  dplyr::bind_rows(wader::get_eden_depths(level="wcas")) %>%
+new_depths <- get_eden_depths() %>%
+  dplyr::bind_rows(get_eden_depths(level="all")) %>%
+  dplyr::bind_rows(get_eden_depths(level="wcas")) %>%
   dplyr::mutate(date=as.Date(date))
 new_depths <- new_depths %>%
   merge(dplyr::filter(depth_data, !date %in% new_depths$date)) %>%
   dplyr::arrange("date", "region")
 
-file.remove(dir(path=file.path(get_default_data_path(), 'EvergladesWadingBird/Water'), pattern="_.*_depth.nc"))
+file.remove(dir(path=file.path('Water'), pattern="_.*_depth.nc"))
 
 return(list(new_covariates=new_covariates, new_depths=new_depths))
 }
@@ -46,10 +48,10 @@ update_water <- function() {
 
   data <- get_eden_data()
 
-  write.table(data$new_covariates, "../Water/eden_covariates.csv", row.names = FALSE, col.names = TRUE,
+  write.table(data$new_covariates, "Water/eden_covariates.csv", row.names = FALSE, col.names = TRUE,
               na="", sep = ",", quote = FALSE)
 
-  write.table(data$new_depths, file = "../Water/eden_depth.csv",
+  write.table(data$new_depths, file = "Water/eden_depth.csv",
               row.names = FALSE, col.names = TRUE, na = "", sep = ",", quote = FALSE)
 }
diff --git a/install-packages.R b/install-packages.R
index d2ae6b58..4a7e0d2f 100644
--- a/install-packages.R
+++ b/install-packages.R
@@ -2,8 +2,5 @@
 if ("pacman" %in% rownames(installed.packages()) == FALSE) install.packages("pacman")
 
 # Install packages required for analysis
-
-pacman::p_load(git2r, httr, semver, sf, testthat, yaml,
-               dplyr, lubridate, remotes)
-
-remotes::install_github("weecology/wader")
+pacman::p_load(git2r, httr, semver, testthat, yaml,
+               dplyr, lubridate, sf)
\ No newline at end of file