Skip to content

Commit

Permalink
spatial packages fu [patch]
Browse files Browse the repository at this point in the history
  • Loading branch information
gmyenni committed Nov 2, 2023
1 parent 7881a11 commit c82b36e
Show file tree
Hide file tree
Showing 5 changed files with 451 additions and 20 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ jobs:
- name: Install system dependencies
run: |
sudo apt remove libudunits2-dev libgdal-dev libgeos-dev libproj-dev
sudo apt-get install libgit2-dev libcurl4-openssl-dev libudunits2-dev libgdal-dev libnetcdf-dev libgeos-dev libproj-dev
sudo apt-get -y update && sudo apt-get install -y \
libgit2-dev libicu-dev gdal-bin proj-data proj-bin libv8-dev libprotobuf-dev protobuf-compiler \
libudunits2-dev libgdal-dev libgeos-dev libproj-dev libfontconfig1-dev libjq-dev libmysqlclient-dev libpng-dev
sudo apt-get update
- name: Install packages
run: Rscript install-packages.R

Expand Down
136 changes: 136 additions & 0 deletions DataCleaningScripts/download_eden.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Functions to find and download EDEN water depth data
#' @name get_metadata
#'
#' @title Get EDEN metadata
#'
#' @description Reads the EDEN depths THREDDS catalog page and turns the first
#'   HTML table on that page into a listing of the available data files.
#'
#' @return table (as produced by `rvest::html_table()`) with one row per
#'   catalog entry and the columns `dataset`, `size`, `last_modified`
#'   (POSIXct) and `year` (integer, the first four characters of `dataset`)
#'
#' @export
#'

get_metadata <- function() {
  url <- "https://sflthredds.er.usgs.gov/thredds/catalog/eden/depths/catalog.html"
  catalog_tables <- url %>%
    rvest::read_html() %>%
    rvest::html_table()

  # The file listing is taken from the first table on the catalog page.
  metadata <- catalog_tables[[1]] %>%
    dplyr::filter(Dataset != "depths") %>% # Drop directory name from first row
    dplyr::rename(dataset = Dataset, size = Size,
                  last_modified = `Last Modified`) %>%
    dplyr::mutate(
      # No tz is supplied, so the timestamps are interpreted in the session's
      # local time zone.
      last_modified = as.POSIXct(last_modified,
                                 format = "%Y-%m-%dT%H:%M:%S"),
      year = as.integer(substr(dataset, start = 1, stop = 4))
    )

  # Return the table as the last expression (not as the value of an
  # assignment) so the result is visible when called interactively.
  metadata
}

#' @name get_data_urls
#'
#' @title Get EDEN depths data URLs for download
#'
#' @description Builds the download URL of each requested file by appending
#'   its name to the EDEN depths file-server address.
#'
#' @param file_names file names to download from metadata
#'
#' @return list with two elements: `file_names` (the input, unchanged) and
#'   `urls` (the matching download URLs, in the same order)
#'
#' @export
#'

get_data_urls <- function(file_names) {
  depths_root <- "https://sflthredds.er.usgs.gov/thredds/fileServer/eden/depths"
  list(
    file_names = file_names,
    urls = file.path(depths_root, file_names)
  )
}

#' @name get_last_download
#'
#' @title Get list of EDEN depths data already downloaded
#'
#' @description Reads `last_download.csv` from `eden_path` when it exists and
#'   `force_update` is FALSE. Otherwise builds a placeholder record for every
#'   dataset in `metadata` with a size of "0 Mbytes" and a last-modified time
#'   in the year 1900, so that every remote file compares as newer.
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @return table of files already downloaded. Note that `last_modified` is
#'   whatever `read.csv()` yields when the record is read from disk, but a
#'   POSIXct value when the placeholder record is built.
#'
#' @export
#'
get_last_download <- function(eden_path = file.path("Water"),
                              metadata, force_update = FALSE) {
  record_path <- file.path(eden_path, "last_download.csv")

  # `&&` because this is a scalar condition; it also skips the negation when
  # there is no record file to read.
  if (file.exists(record_path) && !force_update) {
    last_download <- read.csv(record_path)
  } else {
    last_download <- data.frame(
      dataset = metadata$dataset,
      size = "0 Mbytes",
      last_modified = as.POSIXct("1900-01-01 00:00:01",
                                 format = "%Y-%m-%d %H:%M:%S")
    )
  }

  last_download
}

#' @name get_files_to_update
#'
#' @title Determine list of new EDEN files to download
#'
#' @description Compares the current catalog `metadata` with the record of the
#'   last download. A file counts as changed when its remote modification time
#'   is later than the recorded one, its size differs, or it has no recorded
#'   modification time. The result contains every file in `metadata` whose
#'   year lies within two years of any changed file.
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @return the rows of `metadata` that should be downloaded (zero rows when
#'   nothing has changed)
#'
#' @export
#'
get_files_to_update <- function(eden_path = file.path("Water"),
                                metadata, force_update = FALSE) {
  # Find files that have been updated since last download
  last_download <- get_last_download(eden_path, metadata,
                                     force_update = force_update)

  # Keep only the columns used in the comparison. A record read back from
  # disk can carry extra columns (e.g. `year`) that would otherwise be
  # suffixed by the join and leave `new$year` undefined.
  last_download <- last_download[, c("dataset", "size", "last_modified"),
                                 drop = FALSE]

  new <- metadata %>%
    dplyr::left_join(last_download, by = "dataset",
                     suffix = c(".curr", ".last")) %>%
    dplyr::filter(last_modified.curr > last_modified.last |
                    size.curr != size.last |
                    is.na(last_modified.last))

  # Union of the +/- 2 year windows around every changed file. Membership in
  # this set works for any number of changed files, including none, whereas
  # range bounds of arbitrary length do not line up with the rows of
  # `metadata`.
  years_to_update <- unique(as.vector(outer(new$year, -2:2, `+`)))

  metadata %>%
    dplyr::filter(year %in% years_to_update)
}

#' @name update_last_download
#'
#' @title Write new metadata file for files already downloaded
#'
#' @description Writes `metadata` to `last_download.csv` inside `eden_path`,
#'   replacing any previous record.
#'
#' @param eden_path path where the EDEN data should be stored
#' @param metadata EDEN file metadata
#'
#' @return the value of `write.csv()`; called for its side effect of writing
#'   the record file
#'
#' @export
#'
update_last_download <- function(eden_path = file.path("Water"),
                                 metadata) {
  # Row names are omitted so that reading the record back does not produce an
  # extra index column.
  write.csv(metadata, file.path(eden_path, "last_download.csv"),
            row.names = FALSE)
}

#' @name download_eden_depths
#'
#' @title Download the EDEN depths data
#'
#' @description Creates `eden_path` if needed, reads the remote catalog,
#'   works out which files need to be fetched, downloads them into
#'   `eden_path` and then records the catalog state in `last_download.csv`.
#'
#' @param eden_path path where the EDEN data should be stored
#' @param force_update if TRUE update all data files even if checks indicate
#'   that remote files are unchanged since the current local copies were
#'   created
#'
#' @return char vector of downloaded/updated files
#'
#' @export
#'
download_eden_depths <- function(eden_path = file.path("Water"),
                                 force_update = FALSE) {

  if (!dir.exists(eden_path)) {
    dir.create(eden_path, recursive = TRUE)
  }

  metadata <- get_metadata()
  to_update <- get_files_to_update(eden_path, metadata,
                                   force_update = force_update)
  data_urls <- get_data_urls(to_update$dataset)
  local_paths <- file.path(eden_path, data_urls$file_names)

  # Raise the download timeout for the duration of this call only: never
  # lower a larger value the user has set, and restore the previous value on
  # exit (including on error).
  old_options <- options(timeout = max(226, getOption("timeout")))
  on.exit(options(old_options), add = TRUE)

  # mode = "wb" keeps the binary .nc files intact on platforms where the
  # default download mode is text.
  statuses <- unlist(Map(download.file,
                         data_urls$urls,
                         local_paths,
                         MoreArgs = list(mode = "wb")))

  # download.file() reports failure with a non-zero status; do not record
  # files as downloaded when that happens.
  failed <- local_paths[statuses != 0]
  if (length(failed) > 0) {
    stop("Download failed for: ", paste(failed, collapse = ", "),
         call. = FALSE)
  }

  update_last_download(eden_path, metadata)

  local_paths
}
Loading

0 comments on commit c82b36e

Please sign in to comment.