Skip to content

Commit

Permalink
Merge pull request #125 from weecology/eden-downloads
Browse files Browse the repository at this point in the history
Keep eden downloads and add data [minor]
  • Loading branch information
gmyenni authored Oct 30, 2024
2 parents fbbb24a + 3166590 commit 00ebca7
Show file tree
Hide file tree
Showing 16 changed files with 573 additions and 61 deletions.
14 changes: 10 additions & 4 deletions DataCleaningScripts/download_eden.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ get_metadata <- function() {
metadata <- url %>%
rvest::read_html() %>%
rvest::html_table()
metadata <- metadata[[1]] %>%
metadata <- as.data.frame(metadata[[1]]) %>%
dplyr::filter(Dataset != "depths") %>% #Drop directory name from first row
dplyr::rename(dataset = Dataset, size = Size, last_modified = `Last Modified`) %>%
dplyr::mutate(last_modified = as.POSIXct(last_modified,
Expand Down Expand Up @@ -53,7 +53,8 @@ get_data_urls <- function(file_names) {
get_last_download <- function(eden_path = file.path("Water"),
metadata, force_update = FALSE) {
if ("last_download.csv" %in% list.files(eden_path) & !force_update) {
last_download <- read.csv(file.path(eden_path, "last_download.csv"))
last_download <- read.csv(file.path(eden_path, "last_download.csv")) %>%
dplyr::mutate(last_modified = as.POSIXct(last_modified))
} else {
last_download <- data.frame(dataset = metadata$dataset, size = "0 Mbytes",
last_modified = as.POSIXct("1900-01-01 00:00:01",
Expand All @@ -76,13 +77,18 @@ get_last_download <- function(eden_path = file.path("Water"),
#'
get_files_to_update <- function(eden_path = file.path("Water"),
metadata, force_update = FALSE){

# Find files that have been updated since last download
last_download <- get_last_download(eden_path, metadata, force_update = force_update)
new <- metadata %>%
dplyr::left_join(last_download, by = "dataset", suffix = c("", ".last")) %>%
dplyr::filter(last_modified > last_modified.last | size != size.last | is.na(last_modified.last))

unlink(file.path(eden_path, new$dataset))
unchanged_files <- list.files("Water", pattern = "*_depth.nc")
metadata %>%
dplyr::filter(year %in% c(new$year-2, new$year-1, new$year, new$year+1, new$year+2))
dplyr::filter(year %in% c(new$year-2, new$year-1, new$year, new$year+1, new$year+2)) %>%
dplyr::filter(!(dataset %in% unchanged_files))
}

#' @name update_last_download
Expand Down Expand Up @@ -124,7 +130,7 @@ download_eden_depths <- function(eden_path = file.path("Water"),
to_update <- get_files_to_update(eden_path, metadata,
force_update = force_update)
data_urls <- get_data_urls(to_update$dataset)
options(timeout = 1000)
options(timeout = 500)

downloaded <- mapply(download.file,
data_urls$urls,
Expand Down
Loading

0 comments on commit 00ebca7

Please sign in to comment.