From 7c9cf7178fcd95dc58f74955e4962559b8c7dd1d Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Thu, 15 Dec 2022 11:35:05 +0000 Subject: [PATCH 1/9] initial implementation of a remote test --- tests/testthat/test_remote_files.R | 46 ++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/testthat/test_remote_files.R diff --git a/tests/testthat/test_remote_files.R b/tests/testthat/test_remote_files.R new file mode 100644 index 00000000..b380fe28 --- /dev/null +++ b/tests/testthat/test_remote_files.R @@ -0,0 +1,46 @@ +## This is a resource intensive test. It downloads all files in the dataset_list +## and then validates them. It is only run if the appropriate environment +## variable is set, and thus skipped most of the time +## To set the environment variable, use: +## Sys.setenv(PASTCLIM_TEST = "download_full") +## remember to unset it once you are done +## Sys.unsetenv("PASTCLIM_TEST") + + +# set up data path for this test +data_path <- file.path(tempdir(),"pastclim_data") +unlink(data_path, recursive = TRUE) # it should not exist, but remove it just in case +# set data path +set_data_path(path_to_nc = data_path, + ask = FALSE, + write_config = FALSE, + copy_example = TRUE) +################################################################################ +test_that("download and validate all files", { + skip_if(Sys.getenv("PASTCLIM_TEST")!="download_full") + # download all files for each dataset + all_datasets <- get_available_datasets() + all_datasets <- all_datasets[!all_datasets %in% "Example"] + for (i_dataset in all_datasets){ + expect_true(download_dataset(dataset = i_dataset)) + } + # now check that the files we downloaded are valid + for (i_file in list.files(get_data_path())){ + expect_true(validate_nc(i_file)) + } + # check that the variables in the table are found in the respective files + meta_table <- getOption("pastclim.dataset_list") + for (i_row in 1:nrow(meta_table)){ + nc_in <- ncdf4::nc_open(file.path(in_dir, meta_table$file_name[i])) + # check below if !! works to unquote the expression + expect_true(!!meta_table$ncvar[i] %in% names(nc_in$var)) + ncdf4::nc_close(nc_in) + } + # for each dataset, check that all variables cover the same extent and have + # the same missing values +} +) + +################################################################################ +# clean up for the next test +unlink(data_path, recursive = TRUE) From a563182b9b0d5ba230b8dd0cb6bf447dd748204f Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Fri, 8 Sep 2023 17:25:15 +0100 Subject: [PATCH 2/9] test files --- .../helper_functions/verify_files_by_dataset.R | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/data-raw/helper_functions/verify_files_by_dataset.R b/data-raw/helper_functions/verify_files_by_dataset.R index cbf753b1..d84c250a 100644 --- a/data-raw/helper_functions/verify_files_by_dataset.R +++ b/data-raw/helper_functions/verify_files_by_dataset.R @@ -1,13 +1,23 @@ # verify that all the variables in the tables are actually found in the files # this requires all data to have been downloaded -full_meta <- read.csv("./inst/rawdata_scripts/data_files/variable_table.csv") +full_meta <- pastclim:::dataset_list_included in_dir <- get_data_path() -in_dir <- "~/project_temp/past_climate/new_meta" +problem_rows <- vector() for (i in 1:nrow(full_meta)){ + pastclim::download_dataset(dataset = full_meta$dataset[i], + bio_variables = full_meta$variable[i]) nc_in <- ncdf4::nc_open(file.path(in_dir, full_meta$file_name[i])) if (!full_meta$ncvar[i] %in% names(nc_in$var)){ - ncdf4::nc_close(nc_in) - stop("problem with ",full_meta$ncvar[i]," in ", full_meta$file_name[i]) + message("problem with ",full_meta$ncvar[i]," in ", full_meta$file_name[i],"\n") + problem_rows[i]<-TRUE + } else { + problem_rows[i]<-FALSE } ncdf4::nc_close(nc_in) } + +if (any(problem_rows)){ + which(problem_rows) +} else { + cat("all files are fine") +} From b012853e9ae5ceac961a64431df4754ec2abf5a1 Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Fri, 8 Sep 2023 18:25:45 +0100 Subject: [PATCH 3/9] checks --- R/download_dataset.R | 1 + R/download_worldclim_present.R | 1 + 2 files changed, 2 insertions(+) diff --git a/R/download_dataset.R b/R/download_dataset.R index 453877ba..3a5ea39e 100644 --- a/R/download_dataset.R +++ b/R/download_dataset.R @@ -78,6 +78,7 @@ download_dataset <- function(dataset, bio_variables = NULL, annual = TRUE, destfile = file.path(get_data_path(), file_details$file_name), quiet = FALSE )} else{ # we use a custom download function if the files have to be converted locally + browser() eval(parse(text=file_details$download_function))(dataset=dataset, bio_var = this_var, filename = file.path(get_data_path(), file_details$file_name)) diff --git a/R/download_worldclim_present.R b/R/download_worldclim_present.R index a894459e..49f41f20 100644 --- a/R/download_worldclim_present.R +++ b/R/download_worldclim_present.R @@ -16,6 +16,7 @@ download_worldclim_present <- function(dataset, bio_var, filename){ # get resolution from the dataset name and convert it to the original res_conversion <- data.frame(our_res = c("10m","5m","2.5m", "0.5m"), wc_res = c("10m","5m", "2.5m", "30s")) + browser() wc_res <- res_conversion$wc_res[res_conversion$our_res==substr(dataset, start = 15, stop=nchar(dataset))] From 3f73e8c216d7891cf3930e52f999a1f7d9c50514 Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Fri, 8 Sep 2023 18:36:37 +0100 Subject: [PATCH 4/9] test_fix --- R/download_dataset.R | 1 - R/download_worldclim_present.R | 1 - data-raw/helper_functions/verify_files_by_dataset.R | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/R/download_dataset.R b/R/download_dataset.R index 3a5ea39e..453877ba 100644 --- a/R/download_dataset.R +++ b/R/download_dataset.R @@ -78,7 +78,6 @@ download_dataset <- function(dataset, bio_variables = NULL, annual = TRUE, destfile = file.path(get_data_path(), file_details$file_name), quiet = FALSE )} else{ # we use a custom download function if the files have to be converted locally - browser() eval(parse(text=file_details$download_function))(dataset=dataset, bio_var = this_var, filename = file.path(get_data_path(), file_details$file_name)) diff --git a/R/download_worldclim_present.R b/R/download_worldclim_present.R index 49f41f20..a894459e 100644 --- a/R/download_worldclim_present.R +++ b/R/download_worldclim_present.R @@ -16,7 +16,6 @@ download_worldclim_present <- function(dataset, bio_var, filename){ # get resolution from the dataset name and convert it to the original res_conversion <- data.frame(our_res = c("10m","5m","2.5m", "0.5m"), wc_res = c("10m","5m", "2.5m", "30s")) - browser() wc_res <- res_conversion$wc_res[res_conversion$our_res==substr(dataset, start = 15, stop=nchar(dataset))] diff --git a/data-raw/helper_functions/verify_files_by_dataset.R b/data-raw/helper_functions/verify_files_by_dataset.R index d84c250a..4cbaea18 100644 --- a/data-raw/helper_functions/verify_files_by_dataset.R +++ b/data-raw/helper_functions/verify_files_by_dataset.R @@ -4,7 +4,7 @@ full_meta <- pastclim:::dataset_list_included in_dir <- get_data_path() problem_rows <- vector() for (i in 1:nrow(full_meta)){ - pastclim::download_dataset(dataset = full_meta$dataset[i], + pastclim::download_dataset(dataset = as.character(full_meta$dataset[i]), bio_variables = full_meta$variable[i]) nc_in <- ncdf4::nc_open(file.path(in_dir, full_meta$file_name[i])) if (!full_meta$ncvar[i] %in% names(nc_in$var)){ From 2e383023c49b47184586acd28e2e51bae1fbf8d7 Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Fri, 8 Sep 2023 19:15:18 +0100 Subject: [PATCH 5/9] filter tif for worldclim --- R/download_worldclim_present.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/download_worldclim_present.R b/R/download_worldclim_present.R index a894459e..a394dcd2 100644 --- a/R/download_worldclim_present.R +++ b/R/download_worldclim_present.R @@ -53,7 +53,7 @@ download_worldclim_present <- function(dataset, bio_var, filename){ # unzip it to a temporary directory destpath <- file.path(tempdir(),"to_unzip") utils::unzip(destfile,exdir=destpath) - wc_rast <- terra::rast(dir(destpath, full.names = TRUE)) + wc_rast <- terra::rast(dir(destpath, pattern=".tif", full.names = TRUE)) # sort out variable names if (!(grepl("altitude",bio_var))){ # digits at the end of the name are the key identifier of each variable From fdaec415b391c4cdaf6705bcb3a1a0a2ede0f931 Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Fri, 8 Sep 2023 22:10:57 +0100 Subject: [PATCH 6/9] Note on fixing future monthly --- R/download_worldclim_future.R | 1 + data-raw/helper_functions/verify_files_by_dataset.R | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/R/download_worldclim_future.R b/R/download_worldclim_future.R index 35fa4bda..26cc626a 100644 --- a/R/download_worldclim_future.R +++ b/R/download_worldclim_future.R @@ -77,6 +77,7 @@ download_worldclim_future <- function(dataset, bio_var, filename){ sds_list[[i_var]]<-terra::rast(lapply(wc_list, terra::subset,subset=i_var)) names(sds_list[[i_var]])<-rep(i_var,nlyr((sds_list[[i_var]]))) } + browser() wc_sds <- terra::sds(sds_list) terra::writeCDF(wc_sds,filename=filename, compression=9, diff --git a/data-raw/helper_functions/verify_files_by_dataset.R b/data-raw/helper_functions/verify_files_by_dataset.R index 4cbaea18..38b63941 100644 --- a/data-raw/helper_functions/verify_files_by_dataset.R +++ b/data-raw/helper_functions/verify_files_by_dataset.R @@ -21,3 +21,8 @@ if (any(problem_rows)){ } else { cat("all files are fine") } + + +### There is a problem with how we are renaming variables when downloading future worldclim +# monthly variables (it's not working) +# check line 78 From e3905fe4d970edc823f2cc8ebad95e1281706740 Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Tue, 12 Sep 2023 17:17:01 +0100 Subject: [PATCH 7/9] fix var names --- R/download_worldclim_future.R | 18 +++++++++++------- .../helper_functions/verify_files_by_dataset.R | 2 ++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/R/download_worldclim_future.R b/R/download_worldclim_future.R index 35fa4bda..90d6ade2 100644 --- a/R/download_worldclim_future.R +++ b/R/download_worldclim_future.R @@ -16,9 +16,7 @@ download_worldclim_future <- function(dataset, bio_var, filename){ # get resolution from the dataset name and convert it to the original res_conversion <- data.frame(our_res = c("10m","5m","2.5m", "0.5m"), wc_res = c("10m","5m", "2.5m", "30s")) - wc_res <- res_conversion$wc_res[res_conversion$our_res==substr(dataset, - start = regexpr("_\\d+\\.?\\d+m",dataset)+1, - stop=nchar(dataset))] + wc_res <- res_conversion$wc_res[res_conversion$our_res==tail(strsplit(dataset,"_")[[1]],1)] gcm <- c("ACCESS-CM2", "BCC-CSM2-MR", "CMCC-ESM2", "EC-Earth3-Veg", "FIO-ESM-2-0", "GFDL-ESM4", "GISS-E2-1-G", "HadGEM3-GC31-LL", "INM-CM5-0", "IPSL-CM6A-LR", "MIROC6", "MPI-ESM1-2-HR", "MRI-ESM2-0", "UKESM1-0-LL") @@ -73,12 +71,18 @@ download_worldclim_future <- function(dataset, bio_var, filename){ var_names <- names(wc_list[[1]]) sds_list <- list() - for (i_var in var_names){ - sds_list[[i_var]]<-terra::rast(lapply(wc_list, terra::subset,subset=i_var)) - names(sds_list[[i_var]])<-rep(i_var,nlyr((sds_list[[i_var]]))) + for (i in 1:length(var_names)){ + i_var <- var_names[i] + if (!any(postfix %in% c("bioc","elev"))){ + new_var_name <-paste0(var_prefix,sprintf("%02d",i)) + } else { + new_var_name <- i_var + } + sds_list[[new_var_name]]<-terra::rast(lapply(wc_list, terra::subset,subset=i_var)) + names(sds_list[[new_var_name]])<-rep(new_var_name,nlyr((sds_list[[new_var_name]]))) } wc_sds <- terra::sds(sds_list) - + terra::writeCDF(wc_sds,filename=filename, compression=9, overwrite=TRUE) # fix time axis (this is a workaround if we open the file with sf) diff --git a/data-raw/helper_functions/verify_files_by_dataset.R b/data-raw/helper_functions/verify_files_by_dataset.R index 4cbaea18..5bd89e02 100644 --- a/data-raw/helper_functions/verify_files_by_dataset.R +++ b/data-raw/helper_functions/verify_files_by_dataset.R @@ -1,5 +1,6 @@ # verify that all the variables in the tables are actually found in the files # this requires all data to have been downloaded +library(pastclim) full_meta <- pastclim:::dataset_list_included in_dir <- get_data_path() problem_rows <- vector() @@ -9,6 +10,7 @@ for (i in 1:nrow(full_meta)){ nc_in <- ncdf4::nc_open(file.path(in_dir, full_meta$file_name[i])) if (!full_meta$ncvar[i] %in% names(nc_in$var)){ message("problem with ",full_meta$ncvar[i]," in ", full_meta$file_name[i],"\n") + stop("we had a problem") problem_rows[i]<-TRUE } else { problem_rows[i]<-FALSE From eec464e22c7a240529f83491c7452cd1ea8a9e4b Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Wed, 13 Sep 2023 12:48:12 +0100 Subject: [PATCH 8/9] remove debug code --- R/download_worldclim_future.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/download_worldclim_future.R b/R/download_worldclim_future.R index 6195c354..e068bb5d 100644 --- a/R/download_worldclim_future.R +++ b/R/download_worldclim_future.R @@ -67,7 +67,7 @@ download_worldclim_future <- function(dataset, bio_var, filename){ # and finally we save it as a netcdf file time_bp(wc_list[[i_step]]) <- rep(dates_df$time_bp[dates_df$orig == i_step],nlyr(wc_list[[i_step]])) } - message("assembling all the data into a netcdf file for use with pastclim; this operation will take a couple of minutes...\n") + message("assembling all the data into a netcdf file for use with pastclim; this operation will take a few minutes...\n") var_names <- names(wc_list[[1]]) sds_list <- list() @@ -81,7 +81,6 @@ download_worldclim_future <- function(dataset, bio_var, filename){ sds_list[[new_var_name]]<-terra::rast(lapply(wc_list, terra::subset,subset=i_var)) names(sds_list[[new_var_name]])<-rep(new_var_name,nlyr((sds_list[[new_var_name]]))) } - browser() wc_sds <- terra::sds(sds_list) terra::writeCDF(wc_sds,filename=filename, compression=9, From 036886b21e752e0b89d8deb333f4fd321f6f1e87 Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Wed, 13 Sep 2023 12:56:47 +0100 Subject: [PATCH 9/9] remove note after bugfix --- data-raw/helper_functions/verify_files_by_dataset.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/data-raw/helper_functions/verify_files_by_dataset.R b/data-raw/helper_functions/verify_files_by_dataset.R index c63b3148..b0091cda 100644 --- a/data-raw/helper_functions/verify_files_by_dataset.R +++ b/data-raw/helper_functions/verify_files_by_dataset.R @@ -24,7 +24,3 @@ if (any(problem_rows)){ cat("all files are fine") } - -### There is a problem with how we are renaming variables when downloading future worldclim -# monthly variables (it's not working) -# check line 78