Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

File testing #40

Merged
merged 11 commits into from
Sep 13, 2023
20 changes: 12 additions & 8 deletions R/download_worldclim_future.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ download_worldclim_future <- function(dataset, bio_var, filename){
# get resolution from the dataset name and convert it to the original
res_conversion <- data.frame(our_res = c("10m","5m","2.5m", "0.5m"),
wc_res = c("10m","5m", "2.5m", "30s"))
wc_res <- res_conversion$wc_res[res_conversion$our_res==substr(dataset,
start = regexpr("_\\d+\\.?\\d+m",dataset)+1,
stop=nchar(dataset))]
wc_res <- res_conversion$wc_res[res_conversion$our_res==tail(strsplit(dataset,"_")[[1]],1)]
gcm <- c("ACCESS-CM2", "BCC-CSM2-MR", "CMCC-ESM2", "EC-Earth3-Veg", "FIO-ESM-2-0",
"GFDL-ESM4", "GISS-E2-1-G", "HadGEM3-GC31-LL", "INM-CM5-0", "IPSL-CM6A-LR",
"MIROC6", "MPI-ESM1-2-HR", "MRI-ESM2-0", "UKESM1-0-LL")
Expand Down Expand Up @@ -69,16 +67,22 @@ download_worldclim_future <- function(dataset, bio_var, filename){
# and finally we save it as a netcdf file
time_bp(wc_list[[i_step]]) <- rep(dates_df$time_bp[dates_df$orig == i_step],nlyr(wc_list[[i_step]]))
}
message("assembling all the data into a netcdf file for use with pastclim; this operation will take a couple of minutes...\n")
message("assembling all the data into a netcdf file for use with pastclim; this operation will take a few minutes...\n")

var_names <- names(wc_list[[1]])
sds_list <- list()
for (i_var in var_names){
sds_list[[i_var]]<-terra::rast(lapply(wc_list, terra::subset,subset=i_var))
names(sds_list[[i_var]])<-rep(i_var,nlyr((sds_list[[i_var]])))
for (i in 1:length(var_names)){
i_var <- var_names[i]
if (!any(postfix %in% c("bioc","elev"))){
new_var_name <-paste0(var_prefix,sprintf("%02d",i))
} else {
new_var_name <- i_var
}
sds_list[[new_var_name]]<-terra::rast(lapply(wc_list, terra::subset,subset=i_var))
names(sds_list[[new_var_name]])<-rep(new_var_name,nlyr((sds_list[[new_var_name]])))
}
wc_sds <- terra::sds(sds_list)

terra::writeCDF(wc_sds,filename=filename, compression=9,
overwrite=TRUE)
# fix time axis (this is a workaround if we open the file with sf)
Expand Down
2 changes: 1 addition & 1 deletion R/download_worldclim_present.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ download_worldclim_present <- function(dataset, bio_var, filename){
# unzip it to a temporary directory
destpath <- file.path(tempdir(),"to_unzip")
utils::unzip(destfile,exdir=destpath)
wc_rast <- terra::rast(dir(destpath, full.names = TRUE))
wc_rast <- terra::rast(dir(destpath, pattern=".tif", full.names = TRUE))
# sort out variable names
if (!(grepl("altitude",bio_var))){
# digits at the end of the name are the key identifier of each variable
Expand Down
21 changes: 17 additions & 4 deletions data-raw/helper_functions/verify_files_by_dataset.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
# verify that all the variables in the tables are actually found in the files
# this requires all data to have been downloaded
full_meta <- read.csv("./inst/rawdata_scripts/data_files/variable_table.csv")
library(pastclim)
full_meta <- pastclim:::dataset_list_included
in_dir <- get_data_path()
in_dir <- "~/project_temp/past_climate/new_meta"
problem_rows <- vector()
for (i in 1:nrow(full_meta)){
pastclim::download_dataset(dataset = as.character(full_meta$dataset[i]),
bio_variables = full_meta$variable[i])
nc_in <- ncdf4::nc_open(file.path(in_dir, full_meta$file_name[i]))
if (!full_meta$ncvar[i] %in% names(nc_in$var)){
ncdf4::nc_close(nc_in)
stop("problem with ",full_meta$ncvar[i]," in ", full_meta$file_name[i])
message("problem with ",full_meta$ncvar[i]," in ", full_meta$file_name[i],"\n")
stop("we had a problem")
problem_rows[i]<-TRUE
} else {
problem_rows[i]<-FALSE
}
ncdf4::nc_close(nc_in)
}

if (any(problem_rows)){
which(problem_rows)
} else {
cat("all files are fine")
}

46 changes: 46 additions & 0 deletions tests/testthat/test_remote_files.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
## This is a resource intensive test. It downloads all files in the dataset_list
## and then validates them. It is only run if the appropriate environment
## variable is set, and thus skipped most of the time
## To set the environment variable, use:
## Sys.setenv(PASTCLIM_TEST = "download_full")
## remember to unset it once you are done
## Sys.unsetenv("PASTCLIM_TEST")


# set up data path for this test
data_path <- file.path(tempdir(),"pastclim_data")
unlink(data_path, recursive = TRUE) # it should not exist, but remove it just in case
# set data path
set_data_path(path_to_nc = data_path,
ask = FALSE,
write_config = FALSE,
copy_example = TRUE)
################################################################################
test_that("download and validate all files", {
skip_if(Sys.getenv("PASTCLIM_TEST")!="download_full")
# download all files for each dataset
all_datasets <- get_available_datasets()
all_datasets <- all_datasets[!all_datasets %in% "Example"]
for (i_dataset in all_datasets){
expect_true(download_dataset(dataset = i_dataset))
}
# now check that the files we downloaded are valid
for (i_file in list.files(get_data_path())){
expect_true(validate_nc(i_file))
}
# check that the variables in the table are found in the respective files
meta_table <- getOption("pastclim.dataset_list")
for (i_row in 1:nrow(meta_table)){
nc_in <- ncdf4::nc_open(file.path(in_dir, meta_table$file_name[i]))
# check below if !! works to unquote the expression
expect_true(!!meta_table$ncvar[i] %in% names(nc_in$var))
ncdf4::nc_close(nc_in)
}
# for each dataset, check that all variables cover the same extent and have
# the same missing values
}
)

################################################################################
# clean up for the next test
unlink(data_path, recursive = TRUE)
Loading