diff --git a/R/download_worldclim_future.R b/R/download_worldclim_future.R index 35fa4bda..e068bb5d 100644 --- a/R/download_worldclim_future.R +++ b/R/download_worldclim_future.R @@ -16,9 +16,7 @@ download_worldclim_future <- function(dataset, bio_var, filename){ # get resolution from the dataset name and convert it to the original res_conversion <- data.frame(our_res = c("10m","5m","2.5m", "0.5m"), wc_res = c("10m","5m", "2.5m", "30s")) - wc_res <- res_conversion$wc_res[res_conversion$our_res==substr(dataset, - start = regexpr("_\\d+\\.?\\d+m",dataset)+1, - stop=nchar(dataset))] + wc_res <- res_conversion$wc_res[res_conversion$our_res==tail(strsplit(dataset,"_")[[1]],1)] gcm <- c("ACCESS-CM2", "BCC-CSM2-MR", "CMCC-ESM2", "EC-Earth3-Veg", "FIO-ESM-2-0", "GFDL-ESM4", "GISS-E2-1-G", "HadGEM3-GC31-LL", "INM-CM5-0", "IPSL-CM6A-LR", "MIROC6", "MPI-ESM1-2-HR", "MRI-ESM2-0", "UKESM1-0-LL") @@ -69,16 +67,22 @@ download_worldclim_future <- function(dataset, bio_var, filename){ # and finally we save it as a netcdf file time_bp(wc_list[[i_step]]) <- rep(dates_df$time_bp[dates_df$orig == i_step],nlyr(wc_list[[i_step]])) } - message("assembling all the data into a netcdf file for use with pastclim; this operation will take a couple of minutes...\n") + message("assembling all the data into a netcdf file for use with pastclim; this operation will take a few minutes...\n") var_names <- names(wc_list[[1]]) sds_list <- list() - for (i_var in var_names){ - sds_list[[i_var]]<-terra::rast(lapply(wc_list, terra::subset,subset=i_var)) - names(sds_list[[i_var]])<-rep(i_var,nlyr((sds_list[[i_var]]))) + for (i in 1:length(var_names)){ + i_var <- var_names[i] + if (!any(postfix %in% c("bioc","elev"))){ + new_var_name <-paste0(var_prefix,sprintf("%02d",i)) + } else { + new_var_name <- i_var + } + sds_list[[new_var_name]]<-terra::rast(lapply(wc_list, terra::subset,subset=i_var)) + names(sds_list[[new_var_name]])<-rep(new_var_name,nlyr((sds_list[[new_var_name]]))) } wc_sds <- terra::sds(sds_list) - + terra::writeCDF(wc_sds,filename=filename, compression=9, overwrite=TRUE) # fix time axis (this is a workaround if we open the file with sf) diff --git a/R/download_worldclim_present.R b/R/download_worldclim_present.R index a894459e..a394dcd2 100644 --- a/R/download_worldclim_present.R +++ b/R/download_worldclim_present.R @@ -53,7 +53,7 @@ download_worldclim_present <- function(dataset, bio_var, filename){ # unzip it to a temporary directory destpath <- file.path(tempdir(),"to_unzip") utils::unzip(destfile,exdir=destpath) - wc_rast <- terra::rast(dir(destpath, full.names = TRUE)) + wc_rast <- terra::rast(dir(destpath, pattern=".tif", full.names = TRUE)) # sort out variable names if (!(grepl("altitude",bio_var))){ # digits at the end of the name are the key identifier of each variable diff --git a/data-raw/helper_functions/verify_files_by_dataset.R b/data-raw/helper_functions/verify_files_by_dataset.R index cbf753b1..b0091cda 100644 --- a/data-raw/helper_functions/verify_files_by_dataset.R +++ b/data-raw/helper_functions/verify_files_by_dataset.R @@ -1,13 +1,26 @@ # verify that all the variables in the tables are actually found in the files # this requires all data to have been downloaded -full_meta <- read.csv("./inst/rawdata_scripts/data_files/variable_table.csv") +library(pastclim) +full_meta <- pastclim:::dataset_list_included in_dir <- get_data_path() -in_dir <- "~/project_temp/past_climate/new_meta" +problem_rows <- vector() for (i in 1:nrow(full_meta)){ + pastclim::download_dataset(dataset = as.character(full_meta$dataset[i]), + bio_variables = full_meta$variable[i]) nc_in <- ncdf4::nc_open(file.path(in_dir, full_meta$file_name[i])) if (!full_meta$ncvar[i] %in% names(nc_in$var)){ - ncdf4::nc_close(nc_in) - stop("problem with ",full_meta$ncvar[i]," in ", full_meta$file_name[i]) + message("problem with ",full_meta$ncvar[i]," in ", full_meta$file_name[i],"\n") + stop("we had a problem") + problem_rows[i]<-TRUE + } else { + problem_rows[i]<-FALSE } ncdf4::nc_close(nc_in) } + +if (any(problem_rows)){ + which(problem_rows) +} else { + cat("all files are fine") +} + diff --git a/tests/testthat/test_remote_files.R b/tests/testthat/test_remote_files.R new file mode 100644 index 00000000..b380fe28 --- /dev/null +++ b/tests/testthat/test_remote_files.R @@ -0,0 +1,46 @@ +## This is a resource intensive test. It downloads all files in the dataset_list +## and then validates them. It is only run if the appropriate environment +## variable is set, and thus skipped most of the time +## To set the environment variable, use: +## Sys.setenv(PASTCLIM_TEST = "download_full") +## remember to unset it once you are done +## Sys.unsetenv("PASTCLIM_TEST") + + +# set up data path for this test +data_path <- file.path(tempdir(),"pastclim_data") +unlink(data_path, recursive = TRUE) # it should not exist, but remove it just in case +# set data path +set_data_path(path_to_nc = data_path, + ask = FALSE, + write_config = FALSE, + copy_example = TRUE) +################################################################################ +test_that("download and validate all files", { + skip_if(Sys.getenv("PASTCLIM_TEST")!="download_full") + # download all files for each dataset + all_datasets <- get_available_datasets() + all_datasets <- all_datasets[!all_datasets %in% "Example"] + for (i_dataset in all_datasets){ + expect_true(download_dataset(dataset = i_dataset)) + } + # now check that the files we downloaded are valid + for (i_file in list.files(get_data_path())){ + expect_true(validate_nc(i_file)) + } + # check that the variables in the table are found in the respective files + meta_table <- getOption("pastclim.dataset_list") + for (i_row in 1:nrow(meta_table)){ + nc_in <- ncdf4::nc_open(file.path(in_dir, meta_table$file_name[i])) + # check below if !! works to unquote the expression + expect_true(!!meta_table$ncvar[i] %in% names(nc_in$var)) + ncdf4::nc_close(nc_in) + } + # for each dataset, check that all variables cover the same extent and have + # the same missing values +} +) + +################################################################################ +# clean up for the next test +unlink(data_path, recursive = TRUE)