diff --git a/NAMESPACE b/NAMESPACE index 2671ea51..4dca0c1e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(getModuleList) export(retrieveConnectionDetails) export(storeConnectionDetails) export(unlockKeyring) +export(verifyModuleInstallation) import(CohortGenerator) import(DatabaseConnector) import(dplyr) diff --git a/R/Execution.R b/R/Execution.R index c4e29680..727f3a4f 100644 --- a/R/Execution.R +++ b/R/Execution.R @@ -61,6 +61,9 @@ execute <- function(analysisSpecifications, ) } modules <- ensureAllModulesInstantiated(analysisSpecifications) + if (isFALSE(modules$allModulesInstalled)) { + stop("Stopping execution due to module issues") + } if (is.null(executionScriptFolder)) { executionScriptFolder <- tempfile("strategusTempSettings") @@ -79,7 +82,7 @@ execute <- function(analysisSpecifications, keyringName = keyringName ) } - dependencies <- extractDependencies(modules) + dependencies <- extractDependencies(modules$modules) fileName <- generateTargetsScript( diff --git a/R/ModuleInstantiation.R b/R/ModuleInstantiation.R index df27152f..9db6724f 100644 --- a/R/ModuleInstantiation.R +++ b/R/ModuleInstantiation.R @@ -29,11 +29,15 @@ #' #' @template AnalysisSpecifications #' +#' @template forceVerification +#' #' @return -#' A tibble listing the instantiated modules. +#' A list containing the install status of all modules +#' (TRUE if all are installed properly) and a tibble listing +#' the instantiated modules. #' #' @export -ensureAllModulesInstantiated <- function(analysisSpecifications) { +ensureAllModulesInstantiated <- function(analysisSpecifications, forceVerification = FALSE) { modules <- getModuleTable(analysisSpecifications, distinct = TRUE) # Verify only one version per module: @@ -73,7 +77,229 @@ ensureAllModulesInstantiated <- function(analysisSpecifications) { stop(message) } - return(modules) + # Verify all modules are properly installed + moduleInstallStatus <- list() + for (i in 1:nrow(modules)) { + status <- verifyModuleInstallation( + module = modules$module[i], + version = modules$version[i], + forceVerification = forceVerification + ) + moduleInstallStatus[[length(moduleInstallStatus) + 1]] <- status + } + attr(modules, 'moduleInstallStatus') <- moduleInstallStatus + + installStatus <- unlist(lapply(moduleInstallStatus, FUN = function(x) { x$moduleInstalled })) + if (!all(installStatus)) { + problemModules <- moduleInstallStatus[!installStatus] + message("There were ", length(problemModules), " issue(s) found with your Strategus modules!") + for (i in seq_along(problemModules)) { + message("Issue #", i, ": Module ", problemModules[[i]]$moduleFolder, " could not install the following R packages:") + print(problemModules[[i]]$issues) + } + message("To fix these issues, open the module project (.Rproj file) at the path specified above and re-run \"renv::restore()\" and correct all issues") + } + + return( + list( + allModulesInstalled = all(installStatus), + modules = modules + ) + ) +} + + +#' Verify a module is properly installed +#' +#' @description +#' In some instances a module may fail to instantiate and install due to problems +#' when calling renv::restore for the module's renv.lock file. This function +#' will allow you to surface inconsistencies between the module renv.lock file +#' and the module's renv project library. This function will check to that a +#' module has been properly installed using internal functions of the `renv` +#' package. If a module is verified to work via this function, the hash of +#' the module's renv.lock file will be written to a text file in the module +#' directory to indicate that it is ready for use. This will allow subsequent +#' calls to work faster since the initial verification process can take some +#' time.It is possible to re-run the verification of a module +#' by using the `forceVerification` parameter. +#' +#' To fix issues with a module, you will need to open the module's .Rproj in +#' RStudio instance and debug the issues when calling renv::restore(). +#' +#' @param module The name of the module to verify (i.e. "CohortGeneratorModule") +#' +#' @param version The version of the module to verify (i.e. "0.2.1") +#' +#' @param silent When TRUE output of this verification process is suppressed +#' +#' @template forceVerification +#' +#' @return +#' A list with the output of the consistency check +#' +#' @export +verifyModuleInstallation <- function(module, version, silent = FALSE, forceVerification = FALSE) { + # Internal helper function + verifyModuleInstallationReturnValue <- function(moduleFolder, moduleInstalled, issues = NULL) { + returnVal <- list( + moduleFolder = moduleFolder, + moduleInstalled = moduleInstalled, + issues = issues + ) + return(returnVal) + } + + moduleFolder <- getModuleFolder(module, version) + if (!dir.exists(moduleFolder)) { + if (!silent) { + warn("Module ", module, ", Version: ", version, " not found at: ", moduleFolder, ". This means the module was never installed.") + } + return( + verifyModuleInstallationReturnValue( + moduleFolder = moduleFolder, + moduleInstalled = FALSE + ) + ) + } + + if (!silent) { + message("Verifying module: ", module, ", (", version, ") at ", moduleFolder, "...", appendLF = F) + } + moduleStatusFileName <- "moduleStatus.txt" + renvLockFileName <- "renv.lock" + + # If the lock file doesn't exist, we're not sure if we're dealing with a module. + if (!file.exists(file.path(moduleFolder, renvLockFileName))) { + if (!silent) { + message("ERROR - renv.lock file missing.") + } + return( + verifyModuleInstallationReturnValue( + moduleFolder = moduleFolder, + moduleInstalled = FALSE + ) + ) + } + + # Check to see if we've already performed the verification by looking at the + # moduleStatus.txt file to see if the md5 in that file matches the one + # created by hashing the renv.lock file + lockfileContents <- ParallelLogger::loadSettingsFromJson( + fileName = file.path(moduleFolder, renvLockFileName) + ) + lockfileHash <- digest::digest( + object = lockfileContents, + algo = "md5" + ) + if (!forceVerification && file.exists(file.path(moduleFolder, moduleStatusFileName))) { + lockfileHashFromModuleStatusFile <- SqlRender::readSql( + sourceFile = file.path(moduleFolder, moduleStatusFileName) + ) + + # If the values match, the module is installed correctly + # return and exit + if (lockfileHashFromModuleStatusFile == lockfileHash) { + if (!silent) { + message("MODULE READY!") + } + return( + verifyModuleInstallationReturnValue( + moduleFolder = moduleFolder, + moduleInstalled = TRUE + ) + ) + } + } + + + # Now perform the consistency check to verify that the renv::restore() + # process executed successfully. We must do this in the module's context + Strategus:::withModuleRenv( + code = { + # Start by turning off verbose output to hide renv output + verboseOption <- getOption("renv.verbose") + options(renv.verbose = FALSE) + on.exit(options(renv.verbose = verboseOption)) + + # Get the renv project status and then identify the packages used + # in the project to determine if there were issues when restoring + # the project from the renv.lock file. + projectStatus <- renv::status() + + # Identify the list of package dependencies by using + # the data returned from renv::status() and + # renv::dependencies for the project. + library <- names(projectStatus$library$Packages) + lockfile <- names(projectStatus$lockfile$Packages) + packages <- sort(union(renv::dependencies(quiet = TRUE)$Package, "renv")) + packages <- sort(unique(c(library, lockfile, packages))) + projectStatus$packages <- packages + saveRDS(object = list( + library = library, + lockfile = lockfile, + packages = packages + ), + file="projectStatus.rds" + ) + }, + moduleFolder = moduleFolder + ) + + # The module's project status is written to the + # file system. Now we can get the module status and use the information + # to determine the restoration status + projectStatus <- readRDS(file.path(moduleFolder, "projectStatus.rds")) + + library <- projectStatus$library + lockfile <- projectStatus$lockfile + packages <- projectStatus$packages + + packageStatus <- data.frame( + package = packages, + installed = packages %in% library, + recorded = packages %in% lockfile, + used = packages %in% packages + ) + + # If all of the used & recorded packages are installed, then + # return TRUE for the module installed status. If not, return + # FALSE and set an attribute of the list that contains the issues + # discovered + ok <- packageStatus$installed & (packageStatus$used == packageStatus$recorded) + issues <- packageStatus[!ok, , drop = FALSE] + missing <- !issues$installed + issues$installed <- ifelse(issues$installed, "y", "n") + issues$recorded <- ifelse(issues$recorded, "y", "n") + issues$used <- ifelse(issues$used, "y", if (any(missing)) "?" else "n") + issues <- issues[issues$installed == "n" & issues$recorded == "y" & issues$used == "y", ] + + moduleInstalled <- nrow(issues) == 0 + + if (isTRUE(moduleInstalled)) { + if (!silent) { + message("MODULE READY!") + } + # Write the contents of the md5 hash of the module's + # renv.lock file to the file system to note that the + # module's install status was successful and verified + SqlRender::writeSql( + sql = lockfileHash, + targetFile = file.path(moduleFolder, "moduleStatus.txt") + ) + } else { + if (!silent) { + message("MODULE HAS ISSUES!") + } + } + + return( + verifyModuleInstallationReturnValue( + moduleFolder = moduleFolder, + moduleInstalled = moduleInstalled, + issues = issues + ) + ) } getModuleTable <- function(analysisSpecifications, distinct = FALSE) { @@ -121,15 +347,14 @@ getModuleMetaData <- function(moduleFolder) { } getModuleFolder <- function(module, version) { + assertModulesFolderSetting(x = Sys.getenv("INSTANTIATED_MODULES_FOLDER")) moduleFolder <- file.path(Sys.getenv("INSTANTIATED_MODULES_FOLDER"), sprintf("%s_%s", module, version)) invisible(moduleFolder) } ensureModuleInstantiated <- function(module, version, remoteRepo, remoteUsername) { + assertModulesFolderSetting(x = Sys.getenv("INSTANTIATED_MODULES_FOLDER")) instantiatedModulesFolder <- Sys.getenv("INSTANTIATED_MODULES_FOLDER") - if (instantiatedModulesFolder == "") { - stop("The INSTANTIATED_MODULES_FOLDER environment variable has not been set.") - } if (!dir.exists(instantiatedModulesFolder)) { dir.create(instantiatedModulesFolder, recursive = TRUE) } diff --git a/R/ResultModelCreation.R b/R/ResultModelCreation.R index 65e9f09c..7c9fdc18 100644 --- a/R/ResultModelCreation.R +++ b/R/ResultModelCreation.R @@ -36,6 +36,9 @@ createResultDataModels <- function(analysisSpecifications, checkmate::reportAssertions(collection = errorMessages) modules <- ensureAllModulesInstantiated(analysisSpecifications) + if (isFALSE(modules$allModulesInstalled)) { + stop("Stopping execution due to module issues") + } if (is.null(executionScriptFolder)) { @@ -141,7 +144,11 @@ runSchemaCreation <- function(analysisSpecifications, keyringSettings, moduleInd version <- moduleSpecification$version remoteRepo <- moduleSpecification$remoteRepo remoteUsername <- moduleSpecification$remoteUsername - moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername) + moduleInstallation <- verifyModuleInstallation(module, version) + moduleFolder <- moduleInstallation$moduleFolder + if (isFALSE(moduleInstallation$moduleInstalled)) { + stop("Stopping since module is not properly installed!") + } # Create job context moduleExecutionSettings <- executionSettings diff --git a/R/ResultsUpload.R b/R/ResultsUpload.R index 0a3ad60a..dfd2752d 100644 --- a/R/ResultsUpload.R +++ b/R/ResultsUpload.R @@ -24,7 +24,11 @@ runResultsUpload <- function(analysisSpecifications, keyringSettings, moduleInde version <- moduleSpecification$version remoteRepo <- moduleSpecification$remoteRepo remoteUsername <- moduleSpecification$remoteUsername - moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername) + moduleInstallation <- verifyModuleInstallation(module, version) + moduleFolder <- moduleInstallation$moduleFolder + if (isFALSE(moduleInstallation$moduleInstalled)) { + stop("Stopping since module is not properly installed!") + } # Create job context moduleExecutionSettings <- executionSettings diff --git a/R/RunModule.R b/R/RunModule.R index ebf1b5f0..252d765a 100644 --- a/R/RunModule.R +++ b/R/RunModule.R @@ -28,7 +28,11 @@ runModule <- function(analysisSpecifications, keyringSettings, moduleIndex, exec version <- moduleSpecification$version remoteRepo <- moduleSpecification$remoteRepo remoteUsername <- moduleSpecification$remoteUsername - moduleFolder <- ensureModuleInstantiated(module, version, remoteRepo, remoteUsername) + moduleInstallation <- verifyModuleInstallation(module, version) + moduleFolder <- moduleInstallation$moduleFolder + if (isFALSE(moduleInstallation$moduleInstalled)) { + stop("Stopping since module is not properly installed!") + } # Create job context moduleExecutionSettings <- executionSettings diff --git a/R/Settings.R b/R/Settings.R index 6755e8d3..9fb31f64 100644 --- a/R/Settings.R +++ b/R/Settings.R @@ -349,6 +349,15 @@ unlockKeyring <- function(keyringName) { return(keyringLocked) } +#' @keywords internal +.checkModuleFolderSetting <- function(x) { + if (length(x) == 0 || x == "") { + return(paste0("INSTANTIATED_MODULES_FOLDER environment variable has not been set. INSTANTIATED_MODULES_FOLDER must be set using Sys.setenv(INSTANTIATED_MODULES_FOLDER = \"/somepath\")")) + } else { + return(TRUE) + } +} + #' Used when serializing connection details to retain NULL values #' #' @keywords internal diff --git a/R/Strategus.R b/R/Strategus.R index f40a43f7..e9bde376 100644 --- a/R/Strategus.R +++ b/R/Strategus.R @@ -24,3 +24,4 @@ #' @importFrom methods is NULL +assertModulesFolderSetting <- checkmate::makeAssertionFunction(.checkModuleFolderSetting) diff --git a/inst/testdata/analysisSpecification.json b/inst/testdata/analysisSpecification.json index 4ecb641d..bf417726 100644 --- a/inst/testdata/analysisSpecification.json +++ b/inst/testdata/analysisSpecification.json @@ -223,7 +223,7 @@ "moduleSpecifications": [ { "module": "CohortGeneratorModule", - "version": "0.2.0", + "version": "0.2.1", "remoteRepo": "github.com", "remoteUsername": "ohdsi", "settings": { @@ -292,7 +292,7 @@ }, { "module": "CohortIncidenceModule", - "version": "0.1.0", + "version": "0.3.0", "remoteRepo": "github.com", "remoteUsername": "ohdsi", "settings": { @@ -489,7 +489,7 @@ }, { "module": "CohortMethodModule", - "version": "0.2.0", + "version": "0.2.1", "remoteRepo": "github.com", "remoteUsername": "ohdsi", "settings": { @@ -1341,7 +1341,7 @@ }, { "module": "SelfControlledCaseSeriesModule", - "version": "0.2.0", + "version": "0.3.2", "remoteRepo": "github.com", "remoteUsername": "ohdsi", "settings": { @@ -2360,7 +2360,7 @@ }, { "module": "PatientLevelPredictionModule", - "version": "0.2.0", + "version": "0.2.1", "remoteRepo": "github.com", "remoteUsername": "ohdsi", "settings": [ diff --git a/man-roxygen/forceVerification.R b/man-roxygen/forceVerification.R new file mode 100644 index 00000000..540cf869 --- /dev/null +++ b/man-roxygen/forceVerification.R @@ -0,0 +1,5 @@ +#' @param forceVerification When set to TRUE, the verification process is forced +#' to re-evaluate if a module is properly installed. The default is FALSE +#' since if a module is successfully validated, the module will contain +#' the hash value of the module's renv.lock file in the file system so it can +#' by-pass running this check every time. diff --git a/man/ensureAllModulesInstantiated.Rd b/man/ensureAllModulesInstantiated.Rd index e672a528..7d9e2ff9 100644 --- a/man/ensureAllModulesInstantiated.Rd +++ b/man/ensureAllModulesInstantiated.Rd @@ -4,14 +4,22 @@ \alias{ensureAllModulesInstantiated} \title{Ensure all modules are instantiated} \usage{ -ensureAllModulesInstantiated(analysisSpecifications) +ensureAllModulesInstantiated(analysisSpecifications, forceVerification = FALSE) } \arguments{ \item{analysisSpecifications}{An object of type \code{AnalysisSpecifications} as created by \code{\link[=createEmptyAnalysisSpecificiations]{createEmptyAnalysisSpecificiations()}}.} + +\item{forceVerification}{When set to TRUE, the verification process is forced +to re-evaluate if a module is properly installed. The default is FALSE +since if a module is successfully validated, the module will contain +the hash value of the module's renv.lock file in the file system so it can +by-pass running this check every time.} } \value{ -A tibble listing the instantiated modules. +A list containing the install status of all modules +(TRUE if all are installed properly) and a tibble listing +the instantiated modules. } \description{ Ensure that all modules referenced in the analysis specifications are instantiated diff --git a/man/verifyModuleInstallation.Rd b/man/verifyModuleInstallation.Rd new file mode 100644 index 00000000..d0b96c1b --- /dev/null +++ b/man/verifyModuleInstallation.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ModuleInstantiation.R +\name{verifyModuleInstallation} +\alias{verifyModuleInstallation} +\title{Verify a module is properly installed} +\usage{ +verifyModuleInstallation( + module, + version, + silent = FALSE, + forceVerification = FALSE +) +} +\arguments{ +\item{module}{The name of the module to verify (i.e. "CohortGeneratorModule")} + +\item{version}{The version of the module to verify (i.e. "0.2.1")} + +\item{silent}{When TRUE output of this verification process is suppressed} + +\item{forceVerification}{When set to TRUE, the verification process is forced +to re-evaluate if a module is properly installed. The default is FALSE +since if a module is successfully validated, the module will contain +the hash value of the module's renv.lock file in the file system so it can +by-pass running this check every time.} +} +\value{ +A list with the output of the consistency check +} +\description{ +In some instances a module may fail to instantiate and install due to problems +when calling renv::restore for the module's renv.lock file. This function +will allow you to surface inconsistencies between the module renv.lock file +and the module's renv project library. This function will check to that a +module has been properly installed using internal functions of the \code{renv} +package. If a module is verified to work via this function, the hash of +the module's renv.lock file will be written to a text file in the module +directory to indicate that it is ready for use. This will allow subsequent +calls to work faster since the initial verification process can take some +time.It is possible to re-run the verification of a module +by using the \code{forceVerification} parameter. + +To fix issues with a module, you will need to open the module's .Rproj in +RStudio instance and debug the issues when calling renv::restore(). +} diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 76d629ef..a7ece606 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -32,7 +32,7 @@ if (Sys.getenv("DONT_DOWNLOAD_JDBC_DRIVERS", "") != "TRUE") { tableSuffix <- paste0(substr(.Platform$OS.type, 1, 3), format(Sys.time(), "%y%m%d%H%M%S"), sample(1:100, 1)) tableSuffix <- abs(digest::digest2int(tableSuffix)) -tempDir <- tempfile() # "D:" +tempDir <- tempfile() tempDir <- gsub("\\\\", "/", tempDir) # Correct windows path renvCachePath <- file.path(tempDir, "strategus/renv") moduleFolder <- file.path(tempDir, "strategus/modules")