Release v0.8.1

OHDSI · Oct 11, 2023 · 78757f1 · 78757f1
2 parents 8379cb8 + 2393707
commit 78757f1
Show file tree

Hide file tree

Showing 70 changed files with 323 additions and 113 deletions.
diff --git a/.github/workflows/R_CMD_check_Hades.yaml b/.github/workflows/R_CMD_check_Hades.yaml
@@ -20,42 +20,44 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - {os: windows-latest, r: 'release'}  # Does not appear to have Java 32-bit, hence the --no-multiarch
-          - {os: macOS-latest, r: 'release'}
-          - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
-          #- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
-
+          - {os: windows-latest, r: '4.2.3', rtools: '42', rspm: "https://cloud.r-project.org"}
+          - {os: macOS-latest, r: '4.2.3', rtools: '42', rspm: "https://cloud.r-project.org"}
+          - {os: ubuntu-20.04, r: '4.2.3', rtools: '42', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
+          - {os: windows-latest, r: 'release', rtools: '', rspm: "https://cloud.r-project.org"}
+          - {os: macOS-latest, r: 'release', rtools: '', rspm: "https://cloud.r-project.org"}
+          - {os: ubuntu-20.04, r: 'release', rtools: '', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
     env:
+      GITHUB_PAT: ${{ secrets.GH_TOKEN }}
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
       RSPM: ${{ matrix.config.rspm }}
-      CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM_SCHEMA }}
+      CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM54_SCHEMA }}
       CDM5_ORACLE_OHDSI_SCHEMA: ${{ secrets.CDM5_ORACLE_OHDSI_SCHEMA }}
       CDM5_ORACLE_PASSWORD: ${{ secrets.CDM5_ORACLE_PASSWORD }}
       CDM5_ORACLE_SERVER: ${{ secrets.CDM5_ORACLE_SERVER }}
       CDM5_ORACLE_USER: ${{ secrets.CDM5_ORACLE_USER }}
-      CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM_SCHEMA }}
+      CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM54_SCHEMA }}
       CDM5_POSTGRESQL_OHDSI_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_OHDSI_SCHEMA }}
       CDM5_POSTGRESQL_PASSWORD: ${{ secrets.CDM5_POSTGRESQL_PASSWORD }}
       CDM5_POSTGRESQL_SERVER: ${{ secrets.CDM5_POSTGRESQL_SERVER }}
       CDM5_POSTGRESQL_USER: ${{ secrets.CDM5_POSTGRESQL_USER }}
-      CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM_SCHEMA }}
+      CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM54_SCHEMA }}
       CDM5_SQL_SERVER_OHDSI_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_OHDSI_SCHEMA }}
       CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }}
       CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }}
       CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }}
-#      CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM_SCHEMA }}
-#      CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }}
-#      CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }}
-#      CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }}
-#      CDM5_REDSHIFT_USER: ${{ secrets.CDM5_REDSHIFT_USER }}
-      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
-
+      CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM54_SCHEMA }}
+      CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }}
+      CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }}
+      CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }}
+      CDM5_REDSHIFT_USER: ${{ secrets.CDM5_REDSHIFT_USER }}
+
     steps:
       - uses: actions/checkout@v2
 
       - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.r }}
+          rtools-version: ${{ matrix.config.rtools }}
 
       - uses: r-lib/actions/setup-tinytex@v2
 
@@ -79,6 +81,11 @@ jobs:
       - name: Install system dependencies
         if: runner.os == 'Linux'
         run: |
+          sudo apt-get install -y make
+          sudo apt-get install -y default-jdk
+          sudo apt-get install -y libcurl4-openssl-dev
+          sudo apt-get install -y libssl-dev
+          sudo apt-get install -y libglpk-dev        
           while read -r cmd
           do
             eval sudo $cmd

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: CohortGenerator
 Type: Package
 Title: An R Package for Cohort Generation Against the OMOP CDM
-Version: 0.8.0
-Date: 2023-03-10
+Version: 0.8.1
+Date: 2023-10-10
 Authors@R: c(
   person("Anthony", "Sena", email = "[email protected]", role = c("aut", "cre")),
   person("Jamie", "Gilbert", role = c("aut")),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,14 @@
+CohortGenerator 0.8.1
+=======================
+- Include cohorts with 0 people in cohort counts (Issue #91).
+- Use numeric for cohort ID (Issue #98)
+- Allow big ints for target pairs (#103)
+- Pass `tempEmulationSchema` when creating negative control cohorts (#104)
+- Target CDM v5.4 for unit tests (#119)
+- Fix for subset references (#115)
+- Allow for subset cohort name templating (#118)
+- Allow all entries with limit operator and do not require > 0 days follow up (#112)
+
 CohortGenerator 0.8.0
 =======================
 - New feature: cohort subsetting (Issue #67).

diff --git a/R/CohortCount.R b/R/CohortCount.R
@@ -69,9 +69,14 @@ getCohortCounts <- function(connectionDetails = NULL,
     if (!is.null(cohortDefinitionSet)) {
       counts <- merge(
         x = counts,
-        y = cohortDefinitionSet,
+        y = cohortDefinitionSet[cohortDefinitionSet$cohortId %in% cohortIds, ],
         by = "cohortId",
-        all.x = TRUE
+        all.y = TRUE
+      )
+      counts <- transform(
+        counts,
+        cohortEntries = ifelse(is.na(cohortEntries), 0L, cohortEntries),
+        cohortSubjects = ifelse(is.na(cohortSubjects), 0L, cohortSubjects)
       )
     }
     return(counts)

diff --git a/R/Incremental.R b/R/Incremental.R
@@ -187,7 +187,7 @@ saveIncremental <- function(data, fileName, ...) {
     if (length(idx) > 0) {
       previousData <- previousData[-idx, ]
     }
-    data <- dplyr::bind_rows(previousData, data)
+    data <- rbind(previousData, data)
   }
   .writeCsv(x = data, file = fileName)
 }

diff --git a/R/NegativeControlCohorts.R b/R/NegativeControlCohorts.R
@@ -135,6 +135,7 @@ generateNegativeControlOutcomeCohorts <- function(connectionDetails = NULL,
   DatabaseConnector::insertTable(
     connection = connection,
     data = negativeControlOutcomeCohortSet,
+    tempEmulationSchema = tempEmulationSchema,
     tableName = "#nc_set",
     camelCaseToSnakeCase = TRUE,
     dropTableIfExists = TRUE,

diff --git a/R/SubsetDefinitions.R b/R/SubsetDefinitions.R
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+.defaultNameTemplate <- "@baseCohortName - @subsetDefinitionName @operatorNames"
+
 # CohortSubsetDefinition ------------------------------
 #' @title Cohort Subset Definition
 #' @export
@@ -23,6 +25,8 @@ CohortSubsetDefinition <- R6::R6Class(
   classname = "CohortSubsetDefinition",
   private = list(
     .name = "",
+    .operatorNameConcatString = "",
+    .subsetCohortNameTemplate = "",
     .definitionId = integer(0),
     .subsetOperators = list(),
     .targetOutputPairs = list(),
@@ -45,6 +49,8 @@ CohortSubsetDefinition <- R6::R6Class(
         self$definitionId <- definition$definitionId
         self$targetOutputPairs <- definition$targetOutputPairs
         self$subsetOperators <- lapply(definition$subsetOperators, private$createSubset)
+        self$operatorNameConcatString <- definition$operatorNameConcatString
+        self$subsetCohortNameTemplate <- definition$subsetCohortNameTemplate
       }
       self
     },
@@ -59,7 +65,9 @@ CohortSubsetDefinition <- R6::R6Class(
           operator$toList()
         }),
         packageVersion = jsonlite::unbox(as.character(utils::packageVersion(utils::packageName()))),
-        identifierExpression = jsonlite::unbox(as.character(private$.identifierExpression))
+        identifierExpression = jsonlite::unbox(as.character(private$.identifierExpression)),
+        operatorNameConcatString = jsonlite::unbox(as.character(private$.operatorNameConcatString)),
+        subsetCohortNameTemplate = jsonlite::unbox(as.character(private$.subsetCohortNameTemplate))
       )
     },
     #' to JSON
@@ -75,7 +83,7 @@ CohortSubsetDefinition <- R6::R6Class(
     #' @param overwrite if a subset operator of the same ID is present, replace it with a new definition
     addSubsetOperator = function(subsetOperator) {
       checkmate::assertR6(subsetOperator, "SubsetOperator")
-      private$.subsetOperators <- c(private$.subsetOperators, subsetOperator)
+      private$.subsetOperators <- c(private$.subsetOperators, subsetOperator$clone(deep = TRUE))
       self
     },
 
@@ -135,15 +143,20 @@ CohortSubsetDefinition <- R6::R6Class(
         dplyr::select("cohortName") %>%
         dplyr::pull()
 
-      opNames <- lapply(self$subsetOperators, function(x) {
-        x$name
-      })
-      paste(baseName, "-", self$name, paste0(opNames, collapse = ", "))
+      opNameList <- lapply(self$subsetOperators, function(x) x$name)
+      opNames <- paste0(opNameList, collapse = self$operatorNameConcatString)
+
+      SqlRender::render(self$subsetCohortNameTemplate,
+        baseCohortName = baseName,
+        subsetDefinitionName = self$name,
+        operatorNames = opNames,
+        warnOnMissingParameters = FALSE
+      )
     },
     #' Set the targetOutputPairs to be added to a cohort definition set
     #' @param targetIds   list of cohort ids to apply subsetting operations to
     setTargetOutputPairs = function(targetIds) {
-      checkmate::assertIntegerish(targetIds, min.len = 1)
+      checkmate::assertIntegerish(targetIds, min.len = 1, upper = 10e11)
       definitionId <- self$definitionId
       targetOutputPairs <- list()
 
@@ -177,7 +190,7 @@ CohortSubsetDefinition <- R6::R6Class(
         targetOutputPairs,
         function(targetOutputPair) {
           targetOutputPair <- as.numeric(targetOutputPair)
-          checkmate::assertIntegerish(targetOutputPair, len = 2)
+          checkmate::assertIntegerish(targetOutputPair, len = 2, upper = 10e11)
           checkmate::assertFALSE(targetOutputPair[[1]] == targetOutputPair[[2]])
           targetOutputPair
         }
@@ -189,7 +202,10 @@ CohortSubsetDefinition <- R6::R6Class(
     #' @field subsetOperators list of subset operations
     subsetOperators = function(subsetOperators) {
       if (missing(subsetOperators)) {
-        return(private$.subsetOperators)
+        # We don't want to return references to the operators in case users modify them after this
+        return(lapply(private$.subsetOperators, function(x) {
+          x$clone(deep = TRUE)
+        }))
       }
 
       checkmate::assertList(subsetOperators, types = "SubsetOperator")
@@ -206,6 +222,46 @@ CohortSubsetDefinition <- R6::R6Class(
       private$.name <- name
       self
     },
+    #' @field subsetCohortNameTemplate template string for formatting resulting cohort names
+    subsetCohortNameTemplate = function(subsetCohortNameTemplate) {
+      if (missing(subsetCohortNameTemplate)) {
+        return(private$.subsetCohortNameTemplate)
+      }
+
+      if (is.null(subsetCohortNameTemplate)) {
+        subsetCohortNameTemplate <- ""
+      }
+
+      checkmate::assertCharacter(subsetCohortNameTemplate)
+
+      if (subsetCohortNameTemplate == "") {
+        # Set to default subsetCohortNameTemplate
+        subsetCohortNameTemplate <- .defaultNameTemplate
+      }
+
+      private$.subsetCohortNameTemplate <- subsetCohortNameTemplate
+      self
+    },
+
+    #' @field operatorNameConcatString string used when concatenating operator names together
+    operatorNameConcatString = function(operatorNameConcatString) {
+      if (missing(operatorNameConcatString)) {
+        return(private$.operatorNameConcatString)
+      }
+
+      if (is.null(operatorNameConcatString)) {
+        operatorNameConcatString <- ""
+      }
+
+      if (operatorNameConcatString == "") {
+        operatorNameConcatString <- ", "
+      }
+      checkmate::assertCharacter(operatorNameConcatString)
+
+      private$.operatorNameConcatString <- operatorNameConcatString
+      self
+    },
+
     #' @field definitionId numeric definition id
     definitionId = function(definitionId) {
       if (missing(definitionId)) {
@@ -243,17 +299,29 @@ CohortSubsetDefinition <- R6::R6Class(
 #' @description
 #' Create subset definition from subset objects
 #' @export
-#' @param name                     Name of definition
-#' @param definitionId             Definition identifier
-#' @param subsetOperators          list of subsetOperator instances to apply
-#' @param identifierExpression     Expression (or string that converts to expression) that returns an id for an output cohort
-#'                                 the default is dplyr::expr(targetId * 1000 + definitionId)
-createCohortSubsetDefinition <- function(name, definitionId, subsetOperators, identifierExpression = NULL) {
+#' @param name                      Name of definition
+#' @param definitionId              Definition identifier
+#' @param subsetOperators           list of subsetOperator instances to apply
+#' @param identifierExpression      Expression (or string that converts to expression) that returns an id for an output cohort
+#'                                  the default is dplyr::expr(targetId * 1000 + definitionId)
+#' @param subsetCohortNameTemplate  (optional) SqlRender string template for formatting names of resulting subset cohorts
+#'                                  Can use the variables @baseCohortName, @subsetDefinitionName and @operatorNames.
+#'                                  This is applied when adding the subset definition to a cohort definition set.
+#' @param operatorNameConcatString  (optional) String to concatenate operator names together when outputting resulting cohort
+#'                                   name
+createCohortSubsetDefinition <- function(name,
+                                         definitionId,
+                                         subsetOperators,
+                                         identifierExpression = NULL,
+                                         operatorNameConcatString = "",
+                                         subsetCohortNameTemplate = "") {
   subsetDef <- CohortSubsetDefinition$new()
   subsetDef$name <- name
   subsetDef$definitionId <- definitionId
   subsetDef$subsetOperators <- subsetOperators
   subsetDef$identifierExpression <- identifierExpression
+  subsetDef$operatorNameConcatString <- operatorNameConcatString
+  subsetDef$subsetCohortNameTemplate <- subsetCohortNameTemplate
   return(subsetDef)
 }
 

diff --git a/R/SubsetQueryBuilders.R b/R/SubsetQueryBuilders.R
@@ -96,6 +96,7 @@ LimitSubsetQb <- R6::R6Class(
         follow_up_time = private$operator$followUpTime,
         limit_to = private$operator$limitTo,
         prior_time = private$operator$priorTime,
+        use_prior_fu_time = private$operator$followUpTime > 0 || private$operator$priorTime > 0,
         output_table = self$getTableObjectId(),
         target_table = targetTable,
         warnOnMissingParameters = TRUE

diff --git a/R/Subsets.R b/R/Subsets.R
@@ -703,7 +703,7 @@ LimitSubsetOperator <- R6::R6Class(
     getAutoGeneratedName = function() {
       nameString <- ""
 
-      if (self$limitTo != "") {
+      if (self$limitTo != "all") {
         nameString <- paste0(nameString, tolower(SqlRender::camelCaseToTitleCase(self$limitTo)), " occurence")
       } else {
         nameString <- paste0(nameString, "occurs")
@@ -781,8 +781,15 @@ LimitSubsetOperator <- R6::R6Class(
       if (missing(limitTo)) {
         return(private$.limitTo)
       }
+
       checkmate::assertCharacter(limitTo)
-      checkmate::assertChoice(limitTo, choices = c("", "firstEver", "earliestRemaining", "latestRemaining", "lastEver"))
+
+      # maintain support for old versions
+      if (limitTo == "") {
+        limitTo <- "all"
+      }
+
+      checkmate::assertChoice(limitTo, choices = c("all", "firstEver", "earliestRemaining", "latestRemaining", "lastEver"))
       private$.limitTo <- limitTo
       self
     },
@@ -846,15 +853,15 @@ LimitSubsetOperator <- R6::R6Class(
 createLimitSubset <- function(name = NULL,
                               priorTime = 0,
                               followUpTime = 0,
-                              limitTo = "",
+                              limitTo = "all",
                               calendarStartDate = NULL,
                               calendarEndDate = NULL) {
-  if (priorTime == 0 & followUpTime == 0 & limitTo == "" & is.null(calendarStartDate) & is.null(calendarEndDate)) {
-    stop("No limit criteria specified")
+  if (limitTo == "" || is.null(limitTo)) {
+    limitTo <- "all"
   }
 
-  if ((is.null(limitTo) | limitTo == "") & (priorTime > 0 | followUpTime > 0)) {
-    stop("If specifying observation prior time, must specifcy follow up time")
+  if (priorTime == 0 & followUpTime == 0 & limitTo == "all" & is.null(calendarStartDate) & is.null(calendarEndDate)) {
+    stop("No limit criteria specified")
   }
 
   subset <- LimitSubsetOperator$new()

diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/articles/CreatingCohortSubsetDefinitions.html b/docs/articles/CreatingCohortSubsetDefinitions.html