Skip to content

Commit

Permalink
U
Browse files Browse the repository at this point in the history
  • Loading branch information
gowthamrao committed Dec 6, 2023
1 parent 3f2a1e8 commit cfe0bb1
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 16 deletions.
67 changes: 51 additions & 16 deletions R/GetConceptRecordCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@
#'
#' @param domain domains to look for concept id
#'
#' @param limitToCohort Do you wantt to limit to a cohort_definition_id?
#'
#' @param cohortDatabaseSchema Optional
#'
#' @param cohortTableName Optional
#'
#' @param cohortDefinitionId Optional
#'
#' @return
#' Returns a tibble data frame.
#'
Expand All @@ -46,6 +54,9 @@ getConceptRecordCount <- function(conceptIds = NULL,
vocabularyDatabaseSchema = cdmDatabaseSchema,
tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"),
minCellCount = 0,
cohortDatabaseSchema = NULL,
cohortTableName = NULL,
cohortDefinitionId = NULL,
domainTableName = c(
"drug_exposure",
"condition_occurrence",
Expand Down Expand Up @@ -77,6 +88,15 @@ getConceptRecordCount <- function(conceptIds = NULL,
dplyr::filter(eraTable == FALSE)
# filtering out ERA tables because they are supposed to be derived tables, and counting them is double counting

limitToCohort <- FALSE
if (all(
!is.null(cohortDatabaseSchema),
!is.null(cohortTableName),
!is.null(cohortDefinitionId)
)) {
limitToCohort <- TRUE
}

# REASON for many SQL --DISTINCT subject_count cannot be computed from aggregation query of calendar month level data
sql <- "
Expand All @@ -95,6 +115,7 @@ getConceptRecordCount <- function(conceptIds = NULL,
ON u.concept_id = c.concept_id
;
}
--PERCENTILES are difficult and will need subqueries
SELECT
{@include_concept_id} ? {@domain_concept_id} : {0} concept_id,
Expand All @@ -111,20 +132,20 @@ getConceptRecordCount <- function(conceptIds = NULL,
MIN(@domain_start_date) min_date,
MAX(@domain_start_date) max_date,
COUNT(DISTINCT @domain_start_date) unique_dates,
AVG(DATEDIFF(day, op.observation_period_start_date, @domain_start_date)) prior_obs_avg,
ROUND(AVG(DATEDIFF(day, op.observation_period_start_date, @domain_start_date)), 2) prior_obs_avg,
MIN(DATEDIFF(day, op.observation_period_start_date, @domain_start_date)) prior_obs_min,
MAX(DATEDIFF(day, op.observation_period_start_date, @domain_start_date)) prior_obs_max,
STDDEV(DATEDIFF(day, op.observation_period_start_date, @domain_start_date)) prior_obs_std,
ROUND(STDDEV(DATEDIFF(day, op.observation_period_start_date, @domain_start_date)), 2) prior_obs_std,
SUM(DATEDIFF(day, op.observation_period_start_date, @domain_start_date)) prior_obs_sum,
AVG(DATEDIFF(day, @domain_start_date, op.observation_period_end_date)) post_obs_avg,
ROUND(AVG(DATEDIFF(day, @domain_start_date, op.observation_period_end_date)), 2) post_obs_avg,
MIN(DATEDIFF(day, @domain_start_date, op.observation_period_end_date)) post_obs_min,
MAX(DATEDIFF(day, @domain_start_date, op.observation_period_end_date)) post_obs_max,
STDDEV(DATEDIFF(day, @domain_start_date, op.observation_period_end_date)) post_obs_std,
SUM(DATEDIFF(day, @domain_start_date, op.observation_period_end_date)) sum_obs_median,
AVG(DATEPART(yy, @domain_start_date) - year_of_birth) age_avg,
ROUND(STDDEV(DATEDIFF(day, @domain_start_date, op.observation_period_end_date)), 2) post_obs_std,
SUM(DATEDIFF(day, @domain_start_date, op.observation_period_end_date)) post_obs_sum,
ROUND(AVG(DATEPART(yy, @domain_start_date) - year_of_birth), 2) age_avg,
MIN(DATEPART(yy, @domain_start_date) - year_of_birth) age_min,
MAX(DATEPART(yy, @domain_start_date) - year_of_birth) age_max,
STDDEV(DATEPART(yy, @domain_start_date) - year_of_birth) age_std,
ROUND(STDDEV(DATEPART(yy, @domain_start_date) - year_of_birth), 2) age_std,
SUM(DATEPART(yy, @domain_start_date) - year_of_birth) age_sum
INTO #concept_count_table
FROM @cdm_database_schema.@domain_table dt
Expand All @@ -135,6 +156,15 @@ getConceptRecordCount <- function(conceptIds = NULL,
INNER JOIN @cdm_database_schema.person p
ON dt.person_id = p.person_id
{@limit_to_cohort} ? {
INNER JOIN
(
SELECT subject_id, cohort_start_date, cohort_end_date
FROM @cohort_database_schema.@cohort_table_name
WHERE cohort_definition_id = @cohort_definition_id
)
}
{@incidence} ? {
-- limit to first occurrence of concept id by person_id
INNER JOIN
Expand Down Expand Up @@ -203,7 +233,7 @@ getConceptRecordCount <- function(conceptIds = NULL,
tidyr::crossing(dplyr::tibble(incidence = c("Y", "N")))

existingOutput <- c()

for (i in (1:nrow(iterations))) {
rowData <- iterations[i, ]

Expand Down Expand Up @@ -254,7 +284,11 @@ getConceptRecordCount <- function(conceptIds = NULL,
use_date_month = (rowData$useDateMonth == 'Y'),
domain_table_short = rowData$domainTableShort,
domain_field_short = rowData$domainFieldShort,
calendar_type = rowData$calendarType
calendar_type = rowData$calendarType,
limit_to_cohort = limitToCohort,
cohort_database_schema = cohortDatabaseSchema,
cohort_table_name = cohortTableName,
cohort_definition_id = cohortDefinitionId
)

# Regular expression to find a comma followed by any whitespace (including line breaks) and a semicolon
Expand Down Expand Up @@ -300,15 +334,16 @@ getConceptRecordCount <- function(conceptIds = NULL,

existingOutput <- existingOutput |>
dplyr::inner_join(
domainInformation |>
domainInformation$long |>
dplyr::select(
domainTableShort,
domainFieldShort,
domainTable,
domainField
)
"domainTableShort",
"domainFieldShort",
"domainTable",
"domainField"
),
by = c("domainTableShort", "domainFieldShort")
) |>
dplyr::select(-domainFieldShort,-domainTableShort)
dplyr::select(-"domainFieldShort", -"domainTableShort")

if (!is.null(minCellCount)) {
existingOutput <-
Expand Down
11 changes: 11 additions & 0 deletions man/getConceptRecordCount.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit cfe0bb1

Please sign in to comment.