From 0d7e464f2db1b43e2f5da26c481bb0cb6ebd8b42 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Apr 2023 07:50:18 +0200 Subject: [PATCH 01/48] define SIC data structure --- inst/extdata/config/data-sources.json | 392 ++++++++++++++++++++++++++ 1 file changed, 392 insertions(+) diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index a7ceea6e..12e66608 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -9385,5 +9385,397 @@ } } } + }, + { + "name": "sic", + "url": "https://physionet.org/content/sicdb/1.0.5/", + "id_cfg": { + "patient": { + "id": "patientid", + "position": 1, + "start": "firstadmission", + "end": "offsetofdeath", + "table": "cases" + }, + "icustay": { + "id": "caseid", + "position": 2, + "start": "offsetafterfirstadmission", + "end": "timeofstay", + "table": "cases" + } + }, + "tables": { + "cases": { + "files": "cases.csv.gz", + "defaults": { + "index_var": "offsetafterfirstadmission", + "time_vars": ["offsetafterfirstadmission"] + }, + "cols": { + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "patientid": { + "name": "PatientID", + "spec": "col_integer" + }, + "admissionyear": { + "name": "AdmissionYear", + "spec": "col_integer" + }, + "timeofstay": { + "name": "TimeOfStay", + "spec": "col_integer" + }, + "saps3": { + "name": "saps3", + "spec": "col_double" + }, + "hospitaldischargetype": { + "name": "HospitalDischargeType", + "spec": "col_integer" + }, + "dischargestate": { + "name": "DischargeState", + "spec": "col_integer" + }, + "dischargeunit": { + "name": "DischargeUnit", + "spec": "col_integer" + }, + "offsetofdeath": { + "name": "OffsetOfDeath", + "spec": "col_integer" + }, + "estimatedsurvivalobservationtime": { + "name": "EstimatedSurvivalObservationTime", + "spec": "col_integer" + }, + "sex": { + "name": "Sex", + "spec": "col_integer" + }, + "weightonadmission": { + "name": "WeightOnAdmission", + "spec": "col_double" + }, + 
"heightonadmission": { + "name": "HeightOnAdmission", + "spec": "col_double" + }, + "ageonadmission": { + "name": "AgeOnAdmission", + "spec": "col_integer" + }, + "hospitalunit": { + "name": "HospitalUnit", + "spec": "col_integer" + }, + "referringunit": { + "name": "ReferringUnit", + "spec": "col_integer" + }, + "icd10main": { + "name": "ICD10Main", + "spec": "col_character" + }, + "icd10maintext": { + "name": "ICD10MainText", + "spec": "col_character" + }, + "diagnosist2": { + "name": "DiagnosisT2", + "spec": "col_character" + }, + "surgicalsite": { + "name": "SurgicalSite", + "spec": "col_integer" + }, + "hoursofcrrt": { + "name": "HoursOfCRRT", + "spec": "col_integer" + }, + "admissionformhassepsis": { + "name": "AdmissionFormHasSepsis", + "spec": "col_integer" + }, + "orbisdataavailable": { + "name": "OrbisDataAvailable", + "spec": "col_character" + }, + "heartsurgeryadditionaldata": { + "name": "HeartSurgeryAdditionalData", + "spec": "col_integer" + }, + "heartsurgerycrossclamptime": { + "name": "HeartSurgeryCrossClampTime", + "spec": "col_integer" + }, + "heartsurgerybeginoffset": { + "name": "HeartSurgeryBeginOffset", + "spec": "col_integer" + }, + "heartsurgeryendoffset": { + "name": "HeartSurgeryEndOffset", + "spec": "col_integer" + }, + "offsetafterfirstadmission": { + "name": "OffsetAfterFirstAdmission", + "spec": "col_integer" + } + } + }, + "d_references": { + "files": "d_references.csv.gz", + "cols": { + "referenceglobalid": { + "name": "ReferenceGlobalID", + "spec": "col_integer" + }, + "referencevalue": { + "name": "ReferenceValue", + "spec": "col_character" + }, + "referencename": { + "name": "ReferenceName", + "spec": "col_character" + }, + "referencedescription": { + "name": "ReferenceDescription", + "spec": "col_character" + }, + "referenceunit": { + "name": "ReferenceUnit", + "spec": "col_character" + }, + "referenceorder": { + "name": "ReferenceOrder", + "spec": "col_integer" + }, + "referencetype": { + "name": "ReferenceType", + "spec": 
"col_integer" + }, + "data": { + "name": "Data", + "spec": "col_character" + } + } + }, + "data_float_h": { + "files": "data_float_h.csv.gz", + "defaults": { + "index_var": "offset", + "time_vars": ["offset"] + }, + "cols": { + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "dataid": { + "name": "DataID", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "val": { + "name": "Val", + "spec": "col_double" + }, + "cnt": { + "name": "cnt", + "spec": "col_integer" + }, + "rawdata": { + "name": "rawdata", + "spec": "col_character" + } + }, + "partitioning": { + "col": "dataid", + "breaks": [702, 703, 705, 708, 709, 710, 715, 717, 719, 724, 725, + 731, 773, 2018, 2274, 2278, 2280, 2283, 2290, 3056, 3059, 3071] + } + }, + "data_ref": { + "files": "data_ref.csv.gz", + "defaults": { + "index_var": "offsetafterfirstadmission", + "time_vars": ["offsetafterfirstadmission"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "refid": { + "name": "RefID", + "spec": "col_integer" + }, + "customfieldid": { + "name": "CustomFieldID", + "spec": "col_integer" + } + } + }, + "laboratory": { + "files": "laboratory.csv.gz", + "defaults": { + "index_var": "offset", + "val_var": "laboratoryvalue", + "time_vars": ["offset"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "laboratoryid": { + "name": "LaboratoryID", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "laboratoryvalue": { + "name": "LaboratoryValue", + "spec": "col_double" + }, + "laboratorytype": { + "name": "LaboratoryType", + "spec": "col_integer" + } + } + }, + "medication": { + "files": "medication.csv.gz", + "defaults": { + "index_var": "offset", + "time_vars": ["offset", "offsetdrugend"] + }, + "cols": { + "id": { + "name": "id", + "spec": 
"col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "patientid": { + "name": "PatientID", + "spec": "col_integer" + }, + "drugid": { + "name": "DrugID", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "offsetdrugend": { + "name": "OffsetDrugEnd", + "spec": "col_integer" + }, + "issingledose": { + "name": "IsSingleDose", + "spec": "col_logical" + }, + "amount": { + "name": "Amount", + "spec": "col_double" + }, + "amountperminute": { + "name": "AmountPerMinute", + "spec": "col_double" + }, + "givenstate": { + "name": "GivenState", + "spec": "col_integer" + } + } + }, + "data_range": { + "files": "data_range.csv.gz", + "defaults": { + "index_var": "offset", + "time_vars": ["offset", "offsetend"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "dataid": { + "name": "DataID", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "offsetend": { + "name": "OffsetEnd", + "spec": "col_integer" + }, + "data": { + "name": "Data", + "spec": "col_character" + } + } + }, + "unitlog": { + "files": "unitlog.csv.gz", + "defaults": { + "index_var": "offset", + "time_vars": ["offset", "offsetend"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "patientid": { + "name": "PatientID", + "spec": "col_integer" + }, + "logstate": { + "name": "LogState", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "hospitalunit": { + "name": "HospitalUnit", + "spec": "col_integer" + } + } + } + } } ] From 9ffcb4d3ffcc75b68445556b7d2bab10ca9d8cd2 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Apr 2023 07:50:38 +0200 Subject: [PATCH 02/48] add SIC to auto attach --- R/utils-file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/R/utils-file.R b/R/utils-file.R index a970f3f5..3f961d62 100644 --- a/R/utils-file.R +++ b/R/utils-file.R @@ -194,7 +194,7 @@ auto_attach_srcs <- function() { res <- sys_env("RICU_SRC_LOAD", unset = NA_character_) if (is.na(res)) { - c("mimic", "mimic_demo", "eicu", "eicu_demo", "hirid", "aumc", "miiv") + c("mimic", "mimic_demo", "eicu", "eicu_demo", "hirid", "aumc", "miiv", "sic") } else { strsplit(res, ",")[[1L]] } From 2884e3f0e55b95fd5af6d7f39564e18a05a96f78 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Apr 2023 07:50:55 +0200 Subject: [PATCH 03/48] add SIC loading helpers --- R/data-load.R | 11 +++++++++++ R/data-utils.R | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ R/utils-misc.R | 2 ++ 3 files changed, 65 insertions(+) diff --git a/R/data-load.R b/R/data-load.R index 711f6d43..316aeb22 100644 --- a/R/data-load.R +++ b/R/data-load.R @@ -168,6 +168,17 @@ load_difftime.miiv_tbl <- function(x, rows, cols = colnames(x), load_mihi(x, {{ rows }}, cols, id_hint, time_vars) } +#' @rdname load_src +#' @export +load_difftime.sic_tbl <- function(x, rows, cols = colnames(x), + id_hint = id_vars(x), + time_vars = ricu::time_vars(x), ...) { + + warn_dots(...) + # TODO: consider renaming fun to reflect its use for SICdb + load_eiau(x, {{ rows }}, cols, id_hint, time_vars, s_as_mins) +} + #' @rdname load_src #' @export load_difftime.character <- function(x, src, ...) 
{ diff --git a/R/data-utils.R b/R/data-utils.R index 5dcac99a..f880634a 100644 --- a/R/data-utils.R +++ b/R/data-utils.R @@ -129,6 +129,29 @@ id_orig_helper.miiv_env <- function(x, id) { as_id_tbl(res, id, by_ref = TRUE) } +#' @rdname data_utils +#' @export +id_orig_helper.sic_env <- function(x, id) { + + if (!identical(id, "patientid")) { + return(NextMethod()) + } + + cfg <- as_id_cfg(x)[id == id_var_opts(x)] + + assert_that(length(cfg) == 1L) + + sta <- field(cfg, "start") + age <- "admissionyear" + + res <- as_src_tbl(x, field(cfg, "table")) + res <- res[, c(id, sta, age)] + res <- res[, c(sta, age) := shift_year(get(sta), get(age))] + + as_id_tbl(res, id, by_ref = TRUE) +} + + #' @export id_orig_helper.default <- function(x, ...) stop_generic(x, .Generic) @@ -332,6 +355,35 @@ id_win_helper.miiv_env <- function(x) { order_rename(res, ids, sta, end) } +#' @rdname data_utils +#' @export +id_win_helper.sic_env <- function(x) { + cfg <- sort(as_id_cfg(x), decreasing = TRUE) + + ids <- field(cfg, "id") + sta <- field(cfg, "start") + end <- field(cfg, "end") + + tbl <- as_src_tbl(x, unique(field(cfg, "table"))) + + mis <- setdiff(sta, colnames(tbl)) + + res <- load_src(tbl, cols = c(ids, intersect(sta, colnames(tbl)), end)) + + assert_that(length(mis) == 1L) + res[, firstadmission := 0L] + + res <- res[, c(sta, end) := lapply(.SD, s_as_mins), .SDcols = c(sta, end)] + res[, timeofstay := offsetafterfirstadmission + timeofstay] + + res <- setcolorder(res, c(ids, sta, end)) + res <- rename_cols(res, c(ids, paste0(ids, "_start"), + paste0(ids, "_end")), by_ref = TRUE) + + as_id_tbl(res, ids[2L], by_ref = TRUE) +} + + #' @export id_win_helper.default <- function(x) stop_generic(x, .Generic) diff --git a/R/utils-misc.R b/R/utils-misc.R index d3a1de5f..8c14e4d3 100644 --- a/R/utils-misc.R +++ b/R/utils-misc.R @@ -245,6 +245,8 @@ cat_line <- function(...) 
{ ms_as_mins <- function(x) min_as_mins(as.integer(x / 6e4)) +s_as_mins <- function(x) min_as_mins(as.integer(x / 60)) + min_as_mins <- function(x) as.difftime(x, units = "mins") digest_lst <- function(x) as.character(openssl::md5(serialize(x, NULL))) From 2b39787b60fa2269c07db47a2f9d84c5309f14ff Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:22:19 +0200 Subject: [PATCH 04/48] add callback hooks to postprocess tbls on import --- R/setup-import.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index 95640867..28e9c3af 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -257,7 +257,8 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, rawf <- raw_file_name(x) file <- file.path(dir, rawf) name <- tbl_name(x) - + callback <- tbl_callback(x) + exp_row <- n_row(x) if (is.na(exp_row)) { @@ -268,17 +269,18 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, if (length(file) == 1L) { - callback <- function(x, pos, ...) { - report_problems(x, rawf) - split_write(x, pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, - progress, name, tick) + process_chunk <- function(x, pos, ...) { + report_problems(x, rawf) + split_write(callback(x), pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, + progress, name, tick) } + if (grepl("\\.gz$", file)) { file <- gunzip(file, tempdir) } - readr::read_csv_chunked(file, callback, chunk_length, col_types = spec, + readr::read_csv_chunked(file, process_chunk, chunk_length, col_types = spec, progress = FALSE, ...) if (is.na(exp_row)) { @@ -291,8 +293,7 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, dat <- readr::read_csv(file[i], col_types = spec, progress = FALSE, ...) 
report_problems(dat, rawf[i]) - - split_write(dat, pfun, tempdir, i, progress, name, tick) + split_write(callback(dat), pfun, tempdir, i, progress, name, tick) } } @@ -356,6 +357,7 @@ csv_to_fst <- function(x, dir, progress = NULL, ...) { raw <- raw_file_name(x) src <- file.path(dir, raw) dst <- file.path(dir, fst_file_name(x)) + callback <- tbl_callback(x) assert_that(length(x) == 1L, length(src) == 1L, length(dst) == 1L) @@ -365,6 +367,7 @@ csv_to_fst <- function(x, dir, progress = NULL, ...) { report_problems(dat, raw) + dat <- callback(dat) dat <- rename_cols(setDT(dat), ricu_cols(x), orig_cols(x)) fst::write_fst(dat, dst, compress = 100L) From d459959c85853dec2c703569f27a80409b2e2f54 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:24:28 +0200 Subject: [PATCH 05/48] add callback to deserialise sicdb data_float_h --- R/callback-tbl.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 R/callback-tbl.R diff --git a/R/callback-tbl.R b/R/callback-tbl.R new file mode 100644 index 00000000..6777e073 --- /dev/null +++ b/R/callback-tbl.R @@ -0,0 +1,24 @@ + +sic_data_float_h <- function(dat, ...) 
{ + hexstring_to_float <- function(x) { + if (is.na(x)) { + return(NA_real_) + } + hexstring <- substring(x, seq(1, 482, 2), seq(2, 482, 2)) + bytes <- as.raw(strtoi(hexstring[-1], base = 16)) + floats <- readBin(bytes, numeric(), length(bytes) %/% 4, 4, endian = "little") + ifelse(floats == 0, NA_real_, floats) + } + + setDT(dat) + dat[, c("rawdata") := lapply(get("rawdata"), hexstring_to_float)] # TODO: remove hard coding of rawdata and derive from JSON config + dat <- dat[, .( + Offset = Offset + 60 * (0:(sapply(rawdata, length)-1)), + Val = Val, + cnt = cnt, + rawdata = unlist(rawdata) + ), + by = .(id, CaseID, DataID) + ] + dat +} \ No newline at end of file From 32b8f60a5a27aeed5bdd3858f3cba2328d5d2d6b Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:28:35 +0200 Subject: [PATCH 06/48] add missing tbl_callback function --- R/config-utils.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/R/config-utils.R b/R/config-utils.R index faf09d75..055b12ad 100644 --- a/R/config-utils.R +++ b/R/config-utils.R @@ -387,6 +387,17 @@ partition_col <- function(x, orig_names = FALSE) { col } +tbl_callback <- function(x){ + x <- as_tbl_cfg(x) + assert_that(length(x) == 1L) + + if ("callback" %in% vctrs::fields(x)) { + str_to_fun(vctrs::field(x, "callback")) + } else { + identity_callback + } +} + #' @export n_tick.tbl_cfg <- function(x) { From 6afe44c91c5a8c8916a6f26e8ac0d54e7a8f2a10 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:28:40 +0200 Subject: [PATCH 07/48] add sic_itm inspired by hrd_itm --- R/concept-utils.R | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/R/concept-utils.R b/R/concept-utils.R index 114bd1ea..6fb0c339 100644 --- a/R/concept-utils.R +++ b/R/concept-utils.R @@ -200,6 +200,30 @@ get_hirid_ids <- function(x, ids) { load_id("variables", x, .data$id %in% .env$ids, cols = "unit", id_var = "id") } +#' @rdname data_items +#' @export +init_itm.sic_itm 
<- function(x, table, sub_var, ids, + callback = "identity_callback", ...) { + + assert_that(is.string(table), has_length(ids), + is.character(ids) || is_intish(ids)) + + x[["table"]] <- table + + units <- get_sic_ids(x, ids) + units <- rename_cols(rm_na(units), sub_var, "referenceglobalid") + + todo <- c("ids", "units") + x[todo] <- mget(todo) + + complete_tbl_itm(x, callback, sub_var, ...) +} + +get_sic_ids <- function(x, ids) { + load_id("d_references", x, .data$referenceglobalid %in% .env$ids, cols = "referenceunit", id_var = "referenceglobalid") +} + + #' @param unit_val String valued unit to be used in case no `unit_var` is #' available for the given table #' @@ -331,6 +355,10 @@ prepare_query.sel_itm <- prep_sel #' @export prepare_query.hrd_itm <- prep_sel +#' @keywords internal +#' @export +prepare_query.sic_itm <- prep_sel + #' @keywords internal #' @export prepare_query.rgx_itm <- function(x) { @@ -547,6 +575,17 @@ do_callback.hrd_itm <- function(x, ...) { NextMethod() } +#' @keywords internal +#' @export +do_callback.sic_itm <- function(x, ...) { + # TODO: generalise and combine with do_callback.hrd_itm + if (is.null(get_itm_var(x, "unit_var"))) { + x <- try_add_vars(x, unit_var = "referenceunit") + } + + NextMethod() +} + #' @keywords internal #' @export do_callback.col_itm <- function(x, ...) 
{ @@ -605,6 +644,19 @@ do_itm_load.hrd_itm <- function(x, id_type = "icustay", interval = hours(1L)) { res } +#' @export +do_itm_load.sic_itm <- function(x, id_type = "icustay", interval = hours(1L)) { + + res <- NextMethod() + + if (is.null(get_itm_var(x, "unit_var"))) { + unt <- x[["units"]] + res <- merge(res, unt, by = get_itm_var(x, "sub_var"), all.x = TRUE) + } + + res +} + #' @export do_itm_load.col_itm <- function(x, id_type = "icustay", interval = hours(1L)) { From bc41f88e20bd2f4a376946421157dc39e0e19a4b Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:30:22 +0200 Subject: [PATCH 08/48] adjust data_float_h config to recent changes --- inst/extdata/config/data-sources.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index 12e66608..d7ef2cda 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -9568,6 +9568,7 @@ "files": "data_float_h.csv.gz", "defaults": { "index_var": "offset", + "val_var": "rawdata", "time_vars": ["offset"] }, "cols": { @@ -9600,7 +9601,8 @@ "col": "dataid", "breaks": [702, 703, 705, 708, 709, 710, 715, 717, 719, 724, 725, 731, 773, 2018, 2274, 2278, 2280, 2283, 2290, 3056, 3059, 3071] - } + }, + "callback": "sic_data_float_h" }, "data_ref": { "files": "data_ref.csv.gz", From b53c592f50ae56f4dbd160cd2ea05dbb4e1293f3 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:33:27 +0200 Subject: [PATCH 09/48] add hr and crea as examples for sicdb --- inst/extdata/config/concept-dict.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index 10fd7f3c..91c7f4a0 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -1375,6 +1375,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 367, + "table": 
"laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3060,6 +3068,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 708, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, From 3ca5b04201e38a3561fb7557cb1fa56bcf2a50fa Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 09:30:16 +0200 Subject: [PATCH 10/48] add sex and death concepts for sic --- R/callback-itm.R | 17 +++++++++++++++++ inst/extdata/config/concept-dict.json | 18 ++++++++++++++++++ inst/extdata/config/data-sources.json | 2 +- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/R/callback-itm.R b/R/callback-itm.R index 5f2521be..7f4d48ba 100644 --- a/R/callback-itm.R +++ b/R/callback-itm.R @@ -195,6 +195,16 @@ mimic_age <- function(x) { eicu_age <- function(x) as.numeric(ifelse(x == "> 89", 90, x)) +sic_sex <- function(x) { + ifelse( + x == 735, + "Male", + ifelse(x == 736, + "Female", + NA_character_ + )) +} + hirid_death <- function(x, val_var, sub_var, env, ...) { dis <- "discharge_status" @@ -747,6 +757,13 @@ aumc_death <- function(x, val_var, ...) { x } +sic_death <- function(x, val_var, adm_time, ...) { + idx <- index_var(x) + + x <- x[, c(val_var) := is_true(get(idx) - (get(adm_time) + secs(get(val_var))) < hours(72L))] + x +} + aumc_bxs <- function(x, val_var, dir_var, ...) 
{ x <- x[get(dir_var) == "-", c(val_var) := -1L * get(val_var)] x diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index 91c7f4a0..a98d6bd2 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -1578,6 +1578,16 @@ "callback": "transform_fun(comp_na(`==`, 1L))", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "index_var": "offsetofdeath", + "adm_time": "offsetafterfirstadmission", + "val_var": "timeofstay", + "callback": "sic_death", + "class": "col_itm" + } ] } }, @@ -5334,6 +5344,14 @@ "callback": "apply_map(c(M = 'Male', F = 'Female'))", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "val_var": "sex", + "class": "col_itm", + "callback": "transform_fun(sic_sex)" + } ] } }, diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index d7ef2cda..d74cda96 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -9410,7 +9410,7 @@ "files": "cases.csv.gz", "defaults": { "index_var": "offsetafterfirstadmission", - "time_vars": ["offsetafterfirstadmission"] + "time_vars": ["offsetafterfirstadmission", "offsetofdeath"] }, "cols": { "caseid": { From 9396da564069a9a1834edcbc6383e573965c1bb6 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 09:36:44 +0200 Subject: [PATCH 11/48] add vitals, labs, height, and weight concepts for sic --- inst/extdata/config/concept-dict.json | 457 +++++++++++++++++++++++++- 1 file changed, 456 insertions(+), 1 deletion(-) diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index a98d6bd2..0ce2e180 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -369,6 +369,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 171, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -428,6 +436,14 @@ 
"table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 609, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -487,6 +503,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 617, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -546,6 +570,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 616, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -612,6 +644,21 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 174, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 295, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ] } }, @@ -674,6 +721,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 449, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -734,6 +789,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 456, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -796,6 +859,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 333, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -858,6 +929,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 332, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1051,6 +1130,15 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 457, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 4), 'mg/dL', 'mmol/l')", + "class": "sic_itm" + } ] } }, @@ -1111,6 +1199,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 452, + "table": "laboratory", + "sub_var": 
"laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1170,6 +1266,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 611, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1229,6 +1333,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 253, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1289,6 +1401,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 450, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1447,6 +1567,14 @@ "sub_var": "itemid", "callback": "convert_unit(binary_op(`*`, 10), 'mg/L', 'mg/dl')" } + ], + "sic": [ + { + "ids": 341, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1507,6 +1635,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [702, 705], + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -2191,6 +2327,21 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 197, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 299, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ] } }, @@ -2471,6 +2622,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 716, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2596,6 +2755,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 344, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2685,6 +2852,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 684, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2766,6 +2941,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { 
+ "ids": [348, 656], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2802,6 +2985,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 214, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2838,6 +3029,14 @@ "sub_var": "variableid", "class": "hrd_itm" } + ], + "sic": [ + { + "ids": [196, 660], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2891,6 +3090,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [217, 682], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2956,6 +3163,13 @@ "sub_var": "itemid", "callback": "convert_unit(binary_op(`*`, 2.54), 'cm', '^in')" } + ], + "sic": [ + { + "table": "cases", + "val_var": "heightonadmission", + "class": "col_itm" + } ] } }, @@ -3018,6 +3232,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [289, 658], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3081,7 +3303,7 @@ ], "sic": [ { - "ids": 708, + "ids": [708, 724], "table": "data_float_h", "sub_var": "dataid", "class": "sic_itm" @@ -3283,6 +3505,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [463, 685], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3343,6 +3573,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [465, 657], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3513,6 +3751,21 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 223, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 302, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ] } }, @@ -3583,6 +3836,14 @@ "table": "chartevents", "sub_var": 
"itemid" } + ], + "sic": [ + { + "ids": [702, 705], + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -3643,6 +3904,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 566, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3705,6 +3974,15 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 290, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 0.16114), '%')", + "class": "sic_itm" + } ] } }, @@ -3765,6 +4043,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 286, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3864,6 +4150,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 661, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3936,6 +4230,15 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 468, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 2.431), 'mg/dL')", + "class": "sic_itm" + } ] } }, @@ -4062,6 +4365,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 469, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4128,6 +4439,21 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 230, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 308, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ] } }, @@ -4408,6 +4734,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 710, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -4477,6 +4811,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 687, + "table": 
"laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4536,6 +4878,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 688, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4681,6 +5031,15 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 471, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 3.097521), 'mg/dL')", + "class": "sic_itm" + } ] } }, @@ -4741,6 +5100,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 314, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4801,6 +5168,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 689, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4852,6 +5227,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 598, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4911,6 +5294,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 597, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -5032,6 +5423,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 599, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -5144,6 +5543,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 719, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -5271,6 +5678,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [701, 704], + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -5569,6 +5984,14 @@ "sub_var": "itemid", "callback": "convert_unit(fahr_to_cels, 'C', 'f')" } + ], + "sic": [ + { + "ids": 709, + "table": "data_float_h", + "sub_var": 
"dataid", + "class": "sic_itm" + } ] } }, @@ -5666,6 +6089,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 481, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -5784,6 +6215,14 @@ "table": "outputevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 725, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -6075,6 +6514,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 301, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -6137,6 +6584,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "table": "cases", + "val_var": "weightonadmission", + "class": "col_itm", + "callback": "transform_fun(binary_op(`/`, 1000))" + } ] } } From e84ea7d593d79b0b10df75159ae454e54baed729 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 09:39:28 +0200 Subject: [PATCH 12/48] add age and los_icu concepts --- inst/extdata/config/concept-dict.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index 0ce2e180..38c226e3 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -307,6 +307,13 @@ "callback": "transform_fun(mimic_age)", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "val_var": "ageonadmission", + "class": "col_itm" + } ] } }, @@ -3684,6 +3691,14 @@ "win_type": "icustay", "class": "fun_itm" } + ], + "sic": [ + { + "table": "cases", + "val_var": "timeofstay", + "callback": "transform_fun(binary_op(`/`, 60 * 60 * 24))", + "class": "col_itm" + } ] } }, From b9350a1a98801968f0ab964ac63c5a38a3917a96 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 09:40:15 +0200 Subject: [PATCH 13/48] add most medication concepts for sic --- R/callback-itm.R | 24 +++++++ inst/extdata/config/concept-dict.json | 99 
+++++++++++++++++++++++++++ inst/extdata/config/data-sources.json | 1 + 3 files changed, 124 insertions(+) diff --git a/R/callback-itm.R b/R/callback-itm.R index 7f4d48ba..deffa031 100644 --- a/R/callback-itm.R +++ b/R/callback-itm.R @@ -620,6 +620,21 @@ aumc_rate_units <- function(mcg_to_units) { } } +sic_rate_kg <- function(x, val_var, unit_var, stop_var, env, ...) { + + g_to_mcg <- convert_unit(binary_op(`*`, 1000000), "mcg", "g") + + res <- g_to_mcg(x, val_var, unit_var) + res <- add_weight(res, env, "weight") + + res <- res[, c(val_var) := get(val_var) / get("weight")] + res <- res[, c(unit_var) := paste(get(unit_var), 'min', sep = "/kg/")] + + expand(res, index_var(x), stop_var, + keep_vars = c(id_vars(x), val_var, unit_var)) +} + + eicu_duration <- function(gap_length) { assert_that(is_interval(gap_length), is_scalar(gap_length)) @@ -641,6 +656,15 @@ aumc_dur <- function(x, val_var, stop_var, grp_var, ...) { calc_dur(x, val_var, index_var(x), stop_var, grp_var) } +default_duration <- function(x, val_var, stop_var, grp_var, ...) { + calc_dur(x, val_var, index_var(x), stop_var, grp_var) +} + +no_duration <- function(x, val_var, grp_var, ...) 
{ + calc_dur(x, val_var, index_var(x), index_var(x), grp_var) +} + + #' Used for determining vasopressor durations, `calc_dur()` will calculate #' durations by taking either per ID or per combination of ID and `grp_var` #' the minimum for `min_var` and the maximum of `max_var` and returning the diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index 38c226e3..5de3f492 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -99,6 +99,13 @@ "sub_var": "itemid", "callback": "transform_fun(set_val(TRUE))" } + ], + "sic": [ + { + "ids": [1401, 1406, 1408, 1410, 1418, 1421, 1422, 1423, 1428, 1431, 1433, 1436, 1439, 1446, 1449, 1451, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1577, 1603, 1628, 1605, 1997, 1693, 1606, 1813, 1913, 1927, 1819], + "table": "medication", + "sub_var": "drugid" + } ] } }, @@ -1440,6 +1447,14 @@ "sub_var": "pharmaid", "callback": "transform_fun(set_val(TRUE))" } + ], + "sic": [ + { + "ids": [1397, 1506, 1524, 1525, 1751, 1977], + "table": "medication", + "sub_var": "drugid", + "callback": "transform_fun(set_val(TRUE))" + } ] } }, @@ -1942,6 +1957,16 @@ "grp_var": "linkorderid", "callback": "mimic_dur_inmv" } + ], + "sic": [ + { + "ids": 1559, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ] } }, @@ -2033,6 +2058,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } + ], + "sic": [ + { + "ids": 1559, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ] } }, @@ -2120,6 +2156,16 @@ "grp_var": "linkorderid", "callback": "mimic_dur_inmv" } + ], + "sic": [ + { + "ids": 1618, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ] } }, @@ -2202,6 +2248,17 @@ "stop_var": 
"endtime", "callback": "mimic_rate_mv" } + ], + "sic": [ + { + "ids": 1618, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ] } }, @@ -2437,6 +2494,16 @@ "grp_var": "linkorderid", "callback": "mimic_dur_inmv" } + ], + "sic": [ + { + "ids": 1502, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ] } }, @@ -2528,6 +2595,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } + ], + "sic": [ + { + "ids": 1502, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ] } }, @@ -4565,6 +4643,16 @@ "grp_var": "linkorderid", "callback": "mimic_dur_inmv" } + ], + "sic": [ + { + "ids": 1562, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ] } }, @@ -4663,6 +4751,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } + ], + "sic": [ + { + "ids": 1562, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ] } }, diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index d74cda96..bc1461ba 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -9667,6 +9667,7 @@ "files": "medication.csv.gz", "defaults": { "index_var": "offset", + "val_var": "amount", "time_vars": ["offset", "offsetdrugend"] }, "cols": { From 5419ab4c6a6f38fc2fc15e2e44744cacbb9c9f9c Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 15:28:43 +0200 Subject: [PATCH 14/48] fix preproc for data_float_h some values are only taken once during the hour and thus have a cnt=1 and rawdata=NA. 
The actual data is stored in Val, which otherwise holds the average. Since after expansion, rawdata is the main data field, the value from Val needs to be moved to rawdata in this case. --- R/callback-tbl.R | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/R/callback-tbl.R b/R/callback-tbl.R index 6777e073..d8e0be63 100644 --- a/R/callback-tbl.R +++ b/R/callback-tbl.R @@ -13,12 +13,17 @@ sic_data_float_h <- function(dat, ...) { setDT(dat) dat[, c("rawdata") := lapply(get("rawdata"), hexstring_to_float)] # TODO: remove hard coding of rawdata and derive from JSON config dat <- dat[, .( - Offset = Offset + 60 * (0:(sapply(rawdata, length)-1)), - Val = Val, - cnt = cnt, - rawdata = unlist(rawdata) - ), - by = .(id, CaseID, DataID) + Offset = Offset + 60 * (0:(sapply(rawdata, length)-1)), + Val = Val, + cnt = cnt, + rawdata = unlist(rawdata), + rawdata_present = !is.na(rawdata) + ), + by = .(id, CaseID, DataID) ] + dat[rawdata_present == FALSE, rawdata := Val] # Fix measurements that only have one + dat[, rawdata_present := NULL] dat -} \ No newline at end of file +} + + From 078149e5b5211e6d18db8ab6eede26965dca9ccd Mon Sep 17 00:00:00 2001 From: Drago Date: Fri, 17 Mar 2023 12:52:31 -0400 Subject: [PATCH 15/48] add OMR to miiv --- inst/extdata/config/data-sources.json | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index bc1461ba..a6f8dc67 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -7173,6 +7173,37 @@ } } }, + "omr" : { + "files": "core/omr.csv.gz", + "defaults": { + "timevars": ["chartdate"], + "val_var": "result_value" + }, + "num_rows": 6439169, + "cols" : { + "subject_id": { + "name": "subject_id", + "spec": "col_integer" + }, + "chartdate": { + "name": "chartdate", + "spec": "col_datetime", + "format": "%Y-%m-%d" + }, + "seq_num": { + "name": "seq_num", + "spec": 
"col_integer" + }, + "result_name": { + "name": "result_name", + "spec": "col_character" + }, + "result_value": { + "name": "result_value", + "spec": "col_character" + } + } + }, "transfers": { "files": "core/transfers.csv.gz", "defaults": { From e5be7e871dc90db9098dc1d56cd47805210d8ce8 Mon Sep 17 00:00:00 2001 From: Drago Date: Tue, 11 Apr 2023 17:23:45 -0400 Subject: [PATCH 16/48] add miiv omr --- inst/extdata/config/data-sources.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index a6f8dc67..ba91be50 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -7176,7 +7176,7 @@ "omr" : { "files": "core/omr.csv.gz", "defaults": { - "timevars": ["chartdate"], + "time_vars": ["chartdate"], "val_var": "result_value" }, "num_rows": 6439169, From c8d0c9bba110081bf9d782d7ad0a5a4902fbac0a Mon Sep 17 00:00:00 2001 From: Drago Date: Mon, 1 May 2023 12:04:45 -0400 Subject: [PATCH 17/48] load_concepts() concepts arg doc fix --- R/concept-load.R | 7 ++-- man/change_id.Rd | 58 ++++++++++---------------- man/data_env.Rd | 99 +++++++++++++------------------------------- man/load_concepts.Rd | 5 ++- 4 files changed, 57 insertions(+), 112 deletions(-) diff --git a/R/concept-load.R b/R/concept-load.R index bca69bb3..174a089c 100644 --- a/R/concept-load.R +++ b/R/concept-load.R @@ -166,8 +166,9 @@ load_concepts <- function(x, ...) UseMethod("load_concepts", x) #' @param src A character vector, used to subset the `concepts`; `NULL` #' means no subsetting -#' @param concepts The concepts to be used or `NULL` in which case -#' [load_dictionary()] is called +#' @param concepts The concepts to be used, or `NULL`. In the latter case the +#' standard ricu dictionary (obtained by calling [load_dictionary()]) is used +#' for loading the objects specified in `x`. 
#' @param dict_name,dict_dirs In case not concepts are passed as `concepts`, #' these are forwarded to [load_dictionary()] as `name` and `file` arguments #' @@ -179,8 +180,6 @@ load_concepts.character <- function(x, src = NULL, concepts = NULL, ..., if (is.null(concepts)) { - assert_that(not_null(src)) - load_concepts( load_dictionary(src, x, name = dict_name, cfg_dirs = dict_dirs), src = NULL, ... diff --git a/man/change_id.Rd b/man/change_id.Rd index 8c3fa01e..6193adf5 100644 --- a/man/change_id.Rd +++ b/man/change_id.Rd @@ -57,34 +57,19 @@ and \code{downgrade_id()} when the target ID system is of lower cardinality } \details{ In order to provide ID system conversion for a data source, the (internal) -function \code{\link[=id_map]{id_map()}} must be able to construct an ID mapping for that data +function [id_map()] must be able to construct an ID mapping for that data source. Constructing such a mapping can be expensive w.r.t. the frequency -it might be re-used and therefore, \code{\link[=id_map]{id_map()}} provides caching +it might be re-used and therefore, [id_map()] provides caching infrastructure. The mapping itself is constructed by the (internal) -function \code{\link[=id_map_helper]{id_map_helper()}}, which is expected to provide source and +function [id_map_helper()], which is expected to provide source and destination ID columns as well as start and end columns corresponding to the destination ID, relative to the source ID system. In the following -example, we request for \code{mimic_demo}, with ICU stay IDs as source and +example, we request for `mimic_demo`, with ICU stay IDs as source and hospital admissions as destination IDs. -\if{html}{\out{
}}\preformatted{id_map_helper(mimic_demo, "icustay_id", "hadm_id") -#> # An `id_tbl`: 136 x 4 -#> # Id var: `icustay_id` -#> icustay_id hadm_id hadm_id_start hadm_id_end -#> -#> 1 201006 198503 -3290 mins 9114 mins -#> 2 201204 114648 -2 mins 6949 mins -#> 3 203766 126949 -1336 mins 8818 mins -#> 4 204132 157609 -1 mins 10103 mins -#> 5 204201 177678 -368 mins 9445 mins -#> ... -#> 132 295043 170883 -10413 mins 31258 mins -#> 133 295741 176805 -1 mins 3153 mins -#> 134 296804 110244 -1294 mins 4599 mins -#> 135 297782 167612 -1 mins 207 mins -#> 136 298685 151323 -1 mins 19082 mins -#> # i 131 more rows -}\if{html}{\out{
}} +```{r, eval = is_data_avail("mimic_demo")} +id_map_helper(mimic_demo, "icustay_id", "hadm_id") +``` Both start and end columns encode the hospital admission windows relative to each corresponding ICU stay start time. It therefore comes as no @@ -93,21 +78,22 @@ occurs before ICU stay start time), while end times are often days in the future (as hospital discharge typically occurs several days after ICU admission). -In order to use the ID conversion infrastructure offered by \code{ricu} for a -new dataset, it typically suffices to provide an \code{id_cfg} entry in the -source configuration (see \code{\link[=load_src_cfg]{load_src_cfg()}}), outlining the available ID +In order to use the ID conversion infrastructure offered by `ricu` for a +new dataset, it typically suffices to provide an `id_cfg` entry in the +source configuration (see [load_src_cfg()]), outlining the available ID systems alongside an ordering, as well as potentially a class specific -implementation of \code{\link[=id_map_helper]{id_map_helper()}} for the given source class, specifying +implementation of [id_map_helper()] for the given source class, specifying the corresponding time windows in 1 minute resolution (for every possible pair of IDs). -While both up- and downgrades for \code{id_tbl} objects, as well as downgrades -for \code{ts_tbl} objects are simple merge operations based on the ID mapping -provided by \code{\link[=id_map]{id_map()}}, ID upgrades for \code{ts_tbl} objects are slightly more -involved. As an example, consider the following setting: we have \code{data} -associated with \code{hadm_id} IDs and times relative to hospital admission: +While both up- and downgrades for `id_tbl` objects, as well as downgrades +for `ts_tbl` objects are simple merge operations based on the ID mapping +provided by [id_map()], ID upgrades for `ts_tbl` objects are slightly more +involved. 
As an example, consider the following setting: we have `data` +associated with `hadm_id` IDs and times relative to hospital admission: -\if{html}{\out{
}}\preformatted{ 1 2 3 4 5 6 7 8 +``` + 1 2 3 4 5 6 7 8 data ---*------*-------*--------*-------*-------*--------*------*--- 3h 10h 18h 27h 35h 43h 52h 59h @@ -117,17 +103,17 @@ hadm_id |-------------------------------------------------------------| icustay_id |------------------| |---------------| 0h 19h 0h 16h ICU_1 ICU_2 -}\if{html}{\out{
}} +``` -The mapping of data points from \code{hadm_id} to \code{icustay_id} is created as +The mapping of data points from `hadm_id` to `icustay_id` is created as follows: ICU stay end times mark boundaries and all data that is recorded after the last ICU stay ended is assigned to the last ICU stay. Therefore -data points 1-3 are assigned to \code{ICU_1}, while 4-8 are assigned to \code{ICU_2}. +data points 1-3 are assigned to `ICU_1`, while 4-8 are assigned to `ICU_2`. Times have to be shifted as well, as timestamps are expected to be relative to the current ID system. Data points 1-3 therefore are assigned to time stamps -4h, 3h and 11h, while data points 4-8 are assigned to -10h, -2h, 6h, 15h and 22h. Implementation-wise, the mapping is computed using an -efficient \code{data.table} rolling join. +efficient `data.table` rolling join. } \examples{ if (require(mimic.demo)) { diff --git a/man/data_env.Rd b/man/data_env.Rd index 0fd9d2a0..6249b595 100644 --- a/man/data_env.Rd +++ b/man/data_env.Rd @@ -42,98 +42,57 @@ hosted data source is available as well. As with the PhysioNet datasets, access is public but has to be granted by the data collectors. } \details{ -Setting up a dataset for use with \code{ricu} requires a configuration object. +Setting up a dataset for use with `ricu` requires a configuration object. For the included datasets, configuration can be loaded from -\if{html}{\out{
}}\preformatted{system.file("extdata", "config", "data-sources.json", package = "ricu") -}\if{html}{\out{
}} +``` +system.file("extdata", "config", "data-sources.json", package = "ricu") +``` -by calling \code{\link[=load_src_cfg]{load_src_cfg()}} and for dataset that are external to \code{ricu}, +by calling [load_src_cfg()] and for dataset that are external to `ricu`, additional configuration can be made available by setting the environment -variable \code{RICU_CONFIG_PATH} (for more information, refer to -\code{\link[=load_src_cfg]{load_src_cfg()}}). Using the dataset configuration object, data can be -downloaded (\code{\link[=download_src]{download_src()}}), imported (\code{\link[=import_src]{import_src()}}) and attached -(\code{\link[=attach_src]{attach_src()}}). While downloading and importing are one-time procedures, +variable `RICU_CONFIG_PATH` (for more information, refer to +[load_src_cfg()]). Using the dataset configuration object, data can be +downloaded ([download_src()]), imported ([import_src()]) and attached +([attach_src()]). While downloading and importing are one-time procedures, attaching of the dataset is repeated every time the package is loaded. Briefly, downloading loads the raw dataset from the internet (most likely -in \code{.csv} format), importing consists of some preprocessing to make the -data available more efficiently (by converting it to \code{\link[fst:fst]{.fst}} +in `.csv` format), importing consists of some preprocessing to make the +data available more efficiently (by converting it to [`.fst`][fst::fst()] format) and attaching sets up the data for use by the package. For more information on the individual steps, refer to the respective documentation pages. A dataset that has been successfully made available can interactively be explored by typing its name into the console and individual tables can be -inspected using the \code{$} function. For example for the MIMIC-III demo -dataset and the \code{icustays} table, this gives - -\if{html}{\out{
}}\preformatted{mimic_demo -#> -#> admissions callout caregivers chartevents -#> [129 x 19] [77 x 24] [7,567 x 4] [758,355 x 15] -#> cptevents d_cpt d_icd_diagnoses d_icd_procedures -#> [1,579 x 12] [134 x 9] [14,567 x 4] [3,882 x 4] -#> d_items d_labitems datetimeevents diagnoses_icd -#> [12,487 x 10] [753 x 6] [15,551 x 14] [1,761 x 5] -#> drgcodes icustays inputevents_cv inputevents_mv -#> [297 x 8] [136 x 12] [34,799 x 22] [13,224 x 31] -#> labevents microbiologyevents outputevents patients -#> [76,074 x 9] [2,003 x 16] [11,320 x 13] [100 x 8] -#> prescriptions procedureevents_mv procedures_icd services -#> [10,398 x 19] [753 x 25] [506 x 5] [163 x 6] -#> transfers -#> [524 x 13] +inspected using the `$` function. For example for the MIMIC-III demo +dataset and the `icustays` table, this gives + +```{r, eval = is_data_avail("mimic_demo")} +mimic_demo mimic_demo$icustays -#> # : [136 x 12] -#> # ID options: subject_id (patient) < hadm_id (hadm) < icustay_id (icustay) -#> # Defaults: `intime` (index), `last_careunit` (val) -#> # Time vars: `intime`, `outtime` -#> row_id subject_id hadm_id icustay_id dbsource first_careunit last_careunit -#> -#> 1 12742 10006 142345 206504 carevue MICU MICU -#> 2 12747 10011 105331 232110 carevue MICU MICU -#> 3 12749 10013 165520 264446 carevue MICU MICU -#> 4 12754 10017 199207 204881 carevue CCU CCU -#> 5 12755 10019 177759 228977 carevue MICU MICU -#> ... -#> 132 42676 44083 198330 286428 metavision CCU CCU -#> 133 42691 44154 174245 217724 metavision MICU MICU -#> 134 42709 44212 163189 239396 metavision MICU MICU -#> 135 42712 44222 192189 238186 metavision CCU CCU -#> 136 42714 44228 103379 217992 metavision SICU SICU -#> # i 131 more rows -#> # i 5 more variables: first_wardid , last_wardid , intime , -#> # outtime , los -}\if{html}{\out{
}} +``` Table subsets can be loaded into memory for example using the -\code{\link[base:subset]{base::subset()}} function, which uses non-standard evaluation (NSE) to +[base::subset()] function, which uses non-standard evaluation (NSE) to determine a row-subsetting. This design choice stems form the fact that some tables can have on the order of 10^8 rows, which makes loading full tables into memory an expensive operation. Table subsets loaded into -memory are represented as \code{\link[data.table:data.table]{data.table}} objects. +memory are represented as [`data.table`][data.table::data.table()] objects. Extending the above example, if only ICU stays corresponding to the patient -with \code{subject_id == 10124} are of interest, the respective data can be +with `subject_id == 10124` are of interest, the respective data can be loaded as -\if{html}{\out{
}}\preformatted{subset(mimic_demo$icustays, subject_id == 10124) -#> row_id subject_id hadm_id icustay_id dbsource first_careunit last_careunit -#> 1: 12863 10124 182664 261764 carevue MICU MICU -#> 2: 12864 10124 170883 222779 carevue MICU MICU -#> 3: 12865 10124 170883 295043 carevue CCU CCU -#> 4: 12866 10124 170883 237528 carevue MICU MICU -#> first_wardid last_wardid intime outtime los -#> 1: 23 23 2192-03-29 10:46:51 2192-04-01 06:36:00 2.8258 -#> 2: 50 50 2192-04-16 20:58:32 2192-04-20 08:51:28 3.4951 -#> 3: 7 7 2192-04-24 02:29:49 2192-04-26 23:59:45 2.8958 -#> 4: 23 23 2192-04-30 14:50:44 2192-05-15 23:34:21 15.3636 -}\if{html}{\out{
}} - -Much care has been taken to make \code{ricu} extensible to new datasets. For -example the publicly available ICU database \href{https://amsterdammedicaldatascience.nl/amsterdamumcdb/}{AmsterdamUMCdb } +```{r, eval = is_data_avail("mimic_demo")} +subset(mimic_demo$icustays, subject_id == 10124) +``` + +Much care has been taken to make `ricu` extensible to new datasets. For +example the publicly available ICU database [AmsterdamUMCdb +](https://amsterdammedicaldatascience.nl/amsterdamumcdb/) provided by the Amsterdam University Medical Center, currently is not part -of the core datasets of \code{ricu}, but code for integrating this dataset is -available on \href{https://github.com/eth-mds/aumc}{github}. +of the core datasets of `ricu`, but code for integrating this dataset is +available on [github](https://github.com/eth-mds/aumc). } \section{MIMIC-III}{ diff --git a/man/load_concepts.Rd b/man/load_concepts.Rd index 100ed04c..f2c687ff 100644 --- a/man/load_concepts.Rd +++ b/man/load_concepts.Rd @@ -92,8 +92,9 @@ load_concepts(x, ...) \item{src}{A character vector, used to subset the \code{concepts}; \code{NULL} means no subsetting} -\item{concepts}{The concepts to be used or \code{NULL} in which case -\code{\link[=load_dictionary]{load_dictionary()}} is called} +\item{concepts}{The concepts to be used, or \code{NULL}. 
In the latter case the +standard ricu dictionary (obtained by calling \code{\link[=load_dictionary]{load_dictionary()}}) is used +for loading the objects specified in \code{x}.} \item{dict_name, dict_dirs}{In case not concepts are passed as \code{concepts}, these are forwarded to \code{\link[=load_dictionary]{load_dictionary()}} as \code{name} and \code{file} arguments} From b1e2aed3b806e357fe8c4e3f056a2264834735ba Mon Sep 17 00:00:00 2001 From: Drago Date: Mon, 1 May 2023 12:13:37 -0400 Subject: [PATCH 18/48] load_concepts.integer() src NULL fix --- R/concept-load.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/concept-load.R b/R/concept-load.R index 174a089c..a420f1bf 100644 --- a/R/concept-load.R +++ b/R/concept-load.R @@ -201,8 +201,6 @@ load_concepts.integer <- function(x, src = NULL, concepts = NULL, ..., if (is.null(concepts)) { - assert_that(not_null(src)) - concepts <- load_dictionary(src, name = dict_name, cfg_dirs = dict_dirs) } else if (not_null(src)) { From 9c3481f2f7bb9d9790f34dc54c05ad0fc877bf1f Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Wed, 20 Mar 2024 20:07:05 +0100 Subject: [PATCH 19/48] Fix sic config --- inst/extdata/config/data-sources/sic.json | 99 ++++++++--------------- 1 file changed, 34 insertions(+), 65 deletions(-) diff --git a/inst/extdata/config/data-sources/sic.json b/inst/extdata/config/data-sources/sic.json index 760b50fc..351f1ab9 100644 --- a/inst/extdata/config/data-sources/sic.json +++ b/inst/extdata/config/data-sources/sic.json @@ -4,17 +4,17 @@ "url": "https://physionet.org/files/sicdb/1.0.6", "id_cfg": { "patient": { - "id": "PatientID", + "id": "patientid", "position": 1, - "start": "ICUOffset", - "end": "OffsetOfDeath", + "start": "icuoffset", + "end": "offsetofdeath", "table": "cases" }, "icustay": { - "id": "CaseID", + "id": "caseid", "position": 2, - "start": "ICUOffset", - "end": "TimeOfStay", + "start": "icuoffset", + "end": "timeofstay", "table": "cases" } }, @@ -22,13 +22,13 @@ "cases": { "files": 
"cases.csv.gz", "defaults": { - "index_var": "ICUOffset", + "index_var": "icuoffset", "time_vars": [ - "ICUOffset", - "OffsetOfDeath", - "HeartSurgeryBeginOffset", - "HeartSurgeryEndOffset", - "OffsetAfterFirstAdmission" + "icuoffset", + "offsetofdeath", + "heartsurgerybeginoffset", + "heartsurgeryendoffset", + "offsetafterfirstadmission" ] }, "num_rows": 27386, @@ -193,16 +193,12 @@ "data_float_h": { "files": "data_float_h.csv.gz", "defaults": { - "index_var": "Offset", - "val_var": "Val", - "time_vars": "Offset" + "index_var": "offset", + "val_var": "val", + "time_vars": "offset" }, "num_rows": 36785241, "cols": { - "id": { - "name": "id", - "spec": "col_integer" - }, "caseid": { "name": "CaseID", "spec": "col_integer" @@ -225,48 +221,21 @@ }, "rawdata": { "name": "rawdata", - "spec": "col_double" + "spec": "col_character" } }, "partitioning": { "col": "dataid", "breaks": [ - 1, - 2, - 3, - 4, - 7, - 28, - 29, - 702, - 703, - 705, - 708, - 709, - 710, - 715, - 717, - 719, - 724, - 725, - 731, - 773, - 2018, - 2274, - 2278, - 2280, - 2283, - 2290, - 3056, - 3059, - 3071 - ] - } + 702, 703, 705, 708, 709, 710, 715, 717, 719, 724, 725, + 731, 773, 2018, 2274, 2278, 2280, 2283, 2290, 3056, 3059, 3071] + }, + "callback": "sic_data_float_h" }, "data_ref": { "files": "data_ref.csv.gz", "defaults": { - "index_var": "OffsetAfterFirstAdmission" + "index_var": "offsetafterfirstadmission" }, "num_rows": 354157, "cols": { @@ -282,8 +251,8 @@ "name": "RefID", "spec": "col_integer" }, - "customfieldid": { - "name": "CustomFieldID", + "fieldid": { + "name": "FieldID", "spec": "col_integer" } } @@ -291,9 +260,9 @@ "laboratory": { "files": "laboratory.csv.gz", "defaults": { - "index_var": "Offset", - "val_var": "LaboratoryValue", - "time_vars": "Offset" + "index_var": "offset", + "val_var": "laboratoryvalue", + "time_vars": "offset" }, "num_rows": 17572279, "cols": { @@ -326,11 +295,11 @@ "medication": { "files": "medication.csv.gz", "defaults": { - "index_var": "Offset", + 
"index_var": "offset", "val_var": "Amount", "time_vars": [ - "Offset", - "OffsetDrugEnd" + "offset", + "offsetdrugend" ] }, "num_rows": 5141346, @@ -380,10 +349,10 @@ "data_range": { "files": "data_range.csv.gz", "defaults": { - "index_var": "Offset", + "index_var": "offset", "time_vars": [ - "Offset", - "OffsetEnd" + "offset", + "offsetend" ] }, "num_rows": 183339, @@ -417,8 +386,8 @@ "unitlog": { "files": "unitlog.csv.gz", "defaults": { - "index_var": "Offset", - "time_vars": "Offset" + "index_var": "offset", + "time_vars": "offset" }, "num_rows": 139968, "cols": { From d5d4c074c2063ea1d464997d1e3c22208572d258 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Wed, 20 Mar 2024 20:07:27 +0100 Subject: [PATCH 20/48] Properly support full rawdata found in sic --- R/callback-tbl.R | 28 +++++++++++++++++++++++++ R/config-utils.R | 28 +++++++++++++++++++++++++ R/setup-import.R | 53 ++++++++++++++++++++++++++++-------------------- 3 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 R/callback-tbl.R diff --git a/R/callback-tbl.R b/R/callback-tbl.R new file mode 100644 index 00000000..fc31a6c9 --- /dev/null +++ b/R/callback-tbl.R @@ -0,0 +1,28 @@ + +sic_data_float_h <- function(dat, ...) 
{ + hexstring_to_float <- function(x) { + if (is.na(x)) { + return(NA_real_) + } + hexstring <- substring(x, seq(1, 482, 2), seq(2, 482, 2)) + bytes <- as.raw(strtoi(hexstring[-1], base = 16)) + floats <- readBin(bytes, numeric(), length(bytes) %/% 4, 4, endian = "little") + ifelse(floats == 0, NA_real_, floats) + } + + setDT(dat) + dat[, c("rawdata") := lapply(get("rawdata"), hexstring_to_float)] # TODO: remove hard coding of rawdata and derive from JSON config + dat <- dat[, .( + Offset = Offset + 60 * (0:(sapply(rawdata, length)-1)), + Val = Val, + cnt = cnt, + rawdata = unlist(rawdata), + rawdata_present = !is.na(rawdata) + ), + by = .(id, CaseID, DataID) + ] + dat[rawdata_present == FALSE, rawdata := Val] # Fix measurements that only have one + dat[, rawdata_present := NULL] + print(dat) + dat +} \ No newline at end of file diff --git a/R/config-utils.R b/R/config-utils.R index ca9e6a49..bdada7db 100644 --- a/R/config-utils.R +++ b/R/config-utils.R @@ -386,6 +386,34 @@ partition_col <- function(x, orig_names = FALSE) { col } + +tbl_callback <- function(x){ + x <- as_tbl_cfg(x) + assert_that(length(x) == 1L) + + + if (!("callback" %in% vctrs::fields(x))) { + return(identity_callback) + } + + callback_field <- vctrs::field(x, "callback") + if (is.character(callback_field)) { + return(str_to_fun(callback_field)) + } + + if (!is.null(callback_field) && !is.list(callback_field)) { + return(identity_callback) + } + + callback_value <- callback_field[[1]] + if (is.character(callback_value)) { + return(str_to_fun(callback_field[1])) + } + + return(identity_callback) + } + + #' @export n_tick.tbl_cfg <- function(x) { diff --git a/R/setup-import.R b/R/setup-import.R index 0d2e536d..ead910f6 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -184,6 +184,9 @@ import_tbl.tbl_cfg <- function(x, data_dir = src_data_dir(x), progress = NULL, assert_that(is.dir(data_dir), is.flag(cleanup)) + # Print number of parts + print(paste("[import_tbl] Import table ", tbl_name(x))) 
+ print(paste("[import_tbl] Number of parts: ", n_part(x))) if (n_part(x) > 1L) { partition_table(x, data_dir, progress, ...) } else { @@ -257,6 +260,8 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, file <- file.path(dir, rawf) name <- tbl_name(x) + callback <- tbl_callback(x) + exp_row <- n_row(x) if (is.na(exp_row)) { @@ -267,17 +272,19 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, if (length(file) == 1L) { - callback <- function(x, pos, ...) { - report_problems(x, rawf) - split_write(x, pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, - progress, name, tick) - } + process_chunk <- function(x, pos, ...) { + report_problems(x, rawf) + split_write(callback(x), pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, + progress, name, tick) + } if (grepl("\\.gz$", file)) { + print("[partition_table] gunzipping") file <- gunzip(file, tempdir) } - readr::read_csv_chunked(file, callback, chunk_length, col_types = spec, + print(paste("[partition_table] reading csv chunked with chunk_length: ", chunk_length)) + readr::read_csv_chunked(file, process_chunk, chunk_length, col_types = spec, progress = FALSE, ...) if (is.na(exp_row)) { @@ -291,7 +298,7 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, dat <- readr::read_csv(file[i], col_types = spec, progress = FALSE, ...) report_problems(dat, rawf[i]) - split_write(dat, pfun, tempdir, i, progress, name, tick) + split_write(callback(dat), pfun, tempdir, i, progress, name, tick) } } @@ -355,6 +362,7 @@ csv_to_fst <- function(x, dir, progress = NULL, ...) { raw <- raw_file_name(x) src <- file.path(dir, raw) dst <- file.path(dir, fst_file_name(x)) + callback <- tbl_callback(x) assert_that(length(x) == 1L, length(src) == 1L, length(dst) == 1L) @@ -364,6 +372,7 @@ csv_to_fst <- function(x, dir, progress = NULL, ...) 
{ report_problems(dat, raw) + dat <- callback(dat) dat <- rename_cols(setDT(dat), ricu_cols(x), orig_cols(x)) fst::write_fst(dat, dst, compress = 100L) @@ -422,24 +431,24 @@ report_problems <- function(x, file) { invisible(NULL) } -report_problems <- function(x, file) { +# report_problems <- function(x, file) { - prob_to_str <- function(x) { - paste0("[", x[1L], ", ", x[2L], "]: got '", x[4L], "' instead of ", x[3L]) - } +# prob_to_str <- function(x) { +# paste0("[", x[1L], ", ", x[2L], "]: got '", x[4L], "' instead of ", x[3L]) +# } - probs <- readr::problems(x) +# probs <- readr::problems(x) - if (nrow(probs)) { +# if (nrow(probs)) { - probs <- bullet(apply(probs, 1L, prob_to_str)) +# probs <- bullet(apply(probs, 1L, prob_to_str)) - warn_ricu( - c("Encountered parsing problems for file {basename(file)}:", probs), - class = "csv_parsing_error", indent = c(0L, rep_along(2L, probs)), - exdent = c(0L, rep_along(2L, probs)) - ) - } +# warn_ricu( +# c("Encountered parsing problems for file {basename(file)}:", probs), +# class = "csv_parsing_error", indent = c(0L, rep_along(2L, probs)), +# exdent = c(0L, rep_along(2L, probs)) +# ) +# } - invisible(NULL) -} +# invisible(NULL) +# } From 31d48f7550c37e9e0d7e4da23104d496100f5830 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Wed, 20 Mar 2024 20:42:21 +0100 Subject: [PATCH 21/48] Remove print --- R/callback-tbl.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/callback-tbl.R b/R/callback-tbl.R index fc31a6c9..b506c8ec 100644 --- a/R/callback-tbl.R +++ b/R/callback-tbl.R @@ -23,6 +23,6 @@ sic_data_float_h <- function(dat, ...) 
{ ] dat[rawdata_present == FALSE, rawdata := Val] # Fix measurements that only have one dat[, rawdata_present := NULL] - print(dat) - dat + + return(dat) } \ No newline at end of file From eb41aaa386f1ce2496e4216c629e891991c7db0a Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Wed, 20 Mar 2024 21:04:39 +0100 Subject: [PATCH 22/48] Add utility functions proposed by `prockenschaub` here: https://github.com/eth-mds/ricu/pull/30/files --- R/concept-utils.R | 51 ++++++++++++++++++++++++++++++++++++++++++ R/data-utils.R | 57 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/R/concept-utils.R b/R/concept-utils.R index 47f7d96b..d0e22d68 100644 --- a/R/concept-utils.R +++ b/R/concept-utils.R @@ -199,6 +199,29 @@ get_hirid_ids <- function(x, ids) { load_id("variables", x, .data$id %in% .env$ids, cols = "unit", id_var = "id") } +#' @rdname data_items + #' @export + init_itm.sic_itm <- function(x, table, sub_var, ids, + callback = "identity_callback", ...) { + + assert_that(is.string(table), has_length(ids), + is.character(ids) || is_intish(ids)) + + x[["table"]] <- table + + units <- get_sic_ids(x, ids) + units <- rename_cols(rm_na(units), sub_var, "referenceglobalid") + + todo <- c("ids", "units") + x[todo] <- mget(todo) + + complete_tbl_itm(x, callback, sub_var, ...) + } + + get_sic_ids <- function(x, ids) { + load_id("d_references", x, .data$referenceglobalid %in% .env$ids, cols = "referenceunit", id_var = "referenceglobalid") + } + #' @param unit_val String valued unit to be used in case no `unit_var` is #' available for the given table #' @@ -330,6 +353,10 @@ prepare_query.sel_itm <- prep_sel #' @export prepare_query.hrd_itm <- prep_sel +#' @keywords internal +#' @export +prepare_query.sic_itm <- prep_sel + #' @keywords internal #' @export prepare_query.rgx_itm <- function(x) { @@ -546,6 +573,17 @@ do_callback.hrd_itm <- function(x, ...) 
{ NextMethod() } +#' @keywords internal + #' @export + do_callback.sic_itm <- function(x, ...) { + # TODO: generalise and combine with do_callback.hrd_itm + if (is.null(get_itm_var(x, "unit_var"))) { + x <- try_add_vars(x, unit_var = "referenceunit") + } + + NextMethod() +} + #' @keywords internal #' @export do_callback.col_itm <- function(x, ...) { @@ -604,6 +642,19 @@ do_itm_load.hrd_itm <- function(x, id_type = "icustay", interval = hours(1L)) { res } +#' @export + do_itm_load.sic_itm <- function(x, id_type = "icustay", interval = hours(1L)) { + + res <- NextMethod() + + if (is.null(get_itm_var(x, "unit_var"))) { + unt <- x[["units"]] + res <- merge(res, unt, by = get_itm_var(x, "sub_var"), all.x = TRUE) + } + + res +} + #' @export do_itm_load.col_itm <- function(x, id_type = "icustay", interval = hours(1L)) { diff --git a/R/data-utils.R b/R/data-utils.R index 31fa02df..d3941c0f 100644 --- a/R/data-utils.R +++ b/R/data-utils.R @@ -128,6 +128,28 @@ id_orig_helper.miiv_env <- function(x, id) { as_id_tbl(res, id, by_ref = TRUE) } +#' @rdname data_utils +#' @export +id_orig_helper.sic_env <- function(x, id) { + + if (!identical(id, "patientid")) { + return(NextMethod()) + } + + cfg <- as_id_cfg(x)[id == id_var_opts(x)] + + assert_that(length(cfg) == 1L) + + sta <- field(cfg, "start") + age <- "admissionyear" + + res <- as_src_tbl(x, field(cfg, "table")) + res <- res[, c(id, sta, age)] + res <- res[, c(sta, age) := shift_year(get(sta), get(age))] + + as_id_tbl(res, id, by_ref = TRUE) +} + #' @export id_orig_helper.default <- function(x, ...) 
stop_generic(x, .Generic) @@ -228,33 +250,32 @@ id_win_helper.eicu_env <- function(x) { order_rename(res, ids, sta, end) } -#' @rdname data_utils -#' @export -id_win_helper.sic_env <- function(x) { - - sec_as_mins <- function(x) min_as_mins(as.integer(x / 60)) - + + #' @rdname data_utils + #' @export + id_win_helper.sic_env <- function(x) { cfg <- sort(as_id_cfg(x), decreasing = TRUE) - + ids <- field(cfg, "id") - sta <- c(unique(field(cfg, "start")), "HospAdmTime") + sta <- field(cfg, "start") end <- field(cfg, "end") - + tbl <- as_src_tbl(x, unique(field(cfg, "table"))) - + mis <- setdiff(sta, colnames(tbl)) - + res <- load_src(tbl, cols = c(ids, intersect(sta, colnames(tbl)), end)) - - if (length(mis) > 0L) { - res[, c(mis) := 0L] - } - - res <- res[, c(sta, end) := lapply(.SD, sec_as_mins), .SDcols = c(sta, end)] + + assert_that(length(mis) == 1L) + res[, firstadmission := 0L] + + res <- res[, c(sta, end) := lapply(.SD, s_as_mins), .SDcols = c(sta, end)] + res[, timeofstay := offsetafterfirstadmission + timeofstay] + res <- setcolorder(res, c(ids, sta, end)) res <- rename_cols(res, c(ids, paste0(ids, "_start"), paste0(ids, "_end")), by_ref = TRUE) - + as_id_tbl(res, ids[2L], by_ref = TRUE) } From e4c930aed683ff9eb099f5ccdfd26b7df4f6d36c Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 08:42:12 +0100 Subject: [PATCH 23/48] Fix configs for `sic` based on `prockenschaub` --- .../config/concept-dict/blood_gas.json | 12 +++-- .../config/concept-dict/chemistry.json | 54 +++++++++++-------- .../config/concept-dict/demographics.json | 2 +- .../config/concept-dict/hematology.json | 10 +++- .../config/concept-dict/medications.json | 42 +++++---------- inst/extdata/config/concept-dict/vitals.json | 5 +- 6 files changed, 66 insertions(+), 59 deletions(-) diff --git a/inst/extdata/config/concept-dict/blood_gas.json b/inst/extdata/config/concept-dict/blood_gas.json index 04b7854d..16c003ee 100644 --- a/inst/extdata/config/concept-dict/blood_gas.json +++ 
b/inst/extdata/config/concept-dict/blood_gas.json @@ -67,9 +67,10 @@ ], "sic": [ { - "ids": 668, + "ids": [668, 449], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -150,10 +151,11 @@ ], "sic": [ { - "ids": 655, + "ids": [655, 452], "table": "laboratory", - "sub_var": "LaboratoryID" - } + "sub_var": "laboratoryid", + "class": "sic_itm" + }, ], "picdb": [] } diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index bae0b17c..763617a3 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -66,7 +66,8 @@ { "ids": 287, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -147,7 +148,8 @@ { "ids": 609, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -228,7 +230,8 @@ { "ids": 617, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -301,7 +304,8 @@ { "ids": 616, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -375,7 +379,8 @@ { "ids": 456, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -456,7 +461,8 @@ { "ids": 333, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -526,7 +532,8 @@ { "ids": 332, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -595,14 +602,6 @@ "sub_var": "itemid" } ], - "sic": [ - { - "ids": 355, - "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`*`, 2.14))" - } - ], "picdb": [] } }, @@ -675,8 +674,9 @@ { "ids": 457, "table": 
"laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`*`, 4.008))" + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 4.008), 'mg/dL', 'mmol/l')", + "class": "sic_itm" } ], "picdb": [] @@ -749,7 +749,8 @@ { "ids": 611, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -816,7 +817,8 @@ { "ids": 253, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -893,7 +895,8 @@ 450 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -967,7 +970,8 @@ { "ids": 367, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1048,6 +1052,14 @@ "callback": "convert_unit(binary_op(`*`, 10), 'mg/L', 'mg/dl')" } ], + "sic": [ + { + "ids": 341, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [ { "table": "labevents", diff --git a/inst/extdata/config/concept-dict/demographics.json b/inst/extdata/config/concept-dict/demographics.json index 6f156f78..1e54fa3a 100644 --- a/inst/extdata/config/concept-dict/demographics.json +++ b/inst/extdata/config/concept-dict/demographics.json @@ -127,7 +127,7 @@ "sic": [ { "table": "cases", - "val_var": "AgeOnAdmission", + "val_var": "ageonadmission", "class": "col_itm" } ], diff --git a/inst/extdata/config/concept-dict/hematology.json b/inst/extdata/config/concept-dict/hematology.json index 4cd8d094..60dee64f 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -59,7 +59,15 @@ { "ids": 174, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 295, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" } ], 
"picdb": [ diff --git a/inst/extdata/config/concept-dict/medications.json b/inst/extdata/config/concept-dict/medications.json index 3ad798e8..fbea781c 100644 --- a/inst/extdata/config/concept-dict/medications.json +++ b/inst/extdata/config/concept-dict/medications.json @@ -408,35 +408,11 @@ } ], "sic": [ - { - "ids": [ - 1406, - 1408, - 1410, - 1418, - 1421, - 1422, - 1423, - 1428, - 1431, - 1433, - 1436, - 1449, - 1454, - 1457, - 1458, - 1459, - 1460, - 1461, - 1603, - 1795, - 1913, - 1927 - ], - "table": "medication", - "sub_var": "DrugID", - "callback": "transform_fun(set_val(TRUE))" - } + { + "ids": [1401, 1406, 1408, 1410, 1418, 1421, 1422, 1423, 1428, 1431, 1433, 1436, 1439, 1446, 1449, 1451, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1577, 1603, 1628, 1605, 1997, 1693, 1606, 1813, 1913, 1927, 1819], + "table": "medication", + "sub_var": "drugid" + } ], "picdb": [] } @@ -580,6 +556,14 @@ "callback": "transform_fun(set_val(TRUE))" } ], + "sic": [ + { + "ids": [1397, 1506, 1524, 1525, 1751, 1977], + "table": "medication", + "sub_var": "drugid", + "callback": "transform_fun(set_val(TRUE))" + } + ], "picdb": [] } }, diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index 91dfcc08..9230ed0c 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -82,7 +82,8 @@ 705 ], "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -560,7 +561,7 @@ { "table": "chartevents", "ids": [ - "1001" + 1001 ], "sub_var": "itemid" }, From ee483632c916e37f8831be5f8836f4e281048e48 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 09:16:33 +0100 Subject: [PATCH 24/48] Fix `sic` configs based on https://github.com/prockenschaub/ricu-package/tree/sicdb --- .../config/concept-dict/blood_gas.json | 42 +++++- .../config/concept-dict/chemistry.json | 42 +++--- .../config/concept-dict/demographics.json 
| 18 +-- .../config/concept-dict/hematology.json | 108 ++++++++++------ .../config/concept-dict/medications.json | 120 +++++++++--------- inst/extdata/config/concept-dict/outcome.json | 18 +++ inst/extdata/config/concept-dict/output.json | 3 +- .../config/concept-dict/respiratory.json | 6 +- inst/extdata/config/concept-dict/vitals.json | 24 +++- 9 files changed, 243 insertions(+), 138 deletions(-) diff --git a/inst/extdata/config/concept-dict/blood_gas.json b/inst/extdata/config/concept-dict/blood_gas.json index 935db222..0138565e 100644 --- a/inst/extdata/config/concept-dict/blood_gas.json +++ b/inst/extdata/config/concept-dict/blood_gas.json @@ -263,7 +263,14 @@ { "ids": 2283, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" + }, + { + "ids": 684, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -310,6 +317,14 @@ "class": "hrd_itm" } ], + "sic": [ + { + "ids": [196, 660], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [ { "table": "labevents", @@ -387,10 +402,12 @@ { "ids": [ 657, + 465, 454 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -462,6 +479,14 @@ "sub_var": "itemid" } ], + "sic": [ + { + "ids": 661, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [ { "table": "labevents", @@ -540,9 +565,10 @@ ], "sic": [ { - "ids": 687, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 687, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -621,7 +647,8 @@ { "ids": 688, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -708,7 +735,8 @@ 689 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ diff --git 
a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index 472be88b..39fbd784 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -1242,7 +1242,8 @@ 656 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1331,12 +1332,10 @@ ], "sic": [ { - "ids": [ - 463, - 685 - ], - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": [463, 685], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1417,10 +1416,11 @@ ], "sic": [ { - "ids": 468, - "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`*`, 2.431))" + "ids": 468, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 2.431), 'mg/dL')", + "class": "sic_itm" } ], "picdb": [ @@ -1514,7 +1514,8 @@ 686 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1593,10 +1594,11 @@ ], "sic": [ { - "ids": 471, - "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`*`, 3.097521))" + "ids": 471, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 3.097521), 'mg/dL')", + "class": "sic_itm" } ], "picdb": [ @@ -1673,8 +1675,9 @@ { "ids": 481, "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`/`, 1000))" + "sub_var": "laboratoryid", + "callback": "transform_fun(binary_op(`/`, 1000))", + "class": "sic_itm" } ], "picdb": [ @@ -1744,7 +1747,8 @@ { "ids": 270, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/demographics.json b/inst/extdata/config/concept-dict/demographics.json index 59d944a1..a0f7da0d 100644 --- 
a/inst/extdata/config/concept-dict/demographics.json +++ b/inst/extdata/config/concept-dict/demographics.json @@ -235,7 +235,7 @@ "sic": [ { "table": "cases", - "val_var": "HeightOnAdmission", + "val_var": "heightonadmission", "class": "col_itm" } ], @@ -317,10 +317,10 @@ ], "sic": [ { - "table": "cases", - "val_var": "Sex", - "callback": "apply_map(c(`735` = 'Male', `736` = 'Female'))", - "class": "col_itm" + "table": "cases", + "val_var": "sex", + "class": "col_itm", + "callback": "transform_fun(sic_sex)" } ], "picdb": [ @@ -403,10 +403,10 @@ ], "sic": [ { - "table": "cases", - "val_var": "WeightOnAdmission", - "class": "col_itm", - "callback": "transform_fun(binary_op(`/`, 1000))" + "table": "cases", + "val_var": "weightonadmission", + "class": "col_itm", + "callback": "transform_fun(binary_op(`/`, 1000))" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/hematology.json b/inst/extdata/config/concept-dict/hematology.json index 60dee64f..3d9b5ef5 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -200,11 +200,19 @@ } ], "sic": [ - { - "ids": 197, - "table": "laboratory", - "sub_var": "LaboratoryID" - } + { + "ids": 197, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 299, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ], "picdb": [ { @@ -335,11 +343,12 @@ } ], "sic": [ - { - "ids": 344, - "table": "laboratory", - "sub_var": "LaboratoryID" - } + { + "ids": 344, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ], "picdb": [] } @@ -381,6 +390,14 @@ "sub_var": "itemid" } ], + "sic": [ + { + "ids": 214, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [] } }, @@ -446,7 +463,8 @@ 682 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ 
-538,7 +556,8 @@ 289 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -681,6 +700,13 @@ "ids": 223, "table": "laboratory", "sub_var": "LaboratoryID" + }, + { + "ids": 302, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" } ], "picdb": [] @@ -748,7 +774,8 @@ { "ids": 566, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -824,9 +851,11 @@ ], "sic": [ { - "ids": 290, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 290, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 0.16114), '%')", + "class": "sic_itm" } ], "picdb": [ @@ -902,7 +931,8 @@ { "ids": 286, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -985,9 +1015,17 @@ ], "sic": [ { - "ids": 230, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 230, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 308, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" } ], "picdb": [ @@ -1069,9 +1107,10 @@ ], "sic": [ { - "ids": 314, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 314, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1138,7 +1177,8 @@ { "ids": 598, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1214,7 +1254,8 @@ { "ids": 597, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1283,9 +1324,10 @@ ], "sic": [ { - "ids": 599, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 599, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], 
"picdb": [ @@ -1351,13 +1393,6 @@ "sub_var": "itemid" } ], - "sic": [ - { - "ids": 3319, - "table": "laboratory", - "sub_var": "LaboratoryID" - } - ], "picdb": [ { "table": "labevents", @@ -1436,7 +1471,8 @@ { "ids": 301, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/medications.json b/inst/extdata/config/concept-dict/medications.json index fbea781c..770deb8f 100644 --- a/inst/extdata/config/concept-dict/medications.json +++ b/inst/extdata/config/concept-dict/medications.json @@ -826,9 +826,10 @@ 1559 ], "table": "medication", - "sub_var": "DrugID", - "stop_var": "OffsetDrugEnd", - "callback": "sic_dur" + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" } ], "picdb": [] @@ -931,16 +932,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } - ], + ], "sic": [ - { - "ids": 1559, - "table": "medication", - "sub_var": "DrugID", - "val_var": "AmountPerMinute", - "stop_var": "OffsetDrugEnd", - "callback": "sic_rate_kg" - } + { + "ids": 1559, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ], "picdb": [] } @@ -1042,13 +1044,14 @@ } ], "sic": [ - { - "ids": 1618, - "table": "medication", - "sub_var": "DrugID", - "stop_var": "OffsetDrugEnd", - "callback": "sic_dur" - } + { + "ids": 1618, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ], "picdb": [] } @@ -1143,16 +1146,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } - ], + ], "sic": [ - { - "ids": 1618, - "table": "medication", - "sub_var": "DrugID", - "val_var": "AmountPerMinute", - "stop_var": "OffsetDrugEnd", - "callback": "sic_rate_kg" - } + { + "ids": 1618, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", 
+ "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ], "picdb": [] } @@ -1258,15 +1262,14 @@ } ], "sic": [ - { - "ids": [ - 1502 - ], - "table": "medication", - "sub_var": "DrugID", - "stop_var": "OffsetDrugEnd", - "callback": "sic_dur" - } + { + "ids": 1502, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ], "picdb": [] } @@ -1378,14 +1381,15 @@ } ], "sic": [ - { - "ids": 1502, - "table": "medication", - "sub_var": "DrugID", - "val_var": "AmountPerMinute", - "stop_var": "OffsetDrugEnd", - "callback": "sic_rate_kg" - } + { + "ids": 1502, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ], "picdb": [] } @@ -1600,13 +1604,12 @@ ], "sic": [ { - "ids": [ - 1562 - ], - "table": "medication", - "sub_var": "DrugID", - "stop_var": "OffsetDrugEnd", - "callback": "sic_dur" + "ids": 1562, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" } ], "picdb": [] @@ -1730,12 +1733,13 @@ ], "sic": [ { - "ids": 1562, - "table": "medication", - "sub_var": "DrugID", - "val_var": "AmountPerMinute", - "stop_var": "OffsetDrugEnd", - "callback": "sic_rate_kg" + "ids": 1562, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" } ], "picdb": [] diff --git a/inst/extdata/config/concept-dict/outcome.json b/inst/extdata/config/concept-dict/outcome.json index 0d08a873..95009c78 100644 --- a/inst/extdata/config/concept-dict/outcome.json +++ b/inst/extdata/config/concept-dict/outcome.json @@ -79,6 +79,16 @@ "callback": "transform_fun(comp_na(`==`, 1L))", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "index_var": "offsetofdeath", + "adm_time": 
"offsetafterfirstadmission", + "val_var": "timeofstay", + "callback": "sic_death", + "class": "col_itm" + } ] } }, @@ -206,6 +216,14 @@ "class": "fun_itm" } ], + "sic": [ + { + "table": "cases", + "val_var": "timeofstay", + "callback": "transform_fun(binary_op(`/`, 60 * 60 * 24))", + "class": "col_itm" + } + ], "picdb": [] } }, diff --git a/inst/extdata/config/concept-dict/output.json b/inst/extdata/config/concept-dict/output.json index 1d3796ae..e078d7d7 100644 --- a/inst/extdata/config/concept-dict/output.json +++ b/inst/extdata/config/concept-dict/output.json @@ -148,7 +148,8 @@ { "ids": 725, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/respiratory.json b/inst/extdata/config/concept-dict/respiratory.json index d71d24ef..234531a9 100644 --- a/inst/extdata/config/concept-dict/respiratory.json +++ b/inst/extdata/config/concept-dict/respiratory.json @@ -223,7 +223,8 @@ { "ids": 710, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -349,7 +350,8 @@ { "ids": 719, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index 6c8b37a9..bac04180 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -162,6 +162,14 @@ "sub_var": "itemid" } ], + "sic": [ + { + "ids": 716, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [ ] } @@ -235,9 +243,10 @@ ], "sic": [ { - "ids": 708, - "table": "data_float_h", - "sub_var": "DataID" + "ids": [708, 724], + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -352,7 +361,8 @@ 706 ], "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], 
"picdb": [ @@ -442,7 +452,8 @@ 704 ], "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -568,7 +579,8 @@ { "ids": 709, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ From c2b8c4961fcb5a5f8633ce0ace26b2a40ea5f480 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 09:39:12 +0100 Subject: [PATCH 25/48] Remove prints and use ricu msg --- R/setup-import.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index 2b501803..0673290b 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -184,9 +184,8 @@ import_tbl.tbl_cfg <- function(x, data_dir = src_data_dir(x), progress = NULL, assert_that(is.dir(data_dir), is.flag(cleanup)) - # Print number of parts - print(paste("[import_tbl] Import table ", tbl_name(x))) - print(paste("[import_tbl] Number of parts: ", n_part(x))) + msg_ricu(paste("[import_tbl] Import table ", tbl_name(x))) + msg_ricu(paste("[import_tbl] Number of parts: ", n_part(x))) if (n_part(x) > 1L) { partition_table(x, data_dir, progress, ...) } else { @@ -279,11 +278,10 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, } if (grepl("\\.gz$", file)) { - print("[partition_table] gunzipping") + msg_ricu(paste("[partition_table] gunzip: ", file)) file <- gunzip(file, tempdir) } - print(paste("[partition_table] reading csv chunked with chunk_length: ", chunk_length)) readr::read_csv_chunked(file, process_chunk, chunk_length, col_types = spec, progress = FALSE, ...) 
From e5061d4915ea5113fc27f8d25cf744de2d284d97 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 11:58:38 +0100 Subject: [PATCH 26/48] Remove redundant `report_probolems` --- R/setup-import.R | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index 0673290b..5af6f8dc 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -426,25 +426,3 @@ report_problems <- function(x, file) { invisible(NULL) } - -# report_problems <- function(x, file) { - -# prob_to_str <- function(x) { -# paste0("[", x[1L], ", ", x[2L], "]: got '", x[4L], "' instead of ", x[3L]) -# } - -# probs <- readr::problems(x) - -# if (nrow(probs)) { - -# probs <- bullet(apply(probs, 1L, prob_to_str)) - -# warn_ricu( -# c("Encountered parsing problems for file {basename(file)}:", probs), -# class = "csv_parsing_error", indent = c(0L, rep_along(2L, probs)), -# exdent = c(0L, rep_along(2L, probs)) -# ) -# } - -# invisible(NULL) -# } From 2c8d76397c60191c40c286757b7b5c83e57dfe31 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 14:00:28 +0100 Subject: [PATCH 27/48] Add prints and tempdir arg --- R/setup-import.R | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index ead910f6..5bb57b46 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -221,16 +221,21 @@ merge_fst_chunks <- function(src, targ, new, old, sort_col, prog, nme, tick) { fst::write_fst(dat, new_file, compress = 100L) - progress_tick(paste(nme, "part", part_no), prog, - coalesce(tick, floor(nrow(dat) / 2))) + # progress_tick(paste(nme, "part", part_no), prog, + # coalesce(tick, floor(nrow(dat) / 2))) invisible(NULL) } -split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick) { +split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick, callback = NULL) { n_row <- nrow(x) + if (!is.null(callback)) { + print("[split_write] apply callback") + x <- 
callback(x) + } + x <- split(x, part_fun(x)) tmp_nme <- file.path(dir, paste0("part_", names(x)), @@ -241,16 +246,21 @@ split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick) { Map(fst::write_fst, x, tmp_nme) - progress_tick(paste(nme, "chunk", chunk_no), prog, - coalesce(tick, floor(n_row / 2))) + # progress_tick(paste(nme, "chunk", chunk_no), prog, + # coalesce(tick, floor(n_row / 2))) invisible(NULL) } -partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, +partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, tempdir = NULL, ...) { - tempdir <- ensure_dirs(tempfile()) + # tempdir <- ensure_dirs(tempfile()) + if (is.null(tempdir)) { + # tempdir <- ensure_dirs(file.path(dir, "tempdir")) + tempdir <- ensure_dirs(tempfile()) + } + print(paste("[partition_table] tempdir: ", tempdir)) on.exit(unlink(tempdir, recursive = TRUE)) spec <- col_spec(x) @@ -274,8 +284,8 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, process_chunk <- function(x, pos, ...) { report_problems(x, rawf) - split_write(callback(x), pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, - progress, name, tick) + split_write(x, pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, + progress, name, tick, callback) } if (grepl("\\.gz$", file)) { @@ -310,6 +320,7 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, tick <- 1L } + print(paste("[partition_table] merge_fst_chunks")) for (src_dir in file.path(tempdir, paste0("part_", seq_len(n_part(x))))) { merge_fst_chunks(src_dir, targ, newc, oldc, pcol, progress, name, tick) } @@ -357,7 +368,7 @@ gunzip <- function(file, exdir) { return(dest) } -csv_to_fst <- function(x, dir, progress = NULL, ...) { +csv_to_fst <- function(x, dir, progress = NULL, tempdir = NULL, ...) 
{ raw <- raw_file_name(x) src <- file.path(dir, raw) From 14a14033a924b8cef35e7f9f5e27c6d56f4e30ec Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 14:02:23 +0100 Subject: [PATCH 28/48] Cleanup prints --- R/setup-import.R | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index a56a79cc..98bd69ed 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -230,11 +230,6 @@ split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick, callback = n_row <- nrow(x) - if (!is.null(callback)) { - print("[split_write] apply callback") - x <- callback(x) - } - x <- split(x, part_fun(x)) tmp_nme <- file.path(dir, paste0("part_", names(x)), @@ -259,7 +254,7 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, temp # tempdir <- ensure_dirs(file.path(dir, "tempdir")) tempdir <- ensure_dirs(tempfile()) } - print(paste("[partition_table] tempdir: ", tempdir)) + msg_ricu(paste("[partition_table] tempdir: ", tempdir)) on.exit(unlink(tempdir, recursive = TRUE)) spec <- col_spec(x) @@ -316,7 +311,6 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, temp tick <- 1L } - print(paste("[partition_table] merge_fst_chunks")) for (src_dir in file.path(tempdir, paste0("part_", seq_len(n_part(x))))) { merge_fst_chunks(src_dir, targ, newc, oldc, pcol, progress, name, tick) } From d96d9fe4b636103df9a98c263f4815d2ca9abd48 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 15:31:38 +0100 Subject: [PATCH 29/48] Fix blood_gas config --- inst/extdata/config/concept-dict/blood_gas.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/config/concept-dict/blood_gas.json b/inst/extdata/config/concept-dict/blood_gas.json index 0138565e..0c2c498d 100644 --- a/inst/extdata/config/concept-dict/blood_gas.json +++ b/inst/extdata/config/concept-dict/blood_gas.json @@ -155,7 +155,7 @@ "table": "laboratory", "sub_var": "laboratoryid", 
"class": "sic_itm" - }, + } ], "picdb": [ { From fa37f630cb720f5f04aef097abc93625e0900060 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 16:28:23 +0100 Subject: [PATCH 30/48] Fix sic table config --- inst/extdata/config/data-sources/sic.json | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/inst/extdata/config/data-sources/sic.json b/inst/extdata/config/data-sources/sic.json index 351f1ab9..5d527e36 100644 --- a/inst/extdata/config/data-sources/sic.json +++ b/inst/extdata/config/data-sources/sic.json @@ -6,14 +6,14 @@ "patient": { "id": "patientid", "position": 1, - "start": "icuoffset", + "start": "firstadmission", "end": "offsetofdeath", "table": "cases" }, "icustay": { "id": "caseid", "position": 2, - "start": "icuoffset", + "start": "offsetafterfirstadmission", "end": "timeofstay", "table": "cases" } @@ -25,10 +25,10 @@ "index_var": "icuoffset", "time_vars": [ "icuoffset", - "offsetofdeath", "heartsurgerybeginoffset", "heartsurgeryendoffset", - "offsetafterfirstadmission" + "offsetafterfirstadmission", + "offsetofdeath" ] }, "num_rows": 27386, @@ -235,7 +235,8 @@ "data_ref": { "files": "data_ref.csv.gz", "defaults": { - "index_var": "offsetafterfirstadmission" + "index_var": "offsetafterfirstadmission", + "time_vars": ["offsetafterfirstadmission"] }, "num_rows": 354157, "cols": { From 9f152c3c3f964b150e4d0e5285865d93a508be84 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 16:31:56 +0100 Subject: [PATCH 31/48] Use finer resolution rawdata where available --- inst/extdata/config/concept-dict/blood_gas.json | 1 + inst/extdata/config/concept-dict/output.json | 1 + inst/extdata/config/concept-dict/respiratory.json | 2 ++ inst/extdata/config/concept-dict/vitals.json | 5 +++++ 4 files changed, 9 insertions(+) diff --git a/inst/extdata/config/concept-dict/blood_gas.json b/inst/extdata/config/concept-dict/blood_gas.json index 0c2c498d..2b536a4c 100644 --- 
a/inst/extdata/config/concept-dict/blood_gas.json +++ b/inst/extdata/config/concept-dict/blood_gas.json @@ -263,6 +263,7 @@ { "ids": 2283, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" }, diff --git a/inst/extdata/config/concept-dict/output.json b/inst/extdata/config/concept-dict/output.json index e078d7d7..73953d48 100644 --- a/inst/extdata/config/concept-dict/output.json +++ b/inst/extdata/config/concept-dict/output.json @@ -148,6 +148,7 @@ { "ids": 725, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } diff --git a/inst/extdata/config/concept-dict/respiratory.json b/inst/extdata/config/concept-dict/respiratory.json index 234531a9..5e64f8fe 100644 --- a/inst/extdata/config/concept-dict/respiratory.json +++ b/inst/extdata/config/concept-dict/respiratory.json @@ -223,6 +223,7 @@ { "ids": 710, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -350,6 +351,7 @@ { "ids": 719, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index bac04180..395fa144 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -82,6 +82,7 @@ 705 ], "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -245,6 +246,7 @@ { "ids": [708, 724], "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -361,6 +363,7 @@ 706 ], "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -452,6 +455,7 @@ 704 ], "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -579,6 +583,7 @@ { "ids": 709, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } From 74a66d9785ba3fb78d6d9dc396942dadf748cb42 
Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 16:54:23 +0100 Subject: [PATCH 32/48] Pass tbl callback correctly --- R/config-utils.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/config-utils.R b/R/config-utils.R index 4a6dbc07..4b036943 100644 --- a/R/config-utils.R +++ b/R/config-utils.R @@ -396,6 +396,7 @@ tbl_callback <- function(x){ callback_field <- vctrs::field(x, "callback") if (is.character(callback_field)) { + msg_ricu(paste("[tbl_callback] Using callback function: ", callback_field)) return(str_to_fun(callback_field)) } @@ -405,7 +406,8 @@ tbl_callback <- function(x){ callback_value <- callback_field[[1]] if (is.character(callback_value)) { - return(str_to_fun(callback_field[1])) + msg_ricu(paste("[tbl_callback] Using callback function: ", callback_value)) + return(str_to_fun(callback_value)) } return(identity_callback) From 84ec51ff0a767429cbd647b57345169b88695a78 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 17:02:53 +0100 Subject: [PATCH 33/48] Fix missing callback application --- R/setup-import.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/setup-import.R b/R/setup-import.R index 98bd69ed..3cfcc487 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -229,7 +229,9 @@ merge_fst_chunks <- function(src, targ, new, old, sort_col, prog, nme, tick) { split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick, callback = NULL) { n_row <- nrow(x) - + if (!is.null(callback)) { + x <- callback(x) + } x <- split(x, part_fun(x)) tmp_nme <- file.path(dir, paste0("part_", names(x)), From 99529cdec8dd16860007231c05a6f5c153bab645 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 17:31:32 +0100 Subject: [PATCH 34/48] Apply callback before split_write --- R/setup-import.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index 3cfcc487..ab6388ec 100644 --- a/R/setup-import.R +++ 
b/R/setup-import.R @@ -226,12 +226,10 @@ merge_fst_chunks <- function(src, targ, new, old, sort_col, prog, nme, tick) { invisible(NULL) } -split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick, callback = NULL) { +split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick) { n_row <- nrow(x) - if (!is.null(callback)) { - x <- callback(x) - } + x <- split(x, part_fun(x)) tmp_nme <- file.path(dir, paste0("part_", names(x)), @@ -280,8 +278,8 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, temp process_chunk <- function(x, pos, ...) { report_problems(x, rawf) - split_write(x, pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, - progress, name, tick, callback) + split_write(callback(x), pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, + progress, name, tick) } if (grepl("\\.gz$", file)) { From f54afad4770f784aabb6fff4f5deba5e4eccc23c Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 09:47:14 +0100 Subject: [PATCH 35/48] Config updates: - Fix sic bugs - Slack temp range --- inst/extdata/config/concept-dict/chemistry.json | 6 ++++-- inst/extdata/config/concept-dict/hematology.json | 8 +++++--- inst/extdata/config/concept-dict/vitals.json | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index 39fbd784..fab37aee 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -876,7 +876,8 @@ "ids": 253, "table": "laboratory", "sub_var": "laboratoryid", - "class": "sic_itm" + "class": "sic_itm", + "callback": "convert_unit(identity_callback, 'ng/mL', 'µg/l')" } ], "picdb": [ @@ -1133,7 +1134,8 @@ "ids": 341, "table": "laboratory", "sub_var": "laboratoryid", - "class": "sic_itm" + "class": "sic_itm", + "callback": "convert_unit(binary_op(`*`, 10), 'mg/L', 'mg/dl')" } ], "picdb": [ diff --git 
a/inst/extdata/config/concept-dict/hematology.json b/inst/extdata/config/concept-dict/hematology.json index 3d9b5ef5..8aac6cf7 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -699,7 +699,8 @@ { "ids": 223, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" }, { "ids": 302, @@ -854,7 +855,7 @@ "ids": 290, "table": "laboratory", "sub_var": "laboratoryid", - "callback": "convert_unit(binary_op(`*`, 0.16114), '%')", + "callback": "convert_unit(binary_op(`*`, 1.6114), '%', 'g/dl')", "class": "sic_itm" } ], @@ -1327,7 +1328,8 @@ "ids": 599, "table": "laboratory", "sub_var": "laboratoryid", - "class": "sic_itm" + "class": "sic_itm", + "callback": "convert_unit(identity_callback, 'm/uL', 'T/L.')" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index 395fa144..75967e5e 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -476,8 +476,8 @@ "C", "\u00b0C" ], - "min": 32, - "max": 42, + "min": 30, + "max": 44, "description": "temperature", "omopid": 4302666, "category": "vitals", From ad6b07e6c7e8af96e41beff1bc0cf77c071fcf2f Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 09:57:29 +0100 Subject: [PATCH 36/48] Fix configs --- inst/extdata/config/concept-dict/chemistry.json | 2 +- inst/extdata/config/concept-dict/hematology.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index fab37aee..712c7e36 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -1678,7 +1678,7 @@ "ids": 481, "table": "laboratory", "sub_var": "laboratoryid", - "callback": "transform_fun(binary_op(`/`, 1000))", + "callback": "transform_fun(binary_op(`/`, 
1000), 'ng/mL', 'ng/L')", "class": "sic_itm" } ], diff --git a/inst/extdata/config/concept-dict/hematology.json b/inst/extdata/config/concept-dict/hematology.json index 8aac6cf7..735e071b 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -855,7 +855,7 @@ "ids": 290, "table": "laboratory", "sub_var": "laboratoryid", - "callback": "convert_unit(binary_op(`*`, 1.6114), '%', 'g/dl')", + "callback": "convert_unit(identity_callback, '%', 'g/dl')", "class": "sic_itm" } ], From 640d4d27812bee1238b8bfc1ca4775a0dbd47774 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 10:16:41 +0100 Subject: [PATCH 37/48] Fix callback --- inst/extdata/config/concept-dict/chemistry.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index 712c7e36..12e92d91 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -1678,7 +1678,7 @@ "ids": 481, "table": "laboratory", "sub_var": "laboratoryid", - "callback": "transform_fun(binary_op(`/`, 1000), 'ng/mL', 'ng/L')", + "callback": "convert_unit(binary_op(`/`, 1000), 'ng/mL', 'ng/L')", "class": "sic_itm" } ], From 5f0a6845053fa6e3b03380dcba2e0fad9bffb3e8 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 11:54:56 +0100 Subject: [PATCH 38/48] Start to integrate YAIB configs --- .../config/concept-dict/chemistry.json | 11 +- .../config/concept-dict/circulatory.json | 19 ++ .../config/concept-dict/demographics.json | 83 ++++++++- .../config/concept-dict/hematology.json | 9 +- .../config/concept-dict/medications.json | 168 ++++++++++++++++++ inst/extdata/config/concept-dict/misc.json | 90 ++++++++++ inst/extdata/config/concept-dict/outcome.json | 123 +++++++++++++ 7 files changed, 493 insertions(+), 10 deletions(-) create mode 100644 
inst/extdata/config/concept-dict/circulatory.json create mode 100644 inst/extdata/config/concept-dict/misc.json diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index 12e92d91..d0560498 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -43,7 +43,7 @@ ], "miiv": [ { - "ids": 50862, + "ids": [50862, 53085], "table": "labevents", "sub_var": "itemid" } @@ -127,7 +127,7 @@ ], "miiv": [ { - "ids": 50863, + "ids": [50863, 53086], "table": "labevents", "sub_var": "itemid" } @@ -454,7 +454,7 @@ ], "miiv": [ { - "ids": 50885, + "ids": [50885, 53089], "table": "labevents", "sub_var": "itemid" } @@ -506,7 +506,7 @@ "ids": 6812, "table": "numericitems", "sub_var": "itemid", - "callback": "convert_unit(binary_op(`*`, 0.058467), 'mg/dL')" + "callback": "convert_unit(binary_op(`*`, 1.), 'mg/dL')" } ], "eicu": [ @@ -825,7 +825,8 @@ { "ids": 6824, "table": "numericitems", - "sub_var": "itemid" + "sub_var": "itemid", + "callback": "convert_unit(binary_op(`*`, 0.2), 'ng/mL')" } ], "eicu": [ diff --git a/inst/extdata/config/concept-dict/circulatory.json b/inst/extdata/config/concept-dict/circulatory.json new file mode 100644 index 00000000..b892d513 --- /dev/null +++ b/inst/extdata/config/concept-dict/circulatory.json @@ -0,0 +1,19 @@ +{ + "mech_circ": { + "description": "mechanical circulatory support", + "class": "lgl_cncpt", + "target": "id_tbl", + "category": "circulatory", + "sources": { + "mimic": [ + { + "ids": ["3752", "3760", "3765", "3766", "3768"], + "table": "procedures_icd", + "sub_var": "icd9_code", + "val_var": "seq_num", + "callback": "transform_fun(set_val(TRUE))" + } + ] + } + } + } \ No newline at end of file diff --git a/inst/extdata/config/concept-dict/demographics.json b/inst/extdata/config/concept-dict/demographics.json index a0f7da0d..35338bbb 100644 --- a/inst/extdata/config/concept-dict/demographics.json +++ 
b/inst/extdata/config/concept-dict/demographics.json @@ -419,5 +419,86 @@ } ] } - } + }, + "ethnic": { + "target": "id_tbl", + "levels": ["asian", "black", "white", "other"], + "class": "fct_cncpt", + "description": "ethnicity", + "category": "demographics", + "sources": { + "eicu": [ + { + "table": "patient", + "val_var": "ethnicity", + "callback": "apply_map(c(`African American` = 'black', Asian = 'asian', Caucasian = 'white', Hispanic = 'other', `Native American` = 'other', `Other/Unknown` = 'other'))", + "class": "col_itm" + } + ], + "eicu_demo": [ + { + "table": "patient", + "val_var": "ethnicity", + "callback": "apply_map(c(`African American` = 'black', Asian = 'asian', Caucasian = 'white', Hispanic = 'other', `Native American` = 'other', `Other/Unknown` = 'other'))", + "class": "col_itm" + } + ], + "miiv": [ + { + "table": "admissions", + "val_var": "race", + "callback": "apply_map(c(`AMERICAN INDIAN/ALASKA NATIVE` = 'other', ASIAN = 'asian', `ASIAN - ASIAN INDIAN` = 'asian', `ASIAN - CHINESE` = 'asian', `ASIAN - KOREAN` = 'asian', `ASIAN - SOUTH EAST ASIAN` = 'asian', `BLACK/AFRICAN` = 'black', `BLACK/AFRICAN AMERICAN` = 'black', `BLACK/CAPE VERDEAN` = 'black', `BLACK/CARIBBEAN ISLAND` = 'black', `HISPANIC OR LATINO` = 'other', `HISPANIC/LATINO - CENTRAL AMERICAN` = 'other', `HISPANIC/LATINO - COLUMBIAN` = 'other', `HISPANIC/LATINO - CUBAN` = 'other', `HISPANIC/LATINO - DOMINICAN` = 'other', `HISPANIC/LATINO - GUATEMALAN` = 'other', `HISPANIC/LATINO - HONDURAN` = 'other', `HISPANIC/LATINO - MEXICAN` = 'other', `HISPANIC/LATINO - PUERTO RICAN` = 'other', `HISPANIC/LATINO - SALVADORAN` = 'other', `MULTIPLE RACE/ETHNICITY NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER` = 'other', `OTHER` = 'other', `PATIENT DECLINED TO ANSWER` = NA, `PORTUGUESE` = 'white', `SOUTH AMERICAN` = 'other', `UNABLE TO OBTAIN` = NA, `UNKNOWN` = NA, `WHITE` = 'white', `WHITE - BRAZILIAN` = 'white', `WHITE - EASTERN EUROPEAN` = 'white', `WHITE - OTHER EUROPEAN` = 'white', `WHITE - RUSSIAN` = 
'white'))", + "class": "col_itm" + } + ], + "mimic": [ + { + "table": "admissions", + "val_var": "ethnicity", + "callback": "apply_map(c(`AMERICAN INDIAN/ALASKA NATIVE` = 'other', `AMERICAN INDIAN/ALASKA NATIVE FEDERALLY RECOGNIZED TRIBE` = 'other', ASIAN = 'asian', `ASIAN - ASIAN INDIAN` = 'asian', `ASIAN - CAMBODIAN` = 'asian', `ASIAN - CHINESE` = 'asian', `ASIAN - FILIPINO` = 'asian', `ASIAN - JAPANESE` = 'asian', `ASIAN - KOREAN` = 'asian', `ASIAN - OTHER` = 'asian', `ASIAN - THAI` = 'asian', `ASIAN - VIETNAMESE` = 'asian',`BLACK/AFRICAN` = 'black', `BLACK/AFRICAN AMERICAN` = 'black', `BLACK/CAPE VERDEAN` = 'black', `BLACK/HAITIAN` = 'black', `CARIBBEAN ISLAND` = 'black', `HISPANIC OR LATINO` = 'other', `HISPANIC/LATINO - CENTRAL AMERICAN (OTHER)` = 'other', `HISPANIC/LATINO - COLOMBIAN` = 'other', `HISPANIC/LATINO - CUBAN` = 'other', `HISPANIC/LATINO - DOMINICAN` = 'other', `HISPANIC/LATINO - GUATEMALAN` = 'other', `HISPANIC/LATINO - HONDURAN` = 'other', `HISPANIC/LATINO - MEXICAN` = 'other', `HISPANIC/LATINO - PUERTO RICAN` = 'other', `HISPANIC/LATINO - SALVADORAN` = 'other', `MIDDLE EASTERN` = 'other', `MULTI RACE ETHNICITY` = 'other', `NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER` = 'other', `OTHER` = 'other', `PATIENT DECLINED TO ANSWER` = NA, `PORTUGUESE` = 'white', `SOUTH AMERICAN` = 'other', `UNABLE TO OBTAIN` = NA, `UNKNOWN/NOT SPECIFIED` = NA, `WHITE` = 'white', `WHITE - BRAZILIAN` = 'white', `WHITE - EASTERN EUROPEAN` = 'white', `WHITE - OTHER EUROPEAN` = 'white', `WHITE - RUSSIAN` = 'white'))", + "class": "col_itm" + } + ], + "mimic_demo": [ + { + "table": "admissions", + "val_var": "ethnicity", + "callback": "apply_map(c(`AMERICAN INDIAN/ALASKA NATIVE` = 'other', `AMERICAN INDIAN/ALASKA NATIVE FEDERALLY RECOGNIZED TRIBE` = 'other', ASIAN = 'asian', `ASIAN - ASIAN INDIAN` = 'asian', `ASIAN - CAMBODIAN` = 'asian', `ASIAN - CHINESE` = 'asian', `ASIAN - FILIPINO` = 'asian', `ASIAN - JAPANESE` = 'asian', `ASIAN - KOREAN` = 'asian', `ASIAN - OTHER` = 
'asian', `ASIAN - THAI` = 'asian', `ASIAN - VIETNAMESE` = 'asian',`BLACK/AFRICAN` = 'black', `BLACK/AFRICAN AMERICAN` = 'black', `BLACK/CAPE VERDEAN` = 'black', `BLACK/HAITIAN` = 'black', `CARIBBEAN ISLAND` = 'black', `HISPANIC OR LATINO` = 'other', `HISPANIC/LATINO - CENTRAL AMERICAN (OTHER)` = 'other', `HISPANIC/LATINO - COLOMBIAN` = 'other', `HISPANIC/LATINO - CUBAN` = 'other', `HISPANIC/LATINO - DOMINICAN` = 'other', `HISPANIC/LATINO - GUATEMALAN` = 'other', `HISPANIC/LATINO - HONDURAN` = 'other', `HISPANIC/LATINO - MEXICAN` = 'other', `HISPANIC/LATINO - PUERTO RICAN` = 'other', `HISPANIC/LATINO - SALVADORAN` = 'other', `MIDDLE EASTERN` = 'other', `MULTI RACE ETHNICITY` = 'other', `NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER` = 'other', `OTHER` = 'other', `PATIENT DECLINED TO ANSWER` = NA, `PORTUGUESE` = 'white', `SOUTH AMERICAN` = 'other', `UNABLE TO OBTAIN` = NA, `UNKNOWN/NOT SPECIFIED` = NA, `WHITE` = 'white', `WHITE - BRAZILIAN` = 'white', `WHITE - EASTERN EUROPEAN` = 'white', `WHITE - OTHER EUROPEAN` = 'white', `WHITE - RUSSIAN` = 'white'))", + "class": "col_itm" + } + ], + "picdb": [ + { + "table": "admissions", + "val_var": "ethnicity", + "callback": "apply_map(c(`Hui ethnic` = 'asian', `Buyei ethnic` = 'asian', `Han ethnic` = 'asian', `Yi ethnic` = 'asian', `Miao ethnic` = 'asian', `Others` = 'other', `Tujia ethnic` = 'asian'))", + "class": "col_itm" + } + ] + } + }, + "insurance": { + "target": "id_tbl", + "levels": ["Government", "Medicaid", "Medicare", "Private", "Self Pay"], + "class": "fct_cncpt", + "description": "insurance", + "category": "demographics", + "sources": { + "mimic": [{ + "table": "admissions", + "val_var": "insurance", + "class": "col_itm" + }], + "mimic_demo": [{ + "table": "admissions", + "val_var": "insurance", + "class": "col_itm" + }], + "miiv": [{ + "table": "admissions", + "val_var": "insurance", + "class": "col_itm" + }] + } + } } \ No newline at end of file diff --git a/inst/extdata/config/concept-dict/hematology.json 
b/inst/extdata/config/concept-dict/hematology.json index 735e071b..73193b6c 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -825,9 +825,9 @@ "ids": 24000170, "table": "observations", "sub_var": "variableid", - "callback": "convert_unit(binary_op(`*`, 0.16114), '%')", + "callback": "convert_unit(binary_op(`*`, 0.1), '%')", "class": "hrd_itm" - } + } ], "miiv": [ { @@ -990,8 +990,9 @@ "ids": 24000550, "table": "observations", "sub_var": "variableid", - "class": "hrd_itm" - } + "class": "hrd_itm", + "callback": "blood_cell_ratio" + } ], "miiv": [ { diff --git a/inst/extdata/config/concept-dict/medications.json b/inst/extdata/config/concept-dict/medications.json index 770deb8f..cbb177be 100644 --- a/inst/extdata/config/concept-dict/medications.json +++ b/inst/extdata/config/concept-dict/medications.json @@ -417,6 +417,102 @@ "picdb": [] } }, + "abx_cont": { + "description": "continuous antibiotic administration", + "concepts": ["abx_duration", "death_icu"], + "category": "medications", + "callback": "abx_cont", + "class": "rec_cncpt" + }, + "abx_duration": { + "description": "duration of administered antibiotics", + "category": "medications", + "target": "win_tbl", + "class": "lgl_cncpt", + "sources": { + "aumc": [ + { + "ids": [2, 13, 19, 24, 28, 29, 57, 59, 82, 103, 240, 247, 333, 1133, 1199, 1300, 1371, 1795, 2284, 2834, 3237, 3741, 5576, 6834, 6847, 6871, 6919, 6948, 6953, 6958, 7044, 7064, 7185, 7187, 7208, 7227, 7235, 8064, 8394, 8942, 9029, 9030, 9052, 9070, 9117, 9128, 9133, 9142, 9151, 9152, 12262, 12389, 12398, 12956, 12997, 13057, 13094, 13102, 15591, 18860, 19137, 19773, 20563, 23166, 24241, 25776, 27617, 29321], + "table": "drugitems", + "sub_var": "itemid", + "dur_var": "stop", + "callback": "transform_fun(set_val(TRUE))" + } + ], + "eicu": [ + { + "regex": 
"bactrim|cipro|flagyl|metronidazole|zithromax|zosyn|(((amika|cleo|ofloxa)|(azithro|clinda|tobra|vanco)my)c|(ampi|oxa|peni|pipera)cill|cefazol|levaqu|rifamp)in", + "table": "infusiondrug", + "sub_var": "drugname", + "target": "ts_tbl", + "callback": "combine_callbacks(transform_fun(set_val(TRUE)), ricu:::ts_to_win_tbl(mins(1L)))", + "class": "rgx_itm" + }, + { + "regex": "cipro|flagyl|maxipime|metronidazole|tazobactam|zosyn|cef(azolin|epime)|(((azithro|clinda|vanco)my|ofloxa|vanco)c|levaqu|piperacill|roceph)in", + "table": "medication", + "sub_var": "drugname", + "dur_var": "drugstopoffset", + "class": "rgx_itm", + "callback": "transform_fun(set_val(TRUE))" + } + ], + "eicu_demo": [ + { + "regex": "bactrim|cipro|flagyl|metronidazole|zithromax|zosyn|(((amika|cleo|ofloxa)|(azithro|clinda|tobra|vanco)my)c|(ampi|oxa|peni|pipera)cill|cefazol|levaqu|rifamp)in", + "table": "infusiondrug", + "sub_var": "drugname", + "target": "ts_tbl", + "callback": "combine_callbacks(transform_fun(set_val(TRUE)), ricu:::ts_to_win_tbl(mins(1L)))", + "class": "rgx_itm" + }, + { + "regex": "cipro|flagyl|maxipime|metronidazole|tazobactam|zosyn|cef(azolin|epime)|(((azithro|clinda|vanco)my|ofloxa|vanco)c|levaqu|piperacill|roceph)in", + "table": "medication", + "sub_var": "drugname", + "dur_var": "drugstopoffset", + "class": "rgx_itm", + "callback": "transform_fun(set_val(TRUE))" + } + ], + "hirid": [ + { + "ids": [163, 176, 181, 186, 189, 300, 326, 331, 351, 405, 1000234, 1000272, 1000273, 1000274, 1000284, 1000299, 1000300, 1000302, 1000304, 1000305, 1000306, 1000315, 1000317, 1000318, 1000320, 1000321, 1000322, 1000335, 1000348, 1000352, 1000363, 1000365, 1000390, 1000407, 1000408, 1000424, 1000425, 1000426, 1000437, 1000483, 1000507, 1000508, 1000518, 1000519, 1000549, 1000601, 1000648, 1000666, 1000670, 1000671, 1000760, 1000781, 1000791, 1000797, 1000812, 1000825, 1000829, 1000830, 1000837, 1000838, 1000854, 1000855, 1000893, 1000894, 1001005, 1001068, 1001075, 1001079, 1001084, 1001086, 
1001095, 1001096, 1001097, 1001098, 1001168, 1001169, 1001170, 1001171, 1001173, 1001193, 1001198], + "table": "pharma", + "sub_var": "pharmaid", + "target": "ts_tbl", + "callback": "combine_callbacks(transform_fun(set_val(TRUE)), ricu:::ts_to_win_tbl(mins(1L)))" + } + ], + "miiv": [ + { + "ids": [225798, 225837, 225838, 225840, 225842, 225843, 225844, 225845, 225847, 225848, 225850, 225851, 225853, 225855, 225857, 225859, 225860, 225862, 225863, 225865, 225866, 225868, 225869, 225871, 225873, 225875, 225876, 225877, 225879, 225881, 225882, 225883, 225884, 225885, 225886, 225888, 225889, 225890, 225892, 225893, 225895, 225896, 225897, 225898, 225899, 225900, 225902, 225903, 225905, 227691, 228003], + "table": "inputevents", + "sub_var": "itemid", + "dur_var": "endtime", + "callback": "transform_fun(set_val(TRUE))" + } + ], + "mimic": [ + { + "ids": [225798, 225837, 225838, 225840, 225842, 225843, 225844, 225845, 225847, 225848, 225850, 225851, 225853, 225855, 225857, 225859, 225860, 225862, 225863, 225865, 225866, 225868, 225869, 225871, 225873, 225875, 225876, 225877, 225879, 225881, 225882, 225883, 225884, 225885, 225886, 225888, 225889, 225890, 225892, 225893, 225895, 225896, 225897, 225898, 225899, 225900, 225902, 225903, 225905, 227691, 228003], + "table": "inputevents_mv", + "sub_var": "itemid", + "dur_var": "endtime", + "callback": "transform_fun(set_val(TRUE))" + } + ], + "mimic_demo": [ + { + "ids": [225798, 225837, 225838, 225840, 225842, 225843, 225844, 225845, 225847, 225848, 225850, 225851, 225853, 225855, 225857, 225859, 225860, 225862, 225863, 225865, 225866, 225868, 225869, 225871, 225873, 225875, 225876, 225877, 225879, 225881, 225882, 225883, 225884, 225885, 225886, 225888, 225889, 225890, 225892, 225893, 225895, 225896, 225897, 225898, 225899, 225900, 225902, 225903, 225905, 227691, 228003], + "table": "inputevents_mv", + "sub_var": "itemid", + "dur_var": "endtime", + "callback": "transform_fun(set_val(TRUE))" + } + ] + } + }, "adh_rate": { 
"unit": [ "units/min", @@ -510,6 +606,22 @@ "picdb": [] } }, + "adh_dur": { + "description": "vasopressin duration", + "category": "medications", + "aggregate": "max", + "sources": { + "hirid": [ + { + "ids": [112, 113], + "table": "pharma", + "sub_var": "pharmaid", + "grp_var": "infusionid", + "callback": "hirid_duration" + } + ] + } + }, "cort": { "class": "lgl_cncpt", "description": "corticosteroids", @@ -1745,6 +1857,54 @@ "picdb": [] } }, + "levo_dur": { + "description": "levosimendan duration", + "category": "medications", + "aggregate": "max", + "sources": { + "hirid": [ + { + "ids": 1000606, + "table": "pharma", + "sub_var": "pharmaid", + "grp_var": "infusionid", + "callback": "hirid_duration" + } + ] + } + }, + "milrin_dur": { + "description": "milrinone duration", + "category": "medications", + "aggregate": "max", + "sources": { + "hirid": [ + { + "ids": 1000441, + "table": "pharma", + "sub_var": "pharmaid", + "grp_var": "infusionid", + "callback": "hirid_duration" + } + ] + } + }, + "teophyllin_dur": { + "description": "theophyllin duration", + "category": "medications", + "aggregate": "max", + "sources": { + "hirid": [ + { + "ids": [1000706, 1000707, 1000698, 1000267], + "table": "pharma", + "sub_var": "pharmaid", + "grp_var": "infusionid", + "callback": "hirid_duration" + } + ] + } + }, "phn_rate": { "unit": "mcg/kg/min", "description": "phenylephrine rate", @@ -1841,5 +2001,13 @@ "category": "medications", "callback": "vaso_ind", "class": "rec_cncpt" + }, + "cf_treat": { + "description": "treatment for circulatory failure (vasopressors/inotropes)", + "category": "medications", + "concepts": ["epi_dur", "norepi_dur", "dopa_dur", "dobu_dur", "adh_dur", + "phn_dur", "levo_dur", "milrin_dur", "teophyllin_dur"], + "callback": "cf_treat", + "class": "rec_cncpt" } } \ No newline at end of file diff --git a/inst/extdata/config/concept-dict/misc.json b/inst/extdata/config/concept-dict/misc.json new file mode 100644 index 00000000..597b8d32 --- /dev/null +++ 
b/inst/extdata/config/concept-dict/misc.json @@ -0,0 +1,90 @@ +{ + "hospital_id": { + "description": "source hospital identifier", + "min": 1, + "max": 1000, + "target": "id_tbl", + "sources": { + "eicu": [ + { + "table": "patient", + "class": "col_itm", + "val_var": "hospitalid" + } + ], + "eicu_demo": [ + { + "table": "patient", + "class": "col_itm", + "val_var": "hospitalid" + } + ], + "hirid": [ + { + "table": "general", + "class": "col_itm", + "val_var": "sex", + "callback": "transform_fun(set_val(1))" + } + ], + "mimic": [ + { + "table": "icustays", + "class": "col_itm", + "val_var": "hadm_id", + "callback": "transform_fun(set_val(1))" + } + ], + "mimic_demo": [ + { + "table": "icustays", + "class": "col_itm", + "val_var": "hadm_id", + "callback": "transform_fun(set_val(1))" + } + ], + "miiv": [ + { + "table": "icustays", + "class": "col_itm", + "val_var": "hadm_id", + "callback": "transform_fun(set_val(1))" + } + ] + } + }, + "patient_id": { + "description": "Patient Identifier", + "target": "id_tbl", + "sources": { + "mimic": [ + { + "table": "admissions", + "val_var": "subject_id", + "class": "col_itm" + } + ], + "miiv": [ + { + "table": "admissions", + "val_var": "subject_id", + "class": "col_itm" + } + ], + "hirid": [ + { + "table": "general", + "val_var": "patientid", + "class": "col_itm" + } + ], + "eicu": [ + { + "table": "patient", + "val_var": "patientunitstayid", + "class": "col_itm" + } + ] + } + } +} \ No newline at end of file diff --git a/inst/extdata/config/concept-dict/outcome.json b/inst/extdata/config/concept-dict/outcome.json index 95009c78..af5256f1 100644 --- a/inst/extdata/config/concept-dict/outcome.json +++ b/inst/extdata/config/concept-dict/outcome.json @@ -92,6 +92,129 @@ ] } }, + "death_hosp": { + "class": "lgl_cncpt", + "description": "in-hospital mortality", + "category": "outcome", + "sources": { + "eicu": [ + { + "table": "patient", + "index_var": "hospitaldischargeoffset", + "val_var": "hospitaldischargestatus", + "callback": 
"transform_fun(comp_na(`==`, 'Expired'))", + "class": "col_itm" + } + ], + "eicu_demo": [ + { + "table": "patient", + "index_var": "hospitaldischargeoffset", + "val_var": "hospitaldischargestatus", + "callback": "transform_fun(comp_na(`==`, 'Expired'))", + "class": "col_itm" + } + ], + "miiv": [ + { + "table": "admissions", + "index_var": "deathtime", + "val_var": "hospital_expire_flag", + "callback": "transform_fun(comp_na(`==`, 1L))", + "class": "col_itm" + } + ], + "mimic": [ + { + "table": "admissions", + "index_var": "deathtime", + "val_var": "hospital_expire_flag", + "callback": "transform_fun(comp_na(`==`, 1L))", + "class": "col_itm" + } + ], + "mimic_demo": [ + { + "table": "admissions", + "index_var": "deathtime", + "val_var": "hospital_expire_flag", + "callback": "transform_fun(comp_na(`==`, 1L))", + "class": "col_itm" + } + ] + } + }, + "death_icu": { + "class": "lgl_cncpt", + "description": "ICU mortality", + "category": "outcome", + "sources": { + "aumc": [ + { + "table": "admissions", + "index_var": "dischargedat", + "val_var": "destination", + "death_date": "dateofdeath", + "callback": "aumc_death_icu", + "class": "col_itm" + } + ], + "eicu": [ + { + "table": "patient", + "index_var": "unitdischargeoffset", + "val_var": "unitdischargestatus", + "callback": "transform_fun(comp_na(`==`, 'Expired'))", + "class": "col_itm" + } + ], + "eicu_demo": [ + { + "table": "patient", + "index_var": "unitdischargeoffset", + "val_var": "unitdischargestatus", + "callback": "transform_fun(comp_na(`==`, 'Expired'))", + "class": "col_itm" + } + ], + "hirid": [ + { + "ids": [110, 200], + "table": "observations", + "sub_var": "variableid", + "callback": "hirid_death_icu", + "class": "hrd_itm" + } + ], + "miiv": [ + { + "table": "admissions", + "index_var": "deathtime", + "val_var": "hospital_expire_flag", + "callback": "miiv_death_icu", + "class": "col_itm" + } + ], + "mimic": [ + { + "table": "admissions", + "index_var": "deathtime", + "val_var": "hospital_expire_flag", 
+ "callback": "mimic_death_icu", + "class": "col_itm" + } + ], + "mimic_demo": [ + { + "table": "admissions", + "index_var": "deathtime", + "val_var": "hospital_expire_flag", + "callback": "mimic_death_icu", + "class": "col_itm" + } + ] + } + }, "dopa60": { "concepts": [ "dopa_rate", From 4d01ab8142ce6c1b241fa302dfc7a7b7e07f529a Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 12:17:10 +0100 Subject: [PATCH 39/48] Add `outcome` and `output` configs --- inst/extdata/config/concept-dict/outcome.json | 59 ++++++++++++++++++- inst/extdata/config/concept-dict/output.json | 58 +++++++++++++++++- 2 files changed, 114 insertions(+), 3 deletions(-) diff --git a/inst/extdata/config/concept-dict/outcome.json b/inst/extdata/config/concept-dict/outcome.json index af5256f1..c488c703 100644 --- a/inst/extdata/config/concept-dict/outcome.json +++ b/inst/extdata/config/concept-dict/outcome.json @@ -215,6 +215,13 @@ ] } }, + "circ_fail": { + "description": "circulatory failure", + "concepts": ["map", "cf_treat", "lact"], + "category": "outcome", + "callback": "circ_fail", + "class": "rec_cncpt" + }, "dopa60": { "concepts": [ "dopa_rate", @@ -539,5 +546,55 @@ ], "callback": "susp_inf", "class": "rec_cncpt" - } + }, + "sep3_abx_cont": { + "concepts": ["sofa", "susp_inf_alt"], + "description": "sepsis-3 criterion (alt. def.: continuous abx only)", + "category": "outcome", + "callback": "sep3_abx_cont", + "class": "rec_cncpt" + }, + "susp_inf_abx_cont": { + "concepts": ["abx_cont", "samp"], + "description": "suspected infection (alt. 
def.: continuous abx only)", + "category": "outcome", + "aggregate": [ + [ + "sum" + ], + [ + false + ] + ], + "callback": "susp_inf_abx_cont", + "class": "rec_cncpt" + }, + "aki": { + "description": "acute kidney injury (KDIGO 1/2/3)", + "concepts": "kdigo", + "category": "outcome", + "callback": "aki", + "class": "rec_cncpt" + }, + "kdigo": { + "description": "kidney disease improving global outcome staging", + "concepts": ["kdigo_crea", "kdigo_urine"], + "category": "outcome", + "callback": "kdigo", + "class": "rec_cncpt" + }, + "kdigo_crea": { + "description": "kidney disease improving global outcome creatinine component", + "concepts": "crea", + "category": "outcome", + "callback": "kdigo_crea", + "class": "rec_cncpt" + }, + "kdigo_urine": { + "description": "kidney disease improving global outcome urine output component", + "concepts": ["urine_rate", "weight"], + "category": "outcome", + "callback": "kdigo_urine", + "class": "rec_cncpt" + } } \ No newline at end of file diff --git a/inst/extdata/config/concept-dict/output.json b/inst/extdata/config/concept-dict/output.json index 73953d48..50659e8d 100644 --- a/inst/extdata/config/concept-dict/output.json +++ b/inst/extdata/config/concept-dict/output.json @@ -2,7 +2,7 @@ "urine": { "unit": "mL", "min": 0, - "max": 2000, + "max": 4000, "aggregate": "sum", "description": "urine output", "omopid": 4264378, @@ -10,7 +10,7 @@ "sources": { "aumc": [ { - "ids": 8794, + "ids": [8794, 8796, 8798, 8800, 8803], "table": "numericitems", "sub_var": "itemid" } @@ -164,6 +164,60 @@ ] } }, + "urine_rate": { + "unit": "mL/h", + "min": 0, + "max": 2000, + "aggregate": "max", + "description": "urine rate per hour", + "category": "output", + "sources": { + "aumc": [ + { + "class": "fun_itm", + "callback": "combine_callbacks(fwd_concept('urine'), urine_rate)" + } + ], + "eicu": [ + { + "class": "fun_itm", + "callback": "combine_callbacks(fwd_concept('urine'), urine_rate)" + } + ], + "eicu_demo": [ + { + "class": "fun_itm", + 
"callback": "combine_callbacks(fwd_concept('urine'), urine_rate)" + } + ], + "hirid": [ + { + "ids": 10020000, + "table": "observations", + "sub_var": "variableid", + "class": "hrd_itm" + } + ], + "miiv": [ + { + "class": "fun_itm", + "callback": "combine_callbacks(fwd_concept('urine'), urine_rate)" + } + ], + "mimic": [ + { + "class": "fun_itm", + "callback": "combine_callbacks(fwd_concept('urine'), urine_rate)" + } + ], + "mimic_demo": [ + { + "class": "fun_itm", + "callback": "combine_callbacks(fwd_concept('urine'), urine_rate)" + } + ] + } + }, "urine24": { "concepts": "urine", "description": "urine output per 24h", From 9ebaf7f97e7974f0c76e118cb499cc4293ecbc16 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 14:10:57 +0100 Subject: [PATCH 40/48] Use `apply_map` for `sic` `sex` --- R/callback-itm.R | 10 ---------- inst/extdata/config/concept-dict/demographics.json | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/R/callback-itm.R b/R/callback-itm.R index 22a971fc..941470b7 100644 --- a/R/callback-itm.R +++ b/R/callback-itm.R @@ -213,16 +213,6 @@ mimic_age <- function(x) { eicu_age <- function(x) as.numeric(ifelse(x == "> 89", 90, x)) -sic_sex <- function(x) { - ifelse( - x == 735, - "Male", - ifelse(x == 736, - "Female", - NA_character_ - )) -} - hirid_death <- function(x, val_var, sub_var, env, ...) 
{ dis <- "discharge_status" diff --git a/inst/extdata/config/concept-dict/demographics.json b/inst/extdata/config/concept-dict/demographics.json index a0f7da0d..f4723d6d 100644 --- a/inst/extdata/config/concept-dict/demographics.json +++ b/inst/extdata/config/concept-dict/demographics.json @@ -320,7 +320,7 @@ "table": "cases", "val_var": "sex", "class": "col_itm", - "callback": "transform_fun(sic_sex)" + "callback": "apply_map(c(`735` = 'Male', `736` = 'Female'))" } ], "picdb": [ From 6bb39f9d5813bc4ba741acd13b832716d8426673 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 15:55:43 +0100 Subject: [PATCH 41/48] Add `vitals` and `respiratory` --- .../config/concept-dict/respiratory.json | 86 ++++++++++++++++++- inst/extdata/config/concept-dict/vitals.json | 56 ++++++++++-- 2 files changed, 130 insertions(+), 12 deletions(-) diff --git a/inst/extdata/config/concept-dict/respiratory.json b/inst/extdata/config/concept-dict/respiratory.json index 5e64f8fe..8d385a8f 100644 --- a/inst/extdata/config/concept-dict/respiratory.json +++ b/inst/extdata/config/concept-dict/respiratory.json @@ -119,6 +119,82 @@ "picdb": [] } }, + "spo2": { + "unit": ["%", "% Sat."], + "min": 50, + "max": 100, + "description": "oxygen saturation (pulse oximetry)", + "category": "respiratory", + "sources": { + "eicu": [ + { + "table": "vitalperiodic", + "val_var": "sao2", + "unit_val": "%", + "class": "col_itm" + } + ], + "eicu_demo": [ + { + "table": "vitalperiodic", + "val_var": "sao2", + "unit_val": "%", + "class": "col_itm" + } + ], + "mimic": [ + { + "ids": [646, 220277], + "table": "chartevents", + "sub_var": "itemid" + } + ], + "mimic_demo": [ + { + "ids": [646, 220277], + "table": "chartevents", + "sub_var": "itemid" + } + ] + } + }, + "sao2": { + "unit": ["%", "% Sat."], + "min": 50, + "max": 100, + "description": "oxygen saturation (arterial blood)", + "category": "respiratory", + "sources": { + "eicu": [ + { + "ids": "O2 Sat (%)", + "table": "lab", + "sub_var": 
"labname" + } + ], + "eicu_demo": [ + { + "ids": "O2 Sat (%)", + "table": "lab", + "sub_var": "labname" + } + ], + "mimic": [ + { + "ids": [834, 220227], + "table": "chartevents", + "sub_var": "itemid" + } + ], + "mimic_demo": [ + { + "ids": [834, 220227], + "table": "chartevents", + "sub_var": "itemid" + } + ] + } + }, "o2sat": { "unit": [ "%", @@ -274,10 +350,7 @@ "sources": { "aumc": [ { - "ids": [ - 8874, - 12266 - ], + "ids": [8873, 8874, 12266], "table": "numericitems", "sub_var": "itemid" } @@ -287,6 +360,11 @@ "table": "vitalperiodic", "val_var": "respiration", "class": "col_itm" + }, + { + "table": "nursecharting", + "ids": "Respiratory Rate", + "sub_var": "nursingchartcelltypevalname" } ], "eicu_demo": [ diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index 75967e5e..75190235 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -12,7 +12,7 @@ "sources": { "aumc": [ { - "ids": 6643, + "ids": [6643, 6680, 8842], "table": "numericitems", "sub_var": "itemid" } @@ -22,6 +22,11 @@ "table": "vitalperiodic", "val_var": "systemicdiastolic", "class": "col_itm" + }, + { + "table": "vitalaperiodic", + "val_var": "noninvasivediastolic", + "class": "col_itm" } ], "eicu_demo": [ @@ -29,6 +34,11 @@ "table": "vitalperiodic", "val_var": "systemicdiastolic", "class": "col_itm" + }, + { + "table": "vitalaperiodic", + "val_var": "noninvasivediastolic", + "class": "col_itm" } ], "hirid": [ @@ -275,7 +285,7 @@ "sources": { "aumc": [ { - "ids": 6642, + "ids": [6642, 6679, 8843], "table": "numericitems", "sub_var": "itemid" } @@ -385,7 +395,7 @@ "sources": { "aumc": [ { - "ids": 6641, + "ids": [6641, 6678, 8841], "table": "numericitems", "sub_var": "itemid" } @@ -395,6 +405,11 @@ "table": "vitalperiodic", "val_var": "systemicsystolic", "class": "col_itm" + }, + { + "table": "vitalaperiodic", + "val_var": "noninvasivesystolic", + "class": "col_itm" } ], "eicu_demo": 
[ @@ -402,6 +417,11 @@ "table": "vitalperiodic", "val_var": "systemicsystolic", "class": "col_itm" + }, + { + "table": "vitalaperiodic", + "val_var": "noninvasivesystolic", + "class": "col_itm" } ], "hirid": [ @@ -484,11 +504,7 @@ "sources": { "aumc": [ { - "ids": [ - 8658, - 13952, - 16110 - ], + "ids": [8658, 8659, 8662, 11889, 13058, 13059, 13060, 13061, 13062, 13063, 13952, 16110], "table": "numericitems", "sub_var": "itemid" } @@ -497,14 +513,38 @@ { "table": "vitalperiodic", "val_var": "temperature", + "unit_val": "C", "class": "col_itm" + }, + { + "table": "nursecharting", + "ids": "Temperature (C)", + "sub_var": "nursingchartcelltypevalname" + }, + { + "table": "nursecharting", + "ids": "Temperature (F)", + "sub_var": "nursingchartcelltypevalname", + "callback": "combine_callbacks(transform_fun(as.numeric), transform_fun(fahr_to_cels))" } ], "eicu_demo": [ { "table": "vitalperiodic", "val_var": "temperature", + "unit_val": "C", "class": "col_itm" + }, + { + "table": "nursecharting", + "ids": "Temperature (C)", + "sub_var": "nursingchartcelltypevalname" + }, + { + "table": "nursecharting", + "ids": "Temperature (F)", + "sub_var": "nursingchartcelltypevalname", + "callback": "combine_callbacks(transform_fun(as.numeric), transform_fun(fahr_to_cels))" } ], "hirid": [ From 40f3bcd07e599fb0e3f2618083cba16d640e9f8b Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 18:04:44 +0100 Subject: [PATCH 42/48] Add callbacks and `sic` support for `urine_rate` --- R/callback-icu-mortality.R | 63 ++++++++ R/callback-kdigo.R | 140 ++++++++++++++++++ R/callback-sep3.R | 128 ++++++++++++++++ .../config/concept-dict/circulatory.json | 2 +- inst/extdata/config/concept-dict/output.json | 10 +- 5 files changed, 340 insertions(+), 3 deletions(-) create mode 100644 R/callback-icu-mortality.R create mode 100644 R/callback-kdigo.R diff --git a/R/callback-icu-mortality.R b/R/callback-icu-mortality.R new file mode 100644 index 00000000..0b6fc7b2 --- /dev/null +++ 
b/R/callback-icu-mortality.R @@ -0,0 +1,63 @@ +# ============================================================================== +# +# Refined ICU mortality callbacks +# +# based on YAIB: https://github.com/rvandewater/YAIB-cohorts/tree/main/ricu-extensions/callbacks +# ============================================================================== + +aumc_death_icu <- function (x, val_var, death_date, ...) { + # Identify ICU mortality in AUMCdb via the discharge destination field. Use + # discharge time from the ICU as death time, as date of death sometimes only + # contain the date part and no time (i.e., 00:00:00). + # + # See discussions here: + # https://github.com/AmsterdamUMC/AmsterdamUMCdb/issues/56 + # https://github.com/AmsterdamUMC/AmsterdamUMCdb/issues/61 + idx <- index_var(x) + x[, `:=`(c(val_var), ricu:::is_true(get(val_var) == "Overleden"))] + x[get(death_date) - get(idx) > hours(24L), `:=`(c(val_var), FALSE)] + x +} + +hirid_death_icu <- function (x, val_var, sub_var, env, ...) { + dis <- "discharge_status" + idx <- index_var(x) + idc <- id_vars(x) + res <- dt_gforce(x, "last", by = idc, vars = idx) + tmp <- load_id(env[["general"]], cols = dis) + res <- merge(res, tmp[ricu:::is_true(get(dis) == "dead"), ]) + res <- res[, `:=`(c(val_var, dis), list(TRUE, NULL))] + res +} + +mi_death_icu <- function(x, transfers, icu_wards, ...) { + # Look for all hospital deaths in which the last careunit was an ICU. 
+ # See discussion here: https://github.com/MIT-LCP/mimic-code/issues/874 + id <- id_vars(transfers) + lead <- function(x) data.table:::shift(x, type = "lead") + + transfers[, is_last := ricu:::is_true(lead(eventtype) == "discharge")] + last_ward <- transfers[, .(ward = ward[is_last]), by = c(id)] + last_ward[, "is_icu" := .(ricu:::is_true(ward %in% icu_wards))] + + dat <- data_var(x) + x[(last_ward[is_icu == FALSE]), c(dat) := 0L] + x[, c(dat) := ricu:::is_true(get(dat) == 1L)] + x +} + +mimic_death_icu <- function(x, env, ...){ + icu_wards <- sort(unique(env[["icustays"]]$first_careunit)) + transfers <- load_ts(env[["transfers"]], id_var = "hadm_id", index_var = "intime", interval = mins(1L)) + transfers <- change_id(transfers, "icustay", as_src_cfg(env), id_type = TRUE) + rename_cols(transfers, "ward", "curr_careunit", by_ref = TRUE) + mi_death_icu(x, transfers, icu_wards, ...) +} + +miiv_death_icu <- function(x, env, ...){ + icu_wards <- sort(unique(env[["icustays"]]$first_careunit)) + transfers <- load_ts(env[["transfers"]], index_var = "intime") + rename_cols(transfers, "ward", "careunit", by_ref = TRUE) + mi_death_icu(x, transfers, icu_wards, ...) +} + diff --git a/R/callback-kdigo.R b/R/callback-kdigo.R new file mode 100644 index 00000000..644b4ea8 --- /dev/null +++ b/R/callback-kdigo.R @@ -0,0 +1,140 @@ +# ============================================================================== +# +# KDIGO Callbacks +# +# based on YAIB: https://github.com/rvandewater/YAIB-cohorts/tree/main/ricu-extensions/callbacks +# ============================================================================== +kdigo_crea <- function(..., keep_components = FALSE, interval = NULL) { + cnc <- c("crea") + crea <- ricu:::collect_dots(cnc, interval, ...) 
+ + id <- id_vars(crea) + ind <- index_var(crea) + + min_over_period <- function(dur = hours(1L)) { + cdur <- as.character(dur) + summ <- slide( + crea, + list(crea = min(get("crea"), na.rm = TRUE)), + dur, + left_closed = FALSE + ) + rename_cols(summ, paste0("crea_", cdur, "hr"), "crea") + } + + res <- lapply(hours(2 * 24, 7 * 24), min_over_period) + res <- merge_lst(c(list(crea), res)) + res[, kdigo_crea := data.table::fcase( + crea >= 3 * crea_168hr , 3L, + crea >= 4 & + (crea - crea_48hr >= 0.3 | + crea >= 1.5 * crea_168hr) , 3L, + crea >= 2 * crea_168hr , 2L, + crea >= crea_48hr + 0.3 , 1L, + crea >= 1.5 * crea_168hr , 1L, + default = 0L + )] + + cols_rm <- c("crea_48hr", "crea_168hr") + if (!keep_components) { + cols_rm <- c(cols_rm, "crea") + } + res <- rm_cols(res, cols_rm, skip_absent = TRUE, by_ref = TRUE) + res +} + + +urine_rate <- function(x, max_gap = hours(24L), interval = NULL, id_type = "icustay") { + # TODO: Does not currently work as a rec_cncpt. For example, currently keep_components = TRUE would lead to + # a situation in which `urine` and not `urine_rate` is passed back. This is likely because `fun_itm` + # currently expects a table with a single column. If multiple are present, it chooses the first, + # which in this case is `urine`. Unhelpfully, this is then renamed to `urine_rate`, hiding this + # behaviour. + # Solution: remove keep_components for now and use only as `fun_itm` + id <- id_var(x) + ind <- index_var(x) + + res <- rename_cols(x, "urine", old = data_var(x)) + + res[, tm := get(ind) - data.table::shift(get(ind)) + 1L, by = c(id)] + res[, tm := ifelse(is.na(tm) | tm > max_gap, 1, tm)] + res[, val_var := urine / tm] + + cols_rm <- c("tm", "urine") + res <- rm_cols(res, cols_rm, skip_absent = TRUE, by_ref = TRUE) + res +} + + +kdigo_urine <- function(..., keep_components = FALSE, interval = NULL) { + cnc <- c("urine_rate", "weight") + res <- ricu:::collect_dots(cnc, interval, ...) 
+ urine_rate <- res[["urine_rate"]] + weight <- res[["weight"]] + + id <- id_vars(urine_rate) + ind <- index_var(urine_rate) + + rate_over_period <- function(dur = hours(1L)) { + name <- paste0("urine_rate_", as.character(dur), "hr") + summ <- slide(urine_rate, list(urine_h = sum(get("urine_rate"), na.rm = TRUE)), dur, left_closed = FALSE) + summ[weight, urine_h := urine_h / ifelse(is.na(weight), 75, weight), on = c(id)] + summ <- rename_cols(summ, name, "urine_h") + summ[, .SD, .SDcols = c(id, ind, name)] + } + + res <- lapply(hours(6L, 12L, 24L), rate_over_period) + res <- merge_lst(c(list(urine_rate, weight), res)) + res[, kdigo_urine := data.table::fcase( # TODO: make work with intervals other than + get(ind) >= hours(24L) & urine_rate_24hr < 0.3, 3L, + get(ind) >= hours(12L) & urine_rate_12hr == 0 , 3L, + get(ind) >= hours(12L) & urine_rate_12hr < 0.5, 2L, + get(ind) >= hours(6L) & urine_rate_6hr < 0.5, 1L, + default = 0L + )] + + cols_rm <- c( + "urine_rate_6hr", "urine_rate_12hr", "urine_rate_24hr" + ) + if (!keep_components) { + cols_rm <- c(cols_rm, "urine_rate", "weight") + } + res <- rm_cols(res, cols_rm, skip_absent = TRUE, by_ref = TRUE) + res +} + + +kdigo <- function(..., keep_components = FALSE, interval = NULL) { + cnc <- c("kdigo_crea", "kdigo_urine") + res <- ricu:::collect_dots(cnc, interval, ...) 
+ kdigo_crea <- res[["kdigo_crea"]] + kdigo_urine <- res[["kdigo_urine"]] + + idc <- id_vars(kdigo_crea) + indc <- index_var(kdigo_crea) + idu <- id_vars(kdigo_urine) + indu <- index_var(kdigo_urine) + + res <- merge(kdigo_crea, kdigo_urine, by.x = c(idc, indc), by.y = c(idu, indu), all = TRUE) + res[, kdigo := pmax(kdigo_crea, kdigo_urine, na.rm = TRUE)] + + if (!keep_components) { + cols_rm <- c("kdigo_crea", "kdigo_urine") + res <- rm_cols(res, cols_rm, skip_absent = TRUE, by_ref = TRUE) + } + res +} + + +aki <- function(..., threshold = 1L, interval = NULL, keep_components = FALSE) { + + cnc <- c("kdigo") + res <- ricu:::collect_dots(cnc, interval, ...) + res[, aki := kdigo >= threshold] + + if (!keep_components) { + res <- rm_cols(res, "kdigo", skip_absent = TRUE, by_ref = TRUE) + } + + res[aki == TRUE] +} diff --git a/R/callback-sep3.R b/R/callback-sep3.R index b49aba12..cd35c67a 100644 --- a/R/callback-sep3.R +++ b/R/callback-sep3.R @@ -432,3 +432,131 @@ si_or <- function(abx, samp, abx_win, samp_win, keep) { res } + + +# ============================================================================== +# Alternative sepsis3 implementations +# based on: https://github.com/rvandewater/YAIB-cohorts/tree/main/ricu-extensions/callbacks +# ============================================================================== +cummax_difftime <- function(x){ + # TODO: change to allow other intervals than hours + as.difftime(cummax(as.numeric(x)), units = "hours") +} +lead <- function(x) { + data.table::shift(x, type="lead") +} + +abx_cont <- function(..., abx_win = hours(72L), abx_max_gap = hours(24L), keep_components = FALSE, interval = NULL) { + cnc <- c("abx_duration", "death_icu") + res <- ricu:::collect_dots(cnc, interval, ...) 
+ abx <- res[["abx_duration"]] + death_icu <- res[["death_icu"]] + + aid <- id_vars(abx) + aind <- index_var(abx) + adur <- dur_var(abx) + did <- id_vars(death_icu) + dind <- index_var(death_icu) + + abx <- as_ts_tbl(abx) + abx <- abx[, .(dur_var = max(get(adur))), by = c(aid, aind)] + death_icu <- death_icu[death_icu == TRUE] + abx_death <- merge(abx, death_icu, by.x = aid, by.y = did, all.x = TRUE) + + res <- slide( + # Only look at antibiotic records that are recorded before the time of death + abx_death[is.na(get(dind)) | get(aind) <= get(dind)], + .( + # Calculate the maximum gap between two administrations for the next `abx_win` hours + # as follows: + # + # 1. get the administration time of the next antibiotic: + # lead(get(aind)) + # 2. this isn't defined for the last (.N-th) time within the window, so remove that: + # lead(get(aind))[-.N] + # 3. replace the last time with either + # a) the time of death: + # get(dind) + # b) the first antibiotic time in the window (=current antibiotic we are looking at) + # plus the window length + # get(aind)[1] + abx_win + # whichever is earlier + # 4. subtract from it the latest time that any previous antibiotic was stopped + # cummax_difftime(get(aind) + dur_var) + # this is the gap + # 5. take the maximum gap calculated this way for this window + # 6. 
repeat for all possible windows + max_gap = max( + c(lead(get(aind))[-.N], min(c(get(dind), get(aind)[1] + abx_win), na.rm = TRUE)) - + cummax_difftime(get(aind) + dur_var) + ) + ), + before = hours(0L), # we always start from the current antibiotic and look `abx_win` in the future + after = abx_win + ) + + res <- res[max_gap <= abx_max_gap] + res[, c("abx_cont", "max_gap") := .(TRUE, NULL)] + res +} + + +susp_inf_abx_cont <- function(..., abx_count_win = hours(24L), abx_min_count = 1L, + positive_cultures = FALSE, si_mode = c("and", "or", "abx", "samp"), + abx_win = hours(24L), samp_win = hours(72L), + by_ref = TRUE, keep_components = FALSE, interval = NULL) +{ + cnc <- c("abx_cont", "samp") + res <- ricu:::collect_dots(cnc, interval, ...) + abx_cont <- res[["abx_cont"]] + samp <- res[['samp']] + + # make `abx_cont` look like abx to pass on to the original ricu::susp_inf + rename_cols(abx_cont, "abx", "abx_cont", by_ref = TRUE) + + # pass the rest of the calculations to ricu::susp_inf + res <- ricu::susp_inf( + abx = abx_cont, + samp = samp, + abx_count_win = abx_count_win, + abx_min_count = abx_min_count, + positive_cultures = positive_cultures, + si_mode = si_mode, + abx_win = abx_win, + samp_win = samp_win, + by_ref = by_ref, + keep_components = keep_components, + interval = interval + ) + rename_cols(res, "susp_inf_alt", "susp_inf", by_ref = TRUE) + res +} + + +sep3_abx_cont <- function (..., si_window = c("first", "last", "any"), delta_fun = delta_cummin, + sofa_thresh = 2L, si_lwr = hours(48L), si_upr = hours(24L), + keep_components = FALSE, interval = NULL) +{ + cnc <- c("sofa", "susp_inf_alt") + res <- ricu:::collect_dots(cnc, interval, ...) 
+ sofa <- res[["sofa"]] + susp <- res[["susp_inf_alt"]] + + # make `susp_inf_alt` look like susp_inf to pass on to the original ricu::sep3 + rename_cols(susp, "susp_inf", "susp_inf_alt", by_ref = TRUE) + + # pass the rest of the calculations to ricu::sep3 + res <- ricu::sep3( + sofa = sofa, + susp_inf = susp, + si_window = si_window, + delta_fun = delta_fun, + sofa_thresh = sofa_thresh, + si_lwr = si_lwr, + si_upr = si_upr, + keep_components = keep_components, + interval = interval + ) + rename_cols(res, "sep3_alt", "sep3", by_ref = TRUE) + res +} diff --git a/inst/extdata/config/concept-dict/circulatory.json b/inst/extdata/config/concept-dict/circulatory.json index b892d513..9d37f4fb 100644 --- a/inst/extdata/config/concept-dict/circulatory.json +++ b/inst/extdata/config/concept-dict/circulatory.json @@ -16,4 +16,4 @@ ] } } - } \ No newline at end of file +} \ No newline at end of file diff --git a/inst/extdata/config/concept-dict/output.json b/inst/extdata/config/concept-dict/output.json index 50659e8d..12439354 100644 --- a/inst/extdata/config/concept-dict/output.json +++ b/inst/extdata/config/concept-dict/output.json @@ -2,7 +2,7 @@ "urine": { "unit": "mL", "min": 0, - "max": 4000, + "max": 7000, "aggregate": "sum", "description": "urine output", "omopid": 4264378, @@ -167,7 +167,7 @@ "urine_rate": { "unit": "mL/h", "min": 0, - "max": 2000, + "max": 3000, "aggregate": "max", "description": "urine rate per hour", "category": "output", @@ -215,6 +215,12 @@ "class": "fun_itm", "callback": "combine_callbacks(fwd_concept('urine'), urine_rate)" } + ], + "sic": [ + { + "class": "fun_itm", + "callback": "combine_callbacks(fwd_concept('urine'), urine_rate)" + } ] } }, From e0f46e5e6dd355c12f30a2180c6f6b3b0aab47f6 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Sun, 31 Mar 2024 18:39:55 +0200 Subject: [PATCH 43/48] Add `death_icu` for `sic` --- inst/extdata/config/concept-dict/outcome.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git 
a/inst/extdata/config/concept-dict/outcome.json b/inst/extdata/config/concept-dict/outcome.json index c488c703..56b41191 100644 --- a/inst/extdata/config/concept-dict/outcome.json +++ b/inst/extdata/config/concept-dict/outcome.json @@ -212,6 +212,16 @@ "callback": "mimic_death_icu", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "index_var": "offsetofdeath", + "adm_time": "offsetafterfirstadmission", + "val_var": "timeofstay", + "callback": "sic_death", + "class": "col_itm" + } ] } }, From 4c3838d22ca4b4f0d53279e46d73144c3503ecf0 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Sun, 31 Mar 2024 19:46:52 +0200 Subject: [PATCH 44/48] Support `sao2`, `spo2` and combination in `o2sat` --- R/callback-cncpt.R | 24 ++ .../config/concept-dict/respiratory.json | 246 ++++++++---------- 2 files changed, 138 insertions(+), 132 deletions(-) diff --git a/R/callback-cncpt.R b/R/callback-cncpt.R index cd919e0f..d640ce01 100644 --- a/R/callback-cncpt.R +++ b/R/callback-cncpt.R @@ -615,6 +615,30 @@ bmi <- function(..., interval = NULL) { res } +#' @rdname callback_cncpt +#' @export +o2sat_lab_first <- function(..., interval = NULL) { + + # Pulse Oximetry: `spo2` + # Arterial Blood Gas: `sao2` + cnc <- c("sao2", "spo2") + res <- collect_dots(cnc, interval, ..., merge_dat = TRUE) + + # default to sao2 (arterial blood gas) + res <- res[, o2sat := sao2] + + # if sao2 is missing, use spo2 (pulse oximetry) + res <- res[is.na(sao2), o2sat := spo2] + + # Filter out values below 50 and above 100 + res <- filter_bounds(res, "o2sat", 50, 100) + + # remove sao2 and spo2 columns + res <- rm_cols(res, cnc, by_ref = TRUE) + + res +} + #' @rdname callback_cncpt #' @export norepi_equiv <- function(..., interval = NULL) { diff --git a/inst/extdata/config/concept-dict/respiratory.json b/inst/extdata/config/concept-dict/respiratory.json index 8d385a8f..7efc8189 100644 --- a/inst/extdata/config/concept-dict/respiratory.json +++ b/inst/extdata/config/concept-dict/respiratory.json @@ 
-125,86 +125,6 @@ "max": 100, "description": "oxygen saturation (pulse oximetry)", "category": "respiratory", - "sources": { - "eicu": [ - { - "table": "vitalperiodic", - "val_var": "sao2", - "unit_val": "%", - "class": "col_itm" - } - ], - "eicu_demo": [ - { - "table": "vitalperiodic", - "val_var": "sao2", - "unit_val": "%", - "class": "col_itm" - } - ], - "mimic": [ - { - "ids": [646, 220277], - "table": "chartevents", - "sub_var": "itemid" - } - ], - "mimic_demo": [ - { - "ids": [646, 220277], - "table": "chartevents", - "sub_var": "itemid" - } - ] - } - }, - "sao2": { - "unit": ["%", "% Sat."], - "min": 50, - "max": 100, - "description": "oxygen saturation (arterial blood)", - "category": "respiratory", - "sources": { - "eicu": [ - { - "ids": "O2 Sat (%)", - "table": "lab", - "sub_var": "labname" - } - ], - "eicu_demo": [ - { - "ids": "O2 Sat (%)", - "table": "lab", - "sub_var": "labname" - } - ], - "mimic": [ - { - "ids": [834, 220227], - "table": "chartevents", - "sub_var": "itemid" - } - ], - "mimic_demo": [ - { - "ids": [834, 220227], - "table": "chartevents", - "sub_var": "itemid" - } - ] - } - }, - "o2sat": { - "unit": [ - "%", - "% Sat." 
- ], - "min": 50, - "max": 100, - "description": "oxygen saturation", - "omopid": 40483579, - "category": "respiratory", "sources": { "aumc": [ { @@ -214,46 +134,29 @@ ], "table": "numericitems", "sub_var": "itemid" - }, - { - "ids": 12311, - "table": "numericitems", - "sub_var": "itemid", - "callback": "transform_fun(binary_op(`*`, 100))" } ], "eicu": [ { - "table": "vitalperiodic", - "val_var": "sao2", - "unit_val": "%", - "class": "col_itm" - }, - { - "ids": "O2 Sat (%)", - "table": "lab", - "sub_var": "labname" + "table": "vitalperiodic", + "val_var": "sao2", + "unit_val": "%", + "class": "col_itm" } ], "eicu_demo": [ { - "table": "vitalperiodic", - "val_var": "sao2", - "unit_val": "%", - "class": "col_itm" - }, - { - "ids": "O2 Sat (%)", - "table": "lab", - "sub_var": "labname" + "table": "vitalperiodic", + "val_var": "sao2", + "unit_val": "%", + "class": "col_itm" } ], "hirid": [ { "ids": [ 4000, - 8280, - 20000800 + 8280 ], "table": "observations", "sub_var": "variableid", @@ -263,9 +166,7 @@ "miiv": [ { "ids": [ - 220277, - 226253, - 50817 + 220277 ], "table": "chartevents", "sub_var": "itemid" @@ -273,26 +174,16 @@ ], "mimic": [ { - "ids": [ - 646, - 220277, - 226253, - 50817 - ], - "table": "chartevents", - "sub_var": "itemid" + "ids": [646, 220277], + "table": "chartevents", + "sub_var": "itemid" } ], "mimic_demo": [ { - "ids": [ - 646, - 220277, - 226253, - 50817 - ], - "table": "chartevents", - "sub_var": "itemid" + "ids": [646, 220277], + "table": "chartevents", + "sub_var": "itemid" } ], "sic": [ @@ -311,16 +202,107 @@ "1006" ], "sub_var": "itemid" - }, - { - "table": "labevents", - "ids": [ - 5252 - ], - "sub_var": "itemid" } ] } + }, + "sao2": { + "unit": ["%", "% Sat."], + "min": 50, + "max": 100, + "description": "oxygen saturation (arterial blood)", + "category": "respiratory", + "sources": { + "aumc": [ + { + "ids": 12311, + "table": "numericitems", + "sub_var": "itemid", + "callback": "transform_fun(binary_op(`*`, 100))" + } + ], + "eicu": [ + 
{ + "ids": "O2 Sat (%)", + "table": "lab", + "sub_var": "labname" + } + ], + "eicu_demo": [ + { + "ids": "O2 Sat (%)", + "table": "lab", + "sub_var": "labname" + } + ], + "hirid": [ + { + "ids": [ + 20000800 + ], + "table": "observations", + "sub_var": "variableid", + "class": "hrd_itm" + } + ], + "miiv": [ + { + "ids": [ + 50817 + ], + "table": "chartevents", + "sub_var": "itemid" + } + ], + "mimic": [ + { + "ids": [834, 220227], + "table": "chartevents", + "sub_var": "itemid" + } + ], + "mimic_demo": [ + { + "ids": [834, 220227], + "table": "chartevents", + "sub_var": "itemid" + } + ], + "sic": [ + { + "ids": 673, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], + "picdb": [ + { + "table": "labevents", + "ids": [ + 5252 + ], + "sub_var": "itemid" + } + ] + } + }, + "o2sat": { + "concepts": [ + "sao2", + "spo2" + ], + "unit": [ + "%", + "% Sat." + ], + "min": 50, + "max": 100, + "description": "oxygen saturation (pulse oximetry and arterial blood)", + "omopid": 40483579, + "category": "respiratory", + "callback": "o2sat_lab_first", + "class": "rec_cncpt" }, "pafi": { "concepts": [ From f280964966246dd701484f3422c2d2cd74611f14 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Sun, 31 Mar 2024 19:51:17 +0200 Subject: [PATCH 45/48] Add empty `phn_dur` --- inst/extdata/config/concept-dict/medications.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/inst/extdata/config/concept-dict/medications.json b/inst/extdata/config/concept-dict/medications.json index cbb177be..b43943df 100644 --- a/inst/extdata/config/concept-dict/medications.json +++ b/inst/extdata/config/concept-dict/medications.json @@ -1857,6 +1857,13 @@ "picdb": [] } }, + "phn_dur": { + "description": "phenylephrine duration", + "category": "medications", + "aggregate": "max", + "sources": { + } + }, "levo_dur": { "description": "levosimendan duration", "category": "medications", From abae74be269f7d63e4fa0b92e297e21c35c954a9 Mon Sep 17 00:00:00 2001 From: 
Manuel Burger
Date: Sun, 31 Mar 2024 20:07:37 +0200
Subject: [PATCH 46/48] Add circ callbacks

---
 R/callback-circ-fail.R                        | 222 ++++++++++++++++++
 .../config/concept-dict/medications.json      |   4 +-
 2 files changed, 224 insertions(+), 2 deletions(-)
 create mode 100644 R/callback-circ-fail.R

diff --git a/R/callback-circ-fail.R b/R/callback-circ-fail.R
new file mode 100644
index 00000000..25cc6c76
--- /dev/null
+++ b/R/callback-circ-fail.R
@@ -0,0 +1,222 @@
+# ==============================================================================
+#
+# Callback for circulatory failure
+# Implementation by `prockenschaub`
+# from: https://github.com/prockenschaub/icuDG-preprocessing/blob/main/R/callback-circ-fail.R
+#
+# ==============================================================================
+
+# Concept callback that derives a logical `circ_fail` column from the `map`,
+# `cf_treat` and `lact` concepts.
+#
+# For each id, centred rolling means over `cond_win` give the share of time
+# steps with MAP below `map_thresh`, with a non-missing `cf_treat`, and with
+# lactate above `lact_thresh`. With `p = cond_dur / cond_win` the result is
+#   TRUE  if (low MAP > p or treated > p) and high lactate > p,
+#   FALSE if (low MAP <= p or treated <= p) and high lactate <= p,
+#   NA    otherwise, or if MAP or lactate is missing throughout the window.
+#
+# `fill_for` is passed on to `interpolate_lactate()`. With
+# `keep_components = FALSE` the `map`, `cf_treat` and `lact` columns are
+# dropped from the result; with `by_ref = FALSE` the inputs are copied before
+# they are modified.
+circ_fail <- function (..., lact_thresh = 2, map_thresh = 65,
+                       fill_for = hours(3L), cond_win = mins(45L),
+                       cond_dur = mins(30L), keep_components = FALSE,
+                       interval = NULL, by_ref = FALSE) {
+  cnc <- c("map", "cf_treat", "lact")
+  res <- ricu:::collect_dots(cnc, interval, ...)
+  assert_that(lact_thresh >= 0, map_thresh >= 0,
+              ricu:::is_interval(fill_for), ricu:::is_interval(cond_win),
+              ricu:::is_interval(cond_dur), is.flag(keep_components),
+              units(cond_win) == units(cond_dur), cond_dur < cond_win)
+
+  map <- res[["map"]]
+  cf_treat <- res[["cf_treat"]]
+  lact <- res[["lact"]]
+
+  if (!by_ref) {
+    map <- copy(map)
+    cf_treat <- copy(cf_treat)
+    lact <- copy(lact)
+  }
+
+  id <- id_vars(map)
+  step_size <- interval(map)
+
+  assert_that(units(step_size) == units(cond_win))
+
+  p <- as.numeric(cond_dur) / as.numeric(cond_win)
+  steps <- as.integer(cond_win / as.numeric(step_size))
+
+  # Interpolate lactate values
+  map_times <- map[, .SD, .SDcols = meta_vars(map)]
+  map_limits <- ricu::collapse(map_times, as_win_tbl = FALSE)
+  grid_times <- fill_gaps(map_times, map_limits)
+  lact <- interpolate_lactate(lact, grid_times, lact_thresh, fill_for)
+
+  # Combine MAP, vasopress/inotrope meds, and lactate to define cf
+  res <- merge_lst(list(map, cf_treat, lact))
+
+  .rmean <- function(x) frollmean(x, steps, align = "center")
+  .thresh <- function(x, op, val) fifelse(!is.na(x), op(x, val), FALSE)
+
+  res[, miss := pmax(.rmean(is.na(map)), .rmean(is.na(lact))) == 1, by = c(id)]
+  res[, low_map := .rmean(.thresh(map, `<`, map_thresh)), by = c(id)]
+  res[, treated := .rmean(!is.na(cf_treat)), by = c(id)]
+  res[, high_lact := .rmean(.thresh(lact, `>`, lact_thresh)), by = c(id)]
+  res[, circ_fail := fcase(
+    miss, NA,
+    (low_map <= p | treated <= p) & high_lact <= p, FALSE,
+    (low_map > p | treated > p) & high_lact > p, TRUE,
+    default = NA
+  )]
+
+  cols_rm <- c("miss", "low_map", "treated", "high_lact")
+  if (!keep_components) {
+    cols_rm <- c(cols_rm, "map", "cf_treat", "lact")
+  }
+  res <- rm_cols(res, cols_rm, skip_absent = TRUE, by_ref = TRUE)
+
+  res
+}
+
+# Wrapper around `stats::approx()` that returns a single observation as is,
+# i.e. `list(x = x, y = y)`, instead of interpolating from one point.
+approx <- function(x, y = NULL, xout, ...) {
+  if (length(x) == 1) {
+    return(list(x = x, y = y))
+  }
+  stats::approx(x, y, xout, ...)
+}
+
+# Fill NAs in `x`, limited to the first `max_n` positions of each NA run.
+#
+# `type` is handed to `data.table::nafill()`; for "const" the fill value is
+# `val`. Positions within a run are counted from its start, or from its end
+# for "nocb", and only positions with a count <= `max_n` are replaced.
+# TODO: bring in line with ricu::replace_na
+replace_na <- function(x, val, type = "const", max_n = Inf, ...) {
+  seq_num <- rleid(is.na(x))
+  # lapply (not sapply): sapply simplifies equally long runs into a matrix, in
+  # which case the per-run reversal below would act on single elements only
+  imp_cnt <- lapply(split(x, seq_num), seq_along)
+
+  if (type == "nocb") {
+    imp_cnt <- lapply(imp_cnt, rev)
+  }
+
+  imp_cnt <- unlist(imp_cnt, use.names = FALSE)
+
+  if (identical(type, "const")) {
+    repl <- data.table::nafill(x, type, val, ...)
+  }
+  else {
+    repl <- data.table::nafill(x, type, ...)
+  }
+
+  fifelse(imp_cnt <= max_n, repl, x)
+}
+
+# Put lactate measurements in `df` onto the time grid `grid_times`.
+#
+# Observations are expanded to steps of `interval(df)` and linearly
+# interpolated. Interpolated values are kept where consecutive measurements
+# fall on the same side of `thresh` or are less than `2 * fill_win` apart;
+# elsewhere they are replaced by a forward and a backward fill of at most
+# `fill_win` each. Grid times before the first / after the last measurement
+# are filled from that measurement: without limit if it is below `thresh`,
+# for at most `fill_win` otherwise.
+# NOTE: `df` gains helper columns by reference (`:=`).
+interpolate_lactate <- function(df, grid_times, thresh, fill_win = hours(3L)) {
+  id <- id_vars(df)
+  ind <- index_var(df)
+  val <- data_var(df)
+
+  step_size <- interval(df)
+  fill_win <- ricu:::re_time(fill_win, step_size)
+
+  df[, abn := .SD[[val]] >= thresh] # TODO: generalise to allow for < thresh
+  df[, tdiff := c(diff(.SD[[ind]]), NA_real_), by = c(id)]
+
+  # Linearly interpolate everything
+  rep_to_interpol_grid <- function(x, d) {
+    rep(x, times = fifelse(is.na(d), 1., as.numeric(d) / as.numeric(step_size)))
+  }
+
+  int_lin_cond <- expr(abn == shift(abn, type = "lead") | tdiff < 2 * fill_win)
+  df[, int_lin := eval(int_lin_cond), by = c(id)]
+
+  df <- df[, c(
+      # Expand all existing columns to match length of interpolation
+      lapply(.SD, rep_to_interpol_grid, d = tdiff),
+      # Linearly interpolate every observation (for speed, ffill/bfill later)
+      approx(.SD[[ind]], .SD[[val]], xout = seq(as.numeric(.SD[[ind]][1]), as.numeric(.SD[[ind]][.N]), by = as.numeric(step_size)))
+    ),
+    by = c(id)
+  ]
+
+  df[x == get(ind), int_lin := TRUE]
+  df[int_lin == FALSE , y := NA_real_]
+
+  # Replace linear with forward/backward fill if consecutive values crossed
+  # thresh and are more than `fill_win` apart
+  max_n <- as.numeric(fill_win) / as.numeric(step_size)
+  df[, y := fifelse(!int_lin, replace_na(y, type = "locf", max_n = max_n), y), by = c(id)]
+  df[, y := fifelse(!int_lin, replace_na(y, type = "nocb", max_n = max_n), y), by = c(id)]
+
+  # Clean up table
+  df[, c(ind) := as.difftime(x, units = attr(step_size, "units"))]
+  df[, c(val) := y]
+  df <- df[, .SD, .SDcols = c(id, ind, val, "abn")]
+
+  # Forward/backward fill the first and last observation over grid
+  df <- merge(df, grid_times, all = TRUE)
+  df[, abn_int := replace_na(as.integer(abn), type = "nocb"), by = c(id)]
+  df[, c(val) := fcase(
+      !is.na(abn), .SD[[val]], # If not first, do nothing
+      abn_int == 0, replace_na(.SD[[val]], type = "nocb"), # If first val normal, infinite bfill
+      abn_int == 1, replace_na(.SD[[val]], type = "nocb", max_n = max_n), # If first val abnormal, bfill `max_n` steps
+      rep(TRUE, .N), .SD[[val]]
+    ),
+    by = c(id)
+  ]
+  df[, abn_int := replace_na(as.integer(abn), type = "locf"), by = c(id)]
+  df[, c(val) := fcase(
+      !is.na(abn), .SD[[val]], # If not last, do nothing
+      abn_int == 0, replace_na(.SD[[val]], type = "locf"), # If last val normal, infinite ffill
+      abn_int == 1, replace_na(.SD[[val]], type = "locf", max_n = max_n), # If last val abnormal, ffill `max_n` steps
+      rep(TRUE, .N), .SD[[val]]
+    ),
+    by = c(id)
+  ]
+
+  df[, .SD, .SDcols = c(id, ind, val)]
+}
+
+# Concept callback combining nine medication duration concepts (see `cnc`)
+# into a single logical `cf_treat` column.
+#
+# The row-wise maximum of the durations (ignoring NAs) replaces the inputs and
+# is passed to `expand()`; the unique resulting rows are returned with
+# `cf_treat` set to TRUE.
+cf_treat <- function(..., interval = NULL) {
+
+  cnc <- c("epi_dur", "norepi_dur", "dopa_dur", "dobu_dur", "adh_dur", "phn_dur",
+           "levo_dur", "milrin_dur", "teophyllin_dur")
+  res <- ricu:::collect_dots(cnc, interval, ..., merge_dat = TRUE)
+  unt <- ricu::time_unit(res)
+
+  res <- res[, c(cnc) := lapply(.SD, as.difftime, units = unt), .SDcols = cnc]
+  res <- res[, c("cf_treat", cnc) := list(pmax(
+    get("dopa_dur"), get("norepi_dur"), get("dobu_dur"), get("epi_dur"),
+    get("adh_dur"), get("phn_dur"), get("levo_dur"), get("milrin_dur"), get("teophyllin_dur"),
+    na.rm = TRUE), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+  ]
+
+  res <- expand(res, index_var(res), "cf_treat")
+  res <- unique(res)
+  res <- res[, c("cf_treat") := TRUE]
+
+  res
+}
diff --git a/inst/extdata/config/concept-dict/medications.json b/inst/extdata/config/concept-dict/medications.json
index b43943df..98f0b821
100644 --- a/inst/extdata/config/concept-dict/medications.json +++ b/inst/extdata/config/concept-dict/medications.json @@ -1953,7 +1953,7 @@ "table": "inputevents_cv", "sub_var": "itemid", "grp_var": "linkorderid", - "callback": "combine_callbacks(\n mimic_kg_rate, mimic_rate_cv)\n " + "callback": "combine_callbacks(mimic_kg_rate, mimic_rate_cv)" }, { "ids": 30128, @@ -1976,7 +1976,7 @@ "table": "inputevents_cv", "sub_var": "itemid", "grp_var": "linkorderid", - "callback": "combine_callbacks(\n mimic_kg_rate, mimic_rate_cv)\n " + "callback": "combine_callbacks(mimic_kg_rate, mimic_rate_cv)" }, { "ids": 30128, From 47497a9bdd9e4e09c02cde3d6b2db996c1e63c91 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Sun, 31 Mar 2024 20:11:41 +0200 Subject: [PATCH 47/48] Fix `susp_inf_abx_cont` calls --- R/callback-sep3.R | 10 +++++----- inst/extdata/config/concept-dict/outcome.json | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/callback-sep3.R b/R/callback-sep3.R index cd35c67a..2effb004 100644 --- a/R/callback-sep3.R +++ b/R/callback-sep3.R @@ -528,7 +528,7 @@ susp_inf_abx_cont <- function(..., abx_count_win = hours(24L), abx_min_count = 1 keep_components = keep_components, interval = interval ) - rename_cols(res, "susp_inf_alt", "susp_inf", by_ref = TRUE) + rename_cols(res, "susp_inf_abx_cont", "susp_inf", by_ref = TRUE) res } @@ -537,13 +537,13 @@ sep3_abx_cont <- function (..., si_window = c("first", "last", "any"), delta_fun sofa_thresh = 2L, si_lwr = hours(48L), si_upr = hours(24L), keep_components = FALSE, interval = NULL) { - cnc <- c("sofa", "susp_inf_alt") + cnc <- c("sofa", "susp_inf_abx_cont") res <- ricu:::collect_dots(cnc, interval, ...) 
sofa <- res[["sofa"]] - susp <- res[["susp_inf_alt"]] + susp <- res[["susp_inf_abx_cont"]] - # make `susp_inf_alt` look like susp_inf to pass on to the original ricu::sep3 - rename_cols(susp, "susp_inf", "susp_inf_alt", by_ref = TRUE) + # make `susp_inf_abx_cont` look like susp_inf to pass on to the original ricu::sep3 + rename_cols(susp, "susp_inf", "susp_inf_abx_cont", by_ref = TRUE) # pass the rest of the calculations to ricu::susp_inf res <- ricu::sep3( diff --git a/inst/extdata/config/concept-dict/outcome.json b/inst/extdata/config/concept-dict/outcome.json index 56b41191..6316741b 100644 --- a/inst/extdata/config/concept-dict/outcome.json +++ b/inst/extdata/config/concept-dict/outcome.json @@ -558,7 +558,7 @@ "class": "rec_cncpt" }, "sep3_abx_cont": { - "concepts": ["sofa", "susp_inf_alt"], + "concepts": ["sofa", "susp_inf_abx_cont"], "description": "sepsis-3 criterion (alt. def.: continuous abx only)", "category": "outcome", "callback": "sep3_abx_cont", From 3f0f229da0401e805155a4b673373b8ed3638804 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Sun, 31 Mar 2024 20:18:42 +0200 Subject: [PATCH 48/48] Add `patient_id` --- inst/extdata/config/concept-dict/misc.json | 35 ++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/inst/extdata/config/concept-dict/misc.json b/inst/extdata/config/concept-dict/misc.json index 597b8d32..2cbc07f1 100644 --- a/inst/extdata/config/concept-dict/misc.json +++ b/inst/extdata/config/concept-dict/misc.json @@ -57,6 +57,20 @@ "description": "Patient Identifier", "target": "id_tbl", "sources": { + "aumc": [ + { + "table": "admissions", + "val_var": "patientid", + "class": "col_itm" + } + ], + "mimic_demo": [ + { + "table": "admissions", + "val_var": "subject_id", + "class": "col_itm" + } + ], "mimic": [ { "table": "admissions", @@ -78,12 +92,33 @@ "class": "col_itm" } ], + "eicu_demo": [ + { + "table": "patient", + "val_var": "patientunitstayid", + "class": "col_itm" + } + ], "eicu": [ { "table": "patient", 
"val_var": "patientunitstayid", "class": "col_itm" } + ], + "picdb": [ + { + "table": "icustays", + "val_var": "subject_id", + "class": "col_itm" + } + ], + "sic": [ + { + "table": "cases", + "val_var": "patientid", + "class": "col_itm" + } ] } }