Skip to content

Commit

Permalink
Add support for synthea v3.3 and various fixes (#202)
Browse files Browse the repository at this point in the history
* fixes for duckdb support
* synthea v3.3.0 support (addition of icd10 codes to condition_occurrence logic)
* update of the LoadVocabFromCsv function to accept a delimiter #198

Co-authored-by: Frank DeFalco <[email protected]>
  • Loading branch information
burrowse and fdefalco authored Oct 4, 2024
1 parent 48c33f8 commit e59d1c7
Show file tree
Hide file tree
Showing 76 changed files with 583 additions and 187 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
output/*.sql
output/*.txt
.DS_Store
.Rproj.user
errorReport.txt
Expand Down
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: ETLSyntheaBuilder
Type: Package
Title: A Builder for Converting the Synthea Data to the OMOP CDM
Version: 2.0
Version: 2.1
Date: 2021-12-04
Author: Anthony Molinaro [aut, cre],
Clair Blacketer [aut],
Expand All @@ -28,7 +28,7 @@ Remotes:
OHDSI/CommonDataModel
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Suggests:
rmarkdown,
knitr,
Expand Down
6 changes: 3 additions & 3 deletions R/CreateMapAndRollupTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#' so for example 'cdm_instance.dbo'.
#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4".
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' Currently "2.7.0", "3.0.0", "3.1.0", "3.2.0" and "3.3.0" are supported.
#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.
#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea.
#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI
Expand All @@ -46,11 +46,11 @@ CreateMapAndRollupTables <- function(connectionDetails,
stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".")
}

supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0")
supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0", "3.3.0")

if (!(syntheaVersion %in% supportedSyntheaVersions))
stop(
"Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported."
"Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", \"3.2.0\", and \"3.3.0\" are supported."
)

# Create Vocabulary mapping tables
Expand Down
6 changes: 4 additions & 2 deletions R/CreateSyntheaTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' Server, this should specify both the database and the schema,
#' so for example 'cdm_instance.dbo'.
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0", "3.0.0", "3.1.0", and "3.2.0" are is supported.
#' Currently "2.7.0", "3.0.0", "3.1.0", "3.2.0" and "3.3.0" are supported.
#'
#'@export

Expand All @@ -29,9 +29,11 @@ CreateSyntheaTables <-
sqlFilePath <- "synthea_version/v310"
else if (syntheaVersion == "3.2.0")
sqlFilePath <- "synthea_version/v320"
else if (syntheaVersion == "3.3.0")
sqlFilePath <- "synthea_version/v330"
else
stop(
"Invalid synthea version specified. Currently \"2.7.0\", \"3.0.0\", \"3.1.0\" and \"3.2.0\" are supported."
"Invalid synthea version specified. Currently \"2.7.0\", \"3.0.0\", \"3.1.0\" , \"3.2.0\" and \"3.3.0\" are supported."
)

sqlFilename <-
Expand Down
11 changes: 6 additions & 5 deletions R/LoadEventTables.r
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' so for example 'cdm_instance.dbo'.
#' @param cdmVersion The version of your CDM. Currently "5.3" and "5.4".
#' @param syntheaVersion The version of Synthea used to generate the csv files.
#' Currently "2.7.0","3.0.0","3.1.0" and "3.2.0" are supported.
#' Currently "2.7.0", "3.0.0", "3.1.0", "3.2.0" and "3.3.0" are supported.
#' @param cdmSourceName The source name to insert into the CDM_SOURCE table. Default is Synthea synthetic health database.
#' @param cdmSourceAbbreviation The source abbreviation to insert into the CDM_SOURCE table. Default is Synthea.
#' @param cdmHolder The holder to insert into the CDM_SOURCE table. Default is OHDSI
Expand Down Expand Up @@ -51,11 +51,11 @@ LoadEventTables <- function(connectionDetails,
stop("Unsupported CDM specified. Supported CDM versions are \"5.3\" and \"5.4\".")
}

supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0")
supportedSyntheaVersions <- c("2.7.0", "3.0.0", "3.1.0", "3.2.0", "3.3.0")

if (!(syntheaVersion %in% supportedSyntheaVersions))
stop(
"Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\", and \"3.2.0\" are supported."
"Invalid Synthea version specified. Currently \"2.7.0\", \"3.0.0\",\"3.1.0\",\"3.2.0\", and \"3.3.0\" are supported."
)

if (createIndices) {
Expand Down Expand Up @@ -177,7 +177,8 @@ LoadEventTables <- function(connectionDetails,
packageName = "ETLSyntheaBuilder",
dbms = connectionDetails$dbms,
cdm_schema = cdmSchema,
synthea_schema = syntheaSchema
synthea_schema = syntheaSchema,
synthea_version = syntheaVersion
)
runStep(sql, fileQuery)

Expand Down Expand Up @@ -299,7 +300,7 @@ LoadEventTables <- function(connectionDetails,
# cost
if (syntheaVersion == "2.7.0")
fileQuery <- "insert_cost_v270.sql"
else if (syntheaVersion %in% c("3.0.0", "3.1.0", "3.2.0"))
else if (syntheaVersion %in% c("3.0.0", "3.1.0", "3.2.0", "3.3.0"))
fileQuery <- "insert_cost_v300.sql"

sql <- SqlRender::loadRenderTranslateSql(
Expand Down
8 changes: 5 additions & 3 deletions R/LoadVocabFromCsv.r
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
#' \code{DatabaseConnector} package.
#' @param cdmSchema The name of the database schema that will contain the Vocabulary (and CDM)
#' tables. Requires read and write permissions to this database. On SQL
#' Server, this should specifiy both the database and the schema,
#' Server, this should specify both the database and the schema,
#' so for example 'cdm_instance.dbo'.
#' @param vocabFileLoc The location of the vocabulary csv files.
#' @param bulkLoad Boolean flag indicating whether or not to use bulk loading (if possible). Default is FALSE.
#' @param delimiter Parameter to specify the delimiter of the csv file. By default the function expects the file to be tab delimited ("\\t") based on the export from Athena.
#'
#'@export

Expand All @@ -21,7 +22,8 @@ LoadVocabFromCsv <-
function(connectionDetails,
cdmSchema,
vocabFileLoc,
bulkLoad = FALSE)
bulkLoad = FALSE,
delimiter = "\t")
{
csvList <- c(
"concept.csv",
Expand Down Expand Up @@ -50,7 +52,7 @@ LoadVocabFromCsv <-
file = paste0(vocabFileLoc, "/", csv),
stringsAsFactors = FALSE,
header = TRUE,
sep = "\t",
sep = delimiter,
na.strings = ""
)

Expand Down
4 changes: 2 additions & 2 deletions R/createExtraIndices.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
#' Server, this should specify both the database and the schema, so for example 'cdm_instance.dbo'.
#' @param syntheaSchema The name of the Synthea database schema. Requires read and write permissions to this schema. On SQL
#' Server, this should specify both the database and the schema, so for example 'synthea.dbo'.
#' @param syntheaVersion Your Synthea version. Currently "2.7.0", "3.0.0", "3.1.0" and "3.2.0" are supported.
#' @param syntheaVersion Your Synthea version. Currently "2.7.0", "3.0.0", "3.1.0", "3.2.0" and "3.3.0" are supported.
#' @param outputFolder Location of the SQL scripts if sqlOnly = TRUE. Default is NULL.
#' @param sqlOnly A boolean that determines whether to create the indices or generate SQL scripts. Default is FALSE.
#'
#' @details This function creates indices which have been found to speed up certain long-running INSERT queries in LoadEventTables,
#' for some users. Indices are created on the intermediate vocabulary mapping tables; the person & provider CDM tables;
#' and the claims_transactions Synthea table (in Synthea 3.0.0, 3.1.0 and 3.2.0) .
#' and the claims_transactions Synthea table (in Synthea 3.0.0, 3.1.0, 3.2.0 and 3.3.0).
#'
#' @importFrom utils head
#'
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Follow the steps on the [synthea wiki](https://github.com/synthetichealth/synthe
# Valid CDM versions are determined by executing CommonDataModel::listSupportedVersions().
# The strings representing supported CDM versions are currently "5.3" and "5.4".
# The Synthea version we use in this example is 2.7.0.
# However, at this time we also support 3.0.0, 3.1.0 and 3.2.0.
# However, at this time we also support 3.0.0, 3.1.0, 3.2.0 and 3.3.0.
# Please note that Synthea's MASTER branch is always active and this package will be updated to support
# future versions where possible.
# The schema to load the Synthea tables is called "native".
Expand Down
2 changes: 1 addition & 1 deletion docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/care_site.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/condition_occurrence.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/cost.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/death.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/device_exposure.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/drug_exposure.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/location.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/measurement.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/observation.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/observation_period.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/payer_plan_period.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/person.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/procedure_occurrence.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/provider.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/visit_detail.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/articles/visit_occurrence.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions docs/authors.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e59d1c7

Please sign in to comment.