Merge pull request #61 from OHDSI/53-add-omop-cdm-documentation-and-t…

…ests-for-final-omop-models-1 feat: Add final OMOP models yaml definitions with associated dbt style tests
OHDSI · Sep 29, 2024 · 1285fb5 · 1285fb5
2 parents 5561586 + 11fbbdb
commit 1285fb5
Show file tree

Hide file tree

Showing 87 changed files with 4,213 additions and 959 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -25,4 +25,5 @@
             "packages.yml"
         ]
     },
+    "dbt.enableNewLineagePanel": true,
 }
diff --git a/dbt_project.yml b/dbt_project.yml
@@ -26,9 +26,6 @@ seeds:
     map:
       +enabled: true
       +schema: map_seeds
-    omop:
-      +enabled: true
-      +schema: omop_seeds
     vocabulary:
       +enabled: true
       +schema: vocab_seeds

diff --git a/models/omop/_models/care_site.yml b/models/omop/_models/care_site.yml
@@ -0,0 +1,39 @@
+models:
+  - name: care_site
+    description: The CARE_SITE table contains a list of uniquely identified institutional (physical or
+      organizational) units where healthcare delivery is practiced (offices, wards, hospitals, clinics,
+      etc.).
+    columns:
+      - name: care_site_id
+        description: ''
+        data_type: integer
+        tests:
+          - not_null
+          - unique
+      - name: care_site_name
+        description: The name of the care_site as it appears in the source data
+        data_type: varchar(255)
+      - name: place_of_service_concept_id
+        description: This is a high-level way of characterizing a Care Site. Typically, however, Care Sites
+          can provide care in multiple settings (inpatient, outpatient, etc.) and this granularity should
+          be reflected in the visit.
+        data_type: integer
+        tests:
+          - relationships:
+              to: ref('concept')
+              field: concept_id
+      - name: location_id
+        description: The location_id from the LOCATION table representing the physical location of the
+          care_site.
+        data_type: integer
+        tests:
+          - relationships:
+              to: ref('location')
+              field: location_id
+      - name: care_site_source_value
+        description: The identifier of the care_site as it appears in the source data. This could be an
+          identifier separate from the name of the care_site.
+        data_type: varchar(50)
+      - name: place_of_service_source_value
+        description: ''
+        data_type: varchar(50)
diff --git a/models/omop/_models/cdm_source.yml b/models/omop/_models/cdm_source.yml
@@ -0,0 +1,56 @@
+models:
+  - name: cdm_source
+    description: The CDM_SOURCE table contains detail about the source database and the process used to
+      transform the data into the OMOP Common Data Model.
+    columns:
+      - name: cdm_source_name
+        description: The name of the CDM instance.
+        data_type: varchar(255)
+        tests:
+          - not_null
+      - name: cdm_source_abbreviation
+        description: The abbreviation of the CDM instance.
+        data_type: varchar(25)
+        tests:
+          - not_null
+      - name: cdm_holder
+        description: The holder of the CDM instance.
+        data_type: varchar(255)
+        tests:
+          - not_null
+      - name: source_description
+        description: The description of the CDM instance.
+        data_type: varchar(MAX)
+      - name: source_documentation_reference
+        description: ''
+        data_type: varchar(255)
+      - name: cdm_etl_reference
+        description: ''
+        data_type: varchar(255)
+      - name: source_release_date
+        description: The date the data was extracted from the source system. In some systems that is the
+          same as the date the ETL was run. Typically the latest event date in the source is on the source_release_date.
+        data_type: date
+        tests:
+          - not_null
+      - name: cdm_release_date
+        description: The date the ETL script was completed. Typically this is after the source_release_date.
+        data_type: date
+        tests:
+          - not_null
+      - name: cdm_version
+        description: Version of the OMOP CDM used as string. e.g. v5.4
+        data_type: varchar(10)
+      - name: cdm_version_concept_id
+        description: The Concept Id representing the version of the CDM.
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept')
+              field: concept_id
+      - name: vocabulary_version
+        description: Version of the OMOP standardised vocabularies loaded
+        data_type: varchar(20)
+        tests:
+          - not_null
diff --git a/models/omop/_models/concept.yml b/models/omop/_models/concept.yml
@@ -0,0 +1,82 @@
+models:
+  - name: concept
+    description: The Standardized Vocabularies contains records, or Concepts, that uniquely identify each
+      fundamental unit of meaning used to express clinical information in all domain tables of the CDM.
+      Concepts are derived from vocabularies, which represent clinical information across a domain (e.g. conditions,
+      drugs, procedures) through the use of codes and associated descriptions. Some Concepts are designated
+      Standard Concepts, meaning these Concepts can be used as normative expressions of a clinical entity
+      within the OMOP Common Data Model and standardized analytics. Each Standard Concept belongs to one
+      Domain, which defines the location where the Concept would be expected to occur within the data
+      tables of the CDM. Concepts can represent broad categories (‘Cardiovascular disease’), detailed
+      clinical elements (‘Myocardial infarction of the anterolateral wall’), or modifying characteristics
+      and attributes that define Concepts at various levels of detail (severity of a disease, associated
+      morphology, etc.). Records in the Standardized Vocabularies tables are derived from national or
+      international vocabularies such as SNOMED-CT, RxNorm, and LOINC, or custom OMOP Concepts defined
+      to cover various aspects of observational data analysis.
+    columns:
+      - name: concept_id
+        description: A unique identifier for each Concept across all domains.
+        data_type: integer
+        tests:
+          - not_null
+          - unique
+      - name: concept_name
+        description: An unambiguous, meaningful and descriptive name for the Concept.
+        data_type: varchar(255)
+        tests:
+          - not_null
+      - name: domain_id
+        description: A foreign key to the DOMAIN table the Concept belongs to.
+        data_type: varchar(20)
+        tests:
+          - not_null
+          - relationships:
+              to: ref('domain')
+              field: domain_id
+      - name: vocabulary_id
+        description: A foreign key to the VOCABULARY table indicating from which source the Concept has
+          been adapted.
+        data_type: varchar(20)
+        tests:
+          - not_null
+          - relationships:
+              to: ref('vocabulary')
+              field: vocabulary_id
+      - name: concept_class_id
+        description: The attribute or concept class of the Concept. Examples are ‘Clinical Drug’, ‘Ingredient’,
+          ‘Clinical Finding’ etc.
+        data_type: varchar(20)
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept_class')
+              field: concept_class_id
+      - name: standard_concept
+        description: This flag determines whether a Concept is a Standard Concept, i.e. is used in the data,
+          a Classification Concept, or a non-standard Source Concept. The allowable values are ‘S’ (Standard
+          Concept) and ‘C’ (Classification Concept), otherwise the content is NULL.
+        data_type: varchar(1)
+      - name: concept_code
+        description: The concept code represents the identifier of the Concept in the source vocabulary,
+          such as SNOMED-CT concept IDs, RxNorm RXCUIs etc. Note that concept codes are not unique across
+          vocabularies.
+        data_type: varchar(50)
+        tests:
+          - not_null
+      - name: valid_start_date
+        description: The date when the Concept was first recorded. The default value is 1-Jan-1970, meaning,
+          the Concept has no (known) date of inception.
+        data_type: date
+        tests:
+          - not_null
+      - name: valid_end_date
+        description: The date when the Concept became invalid because it was deleted or superseded (updated)
+          by a new concept. The default value is 31-Dec-2099, meaning, the Concept is valid until it becomes
+          deprecated.
+        data_type: date
+        tests:
+          - not_null
+      - name: invalid_reason
+        description: Reason the Concept was invalidated. Possible values are D (deleted), U (replaced with
+          an update) or NULL when valid_end_date has the default value.
+        data_type: varchar(1)
diff --git a/models/omop/_models/concept_ancestor.yml b/models/omop/_models/concept_ancestor.yml
@@ -0,0 +1,39 @@
+models:
+  - name: concept_ancestor
+    description: The CONCEPT_ANCESTOR table is designed to simplify observational analysis by providing
+      the complete hierarchical relationships between Concepts. Only direct parent-child relationships
+      between Concepts are stored in the CONCEPT_RELATIONSHIP table. To determine higher level ancestry
+      connections, all individual direct relationships would have to be navigated at analysis time. The
+      CONCEPT_ANCESTOR table includes records for all parent-child relationships, as well as grandparent-grandchild
+      relationships and those of any other level of lineage. Using the CONCEPT_ANCESTOR table allows for
+      querying for all descendants of a hierarchical concept. For example, drug ingredients and drug products
+      are all descendants of a drug class ancestor.
+    columns:
+      - name: ancestor_concept_id
+        description: The Concept Id for the higher-level concept that forms the ancestor in the relationship.
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept')
+              field: concept_id
+      - name: descendant_concept_id
+        description: The Concept Id for the lower-level concept that forms the descendant in the relationship.
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept')
+              field: concept_id
+      - name: min_levels_of_separation
+        description: The minimum separation in number of levels of hierarchy between ancestor and descendant
+          concepts. This is an attribute that is used to simplify hierarchic analysis.
+        data_type: integer
+        tests:
+          - not_null
+      - name: max_levels_of_separation
+        description: The maximum separation in number of levels of hierarchy between ancestor and descendant
+          concepts. This is an attribute that is used to simplify hierarchic analysis.
+        data_type: integer
+        tests:
+          - not_null
diff --git a/models/omop/_models/concept_class.yml b/models/omop/_models/concept_class.yml
@@ -0,0 +1,28 @@
+models:
+  - name: concept_class
+    description: The CONCEPT_CLASS table includes semantic categories that reference the source structure
+      of each Vocabulary. Concept Classes represent so-called horizontal (e.g. MedDRA, RxNorm) or vertical
+      levels (e.g. SNOMED) of the vocabulary structure. Vocabularies without any Concept Classes, such
+      as HCPCS, use the vocabulary_id as the Concept Class. This reference table is populated with a single
+      record for each Concept Class, which includes a Concept Class ID and a fully specified Concept Class
+      name.
+    columns:
+      - name: concept_class_id
+        description: A unique key for each class.
+        data_type: varchar(20)
+        tests:
+          - not_null
+          - unique
+      - name: concept_class_name
+        description: The name describing the Concept Class, e.g. Clinical Finding, Ingredient, etc.
+        data_type: varchar(255)
+        tests:
+          - not_null
+      - name: concept_class_concept_id
+        description: A Concept that represents the Concept Class.
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept')
+              field: concept_id
diff --git a/models/omop/_models/concept_relationship.yml b/models/omop/_models/concept_relationship.yml
@@ -0,0 +1,47 @@
+models:
+  - name: concept_relationship
+    description: The CONCEPT_RELATIONSHIP table contains records that define relationships between any
+      two Concepts and the nature or type of the relationship. This table captures various types of relationships,
+      including hierarchical, associative, and other semantic connections, enabling comprehensive analysis
+      and interpretation of clinical concepts. Every kind of relationship is defined in the RELATIONSHIP
+      table.
+    columns:
+      - name: concept_id_1
+        description: ''
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept')
+              field: concept_id
+      - name: concept_id_2
+        description: ''
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept')
+              field: concept_id
+      - name: relationship_id
+        description: The relationship between CONCEPT_ID_1 and CONCEPT_ID_2. Please see the Vocabulary
+          Conventions for more information.
+        data_type: varchar(20)
+        tests:
+          - not_null
+          - relationships:
+              to: ref('relationship')
+              field: relationship_id
+      - name: valid_start_date
+        description: The date when the relationship is first recorded.
+        data_type: date
+        tests:
+          - not_null
+      - name: valid_end_date
+        description: The date when the relationship is invalidated.
+        data_type: date
+        tests:
+          - not_null
+      - name: invalid_reason
+        description: Reason the relationship was invalidated. Possible values are ‘D’ (deleted), ‘U’ (updated)
+          or NULL.
+        data_type: varchar(1)
diff --git a/models/omop/_models/concept_synonym.yml b/models/omop/_models/concept_synonym.yml
@@ -0,0 +1,25 @@
+models:
+  - name: concept_synonym
+    description: The CONCEPT_SYNONYM table is used to store alternate names and descriptions for Concepts.
+    columns:
+      - name: concept_id
+        description: ''
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept')
+              field: concept_id
+      - name: concept_synonym_name
+        description: ''
+        data_type: varchar(1000)
+        tests:
+          - not_null
+      - name: language_concept_id
+        description: ''
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('concept')
+              field: concept_id
diff --git a/models/omop/_models/condition_era.yml b/models/omop/_models/condition_era.yml
@@ -0,0 +1,47 @@
+models:
+  - name: condition_era
+    description: 'A Condition Era is defined as a span of time when the Person is assumed to have a given
+      condition. Similar to Drug Eras, Condition Eras are chronological periods of Condition Occurrence
+      and every Condition Occurrence record should be part of a Condition Era. Combining individual Condition
+      Occurrences into a single Condition Era serves two purposes:'
+    columns:
+      - name: condition_era_id
+        description: ''
+        data_type: integer
+        tests:
+          - not_null
+          - unique
+      - name: person_id
+        description: ''
+        data_type: integer
+        tests:
+          - not_null
+          - relationships:
+              to: ref('person')
+              field: person_id
+      - name: condition_concept_id
+        description: The Concept Id representing the Condition.
+        data_type: integer
+        tests:
+          - not_null
+          - dbt_utils.relationships_where:
+              to: ref('concept')
+              field: concept_id
+              from_condition: condition_concept_id <> 0
+              to_condition: domain_id = 'Condition'
+      - name: condition_era_start_date
+        description: The start date for the Condition Era constructed from the individual instances of
+          Condition Occurrences. It is the start date of the very first chronologically recorded instance
+          of the condition with at least 31 days since any prior record of the same Condition.
+        data_type: date
+        tests:
+          - not_null
+      - name: condition_era_end_date
+        description: The end date for the Condition Era constructed from the individual instances of Condition
+          Occurrences. It is the end date of the final continuously recorded instance of the Condition.
+        data_type: date
+        tests:
+          - not_null
+      - name: condition_occurrence_count
+        description: The number of individual Condition Occurrences used to construct the condition era.
+        data_type: integer
-Original file line number
+Diff line change
@@ Expand Up / @@ -25,4 +25,5 @@ @@
                 "packages.yml"
             ]
         },
+        "dbt.enableNewLineagePanel": true,
     }