From beb360281bac54070e8dfa769f57b1e58f15ef47 Mon Sep 17 00:00:00 2001 From: Anna Tsvetkova Date: Mon, 1 Feb 2021 19:44:44 +0300 Subject: [PATCH] waveform POC-2; bugfixes --- README.md | 6 + custom_mapping_csv/gcpt_meas_lab_loinc.csv | 9 - etl/etl/cdm_measurement.sql | 3 +- etl/etl/cdm_observation.sql | 8 +- etl/etl/cdm_procedure_occurrence.sql | 2 +- etl/etl/lk_drug.sql | 2 + etl/etl/lk_meas_chartevents.sql | 2 +- etl/etl/lk_meas_waveform.sql | 9 +- .../{st_waveform.sql => st_waveform_poc1.sql} | 0 etl/staging/st_waveform_poc2.sql | 164 ++++ scripts/wf_read.py | 263 +++++ test/metrics_gen/me_gen_queries_from_conf.py | 4 +- .../me_mapping_rate_from_conf.bak.sql | 920 ++++++++++++++++++ .../metrics_gen/me_mapping_rate_from_conf.sql | 70 +- test/ut/README.md | 7 + test/ut/gen_bq_ut_basic.py | 49 + vocabulary_refresh/z_refresh_results.sql | 12 + 17 files changed, 1476 insertions(+), 54 deletions(-) rename etl/staging/{st_waveform.sql => st_waveform_poc1.sql} (100%) create mode 100644 etl/staging/st_waveform_poc2.sql create mode 100644 scripts/wf_read.py create mode 100644 test/metrics_gen/me_mapping_rate_from_conf.bak.sql diff --git a/README.md b/README.md index 4803477..632afc8 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,12 @@ ### Change Log (latest first) ### +**2021-02-01** + +* Waveforms POC-2 (load from folders tree and csv files) +* Bugfixes + + **2021-01-25** * Mapping improvement diff --git a/custom_mapping_csv/gcpt_meas_lab_loinc.csv b/custom_mapping_csv/gcpt_meas_lab_loinc.csv index 8fe59cd..3f83029 100644 --- a/custom_mapping_csv/gcpt_meas_lab_loinc.csv +++ b/custom_mapping_csv/gcpt_meas_lab_loinc.csv @@ -82,7 +82,6 @@ "Ventilator",2000001081,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Ventilator","1970-01-01","2099-12-31",,3004921,"Maps to","Mapped from","1970-01-01","2099-12-31", "Intubated",2000001082,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Intubated","1970-01-01","2099-12-31",,45884415,"Maps to","Mapped from","1970-01-01","2099-12-31", "Alveolar-arterial Gradient",2000001083,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Alveolar-arterial Gradient","1970-01-01","2099-12-31",,3007913,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Calculated Bicarbonate, Whole Blood",2000001084,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Calculated Bicarbonate, Whole Blood","1970-01-01","2099-12-31",,0,"Maps to","Mapped from","1970-01-01","2099-12-31", "Chloride, Whole Blood",2000001085,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Chloride, Whole Blood","1970-01-01","2099-12-31",,3018572,"Maps to","Mapped from","1970-01-01","2099-12-31", "Free Calcium",2000001086,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Free Calcium","1970-01-01","2099-12-31",,3021119,"Maps to","Mapped from","1970-01-01","2099-12-31", "Glucose",2000001087,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Glucose","1970-01-01","2099-12-31",,3000483,"Maps to","Mapped from","1970-01-01","2099-12-31", @@ -92,7 +91,6 @@ "Oxygen",2000001091,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Oxygen","1970-01-01","2099-12-31",,3024882,"Maps to","Mapped from","1970-01-01","2099-12-31", "Oxygen Saturation",2000001092,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Oxygen Saturation","1970-01-01","2099-12-31",,3013502,"Maps to","Mapped from","1970-01-01","2099-12-31", "PEEP",2000001093,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"PEEP","1970-01-01","2099-12-31",,3022875,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Calculated Bicarbonate, Whole Blood",2000001094,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Calculated Bicarbonate, Whole Blood","1970-01-01","2099-12-31",,3005456,"Maps to","Mapped from","1970-01-01","2099-12-31", "Sodium, Whole Blood",2000001095,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Sodium, Whole Blood","1970-01-01","2099-12-31",,3000285,"Maps to","Mapped from","1970-01-01","2099-12-31", "Tidal Volume",2000001096,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Tidal Volume","1970-01-01","2099-12-31",,3012410,"Maps to","Mapped from","1970-01-01","2099-12-31", "Barbiturate Screen",2000001097,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Barbiturate Screen","1970-01-01","2099-12-31",,3003132,"Maps to","Mapped from","1970-01-01","2099-12-31", @@ -105,7 +103,6 @@ "Osmolality, Measured",2000001104,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Osmolality, Measured","1970-01-01","2099-12-31",,3008295,"Maps to","Mapped from","1970-01-01","2099-12-31", "Protein Electrophoresis",2000001105,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Protein Electrophoresis","1970-01-01","2099-12-31",,3004588,"Maps to","Mapped from","1970-01-01","2099-12-31", "Tricyclic Antidepressant Screen",2000001106,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Tricyclic Antidepressant Screen","1970-01-01","2099-12-31",,3025478,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Pleural fluid lactate dehydrogenase measurement",2000001107,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Lactate Dehydrogenase, Pleural","1970-01-01","2099-12-31",,4135658,"Maps to","Mapped from","1970-01-01","2099-12-31", "Total Protein, Pleural",2000001108,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Total Protein, Pleural","1970-01-01","2099-12-31",,3003434,"Maps to","Mapped from","1970-01-01","2099-12-31", "Amphetamine Screen, Urine",2000001109,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Amphetamine Screen, Urine","1970-01-01","2099-12-31",,3027944,"Maps to","Mapped from","1970-01-01","2099-12-31", "Benzodiazepine Screen, Urine",2000001110,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Benzodiazepine Screen, Urine","1970-01-01","2099-12-31",,3000764,"Maps to","Mapped from","1970-01-01","2099-12-31", @@ -116,18 +113,14 @@ "Opiate Screen, Urine",2000001115,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Opiate Screen, Urine","1970-01-01","2099-12-31",,3027008,"Maps to","Mapped from","1970-01-01","2099-12-31", "Osmolality, Urine",2000001116,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Osmolality, Urine","1970-01-01","2099-12-31",,3026782,"Maps to","Mapped from","1970-01-01","2099-12-31", "Urea Nitrogen, Urine",2000001117,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Urea Nitrogen, Urine","1970-01-01","2099-12-31",,3011965,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Lymphocytes",2000001118,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Lymphocytes","1970-01-01","2099-12-31",,3004437,"Maps to","Mapped from","1970-01-01","2099-12-31", "Macrophage",2000001119,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Macrophage","1970-01-01","2099-12-31",,3041635,"Maps to","Mapped from","1970-01-01","2099-12-31", "Monocytes",2000001120,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Monocytes","1970-01-01","2099-12-31",,3033483,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Polys",2000001121,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Polys","1970-01-01","2099-12-31",,3008838,"Maps to","Mapped from","1970-01-01","2099-12-31", "RBC, Ascites",2000001122,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"RBC, Ascites","1970-01-01","2099-12-31",,3009613,"Maps to","Mapped from","1970-01-01","2099-12-31", "Fibrinogen, Functional",2000001123,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Fibrinogen, Functional","1970-01-01","2099-12-31",,3016407,"Maps to","Mapped from","1970-01-01","2099-12-31", "Granulocyte Count",2000001124,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Granulocyte Count","1970-01-01","2099-12-31",,3035715,"Maps to","Mapped from","1970-01-01","2099-12-31", "Reticulocyte Count, Absolute",2000001125,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Reticulocyte Count, Absolute","1970-01-01","2099-12-31",,3023520,"Maps to","Mapped from","1970-01-01","2099-12-31", "Reticulocyte Count, Automated",2000001126,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Reticulocyte Count, Automated","1970-01-01","2099-12-31",,3007124,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Lymphocytes",2000001127,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Lymphocytes","1970-01-01","2099-12-31",,3005532,"Maps to","Mapped from","1970-01-01","2099-12-31", "Monos",2000001128,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Monos","1970-01-01","2099-12-31",,3043387,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Polys",2000001129,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Polys","1970-01-01","2099-12-31",,3026051,"Maps to","Mapped from","1970-01-01","2099-12-31", "Blood",2000001130,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Blood","1970-01-01","2099-12-31",,3011397,"Maps to","Mapped from","1970-01-01","2099-12-31", "Eosinophils",2000001131,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Eosinophils","1970-01-01","2099-12-31",,3037579,"Maps to","Mapped from","1970-01-01","2099-12-31", "Transitional Epithelial Cells",2000001132,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Transitional Epithelial Cells","1970-01-01","2099-12-31",,3035851,"Maps to","Mapped from","1970-01-01","2099-12-31", @@ -155,12 +148,10 @@ "Cytomegalovirus DNA [Units/volume] (viral load) in Unspecified specimen by Probe with signal amplification",2000001154,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Cytomegalovirus Viral Load","1970-01-01","2099-12-31",,3022489,"Maps to","Mapped from","1970-01-01","2099-12-31", "Hepatitis B virus core IgM Ab [Units/volume] in Serum",2000001155,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Hepatitis B Core Antibody, IgM","1970-01-01","2099-12-31",,3018806,"Maps to","Mapped from","1970-01-01","2099-12-31", "Cholesterol in LDL [Moles/volume] in Serum or Plasma",2000001156,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Cholesterol, LDL, Measured","1970-01-01","2099-12-31",,3001308,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Lactate dehydrogenase [Enzymatic activity/volume] in Pleural fluid by Pyruvate to lactate reaction",2000001157,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Lactate Dehydrogenase, Pleural","1970-01-01","2099-12-31",,40763081,"Maps to","Mapped from","1970-01-01","2099-12-31", "Erythrocytes [#/volume] in Pleural fluid",2000001158,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"RBC, Pleural","1970-01-01","2099-12-31",,3028308,"Maps to","Mapped from","1970-01-01","2099-12-31", "Glucose [Mass/volume] in Pleural fluid",2000001159,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Glucose, Pleural","1970-01-01","2099-12-31",,3003403,"Maps to","Mapped from","1970-01-01","2099-12-31", "Lymphocytes [#/volume] in Blood",2000001160,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Absolute Lymphocyte Count","1970-01-01","2099-12-31",,3019198,"Maps to","Mapped from","1970-01-01","2099-12-31", "Erythrocyte shape [Morphology] in Blood",2000001161,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"RBC Morphology","1970-01-01","2099-12-31",,3014029,"Maps to","Mapped from","1970-01-01","2099-12-31", -"Glomerular filtration rate/1.73 sq M.predicted [Volume Rate/Area] in Serum, Plasma or Blood by Creatinine-based formula (MDRD)",2000001162,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Estimated GFR (MDRD equation)","1970-01-01","2099-12-31",,46236952,"Maps to","Mapped from","1970-01-01","2099-12-31", "Potassium [Moles/volume] in Blood",2000001163,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Potassium, Whole Blood","1970-01-01","2099-12-31",,3005456,"Maps to","Mapped from","1970-01-01","2099-12-31", "Immature granulocytes [#/volume] in Blood",2000001164,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Immature Granulocytes","1970-01-01","2099-12-31",,3040168,"Maps to","Mapped from","1970-01-01","2099-12-31", "Protein [Mass/volume] in Peritoneal fluid",2000001165,"mimiciv_meas_lab_loinc","Measurement","Lab Test",,"Total Protein, Ascites","1970-01-01","2099-12-31",,3002331,"Maps to","Mapped from","1970-01-01","2099-12-31", diff --git a/etl/etl/cdm_measurement.sql b/etl/etl/cdm_measurement.sql index ba1a8c8..8d9773a 100644 --- a/etl/etl/cdm_measurement.sql +++ b/etl/etl/cdm_measurement.sql @@ -24,7 +24,6 @@ -- src_labevents.value: -- investigate if there are formatted values with thousand separators, -- and if we need to use more complicated parsing. --- see `@etl_project`.@etl_dataset.an_labevents_full -- ------------------------------------------------------------------- @@ -261,7 +260,7 @@ SELECT 44807951 AS measurement_type_concept_id, -- ECG - electrocardiograph -- ??? CAST(NULL AS INT64) AS operator_concept_id, src.value_as_number AS value_as_number, - CAST(NULL AS INT64) AS value_as_concept_id, + CAST(NULL AS INT64) AS value_as_concept_id, -- to add values src.unit_concept_id AS unit_concept_id, CAST(NULL AS FLOAT64) AS range_low, CAST(NULL AS FLOAT64) AS range_high, diff --git a/etl/etl/cdm_observation.sql b/etl/etl/cdm_observation.sql index c5e81a8..7265a11 100644 --- a/etl/etl/cdm_observation.sql +++ b/etl/etl/cdm_observation.sql @@ -67,7 +67,9 @@ SELECT src.type_concept_id AS observation_type_concept_id, CAST(NULL AS FLOAT64) AS value_as_number, src.value_as_string AS value_as_string, - src.value_as_concept_id AS value_as_concept_id, + IF(src.value_as_string IS NOT NULL, + COALESCE(src.value_as_concept_id, 0), + NULL) AS value_as_concept_id, CAST(NULL AS INT64) AS qualifier_concept_id, CAST(NULL AS INT64) AS unit_concept_id, CAST(NULL AS INT64) AS provider_id, @@ -110,7 +112,9 @@ SELECT 32817 AS observation_type_concept_id, -- EHR Type Concept Type Concept src.value_as_number AS value_as_number, src.value_source_value AS value_as_string, - src.value_as_concept_id AS value_as_concept_id, + IF(src.value_source_value IS NOT NULL, + COALESCE(src.value_as_concept_id, 0), + NULL) AS value_as_concept_id, CAST(NULL AS INT64) AS qualifier_concept_id, src.unit_concept_id AS unit_concept_id, CAST(NULL AS INT64) AS provider_id, diff --git a/etl/etl/cdm_procedure_occurrence.sql b/etl/etl/cdm_procedure_occurrence.sql index 8c9474d..4fc7f56 100644 --- a/etl/etl/cdm_procedure_occurrence.sql +++ b/etl/etl/cdm_procedure_occurrence.sql @@ -89,7 +89,7 @@ WHERE -- ------------------------------------------------------------------- -- Rule 5 --- lk_observation_mapped +-- lk_observation_mapped, possible DRG codes -- ------------------------------------------------------------------- INSERT INTO `@etl_project`.@etl_dataset.cdm_procedure_occurrence diff --git a/etl/etl/lk_drug.sql b/etl/etl/lk_drug.sql index f963d79..9524163 100644 --- a/etl/etl/lk_drug.sql +++ b/etl/etl/lk_drug.sql @@ -53,6 +53,7 @@ SELECT src.drug, '') || ' ' || COALESCE(src.prod_strength, '') AS gcpt_source_code, 'mimiciv_drug_ndc' AS gcpt_source_vocabulary, -- source_code = label + src.pharmacy_id AS pharmacy_id, -- 'prescriptions' AS unit_id, src.load_table_id AS load_table_id, @@ -173,6 +174,7 @@ SELECT src.route_source_code AS route_source_code, src.dose_unit_source_code AS dose_unit_source_code, src.form_val_disp AS quantity_source_value, + src.pharmacy_id AS pharmacy_id, -- to investigate pharmacy.medication -- CONCAT('drug.', src.unit_id) AS unit_id, src.load_table_id AS load_table_id, diff --git a/etl/etl/lk_meas_chartevents.sql b/etl/etl/lk_meas_chartevents.sql index bd0e40f..19d4b58 100644 --- a/etl/etl/lk_meas_chartevents.sql +++ b/etl/etl/lk_meas_chartevents.sql @@ -138,7 +138,7 @@ LEFT JOIN `@etl_project`.@etl_dataset.lk_chartevents_concept c_value -- values for main ON c_value.source_code = src.value AND c_value.source_vocabulary_id = 'mimiciv_meas_chartevents_value' - AND c_value.target_domain_id = 'Observation' + AND c_value.target_domain_id = 'Meas Value' LEFT JOIN `@etl_project`.@etl_dataset.lk_meas_unit_concept uc ON uc.source_code = src.valueuom diff --git a/etl/etl/lk_meas_waveform.sql b/etl/etl/lk_meas_waveform.sql index 5ee5458..4dd1daf 100644 --- a/etl/etl/lk_meas_waveform.sql +++ b/etl/etl/lk_meas_waveform.sql @@ -7,7 +7,7 @@ -- Populate lookups for cdm_measurement table -- Rule 10 waveforms -- Dependencies: run after --- st_waveform.sql, +-- st_waveform_poc2.sql, -- lk_meas_unit_concept -- ------------------------------------------------------------------- @@ -100,7 +100,10 @@ SELECT FARM_FINGERPRINT(GENERATE_UUID()) AS measurement_id, wh.subject_id AS subject_id, hadm.hadm_id AS hadm_id, -- get hadm_id by datetime period - src.reference_id AS reference_id, + CONCAT( + src.reference_id, '.', src.segment_name, + '.', src.source_code + ) AS reference_id, -- add segment name and source code to make the field unique COALESCE(vc2.concept_id, 0) AS target_concept_id, COALESCE(vc2.domain_id, 'Measurement') AS target_domain_id, src.mx_datetime AS start_datetime, @@ -129,7 +132,7 @@ LEFT JOIN LEFT JOIN `@etl_project`.@etl_dataset.voc_concept vc1 ON vc1.concept_code = src.source_code - AND vc1.vocabulary_id = 'mimiciv_meas_waveform_code' + AND vc1.vocabulary_id = 'mimiciv_meas_wf' -- supposing that the standard mapping is supplemented with custom concepts for waveform specific values LEFT JOIN `@etl_project`.@etl_dataset.voc_concept_relationship vr diff --git a/etl/staging/st_waveform.sql b/etl/staging/st_waveform_poc1.sql similarity index 100% rename from etl/staging/st_waveform.sql rename to etl/staging/st_waveform_poc1.sql diff --git a/etl/staging/st_waveform_poc2.sql b/etl/staging/st_waveform_poc2.sql new file mode 100644 index 0000000..8acbb79 --- /dev/null +++ b/etl/staging/st_waveform_poc2.sql @@ -0,0 +1,164 @@ +-- ------------------------------------------------------------------- +-- @2020, Odysseus Data Services, Inc. All rights reserved +-- MIMIC IV CDM Conversion +-- ------------------------------------------------------------------- + +-- ------------------------------------------------------------------- +-- dependency, run after: +-- st_core.sql +-- ------------------------------------------------------------------- + +-- ------------------------------------------------------------------- +-- A draft to apply Wave Forms + +-- (Manlik) Regardless of what format we end up with - is to take the meta data and map them into: + +-- Procedure +-- [SNOMED.4141651] Continuous ECG monitoring +-- date/time is offset from start of monitor (1/1/1990 if no actual time is given) +-- procedure_source_value = "" + +-- Device_exposure +-- [SNOMED.45758393] Patient monitoring system module, electrocardiographic +-- device_exposure_start_datetime is the date/time is offset from start of monitor (1/1/1990 if no actual time is given) +-- device_exposure_end_datetime is the start + total duration of the monitor data +-- device_source_value = "" + +-- Measurement - heart rate (1 to N), original WFDB reported or generated by our ETL +-- [SNOMED.4239408] Heart rate - units [SNOMED.4118124] bpm +-- date/time is offset from start of monitor (1/1/1990 if no actual time is given) +-- measurement_source_value = ". - ex "3700002_0011.WFDB" or "3700002_0011.CCSIMxv1" + +-- Measurement - P-QRS-T derived measurements - aVF R-wave example +-- [LOINC.3022916] R wave amplitude in lead AVF +-- date/time is offset from start of monitor (1/1/1990 if no actual time is given) + segment offset +-- measurement_source_value = ". - ex 3700002_0011.CCSIMxv1" + +-- I have ECG measurement map down to lead level + +-- The same approach would apply to BP and Respiratory values. + +-- If we derive observations like AFib and Tachycardia - we can further map these to the condition_occurrence table as [4064452] ECG: atrial fibrillation using the same reference time and source +-- +-- parsed codes targeted to clinical findings, target cdm table - cdm_condition_occurrence +-- ------------------------------------------------------------------- + +-- ------------------------------------------------------------------- +-- open points: +-- parse XML to create src_* or raw_* tables +-- +-- POC source table: +/* + bq --location=US load --replace --source_format=CSV --allow_quoted_newlines=True --skip_leading_rows=1 --autodetect waveform_source_poc.raw_case055_ecg_lines3 z_more/raw_case055_ecg_lines3.csv +*/ +-- ------------------------------------------------------------------- + +-- ------------------------------------------------------------------- +-- staging tables +-- ------------------------------------------------------------------- + +-- ------------------------------------------------------------------- +-- src_waveform_header +-- ------------------------------------------------------------------- + +CREATE OR REPLACE TABLE `@etl_project`.@etl_dataset.src_waveform_header +( + reference_id STRING, + raw_files_path STRING, + case_id INT64, + subject_id INT64, + start_datetime DATETIME, + end_datetime DATETIME, + -- + load_table_id STRING, + load_row_id INT64, + trace_id STRING +); + +-- parsed codes to be targeted to table cdm_measurement + +CREATE OR REPLACE TABLE `@etl_project`.@etl_dataset.src_waveform_mx +( + case_id INT64, -- FK to the header + segment_name STRING, -- two digits of case_id, 5 digits of internal sequence number + subject_id INT64, -- patient's id + reference_id STRING, -- file name without extension + mx_datetime DATETIME, -- time of measurement + source_code STRING, -- type of measurement + value_as_number FLOAT64, + unit_source_value STRING, -- measurement unit "BPM", "MS", "UV" (microvolt) etc. + -- map these labels and populate unit_concept_id + -- + load_table_id STRING, + load_row_id INT64, + trace_id STRING +); + + +-- parse xml from Manlik? -> src_waveform +-- src_waveform -> visit_detail (visit_detail_source_value = ) + +-- finding the visit +-- create visit_detail +-- create measurement -> link visit_detail using visit_detail_source_value = meas_source_value +-- (start with Manlik's proposal) + + +-- ------------------------------------------------------------------- +-- insert sample data +-- ------------------------------------------------------------------- + + +INSERT INTO `@etl_project`.@etl_dataset.src_waveform_header +SELECT + subj.short_reference_id AS reference_id, + subj.long_reference_id AS raw_files_path, + subj.case_id AS case_id, + subj.subject_id AS subject_id, + CAST(src.start_datetime AS DATETIME) AS start_datetime, + CAST(src.end_datetime AS DATETIME) AS end_datetime, + -- + 'wf_header' AS load_table_id, + 0 AS load_row_id, + TO_JSON_STRING(STRUCT( + subj.subject_id AS subject_id, + subj.short_reference_id AS reference_id + )) AS trace_id +FROM + `@wf_project`.@wf_dataset.wf_header subj +INNER JOIN + ( + SELECT + case_id, + MIN(date_time) AS start_datetime, + MAX(date_time) AS end_datetime + FROM `@wf_project`.@wf_dataset.wf_details + GROUP BY case_id + ) src + ON src.case_id = subj.case_id +; + + +INSERT INTO `@etl_project`.@etl_dataset.src_waveform_mx +SELECT + src.case_id AS case_id, -- FK to the header + CAST(src.segment_name AS STRING) AS segment_name, + subj.subject_id AS subject_id, + subj.short_reference_id AS reference_id, + CAST(src.date_time AS DATETIME) AS mx_datetime, + src.src_name AS source_code, + CAST(src.value AS FLOAT64) AS value_as_number, + unit_concept_name AS unit_source_value, + -- + 'wf_details' load_table_id, + FARM_FINGERPRINT(GENERATE_UUID()) AS load_row_id, + TO_JSON_STRING(STRUCT( + src.segment_name AS segment_name, + subj.subject_id AS subject_id + )) AS trace_id -- +FROM + `@wf_project`.@wf_dataset.wf_details src +INNER JOIN + `@wf_project`.@wf_dataset.wf_header subj + ON src.case_id = subj.case_id +; diff --git a/scripts/wf_read.py b/scripts/wf_read.py new file mode 100644 index 0000000..847563b --- /dev/null +++ b/scripts/wf_read.py @@ -0,0 +1,263 @@ +''' +------------------------------------------------------------------- +@2020, Odysseus Data Services, Inc. All rights reserved +MIMIC IV CDM Conversion +------------------------------------------------------------------- + +Iterate trhough waveform source csv files, organized in folders +Folders hierarcy: + root_source_files/case_id/subject_id/wfdb_reference_id.csv + +''' + +import subprocess +import os +import sys +import getopt +import json +import datetime + +# ---------------------------------------------------- +# default config values +# To override default config values, copy the keys to be overriden to a json file, +# and indicate this file as --config parameter +# ---------------------------------------------------- + +config_default = { + + "variables": { + + "@waveforms_csv_path": "gs://...", + + "@wf_project": "...", + "@wf_dataset": "..." + } + +} + + +################################################################################## + +# gsutil ls gs://mimic_iv_to_omop/waveforms/source_data/csv/ + +# subprocess.check_output('gsutil ls gs://mimic_iv_to_omop/waveforms/source_data/csv/', shell=True) + +################################################################################## + +# ---------------------------------------------------- +# read_params() +# ---------------------------------------------------- + +def read_params(): + + print('Reading params...') + params = { + "etlconf_file": "", + "config_file": "" + } + + # Parsing command line arguments + try: + opts, args = getopt.getopt(sys.argv[1:],"e:c:",["etlconf=,config="]) + if len(opts) == 0: + raise getopt.GetoptError("read_params() error", "Mandatory argument is missing.") + + except getopt.GetoptError as err: + print(err.args) + print("Please indicate correct params:") + print("etlconf_file: optional: indicate '-e' for 'etlconf', global config json file") + print("config_file: optional: indicate '-c' for 'config', local config json file") + sys.exit(2) + + # get config files namess + for opt, arg in opts: + if opt == '-e' or opt == '--etlconf': + if os.path.isfile(arg): + params['etlconf_file'] = arg + if opt == '-c' or opt == '--config': + if os.path.isfile(arg): + params['config_file'] = arg + + # collect script names + for arg in args: + if os.path.isfile(arg): + params['script_files'].append(arg) + else: + params['files_not_found'].append(arg) + + print('scripts to run', params) + return params + +# ---------------------------------------------------- +# read_config() +# ---------------------------------------------------- + +def read_config(etlconf_file, config_file): + + print('Reading config...') + config = {} + config_read = {} + etlconf_read = {} + + if os.path.isfile(etlconf_file): + with open(etlconf_file) as f: + etlconf_read = json.load(f) + + if os.path.isfile(config_file): + with open(config_file) as f: + config_read = json.load(f) + + # global config has lower priority + for k in config_default: + s = etlconf_read.get(k, config_default[k]) + config[k] = s + + # local config has higher priority + for k in config_default: + s = config_read.get(k, config[k]) + config[k] = s + + print(config) + return config + + +''' + get_files_list() + it is CSVs we are going to load to a details table +''' +def get_files_list(path): + + s = subprocess.check_output('gsutil ls -r {0}'.format(path), shell=True) + files_list = s.decode('utf-8').split() + files_list = filter(lambda x: '.csv' in x, files_list) + + return files_list + + +''' + get_header_csv() + create a "header table" from the files list +''' +def get_header_csv(files_list, root_path): + + result = [] + s_template = "{case_id},{subject_id},{short_reference_id},{long_reference_id}\n" + result.append(s_template.format( + case_id = "case_id", + subject_id = "subject_id", + short_reference_id = "short_reference_id", + long_reference_id = "long_reference_id" + )) + + for s in files_list: + s_parts = s.replace(root_path, '') + parts = s_parts.split('/') + result.append(s_template.format( + case_id = parts[1], + subject_id = parts[2], + short_reference_id = parts[3].replace('.csv', ''), + long_reference_id = s + )) + print(result) + return result + +'''''' +def create_tmp_csv(header_csv, table_name): + + csv_temp_name = "tmp_{0}.csv".format(table_name) + + with open(csv_temp_name, 'w') as f: + for s in header_csv: + f.write(s) + f.close() + print('Generated', f.name) + + return csv_temp_name + + +table_wf_header = 'wf_header' +table_wf_details = 'wf_details' + + +''' +---------------------------------------------------- + load_table() + return codes: 0, 1, 2 +---------------------------------------------------- +''' + +def load_table(table, file_path, config, replace_flag): + + return_code = 0 + + bq_table = '{dataset}.{prefix}{table}' + + bq_load_command = \ + "bq --location=US load --replace={replace_flag} " + \ + " --source_format=CSV " + \ + " --allow_quoted_newlines=True " + \ + " --skip_leading_rows=1 " + \ + " --autodetect " + \ + " {table_name} " + \ + " \"{files_path}\" " + + table_path = bq_table.format(dataset=config['variables']['@wf_dataset'], prefix="", table=table) + + if os.path.isfile(file_path) or 'gs:/' in file_path: + bqc = bq_load_command.format( \ + table_name = table_path, \ + files_path = file_path, \ + replace_flag = replace_flag + ) + print('To BQ: ' + bqc) + + try: + os.system(bqc) + except Exception as e: + return_code = 2 # error during execution of the command + raise e + else: + return_code = 1 # file not found + print ('Source file {f} is not found.'.format(f=file_path)) + + return return_code + + +''' + main function +''' +def main(): + rc = 0 + duration = datetime.datetime.now() + params = read_params() + config = read_config(params['etlconf_file'], params['config_file']) + + # read folders structure + fl = get_files_list(config['variables']['@waveforms_csv_path']) + header_csv = get_header_csv(fl, config['variables']['@waveforms_csv_path']) + tmp_csv = create_tmp_csv(header_csv, table_wf_header) + + # load a header table + load_table(table_wf_header, tmp_csv, config, True) + if os.path.exists(tmp_csv): + os.remove(tmp_csv) + + # load a details table + replace_flag = True + print(fl) + for f in fl: + print(f) + load_table(table_wf_details, f, config, replace_flag) + replace_flag = False + + duration = datetime.datetime.now() - duration + print('Run time: {0}'.format(duration)) # timedelta HH:MM:SS.f + + return rc + +# ------------- + +if __name__ == '__main__': + # test1.py executed as script + # do something + main() \ No newline at end of file diff --git a/test/metrics_gen/me_gen_queries_from_conf.py b/test/metrics_gen/me_gen_queries_from_conf.py index 257cfc0..c1a50c2 100644 --- a/test/metrics_gen/me_gen_queries_from_conf.py +++ b/test/metrics_gen/me_gen_queries_from_conf.py @@ -83,8 +83,10 @@ ' 0) AS percent,\n' + \ ' COUNT(*) AS total\n' + \ 'FROM {omop_db}.{omop_prefix}{omop_table} ev\n' + \ - 'WHERE {source_value_field} IS NOT NULL\n' + \ + 'WHERE {concept_field} IS NOT NULL\n' + \ ';\n\n' +# to calculate value_as_concept_id etc MR right, consider only not null concept_id +# 'WHERE {source_value_field} IS NOT NULL\n' + \ # # with totals_table # bq_mapping_rate_insert = \ diff --git a/test/metrics_gen/me_mapping_rate_from_conf.bak.sql b/test/metrics_gen/me_mapping_rate_from_conf.bak.sql new file mode 100644 index 0000000..d8aab7f --- /dev/null +++ b/test/metrics_gen/me_mapping_rate_from_conf.bak.sql @@ -0,0 +1,920 @@ +DROP TABLE IF EXISTS `@metrics_project`.@metrics_dataset.me_mapping_rate; +CREATE TABLE `@metrics_project`.@metrics_dataset.me_mapping_rate +( + table_name STRING, + concept_field STRING, + count INT64, + percent FLOAT64, + total INT64 +); + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_care_site' AS table_name, + 'place_of_service_concept_id' AS concept_field, + COUNT(CASE WHEN place_of_service_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(place_of_service_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_care_site ev +WHERE place_of_service_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_provider' AS table_name, + 'specialty_concept_id' AS concept_field, + COUNT(CASE WHEN specialty_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(specialty_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_provider ev +WHERE specialty_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_provider' AS table_name, + 'gender_concept_id' AS concept_field, + COUNT(CASE WHEN gender_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(gender_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_provider ev +WHERE gender_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_person' AS table_name, + 'gender_concept_id' AS concept_field, + COUNT(CASE WHEN gender_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(gender_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_person ev +WHERE gender_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_person' AS table_name, + 'race_concept_id' AS concept_field, + COUNT(CASE WHEN race_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(race_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_person ev +WHERE race_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_person' AS table_name, + 'ethnicity_concept_id' AS concept_field, + COUNT(CASE WHEN ethnicity_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(ethnicity_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_person ev +WHERE ethnicity_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_death' AS table_name, + 'death_type_concept_id' AS concept_field, + COUNT(CASE WHEN death_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(death_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_death ev +WHERE death_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_death' AS table_name, + 'cause_concept_id' AS concept_field, + COUNT(CASE WHEN cause_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(cause_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_death ev +WHERE cause_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_observation_period' AS table_name, + 'period_type_concept_id' AS concept_field, + COUNT(CASE WHEN period_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(period_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_observation_period ev +WHERE period_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_payer_plan_period' AS table_name, + 'payer_concept_id' AS concept_field, + COUNT(CASE WHEN payer_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(payer_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_payer_plan_period ev +WHERE payer_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_payer_plan_period' AS table_name, + 'plan_concept_id' AS concept_field, + COUNT(CASE WHEN plan_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(plan_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_payer_plan_period ev +WHERE plan_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_payer_plan_period' AS table_name, + 'sponsor_concept_id' AS concept_field, + COUNT(CASE WHEN sponsor_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(sponsor_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_payer_plan_period ev +WHERE sponsor_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_payer_plan_period' AS table_name, + 'stop_reason_concept_id' AS concept_field, + COUNT(CASE WHEN stop_reason_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(stop_reason_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_payer_plan_period ev +WHERE stop_reason_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_occurrence' AS table_name, + 'visit_concept_id' AS concept_field, + COUNT(CASE WHEN visit_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(visit_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_occurrence ev +WHERE visit_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_occurrence' AS table_name, + 'visit_type_concept_id' AS concept_field, + COUNT(CASE WHEN visit_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(visit_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_occurrence ev +WHERE visit_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_occurrence' AS table_name, + 'visit_source_concept_id' AS concept_field, + COUNT(CASE WHEN visit_source_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(visit_source_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_occurrence ev +WHERE visit_source_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_occurrence' AS table_name, + 'admitting_source_concept_id' AS concept_field, + COUNT(CASE WHEN admitting_source_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(admitting_source_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_occurrence ev +WHERE admitting_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_occurrence' AS table_name, + 'discharge_to_concept_id' AS concept_field, + COUNT(CASE WHEN discharge_to_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(discharge_to_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_occurrence ev +WHERE discharge_to_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_detail' AS table_name, + 'visit_detail_concept_id' AS concept_field, + COUNT(CASE WHEN visit_detail_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(visit_detail_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_detail ev +WHERE visit_detail_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_detail' AS table_name, + 'visit_detail_type_concept_id' AS concept_field, + COUNT(CASE WHEN visit_detail_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(visit_detail_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_detail ev +WHERE visit_detail_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_detail' AS table_name, + 'visit_detail_source_concept_id' AS concept_field, + COUNT(CASE WHEN visit_detail_source_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(visit_detail_source_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_detail ev +WHERE visit_detail_source_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_detail' AS table_name, + 'admitting_source_concept_id' AS concept_field, + COUNT(CASE WHEN admitting_source_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(admitting_source_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_detail ev +WHERE admitting_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_visit_detail' AS table_name, + 'discharge_to_concept_id' AS concept_field, + COUNT(CASE WHEN discharge_to_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(discharge_to_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_visit_detail ev +WHERE discharge_to_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_condition_occurrence' AS table_name, + 'condition_concept_id' AS concept_field, + COUNT(CASE WHEN condition_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(condition_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_condition_occurrence ev +WHERE condition_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_condition_occurrence' AS table_name, + 'condition_type_concept_id' AS concept_field, + COUNT(CASE WHEN condition_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(condition_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_condition_occurrence ev +WHERE condition_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_condition_occurrence' AS table_name, + 'condition_status_concept_id' AS concept_field, + COUNT(CASE WHEN condition_status_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(condition_status_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_condition_occurrence ev +WHERE condition_status_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_procedure_occurrence' AS table_name, + 'procedure_concept_id' AS concept_field, + COUNT(CASE WHEN procedure_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(procedure_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_procedure_occurrence ev +WHERE procedure_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_procedure_occurrence' AS table_name, + 'procedure_type_concept_id' AS concept_field, + COUNT(CASE WHEN procedure_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(procedure_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_procedure_occurrence ev +WHERE procedure_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_procedure_occurrence' AS table_name, + 'modifier_concept_id' AS concept_field, + COUNT(CASE WHEN modifier_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(modifier_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_procedure_occurrence ev +WHERE modifier_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_observation' AS table_name, + 'observation_concept_id' AS concept_field, + COUNT(CASE WHEN observation_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(observation_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_observation ev +WHERE observation_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_observation' AS table_name, + 'observation_type_concept_id' AS concept_field, + COUNT(CASE WHEN observation_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(observation_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_observation ev +WHERE observation_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_observation' AS table_name, + 'value_as_concept_id' AS concept_field, + COUNT(CASE WHEN value_as_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(value_as_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_observation ev +WHERE value_as_string IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_observation' AS table_name, + 'qualifier_concept_id' AS concept_field, + COUNT(CASE WHEN qualifier_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(qualifier_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_observation ev +WHERE qualifier_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_observation' AS table_name, + 'unit_concept_id' AS concept_field, + COUNT(CASE WHEN unit_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(unit_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_observation ev +WHERE unit_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_measurement' AS table_name, + 'measurement_concept_id' AS concept_field, + COUNT(CASE WHEN measurement_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(measurement_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_measurement ev +WHERE measurement_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_measurement' AS table_name, + 'measurement_type_concept_id' AS concept_field, + COUNT(CASE WHEN measurement_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(measurement_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_measurement ev +WHERE measurement_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_measurement' AS table_name, + 'operator_concept_id' AS concept_field, + COUNT(CASE WHEN operator_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(operator_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_measurement ev +WHERE operator_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_measurement' AS table_name, + 'value_as_concept_id' AS concept_field, + COUNT(CASE WHEN value_as_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(value_as_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_measurement ev +WHERE value_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_measurement' AS table_name, + 'unit_concept_id' AS concept_field, + COUNT(CASE WHEN unit_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(unit_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_measurement ev +WHERE unit_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_device_exposure' AS table_name, + 'device_concept_id' AS concept_field, + COUNT(CASE WHEN device_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(device_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_device_exposure ev +WHERE device_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_device_exposure' AS table_name, + 'device_type_concept_id' AS concept_field, + COUNT(CASE WHEN device_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(device_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_device_exposure ev +WHERE device_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_drug_exposure' AS table_name, + 'drug_concept_id' AS concept_field, + COUNT(CASE WHEN drug_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(drug_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_drug_exposure ev +WHERE drug_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_drug_exposure' AS table_name, + 'drug_type_concept_id' AS concept_field, + COUNT(CASE WHEN drug_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(drug_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_drug_exposure ev +WHERE drug_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_drug_exposure' AS table_name, + 'route_concept_id' AS concept_field, + COUNT(CASE WHEN route_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(route_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_drug_exposure ev +WHERE route_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_cost' AS table_name, + 'cost_type_concept_id' AS concept_field, + COUNT(CASE WHEN cost_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(cost_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_cost ev +WHERE cost_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_cost' AS table_name, + 'currency_concept_id' AS concept_field, + COUNT(CASE WHEN currency_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(currency_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_cost ev +WHERE currency_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_cost' AS table_name, + 'revenue_code_concept_id' AS concept_field, + COUNT(CASE WHEN revenue_code_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(revenue_code_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_cost ev +WHERE revenue_code_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_cost' AS table_name, + 'drg_concept_id' AS concept_field, + COUNT(CASE WHEN drg_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(drg_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_cost ev +WHERE drg_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_condition_era' AS table_name, + 'condition_concept_id' AS concept_field, + COUNT(CASE WHEN condition_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(condition_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_condition_era ev +WHERE condition_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_drug_era' AS table_name, + 'drug_concept_id' AS concept_field, + COUNT(CASE WHEN drug_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(drug_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_drug_era ev +WHERE drug_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_dose_era' AS table_name, + 'drug_concept_id' AS concept_field, + COUNT(CASE WHEN drug_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(drug_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_dose_era ev +WHERE drug_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_dose_era' AS table_name, + 'unit_concept_id' AS concept_field, + COUNT(CASE WHEN unit_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(unit_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_dose_era ev +WHERE unit_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_specimen' AS table_name, + 'specimen_concept_id' AS concept_field, + COUNT(CASE WHEN specimen_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(specimen_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_specimen ev +WHERE specimen_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_specimen' AS table_name, + 'specimen_type_concept_id' AS concept_field, + COUNT(CASE WHEN specimen_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(specimen_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_specimen ev +WHERE specimen_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_specimen' AS table_name, + 'unit_concept_id' AS concept_field, + COUNT(CASE WHEN unit_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(unit_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_specimen ev +WHERE unit_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_specimen' AS table_name, + 'anatomic_site_concept_id' AS concept_field, + COUNT(CASE WHEN anatomic_site_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(anatomic_site_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_specimen ev +WHERE anatomic_site_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_specimen' AS table_name, + 'disease_status_concept_id' AS concept_field, + COUNT(CASE WHEN disease_status_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(disease_status_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_specimen ev +WHERE disease_status_source_value IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_note' AS table_name, + 'note_type_concept_id' AS concept_field, + COUNT(CASE WHEN note_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(note_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_note ev +WHERE note_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_note' AS table_name, + 'note_class_concept_id' AS concept_field, + COUNT(CASE WHEN note_class_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(note_class_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_note ev +WHERE note_class_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_note' AS table_name, + 'encoding_concept_id' AS concept_field, + COUNT(CASE WHEN encoding_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(encoding_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_note ev +WHERE encoding_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_note' AS table_name, + 'language_concept_id' AS concept_field, + COUNT(CASE WHEN language_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(language_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_note ev +WHERE language_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_note_nlp' AS table_name, + 'section_concept_id' AS concept_field, + COUNT(CASE WHEN section_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(section_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_note_nlp ev +WHERE section_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_note_nlp' AS table_name, + 'note_nlp_concept_id' AS concept_field, + COUNT(CASE WHEN note_nlp_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(note_nlp_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_note_nlp ev +WHERE note_nlp_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_fact_relationship' AS table_name, + 'domain_concept_id_1' AS concept_field, + COUNT(CASE WHEN domain_concept_id_1 > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(domain_concept_id_1 > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_fact_relationship ev +WHERE domain_concept_id_1 IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_fact_relationship' AS table_name, + 'domain_concept_id_2' AS concept_field, + COUNT(CASE WHEN domain_concept_id_2 > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(domain_concept_id_2 > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_fact_relationship ev +WHERE domain_concept_id_2 IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_fact_relationship' AS table_name, + 'relationship_concept_id' AS concept_field, + COUNT(CASE WHEN relationship_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(relationship_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_fact_relationship ev +WHERE relationship_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_cohort_attribute' AS table_name, + 'value_as_concept_id' AS concept_field, + COUNT(CASE WHEN value_as_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(value_as_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_cohort_attribute ev +WHERE value_as_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_metadata' AS table_name, + 'metadata_concept_id' AS concept_field, + COUNT(CASE WHEN metadata_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(metadata_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_metadata ev +WHERE metadata_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_metadata' AS table_name, + 'metadata_type_concept_id' AS concept_field, + COUNT(CASE WHEN metadata_type_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(metadata_type_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_metadata ev +WHERE metadata_type_concept_id IS NOT NULL +; + +INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate +SELECT + 'cdm_metadata' AS table_name, + 'value_as_concept_id' AS concept_field, + COUNT(CASE WHEN value_as_concept_id > 0 THEN 1 ELSE NULL END) AS count, + IF(COUNT(*) > 0, + ROUND(CAST(COUNT(IF(value_as_concept_id > 0, 1, NULL)) AS FLOAT64) / COUNT(*) * 100, 2), + 0) AS percent, + COUNT(*) AS total +FROM `@etl_project`.@etl_dataset.cdm_metadata ev +WHERE value_as_concept_id IS NOT NULL +; + diff --git a/test/metrics_gen/me_mapping_rate_from_conf.sql b/test/metrics_gen/me_mapping_rate_from_conf.sql index d8aab7f..eacb88f 100644 --- a/test/metrics_gen/me_mapping_rate_from_conf.sql +++ b/test/metrics_gen/me_mapping_rate_from_conf.sql @@ -18,7 +18,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_care_site ev -WHERE place_of_service_source_value IS NOT NULL +WHERE place_of_service_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -31,7 +31,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_provider ev -WHERE specialty_source_value IS NOT NULL +WHERE specialty_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -44,7 +44,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_provider ev -WHERE gender_source_value IS NOT NULL +WHERE gender_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -57,7 +57,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_person ev -WHERE gender_source_value IS NOT NULL +WHERE gender_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -70,7 +70,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_person ev -WHERE race_source_value IS NOT NULL +WHERE race_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -83,7 +83,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_person ev -WHERE ethnicity_source_value IS NOT NULL +WHERE ethnicity_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -109,7 +109,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_death ev -WHERE cause_source_value IS NOT NULL +WHERE cause_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -135,7 +135,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_payer_plan_period ev -WHERE payer_source_value IS NOT NULL +WHERE payer_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -148,7 +148,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_payer_plan_period ev -WHERE plan_source_value IS NOT NULL +WHERE plan_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -161,7 +161,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_payer_plan_period ev -WHERE sponsor_source_value IS NOT NULL +WHERE sponsor_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -174,7 +174,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_payer_plan_period ev -WHERE stop_reason_source_value IS NOT NULL +WHERE stop_reason_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -226,7 +226,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_visit_occurrence ev -WHERE admitting_source_value IS NOT NULL +WHERE admitting_source_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -239,7 +239,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_visit_occurrence ev -WHERE discharge_to_source_value IS NOT NULL +WHERE discharge_to_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -291,7 +291,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_visit_detail ev -WHERE admitting_source_value IS NOT NULL +WHERE admitting_source_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -304,7 +304,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_visit_detail ev -WHERE discharge_to_source_value IS NOT NULL +WHERE discharge_to_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -317,7 +317,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_condition_occurrence ev -WHERE condition_source_value IS NOT NULL +WHERE condition_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -343,7 +343,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_condition_occurrence ev -WHERE condition_status_source_value IS NOT NULL +WHERE condition_status_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -356,7 +356,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_procedure_occurrence ev -WHERE procedure_source_value IS NOT NULL +WHERE procedure_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -382,7 +382,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_procedure_occurrence ev -WHERE modifier_source_value IS NOT NULL +WHERE modifier_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -395,7 +395,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_observation ev -WHERE observation_source_value IS NOT NULL +WHERE observation_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -421,7 +421,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_observation ev -WHERE value_as_string IS NOT NULL +WHERE value_as_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -434,7 +434,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_observation ev -WHERE qualifier_source_value IS NOT NULL +WHERE qualifier_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -447,7 +447,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_observation ev -WHERE unit_source_value IS NOT NULL +WHERE unit_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -460,7 +460,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_measurement ev -WHERE measurement_source_value IS NOT NULL +WHERE measurement_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -499,7 +499,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_measurement ev -WHERE value_source_value IS NOT NULL +WHERE value_as_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -512,7 +512,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_measurement ev -WHERE unit_source_value IS NOT NULL +WHERE unit_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -525,7 +525,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_device_exposure ev -WHERE device_source_value IS NOT NULL +WHERE device_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -551,7 +551,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_drug_exposure ev -WHERE drug_source_value IS NOT NULL +WHERE drug_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -577,7 +577,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_drug_exposure ev -WHERE route_source_value IS NOT NULL +WHERE route_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -616,7 +616,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_cost ev -WHERE revenue_code_source_value IS NOT NULL +WHERE revenue_code_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -629,7 +629,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_cost ev -WHERE drg_source_value IS NOT NULL +WHERE drg_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -694,7 +694,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_specimen ev -WHERE specimen_source_value IS NOT NULL +WHERE specimen_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -720,7 +720,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_specimen ev -WHERE unit_source_value IS NOT NULL +WHERE unit_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -733,7 +733,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_specimen ev -WHERE anatomic_site_source_value IS NOT NULL +WHERE anatomic_site_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate @@ -746,7 +746,7 @@ SELECT 0) AS percent, COUNT(*) AS total FROM `@etl_project`.@etl_dataset.cdm_specimen ev -WHERE disease_status_source_value IS NOT NULL +WHERE disease_status_concept_id IS NOT NULL ; INSERT INTO `@metrics_project`.@metrics_dataset.me_mapping_rate diff --git a/test/ut/README.md b/test/ut/README.md index 973869b..af73bc6 100644 --- a/test/ut/README.md +++ b/test/ut/README.md @@ -33,6 +33,13 @@ ### Change Log (latest first) ### + +**2021-01-26** + +* generate_required_test() function is added +* inactivation flag for fields is added (see .conf) + + **2021-01-19** * This README is created diff --git a/test/ut/gen_bq_ut_basic.py b/test/ut/gen_bq_ut_basic.py index 49688e3..2a363fb 100644 --- a/test/ut/gen_bq_ut_basic.py +++ b/test/ut/gen_bq_ut_basic.py @@ -262,6 +262,54 @@ def generate_fk_test(table_config, config): return result_queries +''' +---------------------------------------------------- + generate_required_test() + returns test query for the given table +---------------------------------------------------- +''' +def generate_required_test(table_config, config): + + q_template = "\n" + \ + "-- -------------------------------------------------------------------\n" + \ + "-- required\n" + \ + "-- -------------------------------------------------------------------\n" + \ + "\n" + \ + "INSERT INTO `{metrics_project}`.{metrics_dataset}.report_unit_test\n" + \ + "SELECT\n" + \ + " CAST(NULL AS STRING) AS report_id,\n" + \ + " FORMAT_DATETIME('%Y-%m-%d %X', CURRENT_DATETIME()) AS report_starttime, -- X = HH:MM:SS\n" + \ + " 'cdm_{table_name}' AS table_id,\n" + \ + " 'required' AS test_type, -- unique, not null, concept etc.\n" + \ + " '{source_field}' AS field_name,\n" + \ + " CAST(NULL AS STRING) AS criteria_json,\n" + \ + " (COUNT(*) - COUNT({source_field}) = 0) AS test_passed\n" + \ + "FROM\n" + \ + " `{etl_project}`.{etl_dataset}.cdm_{table_name}\n" + \ + ";\n" + + + result_queries = "" + test_list = table_config.get('required') + if test_list != None: + for tst in test_list: + print(tst) + + if tst.get('inactive_status') == None: + + result_queries = result_queries + "\n" + \ + q_template.format( + metrics_project = config['metrics_project'], + metrics_dataset = config['metrics_dataset'], + etl_project = config['etl_project'], + etl_dataset = config['etl_dataset'], + table_name = table_config['table'].replace(config['cdm_prefix'], ''), + source_field = tst['source_field'] + ) + + return result_queries + + ''' ---------------------------------------------------- @@ -278,6 +326,7 @@ def generate_queries(header, config): s_queries.append(q_header.format(header=t['table'])) s_queries.append(generate_unique_test(t, config)) s_queries.append(generate_fk_test(t, config)) + s_queries.append(generate_required_test(t, config)) return s_queries diff --git a/vocabulary_refresh/z_refresh_results.sql b/vocabulary_refresh/z_refresh_results.sql index 34877f0..6cccd4d 100644 --- a/vocabulary_refresh/z_refresh_results.sql +++ b/vocabulary_refresh/z_refresh_results.sql @@ -140,6 +140,18 @@ ORDER BY c.vocabulary_id | 7 | wrong valid_start_date, valid_end_date or invalid_reason for the concept | 4271 | +----------+--------------------------------------------------------------------------+-----------+ +-- ----------------------------------------------------------------------------------- +-- 2021-01-26 +-- Remove duplicated measurement mapping +-- ----------------------------------------------------------------------------------- + ++----------+--------------------------------------------------------------------------+-----------+ +| check_id | check_name | row_count | ++----------+--------------------------------------------------------------------------+-----------+ +| 7 | wrong valid_start_date, valid_end_date or invalid_reason for the concept | 4271 | ++----------+--------------------------------------------------------------------------+-----------+ + + -- -----------------------------------------------------------------------------------