Skip to content

Commit

Permalink
Merge pull request #999 from opensafely-core/handle-duplicate-ons-deaths
Browse files Browse the repository at this point in the history
feat: Handle duplicate records in ons_deaths in EMIS
  • Loading branch information
inglesp authored Nov 20, 2023
2 parents fcdfdd3 + 179b9aa commit 18c9544
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 10 deletions.
12 changes: 8 additions & 4 deletions cohortextractor/emis_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -1530,15 +1530,18 @@ def patients_with_these_codes_on_death_certificate(
)
else:
code_conditions = "1 = 1"
# The ONS deaths data contains some duplicate patient IDs. In most
# cases these are exact duplicate rows, but in some cases the same
# patient appears twice with different dates of death or a different
# underlying cause of death. We handle this by (arbitrarily) taking the
# earliest date of death or the lexically smallest ICD-10 code.
if returning == "binary_flag":
column_definition = "1"
elif returning == "date_of_death":
# Yes, we're converting an integer to a string to a timestamp to a date.
column_definition = (
"CAST(date_parse(CAST(o.reg_stat_dod AS VARCHAR), '%Y%m%d') AS date)"
)
column_definition = "MIN(CAST(date_parse(CAST(o.reg_stat_dod AS VARCHAR), '%Y%m%d') AS date))"
elif returning == "underlying_cause_of_death":
column_definition = "o.icd10u"
column_definition = "MIN(o.icd10u)"
else:
raise ValueError(f"Unsupported `returning` value: {returning}")
# ONS_TABLE is updated with each release of data from ONS, so we need to
Expand All @@ -1554,6 +1557,7 @@ def patients_with_these_codes_on_death_certificate(
WHERE ({code_conditions})
AND {date_condition}
AND o.upload_date = (SELECT MAX(upload_date) FROM {ONS_TABLE})
GROUP BY p.registration_id, p.hashed_organisation
"""

def patients_died_from_any_cause(
Expand Down
50 changes: 44 additions & 6 deletions tests/test_emis_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -1566,29 +1566,55 @@ def test_patients_with_these_codes_on_death_certificate():
nhs_no="bbb",
ONSDeath=[
ONSDeaths(
reg_stat_dod=20210101, icd10u=code, upload_date="2020-04-01"
)
reg_stat_dod=20210101,
icd10u=code,
upload_date="2020-03-01",
),
ONSDeaths(
reg_stat_dod=20210101,
icd10u=code,
upload_date="2020-04-01",
),
],
),
# Died of something else
Patient(
nhs_no="ccc",
ONSDeath=[
ONSDeaths(
reg_stat_dod=20200201, icd10u="MI", upload_date="2020-04-01"
)
reg_stat_dod=20200201,
icd10u="MI",
upload_date="2020-03-01",
),
ONSDeaths(
reg_stat_dod=20200201,
icd10u="MI",
upload_date="2020-04-01",
),
],
),
# Covid underlying cause
Patient(
nhs_no="ddd",
ONSDeath=[
ONSDeaths(
reg_stat_dod=20200201,
icd10u=code,
icd10014="MI",
upload_date="2020-03-01",
),
ONSDeaths(
reg_stat_dod=20200201,
icd10u=code,
icd10014="MI",
upload_date="2020-04-01",
)
),
ONSDeaths(
reg_stat_dod=20200202,
icd10u=code,
icd10014="MJ",
upload_date="2020-04-01",
),
],
),
# Covid not underlying cause
Expand All @@ -1600,7 +1626,19 @@ def test_patients_with_these_codes_on_death_certificate():
icd10u="MI",
icd10014=code,
upload_date="2020-04-01",
)
),
ONSDeaths(
reg_stat_dod=20200302,
icd10u="MJ",
icd10014=code,
upload_date="2020-04-01",
),
ONSDeaths(
reg_stat_dod=20200301,
icd10u="MI",
icd10014=code,
upload_date="2020-04-01",
),
],
),
]
Expand Down

0 comments on commit 18c9544

Please sign in to comment.