Skip to content

Commit

Permalink
fix: Use HES archive tables
Browse files Browse the repository at this point in the history
See the discussion in opensafely-core/ehrql#2047 for details.

Note that the archive tables contain data only up to March 2024.
  • Loading branch information
inglesp committed Aug 22, 2024
1 parent 51c9e7e commit f6541be
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 54 deletions.
92 changes: 47 additions & 45 deletions cohortextractor/tpp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2467,7 +2467,7 @@ def patients_attended_emergency_care(
assert False

date_condition, date_joins = self.get_date_condition(
"EC", "Arrival_Date", between
"EC_ARCHIVED", "Arrival_Date", between
)
conditions = [date_condition]

Expand Down Expand Up @@ -2499,14 +2499,14 @@ def patients_attended_emergency_care(
t.Patient_ID AS patient_id,
{column} AS {returning}
FROM (
SELECT EC.Patient_ID, {column},
SELECT EC_ARCHIVED.Patient_ID, {column},
ROW_NUMBER() OVER (
PARTITION BY EC.Patient_ID
ORDER BY Arrival_Date {ordering}, EC.EC_Ident
PARTITION BY EC_ARCHIVED.Patient_ID
ORDER BY Arrival_Date {ordering}, EC_ARCHIVED.EC_Ident
) AS rownum
FROM EC
INNER JOIN EC_Diagnosis
ON EC.EC_Ident = EC_Diagnosis.EC_Ident
FROM EC_ARCHIVED
INNER JOIN EC_Diagnosis_ARCHIVED
ON EC_ARCHIVED.EC_Ident = EC_Diagnosis_ARCHIVED.EC_Ident
{date_joins}
WHERE {conditions}
) t
Expand All @@ -2515,14 +2515,14 @@ def patients_attended_emergency_care(
else:
sql = f"""
SELECT
EC.Patient_ID AS patient_id,
EC_ARCHIVED.Patient_ID AS patient_id,
{column} AS {returning}
FROM EC
INNER JOIN EC_Diagnosis
ON EC.EC_Ident = EC_Diagnosis.EC_Ident
FROM EC_ARCHIVED
INNER JOIN EC_Diagnosis_ARCHIVED
ON EC_ARCHIVED.EC_Ident = EC_Diagnosis_ARCHIVED.EC_Ident
{date_joins}
WHERE {conditions}
GROUP BY EC.Patient_ID
GROUP BY EC_ARCHIVED.Patient_ID
"""
return sql

Expand All @@ -2544,7 +2544,7 @@ def patients_admitted_to_hospital(
"discharge_destination": "Discharge_Destination",
"patient_classification": "Patient_Classification",
"admission_treatment_function_code": "Der_Admit_Treatment_Function_Code",
"days_in_critical_care": "APCS_Der.Spell_PbR_CC_Day",
"days_in_critical_care": "APCS_Der_ARCHIVED.Spell_PbR_CC_Day",
"administrative_category": "Administrative_Category",
"duration_of_elective_wait": "Duration_of_Elective_Wait",
}
Expand Down Expand Up @@ -2605,7 +2605,9 @@ def patients_admitted_to_hospital(
elif returning == "total_critical_care_days_in_period":
# In case of duplicate spells that start on the same date, we take the
# max value by admission date
returning_column = "MAX(CAST(APCS_Der.Spell_PbR_CC_Day AS INTEGER))"
returning_column = (
"MAX(CAST(APCS_Der_ARCHIVED.Spell_PbR_CC_Day AS INTEGER))"
)
use_sum_query = True
sum_adjustment = ""
use_partition_query = False
Expand All @@ -2616,7 +2618,7 @@ def patients_admitted_to_hospital(
raise ValueError(f"Unsupported `returning` value: {returning}")

date_condition, date_joins = self.get_date_condition(
"APCS", "Admission_Date", between
"APCS_ARCHIVED", "Admission_Date", between
)
conditions = [date_condition]

Expand All @@ -2625,12 +2627,12 @@ def patients_admitted_to_hospital(
conditions.append(f"{supported_columns[column_name]} IN ({value_sql})")

if with_at_least_one_day_in_critical_care:
conditions.append("CAST(APCS_Der.Spell_PbR_CC_Day AS int) > 0")
conditions.append("CAST(APCS_Der_ARCHIVED.Spell_PbR_CC_Day AS int) > 0")

if with_these_primary_diagnoses:
assert with_these_primary_diagnoses.system == "icd10"
fragments = [
f"APCS_Der.Spell_Primary_Diagnosis LIKE {pattern} ESCAPE '!'"
f"APCS_Der_ARCHIVED.Spell_Primary_Diagnosis LIKE {pattern} ESCAPE '!'"
for pattern in codelist_to_like_patterns(
with_these_primary_diagnoses, prefix="", suffix="%"
)
Expand Down Expand Up @@ -2673,14 +2675,14 @@ def patients_admitted_to_hospital(
t.Patient_ID AS patient_id,
t.{returning} AS {returning}
FROM (
SELECT APCS.Patient_ID, {returning_column} AS {returning},
SELECT APCS_ARCHIVED.Patient_ID, {returning_column} AS {returning},
ROW_NUMBER() OVER (
PARTITION BY APCS.Patient_ID
ORDER BY APCS.Admission_Date {ordering}, APCS.APCS_Ident
PARTITION BY APCS_ARCHIVED.Patient_ID
ORDER BY APCS_ARCHIVED.Admission_Date {ordering}, APCS_ARCHIVED.APCS_Ident
) AS rownum
FROM APCS
INNER JOIN APCS_Der
ON APCS.APCS_Ident = APCS_Der.APCS_Ident
FROM APCS_ARCHIVED
INNER JOIN APCS_Der_ARCHIVED
ON APCS_ARCHIVED.APCS_Ident = APCS_Der_ARCHIVED.APCS_Ident
{date_joins}
WHERE {conditions}
) t
Expand All @@ -2691,28 +2693,28 @@ def patients_admitted_to_hospital(
SELECT patient_id, SUM({returning}{sum_adjustment}) AS {returning}
FROM (
SELECT
APCS.Patient_ID AS patient_id,
APCS_ARCHIVED.Patient_ID AS patient_id,
{returning_column} AS {returning}
FROM APCS
INNER JOIN APCS_Der
ON APCS.APCS_Ident = APCS_Der.APCS_Ident
FROM APCS_ARCHIVED
INNER JOIN APCS_Der_ARCHIVED
ON APCS_ARCHIVED.APCS_Ident = APCS_Der_ARCHIVED.APCS_Ident
{date_joins}
WHERE {conditions}
GROUP BY APCS.Patient_ID, APCS.Admission_Date
GROUP BY APCS_ARCHIVED.Patient_ID, APCS_ARCHIVED.Admission_Date
) t
GROUP BY patient_id
"""
else:
sql = f"""
SELECT
APCS.Patient_ID AS patient_id,
APCS_ARCHIVED.Patient_ID AS patient_id,
{returning_column} AS {returning}
FROM APCS
INNER JOIN APCS_Der
ON APCS.APCS_Ident = APCS_Der.APCS_Ident
FROM APCS_ARCHIVED
INNER JOIN APCS_Der_ARCHIVED
ON APCS_ARCHIVED.APCS_Ident = APCS_Der_ARCHIVED.APCS_Ident
{date_joins}
WHERE {conditions}
GROUP BY APCS.Patient_ID
GROUP BY APCS_ARCHIVED.Patient_ID
"""
return sql

Expand Down Expand Up @@ -2872,19 +2874,19 @@ def patients_with_ethnicity_from_sus(
Patient_ID,
Ethnic_group AS ethnicity_code
FROM
APCS
APCS_ARCHIVED
UNION ALL
SELECT
Patient_ID,
Ethnic_Category AS ethnicity_code
FROM
EC
EC_ARCHIVED
UNION ALL
SELECT
Patient_ID,
Ethnic_Category AS ethnicity_code
FROM
OPA
OPA_ARCHIVED
) t
WHERE ethnicity_code IS NOT NULL
AND ethnicity_code != '99'
Expand Down Expand Up @@ -2920,7 +2922,7 @@ def patients_outpatient_appointment_date(
returning="binary_flag",
):
date_condition, date_joins = self.get_date_condition(
"OPA", "Appointment_Date", between
"OPA_ARCHIVED", "Appointment_Date", between
)

conditions = [date_condition]
Expand Down Expand Up @@ -2955,13 +2957,13 @@ def patients_outpatient_appointment_date(
if with_these_procedures:
assert with_these_procedures.system == "opcs4"
fragments = [
f"OPA_Proc.Primary_Procedure_Code LIKE {pattern} ESCAPE '!'"
f"OPA_Proc_ARCHIVED.Primary_Procedure_Code LIKE {pattern} ESCAPE '!'"
for pattern in codelist_to_like_patterns(
with_these_procedures, prefix="%", suffix="%"
)
]
conditions.append("(" + " OR ".join(fragments) + ")")
procedures_joins = "JOIN OPA_Proc ON OPA.OPA_Ident = OPA_Proc.OPA_Ident"
procedures_joins = "JOIN OPA_Proc_ARCHIVED ON OPA_ARCHIVED.OPA_Ident = OPA_Proc_ARCHIVED.OPA_Ident"

conditions = " AND ".join(conditions)

Expand Down Expand Up @@ -2994,13 +2996,13 @@ def patients_outpatient_appointment_date(
t.{column_definition} AS {returning}
FROM (
SELECT
OPA.Patient_ID,
OPA_ARCHIVED.Patient_ID,
{column_definition} AS {returning},
ROW_NUMBER() OVER (
PARTITION BY OPA.Patient_ID
PARTITION BY OPA_ARCHIVED.Patient_ID
ORDER BY Appointment_Date {ordering}
) AS rownum
FROM OPA
FROM OPA_ARCHIVED
{date_joins}
{procedures_joins}
WHERE {conditions}
Expand All @@ -3010,15 +3012,15 @@ def patients_outpatient_appointment_date(
else:
return f"""
SELECT
OPA.Patient_ID AS patient_id,
OPA_ARCHIVED.Patient_ID AS patient_id,
{column_definition} AS {returning}
FROM
OPA
OPA_ARCHIVED
{date_joins}
{procedures_joins}
WHERE {conditions}
GROUP BY
OPA.Patient_ID
OPA_ARCHIVED.Patient_ID
"""

@staticmethod
Expand Down
20 changes: 11 additions & 9 deletions tests/tpp_backend_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ class HouseholdMember(Base):


class EC(Base):
__tablename__ = "EC"
__tablename__ = "EC_ARCHIVED"
Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship(
"Patient", back_populates="ECEpisodes", cascade="all, delete"
Expand All @@ -714,7 +714,7 @@ class EC(Base):


class EC_Diagnosis(Base):
__tablename__ = "EC_Diagnosis"
__tablename__ = "EC_Diagnosis_ARCHIVED"

# This column isn't in the actual database but SQLAlchemy gets a bit upset
# if we don't give it a primary key
Expand All @@ -724,7 +724,7 @@ class EC_Diagnosis(Base):
Patient = relationship(
"Patient", back_populates="ECDiagnoses", cascade="all, delete"
)
EC_Ident = Column(Integer, ForeignKey("EC.EC_Ident"))
EC_Ident = Column(Integer, ForeignKey("EC_ARCHIVED.EC_Ident"))
EC = relationship("EC", back_populates="Diagnoses", cascade="all, delete")
EC_Diagnosis_01 = Column(String(collation="Latin1_General_CI_AS"))
EC_Diagnosis_02 = Column(String(collation="Latin1_General_CI_AS"))
Expand Down Expand Up @@ -753,7 +753,7 @@ class EC_Diagnosis(Base):


class APCS(Base):
__tablename__ = "APCS"
__tablename__ = "APCS_ARCHIVED"
Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship(
"Patient", back_populates="APCSEpisodes", cascade="all, delete"
Expand All @@ -776,12 +776,14 @@ class APCS(Base):


class APCS_Der(Base):
__tablename__ = "APCS_Der"
__tablename__ = "APCS_Der_ARCHIVED"
Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship(
"Patient", back_populates="APCS_DerEpisodes", cascade="all, delete"
)
APCS_Ident = Column(Integer, ForeignKey("APCS.APCS_Ident"), primary_key=True)
APCS_Ident = Column(
Integer, ForeignKey("APCS_ARCHIVED.APCS_Ident"), primary_key=True
)
APCS = relationship("APCS", back_populates="APCS_Der")
Spell_Primary_Diagnosis = Column(String)
Spell_Secondary_Diagnosis = Column(String)
Expand Down Expand Up @@ -829,7 +831,7 @@ class HighCostDrugs(Base):


class OPA(Base):
__tablename__ = "OPA"
__tablename__ = "OPA_ARCHIVED"
Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship(
"Patient", back_populates="OPAEpisodes", cascade="all, delete"
Expand All @@ -848,11 +850,11 @@ class OPA(Base):


class OPA_Proc(Base):
__tablename__ = "OPA_Proc"
__tablename__ = "OPA_Proc_ARCHIVED"

Patient_ID = Column(Integer, ForeignKey("Patient.Patient_ID"))
Patient = relationship("Patient", back_populates="OPA_Proc")
OPA_Ident = Column(Integer, ForeignKey("OPA.OPA_Ident"), primary_key=True)
OPA_Ident = Column(Integer, ForeignKey("OPA_ARCHIVED.OPA_Ident"), primary_key=True)
OPA = relationship(
"OPA",
back_populates="OPA_Proc",
Expand Down

0 comments on commit f6541be

Please sign in to comment.