Skip to content

Commit

Permalink
Use imaging UID from OMOP-ES extract (#469)
Browse files Browse the repository at this point in the history
* Add study uid to columns declaration

* Add study UID as part of the message info

* Query VNA with UID, fall back to MRN + accession number if not found

* Add study UID to test data

* Deal with ruff and mypy warnings

* Test that VNA querying falls back to MRN + acc number if UID not found

* Refactor `anonymise_dicom`; move database updating out of `_anonymise_dicom_from_scheme()`

Avoids requiring `study_info` arg in `_anonymise_dicom_from_scheme()`
and limiting its responsibilities

* Query PIXL DB with study UID

* Be more explicit about where study info values are coming from

Avoids magic values that might cause confusion about where they're
coming from.

* Rename `row_for_dicom_testing` and be more explicit where its values come from

* Update tests with new fixtures

* Add study UID to PIXL DB Image model

* Add study UIDs in `pixl_dcmd` test fixtures

* Fix tests

* Run alembic migration

* Also filter out entries where accession number is empty string

* Add study UID column to test files

* Also add study UID when uploading images to PIXL DB

* Add study UIDs in cli test data

* Refactor core tests: add `mock_message` fixture and add study UID

* Need `study_uid` here as well

* This test file also needs a study UID column

* Fall back to MRN + accession number when querying PIXL DB

* Fix the study UID for the system test studies to match what is in the extracts

* Add test for `get_unexported_image` fallback

* Fix fallback with try-except

`sqlalchemy` actually does raise an error when `.one()` doesn't return
anything.

* Test querying an existing pseudo study UID

* Fix docstrings

Co-authored-by: Stef Piatek <[email protected]>

* Make `study_uid` column nullable

Co-authored-by: Stef Piatek <[email protected]>

* Make `study_uid` optional in PIXL DB

---------

Co-authored-by: Stef Piatek <[email protected]>
  • Loading branch information
milanmlft and stefpiatek authored Aug 6, 2024
1 parent 6eb8750 commit bd82c65
Show file tree
Hide file tree
Showing 27 changed files with 386 additions and 155 deletions.
1 change: 1 addition & 0 deletions cli/src/pixl_cli/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def _add_images_to_session(extract: Extract, images_df: pd.DataFrame, session: S
accession_number=row["accession_number"],
study_date=row["study_date"],
mrn=row["mrn"],
study_uid=row["study_uid"],
extract=extract,
extract_id=extract.extract_id,
)
Expand Down
9 changes: 8 additions & 1 deletion cli/src/pixl_cli/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,12 @@ def _check_and_parse_parquet(private_dir: Path, public_dir: Path) -> pd.DataFram
# joining data together
people_procedures = people.merge(procedure, on="person_id")
people_procedures_accessions = people_procedures.merge(accessions, on="procedure_occurrence_id")
return people_procedures_accessions[~people_procedures_accessions["AccessionNumber"].isna()]

# Filter out any rows where accession number is NA or an empty string
return people_procedures_accessions[
~people_procedures_accessions["AccessionNumber"].isna()
& (people_procedures_accessions["AccessionNumber"] != "")
]


class DF_COLUMNS(StrEnum): # noqa: N801
Expand All @@ -149,6 +154,7 @@ class DF_COLUMNS(StrEnum): # noqa: N801
project_name = auto()
extract_generated_timestamp = auto()
study_date = auto()
study_uid = auto()


MAP_CSV_TO_MESSAGE_KEYS = {
Expand All @@ -160,6 +166,7 @@ class DF_COLUMNS(StrEnum): # noqa: N801
"PrimaryMrn": "mrn",
"AccessionNumber": "accession_number",
"procedure_date": "study_date",
"StudyUid_X": "study_uid",
}


Expand Down
1 change: 1 addition & 0 deletions cli/src/pixl_cli/_message_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def messages_from_df(
message = Message(
mrn=row["mrn"],
accession_number=row["accession_number"],
study_uid=row["study_uid"],
study_date=row["study_date"],
procedure_occurrence_id=row["procedure_occurrence_id"],
project_name=row["project_name"],
Expand Down
19 changes: 14 additions & 5 deletions cli/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def db_engine(monkeymodule) -> Engine:


@pytest.fixture()
def db_session(db_engine) -> Session:
def db_session(db_engine) -> Generator[Session]:
"""
Creates a session for interacting with an in memory database.
Expand All @@ -127,11 +127,12 @@ def db_session(db_engine) -> Session:
STUDY_DATE = datetime.date.fromisoformat("2023-01-01")


def _make_message(project_name: str, accession_number: str, mrn: str) -> Message:
def _make_message(project_name: str, accession_number: str, mrn: str, study_uid: str) -> Message:
return Message(
project_name=project_name,
accession_number=accession_number,
mrn=mrn,
study_uid=study_uid,
study_date=STUDY_DATE,
procedure_occurrence_id=1,
extract_generated_timestamp=datetime.datetime.now(tz=datetime.UTC),
Expand All @@ -142,9 +143,15 @@ def _make_message(project_name: str, accession_number: str, mrn: str) -> Message
def example_messages():
"""Test input data."""
return [
_make_message(project_name="i-am-a-project", accession_number="123", mrn="mrn"),
_make_message(project_name="i-am-a-project", accession_number="234", mrn="mrn"),
_make_message(project_name="i-am-a-project", accession_number="345", mrn="mrn"),
_make_message(
project_name="i-am-a-project", accession_number="123", mrn="mrn", study_uid="1.2.3"
),
_make_message(
project_name="i-am-a-project", accession_number="234", mrn="mrn", study_uid="2.3.4"
),
_make_message(
project_name="i-am-a-project", accession_number="345", mrn="mrn", study_uid="3.4.5"
),
]


Expand All @@ -163,6 +170,7 @@ def rows_in_session(db_session) -> Session:
accession_number="123",
study_date=STUDY_DATE,
mrn="mrn",
study_uid="1.2.3",
extract=extract,
extract_id=extract.extract_id,
exported_at=datetime.datetime.now(tz=datetime.UTC),
Expand All @@ -171,6 +179,7 @@ def rows_in_session(db_session) -> Session:
accession_number="234",
study_date=STUDY_DATE,
mrn="mrn",
study_uid="2.3.4",
extract=extract,
extract_id=extract.extract_id,
)
Expand Down
3 changes: 3 additions & 0 deletions cli/tests/test_messages_from_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def test_messages_from_csv(omop_resources: Path) -> None:
procedure_occurrence_id=0,
mrn="patient_identifier",
accession_number="123456789",
study_uid="1.2.3.4.5.6.7.8",
project_name="ms-pinpoint-test",
extract_generated_timestamp=datetime.datetime.fromisoformat("2023-01-01T00:01:00Z"),
study_date=datetime.date.fromisoformat("2022-01-01"),
Expand All @@ -72,6 +73,7 @@ def test_messages_from_parquet(omop_resources: Path) -> None:
Message(
mrn="987654321",
accession_number="AA12345601",
study_uid="1.3.6.1.4.1.14519.5.2.1.99.1071.12985477682660597455732044031486",
study_date=datetime.date.fromisoformat("2020-05-23"),
procedure_occurrence_id=4,
project_name="test-extract-uclh-omop-cdm",
Expand All @@ -80,6 +82,7 @@ def test_messages_from_parquet(omop_resources: Path) -> None:
Message(
mrn="987654321",
accession_number="AA12345605",
study_uid="1.2.276.0.7230010.3.1.2.929116473.1.1710754859.579485",
study_date=datetime.date.fromisoformat("2020-05-23"),
procedure_occurrence_id=5,
project_name="test-extract-uclh-omop-cdm",
Expand Down
3 changes: 2 additions & 1 deletion pixl_core/src/core/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class Image(Base):
accession_number: Mapped[str]
study_date: Mapped[Date] = mapped_column(Date())
mrn: Mapped[str]
study_uid: Mapped[Optional[str]]
pseudo_study_uid: Mapped[Optional[str]]
exported_at: Mapped[DateTime] = mapped_column(DateTime(timezone=True), nullable=True)
extract: Mapped[Extract] = relationship()
Expand All @@ -61,6 +62,6 @@ def __repr__(self) -> str:
"""Nice representation for printing."""
return (
f"<{self.__class__.__name__} "
f"{self.image_id=} {self.accession_number=} {self.mrn=} "
f"{self.image_id=} {self.accession_number=} {self.mrn=} {self.study_uid=}"
f"{self.pseudo_study_uid} {self.extract_id}>"
).replace(" self.", " ")
3 changes: 2 additions & 1 deletion pixl_core/src/core/patient_queue/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@

@dataclass
class Message:
"""Class to represent a message containing the relevant information for a study."""
"""Representation of a RabbitMQ message containing the information to identify a DICOM study."""

mrn: str
accession_number: str
study_uid: str
study_date: date
procedure_occurrence_id: int
project_name: str
Expand Down
19 changes: 19 additions & 0 deletions pixl_core/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import pytest
import requests
from core.db.models import Base, Extract, Image
from core.patient_queue.message import Message
from pydicom.uid import generate_uid
from pytest_pixl.helpers import run_subprocess
from sqlalchemy import Engine, create_engine
Expand Down Expand Up @@ -153,6 +154,7 @@ def rows_in_session(db_session) -> Session:
accession_number="123",
study_date=STUDY_DATE,
mrn="mrn",
study_uid="1.2.3",
extract=extract,
exported_at=datetime.datetime.now(tz=datetime.timezone.utc),
pseudo_study_uid=generate_uid(entropy_srcs=["already_exported"]),
Expand All @@ -161,6 +163,7 @@ def rows_in_session(db_session) -> Session:
accession_number="234",
study_date=STUDY_DATE,
mrn="mrn",
study_uid="2.3.4",
extract=extract,
pseudo_study_uid=generate_uid(entropy_srcs=["not_yet_exported"]),
)
Expand Down Expand Up @@ -197,3 +200,19 @@ def export_dir(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Path:
export_dir = tmp_path_factory.mktemp("export_base") / "exports"
export_dir.mkdir()
return export_dir


@pytest.fixture()
def mock_message() -> Message:
"""An example Message used for testing"""
return Message(
mrn="111",
accession_number="123",
study_uid="1.2.3",
study_date=datetime.date.fromisoformat("2022-11-22"),
procedure_occurrence_id="234",
project_name="test project",
extract_generated_timestamp=datetime.datetime.strptime(
"Dec 7 2023 2:08PM", "%b %d %Y %I:%M%p"
).replace(tzinfo=datetime.timezone.utc),
)
27 changes: 7 additions & 20 deletions pixl_core/tests/patient_queue/test_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,35 +13,22 @@
# limitations under the License.
from __future__ import annotations

import datetime
from core.patient_queue.message import deserialise

from core.patient_queue.message import Message, deserialise

msg = Message(
mrn="111",
accession_number="123",
study_date=datetime.date.fromisoformat("2022-11-22"),
procedure_occurrence_id="234",
project_name="test project",
extract_generated_timestamp=datetime.datetime.strptime(
"Dec 7 2023 2:08PM", "%b %d %Y %I:%M%p"
).replace(tzinfo=datetime.timezone.utc),
)


def test_serialise() -> None:
def test_serialise(mock_message) -> None:
"""Checks that messages can be correctly serialised"""
msg_body = msg.serialise(deserialisable=False)
msg_body = mock_message.serialise(deserialisable=False)
assert (
msg_body == b'{"mrn": "111", "accession_number": "123", '
msg_body == b'{"mrn": "111", "accession_number": "123", "study_uid": "1.2.3", '
b'"study_date": "2022-11-22", '
b'"procedure_occurrence_id": "234", '
b'"project_name": "test project", '
b'"extract_generated_timestamp": "2023-12-07T14:08:00+00:00"}'
)


def test_deserialise() -> None:
def test_deserialise(mock_message) -> None:
"""Checks if deserialised messages are the same as the original"""
serialised_msg = msg.serialise()
assert deserialise(serialised_msg) == msg
serialised_msg = mock_message.serialise()
assert deserialise(serialised_msg) == mock_message
13 changes: 2 additions & 11 deletions pixl_core/tests/patient_queue/test_producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,9 @@
from __future__ import annotations

import pytest
from core.patient_queue.message import Message
from core.patient_queue.producer import PixlProducer

TEST_QUEUE = "test_publish"
TEST_MESSAGE = Message(
mrn="111",
accession_number="123",
study_date="2022-11-22T13:33:00+00:00",
procedure_occurrence_id="234",
project_name="test project",
extract_generated_timestamp="2023-12-07T14:08:00+00:00",
)


@pytest.mark.usefixtures("run_containers")
Expand All @@ -36,14 +27,14 @@ def test_create_pixl_producer() -> None:


@pytest.mark.usefixtures("run_containers")
def test_publish() -> None:
def test_publish(mock_message) -> None:
"""
Checks that after publishing, there is one message in the queue.
Will only work if nothing has been added to queue before.
"""
with PixlProducer(queue_name=TEST_QUEUE) as pp:
pp.clear_queue()
pp.publish(messages=[TEST_MESSAGE])
pp.publish(messages=[mock_message])

with PixlProducer(queue_name=TEST_QUEUE) as pp:
assert pp.message_count == 1
13 changes: 2 additions & 11 deletions pixl_core/tests/patient_queue/test_subscriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,11 @@
from unittest.mock import AsyncMock

import pytest
from core.patient_queue.message import Message
from core.patient_queue.producer import PixlProducer
from core.patient_queue.subscriber import PixlConsumer
from core.token_buffer.tokens import TokenBucket

TEST_QUEUE = "test_consume"
TEST_MESSAGE = Message(
mrn="111",
accession_number="123",
study_date="2022-11-22T13:33:00+00:00",
procedure_occurrence_id="234",
project_name="test project",
extract_generated_timestamp="2023-12-07T14:08:00+00:00",
)


class ExpectedTestError(Exception):
Expand All @@ -42,10 +33,10 @@ class ExpectedTestError(Exception):
@pytest.mark.xfail(
reason="Sanity check that async test gets run", strict=True, raises=ExpectedTestError
)
async def test_create() -> None:
async def test_create(mock_message) -> None:
"""Checks consume is working."""
with PixlProducer(queue_name=TEST_QUEUE) as producer:
producer.publish(messages=[TEST_MESSAGE])
producer.publish(messages=[mock_message])

consume = AsyncMock()
async with PixlConsumer(
Expand Down
47 changes: 29 additions & 18 deletions pixl_dcmd/src/pixl_dcmd/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from core.db.models import Image, Extract
from sqlalchemy import URL, create_engine, exists
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import sessionmaker, exc

from pixl_dcmd._dicom_helpers import StudyInfo

Expand All @@ -41,16 +41,15 @@ def get_uniq_pseudo_study_uid_and_update_db(
project_slug: str, study_info: StudyInfo
) -> UID:
"""
Checks if record (slug, mrn, acc_num) exists in the database,
Checks if record (by slug and study info) exists in the database,
gets the pseudo_study_uid if it is not None or records a new, unique one.
Returns the pseudo_study_uid.
"""
PixlSession = sessionmaker(engine)
with PixlSession() as pixl_session, pixl_session.begin():
existing_image = get_unexported_image(
project_slug,
study_info.mrn,
study_info.accession_number,
study_info,
pixl_session,
)
if existing_image.pseudo_study_uid is None:
Expand Down Expand Up @@ -85,24 +84,36 @@ def is_unique_pseudo_study_uid(pseudo_study_uid: str, pixl_session: Session) ->

def get_unexported_image(
project_slug: str,
mrn: str,
accession_number: str,
study_info: StudyInfo,
pixl_session: Session,
) -> Image:
"""
Get an existing, non-exported (for this project) image record from the database
identified by MRN and Accession Number.
identified by the study UID. If no result is found, retry with querying on
MRN + accession number. If this fails as well, raise a PixlDiscardError.
"""
existing_image: Image = (
pixl_session.query(Image)
.join(Extract)
.filter(
Extract.slug == project_slug,
Image.accession_number == accession_number,
Image.mrn == mrn,
Image.exported_at == None, # noqa: E711
try:
existing_image: Image = (
pixl_session.query(Image)
.join(Extract)
.filter(
Extract.slug == project_slug,
Image.study_uid == study_info.study_uid,
Image.exported_at == None, # noqa: E711
)
.one()
)
# If no image is found by study UID, try MRN + accession number
except exc.NoResultFound:
existing_image = (
pixl_session.query(Image)
.join(Extract)
.filter(
Extract.slug == project_slug,
Image.mrn == study_info.mrn,
Image.accession_number == study_info.accession_number,
Image.exported_at == None, # noqa: E711
)
.one()
)
.one()
)

return existing_image
Loading

0 comments on commit bd82c65

Please sign in to comment.