From b9853a82794654d7fbfdc77bea79449b8ad97537 Mon Sep 17 00:00:00 2001 From: Kai Schlamp Date: Mon, 5 Feb 2024 01:30:13 +0000 Subject: [PATCH] Improve radis api --- notebooks/radis_api.ipynb | 141 ++++++++++++++---- radis/api/site.py | 29 +++- radis/api/views.py | 48 +++++- radis/core/management/commands/populate_db.py | 2 +- radis/core/middlewares.py | 2 +- radis/search/apps.py | 5 +- radis/search/models.py | 114 +++++++------- 7 files changed, 248 insertions(+), 93 deletions(-) diff --git a/notebooks/radis_api.ipynb b/notebooks/radis_api.ipynb index c20e738e..042a9498 100644 --- a/notebooks/radis_api.ipynb +++ b/notebooks/radis_api.ipynb @@ -2,15 +2,15 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Status Code: 201\n", - "{'id': 7, 'document_id': 'gepacs_3dfidii5858-6633i4-ii398841', 'pacs_aet': 'gepacs', 'pacs_name': 'GE PACS', 'patient_id': '1234578', 'patient_birth_date': '1976-05-23', 'patient_sex': 'M', 'study_instance_uid': '34343-34343-34343', 'accession_number': '345348389', 'study_description': 'CT of the Thorax', 'study_datetime': '2000-08-10T00:00:00+02:00', 'series_instance_uid': '34343-676556-3343', 'modalities_in_study': ['CT', 'PET'], 'sop_instance_uid': '35858-384834-3843', 'references': ['http://gepacs.com/34343-34343-34343'], 'body': 'This is the report', 'institutes': [2]}\n" + "Status Code: 400\n", + "{'document_id': ['report with this document id already exists.']}\n" ] } ], @@ -20,8 +20,10 @@ "\n", "base_url = \"http://localhost:8000/api/\"\n", "\n", + "document_id = \"gepacs_3dfidii5858-6633i4-ii398841\"\n", + "\n", "data = {\n", - " \"document_id\": \"gepacs_3dfidii5858-6633i4-ii398841\",\n", + " \"document_id\": document_id,\n", " \"groups\": [2],\n", " \"pacs_aet\": \"gepacs\",\n", " \"pacs_name\": \"GE PACS\",\n", @@ -40,43 +42,110 @@ "}\n", "\n", "auth_token = \"f2e7412ca332a85e37f3fce88c6a1904fe35ad63\"\n", - "# response = requests.post(base_url + \"reports/\", json=data, headers={\"Authorization\": f\"Token {auth_token}\"})\n", - "response = requests.post(base_url + \"reports/\", json=data)\n", + "response = requests.post(base_url + \"reports/\", json=data, headers={\"Authorization\": f\"Token {auth_token}\"})\n", "\n", "print(f\"Status Code: {response.status_code}\")\n", "print(response.json())\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 102,\n", + " 'document_id': 'gepacs_3dfidii5858-6633i4-ii398841',\n", + " 'pacs_aet': 'gepacs',\n", + " 'pacs_name': 'GE PACS',\n", + " 'patient_id': '1234578',\n", + " 'patient_birth_date': '1976-05-23',\n", + " 'patient_sex': 'M',\n", + " 'study_instance_uid': '34343-34343-34343',\n", + " 'accession_number': '345348389',\n", + " 'study_description': 'CT of the Thorax',\n", + " 'study_datetime': '2000-08-10T00:00:00+02:00',\n", + " 'series_instance_uid': '34343-676556-3343',\n", + " 'modalities_in_study': ['CT', 'PET'],\n", + " 'sop_instance_uid': '35858-384834-3843',\n", + " 'references': ['http://gepacs.com/34343-34343-34343'],\n", + " 'body': 'This is the updated report',\n", + " 'groups': [2]}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = {\n", + " \"document_id\": \"gepacs_3dfidii5858-6633i4-ii398841\",\n", + " \"groups\": [2],\n", + " \"pacs_aet\": \"gepacs\",\n", + " \"pacs_name\": \"GE PACS\",\n", + " \"patient_id\": \"1234578\",\n", + " \"patient_birth_date\": date(1976, 5, 23).isoformat(),\n", + " \"patient_sex\": \"M\",\n", + " \"study_instance_uid\": \"34343-34343-34343\",\n", + " \"accession_number\": \"345348389\",\n", + " \"study_description\": \"CT of the Thorax\",\n", + " \"study_datetime\": datetime(2000, 8, 10).isoformat(),\n", + " \"series_instance_uid\": \"34343-676556-3343\",\n", + " \"modalities_in_study\": [\"CT\", \"PET\"],\n", + " \"sop_instance_uid\": \"35858-384834-3843\",\n", + " \"references\": [\"http://gepacs.com/34343-34343-34343\"],\n", + " \"body\": \"This is the updated report\"\n", + "}\n", + "\n", + "response = requests.put(base_url + f\"reports/{document_id}/\", json=data, headers={\"Authorization\": f\"Token {auth_token}\"})\n", "\n", - "# document_id = response.json()[\"id\"]\n" + "response.json()" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'pathId': '/document/v1/report/report/docid/gepacs_35858-384834-3843',\n", - " 'id': 'id:report:report::gepacs_35858-384834-3843',\n", - " 'fields': {'pacs_name': 'GE PACS',\n", - " 'modalities_in_study': ['CT', 'PET'],\n", - " 'patient_id': '1234578',\n", - " 'patient_birth_date': 201657600,\n", - " 'body': 'This is the report',\n", - " 'references': ['http://gepacs.com/34343-34343-34343'],\n", - " 'sop_instance_uid': '35858-384834-3843',\n", - " 'patient_sex': 'M',\n", - " 'study_description': 'CT of the Thorax',\n", - " 'study_instance_uid': '34343-34343-34343',\n", - " 'accession_number': '345348389',\n", - " 'series_instance_uid': '34343-676556-3343',\n", - " 'pacs_aet': 'gepacs',\n", - " 'institutes': ['Neuroradiologie'],\n", - " 'study_datetime': 965858400}}" + "{'id': 102,\n", + " 'document_id': 'gepacs_3dfidii5858-6633i4-ii398841',\n", + " 'pacs_aet': 'gepacs',\n", + " 'pacs_name': 'GE PACS',\n", + " 'patient_id': '1234578',\n", + " 'patient_birth_date': '1976-05-23',\n", + " 'patient_sex': 'M',\n", + " 'study_instance_uid': '34343-34343-34343',\n", + " 'accession_number': '345348389',\n", + " 'study_description': 'CT of the Thorax',\n", + " 'study_datetime': '2000-08-10T00:00:00+02:00',\n", + " 'series_instance_uid': '34343-676556-3343',\n", + " 'modalities_in_study': ['CT', 'PET'],\n", + " 'sop_instance_uid': '35858-384834-3843',\n", + " 'references': ['http://gepacs.com/34343-34343-34343'],\n", + " 'body': 'This is the updated report',\n", + " 'groups': [2],\n", + " 'vespa': {'pathId': '/document/v1/report/report/docid/gepacs_3dfidii5858-6633i4-ii398841',\n", + " 'id': 'id:report:report::gepacs_3dfidii5858-6633i4-ii398841',\n", + " 'fields': {'pacs_name': 'GE PACS',\n", + " 'modalities_in_study': ['CT', 'PET'],\n", + " 'patient_birth_date': 201657600,\n", + " 'body': 'This is the updated report',\n", + " 'references': ['http://gepacs.com/34343-34343-34343'],\n", + " 'patient_sex': 'M',\n", + " 'study_description': 'CT of the Thorax',\n", + " 'groups': [2],\n", + " 'pacs_aet': 'gepacs',\n", + " 'study_datetime': 965858400}}}" ] }, - "execution_count": 5, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -86,6 +155,24 @@ "\n", "response.json()" ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "response = requests.delete(base_url + f\"reports/{document_id}\", headers={\"Authorization\": f\"Token {auth_token}\"})\n", + "print(response)" + ] } ], "metadata": { @@ -104,7 +191,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.1" + "version": "3.11.7" }, "orig_nbformat": 4 }, diff --git a/radis/api/site.py b/radis/api/site.py index 9c4a083e..019ee8b0 100644 --- a/radis/api/site.py +++ b/radis/api/site.py @@ -1,4 +1,4 @@ -from typing import Callable, Literal +from typing import Any, Callable, Literal, NamedTuple from radis.reports.models import Report @@ -9,4 +9,31 @@ def register_report_handler(handler: ReportEventHandler) -> None: + """Register a report event handler. + + The report handler gets notified a report is created, updated, or deleted in + PostgreSQL database. It can be used to sync report documents in other + databases like Vespa. + """ report_event_handlers.append(handler) + + +FetchDocument = Callable[[Report], dict[str, Any] | None] + + +class DocumentFetcher(NamedTuple): + source: str + fetch: FetchDocument + + +document_fetchers: list[DocumentFetcher] = [] + + +def register_document_fetcher(source: str, fetch: FetchDocument) -> None: + """Register a document fetcher. + + A document fetcher is a function that takes a report from the PostgreSQL + database and returns a document in the form of a dictionary from another + database (like Vespa). + """ + document_fetchers.append(DocumentFetcher(source, fetch)) diff --git a/radis/api/views.py b/radis/api/views.py index 9c606f27..b51bb48c 100644 --- a/radis/api/views.py +++ b/radis/api/views.py @@ -1,15 +1,53 @@ -from rest_framework import viewsets +from typing import Any + +from rest_framework import mixins, viewsets +from rest_framework.exceptions import MethodNotAllowed +from rest_framework.permissions import IsAdminUser +from rest_framework.request import Request +from rest_framework.response import Response from rest_framework.serializers import BaseSerializer from radis.reports.models import Report from .serializers import ReportSerializer -from .site import report_event_handlers +from .site import document_fetchers, report_event_handlers + + +class ReportViewSet( + mixins.CreateModelMixin, + mixins.DestroyModelMixin, + mixins.RetrieveModelMixin, + mixins.UpdateModelMixin, + viewsets.GenericViewSet, +): + """ViewSet for fetch, creating, updating, and deleting Reports. + Only admins (staff users) can do that. + """ -class ReportViewSet(viewsets.ModelViewSet): serializer_class = ReportSerializer queryset = Report.objects.all() + lookup_field = "document_id" + permission_classes = [IsAdminUser] + + def retrieve(self, request: Request, *args: Any, **kwargs: Any) -> Response: + """Retrieve a single Report. + + It also fetches the associated document from the Vespa database. + """ + instance: Report = self.get_object() + + extra = {} + for fetcher in document_fetchers: + document = fetcher.fetch(instance) + if document: + extra[fetcher.source] = document + + serializer = self.get_serializer(instance) + data = serializer.data + data.update(extra) + + return Response(data) def perform_create(self, serializer: BaseSerializer) -> None: super().perform_create(serializer) @@ -25,6 +63,10 @@ def perform_update(self, serializer: BaseSerializer) -> None: for handler in report_event_handlers: handler("updated", report) + def partial_update(self, request: Request, *args: Any, **kwargs: Any) -> Response: + assert request.method + raise MethodNotAllowed(request.method) + def perform_destroy(self, instance: Report) -> None: super().perform_destroy(instance) for handler in report_event_handlers: diff --git a/radis/core/management/commands/populate_db.py b/radis/core/management/commands/populate_db.py index a29de40b..10b6b315 100644 --- a/radis/core/management/commands/populate_db.py +++ b/radis/core/management/commands/populate_db.py @@ -34,7 +34,7 @@ def feed_report(body: str): report = ReportFactory.create(body=body) groups = fake.random_elements(elements=list(Group.objects.all()), unique=True) report.groups.set(groups) - ReportDocument.from_report_model(report).create() + ReportDocument(report).create() def feed_reports(): diff --git a/radis/core/middlewares.py b/radis/core/middlewares.py index 1b8f9b75..e56738cf 100644 --- a/radis/core/middlewares.py +++ b/radis/core/middlewares.py @@ -10,7 +10,7 @@ def is_html_response(response): - return response["Content-Type"].startswith("text/html") + return response.has_header("Content-Type") and response["Content-Type"].startswith("text/html") class MaintenanceMiddleware: diff --git a/radis/search/apps.py b/radis/search/apps.py index 69b49e72..697f9e18 100644 --- a/radis/search/apps.py +++ b/radis/search/apps.py @@ -15,9 +15,9 @@ def ready(self): def register_app(): - from radis.api.site import register_report_handler + from radis.api.site import register_document_fetcher, register_report_handler - from .models import handle_report + from .models import fetch_document, handle_report register_main_menu_item( url_name="search", @@ -25,6 +25,7 @@ def register_app(): ) register_report_handler(handle_report) + register_document_fetcher("vespa", fetch_document) def init_db(**kwargs): diff --git a/radis/search/models.py b/radis/search/models.py index fe7478a7..68580d61 100644 --- a/radis/search/models.py +++ b/radis/search/models.py @@ -1,7 +1,7 @@ import logging -from dataclasses import asdict, dataclass +from dataclasses import dataclass from datetime import date, datetime, time -from typing import Literal +from typing import Any, Literal from rest_framework.status import HTTP_200_OK from vespa.io import VespaQueryResponse @@ -21,73 +21,66 @@ class Meta: verbose_name_plural = "Search app settings" -@dataclass(kw_only=True) class ReportDocument: - document_id: str - groups: list[int] - pacs_aet: str - pacs_name: str - patient_birth_date: date - patient_sex: Literal["F", "M", "U"] - study_description: str - study_datetime: datetime - modalities_in_study: list[str] - references: list[str] - body: str - - @staticmethod - def from_report_model(report: Report): - assert report.patient_sex in ("M", "F", "U") - - return ReportDocument( - document_id=report.document_id, - groups=[group.id for group in report.groups.all()], - pacs_aet=report.pacs_aet, - pacs_name=report.pacs_name, - patient_birth_date=report.patient_birth_date, - patient_sex=report.patient_sex, - study_description=report.study_description, - study_datetime=report.study_datetime, - modalities_in_study=report.modalities_in_study, - references=report.references, - body=report.body.strip(), - ) + def __init__(self, report: Report) -> None: + self.report = report - def dictify_for_vespa(self): - fields = asdict(self) + def _dictify_for_vespa(self) -> dict[str, Any]: + """Dictify the report for Vespa. - # Vespa can't store dates and datetimes natively, so we store them as a number, - # see also schema in vespa_app.py - fields["patient_birth_date"] = int( - datetime.combine(fields["patient_birth_date"], time()).timestamp() + Must be in the same format as schema in vespa_app.py + """ + # Vespa can't store dates and datetimes natively, so we store them as a number. + patient_birth_date = int( + datetime.combine(self.report.patient_birth_date, time()).timestamp() ) - fields["study_datetime"] = int(fields["study_datetime"].timestamp()) + study_datetime = int(self.report.study_datetime.timestamp()) + + return { + "groups": [group.id for group in self.report.groups.all()], + "pacs_aet": self.report.pacs_aet, + "pacs_name": self.report.pacs_name, + "patient_birth_date": patient_birth_date, + "patient_sex": self.report.patient_sex, + "study_description": self.report.study_description, + "study_datetime": study_datetime, + "modalities_in_study": self.report.modalities_in_study, + "references": self.report.references, + "body": self.report.body.strip(), + } + + def fetch(self) -> dict[str, Any]: + response = vespa_app.get_client().get_data(REPORT_SCHEMA_NAME, self.report.document_id) + + if response.get_status_code() != HTTP_200_OK: + message = response.get_json() + raise Exception(f"Error while fetching report from Vespa: {message}") - return fields + return response.get_json() - def create(self): - fields = self.dictify_for_vespa() - del fields["document_id"] + def create(self) -> None: + fields = self._dictify_for_vespa() response = vespa_app.get_client().feed_data_point( - REPORT_SCHEMA_NAME, self.document_id, fields + REPORT_SCHEMA_NAME, self.report.document_id, fields ) - # TODO: improve error handling + if response.get_status_code() != HTTP_200_OK: message = response.get_json() raise Exception(f"Error while feeding report to Vespa: {message}") - def update(self): - fields = self.dictify_for_vespa() - del fields["document_id"] - response = vespa_app.get_client().update_data("report", self.document_id, fields) - # TODO: improve error handling + def update(self) -> None: + fields = self._dictify_for_vespa() + response = vespa_app.get_client().update_data( + REPORT_SCHEMA_NAME, self.report.document_id, fields + ) + if response.get_status_code() != HTTP_200_OK: message = response.get_json() raise Exception(f"Error while updating report on Vespa: {message}") - def delete(self): - response = vespa_app.get_client().delete_data("report", self.document_id) - # TODO: improve error handling + def delete(self) -> None: + response = vespa_app.get_client().delete_data(REPORT_SCHEMA_NAME, self.report.document_id) + if response.get_status_code() != HTTP_200_OK: message = response.get_json() raise Exception(f"Error while deleting report on Vespa: {message}") @@ -107,7 +100,7 @@ class ReportSummary: body: str @staticmethod - def from_vespa_response(record: dict): + def from_vespa_response(record: dict) -> "ReportSummary": patient_birth_date = date.fromtimestamp(record["fields"]["patient_birth_date"]) study_datetime = datetime.fromtimestamp(record["fields"]["study_datetime"]) @@ -125,7 +118,7 @@ def from_vespa_response(record: dict): ) @property - def report_full(self): + def report_full(self) -> Report: return Report.objects.get(document_id=self.document_id) @@ -164,8 +157,13 @@ def query_reports(query: str, offset: int = 0, page_size: int = 100) -> "ReportQ def handle_report(event_type: ReportEventType, report: Report): # Sync reports with Vespa if event_type == "created": - ReportDocument.from_report_model(report).create() + ReportDocument(report).create() elif event_type == "updated": - ReportDocument.from_report_model(report).update() + ReportDocument(report).update() elif event_type == "deleted": - ReportDocument.from_report_model(report).delete() + ReportDocument(report).delete() + + +def fetch_document(report: Report) -> dict[str, Any]: + doc = ReportDocument(report).fetch() + return doc