From 6f695ecbdef7ff6e1f96be3bab57b31394b33622 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 27 Sep 2024 11:13:59 +0200 Subject: [PATCH] feat: add optional integration of GestaltMatcher/PEDIA (#399, #1125) (#1249) Co-authored-by: Meghna Ahuja Bhasin --- backend/cases/views.py | 1 + backend/config/settings/base.py | 11 + backend/ext_gestaltmatcher/__init__.py | 0 backend/ext_gestaltmatcher/admin.py | 7 + backend/ext_gestaltmatcher/apps.py | 6 + .../migrations/0001_initial.py | 34 +++ .../0002_smallvariantquerypediascores.py | 33 +++ .../ext_gestaltmatcher/migrations/__init__.py | 0 backend/ext_gestaltmatcher/models.py | 43 ++++ backend/variants/file_export.py | 97 ++++++- backend/variants/models/jobs.py | 37 ++- backend/variants/models/scores.py | 238 +++++++++++++++++- backend/variants/query_schemas.py | 8 + backend/variants/schemas/case-query-v1.json | 36 +++ ...ase-query-v1-01-minimal-with-defaults.json | 6 +- ...e-query-v1-02-singleton-with-defaults.json | 4 + .../examples/case-query-v1-02-singleton.json | 3 + backend/variants/submit_filter.py | 72 +++++- backend/variants/tests/data/query_settings.py | 4 +- backend/variants/tests/factories.py | 4 + backend/variants/tests/test_file_export.py | 100 +++++++- .../src/variants/components/FilterApp.vue | 10 + .../src/variants/components/FilterForm.vue | 50 +++- .../FilterForm/PrioritizationPane.stories.js | 25 ++ .../FilterForm/PrioritizationPane.vue | 181 ++++++++++--- .../components/FilterResultsTable.vue | 30 +++ frontend/src/variants/stores/variantQuery.js | 34 +++ .../FilterForm/PrioritizationPane.spec.js | 90 ++++++- 28 files changed, 1104 insertions(+), 60 deletions(-) create mode 100644 backend/ext_gestaltmatcher/__init__.py create mode 100644 backend/ext_gestaltmatcher/admin.py create mode 100644 backend/ext_gestaltmatcher/apps.py create mode 100644 backend/ext_gestaltmatcher/migrations/0001_initial.py create mode 100644 backend/ext_gestaltmatcher/migrations/0002_smallvariantquerypediascores.py 
create mode 100644 backend/ext_gestaltmatcher/migrations/__init__.py create mode 100644 backend/ext_gestaltmatcher/models.py diff --git a/backend/cases/views.py b/backend/cases/views.py index 5f3e3c941..df36e24af 100644 --- a/backend/cases/views.py +++ b/backend/cases/views.py @@ -59,6 +59,7 @@ def get_context_data(self, *args, **kwargs): ), "exomiser_enabled": settings.VARFISH_ENABLE_EXOMISER_PRIORITISER, "cadd_enabled": settings.VARFISH_ENABLE_CADD, + "cada_enabled": settings.VARFISH_ENABLE_CADA, "extra_anno_fields": extra_anno_fields, "url_prefixes": { "annonars": settings.VARFISH_BACKEND_URL_PREFIX_ANNONARS, diff --git a/backend/config/settings/base.py b/backend/config/settings/base.py index a855c8765..ea570342e 100644 --- a/backend/config/settings/base.py +++ b/backend/config/settings/base.py @@ -133,6 +133,7 @@ "varannos.apps.VarannosConfig", # Legacy apps - not used anymore! "hgmd.apps.HgmdConfig", + "ext_gestaltmatcher.apps.ExtGestaltmatcherConfig", ] # See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps @@ -541,6 +542,16 @@ "VARFISH_CADA_REST_API_URL", "https://cada.gene-talk.de/api/process" ) +# Enable PEDIA prioritization. +VARFISH_ENABLE_PEDIA = env.bool("VARFISH_ENABLE_PEDIA", default=False) +VARFISH_PEDIA_REST_API_URL = env.str("VARFISH_PEDIA_REST_API_URL", "http://127.0.0.1:9000/pedia") +VARFISH_MIDDLEWARE_URL = env.str("VARFISH_MIDDLEWARE_URL", "http://127.0.0.1:7000") + +# Enable Gestalt-based prioritization. +VARFISH_ENABLE_GESTALT_MATCHER = env.bool("VARFISH_ENABLE_GESTALT_MATCHER", default=False) +# Configure URL to GestaltMatcher REST API +VARFISH_GM_SENDER_URL = env.str("VARFISH_GM_SENDER_URL", "http://127.0.0.1:7000/") + # Enable submission of variants to CADD server. 
VARFISH_ENABLE_CADD_SUBMISSION = env.bool("VARFISH_ENABLE_CADD_SUBMISSION", default=False) # CADD version to use for for submission diff --git a/backend/ext_gestaltmatcher/__init__.py b/backend/ext_gestaltmatcher/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/ext_gestaltmatcher/admin.py b/backend/ext_gestaltmatcher/admin.py new file mode 100644 index 000000000..1bb77f5a2 --- /dev/null +++ b/backend/ext_gestaltmatcher/admin.py @@ -0,0 +1,7 @@ +from django.contrib import admin + +from .models import SmallVariantQueryGestaltMatcherScores, SmallVariantQueryPediaScores + +# Register your models here. +admin.site.register(SmallVariantQueryGestaltMatcherScores) +admin.site.register(SmallVariantQueryPediaScores) diff --git a/backend/ext_gestaltmatcher/apps.py b/backend/ext_gestaltmatcher/apps.py new file mode 100644 index 000000000..2b7889167 --- /dev/null +++ b/backend/ext_gestaltmatcher/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class ExtGestaltmatcherConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "ext_gestaltmatcher" diff --git a/backend/ext_gestaltmatcher/migrations/0001_initial.py b/backend/ext_gestaltmatcher/migrations/0001_initial.py new file mode 100644 index 000000000..9d5a5d5e7 --- /dev/null +++ b/backend/ext_gestaltmatcher/migrations/0001_initial.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.20 on 2023-10-20 07:18 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [] + + operations = [ + migrations.CreateModel( + name="SmallVariantQueryGestaltMatcherScores", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("gene_id", models.CharField(help_text="Entrez gene ID", max_length=64)), + ("gene_symbol", models.CharField(help_text="The gene symbol", 
max_length=128)), + ("priority_type", models.CharField(help_text="The priority type", max_length=64)), + ("score", models.FloatField(help_text="The gene score")), + ( + "query", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="variants.SmallVariantQuery" + ), + ), + ], + ) + ] diff --git a/backend/ext_gestaltmatcher/migrations/0002_smallvariantquerypediascores.py b/backend/ext_gestaltmatcher/migrations/0002_smallvariantquerypediascores.py new file mode 100644 index 000000000..9b4f382dd --- /dev/null +++ b/backend/ext_gestaltmatcher/migrations/0002_smallvariantquerypediascores.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.20 on 2023-11-14 07:18 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [("ext_gestaltmatcher", "0001_initial")] + + operations = [ + migrations.CreateModel( + name="SmallVariantQueryPediaScores", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("gene_id", models.CharField(help_text="Entrez gene ID", max_length=64)), + ("gene_symbol", models.CharField(help_text="The gene symbol", max_length=128)), + ("score", models.FloatField(help_text="The gene score")), + ( + "query", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="variants.SmallVariantQuery" + ), + ), + ], + ) + ] diff --git a/backend/ext_gestaltmatcher/migrations/__init__.py b/backend/ext_gestaltmatcher/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/ext_gestaltmatcher/models.py b/backend/ext_gestaltmatcher/models.py new file mode 100644 index 000000000..540a4f738 --- /dev/null +++ b/backend/ext_gestaltmatcher/models.py @@ -0,0 +1,43 @@ +from django.db import models + + +# Create your models here. 
+class SmallVariantQueryGestaltMatcherScores(models.Model): + """Annotate ``SmallVariantQuery`` with Gestalt Matcher scores (if configured to do so).""" + + #: The query to annotate. + query = models.ForeignKey("variants.SmallVariantQuery", on_delete=models.CASCADE) + + #: The Entrez gene ID. + gene_id = models.CharField(max_length=64, null=False, blank=False, help_text="Entrez gene ID") + + #: The gene symbol. + gene_symbol = models.CharField( + max_length=128, null=False, blank=False, help_text="The gene symbol" + ) + + #: The priority type. + priority_type = models.CharField( + max_length=64, null=False, blank=False, help_text="The priority type" + ) + + #: The score. + score = models.FloatField(null=False, blank=False, help_text="The gene score") + + +class SmallVariantQueryPediaScores(models.Model): + """Annotate ``SmallVariantQuery`` with PEDIA scores (if configured to do so).""" + + #: The query to annotate. + query = models.ForeignKey("variants.SmallVariantQuery", on_delete=models.CASCADE) + + #: The Entrez gene ID. + gene_id = models.CharField(max_length=64, null=False, blank=False, help_text="Entrez gene ID") + + #: The gene symbol. + gene_symbol = models.CharField( + max_length=128, null=False, blank=False, help_text="The gene symbol" + ) + + #: The score. 
+ score = models.FloatField(null=False, blank=False, help_text="The gene score") diff --git a/backend/variants/file_export.py b/backend/variants/file_export.py index bd13ecb46..ec31b5d68 100644 --- a/backend/variants/file_export.py +++ b/backend/variants/file_export.py @@ -24,11 +24,15 @@ ExportProjectCasesFileBgJobResult, SmallVariantComment, VariantScoresFactory, + annotate_with_gm_scores, annotate_with_joint_scores, annotate_with_pathogenicity_scores, + annotate_with_pedia_scores, annotate_with_phenotype_scores, annotate_with_transcripts, + get_pedia_scores, prioritize_genes, + prioritize_genes_gm, unroll_extra_annos_result, ) from .queries import ( @@ -122,6 +126,16 @@ def to_str(val): ("phenotype_rank", "Phenotype Rank", int), ) +HEADERS_GM_SCORES = ( + ("gm_score", "Gestalt Score", float), + ("gm_rank", "Gestalt Rank", int), +) + +HEADERS_PEDIA_SCORES = ( + ("pedia_score", "PEDIA Score", float), + ("pedia_rank", "PEDIA Rank", int), +) + #: Names of the pathogenicity scoring header columns. 
HEADERS_PATHO_SCORES = ( ("pathogenicity_score", "Pathogenicity Score", float), @@ -318,6 +332,14 @@ def _is_prioritization_enabled(self): ) ) + def _is_gm_enabled(self): + """Return whether Gestalt Matcher prioritization is enabled in this query.""" + return settings.VARFISH_ENABLE_GESTALT_MATCHER and self.query_args.get("gm_enabled") + + def _is_pedia_enabled(self): + """Return whether PEDIA prioritization is enabled in this query.""" + return settings.VARFISH_ENABLE_PEDIA and self.query_args.get("pedia_enabled") + def _is_pathogenicity_enabled(self): """Return whether pathogenicity scoring is enabled in this query.""" return settings.VARFISH_ENABLE_CADD and all( @@ -352,6 +374,10 @@ def _yield_columns(self, members): header += HEADERS_TRANSCRIPTS if self._is_prioritization_enabled() and self._is_pathogenicity_enabled(): header += HEADERS_JOINT_SCORES + if self._is_gm_enabled(): + header += HEADERS_GM_SCORES + if self._is_pedia_enabled(): + header += HEADERS_PEDIA_SCORES header += HEADER_FLAGS header += HEADER_COMMENTS header += self.get_extra_annos_headers() @@ -391,13 +417,25 @@ def _yield_smallvars(self): _result = annotate_with_pathogenicity_scores(_result, variant_scores) if self._is_prioritization_enabled() and self._is_pathogenicity_enabled(): _result = annotate_with_joint_scores(_result) + if self._is_gm_enabled(): + gene_scores = self._fetch_gm_scores([entry.entrez_id for entry in _result]) + _result = annotate_with_gm_scores(_result, gene_scores) + if self._is_pedia_enabled(): + pedia_scores = self._fetch_pedia_scores(_result) + if pedia_scores: + _result = annotate_with_pedia_scores(_result, pedia_scores) fields = {x[1].label: x[0] for x in enumerate(list(ExtraAnnoField.objects.all()))} _result = unroll_extra_annos_result(_result, fields) self.job.add_log_entry("Writing output file...") total = len(_result) steps = math.ceil(total / 10) for i, small_var in enumerate(_result): - if self._is_prioritization_enabled() or self._is_pathogenicity_enabled(): + 
if ( + self._is_prioritization_enabled() + or self._is_pathogenicity_enabled() + or self._is_gm_enabled + or self._is_pedia_enabled() + ): if i % steps == 0: self.job.add_log_entry("{}%".format(int(100 * i / total))) else: @@ -433,6 +471,63 @@ def _fetch_gene_scores(self, entrez_ids): else: return {} + def _fetch_gm_scores(self, entrez_ids): + prio_gm = self.query_args.get("prio_gm") + if all((self._is_gm_enabled(), prio_gm)): + try: + return { + str(gene_id): score + for gene_id, gene_symbol, score, priority_type in prioritize_genes_gm( + prio_gm, logging=self.job.add_log_entry + ) + } + except ConnectionError as e: + self.job.add_log_entry(e) + else: + return {} + + def _fetch_pedia_scores(self, result): + if self._is_pedia_enabled(): + try: + payloadList = [] + + """Read and json object by reading ``result`` .""" + for line in result: + payload = dict() + + if all( + ( + line.entrez_id, + hasattr(line, "phenotype_score"), + hasattr(line, "pathogenicity_score"), + hasattr(line, "gm_score"), + ) + ): + payload["gene_name"] = line.symbol + payload["gene_id"] = line.entrez_id + + payload["cada_score"] = line.phenotype_score + payload["cadd_score"] = line.pathogenicity_score + payload["gestalt_score"] = ( + 0 if line.gm_score == float("inf") else line.gm_score + ) + + payload["label"] = False + payloadList.append(payload) + + case_name = self.job.case.name + if case_name.startswith("F_"): + name = case_name[2:] # Remove the first two characters ("F_") + else: + name = case_name + scores = {"case_name": name, "genes": payloadList} + + return {str(gene_id): score for gene_id, _, score in get_pedia_scores(scores)} + except ConnectionError as e: + self.job.add_log_entry(e) + else: + return {} + def _fetch_variant_scores(self, variants): if self._is_pathogenicity_enabled(): try: diff --git a/backend/variants/models/jobs.py b/backend/variants/models/jobs.py index fa6b17ae1..9fd010e71 100644 --- a/backend/variants/models/jobs.py +++ b/backend/variants/models/jobs.py @@ 
-13,6 +13,10 @@ from projectroles.plugins import get_backend_api from sqlalchemy import and_ +from ext_gestaltmatcher.models import ( + SmallVariantQueryGestaltMatcherScores, + SmallVariantQueryPediaScores, +) from variants.helpers import get_engine, get_meta from variants.models import SmallVariantQueryGeneScores, SmallVariantQueryVariantScores from variants.models.queries import ( @@ -98,7 +102,12 @@ def run_query_bg_job(pk): tl_event.add_object(obj=filter_job.case, label="case_name", name=filter_job.case.name) def _read_records( - inputf, smallvariantqueryresultset, pathogenicity_scores=None, phenotype_scores=None + inputf, + smallvariantqueryresultset, + pathogenicity_scores=None, + phenotype_scores=None, + gm_scores=None, + pedia_scores=None, ): """Read and yield ``SmallVariantQueryResultRow`` objects by reading ``inputf`` for the given ``SmallVariantQueryResultSet``.""" for line in inputf: @@ -113,6 +122,12 @@ def _read_records( if phenotype_scores and line.entrez_id: payload["phenotype_score"] = phenotype_scores.get(line.entrez_id, -1) + if gm_scores and line.entrez_id: + payload["gm_score"] = gm_scores.get(line.entrez_id, 0) + + if pedia_scores and line.entrez_id: + payload["pedia_score"] = pedia_scores.get(line.entrez_id, -1) + if pathogenicity_scores and phenotype_scores and line.entrez_id: if payload["pathogenicity_score"] == -1 or payload["phenotype_score"] == -1: payload["patho_pheno_score"] = -1 @@ -164,6 +179,24 @@ def _inner(): ) if row.gene_id } + gm_scores = None + pedia_scores = None + if query_model.query_settings.get("gm_enabled"): + gm_scores = { + row.gene_id: row.score + for row in SmallVariantQueryGestaltMatcherScores.objects.filter( + query__sodar_uuid=query_model.sodar_uuid + ) + if row.gene_id + } + if query_model.query_settings.get("pedia_enabled"): + pedia_scores = { + row.gene_id: row.score + for row in SmallVariantQueryPediaScores.objects.filter( + query__sodar_uuid=query_model.sodar_uuid + ) + if row.gene_id + } with 
transaction.atomic(): smallvariantqueryresultset = SmallVariantQueryResultSet.objects.create( @@ -183,6 +216,8 @@ def _inner(): smallvariantqueryresultset, pathogenicity_scores=pathogenicity_scores, phenotype_scores=phenotype_scores, + gm_scores=gm_scores, + pedia_scores=pedia_scores, ), n=1000, ): diff --git a/backend/variants/models/scores.py b/backend/variants/models/scores.py index defe03f8a..6d3001d05 100644 --- a/backend/variants/models/scores.py +++ b/backend/variants/models/scores.py @@ -1,5 +1,6 @@ """Code supporting scoring of variants by pathogenicity or phenotype.""" +import json import re import time @@ -9,11 +10,13 @@ from django.db import models from django.forms import model_to_dict from django.utils.html import strip_tags +import pandas as pd from projectroles.app_settings import AppSettingAPI import requests from sqlalchemy.exc import NoSuchColumnError import wrapt +from ext_gestaltmatcher.models import SmallVariantQueryGestaltMatcherScores from varfish.utils import JSONField _app_settings = AppSettingAPI() @@ -271,6 +274,60 @@ def __getitem__(self, key): return self.__wrapped__.__getitem__(key) +class RowWithGestaltMatcherScore(wrapt.ObjectProxy): + """Wrap a result row and add members for Gestalt Matcher score and rank.""" + + def __init__(self, obj): + super().__init__(obj) + self._self_gm_rank = None + self._self_gm_score = -1 + + @property + def gm_rank(self): + return self._self_gm_rank + + @property + def gm_score(self): + return self._self_gm_score + + def __getitem__(self, key): + if key == "gm_rank": + return self.gm_rank + elif key == "gm_score": + return self.gm_score + elif key == "pedia_score": + return + elif key == "pedia_rank": + return + else: + return self.__wrapped__.__getitem__(key) + + +class RowWithPediaScore(wrapt.ObjectProxy): + """Wrap a result row and add members for PEDIA score and rank.""" + + def __init__(self, obj): + super().__init__(obj) + self._self_pedia_rank = None + self._self_pedia_score = -1 + + @property + 
def pedia_rank(self): + return self._self_pedia_rank + + @property + def pedia_score(self): + return self._self_pedia_score + + def __getitem__(self, key): + if key == "pedia_rank": + return self.pedia_rank + elif key == "pedia_score": + return self.pedia_score + else: + return self.__wrapped__.__getitem__(key) + + class RowWithTranscripts(wrapt.ObjectProxy): """Wrap a result row and add members for phenotype score and rank.""" @@ -330,6 +387,60 @@ def annotate_with_phenotype_scores(rows, gene_scores): return rows +def annotate_with_gm_scores(rows, gm_scores): + """Annotate the results in ``rows`` with Gestalt Matcher scores stored in ``small_variant_query``. + + Variants are ranked by the Gestalt Matcher scores, automatically ranking them by gene. + """ + rows = [RowWithGestaltMatcherScore(row) for row in rows] + for row in rows: + row._self_gm_score = gm_scores.get(row.entrez_id, 0) + rows.sort(key=lambda row: (row._self_gm_score, row.entrez_id or ""), reverse=True) + # Re-compute ranks + prev_gene = rows[0].entrez_id if rows else None + prev_gm_score = rows[0].gm_score if rows else None + rank = 1 + same_score_count = 1 + for row in rows: + if row.entrez_id != prev_gene: + if prev_gm_score == row.gm_score: + same_score_count += 1 + else: + rank += same_score_count + same_score_count = 1 + prev_gm_score = row.gm_score + prev_gene = row.entrez_id + row._self_gm_rank = rank + return rows + + +def annotate_with_pedia_scores(rows, pedia_scores): + """Annotate the results in ``rows`` with PEDIA scores stored in ``small_variant_query``. + + Variants are ranked by the PEDIA scores, automatically ranking them by gene. 
+ """ + rows = [RowWithPediaScore(row) for row in rows] + for row in rows: + row._self_pedia_score = pedia_scores.get(row.entrez_id, -1) + rows.sort(key=lambda row: (row._self_pedia_score, row.entrez_id or ""), reverse=True) + # Re-compute ranks + prev_gene = rows[0].entrez_id if rows else None + prev_pedia_score = rows[0].pedia_score if rows else None + rank = 1 + same_score_count = 1 + for row in rows: + if row.entrez_id != prev_gene: + if prev_pedia_score == row.pedia_score: + same_score_count += 1 + else: + rank += same_score_count + same_score_count = 1 + prev_pedia_score = row.pedia_score + prev_gene = row.entrez_id + row._self_pedia_rank = rank + return rows + + def annotate_with_transcripts(rows, database): """Annotate the results in ``rows`` with transcripts (RefSeq or Ensembl)""" rows = [RowWithTranscripts(row, database) for row in rows] @@ -546,6 +657,81 @@ def unroll_extra_annos_result(rows, fields): return rows_ +def generate_pedia_input(self, pathoEnabled, prioEnabled, gmEnabled, queryId, case_name, results): + pathogenicity_scores = None + if pathoEnabled: + pathogenicity_scores = { + (row.chromosome, row.start, row.reference, row.alternative): row.score + for row in SmallVariantQueryVariantScores.objects.filter(query__sodar_uuid=queryId) + } + phenotype_scores = None + if prioEnabled: + phenotype_scores = { + row.gene_id: row.score + for row in SmallVariantQueryGeneScores.objects.filter(query__sodar_uuid=queryId) + if row.gene_id + } + gm_scores = None + if gmEnabled: + gm_scores = { + row.gene_id: 0 if row.score == 0 else row.score + for row in SmallVariantQueryGestaltMatcherScores.objects.filter( + query__sodar_uuid=queryId + ) + if row.gene_id + } + + payloadList = [] + """Read and json object by reading ``results`` .""" + for line in results: + payload = dict() + + if line["entrez_id"]: + if line["symbol"]: + payload["gene_name"] = line["symbol"] + else: + payload["gene_name"] = " " + payload["gene_id"] = line["entrez_id"] + + if 
phenotype_scores and line.entrez_id: + payload["cada_score"] = phenotype_scores.get(line.entrez_id, -1) + + if pathogenicity_scores: + payload["cadd_score"] = pathogenicity_scores.get( + (line.chromosome, line.start, line.reference, line.alternative), 0.0 + ) + + if gm_scores and line.entrez_id: + payload["gestalt_score"] = ( + 0 + if gm_scores.get(line.entrez_id, 0) == float("nan") + else gm_scores.get(line.entrez_id, 0) + ) + + payload["label"] = False + + payloadList.append(payload) + + df = pd.DataFrame(payloadList) + + if "cadd_score" in df: + # Sort the DataFrame based on the 'cadd_score' column in descending order + df_sorted = df.sort_values(by="cadd_score", ascending=False) + + # Drop duplicates in the 'gene_name' column, keeping the first occurrence (highest CADD score) + df_no_duplicates = df_sorted.drop_duplicates(subset="gene_name", keep="first") + + if case_name.startswith("F_"): + name = case_name[2:] # Remove the first two characters ("F_") + else: + name = case_name + + scores = {"case_name": name, "genes": df_no_duplicates.to_dict(orient="records")} + return scores + + return {"case_name": "case", "genes": df.to_dict(orient="records")} + + def prioritize_genes(entrez_ids, hpo_terms, prio_algorithm, logging=lambda text: True): """Perform gene prioritization query. @@ -562,6 +748,21 @@ def prioritize_genes(entrez_ids, hpo_terms, prio_algorithm, logging=lambda text: yield from prio_exomiser(entrez_ids, hpo_terms, prio_algorithm) +def prioritize_genes_gm(gm_response, logging=lambda text: True): + """Perform gene prioritization query. + + Yield quadruples (gene id, gene symbol, score, priority type) for the given gene list and query settings. 
+ """ + try: + res = json.loads(gm_response) + except requests.ConnectionError: + raise ConnectionError("ERROR: GestaltMatcher Server not responding.") + for entry in res: + yield entry["gene_entrez_id"], entry["gene_name"], ( + 1.3 - entry["distance"] + ), "GestaltMatcher" + + def prio_exomiser(entrez_ids, hpo_terms, prio_algorithm): if not settings.VARFISH_ENABLE_EXOMISER_PRIORITISER or not entrez_ids or not hpo_terms: return @@ -614,12 +815,47 @@ def prio_cada(hpo_terms): ) except requests.ConnectionError: raise ConnectionError( - "ERROR: Server {} not responding.".format(settings.VARFISH_CADA_API_URL) + "ERROR: Server {} not responding.".format(settings.VARFISH_CADA_REST_API_URL) ) for entry in res.json(): yield entry["geneId"].split(":")[1], entry["geneSymbol"], entry["score"], "CADA" +def prioritize_genes_pedia( + self, pathoEnabled, prioEnabled, gmEnabled, caseId, case_name, result, logging +): + inputJson = generate_pedia_input( + self, pathoEnabled, prioEnabled, gmEnabled, caseId, case_name, result + ) + yield from get_pedia_scores(inputJson) + + +def get_pedia_scores(inputJson): + try: + res = requests.post( + settings.VARFISH_PEDIA_REST_API_URL, + json=inputJson, + ) + + if not res.status_code == 200: + raise ConnectionError( + "ERROR: Server responded with status {} and message {}. 
".format( + res.status_code, + strip_tags( + re.sub(".*", "", res.text), + ), + ) + ) + except requests.ConnectionError: + raise ConnectionError( + "ERROR: Server {} not responding.".format(settings.VARFISH_PEDIA_REST_API_URL) + ) + + for entry in res.json(): + gene_name = entry["gene_name"] if entry["gene_name"] else "" + yield entry["gene_id"], gene_name, entry["pedia_score"] + + class VariantScoresFactory: """Factory class for variant scorers.""" diff --git a/backend/variants/query_schemas.py b/backend/variants/query_schemas.py index ef872b4a8..fa194d7f6 100644 --- a/backend/variants/query_schemas.py +++ b/backend/variants/query_schemas.py @@ -255,8 +255,12 @@ class CaseQuery: patho_score: typing.Optional[str] = None prio_enabled: bool = False + gm_enabled: bool = False + pedia_enabled: bool = False prio_algorithm: typing.Optional[str] = None prio_hpo_terms: typing.Optional[typing.List[str]] = None + prio_gm: typing.Optional[str] = None + photo_file: typing.Optional[str] = None recessive_mode: typing.Optional[RecessiveMode] = None recessive_index: typing.Optional[str] = None @@ -379,7 +383,11 @@ def convert(self, case: Case, query: CaseQuery) -> typing.Dict[str, typing.Any]: "prio_enabled": query.prio_enabled, "prio_algorithm": query.prio_algorithm, "prio_hpo_terms": query.prio_hpo_terms, + "prio_gm": query.prio_gm, + "photo_file": query.photo_file, "patho_enabled": query.patho_enabled, + "gm_enabled": query.gm_enabled, + "pedia_enabled": query.pedia_enabled, "patho_score": query.patho_score, "effects": [e.value for e in query.effects], # Add static values that are not relevant for the API use case diff --git a/backend/variants/schemas/case-query-v1.json b/backend/variants/schemas/case-query-v1.json index 0e6665c46..0a7890007 100644 --- a/backend/variants/schemas/case-query-v1.json +++ b/backend/variants/schemas/case-query-v1.json @@ -1261,6 +1261,42 @@ } ] }, + "prio_gm": { + "$id": "#/properties/prio_gm", + "type": "string", + "title": "The prio_gm schema", + 
"description": "An explanation about the purpose of this instance.", + "default": "" + }, + "gm_enabled": { + "$id": "#/properties/gm_enabled", + "type": "boolean", + "title": "Enable face-based prioritization of variants", + "description": "Select ", + "default": false, + "examples": [ + true, + false + ] + }, + "pedia_enabled": { + "$id": "#/properties/pedia_enabled", + "type": "boolean", + "title": "Enable PEDIA-based prioritization of variants", + "description": "Select ", + "default": false, + "examples": [ + true, + false + ] + }, + "photo_file": { + "$id": "#/properties/photo_file", + "type": ["string", "null"], + "title": "photo_file for prioritization of variants", + "description": " ", + "default": "Empty" + }, "recessive_mode": { "anyOf": [ { diff --git a/backend/variants/schemas/examples/case-query-v1-01-minimal-with-defaults.json b/backend/variants/schemas/examples/case-query-v1-01-minimal-with-defaults.json index 8ee584f39..c57f365ae 100644 --- a/backend/variants/schemas/examples/case-query-v1-01-minimal-with-defaults.json +++ b/backend/variants/schemas/examples/case-query-v1-01-minimal-with-defaults.json @@ -15,6 +15,7 @@ "genotype": { "index": "variant" }, + "gm_enabled": false, "database": "refseq", "exac_enabled": false, "gnomad_exomes_enabled": false, @@ -69,5 +70,8 @@ "clinvar_paranoid_mode": false, "genomic_region": [], "patho_enabled": false, - "prio_enabled": false + "pedia_enabled": false, + "photo_file": "Empty", + "prio_enabled": false, + "prio_gm": "" } diff --git a/backend/variants/schemas/examples/case-query-v1-02-singleton-with-defaults.json b/backend/variants/schemas/examples/case-query-v1-02-singleton-with-defaults.json index 89ebd999e..97137c0cc 100644 --- a/backend/variants/schemas/examples/case-query-v1-02-singleton-with-defaults.json +++ b/backend/variants/schemas/examples/case-query-v1-02-singleton-with-defaults.json @@ -101,10 +101,14 @@ "patho_enabled": true, "patho_score": "cadd", "prio_enabled": true, + "gm_enabled": true, + 
"pedia_enabled": true, + "photo_file": "Empty", "prio_algorithm": "hiphive-human", "prio_hpo_terms": [ "HP:0000001" ], + "prio_gm": "", "thousand_genomes_enabled": true, "thousand_genomes_frequency": 0.001, "thousand_genomes_hemizygous": 10, diff --git a/backend/variants/schemas/examples/case-query-v1-02-singleton.json b/backend/variants/schemas/examples/case-query-v1-02-singleton.json index 2e3238e7b..bd0efc12e 100644 --- a/backend/variants/schemas/examples/case-query-v1-02-singleton.json +++ b/backend/variants/schemas/examples/case-query-v1-02-singleton.json @@ -100,10 +100,13 @@ "patho_enabled": true, "patho_score": "cadd", "prio_enabled": true, + "gm_enabled": true, + "pedia_enabled": true, "prio_algorithm": "hiphive-human", "prio_hpo_terms": [ "HP:0000001" ], + "prio_gm": "", "thousand_genomes_enabled": true, "thousand_genomes_frequency": 0.001, "thousand_genomes_hemizygous": 10, diff --git a/backend/variants/submit_filter.py b/backend/variants/submit_filter.py index 765eef9e2..6891f3e34 100644 --- a/backend/variants/submit_filter.py +++ b/backend/variants/submit_filter.py @@ -1,4 +1,6 @@ import contextlib +from decimal import Decimal +import json from django.conf import settings from django.db import transaction @@ -6,7 +8,12 @@ from variants.forms import PATHO_SCORES_MAPPING from variants.helpers import get_engine -from variants.models import VariantScoresFactory, prioritize_genes +from variants.models import ( + VariantScoresFactory, + prioritize_genes, + prioritize_genes_gm, + prioritize_genes_pedia, +) from .queries import CasePrefetchQuery, ProjectPrefetchQuery @@ -50,6 +57,8 @@ def run(self, kwargs={}): self._store_results(_results) self._prioritize_gene_phenotype(_results) self._prioritize_variant_pathogenicity(_results) + self._prioritize_gene_gm(_results) + self._prioritize_gene_pedia(_results) def _store_results(self, results): """Store results in ManyToMany field.""" @@ -97,6 +106,60 @@ def _prioritize_gene_phenotype(self, results): except 
ConnectionError as e: self.job.add_log_entry(e) + def _prioritize_gene_gm(self, results): + """Prioritize genes in ``results`` and store in ``SmallVariantQueryGestaltMatcherScores``.""" + gm_enabled = self.variant_query.query_settings.get("gm_enabled") + gm_response = self.variant_query.query_settings.get("prio_gm") + + if not all((settings.VARFISH_ENABLE_GESTALT_MATCHER, gm_enabled, gm_response)): + return + + self.job.add_log_entry("Prioritize genes with GestaltMatcher scores ...") + try: + for gene_id, gene_symbol, score, priority_type in prioritize_genes_gm( + gm_response, logging=self.job.add_log_entry + ): + self.variant_query.smallvariantquerygestaltmatcherscores_set.create( + gene_id=gene_id, + gene_symbol=gene_symbol, + score=score, + priority_type=priority_type, + ) + except ConnectionError as e: + self.job.add_log_entry(e) + + def _prioritize_gene_pedia(self, results): + """Prioritize genes in ``results`` and store in ``SmallVariantQueryPEDIAScores``.""" + pedia_enabled = self.variant_query.query_settings.get("pedia_enabled") + + if not all((settings.VARFISH_ENABLE_PEDIA, pedia_enabled)): + return + + self.job.add_log_entry("Prioritize genes with PEDIA scores ...") + try: + patho_enabled = self.variant_query.query_settings.get("patho_enabled") + prio_enabled = self.variant_query.query_settings.get("prio_enabled") + gm_enabled = self.variant_query.query_settings.get("gm_enabled") + case_id = self.variant_query.sodar_uuid + case_name = self.variant_query.case.name + for gene_id, gene_symbol, score in prioritize_genes_pedia( + self, + patho_enabled, + prio_enabled, + gm_enabled, + case_id, + case_name, + results, + logging=self.job.add_log_entry, + ): + self.variant_query.smallvariantquerypediascores_set.create( + gene_id=gene_id, + gene_symbol=gene_symbol, + score=score, + ) + except ConnectionError as e: + self.job.add_log_entry(e) + def _prioritize_variant_pathogenicity(self, results): """Prioritize genes in ``results`` and store in 
``SmallVariantQueryVariantScores``."""
         patho_enabled = self.variant_query.query_settings.get("patho_enabled")
@@ -128,6 +191,13 @@ def get_var(row):
                 self.job.add_log_entry(e)
 
 
+class RowEncoder(json.JSONEncoder):  # JSON encoder that additionally accepts ``Decimal`` values
+    def default(self, obj):  # called by the json module for objects it cannot serialize itself
+        if isinstance(obj, Decimal):
+            return float(obj)  # Decimal is emitted as a JSON number via float()
+        return json.JSONEncoder.default(self, obj)  # anything else: defer to the base class (raises TypeError)
+
+
 class CaseFilter(FilterBase):
     """Class for storing query results for a single case."""
 
diff --git a/backend/variants/tests/data/query_settings.py b/backend/variants/tests/data/query_settings.py
index 1d6dcd066..3c89c4df8 100644
--- a/backend/variants/tests/data/query_settings.py
+++ b/backend/variants/tests/data/query_settings.py
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d3e51d2c327f90b6937890066948b6f6fabe6d0838594ad88401e5c0eaec546
-size 9425
+oid sha256:e7711be285572be6c293d26537f9971260d8bd249174c45940a8ee8de9d7a57d
+size 9524
diff --git a/backend/variants/tests/factories.py b/backend/variants/tests/factories.py
index e637b3be0..47f363fdc 100644
--- a/backend/variants/tests/factories.py
+++ b/backend/variants/tests/factories.py
@@ -222,7 +222,11 @@ class FormDataFactory(FormDataFactoryBase):
     prio_enabled: bool = False
     prio_algorithm: str = ""
     prio_hpo_terms: str = ""
+    prio_gm: str = ""
+    photo_file: str = ""
     patho_enabled: bool = False
+    gm_enabled: bool = False
+    pedia_enabled: bool = False
     patho_score: str = ""
     file_type: str = "tsv"
     export_flags: bool = True
diff --git a/backend/variants/tests/test_file_export.py b/backend/variants/tests/test_file_export.py
index 9b61d71b9..d4fb7ba20 100644
--- a/backend/variants/tests/test_file_export.py
+++ b/backend/variants/tests/test_file_export.py
@@ -3,10 +3,12 @@
 from datetime import timedelta
 import gzip
 import io
+import json
 import tempfile
 from unittest.mock import patch
 
 from bgjobs.models import BackgroundJob
+import django
 from django.conf import settings
 from django.utils import timezone
 import openpyxl
@@ -117,7 +119,17 @@ def setUp(self):
project=self.bg_job.project,
             bg_job=self.bg_job,
             case=self.case,
-            query_args={"export_flags": True, "export_comments": True},
+            query_args={
+                "export_flags": True,
+                "export_comments": True,
+                "pedia_enabled": True,
+                "gm_enabled": True,
+                "patho_enabled": True,
+                "patho_score": "CADD",  # NOTE(review): test_export_xlsx in this patch sets lower-case "cadd" - confirm the expected spelling
+                "prio_enabled": True,
+                "prio_algorithm": "CADA",
+                "prio_hpo_terms": [],
+            },
             file_type="xlsx",
         )
 
@@ -144,6 +156,71 @@ def setUp(self):
                 ensembl_transcript_id=small_var.ensembl_transcript_id,
             )
 
+    def _set_cada_mocker(self, mock_):  # registers a canned 200 reply for POSTs to VARFISH_CADA_REST_API_URL
+        mock_.post(
+            django.conf.settings.VARFISH_CADA_REST_API_URL,
+            status_code=200,
+            text=json.dumps(
+                [
+                    {
+                        "geneId": "EntrezId:" + self.small_vars[0].refseq_gene_id,
+                        "geneSymbol": "ASPSCR1",
+                        "score": 0.1,
+                    },
+                    {
+                        "geneId": "EntrezId:" + self.small_vars[1].refseq_gene_id,
+                        "geneSymbol": "NFKBIL1",
+                        "score": 0.2,
+                    },
+                ]
+            ),
+        )
+
+    def _set_pedia_mocker(self, mock_):  # registers a canned 200 reply for POSTs to VARFISH_PEDIA_REST_API_URL
+        mock_.post(
+            django.conf.settings.VARFISH_PEDIA_REST_API_URL,
+            status_code=200,
+            text=json.dumps(
+                [
+                    {
+                        "gene_id": self.small_vars[0].refseq_gene_id,
+                        "gene_name": "ASPSCR1",
+                        "pedia_score": 0.4,
+                    },
+                    {
+                        "gene_id": self.small_vars[1].refseq_gene_id,
+                        "gene_name": "NFKBIL1",
+                        "pedia_score": -0.2,
+                    },
+                ]
+            ),
+        )
+
+    def _set_cadd_mocker(self, mock_):  # registers canned replies for the CADD "/annotate/" and "/result/" POST endpoints
+        def _key_gen(s):  # builds the "<chromosome>-<start>-<reference>-<alternative>" key used under "scores"
+            return "%s-%d-%s-%s" % (s.chromosome, s.start, s.reference, s.alternative)
+
+        mock_.post(
+            django.conf.settings.VARFISH_CADD_REST_API_URL + "/annotate/",
+            status_code=200,
+            text=json.dumps({"uuid": "xxxxxxxx-xxxx-xxxx-xxxxxxxxxxxx"}),
+        )
+        mock_.post(
+            django.conf.settings.VARFISH_CADD_REST_API_URL + "/result/",
+            status_code=200,
+            text=json.dumps(
+                {
+                    "status": "finished",
+                    "info": {"cadd_rest_api_version": 0.1},
+                    "scores": {
+                        _key_gen(self.small_vars[0]): [0.345146, 7.773],
+                        _key_gen(self.small_vars[1]): [0.345179, 7.773],
+                        _key_gen(self.small_vars[2]): [0.345212, 7.774],
+                    },
+                }
+            ),
+        )
+
 
 class CaseExporterTest(MehariMockerMixin, ExportTestBase):
     def setUp(self):
@@ -155,6
+232,9 @@ def setUp(self):
 
     def _test_export_xlsx(self, database, mock_):
         self._set_mehari_mocker(mock_)
+        self._set_cada_mocker(mock_)  # canned CADA reply
+        self._set_pedia_mocker(mock_)  # canned PEDIA reply
+        self._set_cadd_mocker(mock_)  # canned CADD replies
         self.export_job.query_args["database_select"] = database
 
         with file_export.CaseExporterXlsx(self.export_job, self.export_job.case) as exporter:
@@ -198,7 +278,7 @@ def _test_tabular(self, arrs, has_trailing, mehari_enable, database):
         self.assertEquals(len(arrs), 4 + int(has_trailing))
         # TODO: also test without flags and comments
         if not mehari_enable:
-            self.assertEquals(len(arrs[0]), 58)
+            self.assertEquals(len(arrs[0]), 68)  # NOTE(review): assertEquals is a deprecated unittest alias (removed in Python 3.12); assertEqual is the supported name
         else:
             self.assertEquals(len(arrs[0]), 59)
         self.assertSequenceEqual(arrs[0][:3], ["Chromosome", "Position", "Reference bases"])
@@ -319,9 +399,23 @@ def test_export_vcf(self, mock_):
             )
         self.assertEquals(content[3], "")
 
-    @patch("django.conf.settings.VARFISH_BACKEND_URL_MEHARI", "https://mehari.com")
+    @patch("django.conf.settings.VARFISH_ENABLE_GESTALT_MATCHER", True)
+    @patch("django.conf.settings.VARFISH_ENABLE_PEDIA", True)
+    @patch("django.conf.settings.VARFISH_ENABLE_CADD", True)
+    @patch("django.conf.settings.VARFISH_ENABLE_CADA", True)
+    @patch("django.conf.settings.VARFISH_CADA_REST_API_URL", "https://cada.com")
+    @patch("django.conf.settings.VARFISH_CADD_REST_API_URL", "https://cadd.com")
+    @patch("django.conf.settings.VARFISH_PEDIA_REST_API_URL", "https://pedia.com")
+    @patch("django.conf.settings.VARFISH_BACKEND_URL_MEHARI", None)
     @Mocker()
     def test_export_xlsx(self, mock):
+        self.export_job.query_args["pedia_enabled"] = True  # the assignments below set the prioritisation-related query args for this test
+        self.export_job.query_args["gm_enabled"] = True
+        self.export_job.query_args["patho_enabled"] = True
+        self.export_job.query_args["patho_score"] = "cadd"
+        self.export_job.query_args["prio_enabled"] = True
+        self.export_job.query_args["prio_algorithm"] = "CADA"
+        self.export_job.query_args["prio_hpo_terms"] = ["HP:0001234"]
         self._test_export_xlsx("refseq", mock)
@patch("django.conf.settings.VARFISH_BACKEND_URL_MEHARI", "https://mehari.com")
diff --git a/frontend/src/variants/components/FilterApp.vue b/frontend/src/variants/components/FilterApp.vue
index a01e93875..fd98fff1e 100644
--- a/frontend/src/variants/components/FilterApp.vue
+++ b/frontend/src/variants/components/FilterApp.vue
@@ -147,6 +150,9 @@ watch(
   () => props.caseUuid,
   () => refreshStores(),
 )
+
+// Expose the middleware URL as the global window.middlewareUrl (placeholder text refers to Django settings)
+window.middlewareUrl = '{{ settings.VARFISH_MIDDLEWARE_URL }}' // NOTE(review): Django-template placeholder inside a .vue file; unless this file is rendered through Django templates the global holds this literal text - confirm