Skip to content

Commit

Permalink
feat: add optional integration of GestaltMatcher/PEDIA (#399, #1125) (#…
Browse files Browse the repository at this point in the history
…1249)

Co-authored-by: Meghna Ahuja Bhasin <[email protected]>
  • Loading branch information
holtgrewe and ahujameg authored Sep 27, 2024
1 parent bfc692d commit 6f695ec
Show file tree
Hide file tree
Showing 28 changed files with 1,104 additions and 60 deletions.
1 change: 1 addition & 0 deletions backend/cases/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def get_context_data(self, *args, **kwargs):
),
"exomiser_enabled": settings.VARFISH_ENABLE_EXOMISER_PRIORITISER,
"cadd_enabled": settings.VARFISH_ENABLE_CADD,
"cada_enabled": settings.VARFISH_ENABLE_CADA,
"extra_anno_fields": extra_anno_fields,
"url_prefixes": {
"annonars": settings.VARFISH_BACKEND_URL_PREFIX_ANNONARS,
Expand Down
11 changes: 11 additions & 0 deletions backend/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@
"varannos.apps.VarannosConfig",
# Legacy apps - not used anymore!
"hgmd.apps.HgmdConfig",
"ext_gestaltmatcher.apps.ExtGestaltmatcherConfig",
]

# See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
Expand Down Expand Up @@ -541,6 +542,16 @@
"VARFISH_CADA_REST_API_URL", "https://cada.gene-talk.de/api/process"
)

# PEDIA prioritization is opt-in; it stays off unless ``VARFISH_ENABLE_PEDIA`` is set.
VARFISH_ENABLE_PEDIA = env.bool("VARFISH_ENABLE_PEDIA", default=False)
# URL of the PEDIA REST API endpoint.
VARFISH_PEDIA_REST_API_URL = env.str(
    "VARFISH_PEDIA_REST_API_URL", default="http://127.0.0.1:9000/pedia"
)
# NOTE(review): purpose not visible here -- presumably the base URL of the middleware
# service used by the GestaltMatcher/PEDIA integration; confirm against its users.
VARFISH_MIDDLEWARE_URL = env.str("VARFISH_MIDDLEWARE_URL", default="http://127.0.0.1:7000")

# Gestalt-based prioritization is opt-in as well.
VARFISH_ENABLE_GESTALT_MATCHER = env.bool("VARFISH_ENABLE_GESTALT_MATCHER", default=False)
# URL of the GestaltMatcher REST API.
VARFISH_GM_SENDER_URL = env.str("VARFISH_GM_SENDER_URL", default="http://127.0.0.1:7000/")

# Enable submission of variants to CADD server.
VARFISH_ENABLE_CADD_SUBMISSION = env.bool("VARFISH_ENABLE_CADD_SUBMISSION", default=False)
# CADD version to use for submission
Expand Down
Empty file.
7 changes: 7 additions & 0 deletions backend/ext_gestaltmatcher/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from django.contrib import admin

from .models import SmallVariantQueryGestaltMatcherScores, SmallVariantQueryPediaScores

# Make both score models available in the Django admin (default ``ModelAdmin``).
admin.site.register((SmallVariantQueryGestaltMatcherScores, SmallVariantQueryPediaScores))
6 changes: 6 additions & 0 deletions backend/ext_gestaltmatcher/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class ExtGestaltmatcherConfig(AppConfig):
    """Django application configuration of the ``ext_gestaltmatcher`` app."""

    # The migrations of this app create the implicit ``id`` column as
    # ``models.AutoField``.  Declaring ``BigAutoField`` here would make Django
    # detect a model/migration mismatch and ask for a new migration, so the
    # default is kept in sync with the schema that is actually created.
    default_auto_field = "django.db.models.AutoField"
    name = "ext_gestaltmatcher"
34 changes: 34 additions & 0 deletions backend/ext_gestaltmatcher/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.20 on 2023-10-20 07:18
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create the table backing ``SmallVariantQueryGestaltMatcherScores``."""

    # The ``query`` foreign key below points into the ``variants`` app, so that app
    # must be migrated before this migration runs (otherwise the related model
    # cannot be resolved on a fresh database).
    # NOTE(review): ``__first__`` only guarantees the first ``variants`` migration;
    # pin the migration creating ``SmallVariantQuery`` if that is a later one.
    dependencies = [("variants", "__first__")]

    operations = [
        migrations.CreateModel(
            name="SmallVariantQueryGestaltMatcherScores",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                ("gene_id", models.CharField(help_text="Entrez gene ID", max_length=64)),
                ("gene_symbol", models.CharField(help_text="The gene symbol", max_length=128)),
                ("priority_type", models.CharField(help_text="The priority type", max_length=64)),
                ("score", models.FloatField(help_text="The gene score")),
                (
                    # Scores are deleted together with the query they annotate.
                    "query",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, to="variants.SmallVariantQuery"
                    ),
                ),
            ],
        )
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.20 on 2023-11-14 07:18
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create the table backing ``SmallVariantQueryPediaScores``."""

    # Besides the previous migration of this app, the ``variants`` app is required
    # because the ``query`` foreign key below points to ``variants.SmallVariantQuery``.
    # NOTE(review): ``__first__`` only guarantees the first ``variants`` migration;
    # pin the migration creating ``SmallVariantQuery`` if that is a later one.
    dependencies = [
        ("ext_gestaltmatcher", "0001_initial"),
        ("variants", "__first__"),
    ]

    operations = [
        migrations.CreateModel(
            name="SmallVariantQueryPediaScores",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                ("gene_id", models.CharField(help_text="Entrez gene ID", max_length=64)),
                ("gene_symbol", models.CharField(help_text="The gene symbol", max_length=128)),
                ("score", models.FloatField(help_text="The gene score")),
                (
                    # Scores are deleted together with the query they annotate.
                    "query",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE, to="variants.SmallVariantQuery"
                    ),
                ),
            ],
        )
    ]
Empty file.
43 changes: 43 additions & 0 deletions backend/ext_gestaltmatcher/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from django.db import models


# Models storing per-query gene scores obtained from GestaltMatcher and PEDIA.
class SmallVariantQueryGestaltMatcherScores(models.Model):
    """One GestaltMatcher gene score attached to a ``SmallVariantQuery``.

    Each row links a query to a gene (Entrez ID and symbol), the priority type
    and the score value.  All columns are mandatory.
    """

    #: Query this score belongs to; the row is removed when the query is deleted.
    query = models.ForeignKey(
        to="variants.SmallVariantQuery",
        on_delete=models.CASCADE,
    )

    #: Entrez gene ID, stored as text.
    gene_id = models.CharField(help_text="Entrez gene ID", max_length=64)

    #: Symbol of the gene.
    gene_symbol = models.CharField(help_text="The gene symbol", max_length=128)

    #: Type of the priority.
    priority_type = models.CharField(help_text="The priority type", max_length=64)

    #: Score value of the gene.
    score = models.FloatField(help_text="The gene score")


class SmallVariantQueryPediaScores(models.Model):
    """One PEDIA gene score attached to a ``SmallVariantQuery``.

    Each row links a query to a gene (Entrez ID and symbol) and its score value.
    All columns are mandatory.
    """

    #: Query this score belongs to; the row is removed when the query is deleted.
    query = models.ForeignKey(
        to="variants.SmallVariantQuery",
        on_delete=models.CASCADE,
    )

    #: Entrez gene ID, stored as text.
    gene_id = models.CharField(help_text="Entrez gene ID", max_length=64)

    #: Symbol of the gene.
    gene_symbol = models.CharField(help_text="The gene symbol", max_length=128)

    #: Score value of the gene.
    score = models.FloatField(help_text="The gene score")
97 changes: 96 additions & 1 deletion backend/variants/file_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,15 @@
ExportProjectCasesFileBgJobResult,
SmallVariantComment,
VariantScoresFactory,
annotate_with_gm_scores,
annotate_with_joint_scores,
annotate_with_pathogenicity_scores,
annotate_with_pedia_scores,
annotate_with_phenotype_scores,
annotate_with_transcripts,
get_pedia_scores,
prioritize_genes,
prioritize_genes_gm,
unroll_extra_annos_result,
)
from .queries import (
Expand Down Expand Up @@ -122,6 +126,16 @@ def to_str(val):
("phenotype_rank", "Phenotype Rank", int),
)

#: Names of the Gestalt Matcher scoring header columns.
#: NOTE(review): entries look like ``(field name, column title, value type)`` -- confirm
#: against the code consuming the header tuples.
HEADERS_GM_SCORES = (
    ("gm_score", "Gestalt Score", float),
    ("gm_rank", "Gestalt Rank", int),
)

#: Names of the PEDIA scoring header columns (same entry layout as above).
HEADERS_PEDIA_SCORES = (
    ("pedia_score", "PEDIA Score", float),
    ("pedia_rank", "PEDIA Rank", int),
)

#: Names of the pathogenicity scoring header columns.
HEADERS_PATHO_SCORES = (
("pathogenicity_score", "Pathogenicity Score", float),
Expand Down Expand Up @@ -318,6 +332,14 @@ def _is_prioritization_enabled(self):
)
)

def _is_gm_enabled(self):
    """Return whether Gestalt Matcher prioritization is enabled in this query.

    Both the site-wide ``VARFISH_ENABLE_GESTALT_MATCHER`` setting and the query's
    ``gm_enabled`` argument must be truthy.  The result is always a ``bool``
    (a missing ``gm_enabled`` key yields ``False`` rather than ``None``).
    """
    return bool(settings.VARFISH_ENABLE_GESTALT_MATCHER and self.query_args.get("gm_enabled"))

def _is_pedia_enabled(self):
    """Return whether PEDIA prioritization is enabled in this query.

    Both the site-wide ``VARFISH_ENABLE_PEDIA`` setting and the query's
    ``pedia_enabled`` argument must be truthy.  The result is always a ``bool``
    (a missing ``pedia_enabled`` key yields ``False`` rather than ``None``).
    """
    return bool(settings.VARFISH_ENABLE_PEDIA and self.query_args.get("pedia_enabled"))

def _is_pathogenicity_enabled(self):
"""Return whether pathogenicity scoring is enabled in this query."""
return settings.VARFISH_ENABLE_CADD and all(
Expand Down Expand Up @@ -352,6 +374,10 @@ def _yield_columns(self, members):
header += HEADERS_TRANSCRIPTS
if self._is_prioritization_enabled() and self._is_pathogenicity_enabled():
header += HEADERS_JOINT_SCORES
if self._is_gm_enabled():
header += HEADERS_GM_SCORES
if self._is_pedia_enabled():
header += HEADERS_PEDIA_SCORES
header += HEADER_FLAGS
header += HEADER_COMMENTS
header += self.get_extra_annos_headers()
Expand Down Expand Up @@ -391,13 +417,25 @@ def _yield_smallvars(self):
_result = annotate_with_pathogenicity_scores(_result, variant_scores)
if self._is_prioritization_enabled() and self._is_pathogenicity_enabled():
_result = annotate_with_joint_scores(_result)
if self._is_gm_enabled():
gene_scores = self._fetch_gm_scores([entry.entrez_id for entry in _result])
_result = annotate_with_gm_scores(_result, gene_scores)
if self._is_pedia_enabled():
pedia_scores = self._fetch_pedia_scores(_result)
if pedia_scores:
_result = annotate_with_pedia_scores(_result, pedia_scores)
fields = {x[1].label: x[0] for x in enumerate(list(ExtraAnnoField.objects.all()))}
_result = unroll_extra_annos_result(_result, fields)
self.job.add_log_entry("Writing output file...")
total = len(_result)
steps = math.ceil(total / 10)
for i, small_var in enumerate(_result):
if self._is_prioritization_enabled() or self._is_pathogenicity_enabled():
if (
self._is_prioritization_enabled()
or self._is_pathogenicity_enabled()
or self._is_gm_enabled
or self._is_pedia_enabled()
):
if i % steps == 0:
self.job.add_log_entry("{}%".format(int(100 * i / total)))
else:
Expand Down Expand Up @@ -433,6 +471,63 @@ def _fetch_gene_scores(self, entrez_ids):
else:
return {}

def _fetch_gm_scores(self, entrez_ids):
    """Return the Gestalt Matcher scores as a dict keyed by stringified gene ID.

    The scores are obtained from ``prioritize_genes_gm`` using the query's
    ``prio_gm`` argument; ``entrez_ids`` is part of the signature but is not used.

    A dict is always returned.  It is empty when Gestalt Matcher prioritization is
    not enabled, when the query has no ``prio_gm`` value, or when the request fails
    with ``ConnectionError`` (the error is added to the job log in that case).
    """
    prio_gm = self.query_args.get("prio_gm")
    if not (self._is_gm_enabled() and prio_gm):
        return {}
    try:
        return {
            str(gene_id): score
            for gene_id, _gene_symbol, score, _priority_type in prioritize_genes_gm(
                prio_gm, logging=self.job.add_log_entry
            )
        }
    except ConnectionError as e:
        self.job.add_log_entry(e)
        # Do not fall through with an implicit ``None``: the caller hands the result
        # to ``annotate_with_gm_scores`` without checking it.
        return {}

def _fetch_pedia_scores(self, result):
    """Return the PEDIA scores as a dict keyed by stringified gene ID.

    One record is built for every row of ``result`` that has an Entrez ID as well
    as ``phenotype_score``, ``pathogenicity_score`` and ``gm_score`` attributes.
    The records are sent to ``get_pedia_scores`` together with the case name.

    A dict is always returned.  It is empty when PEDIA prioritization is not
    enabled or when the request fails with ``ConnectionError`` (the error is added
    to the job log in that case).
    """
    if not self._is_pedia_enabled():
        return {}

    # Build the list of per-gene records from ``result``.
    payload_list = []
    for line in result:
        has_all_scores = all(
            (
                line.entrez_id,
                hasattr(line, "phenotype_score"),
                hasattr(line, "pathogenicity_score"),
                hasattr(line, "gm_score"),
            )
        )
        if not has_all_scores:
            # Rows lacking any of the required values are not submitted.
            continue
        payload_list.append(
            {
                "gene_name": line.symbol,
                "gene_id": line.entrez_id,
                "cada_score": line.phenotype_score,
                "cadd_score": line.pathogenicity_score,
                # An infinite gestalt score is submitted as 0.
                "gestalt_score": 0 if line.gm_score == float("inf") else line.gm_score,
                "label": False,
            }
        )

    case_name = self.job.case.name
    # Remove the first two characters ("F_") from prefixed case names.
    name = case_name[2:] if case_name.startswith("F_") else case_name
    scores = {"case_name": name, "genes": payload_list}

    try:
        return {str(gene_id): score for gene_id, _, score in get_pedia_scores(scores)}
    except ConnectionError as e:
        self.job.add_log_entry(e)
        # Keep the return type stable instead of falling through with ``None``.
        return {}

def _fetch_variant_scores(self, variants):
if self._is_pathogenicity_enabled():
try:
Expand Down
37 changes: 36 additions & 1 deletion backend/variants/models/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
from projectroles.plugins import get_backend_api
from sqlalchemy import and_

from ext_gestaltmatcher.models import (
SmallVariantQueryGestaltMatcherScores,
SmallVariantQueryPediaScores,
)
from variants.helpers import get_engine, get_meta
from variants.models import SmallVariantQueryGeneScores, SmallVariantQueryVariantScores
from variants.models.queries import (
Expand Down Expand Up @@ -98,7 +102,12 @@ def run_query_bg_job(pk):
tl_event.add_object(obj=filter_job.case, label="case_name", name=filter_job.case.name)

def _read_records(
inputf, smallvariantqueryresultset, pathogenicity_scores=None, phenotype_scores=None
inputf,
smallvariantqueryresultset,
pathogenicity_scores=None,
phenotype_scores=None,
gm_scores=None,
pedia_scores=None,
):
"""Read and yield ``SmallVariantQueryResultRow`` objects by reading ``inputf`` for the given ``SmallVariantQueryResultSet``."""
for line in inputf:
Expand All @@ -113,6 +122,12 @@ def _read_records(
if phenotype_scores and line.entrez_id:
payload["phenotype_score"] = phenotype_scores.get(line.entrez_id, -1)

if gm_scores and line.entrez_id:
payload["gm_score"] = gm_scores.get(line.entrez_id, 0)

if pedia_scores and line.entrez_id:
payload["pedia_score"] = pedia_scores.get(line.entrez_id, -1)

if pathogenicity_scores and phenotype_scores and line.entrez_id:
if payload["pathogenicity_score"] == -1 or payload["phenotype_score"] == -1:
payload["patho_pheno_score"] = -1
Expand Down Expand Up @@ -164,6 +179,24 @@ def _inner():
)
if row.gene_id
}
gm_scores = None
pedia_scores = None
if query_model.query_settings.get("gm_enabled"):
gm_scores = {
row.gene_id: row.score
for row in SmallVariantQueryGestaltMatcherScores.objects.filter(
query__sodar_uuid=query_model.sodar_uuid
)
if row.gene_id
}
if query_model.query_settings.get("pedia_enabled"):
pedia_scores = {
row.gene_id: row.score
for row in SmallVariantQueryPediaScores.objects.filter(
query__sodar_uuid=query_model.sodar_uuid
)
if row.gene_id
}

with transaction.atomic():
smallvariantqueryresultset = SmallVariantQueryResultSet.objects.create(
Expand All @@ -183,6 +216,8 @@ def _inner():
smallvariantqueryresultset,
pathogenicity_scores=pathogenicity_scores,
phenotype_scores=phenotype_scores,
gm_scores=gm_scores,
pedia_scores=pedia_scores,
),
n=1000,
):
Expand Down
Loading

0 comments on commit 6f695ec

Please sign in to comment.