Skip to content

Commit

Permalink
feat: extend initdev command to also setup case import (#1942) (#1948)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Sep 3, 2024
1 parent d345ef1 commit d958421
Show file tree
Hide file tree
Showing 18 changed files with 544 additions and 147 deletions.
1 change: 1 addition & 0 deletions backend/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
varfish/users/management/commands/data/*.gz* filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions backend/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ jedi = "==0.19.1"
watchdog = "*"
werkzeug = "~=3.0.4"
uritemplate = "*"
django-types = "*"

[ldap-packages]
# Dependencies for enabling LDAP support. You will need the system library
Expand Down
237 changes: 129 additions & 108 deletions backend/Pipfile.lock

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions backend/cases_import/models/base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import uuid as uuid_object

from bgjobs.models import BackgroundJob, JobModelMessageMixin
from django.contrib.auth import get_user_model
from django.db import models
from django.urls import reverse
from projectroles.models import Project

from cases_import.proto import get_case_name_from_family_payload
from varfish.utils import JSONField

#: The User model to use.
User = get_user_model()


class CaseImportAction(models.Model):
"""Stores the necessary information for importing a case."""
Expand Down Expand Up @@ -76,23 +80,25 @@ class CaseImportBackgroundJobManager(models.Manager):
together with the backing ``BackgroundJob``.
"""

def create_full(self, *, caseimportaction, project, user):
def create_full(self, *, caseimportaction: CaseImportAction, user: User):
case_name = caseimportaction.get_case_name()
bg_job = BackgroundJob.objects.create(
name=f"Import of case '{case_name}'",
project=project,
project=caseimportaction.project,
job_type=CaseImportBackgroundJob.spec_name,
user=user,
)
instance = super().create(project=project, bg_job=bg_job, caseimportaction=caseimportaction)
instance = super().create(
project=caseimportaction.project, bg_job=bg_job, caseimportaction=caseimportaction
)
return instance


class CaseImportBackgroundJob(JobModelMessageMixin, models.Model):
"""Background job for importing cases with the ``cases_import`` app."""

# We use a custom manager that provides creation together with the ``BackgroundJob``.
objects = CaseImportBackgroundJobManager()
objects: CaseImportBackgroundJobManager = CaseImportBackgroundJobManager()

#: Task description for logging.
task_desc = "Case Import"
Expand All @@ -119,6 +125,7 @@ class CaseImportBackgroundJob(JobModelMessageMixin, models.Model):
help_text="Background job for state etc.",
on_delete=models.CASCADE,
)

#: The case import action to perform.
caseimportaction = models.ForeignKey(CaseImportAction, on_delete=models.CASCADE, null=False)

Expand Down
23 changes: 12 additions & 11 deletions backend/cases_import/models/executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,17 +848,16 @@ def annotate(
"--id-mapping",
f"@{path_id_map}",
]
# if settings.DEBUG: # XXX remove this
# args += ["--max-var-count", "1000"]
# Setup environment so the worker can access the internal S3 storage.
endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host
endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port
env = {
**dict(os.environ.items()),
"LC_ALL": "C",
"AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key,
"AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key,
"AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}",
"AWS_ENDPOINT_URL": (
f"http://{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host}"
f":{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port}"
),
"AWS_REGION": "us-east-1",
}
# Actually execute the worker.
Expand Down Expand Up @@ -929,14 +928,15 @@ def prefilter_seqvars(
f"{bucket}/{ingested_on_s3.path}",
]
# Setup environment so the worker can access the internal S3 storage.
endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host
endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port
env = {
**dict(os.environ.items()),
"LC_ALL": "C",
"AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key,
"AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key,
"AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}",
"AWS_ENDPOINT_URL": (
f"http://{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host}"
f":{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port}"
),
"AWS_REGION": "us-east-1",
}
# Actually execute the worker.
Expand Down Expand Up @@ -1007,14 +1007,15 @@ def annotate(
args += ["--path-in", f"{bucket}/{entry.path}"]
args += ["--path-out", f"{bucket}/{path_out}", "--id-mapping", f"@{path_id_map}"]
# Setup environment so the worker can access the internal S3 storage.
endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host
endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port
env = {
**dict(os.environ.items()),
"LC_ALL": "C",
"AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key,
"AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key,
"AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}",
"AWS_ENDPOINT_URL": (
f"http://{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host}"
f":{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port}"
),
"AWS_REGION": "us-east-1",
}
# Actually execute the worker.
Expand Down
2 changes: 1 addition & 1 deletion backend/cases_import/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@


@app.task(bind=True)
def run_caseimportactionbackgroundjob(_self, caseimportactionbackgroundjob_pk):
def run_caseimportactionbackgroundjob(_self, *, caseimportactionbackgroundjob_pk: int):
"""Task to execute a ``cases_import.models.CaseImportActionBackgroundJob``."""
return models.run_caseimportactionbackgroundjob(pk=caseimportactionbackgroundjob_pk)
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_cramino_qc.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_dragen_qc.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_ngsbits_qc.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_samtools_qc.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_seqvars.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_strucvars.yaml
Git LFS file not shown
10 changes: 6 additions & 4 deletions backend/cases_import/views_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ def create(self, request, *args, **kwargs):
serializer.is_valid(raise_exception=True)
caseimportbackgroundjob = self.perform_create(serializer)
if caseimportbackgroundjob:
tasks.run_caseimportactionbackgroundjob.delay(caseimportbackgroundjob.pk)
tasks.run_caseimportactionbackgroundjob.delay(
caseimportactionbackgroundjob_pk=caseimportbackgroundjob.pk
)
headers = self.get_success_headers(serializer.data)
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)

Expand All @@ -70,7 +72,6 @@ def perform_create(self, serializer) -> typing.Optional[CaseImportBackgroundJob]
if serializer.instance.state == CaseImportAction.STATE_SUBMITTED:
return CaseImportBackgroundJob.objects.create_full(
caseimportaction=serializer.instance,
project=self.get_project(),
user=self.request.user,
)
else:
Expand Down Expand Up @@ -107,7 +108,9 @@ def update(self, request, *args, **kwargs):
instance._prefetched_objects_cache = {}
caseimportbackgroundjob = self.perform_update(serializer)
if caseimportbackgroundjob:
tasks.run_caseimportactionbackgroundjob.delay(caseimportbackgroundjob.pk)
tasks.run_caseimportactionbackgroundjob.delay(
caseimportactionbackgroundjob_pk=caseimportbackgroundjob.pk
)
return Response(serializer.data)

def perform_update(self, serializer) -> typing.Optional[CaseImportBackgroundJob]:
Expand All @@ -121,7 +124,6 @@ def perform_update(self, serializer) -> typing.Optional[CaseImportBackgroundJob]
if serializer.instance.state == CaseImportAction.STATE_SUBMITTED:
return CaseImportBackgroundJob.objects.create_full(
caseimportaction=serializer.instance,
project=self.get_project(),
user=self.request.user,
)
else:
Expand Down
Git LFS file not shown
Git LFS file not shown
173 changes: 173 additions & 0 deletions backend/varfish/users/management/commands/data/Case_1.grch37.yaml.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# family with only metadata field
family:
proband:
id: index
subject:
id: index
sex: MALE
karyotypicSex: XY
phenotypicFeatures:
- type:
id: "HP:0012469"
label: "Infantile spasms"
excluded: false
modifiers:
- id: "HP:0031796"
label: "Recurrent"
measurements:
- assay:
id: NCIT:C158253
label: Targeted Genome Sequencing
value:
ontologyClass:
id: NCIT:C171177
label: Sequencing Data File
files:
- uri: s3://varfish-server/seqmeta/exon-set/grch37/all-coding-exons-1.0.bed.gz
individualToFileIdentifiers:
index: {{ data_case }}_index
fileAttributes:
checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
designation: sequencing_targets
              genomebuild: grch37
mimetype: text/x-bed+x-bgzip
# - uri: s3://data-for-import/example/index.bam
# individualToFileIdentifiers:
# mother: {{ data_case }}_mother
# fileAttributes:
# checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
# designation: read_alignments
# genomebuild: grch38
# mimetype: text/x-bam+x-bgzip
diseases:
- term:
id: OMIM:164400
label: "SPINOCEREBELLAR ATAXIA 1; SCA1"
excluded: false
metaData: &metadata-prototype
created: "2019-07-21T00:25:54.662Z"
createdBy: Peter N. Robinson
resources:
- id: hp
name: human phenotype ontology
url: http://purl.obolibrary.org/obo/hp.owl
version: "2018-03-08"
namespacePrefix: HP
iriPrefix: hp
phenopacketSchemaVersion: "2.0"
relatives:
- id: mother
subject:
id: mother
sex: FEMALE
karyotypicSex: XX
phenotypicFeatures:
- type:
id: "HP:0012469"
label: "Infantile spasms"
excluded: true
measurements:
- assay:
id: NCIT:C158253
label: Targeted Genome Sequencing
value:
ontologyClass:
id: NCIT:C171177
label: Sequencing Data File
files:
- uri: s3://varfish-server/seqmeta/exon-set/grch37/all-coding-exons-1.0.bed.gz
individualToFileIdentifiers:
mother: {{ data_case }}_mother
fileAttributes:
checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
designation: sequencing_targets
              genomebuild: grch37
mimetype: text/x-bed+x-bgzip
# - uri: s3://data-for-import/example/mother.bam
# individualToFileIdentifiers:
# mother: {{ data_case }}_mother
# fileAttributes:
# checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
# designation: read_alignments
# genomebuild: grch38
# mimetype: text/x-bam+x-bgzip
diseases:
- term:
id: OMIM:164400
label: "SPINOCEREBELLAR ATAXIA 1; SCA1"
excluded: true
metaData: *metadata-prototype
- id: father
subject:
id: father
sex: MALE
karyotypicSex: XY
phenotypicFeatures:
- type:
id: "HP:0012469"
label: "Infantile spasms"
excluded: true
measurements:
- assay:
id: NCIT:C158253
label: Targeted Genome Sequencing
value:
ontologyClass:
id: NCIT:C171177
label: Sequencing Data File
files:
- uri: s3://varfish-server/seqmeta/exon-set/grch37/all-coding-exons-1.0.bed.gz
individualToFileIdentifiers:
father: {{ data_case }}_father
fileAttributes:
checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
designation: sequencing_targets
              genomebuild: grch37
mimetype: text/x-bed+x-bgzip
# - uri: s3://data-for-import/example/father.bam
# individualToFileIdentifiers:
# father: {{ data_case }}_father
# fileAttributes:
# checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
# designation: read_alignments
# genomebuild: grch38
# mimetype: text/x-bam+x-bgzip
diseases:
- term:
id: OMIM:164400
label: "SPINOCEREBELLAR ATAXIA 1; SCA1"
excluded: true
metaData: *metadata-prototype
pedigree:
persons:
- familyId: {{ data_case }}
individualId: index
paternalId: father
maternalId: mother
sex: MALE
affectedStatus: AFFECTED
- familyId: {{ data_case }}
individualId: father
paternalId: "0"
maternalId: "0"
sex: MALE
affectedStatus: UNAFFECTED
- familyId: {{ data_case }}
individualId: mother
paternalId: "0"
maternalId: "0"
sex: FEMALE
affectedStatus: UNAFFECTED
metaData: *metadata-prototype
files:
- uri: file://{{ data_path }}/{{ data_case }}.grch37.gatk_hc.vcf.gz
individualToFileIdentifiers:
index: {{ data_case }}_index
father: {{ data_case }}_father
mother: {{ data_case }}_mother
fileAttributes:
checksum: sha256:7104962533dec7a435cdc32785d7bd01caffc87bd68e6edf3c25d43c8136b622
designation: variant_calls
variant_type: seqvars
genomebuild: grch37
mimetype: text/plain+x-bgzip+x-variant-call-format
3 changes: 3 additions & 0 deletions backend/varfish/users/management/commands/data/Case_1.ped
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FAM_Case_1_index Case_1_index Case_1_father Case_1_mother 1 2
FAM_Case_1_index Case_1_father 0 0 1 1
FAM_Case_1_index Case_1_mother 0 0 2 1
Loading

0 comments on commit d958421

Please sign in to comment.