Skip to content

Commit

Permalink
feat: extend initdev command to also setup case import (#1942) (#1948)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Sep 3, 2024
1 parent d345ef1 commit d958421
Show file tree
Hide file tree
Showing 18 changed files with 544 additions and 147 deletions.
1 change: 1 addition & 0 deletions backend/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
varfish/users/management/commands/data/*.gz* filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions backend/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ jedi = "==0.19.1"
watchdog = "*"
werkzeug = "~=3.0.4"
uritemplate = "*"
django-types = "*"

[ldap-packages]
# Dependencies for enabling LDAP support. You will need the system library
Expand Down
237 changes: 129 additions & 108 deletions backend/Pipfile.lock

Large diffs are not rendered by default.

15 changes: 11 additions & 4 deletions backend/cases_import/models/base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import uuid as uuid_object

from bgjobs.models import BackgroundJob, JobModelMessageMixin
from django.contrib.auth import get_user_model
from django.db import models
from django.urls import reverse
from projectroles.models import Project

from cases_import.proto import get_case_name_from_family_payload
from varfish.utils import JSONField

#: The User model to use.
User = get_user_model()


class CaseImportAction(models.Model):
"""Stores the necessary information for importing a case."""
Expand Down Expand Up @@ -76,23 +80,25 @@ class CaseImportBackgroundJobManager(models.Manager):
together with the backing ``BackgroundJob``.
"""

def create_full(self, *, caseimportaction, project, user):
def create_full(self, *, caseimportaction: CaseImportAction, user: User):
case_name = caseimportaction.get_case_name()
bg_job = BackgroundJob.objects.create(
name=f"Import of case '{case_name}'",
project=project,
project=caseimportaction.project,
job_type=CaseImportBackgroundJob.spec_name,
user=user,
)
instance = super().create(project=project, bg_job=bg_job, caseimportaction=caseimportaction)
instance = super().create(
project=caseimportaction.project, bg_job=bg_job, caseimportaction=caseimportaction
)
return instance


class CaseImportBackgroundJob(JobModelMessageMixin, models.Model):
"""Background job for importing cases with the ``cases_import`` app."""

# We use a custom manager that provides creation together with the ``BackgroundJob``.
objects = CaseImportBackgroundJobManager()
objects: CaseImportBackgroundJobManager = CaseImportBackgroundJobManager()

#: Task description for logging.
task_desc = "Case Import"
Expand All @@ -119,6 +125,7 @@ class CaseImportBackgroundJob(JobModelMessageMixin, models.Model):
help_text="Background job for state etc.",
on_delete=models.CASCADE,
)

#: The case import action to perform.
caseimportaction = models.ForeignKey(CaseImportAction, on_delete=models.CASCADE, null=False)

Expand Down
23 changes: 12 additions & 11 deletions backend/cases_import/models/executors.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,17 +848,16 @@ def annotate(
"--id-mapping",
f"@{path_id_map}",
]
# if settings.DEBUG: # XXX remove this
# args += ["--max-var-count", "1000"]
# Setup environment so the worker can access the internal S3 storage.
endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host
endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port
env = {
**dict(os.environ.items()),
"LC_ALL": "C",
"AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key,
"AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key,
"AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}",
"AWS_ENDPOINT_URL": (
f"http://{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host}"
f":{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port}"
),
"AWS_REGION": "us-east-1",
}
# Actually execute the worker.
Expand Down Expand Up @@ -929,14 +928,15 @@ def prefilter_seqvars(
f"{bucket}/{ingested_on_s3.path}",
]
# Setup environment so the worker can access the internal S3 storage.
endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host
endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port
env = {
**dict(os.environ.items()),
"LC_ALL": "C",
"AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key,
"AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key,
"AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}",
"AWS_ENDPOINT_URL": (
f"http://{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host}"
f":{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port}"
),
"AWS_REGION": "us-east-1",
}
# Actually execute the worker.
Expand Down Expand Up @@ -1007,14 +1007,15 @@ def annotate(
args += ["--path-in", f"{bucket}/{entry.path}"]
args += ["--path-out", f"{bucket}/{path_out}", "--id-mapping", f"@{path_id_map}"]
# Setup environment so the worker can access the internal S3 storage.
endpoint_host = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host
endpoint_port = settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port
env = {
**dict(os.environ.items()),
"LC_ALL": "C",
"AWS_ACCESS_KEY_ID": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.access_key,
"AWS_SECRET_ACCESS_KEY": settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.secret_key,
"AWS_ENDPOINT_URL": f"http://{endpoint_host}:{endpoint_port}",
"AWS_ENDPOINT_URL": (
f"http://{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.host}"
f":{settings.VARFISH_CASE_IMPORT_INTERNAL_STORAGE.port}"
),
"AWS_REGION": "us-east-1",
}
# Actually execute the worker.
Expand Down
2 changes: 1 addition & 1 deletion backend/cases_import/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@


@app.task(bind=True)
def run_caseimportactionbackgroundjob(_self, caseimportactionbackgroundjob_pk):
def run_caseimportactionbackgroundjob(_self, *, caseimportactionbackgroundjob_pk: int):
"""Task to execute a ``cases_import.models.CaseImportActionBackgroundJob``."""
return models.run_caseimportactionbackgroundjob(pk=caseimportactionbackgroundjob_pk)
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_cramino_qc.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_dragen_qc.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_ngsbits_qc.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_samtools_qc.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_seqvars.yaml
Git LFS file not shown
4 changes: 2 additions & 2 deletions backend/cases_import/tests/data/singleton_strucvars.yaml
Git LFS file not shown
10 changes: 6 additions & 4 deletions backend/cases_import/views_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ def create(self, request, *args, **kwargs):
serializer.is_valid(raise_exception=True)
caseimportbackgroundjob = self.perform_create(serializer)
if caseimportbackgroundjob:
tasks.run_caseimportactionbackgroundjob.delay(caseimportbackgroundjob.pk)
tasks.run_caseimportactionbackgroundjob.delay(
caseimportactionbackgroundjob_pk=caseimportbackgroundjob.pk
)
headers = self.get_success_headers(serializer.data)
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)

Expand All @@ -70,7 +72,6 @@ def perform_create(self, serializer) -> typing.Optional[CaseImportBackgroundJob]
if serializer.instance.state == CaseImportAction.STATE_SUBMITTED:
return CaseImportBackgroundJob.objects.create_full(
caseimportaction=serializer.instance,
project=self.get_project(),
user=self.request.user,
)
else:
Expand Down Expand Up @@ -107,7 +108,9 @@ def update(self, request, *args, **kwargs):
instance._prefetched_objects_cache = {}
caseimportbackgroundjob = self.perform_update(serializer)
if caseimportbackgroundjob:
tasks.run_caseimportactionbackgroundjob.delay(caseimportbackgroundjob.pk)
tasks.run_caseimportactionbackgroundjob.delay(
caseimportactionbackgroundjob_pk=caseimportbackgroundjob.pk
)
return Response(serializer.data)

def perform_update(self, serializer) -> typing.Optional[CaseImportBackgroundJob]:
Expand All @@ -121,7 +124,6 @@ def perform_update(self, serializer) -> typing.Optional[CaseImportBackgroundJob]
if serializer.instance.state == CaseImportAction.STATE_SUBMITTED:
return CaseImportBackgroundJob.objects.create_full(
caseimportaction=serializer.instance,
project=self.get_project(),
user=self.request.user,
)
else:
Expand Down
Git LFS file not shown
Git LFS file not shown
173 changes: 173 additions & 0 deletions backend/varfish/users/management/commands/data/Case_1.grch37.yaml.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# family with only metadata field
family:
proband:
id: index
subject:
id: index
sex: MALE
karyotypicSex: XY
phenotypicFeatures:
- type:
id: "HP:0012469"
label: "Infantile spasms"
excluded: false
modifiers:
- id: "HP:0031796"
label: "Recurrent"
measurements:
- assay:
id: NCIT:C158253
label: Targeted Genome Sequencing
value:
ontologyClass:
id: NCIT:C171177
label: Sequencing Data File
files:
- uri: s3://varfish-server/seqmeta/exon-set/grch37/all-coding-exons-1.0.bed.gz
individualToFileIdentifiers:
index: {{ data_case }}_index
fileAttributes:
checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
designation: sequencing_targets
              genomebuild: grch37
mimetype: text/x-bed+x-bgzip
# - uri: s3://data-for-import/example/index.bam
# individualToFileIdentifiers:
# mother: {{ data_case }}_mother
# fileAttributes:
# checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
# designation: read_alignments
# genomebuild: grch38
# mimetype: text/x-bam+x-bgzip
diseases:
- term:
id: OMIM:164400
label: "SPINOCEREBELLAR ATAXIA 1; SCA1"
excluded: false
metaData: &metadata-prototype
created: "2019-07-21T00:25:54.662Z"
createdBy: Peter N. Robinson
resources:
- id: hp
name: human phenotype ontology
url: http://purl.obolibrary.org/obo/hp.owl
version: "2018-03-08"
namespacePrefix: HP
iriPrefix: hp
phenopacketSchemaVersion: "2.0"
relatives:
- id: mother
subject:
id: mother
sex: FEMALE
karyotypicSex: XX
phenotypicFeatures:
- type:
id: "HP:0012469"
label: "Infantile spasms"
excluded: true
measurements:
- assay:
id: NCIT:C158253
label: Targeted Genome Sequencing
value:
ontologyClass:
id: NCIT:C171177
label: Sequencing Data File
files:
- uri: s3://varfish-server/seqmeta/exon-set/grch37/all-coding-exons-1.0.bed.gz
individualToFileIdentifiers:
mother: {{ data_case }}_mother
fileAttributes:
checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
designation: sequencing_targets
              genomebuild: grch37
mimetype: text/x-bed+x-bgzip
# - uri: s3://data-for-import/example/mother.bam
# individualToFileIdentifiers:
# mother: {{ data_case }}_mother
# fileAttributes:
# checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
# designation: read_alignments
# genomebuild: grch38
# mimetype: text/x-bam+x-bgzip
diseases:
- term:
id: OMIM:164400
label: "SPINOCEREBELLAR ATAXIA 1; SCA1"
excluded: true
metaData: *metadata-prototype
- id: father
subject:
id: father
sex: MALE
karyotypicSex: XY
phenotypicFeatures:
- type:
id: "HP:0012469"
label: "Infantile spasms"
excluded: true
measurements:
- assay:
id: NCIT:C158253
label: Targeted Genome Sequencing
value:
ontologyClass:
id: NCIT:C171177
label: Sequencing Data File
files:
- uri: s3://varfish-server/seqmeta/exon-set/grch37/all-coding-exons-1.0.bed.gz
individualToFileIdentifiers:
father: {{ data_case }}_father
fileAttributes:
checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
designation: sequencing_targets
              genomebuild: grch37
mimetype: text/x-bed+x-bgzip
# - uri: s3://data-for-import/example/father.bam
# individualToFileIdentifiers:
# father: {{ data_case }}_father
# fileAttributes:
# checksum: sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
# designation: read_alignments
# genomebuild: grch38
# mimetype: text/x-bam+x-bgzip
diseases:
- term:
id: OMIM:164400
label: "SPINOCEREBELLAR ATAXIA 1; SCA1"
excluded: true
metaData: *metadata-prototype
pedigree:
persons:
- familyId: {{ data_case }}
individualId: index
paternalId: father
maternalId: mother
sex: MALE
affectedStatus: AFFECTED
- familyId: {{ data_case }}
individualId: father
paternalId: "0"
maternalId: "0"
sex: MALE
affectedStatus: UNAFFECTED
- familyId: {{ data_case }}
individualId: mother
paternalId: "0"
maternalId: "0"
sex: FEMALE
affectedStatus: UNAFFECTED
metaData: *metadata-prototype
files:
- uri: file://{{ data_path }}/{{ data_case }}.grch37.gatk_hc.vcf.gz
individualToFileIdentifiers:
index: {{ data_case }}_index
father: {{ data_case }}_father
mother: {{ data_case }}_mother
fileAttributes:
checksum: sha256:7104962533dec7a435cdc32785d7bd01caffc87bd68e6edf3c25d43c8136b622
designation: variant_calls
variant_type: seqvars
genomebuild: grch37
mimetype: text/plain+x-bgzip+x-variant-call-format
3 changes: 3 additions & 0 deletions backend/varfish/users/management/commands/data/Case_1.ped
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FAM_Case_1_index Case_1_index Case_1_father Case_1_mother 1 2
FAM_Case_1_index Case_1_father 0 0 1 1
FAM_Case_1_index Case_1_mother 0 0 2 1
Loading

0 comments on commit d958421

Please sign in to comment.