Skip to content

Commit

Permalink
issue #1210 - more deployment checks
Browse files Browse the repository at this point in the history
  • Loading branch information
davmlaw committed Dec 12, 2024
1 parent 763c282 commit 5c59d7b
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 27 deletions.
20 changes: 2 additions & 18 deletions annotation/views.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import logging
import subprocess
from collections import defaultdict, Counter
from subprocess import check_output
from typing import Optional

import cdot
from django.conf import settings
Expand Down Expand Up @@ -34,11 +31,11 @@
from genes.models_enums import AnnotationConsortium, GeneSymbolAliasSource
from library.constants import WEEK_SECS
from library.django_utils import require_superuser, get_field_counts
from library.log_utils import log_traceback
from library.utils import first
from ontology.models import OntologyTerm, OntologyService, OntologyImport, OntologyVersion
from snpdb.models import VariantGridColumn, SomalierConfig, GenomeBuild, VCF, UserSettings, ColumnAnnotationLevel
from variantgrid.celery import app
from variantgrid.deployment_validation.somalier_check import verify_somalier_config


def get_build_contigs():
Expand Down Expand Up @@ -231,7 +228,7 @@ def annotation_detail(request):

somalier = None
if somalier_enabled := settings.SOMALIER.get("enabled"):
somalier = _verify_somalier_config()
somalier = verify_somalier_config()

# These are empty/None if not set.
annotations_ok = [all_ontologies_accounted_for,
Expand Down Expand Up @@ -265,19 +262,6 @@ def annotation_detail(request):
return render(request, "annotation/annotation_detail.html", context)


def _verify_somalier_config() -> Optional[str]:
    """Run the configured Somalier command and return the first line it prints.

    The executable is read from ``SomalierConfig().get_annotation("command")``
    and invoked with no arguments, with stderr merged into stdout.

    Returns:
        The first line of the command's output, or ``None`` if running the
        command or decoding its output raised (the traceback is logged via
        ``log_traceback``).
    """
    somalier_bin = SomalierConfig().get_annotation("command")
    somalier = None
    try:
        somalier_output = check_output([somalier_bin], stderr=subprocess.STDOUT)
        somalier = somalier_output.decode().split("\n", 1)[0]
    except Exception:
        # Deliberately best-effort: any failure is reported as "no version".
        # ``Exception`` (rather than a bare ``except:``) lets
        # KeyboardInterrupt / SystemExit propagate instead of being swallowed.
        log_traceback()

    return somalier


@require_POST
@require_superuser
def load_cached_web_resource(request, pk):
Expand Down
11 changes: 10 additions & 1 deletion genes/gene_matching.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import re
from collections import defaultdict
from functools import cached_property
Expand Down Expand Up @@ -187,6 +188,7 @@ def _aliases(self, alias_graph, genes_dict, gene_symbol, symbol_match_path, visi
visited_symbols=visited_symbols)

def _get_gene_id_and_match_info_for_symbol(self, gene_symbols) -> dict[str, list]:
logging.info("_get_gene_id_and_match_info_for_symbol")
gene_symbol_gene_id_and_match_info = defaultdict(list) # list items = (gene_id, match_info)
for gene_symbol_id in gene_symbols:
gene_name = clean_string(str(gene_symbol_id)).upper()
Expand All @@ -198,10 +200,12 @@ def _get_gene_id_and_match_info_for_symbol(self, gene_symbols) -> dict[str, list
for gene_id, match_info in alias_items.items():
gene_symbol_gene_id_and_match_info[gene_symbol_id].append((gene_id, match_info))
# Else - no match?

logging.info("/_get_gene_id_and_match_info_for_symbol")
return gene_symbol_gene_id_and_match_info

def match_symbols_to_genes(self, release_gene_symbols):
logging.info("match_symbols_to_genes")

gene_symbols = (rgs.gene_symbol_id for rgs in release_gene_symbols)
gene_symbol_gene_id_and_match_info = self._get_gene_id_and_match_info_for_symbol(gene_symbols)

Expand All @@ -220,17 +224,22 @@ def match_symbols_to_genes(self, release_gene_symbols):
def match_gene_symbols(self, gene_symbols: Iterable[str]):
""" gene_symbols must not have been matched """

# Progress logging (root logger) marking entry into this method
logging.info("match_gene_symbols")
# Build one unsaved ReleaseGeneSymbol per symbol, each tied to self.release
release_gene_symbols = [ReleaseGeneSymbol(release=self.release, gene_symbol_id=gene_symbol_id)
for gene_symbol_id in gene_symbols]
if release_gene_symbols:
logging.info("match_gene_symbols: creating release_gene_symbols size=%d", len(release_gene_symbols))

# Need ignore_conflicts=False so we get back PKs
# The name is rebound to whatever bulk_create returns (inserted in batches of 2000)
release_gene_symbols = ReleaseGeneSymbol.objects.bulk_create(release_gene_symbols,
batch_size=2000, ignore_conflicts=False)
logging.info("done creating release symbols!")

# Hand the release gene symbols on to match_symbols_to_genes
self.match_symbols_to_genes(release_gene_symbols)

def _match_unmatched_gene_symbol_qs(self, gene_symbol_qs):
""" Match any matched symbols without matched genes """
logging.info("_match_unmatched_gene_symbol_qs - getting genes not in this release")
unmatched_symbols_qs = gene_symbol_qs.exclude(releasegenesymbol__release=self.release)
unmatched_symbols = list(unmatched_symbols_qs.values_list("symbol", flat=True).distinct())
self.match_gene_symbols(unmatched_symbols)
Expand Down
17 changes: 13 additions & 4 deletions variantgrid/deployment_validation/annotation_files_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,16 @@ def annotation_data_exists(flat=False) -> dict:


def check_cdot_data() -> dict:
cdot_checks = {}

for genome_build in GenomeBuild.builds_with_annotation():
cdot_checks[f"cdot_{genome_build}"] = {
"valid": TranscriptVersion.objects.filter(genome_build=genome_build).exists(),
"fix": f"python manage import_gene_annotation --genome-build={genome_build.name}",
}

try:
# Check that latest exists
from cdot.data_release import get_latest_data_release_tag_name, _get_version_from_tag_name

tag_name = get_latest_data_release_tag_name()
Expand All @@ -92,9 +101,9 @@ def check_cdot_data() -> dict:
"notes": f"data version = latest ({cdot_data_version})",
"fix": "python3 manage.py import_cdot_latest"
}
return {
"cdot_data": cdot_data,
}
cdot_checks["latest_cdot_data"] = cdot_data
except ImportError:
# Will already be covered in library version > 0.2.26
return {}
pass

return cdot_checks
18 changes: 16 additions & 2 deletions variantgrid/deployment_validation/annotation_status_checks.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
from django.db.models import Count

from annotation.models import AnnotationRangeLock, AnnotationRun, VariantAnnotationVersion
from annotation.models import AnnotationRangeLock, AnnotationRun, VariantAnnotationVersion, AnnotationVersion
from snpdb.models import GenomeBuild


def check_annotation_status() -> dict:
def check_annotation_versions() -> dict:
    """Check that each annotated genome build has a valid latest AnnotationVersion.

    For every build from ``GenomeBuild.builds_with_annotation()`` this calls
    ``AnnotationVersion.latest(genome_build, validate=True)`` and records
    whether it raised.

    Returns:
        A dict with one entry per genome build. Each value is a dict with a
        boolean ``"valid"`` and, when the call raised, a ``"fix"`` message
        containing the exception text.
    """
    annotation_versions_checks = {}
    for genome_build in GenomeBuild.builds_with_annotation():
        build_av = {}
        try:
            # Called only to see whether it raises; the returned version is not used
            AnnotationVersion.latest(genome_build, validate=True)
            build_av["valid"] = True
        except Exception as e:
            build_av["valid"] = False
            build_av["fix"] = f"{e}: See 'Annotation' web page for details"
        # NOTE(review): the ``=`` specifier renders as "genome_build=" followed by
        # repr(genome_build) - confirm that is the intended display key.
        annotation_versions_checks[f"Annotation Version for {genome_build=}"] = build_av
    return annotation_versions_checks


def check_variant_annotation_runs_status() -> dict:
# I am going to make this a warning for a while - before making it an error
ARL_DUPE_ERROR = False
# see https://github.com/SACGF/variantgrid_shariant/issues/177
Expand Down
40 changes: 40 additions & 0 deletions variantgrid/deployment_validation/somalier_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import subprocess
from subprocess import check_output
from typing import Optional

from library.log_utils import log_traceback
from snpdb.models import SomalierConfig, GenomeBuild


def verify_somalier_config() -> Optional[str]:
    """Run the configured Somalier command and return the first line it prints.

    The executable is read from ``SomalierConfig().get_annotation("command")``
    and invoked with no arguments, with stderr merged into stdout.

    Returns:
        The first line of the command's output, or ``None`` if running the
        command or decoding its output raised (the traceback is logged via
        ``log_traceback``).
    """
    somalier_bin = SomalierConfig().get_annotation("command")
    somalier = None
    try:
        somalier_output = check_output([somalier_bin], stderr=subprocess.STDOUT)
        somalier = somalier_output.decode().split("\n", 1)[0]
    except Exception:
        # Deliberately best-effort: any failure is reported as "no version".
        # ``Exception`` (rather than a bare ``except:``) lets
        # KeyboardInterrupt / SystemExit propagate instead of being swallowed.
        log_traceback()

    return somalier


def check_somalier() -> dict:
    """Build deployment-check entries for Somalier and its per-build sites VCFs.

    Returns:
        A dict keyed by check name, each value being a dict with ``"valid"``
        and ``"fix"`` entries:

        * ``"somalier_config"`` - ``"valid"`` is the result of
          ``verify_somalier_config()`` (a string, or ``None`` on failure).
        * ``"somalier_sites_vcf_<build name>"`` - one per build from
          ``GenomeBuild.builds_with_annotation()``; ``"valid"`` is ``False``
          if ``get_sites_vcf`` raised for that build, and ``"fix"`` is the
          value of ``get_sites_vcf_name`` for that build.
    """
    somalier_data = {
        "somalier_config": {
            "valid": verify_somalier_config(),
            "fix": "Install Somalier, and place in path https://github.com/brentp/somalier/",
        }
    }
    somalier_cfg = SomalierConfig()
    for genome_build in GenomeBuild.builds_with_annotation():
        try:
            # Called only to see whether it raises; the return value is not used
            somalier_cfg.get_sites_vcf(genome_build)
        except Exception:
            valid = False
        else:
            valid = True
        somalier_data[f"somalier_sites_vcf_{genome_build.name}"] = {
            "valid": valid,
            "fix": somalier_cfg.get_sites_vcf_name(genome_build),
        }
    return somalier_data
9 changes: 7 additions & 2 deletions variantopedia/management/commands/deployment_check.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging

from django.conf import settings
from django.core.management.base import BaseCommand

from variantgrid.deployment_validation.annotation_files_check import annotation_data_exists, check_cdot_data
from variantgrid.deployment_validation.annotation_status_checks import check_annotation_status
from variantgrid.deployment_validation.annotation_status_checks import check_annotation_versions, check_variant_annotation_runs_status
from variantgrid.deployment_validation.celery_checks import check_celery_tasks
from variantgrid.deployment_validation.column_check import check_variantgrid_columns
from variantgrid.deployment_validation.library_version_checks import check_library_versions
from variantgrid.deployment_validation.somalier_check import check_somalier
from variantgrid.deployment_validation.tool_version_checks import check_tool_versions
from variantgrid.deployment_validation.vep_check import check_vep

Expand All @@ -22,14 +24,17 @@ def handle(self, *args, **options):

checks = {
"Annotation data exists": annotation_data_exists(flat=True),
"Annotation status": check_annotation_status(),
"Annotation Versions": check_annotation_versions(),
"Variant Annotation status": check_variant_annotation_runs_status(),
"Library versions": check_library_versions(),
"Tool versions": check_tool_versions(),
"cdot data": check_cdot_data(),
"Celery Tasks": check_celery_tasks(),
"Columns": check_variantgrid_columns(),
"VEP": check_vep(),
}
if settings.SOMALIER.get("enabled"):
checks["somalier"] = check_somalier()

for check_type, check in checks.items():
for k, data in check.items():
Expand Down

0 comments on commit 5c59d7b

Please sign in to comment.