diff --git a/annotation/views.py b/annotation/views.py index 36f84297f..0c317f899 100644 --- a/annotation/views.py +++ b/annotation/views.py @@ -1,8 +1,5 @@ import logging -import subprocess from collections import defaultdict, Counter -from subprocess import check_output -from typing import Optional import cdot from django.conf import settings @@ -34,11 +31,11 @@ from genes.models_enums import AnnotationConsortium, GeneSymbolAliasSource from library.constants import WEEK_SECS from library.django_utils import require_superuser, get_field_counts -from library.log_utils import log_traceback from library.utils import first from ontology.models import OntologyTerm, OntologyService, OntologyImport, OntologyVersion from snpdb.models import VariantGridColumn, SomalierConfig, GenomeBuild, VCF, UserSettings, ColumnAnnotationLevel from variantgrid.celery import app +from variantgrid.deployment_validation.somalier_check import verify_somalier_config def get_build_contigs(): @@ -231,7 +228,7 @@ def annotation_detail(request): somalier = None if somalier_enabled := settings.SOMALIER.get("enabled"): - somalier = _verify_somalier_config() + somalier = verify_somalier_config() # These are empty/None if not set. annotations_ok = [all_ontologies_accounted_for, @@ -265,19 +262,6 @@ def annotation_detail(request): return render(request, "annotation/annotation_detail.html", context) -def _verify_somalier_config() -> Optional[str]: - somalier_cfg = SomalierConfig() - somalier_bin = somalier_cfg.get_annotation("command") - somalier = None - try: - somalier_output = check_output([somalier_bin], stderr=subprocess.STDOUT) - somalier = somalier_output.decode().split("\n", 1)[0] - except: - log_traceback() - - return somalier - - @require_POST @require_superuser def load_cached_web_resource(request, pk): diff --git a/genes/gene_matching.py b/genes/gene_matching.py index d6d368d7d..6e0a2c1e5 100644 --- a/genes/gene_matching.py +++ b/genes/gene_matching.py @@ -1,3 +1,4 @@ +import logging import re from collections import defaultdict from functools import cached_property @@ -187,6 +188,7 @@ def _aliases(self, alias_graph, genes_dict, gene_symbol, symbol_match_path, visi visited_symbols=visited_symbols) def _get_gene_id_and_match_info_for_symbol(self, gene_symbols) -> dict[str, list]: + logging.info("_get_gene_id_and_match_info_for_symbol") gene_symbol_gene_id_and_match_info = defaultdict(list) # list items = (gene_id, match_info) for gene_symbol_id in gene_symbols: gene_name = clean_string(str(gene_symbol_id)).upper() @@ -198,10 +200,12 @@ def _get_gene_id_and_match_info_for_symbol(self, gene_symbols) -> dict[str, list for gene_id, match_info in alias_items.items(): gene_symbol_gene_id_and_match_info[gene_symbol_id].append((gene_id, match_info)) # Else - no match? - + logging.info("/_get_gene_id_and_match_info_for_symbol") return gene_symbol_gene_id_and_match_info def match_symbols_to_genes(self, release_gene_symbols): + logging.info("match_symbols_to_genes") + gene_symbols = (rgs.gene_symbol_id for rgs in release_gene_symbols) gene_symbol_gene_id_and_match_info = self._get_gene_id_and_match_info_for_symbol(gene_symbols) @@ -220,17 +224,22 @@ def match_symbols_to_genes(self, release_gene_symbols): def match_gene_symbols(self, gene_symbols: Iterable[str]): """ gene_symbols must not have been matched """ + logging.info("match_gene_symbols") release_gene_symbols = [ReleaseGeneSymbol(release=self.release, gene_symbol_id=gene_symbol_id) for gene_symbol_id in gene_symbols] if release_gene_symbols: + logging.info("match_gene_symbols: creating release_gene_symbols size=%d", len(release_gene_symbols)) + # Need ignore_conflicts=False so we get back PKs release_gene_symbols = ReleaseGeneSymbol.objects.bulk_create(release_gene_symbols, batch_size=2000, ignore_conflicts=False) + logging.info("done creating release symbols!") self.match_symbols_to_genes(release_gene_symbols) def _match_unmatched_gene_symbol_qs(self, gene_symbol_qs): """ Match any matched symbols without matched genes """ + logging.info("_match_unmatched_gene_symbol_qs - getting genes not in this release") unmatched_symbols_qs = gene_symbol_qs.exclude(releasegenesymbol__release=self.release) unmatched_symbols = list(unmatched_symbols_qs.values_list("symbol", flat=True).distinct()) self.match_gene_symbols(unmatched_symbols) diff --git a/variantgrid/deployment_validation/annotation_files_check.py b/variantgrid/deployment_validation/annotation_files_check.py index 694e9670b..7d2398384 100644 --- a/variantgrid/deployment_validation/annotation_files_check.py +++ b/variantgrid/deployment_validation/annotation_files_check.py @@ -78,7 +78,16 @@ def annotation_data_exists(flat=False) -> dict: def check_cdot_data() -> dict: + cdot_checks = {} + + for genome_build in GenomeBuild.builds_with_annotation(): + cdot_checks[f"cdot_{genome_build}"] = { + "valid": TranscriptVersion.objects.filter(genome_build=genome_build).exists(), + "fix": f"python manage import_gene_annotation --genome-build={genome_build.name}", + } + try: + # Check that latest exists from cdot.data_release import get_latest_data_release_tag_name, _get_version_from_tag_name tag_name = get_latest_data_release_tag_name() @@ -92,9 +101,9 @@ def check_cdot_data() -> dict: "notes": f"data version = latest ({cdot_data_version})", "fix": "python3 manage.py import_cdot_latest" } - return { - "cdot_data": cdot_data, - } + cdot_checks["latest_cdot_data"] = cdot_data except ImportError: # Will already be covered in library version > 0.2.26 - return {} + pass + + return cdot_checks \ No newline at end of file diff --git a/variantgrid/deployment_validation/annotation_status_checks.py b/variantgrid/deployment_validation/annotation_status_checks.py index e236e9a16..8abfaf45b 100644 --- a/variantgrid/deployment_validation/annotation_status_checks.py +++ b/variantgrid/deployment_validation/annotation_status_checks.py @@ -1,10 +1,24 @@ from django.db.models import Count -from annotation.models import AnnotationRangeLock, AnnotationRun, VariantAnnotationVersion +from annotation.models import AnnotationRangeLock, AnnotationRun, VariantAnnotationVersion, AnnotationVersion from snpdb.models import GenomeBuild -def check_annotation_status() -> dict: +def check_annotation_versions() -> dict: + annotation_versions_checks = {} + for genome_build in GenomeBuild.builds_with_annotation(): + build_av = {} + try: + annotation_version = AnnotationVersion.latest(genome_build, validate=True) + build_av["valid"] = True + except Exception as e: + build_av["valid"] = False + build_av["fix"] = f"{e}: See 'Annotation' web page for details" + annotation_versions_checks[f"Annotation Version for {genome_build=}"] = build_av + return annotation_versions_checks + + +def check_variant_annotation_runs_status() -> dict: # I am going to make this a warning for a while - before making it an error ARL_DUPE_ERROR = False # see https://github.com/SACGF/variantgrid_shariant/issues/177 diff --git a/variantgrid/deployment_validation/somalier_check.py b/variantgrid/deployment_validation/somalier_check.py new file mode 100644 index 000000000..bc03aec93 --- /dev/null +++ b/variantgrid/deployment_validation/somalier_check.py @@ -0,0 +1,40 @@ +import subprocess +from subprocess import check_output +from typing import Optional + +from library.log_utils import log_traceback +from snpdb.models import SomalierConfig, GenomeBuild + + +def verify_somalier_config() -> Optional[str]: + somalier_cfg = SomalierConfig() + somalier_bin = somalier_cfg.get_annotation("command") + somalier = None + try: + somalier_output = check_output([somalier_bin], stderr=subprocess.STDOUT) + somalier = somalier_output.decode().split("\n", 1)[0] + except: + log_traceback() + + return somalier + + +def check_somalier() -> dict: + somalier_data = { + "somalier_config": { + "valid": verify_somalier_config(), + "fix": f"Install Somalier, and place in path https://github.com/brentp/somalier/" + } + } + somalier_cfg = SomalierConfig() + for genome_build in GenomeBuild.builds_with_annotation(): + try: + somalier_cfg.get_sites_vcf(genome_build) + valid = True + except Exception as e: + valid = False + somalier_data[f"somalier_sites_vcf_{genome_build.name}"] = { + "valid": valid, + "fix": somalier_cfg.get_sites_vcf_name(genome_build), + } + return somalier_data diff --git a/variantopedia/management/commands/deployment_check.py b/variantopedia/management/commands/deployment_check.py index efac4978d..7f417591e 100644 --- a/variantopedia/management/commands/deployment_check.py +++ b/variantopedia/management/commands/deployment_check.py @@ -1,12 +1,14 @@ import logging +from django.conf import settings from django.core.management.base import BaseCommand from variantgrid.deployment_validation.annotation_files_check import annotation_data_exists, check_cdot_data -from variantgrid.deployment_validation.annotation_status_checks import check_annotation_status +from variantgrid.deployment_validation.annotation_status_checks import check_annotation_versions, check_variant_annotation_runs_status from variantgrid.deployment_validation.celery_checks import check_celery_tasks from variantgrid.deployment_validation.column_check import check_variantgrid_columns from variantgrid.deployment_validation.library_version_checks import check_library_versions +from variantgrid.deployment_validation.somalier_check import check_somalier from variantgrid.deployment_validation.tool_version_checks import check_tool_versions from variantgrid.deployment_validation.vep_check import check_vep @@ -22,7 +24,8 @@ def handle(self, *args, **options): checks = { "Annotation data exists": annotation_data_exists(flat=True), - "Annotation status": check_annotation_status(), + "Annotation Versions": check_annotation_versions(), + "Variant Annotation status": check_variant_annotation_runs_status(), "Library versions": check_library_versions(), "Tool versions": check_tool_versions(), "cdot data": check_cdot_data(), @@ -30,6 +33,8 @@ def handle(self, *args, **options): "Columns": check_variantgrid_columns(), "VEP": check_vep(), } + if settings.SOMALIER.get("enabled"): + checks["somalier"] = check_somalier() for check_type, check in checks.items(): for k, data in check.items():