diff --git a/.github/workflows/backend_checks.yml b/.github/workflows/backend_checks.yml index 6f243f49..e60ef44f 100644 --- a/.github/workflows/backend_checks.yml +++ b/.github/workflows/backend_checks.yml @@ -20,12 +20,13 @@ jobs: steps: - uses: actions/checkout@v4 - - name: black - uses: psf/black@stable + - name: Setup Python + uses: actions/setup-python@v5 with: - src: "./server" + python-version: "3.11" - - name: ruff - uses: chartboost/ruff-action@v1 - with: - src: "./server" + - name: Install dependencies + run: python3 -m pip install "server/.[dev]" + + - name: Check style + run: python3 -m ruff check server/ && python3 -m ruff format --check server/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5ebc9143..d6bb6133 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,16 +8,12 @@ repos: - id: end-of-file-fixer - id: check-merge-conflict - id: detect-aws-credentials - - repo: https://github.com/psf/black - rev: 23.7.0 - hooks: - - id: black - language_version: python3.11 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.286 + rev: v0.5.0 hooks: + - id: ruff-format - id: ruff - args: [--fix, --exit-non-zero-on-fix] + args: [--fix, --exit-non-zero-on-fix, --config=server/pyproject.toml] - repo: https://github.com/pre-commit/mirrors-eslint rev: "v8.20.0" hooks: diff --git a/server/pyproject.toml b/server/pyproject.toml index c7a67021..5c3a601c 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -69,42 +69,104 @@ root = "../." 
addopts = "--cov=src --cov-report term-missing" testpaths = ["tests"] -[tool.black] -line-length = 88 - [tool.ruff] -# pycodestyle (E, W) -# Pyflakes (F) -# flake8-annotations (ANN) -# flake8-quotes (Q) -# pydocstyle (D) -# pep8-naming (N) -# isort (I) -select = ["E", "W", "F", "ANN", "Q", "D", "N", "I"] - -fixable = ["I"] +src = ["curfu"] +[tool.ruff.lint] +select = [ + "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f + "E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w + "I", # https://docs.astral.sh/ruff/rules/#isort-i + "N", # https://docs.astral.sh/ruff/rules/#pep8-naming-n + "D", # https://docs.astral.sh/ruff/rules/#pydocstyle-d + "UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up + "ANN", # https://docs.astral.sh/ruff/rules/#flake8-annotations-ann + "ASYNC", # https://docs.astral.sh/ruff/rules/#flake8-async-async + "S", # https://docs.astral.sh/ruff/rules/#flake8-bandit-s + "B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b + "A", # https://docs.astral.sh/ruff/rules/#flake8-builtins-a + "C4", # https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4 + "DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz + "T10", # https://docs.astral.sh/ruff/rules/#flake8-debugger-t10 + "EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em + "LOG", # https://docs.astral.sh/ruff/rules/#flake8-logging-log + "G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g + "INP", # https://docs.astral.sh/ruff/rules/#flake8-no-pep420-inp + "PIE", # https://docs.astral.sh/ruff/rules/#flake8-pie-pie + "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20 + "PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt + "Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q + "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse + "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret + "SLF", # https://docs.astral.sh/ruff/rules/#flake8-self-slf + "SIM", # 
https://docs.astral.sh/ruff/rules/#flake8-simplify-sim + "ARG", # https://docs.astral.sh/ruff/rules/#flake8-unused-arguments-arg + "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth + "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh + "PERF", # https://docs.astral.sh/ruff/rules/#perflint-perf + "FURB", # https://docs.astral.sh/ruff/rules/#refurb-furb + "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf +] +fixable = [ + "I", + "F401", + "D", + "UP", + "ANN", + "B", + "C4", + "LOG", + "G", + "PIE", + "PT", + "RSE", + "SIM", + "PERF", + "FURB", + "RUF" +] +# ANN003 - missing-type-kwargs +# ANN101 - missing-type-self +# ANN102 - missing-type-cls +# D203 - one-blank-line-before-class # D205 - blank-line-after-summary +# D206 - indent-with-spaces* +# D213 - multi-line-summary-second-line +# D300 - triple-single-quotes* # D400 - ends-in-period # D415 - ends-in-punctuation -# ANN101 - missing-type-self -# ANN003 - missing-type-kwargs -# E501 - line-too-long -ignore = ["D205", "D400", "D415", "ANN101", "ANN003", "E501"] - -[tool.ruff.flake8-quotes] -docstring-quotes = "double" +# E111 - indentation-with-invalid-multiple* +# E114 - indentation-with-invalid-multiple-comment* +# E117 - over-indented* +# E501 - line-too-long* +# W191 - tab-indentation* +# S321 - suspicious-ftp-lib-usage +# *ignored for compatibility with formatter +ignore = [ + "ANN003", "ANN101", "ANN102", + "D203", "D205", "D206", "D213", "D300", "D400", "D415", + "E111", "E114", "E117", "E501", + "W191", + "S321", +] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] # ANN001 - missing-type-function-argument # ANN2 - missing-return-type # ANN102 - missing-type-cls -# N805 - invalid-first-argument-name-for-method -# F821 - undefined-name +# S101 - assert +# B011 - assert-false # F401 - unused-import -"tests/*" = ["ANN001", "ANN2", "ANN102"] -"setup.py" = ["F821"] +# N805 - invalid-first-argument-name-for-method +# D301 - escape-sequence-in-docstring 
+# INP001 - implicit-namespace-package +# ARG001 - unused-function-argument +# B008 - function-call-in-default-argument +"**/tests/*" = ["ANN001", "ANN2", "ANN102", "S101", "B011", "INP001", "ARG001"] "*__init__.py" = ["F401"] -"curfu/schemas.py" = ["ANN201", "N805", "ANN001"] -"curfu/routers/*" = ["D301"] -"curfu/cli.py" = ["D301"] +"**/src/curfu/schemas.py" = ["ANN201", "N805", "ANN001"] +"**/src/curfu/routers/*" = ["D301", "B008"] +"**/src/curfu/cli.py" = ["D301"] + +[tool.ruff.format] +docstring-code-format = true diff --git a/server/src/curfu/__init__.py b/server/src/curfu/__init__.py index 405d6d56..bb833ca9 100644 --- a/server/src/curfu/__init__.py +++ b/server/src/curfu/__init__.py @@ -1,4 +1,5 @@ """Fusion curation interface.""" + import logging from importlib.metadata import PackageNotFoundError, version from os import environ @@ -17,7 +18,7 @@ # establish environment-dependent params if "FUSION_EB_PROD" in environ: environ["FUSION_EB_PROD"] = "true" - LOG_FN = "/tmp/curfu.log" + LOG_FN = "/tmp/curfu.log" # noqa: S108 else: LOG_FN = "curfu.log" @@ -50,10 +51,7 @@ UTA_DB_URL = "postgresql://uta_admin@localhost:5433/uta/uta_20210129" # get local seqrepo location -if "SEQREPO_DATA_PATH" not in environ: - SEQREPO_DATA_PATH = f"{APP_ROOT}/data/seqrepo/latest" -else: - SEQREPO_DATA_PATH = environ["SEQREPO_DATA_PATH"] +SEQREPO_DATA_PATH = environ.get("SEQREPO_DATA_PATH", f"{APP_ROOT}/data/seqrepo/latest") class LookupServiceError(Exception): diff --git a/server/src/curfu/cli.py b/server/src/curfu/cli.py index bb8ec26a..9f838c45 100644 --- a/server/src/curfu/cli.py +++ b/server/src/curfu/cli.py @@ -1,7 +1,7 @@ """Provide command-line interface to application and associated utilities.""" + import os from pathlib import Path -from typing import Optional import click @@ -21,8 +21,8 @@ def serve(port: int) -> None: """ # calling uvicorn.run() doesn't get logs printed to console -- # performing a syscall for now until a more elegant solution appears - os.system( - 
f"uvicorn curfu.main:app --reload --port={port} --reload-dir={str(APP_ROOT.absolute())}" + os.system( # noqa: S605 + f"uvicorn curfu.main:app --reload --port={port} --reload-dir={APP_ROOT.absolute()!s}" ) @@ -57,29 +57,16 @@ def devtools() -> None: "--uniprot", "-u", help="Path to uniprot_sprot_YYYYMMDD.xml", default=None ) def domains( - types: str, protein2ipr: Optional[str], refs: Optional[str], uniprot: Optional[str] + types: str, protein2ipr: str | None, refs: str | None, uniprot: str | None ) -> None: """Build domain mappings for use in Fusion Curation app. \f :param str types: comma-separated list """ types_split = set(types.lower().replace(" ", "").split(",")) - - if protein2ipr: - protein2ipr_path = Path(protein2ipr) - else: - protein2ipr_path = None - - if uniprot: - uniprot_path = Path(uniprot) - else: - uniprot_path = None - - if refs: - refs_path = Path(refs) - else: - refs_path = None - + protein2ipr_path = Path(protein2ipr) if protein2ipr else None + uniprot_path = Path(uniprot) if uniprot else None + refs_path = Path(refs) if refs else None build_gene_domain_maps( interpro_types=types_split, protein_ipr_path=protein2ipr_path, diff --git a/server/src/curfu/devtools/__init__.py b/server/src/curfu/devtools/__init__.py index c1ba6c1e..7863d01a 100644 --- a/server/src/curfu/devtools/__init__.py +++ b/server/src/curfu/devtools/__init__.py @@ -1,6 +1,7 @@ """Utility functions for application setup.""" + import ftplib -from typing import Callable +from collections.abc import Callable from curfu import logger @@ -18,7 +19,7 @@ def ftp_download(domain: str, path: str, fname: str, callback: Callable) -> None ftp.retrbinary(f"RETR {fname}", callback) except ftplib.all_errors as e: logger.error(f"FTP download failed: {e}") - raise Exception(e) + raise Exception(e) from e # default interpro entry types to try to gather for domains diff --git a/server/src/curfu/devtools/build_client_types.py b/server/src/curfu/devtools/build_client_types.py index 
8e2a940d..04655e4a 100644 --- a/server/src/curfu/devtools/build_client_types.py +++ b/server/src/curfu/devtools/build_client_types.py @@ -1,4 +1,5 @@ """Provide client type generation tooling.""" + from pathlib import Path from pydantic2ts.cli.script import generate_typescript_defs diff --git a/server/src/curfu/devtools/build_gene_suggest.py b/server/src/curfu/devtools/build_gene_suggest.py index f56ff3ab..f4a1f449 100644 --- a/server/src/curfu/devtools/build_gene_suggest.py +++ b/server/src/curfu/devtools/build_gene_suggest.py @@ -1,9 +1,9 @@ """Provide tools to build backend data relating to gene identification.""" + import csv -from datetime import datetime as dt +import datetime from pathlib import Path from timeit import default_timer as timer -from typing import Dict, List, Optional import click from biocommons.seqrepo.seqrepo import SeqRepo @@ -25,7 +25,7 @@ def __init__(self) -> None: self.sr = SeqRepo(SEQREPO_DATA_PATH) self.genes = [] - def _get_chromosome(self, record: Dict) -> Optional[str]: + def _get_chromosome(self, record: dict) -> str | None: """Extract readable chromosome identifier from gene extensions. :param record: stored normalized record @@ -42,7 +42,7 @@ def _get_chromosome(self, record: Dict) -> Optional[str]: return None @staticmethod - def _make_list_column(values: List[str]) -> str: + def _make_list_column(values: list[str]) -> str: """Convert a list of strings into a comma-separated string, filtering out non-alphabetic values. @@ -62,12 +62,13 @@ def _make_list_column(values: List[str]) -> str: :param values: A list of strings to be converted into a comma-separated string. :return: A comma-separated string containing unique, alphabetic values from the input list. 
+ """ unique = {v.upper() for v in values} filtered = {v for v in unique if any(char.isalpha() for char in v)} return ",".join(filtered) - def _process_gene_record(self, record: Dict) -> None: + def _process_gene_record(self, record: dict) -> None: """Add the gene record to processed suggestions. :param record: gene record object retrieved from DB @@ -117,8 +118,10 @@ def _save_suggest_file(self, output_dir: Path) -> None: "chromosome", "strand", ] - today = dt.strftime(dt.today(), "%Y%m%d") - with open(output_dir / f"gene_suggest_{today}.csv", "w") as csvfile: + today = datetime.datetime.strftime( + datetime.datetime.now(tz=datetime.timezone.utc), "%Y%m%d" + ) + with (output_dir / f"gene_suggest_{today}.csv").open("w") as csvfile: writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for row in self.genes: diff --git a/server/src/curfu/devtools/build_interpro.py b/server/src/curfu/devtools/build_interpro.py index 112fe277..17fddb8d 100644 --- a/server/src/curfu/devtools/build_interpro.py +++ b/server/src/curfu/devtools/build_interpro.py @@ -1,13 +1,12 @@ """Provide utilities relating to data fetched from InterPro service.""" + import csv +import datetime import gzip -import os import shutil import xml.etree.ElementTree as ET # noqa: N817 -from datetime import datetime from pathlib import Path from timeit import default_timer as timer -from typing import Dict, Optional, Set, Tuple import click from gene.database import create_db @@ -17,10 +16,10 @@ from curfu.devtools import ftp_download # uniprot accession id -> (normalized ID, normalized label) -UniprotRefs = Dict[str, Tuple[str, str]] +UniprotRefs = dict[str, tuple[str, str]] # (uniprot accession id, ncbi gene id) -> refseq NP_ accession -UniprotAcRefs = Dict[Tuple[str, str], str] +UniprotAcRefs = dict[tuple[str, str], str] # consistent formatting for saved files DATE_FMT = "%Y%m%d" @@ -33,21 +32,23 @@ def download_protein2ipr(output_dir: Path) -> None: logger.info("Retrieving Uniprot 
mapping data from InterPro") gz_file_path = output_dir / "protein2ipr.dat.gz" - with open(gz_file_path, "w") as fp: - - def writefile(data): # noqa - fp.write(data) - + with gz_file_path.open("wb") as fp: ftp_download( - "ftp.ebi.ac.uk", "pub/databases/interpro", "protein2ipr.dat.gz", writefile + "ftp.ebi.ac.uk", + "pub/databases/interpro", + "protein2ipr.dat.gz", + lambda data: fp.write(data), ) - today = datetime.strftime(datetime.today(), DATE_FMT) + today = datetime.datetime.strftime( + datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT + ) outfile_path = output_dir / f"protein2ipr_{today}.dat" - with open(outfile_path, "wb") as f_out, gzip.open(gz_file_path, "rb") as f_in: + with outfile_path.open("wb") as f_out, gzip.open(gz_file_path, "rb") as f_in: shutil.copyfileobj(f_in, f_out) - os.remove(gz_file_path) - assert outfile_path.exists() + gz_file_path.unlink() + if not outfile_path.exists(): + raise Exception logger.info("Successfully retrieved UniProt mapping data for Interpro") @@ -84,9 +85,9 @@ def get_uniprot_refs() -> UniprotRefs: if uniprot_id in uniprot_ids: continue norm_response = q.normalize(uniprot_id) - norm_id = norm_response.gene_descriptor.gene_id # type: ignore - norm_label = norm_response.gene_descriptor.label # type: ignore - uniprot_ids[uniprot_id] = (norm_id, norm_label) # type: ignore + norm_id = norm_response.gene_descriptor.gene_id + norm_label = norm_response.gene_descriptor.label + uniprot_ids[uniprot_id] = (norm_id, norm_label) if not last_evaluated_key: break @@ -95,9 +96,11 @@ def get_uniprot_refs() -> UniprotRefs: logger.info(msg) click.echo(msg) - today = datetime.strftime(datetime.today(), DATE_FMT) + today = datetime.datetime.strftime( + datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT + ) save_path = APP_ROOT / "data" / f"uniprot_refs_{today}.tsv" - with open(save_path, "w") as out: + with save_path.open("w") as out: for uniprot_ref, data in uniprot_ids.items(): 
out.write(f"{uniprot_ref.split(':')[1].upper()}\t{data[0]}\t{data[1]}\n") @@ -111,30 +114,33 @@ def download_uniprot_sprot(output_dir: Path) -> Path: logger.info("Retrieving UniProtKB data.") gz_file_path = output_dir / "uniprot_sprot.xml.gz" - with open(gz_file_path, "w") as fp: + with gz_file_path.open("wb") as fp: ftp_download( "ftp.uniprot.org", "pub/databases/uniprot/current_release/knowledgebase/complete/", "uniprot_sprot.xml.gz", lambda data: fp.write(data), ) - today = datetime.strftime(datetime.today(), DATE_FMT) + today = datetime.datetime.strftime( + datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT + ) outfile_path = output_dir / f"uniprot_sprot_{today}.dat" - with open(outfile_path, "wb") as f_out, gzip.open(gz_file_path, "rb") as f_in: + with outfile_path.open("wb") as f_out, gzip.open(gz_file_path, "rb") as f_in: shutil.copyfileobj(f_in, f_out) - os.remove(gz_file_path) - assert outfile_path.exists() + gz_file_path.unlink() + if not outfile_path.exists(): + raise Exception logger.info("Successfully retrieved UniProtKB data.") return outfile_path def get_interpro_uniprot_rels( - protein_ipr_path: Optional[Path], + protein_ipr_path: Path | None, output_dir: Path, - domain_ids: Set[str], - uniprot_refs: Dict, -) -> Dict[str, Dict[str, Tuple[str, str, str, str, str]]]: + domain_ids: set[str], + uniprot_refs: dict, +) -> dict[str, dict[str, tuple[str, str, str, str, str]]]: """Process InterPro to UniProtKB relations, using UniProt references to connect genes with domains @@ -146,9 +152,11 @@ def get_interpro_uniprot_rels( """ if not protein_ipr_path: download_protein2ipr(output_dir) - today = datetime.strftime(datetime.today(), DATE_FMT) + today = datetime.datetime.strftime( + datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT + ) protein_ipr_path = output_dir / f"protein2ipr_{today}.dat" - protein_ipr = open(protein_ipr_path, "r") + protein_ipr = protein_ipr_path.open() protein_ipr_reader = csv.reader(protein_ipr, delimiter="\t") 
interpro_uniprot = {} @@ -179,8 +187,8 @@ def get_interpro_uniprot_rels( def get_protein_accessions( - relevant_proteins: Set[str], uniprot_sprot_path: Optional[Path] -) -> Dict[Tuple[str, str], str]: + relevant_proteins: set[str], uniprot_sprot_path: Path | None +) -> dict[tuple[str, str], str]: """Scan uniprot_sprot.xml and extract RefSeq protein accession identifiers for relevant Uniprot accessions. :param Set[str] relevant_proteins: captured Uniprot accessions, for proteins coded @@ -192,7 +200,7 @@ def get_protein_accessions( start = timer() if not uniprot_sprot_path: uniprot_sprot_path = download_uniprot_sprot(APP_ROOT / "data") - parser = ET.iterparse(uniprot_sprot_path, ("start", "end")) + parser = ET.iterparse(uniprot_sprot_path, ("start", "end")) # noqa: S314 accessions_map = {} cur_ac = "" cur_refseq_ac = "" @@ -262,10 +270,10 @@ def get_protein_accessions( def build_gene_domain_maps( - interpro_types: Set[str], - protein_ipr_path: Optional[Path] = None, - uniprot_sprot_path: Optional[Path] = None, - uniprot_refs_path: Optional[Path] = None, + interpro_types: set[str], + protein_ipr_path: Path | None = None, + uniprot_sprot_path: Path | None = None, + uniprot_refs_path: Path | None = None, output_dir: Path = APP_ROOT / "data", ) -> None: """Produce the gene-to-domain lookup table at out_path using the Interpro-Uniprot @@ -279,16 +287,15 @@ def build_gene_domain_maps( directory. 
""" start_time = timer() - today = datetime.strftime(datetime.today(), DATE_FMT) + today = datetime.datetime.strftime(datetime.datetime.now(tz=datetime.timezone.utc), DATE_FMT) # get relevant Interpro IDs interpro_data_bin = [] - - def get_interpro_data(data): # noqa - interpro_data_bin.append(data) - ftp_download( - "ftp.ebi.ac.uk", "pub/databases/interpro", "entry.list", get_interpro_data + "ftp.ebi.ac.uk", + "pub/databases/interpro", + "entry.list", + lambda data: interpro_data_bin.append(data), ) # load interpro IDs directly to memory -- no need to save to file interpro_data_tsv = "".join([d.decode("UTF-8") for d in interpro_data_bin]).split( @@ -297,16 +304,16 @@ def get_interpro_data(data): # noqa interpro_types = {t.lower() for t in interpro_types} interpro_reader = csv.reader(interpro_data_tsv, delimiter="\t") interpro_reader.__next__() # skip header - domain_ids = set( - [row[0] for row in interpro_reader if row and row[1].lower() in interpro_types] - ) + domain_ids = { + row[0] for row in interpro_reader if row and row[1].lower() in interpro_types + } # get Uniprot to gene references if not uniprot_refs_path: uniprot_refs: UniprotRefs = get_uniprot_refs() else: uniprot_refs = {} - with open(uniprot_refs_path, "r") as f: + with uniprot_refs_path.open() as f: reader = csv.reader(f, delimiter="\t") for row in reader: uniprot_refs[row[0]] = (row[1], row[2]) @@ -317,11 +324,11 @@ def get_interpro_data(data): # noqa ) # get refseq accessions for uniprot proteins - uniprot_acs = {k[0] for k in interpro_uniprot.keys()} + uniprot_acs = {k[0] for k in interpro_uniprot} prot_acs = get_protein_accessions(uniprot_acs, uniprot_sprot_path) outfile_path = output_dir / f"domain_lookup_{today}.tsv" - outfile = open(outfile_path, "w") + outfile = outfile_path.open("w") for k, v_list in interpro_uniprot.items(): for v in v_list.values(): if k[0] in uniprot_acs: @@ -329,7 +336,7 @@ def get_interpro_data(data): # noqa if not refseq_ac: logger.warning(f"Unable to lookup refseq ac for 
{k}, {v}") continue - items = [k[1]] + list(v) + [refseq_ac] + items = [k[1], *list(v), refseq_ac] line = "\t".join(items) + "\n" outfile.write(line) outfile.close() diff --git a/server/src/curfu/domain_services.py b/server/src/curfu/domain_services.py index ca1b08af..920545a7 100644 --- a/server/src/curfu/domain_services.py +++ b/server/src/curfu/domain_services.py @@ -6,8 +6,9 @@ * get_possible_domains shouldn't have to force uniqueness """ + import csv -from typing import Dict, List +from typing import ClassVar from curfu import LookupServiceError, logger from curfu.utils import get_data_file @@ -16,7 +17,7 @@ class DomainService: """Handler class providing requisite services for functional domain lookup.""" - domains: Dict[str, List[Dict]] = {} + domains: ClassVar[dict[str, list[dict]]] = {} def load_mapping(self) -> None: """Load mapping file. @@ -32,7 +33,7 @@ def load_mapping(self) -> None: * RefSeq protein accession """ domain_file = get_data_file("domain_lookup") - with open(domain_file, "r") as df: + with domain_file.open() as df: reader = csv.reader(df, delimiter="\t") for row in reader: gene_id = row[0].lower() @@ -48,7 +49,7 @@ def load_mapping(self) -> None: else: self.domains[gene_id] = [domain_data] - def get_possible_domains(self, gene_id: str) -> List[Dict]: + def get_possible_domains(self, gene_id: str) -> list[dict]: """Given normalized gene ID, return associated domain names and IDs :return: List of valid domain names (up to n names) paired with domain IDs @@ -56,7 +57,7 @@ def get_possible_domains(self, gene_id: str) -> List[Dict]: """ try: domains = self.domains[gene_id.lower()] - except KeyError: + except KeyError as e: logger.warning(f"Unable to retrieve associated domains for {gene_id}") - raise LookupServiceError + raise LookupServiceError from e return domains diff --git a/server/src/curfu/gene_services.py b/server/src/curfu/gene_services.py index 5d43529b..5147a5c9 100644 --- a/server/src/curfu/gene_services.py +++ 
b/server/src/curfu/gene_services.py @@ -1,7 +1,7 @@ """Wrapper for required Gene Normalization services.""" + import csv from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union from ga4gh.vrsatile.pydantic.vrsatile_models import CURIE from gene.query import QueryHandler @@ -11,18 +11,18 @@ from curfu.utils import get_data_file # term -> (normalized ID, normalized label) -Map = Dict[str, Tuple[str, str, str]] +Map = dict[str, tuple[str, str, str]] # term -> (normalized ID, normalized label) -Map = Dict[str, Tuple[str, str, str]] +Map = dict[str, tuple[str, str, str]] # term, symbol, concept ID, chromosome, strand -Suggestion = Tuple[str, str, str, str, str] +Suggestion = tuple[str, str, str, str, str] class GeneService: """Provide gene ID resolution and term autocorrect suggestions.""" - def __init__(self, suggestions_file: Optional[Path] = None) -> None: + def __init__(self, suggestions_file: Path | None = None) -> None: """Initialize gene service provider class. :param suggestions_file: path to existing suggestions file. 
If not provided, @@ -31,28 +31,26 @@ def __init__(self, suggestions_file: Optional[Path] = None) -> None: if not suggestions_file: suggestions_file = get_data_file("gene_suggest") - self.concept_id_map: Dict[str, Suggestion] = {} - self.symbol_map: Dict[str, Suggestion] = {} - self.aliases_map: Dict[str, Suggestion] = {} - self.prev_symbols_map: Dict[str, Suggestion] = {} + self.concept_id_map: dict[str, Suggestion] = {} + self.symbol_map: dict[str, Suggestion] = {} + self.aliases_map: dict[str, Suggestion] = {} + self.prev_symbols_map: dict[str, Suggestion] = {} - for row in csv.DictReader(open(suggestions_file, "r")): + for row in csv.DictReader(suggestions_file.open()): symbol = row["symbol"] concept_id = row["concept_id"] suggestion = [symbol, concept_id, row["chromosome"], row["strand"]] - self.concept_id_map[concept_id.upper()] = tuple([concept_id] + suggestion) - self.symbol_map[symbol.upper()] = tuple([symbol] + suggestion) + self.concept_id_map[concept_id.upper()] = (concept_id, *suggestion) + self.symbol_map[symbol.upper()] = (symbol, *suggestion) for alias in row.get("aliases", []): - self.aliases_map[alias.upper()] = tuple([alias] + suggestion) + self.aliases_map[alias.upper()] = (alias, *suggestion) for prev_symbol in row.get("previous_symbols", []): - self.prev_symbols_map[prev_symbol.upper()] = tuple( - [prev_symbol] + suggestion - ) + self.prev_symbols_map[prev_symbol.upper()] = (prev_symbol, *suggestion) @staticmethod def get_normalized_gene( term: str, normalizer: QueryHandler - ) -> Tuple[CURIE, str, Union[str, CURIE, None]]: + ) -> tuple[CURIE, str, str | CURIE | None]: """Get normalized ID given gene symbol/label/alias. 
:param str term: user-entered gene term :param QueryHandler normalizer: gene normalizer instance @@ -108,16 +106,15 @@ def get_normalized_gene( break if not term_cased: logger.warning( - f"Couldn't find cased version for search term {term} matching gene ID {response.gene_descriptor.gene_id}" # noqa: E501 - ) # noqa: E501 + f"Couldn't find cased version for search term {term} matching gene ID {response.gene_descriptor.gene_id}" + ) return (concept_id, symbol, term_cased) - else: - warn = f"Lookup of gene term {term} failed." - logger.warning(warn) - raise LookupServiceError(warn) + warn = f"Lookup of gene term {term} failed." + logger.warning(warn) + raise LookupServiceError(warn) @staticmethod - def _get_completion_results(term: str, lookup: Dict) -> List[Suggestion]: + def _get_completion_results(term: str, lookup: dict) -> list[Suggestion]: """Filter valid completions for term. :param term: user-entered text @@ -129,10 +126,9 @@ def _get_completion_results(term: str, lookup: Dict) -> List[Suggestion]: for key, data in lookup.items(): if key.startswith(term): matches.append(data) - matches = sorted(matches, key=lambda s: s[0]) - return matches + return sorted(matches, key=lambda s: s[0]) - def suggest_genes(self, query: str) -> Dict[str, List[Suggestion]]: + def suggest_genes(self, query: str) -> dict[str, list[Suggestion]]: """Provide autocomplete suggestions based on submitted term. 
:param str query: text entered by user diff --git a/server/src/curfu/main.py b/server/src/curfu/main.py index a14cb86b..f56edef4 100644 --- a/server/src/curfu/main.py +++ b/server/src/curfu/main.py @@ -1,4 +1,5 @@ """Provide FastAPI application and route declarations.""" + from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles @@ -75,7 +76,7 @@ def serve_react_app(app: FastAPI) -> FastAPI: templates = Jinja2Templates(directory=BUILD_DIR.as_posix()) @app.get("/{full_path:path}", include_in_schema=False) - async def serve_react_app(request: Request, full_path: str) -> TemplateResponse: + async def serve_react_app(request: Request, full_path: str) -> TemplateResponse: # noqa: ARG001 """Add arbitrary path support to FastAPI service. React-router provides something akin to client-side routing based out @@ -110,8 +111,7 @@ def get_gene_services() -> GeneService: :return: GeneService instance """ - gene_services = GeneService() - return gene_services + return GeneService() def get_domain_services() -> DomainService: @@ -135,4 +135,4 @@ async def startup() -> None: @app.on_event("shutdown") async def shutdown() -> None: """Clean up thread pool.""" - await app.state.fusor.cool_seq_tool.uta_db._connection_pool.close() + await app.state.fusor.cool_seq_tool.uta_db._connection_pool.close() # noqa: SLF001 diff --git a/server/src/curfu/routers/complete.py b/server/src/curfu/routers/complete.py index 5b03bb71..ff5c9c33 100644 --- a/server/src/curfu/routers/complete.py +++ b/server/src/curfu/routers/complete.py @@ -1,5 +1,6 @@ """Provide routes for autocomplete/term suggestion methods""" -from typing import Any, Dict + +from typing import Any from fastapi import APIRouter, Query, Request @@ -41,7 +42,7 @@ def suggest_gene(request: Request, term: str = Query("")) -> ResponseDict: response["matches_count"] = n if n > MAX_SUGGESTIONS: - warn = f"Exceeds max matches: Got {n} possible matches for {term} (limit: 
{MAX_SUGGESTIONS})" # noqa: E501 + warn = f"Exceeds max matches: Got {n} possible matches for {term} (limit: {MAX_SUGGESTIONS})" response["warnings"] = [warn] term_upper = term.upper() for match_type in ("concept_id", "symbol", "prev_symbols", "aliases"): @@ -69,7 +70,7 @@ def suggest_domain(request: Request, gene_id: str = Query("")) -> ResponseDict: :return: JSON response with a list of possible domain name and ID options, or warning(s) if relevant """ - response: Dict[str, Any] = {"gene_id": gene_id} + response: dict[str, Any] = {"gene_id": gene_id} try: possible_matches = request.app.state.domains.get_possible_domains(gene_id) response["suggestions"] = possible_matches diff --git a/server/src/curfu/routers/constructors.py b/server/src/curfu/routers/constructors.py index fab4542e..a7cae563 100644 --- a/server/src/curfu/routers/constructors.py +++ b/server/src/curfu/routers/constructors.py @@ -1,5 +1,4 @@ """Provide routes for element construction endpoints""" -from typing import Optional from fastapi import APIRouter, Query, Request from fusor.models import DomainStatus, RegulatoryClass, Strand @@ -38,11 +37,9 @@ def build_gene_element(request: Request, term: str = Query("")) -> GeneElementRe :return: Pydantic class with gene element if successful and warnings otherwise """ gene_element, warnings = request.app.state.fusor.gene_element(term) - if not warnings: - warnings_l = [] - else: - warnings_l = [warnings] - return GeneElementResponse(element=gene_element, warnings=warnings_l) + return GeneElementResponse( + element=gene_element, warnings=[] if not warnings else [warnings] + ) @router.get( @@ -55,9 +52,9 @@ def build_gene_element(request: Request, term: str = Query("")) -> GeneElementRe async def build_tx_segment_ect( request: Request, transcript: str, - exon_start: Optional[int] = Query(None), + exon_start: int | None = Query(None), exon_start_offset: int = Query(0), - exon_end: Optional[int] = Query(None), + exon_end: int | None = Query(None), 
exon_end_offset: int = Query(0), ) -> TxSegmentElementResponse: """Construct Transcript Segment element by providing transcript and exon @@ -94,9 +91,9 @@ async def build_tx_segment_gct( request: Request, transcript: str, chromosome: str, - start: Optional[int] = Query(None), - end: Optional[int] = Query(None), - strand: Optional[str] = Query(None), + start: int | None = Query(None), + end: int | None = Query(None), + strand: str | None = Query(None), ) -> TxSegmentElementResponse: """Construct Transcript Segment element by providing transcript and genomic coordinates (chromosome, start, end positions). @@ -142,9 +139,9 @@ async def build_tx_segment_gcg( request: Request, gene: str, chromosome: str, - start: Optional[int] = Query(None), - end: Optional[int] = Query(None), - strand: Optional[str] = Query(None), + start: int | None = Query(None), + end: int | None = Query(None), + strand: str | None = Query(None), ) -> TxSegmentElementResponse: """Construct Transcript Segment element by providing gene and genomic coordinates (chromosome, start, end positions). 
diff --git a/server/src/curfu/routers/demo.py b/server/src/curfu/routers/demo.py index 434f3e13..2434479a 100644 --- a/server/src/curfu/routers/demo.py +++ b/server/src/curfu/routers/demo.py @@ -1,5 +1,5 @@ """Provide routes for accessing demo objects to client.""" -from typing import Union + from uuid import uuid4 from fastapi import APIRouter, Request @@ -40,24 +40,24 @@ router = APIRouter() -ElementUnion = Union[ - TranscriptSegmentElement, - LinkerElement, - TemplatedSequenceElement, - GeneElement, - UnknownGeneElement, - MultiplePossibleGenesElement, -] -ClientElementUnion = Union[ - ClientTranscriptSegmentElement, - ClientLinkerElement, - ClientTemplatedSequenceElement, - ClientGeneElement, - ClientUnknownGeneElement, - ClientMultiplePossibleGenesElement, -] -Fusion = Union[CategoricalFusion, AssayedFusion] -ClientFusion = Union[ClientCategoricalFusion, ClientAssayedFusion] +ElementUnion = ( + TranscriptSegmentElement + | LinkerElement + | TemplatedSequenceElement + | GeneElement + | UnknownGeneElement + | MultiplePossibleGenesElement +) +ClientElementUnion = ( + ClientTranscriptSegmentElement + | ClientLinkerElement + | ClientTemplatedSequenceElement + | ClientGeneElement + | ClientUnknownGeneElement + | ClientMultiplePossibleGenesElement +) +Fusion = CategoricalFusion | AssayedFusion +ClientFusion = ClientCategoricalFusion | ClientAssayedFusion def clientify_structural_element( @@ -77,14 +77,14 @@ def clientify_structural_element( if element.type == StructuralElementType.UNKNOWN_GENE_ELEMENT: element_args["nomenclature"] = "?" 
return ClientUnknownGeneElement(**element_args) - elif element.type == StructuralElementType.MULTIPLE_POSSIBLE_GENES_ELEMENT: + if element.type == StructuralElementType.MULTIPLE_POSSIBLE_GENES_ELEMENT: element_args["nomenclature"] = "v" return ClientMultiplePossibleGenesElement(**element_args) - elif element.type == StructuralElementType.LINKER_SEQUENCE_ELEMENT: + if element.type == StructuralElementType.LINKER_SEQUENCE_ELEMENT: nm = element.linker_sequence.sequence element_args["nomenclature"] = nm return ClientLinkerElement(**element_args) - elif element.type == StructuralElementType.TEMPLATED_SEQUENCE_ELEMENT: + if element.type == StructuralElementType.TEMPLATED_SEQUENCE_ELEMENT: nm = templated_seq_nomenclature(element, fusor_instance.seqrepo) element_args["nomenclature"] = nm element_args["input_chromosome"] = element.region.location.sequence_id.split( @@ -93,11 +93,11 @@ def clientify_structural_element( element_args["input_start"] = element.region.location.interval.start.value element_args["input_end"] = element.region.location.interval.end.value return ClientTemplatedSequenceElement(**element_args) - elif element.type == StructuralElementType.GENE_ELEMENT: + if element.type == StructuralElementType.GENE_ELEMENT: nm = gene_nomenclature(element) element_args["nomenclature"] = nm return ClientGeneElement(**element_args) - elif element.type == StructuralElementType.TRANSCRIPT_SEGMENT_ELEMENT: + if element.type == StructuralElementType.TRANSCRIPT_SEGMENT_ELEMENT: nm = tx_segment_nomenclature(element) element_args["nomenclature"] = nm element_args["input_type"] = "exon_coords_tx" @@ -107,8 +107,8 @@ def clientify_structural_element( element_args["input_exon_end"] = element.exon_end element_args["input_exon_end_offset"] = element.exon_end_offset return ClientTranscriptSegmentElement(**element_args) - else: - raise ValueError("Unknown element type provided") + msg = "Unknown element type provided" + raise ValueError(msg) def clientify_fusion(fusion: Fusion, 
fusor_instance: FUSOR) -> ClientFusion: @@ -119,12 +119,13 @@ def clientify_fusion(fusion: Fusion, fusor_instance: FUSOR) -> ClientFusion: :return: completed client-ready fusion """ fusion_args = fusion.dict() - client_elements = [] - for element in fusion.structural_elements: - client_elements.append(clientify_structural_element(element, fusor_instance)) + client_elements = [ + clientify_structural_element(element, fusor_instance) + for element in fusion.structural_elements + ] fusion_args["structural_elements"] = client_elements - if "regulatory_element" in fusion_args and fusion_args["regulatory_element"]: + if fusion_args.get("regulatory_element"): reg_element_args = fusion_args["regulatory_element"] nomenclature = reg_element_nomenclature( RegulatoryElement(**reg_element_args), fusor_instance.seqrepo @@ -134,7 +135,8 @@ def clientify_fusion(fusion: Fusion, fusor_instance: FUSOR) -> ClientFusion: if regulatory_class == "enhancer": reg_element_args["display_class"] = "Enhancer" else: - raise Exception("Undefined reg element class used in demo") + msg = "Undefined reg element class used in demo" + raise Exception(msg) fusion_args["regulatory_element"] = reg_element_args if fusion.type == FUSORTypes.CATEGORICAL_FUSION: @@ -146,10 +148,10 @@ def clientify_fusion(fusion: Fusion, fusor_instance: FUSOR) -> ClientFusion: client_domains.append(client_domain) fusion_args["critical_functional_domains"] = client_domains return ClientCategoricalFusion(**fusion_args) - elif fusion.type == FUSORTypes.ASSAYED_FUSION: + if fusion.type == FUSORTypes.ASSAYED_FUSION: return ClientAssayedFusion(**fusion_args) - else: - raise ValueError("Unknown fusion type provided") + msg = "Unknown fusion type provided" + raise ValueError(msg) @router.get( @@ -166,10 +168,7 @@ def get_alk(request: Request) -> DemoResponse: FUSOR and UTA-associated tools. 
""" return DemoResponse( - **{ - "fusion": clientify_fusion(examples.alk, request.app.state.fusor), - "warnings": [], - } + fusion=clientify_fusion(examples.alk, request.app.state.fusor), warnings=[] ) @@ -187,10 +186,7 @@ def get_ewsr1(request: Request) -> DemoResponse: FUSOR and UTA-associated tools. """ return DemoResponse( - **{ - "fusion": clientify_fusion(examples.ewsr1, request.app.state.fusor), - "warnings": [], - } + fusion=clientify_fusion(examples.ewsr1, request.app.state.fusor), warnings=[] ) @@ -208,10 +204,7 @@ def get_bcr_abl1(request: Request) -> DemoResponse: FUSOR and UTA-associated tools. """ return DemoResponse( - **{ - "fusion": clientify_fusion(examples.bcr_abl1, request.app.state.fusor), - "warnings": [], - } + fusion=clientify_fusion(examples.bcr_abl1, request.app.state.fusor), warnings=[] ) @@ -229,10 +222,8 @@ def get_tpm3_ntrk1(request: Request) -> DemoResponse: FUSOR and UTA-associated tools. """ return DemoResponse( - **{ - "fusion": clientify_fusion(examples.tpm3_ntrk1, request.app.state.fusor), - "warnings": [], - } + fusion=clientify_fusion(examples.tpm3_ntrk1, request.app.state.fusor), + warnings=[], ) @@ -250,10 +241,8 @@ def get_tpm3_pdgfrb(request: Request) -> DemoResponse: FUSOR and UTA-associated tools. """ return DemoResponse( - **{ - "fusion": clientify_fusion(examples.tpm3_pdgfrb, request.app.state.fusor), - "warnings": [], - } + fusion=clientify_fusion(examples.tpm3_pdgfrb, request.app.state.fusor), + warnings=[], ) @@ -270,8 +259,5 @@ def get_igh_myc(request: Request) -> DemoResponse: FUSOR and UTA-associated tools. 
""" return DemoResponse( - **{ - "fusion": clientify_fusion(examples.igh_myc, request.app.state.fusor), - "warnings": [], - } + fusion=clientify_fusion(examples.igh_myc, request.app.state.fusor), warnings=[] ) diff --git a/server/src/curfu/routers/lookup.py b/server/src/curfu/routers/lookup.py index 4e66948f..ea851ef6 100644 --- a/server/src/curfu/routers/lookup.py +++ b/server/src/curfu/routers/lookup.py @@ -1,4 +1,5 @@ """Provide routes for basic data lookup endpoints""" + from fastapi import APIRouter, Query, Request from curfu import LookupServiceError diff --git a/server/src/curfu/routers/meta.py b/server/src/curfu/routers/meta.py index 85211baf..506cd103 100644 --- a/server/src/curfu/routers/meta.py +++ b/server/src/curfu/routers/meta.py @@ -1,4 +1,5 @@ """Provide service meta information""" + from cool_seq_tool.version import __version__ as cool_seq_tool_version from fastapi import APIRouter from fusor import __version__ as fusor_version @@ -18,11 +19,8 @@ def get_service_info() -> ServiceInfoResponse: """Return service info.""" return ServiceInfoResponse( - **{ - "curfu_version": curfu_version, - # "vrs_python_version": vrs_version, - "cool_seq_tool_version": cool_seq_tool_version, - "fusor_version": fusor_version, - "warnings": [], - } + curfu_version=curfu_version, + cool_seq_tool_version=cool_seq_tool_version, + fusor_version=fusor_version, + warnings=[], ) diff --git a/server/src/curfu/routers/nomenclature.py b/server/src/curfu/routers/nomenclature.py index 710cdc84..55b83743 100644 --- a/server/src/curfu/routers/nomenclature.py +++ b/server/src/curfu/routers/nomenclature.py @@ -1,5 +1,4 @@ """Provide routes for nomenclature generation.""" -from typing import Dict from fastapi import APIRouter, Body, Request from fusor.exceptions import FUSORParametersException @@ -31,7 +30,7 @@ tags=[RouteTag.NOMENCLATURE], ) def generate_regulatory_element_nomenclature( - request: Request, regulatory_element: Dict = Body() + request: Request, regulatory_element: dict 
= Body() ) -> ResponseDict: """Build regulatory element nomenclature. @@ -46,7 +45,7 @@ def generate_regulatory_element_nomenclature( except ValidationError as e: error_msg = str(e) logger.warning( - f"Encountered ValidationError: {error_msg} for regulatory element: {regulatory_element}" # noqa: E501 + f"Encountered ValidationError: {error_msg} for regulatory element: {regulatory_element}" ) return {"warnings": [error_msg]} try: @@ -59,7 +58,7 @@ def generate_regulatory_element_nomenclature( ) return { "warnings": [ - f"Unable to validate regulatory element with provided parameters: {regulatory_element}" # noqa: E501 + f"Unable to validate regulatory element with provided parameters: {regulatory_element}" ] } return {"nomenclature": nomenclature} @@ -72,7 +71,7 @@ def generate_regulatory_element_nomenclature( response_model_exclude_none=True, tags=[RouteTag.NOMENCLATURE], ) -def generate_tx_segment_nomenclature(tx_segment: Dict = Body()) -> ResponseDict: +def generate_tx_segment_nomenclature(tx_segment: dict = Body()) -> ResponseDict: """Build transcript segment element nomenclature. \f @@ -101,7 +100,7 @@ def generate_tx_segment_nomenclature(tx_segment: Dict = Body()) -> ResponseDict: tags=[RouteTag.NOMENCLATURE], ) def generate_templated_seq_nomenclature( - request: Request, templated_sequence: Dict = Body() + request: Request, templated_sequence: dict = Body() ) -> ResponseDict: """Build templated sequence element nomenclature. 
\f @@ -115,7 +114,7 @@ def generate_templated_seq_nomenclature( except ValidationError as e: error_msg = str(e) logger.warning( - f"Encountered ValidationError: {error_msg} for templated sequence element: {templated_sequence}" # noqa: E501 + f"Encountered ValidationError: {error_msg} for templated sequence element: {templated_sequence}" ) return {"warnings": [error_msg]} try: @@ -128,7 +127,7 @@ def generate_templated_seq_nomenclature( ) return { "warnings": [ - f"Unable to validate templated sequence with provided parameters: {templated_sequence}" # noqa: E501 + f"Unable to validate templated sequence with provided parameters: {templated_sequence}" ] } return {"nomenclature": nomenclature} @@ -141,7 +140,7 @@ def generate_templated_seq_nomenclature( response_model_exclude_none=True, tags=[RouteTag.NOMENCLATURE], ) -def generate_gene_nomenclature(gene_element: Dict = Body()) -> ResponseDict: +def generate_gene_nomenclature(gene_element: dict = Body()) -> ResponseDict: """Build gene element nomenclature. \f :param request: the HTTP request context, supplied by FastAPI. Use to access @@ -177,7 +176,7 @@ def generate_gene_nomenclature(gene_element: Dict = Body()) -> ResponseDict: tags=[RouteTag.NOMENCLATURE], ) def generate_fusion_nomenclature( - request: Request, fusion: Dict = Body() + request: Request, fusion: dict = Body() ) -> ResponseDict: """Generate nomenclature for complete fusion. 
\f diff --git a/server/src/curfu/routers/utilities.py b/server/src/curfu/routers/utilities.py index 2ce7799c..bdfc650a 100644 --- a/server/src/curfu/routers/utilities.py +++ b/server/src/curfu/routers/utilities.py @@ -1,8 +1,8 @@ """Provide routes for app utility endpoints""" -import os + import tempfile from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any from fastapi import APIRouter, HTTPException, Query, Request from fastapi.responses import FileResponse @@ -28,7 +28,7 @@ response_model_exclude_none=True, tags=[RouteTag.UTILITIES], ) -def get_mane_transcripts(request: Request, term: str) -> Dict: +def get_mane_transcripts(request: Request, term: str) -> dict: """Get MANE transcripts for gene term. \f :param Request request: the HTTP request context, supplied by FastAPI. Use to access @@ -39,16 +39,15 @@ def get_mane_transcripts(request: Request, term: str) -> Dict: normalized = request.app.state.fusor.gene_normalizer.normalize(term) if normalized.match_type == gene_schemas.MatchType.NO_MATCH: return {"warnings": [f"Normalization error: {term}"]} - elif not normalized.gene_descriptor.gene_id.lower().startswith("hgnc"): + if not normalized.gene_descriptor.gene_id.lower().startswith("hgnc"): return {"warnings": [f"No HGNC symbol: {term}"]} symbol = normalized.gene_descriptor.label - transcripts = request.app.state.fusor.cool_seq_tool.mane_transcript_mappings.get_gene_mane_data( # noqa: E501 + transcripts = request.app.state.fusor.cool_seq_tool.mane_transcript_mappings.get_gene_mane_data( symbol ) if not transcripts: return {"warnings": [f"No matching transcripts: {term}"]} - else: - return {"transcripts": transcripts} + return {"transcripts": transcripts} @router.get( @@ -60,12 +59,12 @@ def get_mane_transcripts(request: Request, term: str) -> Dict: ) async def get_genome_coords( request: Request, - gene: Optional[str] = None, - transcript: Optional[str] = None, - exon_start: Optional[int] = None, - exon_end: Optional[int] 
= None, - exon_start_offset: Optional[int] = None, - exon_end_offset: Optional[int] = None, + gene: str | None = None, + transcript: str | None = None, + exon_start: int | None = None, + exon_end: int | None = None, + exon_start_offset: int | None = None, + exon_end_offset: int | None = None, ) -> CoordsUtilsResponse: """Convert provided exon positions to genomic coordinates \f @@ -92,10 +91,10 @@ async def get_genome_coords( ) warnings.append(warning) if (exon_start is None) and (exon_start_offset is not None): - warning = "No start param: exon_start_offset parameter requires explicit exon_start parameter" # noqa: E501 + warning = "No start param: exon_start_offset parameter requires explicit exon_start parameter" warnings.append(warning) if (exon_end is None) and (exon_end_offset is not None): - warning = "No end param: exon_end_offset parameter requires explicit exon_end parameter" # noqa: E501 + warning = "No end param: exon_end_offset parameter requires explicit exon_end parameter" warnings.append(warning) if warnings: for warning in warnings: @@ -108,14 +107,16 @@ async def get_genome_coords( if exon_end is not None and exon_end_offset is None: exon_end_offset = 0 - response = await request.app.state.fusor.cool_seq_tool.transcript_to_genomic_coordinates( # noqa: E501 - gene=gene, - transcript=transcript, - exon_start=exon_start, - exon_end=exon_end, - exon_start_offset=exon_start_offset, - exon_end_offset=exon_end_offset, - residue_mode="inter-residue", + response = ( + await request.app.state.fusor.cool_seq_tool.transcript_to_genomic_coordinates( + gene=gene, + transcript=transcript, + exon_start=exon_start, + exon_end=exon_end, + exon_start_offset=exon_start_offset, + exon_end_offset=exon_end_offset, + residue_mode="inter-residue", + ) ) warnings = response.warnings if warnings: @@ -134,11 +135,11 @@ async def get_genome_coords( async def get_exon_coords( request: Request, chromosome: str, - start: Optional[int] = None, - end: Optional[int] = None, - 
strand: Optional[str] = None, - gene: Optional[str] = None, - transcript: Optional[str] = None, + start: int | None = None, + end: int | None = None, + strand: str | None = None, + gene: str | None = None, + transcript: str | None = None, ) -> CoordsUtilsResponse: """Convert provided genomic coordinates to exon coordinates \f @@ -152,7 +153,7 @@ async def get_exon_coords( :param Optional[str] transcript: transcript accession ID :return: response with exon coordinates if successful, or warnings if failed """ - warnings: List[str] = [] + warnings: list[str] = [] if start is None and end is None: warnings.append("Must provide start and/or end coordinates") if transcript is None and gene is None: @@ -169,11 +170,11 @@ async def get_exon_coords( logger.warning(warning) return CoordsUtilsResponse(warnings=warnings, coordinates_data=None) - response = await request.app.state.fusor.cool_seq_tool.genomic_to_transcript_exon_coordinates( # noqa: E501 + response = await request.app.state.fusor.cool_seq_tool.genomic_to_transcript_exon_coordinates( chromosome, start=start, end=end, - strand=strand_validated, # type: ignore + strand=strand_validated, transcript=transcript, gene=gene, ) @@ -199,7 +200,7 @@ async def get_sequence_id(request: Request, sequence: str) -> SequenceIDResponse :param str sequence_id: user-provided sequence identifier to translate :return: Response object with ga4gh ID and aliases """ - params: Dict[str, Any] = {"sequence": sequence, "ga4gh_id": None, "aliases": []} + params: dict[str, Any] = {"sequence": sequence, "ga4gh_id": None, "aliases": []} sr = request.app.state.fusor.cool_seq_tool.seqrepo_access sr_ids, errors = sr.translate_identifier(sequence) @@ -260,24 +261,23 @@ async def get_sequence( _, path = tempfile.mkstemp(suffix=".fasta") try: request.app.state.fusor.cool_seq_tool.get_fasta_file(sequence_id, Path(path)) - except KeyError: - resp = request.app.state.fusor.cool_seq_tool.seqrepo_access.translate_identifier( # noqa: E501 - sequence_id, 
"refseq" + except KeyError as ke: + resp = ( + request.app.state.fusor.cool_seq_tool.seqrepo_access.translate_identifier( + sequence_id, "refseq" + ) ) if len(resp[0]) < 1: raise HTTPException( status_code=404, detail="No sequence available for requested identifier" - ) - else: - try: - new_seq_id = resp[0][0].split(":")[1] - request.app.state.fusor.cool_seq_tool.get_fasta_file( - new_seq_id, Path(path) - ) - except KeyError: - raise HTTPException( - status_code=404, - detail="No sequence available for requested identifier", - ) - background_tasks.add_task(lambda p: os.unlink(p), path) + ) from ke + try: + new_seq_id = resp[0][0].split(":")[1] + request.app.state.fusor.cool_seq_tool.get_fasta_file(new_seq_id, Path(path)) + except KeyError as e: + raise HTTPException( + status_code=404, + detail="No sequence available for requested identifier", + ) from e + background_tasks.add_task(lambda p: Path(p).unlink(), path) return FileResponse(path, filename=f"{sequence_id}.FASTA") diff --git a/server/src/curfu/routers/validate.py b/server/src/curfu/routers/validate.py index a3fc0371..5087b810 100644 --- a/server/src/curfu/routers/validate.py +++ b/server/src/curfu/routers/validate.py @@ -1,5 +1,4 @@ """Provide validation endpoint to confirm correctness of fusion object structure.""" -from typing import Dict from fastapi import APIRouter, Body, Request from fusor.exceptions import FUSORParametersException @@ -16,7 +15,7 @@ response_model_exclude_none=True, tags=[RouteTag.VALIDATORS], ) -def validate_fusion(request: Request, fusion: Dict = Body()) -> ResponseDict: +def validate_fusion(request: Request, fusion: dict = Body()) -> ResponseDict: """Validate proposed Fusion object. Return warnings if invalid. \f :param request: the HTTP request context, supplied by FastAPI. Use to access FUSOR. 
diff --git a/server/src/curfu/schemas.py b/server/src/curfu/schemas.py index 9cd71c61..73cf5996 100644 --- a/server/src/curfu/schemas.py +++ b/server/src/curfu/schemas.py @@ -1,6 +1,7 @@ """Provide schemas for FastAPI responses.""" + from enum import Enum -from typing import Dict, List, Literal, Optional, Tuple, Union +from typing import Literal from cool_seq_tool.schemas import GenomicData from fusor.models import ( @@ -20,15 +21,13 @@ from ga4gh.vrsatile.pydantic.vrsatile_models import CURIE from pydantic import BaseModel, Extra, Field, StrictInt, StrictStr, validator -ResponseWarnings = Optional[List[StrictStr]] +ResponseWarnings = list[StrictStr] | None -ResponseDict = Dict[ +ResponseDict = dict[ str, - Union[ - str, int, CURIE, List[str], List[Tuple[str, str, str, str]], FunctionalDomain - ], + str | int | CURIE | list[str] | list[tuple[str, str, str, str]] | FunctionalDomain, ] -Warnings = List[str] +Warnings = list[str] class ClientStructuralElement(BaseModel): @@ -41,56 +40,48 @@ class ClientStructuralElement(BaseModel): class ClientTranscriptSegmentElement(TranscriptSegmentElement, ClientStructuralElement): """TranscriptSegment element class used client-side.""" - input_type: Union[ - Literal["genomic_coords_gene"], - Literal["genomic_coords_tx"], - Literal["exon_coords_tx"], - ] - input_tx: Optional[str] - input_strand: Optional[Strand] - input_gene: Optional[str] - input_chr: Optional[str] - input_genomic_start: Optional[str] - input_genomic_end: Optional[str] - input_exon_start: Optional[str] - input_exon_start_offset: Optional[str] - input_exon_end: Optional[str] - input_exon_end_offset: Optional[str] + input_type: ( + Literal["genomic_coords_gene"] + | Literal["genomic_coords_tx"] + | Literal["exon_coords_tx"] + ) + input_tx: str | None + input_strand: Strand | None + input_gene: str | None + input_chr: str | None + input_genomic_start: str | None + input_genomic_end: str | None + input_exon_start: str | None + input_exon_start_offset: str | None + 
input_exon_end: str | None + input_exon_end_offset: str | None class ClientLinkerElement(LinkerElement, ClientStructuralElement): """Linker element class used client-side.""" - pass - class ClientTemplatedSequenceElement(TemplatedSequenceElement, ClientStructuralElement): """Templated sequence element used client-side.""" - input_chromosome: Optional[str] - input_start: Optional[str] - input_end: Optional[str] + input_chromosome: str | None + input_start: str | None + input_end: str | None class ClientGeneElement(GeneElement, ClientStructuralElement): """Gene element used client-side.""" - pass - class ClientUnknownGeneElement(UnknownGeneElement, ClientStructuralElement): """Unknown gene element used client-side.""" - pass - class ClientMultiplePossibleGenesElement( MultiplePossibleGenesElement, ClientStructuralElement ): """Multiple possible gene element used client-side.""" - pass - class ClientFunctionalDomain(FunctionalDomain): """Define functional domain object used client-side.""" @@ -124,28 +115,28 @@ class Config: class GeneElementResponse(Response): """Response model for gene element construction endoint.""" - element: Optional[GeneElement] + element: GeneElement | None class TxSegmentElementResponse(Response): """Response model for transcript segment element construction endpoint.""" - element: Optional[TranscriptSegmentElement] + element: TranscriptSegmentElement | None class TemplatedSequenceElementResponse(Response): """Response model for transcript segment element construction endpoint.""" - element: Optional[TemplatedSequenceElement] + element: TemplatedSequenceElement | None class NormalizeGeneResponse(Response): """Response model for gene normalization endpoint.""" term: StrictStr - concept_id: Optional[CURIE] - symbol: Optional[StrictStr] - cased: Optional[StrictStr] + concept_id: CURIE | None + symbol: StrictStr | None + cased: StrictStr | None class SuggestGeneResponse(Response): @@ -154,10 +145,10 @@ class SuggestGeneResponse(Response): term: 
StrictStr matches_count: int # complete term, normalized symbol, normalized concept ID, chromosome ID, strand - concept_id: Optional[List[Tuple[str, str, str, str, str]]] - symbol: Optional[List[Tuple[str, str, str, str, str]]] - prev_symbols: Optional[List[Tuple[str, str, str, str, str]]] - aliases: Optional[List[Tuple[str, str, str, str, str]]] + concept_id: list[tuple[str, str, str, str, str]] | None + symbol: list[tuple[str, str, str, str, str]] | None + prev_symbols: list[tuple[str, str, str, str, str]] | None + aliases: list[tuple[str, str, str, str, str]] | None class DomainParams(BaseModel): @@ -173,31 +164,31 @@ class DomainParams(BaseModel): class GetDomainResponse(Response): """Response model for functional domain constructor endpoint.""" - domain: Optional[FunctionalDomain] + domain: FunctionalDomain | None class AssociatedDomainResponse(Response): """Response model for domain ID autocomplete suggestion endpoint.""" gene_id: StrictStr - suggestions: Optional[List[DomainParams]] + suggestions: list[DomainParams] | None class ValidateFusionResponse(Response): """Response model for Fusion validation endpoint.""" - fusion: Optional[Fusion] + fusion: Fusion | None class ExonCoordsRequest(BaseModel): """Request model for genomic coordinates retrieval""" tx_ac: StrictStr - gene: Optional[StrictStr] = "" - exon_start: Optional[StrictInt] = 0 - exon_start_offset: Optional[StrictInt] = 0 - exon_end: Optional[StrictInt] = 0 - exon_end_offset: Optional[StrictInt] = 0 + gene: StrictStr | None = "" + exon_start: StrictInt | None = 0 + exon_start_offset: StrictInt | None = 0 + exon_end: StrictInt | None = 0 + exon_end_offset: StrictInt | None = 0 @validator("gene") def validate_gene(cls, v) -> str: @@ -217,16 +208,16 @@ def validate_number(cls, v) -> int: class CoordsUtilsResponse(Response): """Response model for genomic coordinates retrieval""" - coordinates_data: Optional[GenomicData] + coordinates_data: GenomicData | None class SequenceIDResponse(Response): 
"""Response model for sequence ID retrieval endpoint.""" sequence: StrictStr - refseq_id: Optional[StrictStr] - ga4gh_id: Optional[StrictStr] - aliases: Optional[List[StrictStr]] + refseq_id: StrictStr | None + ga4gh_id: StrictStr | None + aliases: list[StrictStr] | None class ManeGeneTranscript(BaseModel): @@ -251,7 +242,7 @@ class ManeGeneTranscript(BaseModel): class GetTranscriptsResponse(Response): """Response model for MANE transcript retrieval endpoint.""" - transcripts: Optional[List[ManeGeneTranscript]] + transcripts: list[ManeGeneTranscript] | None class ServiceInfoResponse(Response): @@ -272,17 +263,15 @@ class ClientCategoricalFusion(CategoricalFusion): global FusionContext. """ - regulatory_element: Optional[ClientRegulatoryElement] = None - structural_elements: List[ - Union[ - ClientTranscriptSegmentElement, - ClientGeneElement, - ClientTemplatedSequenceElement, - ClientLinkerElement, - ClientMultiplePossibleGenesElement, - ] + regulatory_element: ClientRegulatoryElement | None = None + structural_elements: list[ + ClientTranscriptSegmentElement + | ClientGeneElement + | ClientTemplatedSequenceElement + | ClientLinkerElement + | ClientMultiplePossibleGenesElement ] - critical_functional_domains: Optional[List[ClientFunctionalDomain]] + critical_functional_domains: list[ClientFunctionalDomain] | None class ClientAssayedFusion(AssayedFusion): @@ -290,22 +279,20 @@ class ClientAssayedFusion(AssayedFusion): global FusionContext. 
""" - regulatory_element: Optional[ClientRegulatoryElement] = None - structural_elements: List[ - Union[ - ClientTranscriptSegmentElement, - ClientGeneElement, - ClientTemplatedSequenceElement, - ClientLinkerElement, - ClientUnknownGeneElement, - ] + regulatory_element: ClientRegulatoryElement | None = None + structural_elements: list[ + ClientTranscriptSegmentElement + | ClientGeneElement + | ClientTemplatedSequenceElement + | ClientLinkerElement + | ClientUnknownGeneElement ] class NomenclatureResponse(Response): """Response model for regulatory element nomenclature endpoint.""" - nomenclature: Optional[str] + nomenclature: str | None class RegulatoryElementResponse(Response): @@ -317,7 +304,7 @@ class RegulatoryElementResponse(Response): class DemoResponse(Response): """Response model for demo fusion object retrieval endpoints.""" - fusion: Union[ClientAssayedFusion, ClientCategoricalFusion] + fusion: ClientAssayedFusion | ClientCategoricalFusion class RouteTag(str, Enum): diff --git a/server/src/curfu/sequence_services.py b/server/src/curfu/sequence_services.py index b6535b84..a7a1585a 100644 --- a/server/src/curfu/sequence_services.py +++ b/server/src/curfu/sequence_services.py @@ -1,4 +1,5 @@ """Provide sequence ID generation services.""" + import logging logger = logging.getLogger("curfu") @@ -9,15 +10,15 @@ class InvalidInputError(Exception): """Provide exception for input validation.""" -def get_strand(input: str) -> int: +def get_strand(strand_input: str) -> int: """Validate strand arguments received from client. 
+ :param str input: strand argument from client :return: correctly-formatted strand :raises InvalidInputException: if strand arg is invalid """ - if input == "+": + if strand_input == "+": return 1 - elif input == "-": + if strand_input == "-": return -1 - else: - raise InvalidInputError + raise InvalidInputError diff --git a/server/src/curfu/utils.py b/server/src/curfu/utils.py index ec610b52..44757c7b 100644 --- a/server/src/curfu/utils.py +++ b/server/src/curfu/utils.py @@ -1,7 +1,7 @@ """Miscellaneous helper functions.""" -import os + from pathlib import Path -from typing import List, TypeVar +from typing import TypeVar import boto3 from boto3.exceptions import ResourceLoadException @@ -24,18 +24,16 @@ def get_latest_s3_file(file_prefix: str) -> ObjectSummary: logger.info(f"Attempting S3 lookup for data file pattern {file_prefix}...") s3 = boto3.resource("s3", config=Config(region_name="us-east-2")) if not s3: - raise ResourceLoadException("Unable to initialize boto S3 resource") + msg = "Unable to initialize boto S3 resource" + raise ResourceLoadException(msg) bucket = sorted( - list( - s3.Bucket("vicc-services") - .objects.filter(Prefix=f"curfu/{file_prefix}") - .all() - ), + s3.Bucket("vicc-services").objects.filter(Prefix=f"curfu/{file_prefix}").all(), key=lambda f: f.key, reverse=True, ) if len(bucket) == 0: - raise FileNotFoundError(f"No files matching pattern {file_prefix} in bucket.") + msg = f"No files matching pattern {file_prefix} in bucket." 
+ raise FileNotFoundError(msg) return bucket[0] @@ -45,9 +43,9 @@ def download_s3_file(bucket_object: ObjectSummary) -> Path: :param bucket_object: boto object representation of S3 file :return: Path to downloaded file """ - fname = os.path.basename(bucket_object.key) + fname = Path(bucket_object.key).name save_to = APP_ROOT / "data" / fname - with open(save_to, "wb") as f: + with save_to.open("wb") as f: try: bucket_object.Object().download_fileobj(f) except ClientError as e: @@ -57,7 +55,7 @@ def download_s3_file(bucket_object: ObjectSummary) -> Path: return save_to -def get_latest_data_file(file_prefix: str, local_files: List[Path]) -> Path: +def get_latest_data_file(file_prefix: str, local_files: list[Path]) -> Path: """Get path to latest version of given data file. Download from S3 if not available locally. @@ -67,10 +65,9 @@ def get_latest_data_file(file_prefix: str, local_files: List[Path]) -> Path: """ latest_local_file = sorted(local_files, reverse=True)[0] s3_object = get_latest_s3_file(file_prefix) - if os.path.basename(s3_object.key) > latest_local_file.name: + if Path(s3_object.key).name > latest_local_file.name: return download_s3_file(s3_object) - else: - return latest_local_file + return latest_local_file def get_data_file(filename_prefix: str) -> Path: @@ -87,5 +84,4 @@ def get_data_file(filename_prefix: str) -> Path: files = list(data_dir.glob(file_glob)) if not files: return download_s3_file(get_latest_s3_file(filename_prefix)) - else: - return get_latest_data_file(filename_prefix, files) + return get_latest_data_file(filename_prefix, files) diff --git a/server/tests/conftest.py b/server/tests/conftest.py index 46c481b8..da8a95e8 100644 --- a/server/tests/conftest.py +++ b/server/tests/conftest.py @@ -1,11 +1,11 @@ """Provide core fixtures for testing Flask functions.""" + import asyncio -from typing import Callable, Dict +from collections.abc import Callable import pytest -from httpx import AsyncClient - from curfu.main import app, 
get_domain_services, get_gene_services, start_fusor +from httpx import AsyncClient @pytest.fixture(scope="session") @@ -27,7 +27,7 @@ async def async_client(): await client.aclose() -response_callback_type = Callable[[Dict, Dict], None] +response_callback_type = Callable[[dict, dict], None] @pytest.fixture(scope="session") @@ -36,9 +36,9 @@ async def check_response(async_client): async def check_response( query: str, - expected_response: Dict, + expected_response: dict, data_callback: response_callback_type, - **kwargs + **kwargs, ): """Check that requested URL provides expected response. :param str query: URL endpoint with included query terms diff --git a/server/tests/integration/test_complete.py b/server/tests/integration/test_complete.py index 0f2e6a6d..a4a12eb6 100644 --- a/server/tests/integration/test_complete.py +++ b/server/tests/integration/test_complete.py @@ -1,9 +1,10 @@ """Test lookup endpoints""" + import pytest from httpx import AsyncClient -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_complete_gene(async_client: AsyncClient): """Test /complete/gene endpoint""" response = await async_client.get("/api/complete/gene?term=NTRK") diff --git a/server/tests/integration/test_constructors.py b/server/tests/integration/test_constructors.py index 6b19697f..4e316613 100644 --- a/server/tests/integration/test_constructors.py +++ b/server/tests/integration/test_constructors.py @@ -1,15 +1,14 @@ """Test end-to-end correctness of constructor routes.""" -from typing import Dict import pytest -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_build_gene_element(check_response, alk_gene_element): """Test correct functioning of gene element construction route.""" def check_gene_element_response( - response: Dict, expected_response: Dict, expected_id: str = "unset" + response: dict, expected_response: dict, expected_id: str = "unset" ): assert ("element" in response) == ("element" in expected_response) if ("element" not in response) and 
("element" not in expected_response): @@ -46,9 +45,9 @@ def check_gene_element_response( @pytest.fixture(scope="session") def check_tx_element_response(): - """Provide callback function to check correctness of transcript element constructor.""" # noqa: E501 D202 + """Provide callback function to check correctness of transcript element constructor.""" - def check_tx_element_response(response: Dict, expected_response: Dict): + def check_tx_element_response(response: dict, expected_response: dict): assert ("element" in response) == ("element" in expected_response) if ("element" not in response) and ("element" not in expected_response): assert "warnings" in response @@ -82,7 +81,7 @@ def check_tx_element_response(response: Dict, expected_response: Dict): def check_reg_element_response(): """Provide callback function check correctness of regulatory element constructor.""" - def check_re_response(response: Dict, expected_response: Dict): + def check_re_response(response: dict, expected_response: dict): assert ("regulatory_element" in response) == ( "regulatory_element" in expected_response ) @@ -111,7 +110,7 @@ def check_re_response(response: Dict, expected_response: Dict): def check_templated_sequence_response(): """Provide callback function to check templated sequence constructor response""" - def check_temp_seq_response(response: Dict, expected_response: Dict): + def check_temp_seq_response(response: dict, expected_response: dict): assert ("element" in response) == ("element" in expected_response) if ("element" not in response) and ("element" not in expected_response): assert "warnings" in response @@ -159,7 +158,7 @@ def check_temp_seq_response(response: Dict, expected_response: Dict): return check_temp_seq_response -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_build_tx_segment_ect( check_response, check_tx_element_response, ntrk1_tx_element_start ): @@ -167,7 +166,7 @@ async def test_build_tx_segment_ect( coordinates and transcript. 
""" await check_response( - "/api/construct/structural_element/tx_segment_ect?transcript=NM_002529.3&exon_start=2&exon_start_offset=1", # noqa: E501 + "/api/construct/structural_element/tx_segment_ect?transcript=NM_002529.3&exon_start=2&exon_start_offset=1", {"element": ntrk1_tx_element_start}, check_tx_element_response, ) @@ -181,13 +180,13 @@ async def test_build_tx_segment_ect( # test handle invalid transcript await check_response( - "/api/construct/structural_element/tx_segment_ect?transcript=NM_0012529.3&exon_start=3", # noqa: E501 + "/api/construct/structural_element/tx_segment_ect?transcript=NM_0012529.3&exon_start=3", {"warnings": ["Unable to get exons for NM_0012529.3"]}, check_tx_element_response, ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_build_segment_gct( check_response, check_tx_element_response, tpm3_tx_t_element ): @@ -195,18 +194,18 @@ async def test_build_segment_gct( genomic coordinates and transcript. """ await check_response( - "/api/construct/structural_element/tx_segment_gct?transcript=NM_152263.4&chromosome=NC_000001.11&start=154171416&end=154171417&strand=-", # noqa: E501 + "/api/construct/structural_element/tx_segment_gct?transcript=NM_152263.4&chromosome=NC_000001.11&start=154171416&end=154171417&strand=-", {"element": tpm3_tx_t_element}, check_tx_element_response, ) await check_response( - "/api/construct/structural_element/tx_segment_gct?transcript=refseq%3ANM_152263.4&chromosome=NC_000001.11&start=154171416&end=154171417&strand=-", # noqa: E501 + "/api/construct/structural_element/tx_segment_gct?transcript=refseq%3ANM_152263.4&chromosome=NC_000001.11&start=154171416&end=154171417&strand=-", {"element": tpm3_tx_t_element}, check_tx_element_response, ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_build_segment_gcg( check_response, check_tx_element_response, tpm3_tx_g_element ): @@ -214,13 +213,13 @@ async def test_build_segment_gcg( genomic coordinates and gene name. 
""" await check_response( - "/api/construct/structural_element/tx_segment_gcg?gene=TPM3&chromosome=NC_000001.11&start=154171416&end=154171417&strand=-", # noqa: E501 + "/api/construct/structural_element/tx_segment_gcg?gene=TPM3&chromosome=NC_000001.11&start=154171416&end=154171417&strand=-", {"element": tpm3_tx_g_element}, check_tx_element_response, ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_build_reg_element(check_response, check_reg_element_response): """Test correctness of regulatory element constructor endpoint.""" await check_response( @@ -241,13 +240,13 @@ async def test_build_reg_element(check_response, check_reg_element_response): ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_build_templated_sequence( check_response, check_templated_sequence_response ): """Test correct functioning of templated sequence constructor""" await check_response( - "/api/construct/structural_element/templated_sequence?start=154171415&end=154171417&sequence_id=NC_000001.11&strand=-", # noqa: E501 + "/api/construct/structural_element/templated_sequence?start=154171415&end=154171417&sequence_id=NC_000001.11&strand=-", { "element": { "type": "TemplatedSequenceElement", @@ -272,7 +271,7 @@ async def test_build_templated_sequence( ) await check_response( - "/api/construct/structural_element/templated_sequence?start=154171415&end=154171417&sequence_id=refseq%3ANC_000001.11&strand=-", # noqa: E501 + "/api/construct/structural_element/templated_sequence?start=154171415&end=154171417&sequence_id=refseq%3ANC_000001.11&strand=-", { "element": { "type": "TemplatedSequenceElement", diff --git a/server/tests/integration/test_demos.py b/server/tests/integration/test_demos.py index 21166f16..596f4ebf 100644 --- a/server/tests/integration/test_demos.py +++ b/server/tests/integration/test_demos.py @@ -1,9 +1,10 @@ """Test demo endpoints""" + import pytest from httpx import AsyncClient -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_demo(async_client: 
AsyncClient): """Test /api/demo/ endpoints. Probably not worth it to check individual property values, but the Pydantic models diff --git a/server/tests/integration/test_lookup.py b/server/tests/integration/test_lookup.py index 35c959ab..0ecf52a3 100644 --- a/server/tests/integration/test_lookup.py +++ b/server/tests/integration/test_lookup.py @@ -1,9 +1,10 @@ """Test lookup endpoints""" + import pytest from httpx import AsyncClient -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_normalize_gene(async_client: AsyncClient): """Test /api/lookup/gene endpoint""" response = await async_client.get("/api/lookup/gene?term=NTRK1") diff --git a/server/tests/integration/test_main.py b/server/tests/integration/test_main.py index 0a692e2d..8aacae8d 100644 --- a/server/tests/integration/test_main.py +++ b/server/tests/integration/test_main.py @@ -1,10 +1,11 @@ """Test main service routes.""" + import re import pytest -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_service_info(async_client): """Test /service_info endpoint @@ -15,7 +16,7 @@ async def test_service_info(async_client): assert response.status_code == 200 response_json = response.json() assert response_json["warnings"] == [] - semver_pattern = r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" # noqa: E501 + semver_pattern = r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" assert re.match(semver_pattern, response_json["curfu_version"]) assert re.match(semver_pattern, response_json["fusor_version"]) assert re.match(semver_pattern, response_json["cool_seq_tool_version"]) diff --git 
a/server/tests/integration/test_nomenclature.py b/server/tests/integration/test_nomenclature.py index 782d538d..7577e668 100644 ---
a/server/tests/integration/test_nomenclature.py +++ b/server/tests/integration/test_nomenclature.py @@ -1,5 +1,4 @@ """Test /nomenclature/ endpoints.""" -from typing import Dict import pytest from fusor.examples import bcr_abl1 @@ -133,9 +132,9 @@ def templated_sequence_element(): } -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_regulatory_element_nomenclature( - async_client: AsyncClient, regulatory_element: Dict + async_client: AsyncClient, regulatory_element: dict ): """Test correctness of regulatory element nomenclature endpoint.""" response = await async_client.post( @@ -145,13 +144,13 @@ async def test_regulatory_element_nomenclature( assert response.json().get("nomenclature", "") == "reg_p@G1(hgnc:9339)" -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_tx_segment_nomenclature( async_client: AsyncClient, - ntrk1_tx_element_start: Dict, - epcam_5_prime: Dict, - epcam_3_prime: Dict, - epcam_invalid: Dict, + ntrk1_tx_element_start: dict, + epcam_5_prime: dict, + epcam_3_prime: dict, + epcam_invalid: dict, ): """Test correctness of transcript segment nomenclature response.""" response = await async_client.post( @@ -178,13 +177,13 @@ async def test_tx_segment_nomenclature( ) assert response.status_code == 200 assert response.json().get("warnings", []) == [ - "1 validation error for TranscriptSegmentElement\ntranscript\n field required (type=value_error.missing)" # noqa: E501 + "1 validation error for TranscriptSegmentElement\ntranscript\n field required (type=value_error.missing)" ] -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_gene_element_nomenclature( - async_client: AsyncClient, alk_gene_element: Dict + async_client: AsyncClient, alk_gene_element: dict ): """Test correctness of gene element nomenclature endpoint.""" response = await async_client.post("/api/nomenclature/gene", json=alk_gene_element) @@ -197,13 +196,13 @@ async def test_gene_element_nomenclature( ) assert response.status_code == 200 assert 
response.json().get("warnings", []) == [ - "2 validation errors for GeneElement\ngene_descriptor\n field required (type=value_error.missing)\nassociated_gene\n extra fields not permitted (type=value_error.extra)" # noqa: E501 + "2 validation errors for GeneElement\ngene_descriptor\n field required (type=value_error.missing)\nassociated_gene\n extra fields not permitted (type=value_error.extra)" ] -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_templated_sequence_nomenclature( - async_client: AsyncClient, templated_sequence_element: Dict + async_client: AsyncClient, templated_sequence_element: dict ): """Test correctness of templated sequence element endpoint.""" response = await async_client.post( @@ -235,11 +234,11 @@ async def test_templated_sequence_nomenclature( ) assert response.status_code == 200 assert response.json().get("warnings", []) == [ - "1 validation error for TemplatedSequenceElement\nstrand\n field required (type=value_error.missing)" # noqa: E501 + "1 validation error for TemplatedSequenceElement\nstrand\n field required (type=value_error.missing)" ] -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_fusion_nomenclature(async_client: AsyncClient): """Test correctness of fusion nomneclature endpoint.""" response = await async_client.post("/api/nomenclature/fusion", json=bcr_abl1.dict()) diff --git a/server/tests/integration/test_utilities.py b/server/tests/integration/test_utilities.py index e0d3d0f6..522dcbe4 100644 --- a/server/tests/integration/test_utilities.py +++ b/server/tests/integration/test_utilities.py @@ -1,16 +1,17 @@ """Test end-to-end correctness of utility routes.""" -from typing import Callable, Dict + +from collections.abc import Callable import pytest -response_callback_type = Callable[[Dict, Dict], None] +response_callback_type = Callable[[dict, dict], None] -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_mane_transcript(check_response): """Test MANE transcript retrieval endpoint.""" - def 
check_mane_response(response: Dict, expected_response: Dict): + def check_mane_response(response: dict, expected_response: dict): assert ("transcripts" in response) == ("transcripts" in expected_response) if not (response.get("transcripts")) and not ( expected_response.get("transcripts") @@ -73,11 +74,11 @@ def check_mane_response(response: Dict, expected_response: Dict): ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_genomic_coords(check_response): """Test coordinates utility endpoint using genomic coords.""" - def check_genomic_coords_response(response: Dict, expected_response: Dict): + def check_genomic_coords_response(response: dict, expected_response: dict): assert ("coordinates_data" in response) == ( "coordinates_data" in expected_response ) @@ -109,21 +110,21 @@ def check_genomic_coords_response(response: Dict, expected_response: Dict): ) await check_response( - "/api/utilities/get_genomic?transcript=NM_002529.3&exon_start=1&exon_end=6&gene=FAKE_GENE", # noqa: E501 + "/api/utilities/get_genomic?transcript=NM_002529.3&exon_start=1&exon_end=6&gene=FAKE_GENE", { "warnings": [ - "Unable to find a result where NM_002529.3 has transcript coordinates 0 and 268 between an exon's start and end coordinates on gene FAKE_GENE" # noqa: E501 + "Unable to find a result where NM_002529.3 has transcript coordinates 0 and 268 between an exon's start and end coordinates on gene FAKE_GENE" ] }, check_genomic_coords_response, ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_exon_coords(check_response): """Test /utilities/get_exon endpoint""" - def check_coords_response(response: Dict, expected_response: Dict): + def check_coords_response(response: dict, expected_response: dict): """Provide to check_response to test specific response params""" assert ("coordinates_data" in response) == ( "coordinates_data" in expected_response @@ -135,7 +136,7 @@ def check_coords_response(response: Dict, expected_response: Dict): assert 
response["coordinates_data"] == expected_response["coordinates_data"] await check_response( - "/api/utilities/get_exon?chromosome=1&transcript=NM_152263.3&start=154192135&strand=-", # noqa: E501 + "/api/utilities/get_exon?chromosome=1&transcript=NM_152263.3&start=154192135&strand=-", { "coordinates_data": { "gene": "TPM3", @@ -165,18 +166,18 @@ def check_coords_response(response: Dict, expected_response: Dict): "/api/utilities/get_exon?chromosome=NC_000001.11&start=154192131&gene=TPM3", { "warnings": [ - "Unable to find mane data for NC_000001.11 with position 154192130 on gene TPM3" # noqa: E501 + "Unable to find mane data for NC_000001.11 with position 154192130 on gene TPM3" ] }, check_coords_response, ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_sequence_id(check_response): """Test sequence ID lookup utility endpoint""" - def check_sequence_id_response(response: Dict, expected_response: Dict): + def check_sequence_id_response(response: dict, expected_response: dict): """Provide to check_response to test specific response params""" assert response["sequence"] == expected_response["sequence"] if response.get("ga4gh_id") or expected_response.get("ga4gh_id"): diff --git a/server/tests/integration/test_validate.py b/server/tests/integration/test_validate.py index b15605d0..7b18153c 100644 --- a/server/tests/integration/test_validate.py +++ b/server/tests/integration/test_validate.py @@ -1,5 +1,4 @@ """Test /validate endpoint.""" -from typing import Dict import pytest from httpx import AsyncClient @@ -199,13 +198,14 @@ def wrong_type_fusion(): } -async def check_validated_fusion_response(client, fixture: Dict, case_name: str): +async def check_validated_fusion_response(client, fixture: dict, case_name: str): """Run basic checks on fusion validation response. 
Todo: ---- * FUSOR should provide a "fusion equality" utility function -- incorporate it here when that's done + """ response = await client.post("/api/validate", json=fixture["input"]) @@ -219,7 +219,7 @@ async def check_validated_fusion_response(client, fixture: Dict, case_name: str) ), f"{case_name}: warnings incorrect" -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_validate_fusion( async_client: AsyncClient, alk_fusion,