diff --git a/.travis.yml b/.travis.yml index a4e6e03e..2782aba3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,10 +8,8 @@ os: install: - pip install . - pip install -r requirements/requirements-dev.txt -# - pip install -r requirements/requirements-test.txt -#script: pytest --remote-data --cov=refgenconf -script: - - echo "skipping tests" + - pip install -r requirements/requirements-test.txt +script: pytest --remote-data --cov=refgenconf branches: only: - dev diff --git a/docs/changelog.md b/docs/changelog.md index b5b73ac7..d807f2e4 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,16 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.4.0] - unreleased + +### Added +- `remove_assets` method +- local and remote listing restriction by genome. These methods accept an optional `genome` argument: + - `list_local` + - `list_remote` + - `assets_dict` + - `assets_str` + ## [0.3.0] - 2019-07-11 ### Changed - Favor asset path relative to genome config rather than local folder in case both exist. 
diff --git a/refgenconf/_version.py b/refgenconf/_version.py index 493f7415..6a9beea8 100644 --- a/refgenconf/_version.py +++ b/refgenconf/_version.py @@ -1 +1 @@ -__version__ = "0.3.0" +__version__ = "0.4.0" diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 5de98cf9..fc25654a 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -13,7 +13,7 @@ install_aliases() from inspect import getargspec as finspect from urllib2 import HTTPError - from urllib import ContentTooShortError + from urllib.error import ContentTooShortError ConnectionRefusedError = Exception import urllib.request @@ -75,11 +75,22 @@ def __init__(self, entries=None): self[CFG_GENOMES_KEY] = PXAM() if CFG_FOLDER_KEY not in self: self[CFG_FOLDER_KEY] = os.path.dirname(entries) if isinstance(entries, str) else os.getcwd() - if CFG_VERSION_KEY in self and float(self[CFG_VERSION_KEY]) < REQ_CFG_VERSION: - msg = "This genome config (v{}) is not compliant with v{} standards. To use it, please downgrade " \ - "refgenie: 'pip install refgenie==0.4.4'.".format(self[CFG_VERSION_KEY], str(REQ_CFG_VERSION)) - raise ConfigNotCompliantError(msg) - _LOGGER.debug("Config version is correct: {}".format(self[CFG_VERSION_KEY])) + try: + version = self[CFG_VERSION_KEY] + except KeyError: + _LOGGER.warning("Config lacks version key: {}".format(CFG_VERSION_KEY)) + else: + try: + version = float(version) + except ValueError: + _LOGGER.warning("Cannot parse as numeric: {}".format(version)) + else: + if version < REQ_CFG_VERSION: + msg = "This genome config (v{}) is not compliant with v{} standards. 
To use it, please downgrade " \ + "refgenie: 'pip install refgenie==0.4.4'.".format(self[CFG_VERSION_KEY], str(REQ_CFG_VERSION)) + raise ConfigNotCompliantError(msg) + else: + _LOGGER.debug("Config version is compliant: {}".format(version)) try: self[CFG_SERVER_KEY] = self[CFG_SERVER_KEY].rstrip("/") except KeyError: @@ -91,18 +102,20 @@ def __bool__(self): __nonzero__ = __bool__ - def assets_dict(self, order=None): + def assets_dict(self, genome=None, order=None): """ Map each assembly name to a list of available asset names. - :param order: function(str) -> object how to key genome IDs for sort + :param function(str) -> object order: how to key genome IDs for sort + :param list[str] | str genome: genomes that the assets should be found for :return Mapping[str, Iterable[str]]: mapping from assembly name to collection of available asset names. """ - refgens = sorted(self[CFG_GENOMES_KEY].keys(), key=order) - return OrderedDict([(g, sorted(list(self[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY].keys()), key=order)) for g in refgens]) + refgens = _select_genomes(sorted(self[CFG_GENOMES_KEY].keys(), key=order), genome) + return OrderedDict([(g, sorted(list(self[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY].keys()), key=order)) + for g in refgens]) - def assets_str(self, offset_text=" ", asset_sep=", ", genome_assets_delim=": ", order=None): + def assets_str(self, offset_text=" ", asset_sep=", ", genome_assets_delim=": ", genome=None, order=None): """ Create a block of text representing genome-to-asset mapping. 
@@ -112,20 +125,21 @@ def assets_str(self, offset_text=" ", asset_sep=", ", genome_assets_delim=": ", within each genome line :param str genome_assets_delim: the delimiter to place between reference genome assembly name and its list of asset names + :param list[str] | str genome: genomes that the assets should be found for :param order: function(str) -> object how to key genome IDs and asset names for sort :return str: text representing genome-to-asset mapping """ - make_line = partial(_make_genome_assets_line, offset_text=offset_text, - genome_assets_delim=genome_assets_delim, asset_sep=asset_sep, order=order) - refgens = sorted(self[CFG_GENOMES_KEY].keys(), key=order) + refgens = _select_genomes(sorted(self[CFG_GENOMES_KEY].keys(), key=order), genome) + make_line = partial(_make_genome_assets_line, offset_text=offset_text, genome_assets_delim=genome_assets_delim, + asset_sep=asset_sep, order=order) return "\n".join([make_line(g, self[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY]) for g in refgens]) def filepath(self, genome, asset, ext=".tar"): """ Determine path to a particular asset for a particular genome. - :param str genome: reference genome iD + :param str genome: reference genome ID :param str asset: asset name :param str ext: file extension :return str: path to asset for given genome and asset kind/name @@ -213,8 +227,7 @@ def list_assets_by_genome(self, genome=None, order=None): one is provided, else the full mapping between assembly ID and collection available asset type names """ - return self.assets_dict(order) if genome is None \ - else sorted(list(self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys()), key=order) + return self.assets_dict(genome, order)[genome] if genome is not None else self.assets_dict(order) def list_genomes_by_asset(self, asset=None, order=None): """ @@ -230,32 +243,36 @@ def list_genomes_by_asset(self, asset=None, order=None): will be returned. 
""" return self._invert_genomes(order) if not asset else \ - sorted([g for g, am in self[CFG_GENOMES_KEY].items() if asset in am], key=order) + sorted([g for g, data in self[CFG_GENOMES_KEY].items() + if asset in data.get(CFG_ASSETS_KEY)], key=order) - def list_local(self, order=None): + def list_local(self, genome=None, order=None): """ List locally available reference genome IDs and assets by ID. + :param list[str] | str genome: genomes that the assets should be found for :param order: function(str) -> object how to key genome IDs and asset names for sort :return str, str: text reps of locally available genomes and assets """ - return self.genomes_str(order=order), self.assets_str(order=order) + genomes_str = self.genomes_str(order=order) if genome is None \ + else ", ".join(_select_genomes(sorted(self[CFG_GENOMES_KEY].keys(), key=order), genome)) + return genomes_str, self.assets_str(genome=genome, order=order) - def list_remote(self, get_url=lambda rgc: "{}/assets".format(rgc.genome_server), - order=None): + def list_remote(self, get_url=lambda rgc: "{}/assets".format(rgc.genome_server), genome=None, order=None): """ List genomes and assets available remotely. 
:param function(refgenconf.RefGenConf) -> str get_url: how to determine URL request, given RefGenConf instance + :param list[str] | str genome: genomes that the assets should be found for :param order: function(str) -> object how to key genome IDs and asset names for sort :return str, str: text reps of remotely available genomes and assets """ url = get_url(self) _LOGGER.info("Querying available assets from server: {}".format(url)) - genomes, assets = _list_remote(url, order) + genomes, assets = _list_remote(url, genome, order) return genomes, assets def pull_asset(self, genome, assets, genome_config, unpack=True, force=None, @@ -318,6 +335,7 @@ def raise_unpack_error(): def preserve(): _LOGGER.debug("Preserving existing: {}".format(filepath)) return asset, filepath + def msg_overwrite(): _LOGGER.debug("Overwriting: {}".format(filepath)) if force is False: @@ -404,6 +422,28 @@ def update_assets(self, genome, asset=None, data=None): self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset].update(data) return self + def remove_assets(self, genome, assets): + """ + Remove assets. 
If no more assets are defined for the selected genome after asset removal, + the genome key will be removed as well + + :param str genome: genome whose assets are to be removed + :param str | list[str] assets: assets to be removed + :raise TypeError: if assets argument type is not a list or str + :return RefGenConf: updated object + """ + assets = [assets] if isinstance(assets, str) else assets + if not isinstance(assets, list): + raise TypeError("assets arg has to be a str or list[str]") + for asset in assets: + if _check_insert_data(genome, str, "genome"): + self[CFG_GENOMES_KEY].setdefault(genome, PXAM({CFG_ASSETS_KEY: PXAM()})) + if _check_insert_data(asset, str, "asset"): + del self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset] + if len(self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY]) == 0: + del self[CFG_GENOMES_KEY][genome] + return self + def update_genomes(self, genome, data=None): """ Updates the genomes in RefGenConf object at any level. @@ -534,11 +574,10 @@ def _is_large_archive(size): :return bool: the decision """ _LOGGER.debug("Checking archive size: '{}'".format(size)) - return size.endswith("TB") or ( - size.endswith("GB") and float("".join(c for c in size if c in '0123456789.')) > 5) + return size.endswith("TB") or (size.endswith("GB") and float("".join(c for c in size if c in '0123456789.')) > 5) -def _list_remote(url, order=None): +def _list_remote(url, genome, order=None): """ List genomes and assets available remotely. 
@@ -548,13 +587,13 @@ def _list_remote(url, order=None): :return str, str: text reps of remotely available genomes and assets """ genomes_data = _read_remote_data(url) - refgens = sorted(genomes_data.keys(), key=order) - return ", ".join(refgens), "\n".join([_make_genome_assets_line(g, genomes_data[g], order=order) for g in refgens]) + refgens = _select_genomes(sorted(genomes_data.keys(), key=order), genome) + asset_texts = [_make_genome_assets_line(g, genomes_data[g], order=order) for g in refgens] + return ", ".join(refgens), "\n".join(asset_texts) def _make_genome_assets_line( - gen, assets, offset_text=" ", genome_assets_delim=": ", asset_sep=", ", - order=None): + gen, assets, offset_text=" ", genome_assets_delim=": ", asset_sep=", ", order=None): """ Build a line of text for display of assets by genome @@ -567,8 +606,7 @@ def _make_genome_assets_line( :param order: function(str) -> object how to key asset names for sort :return str: text representation of a single assembly's name and assets """ - return offset_text + "{}{}{}".format( - gen, genome_assets_delim, asset_sep.join(sorted(list(assets), key=order))) + return offset_text + "{}{}{}".format(gen, genome_assets_delim, asset_sep.join(sorted(list(assets), key=order))) def _read_remote_data(url): @@ -604,3 +642,19 @@ def _check_insert_data(obj, datatype, name): raise TypeError("{} must be {}; got {}".format( name, datatype.__name__, type(obj).__name__)) return True + + +def _select_genomes(genomes, genome=None): + """ + Safely select a subset of genomes + + :param list[str] | str genome: genomes that the assets should be found for + :raise TypeError: if genome argument type is not a list or str + :return list: selected subset of genomes + """ + if genome: + if isinstance(genome, str): + genome = [genome] + elif not isinstance(genome, list) or not all(isinstance(i, str) for i in genome): + raise TypeError("genome has to be a list[str] or a str, got '{}'".format(genome.__class__.__name__)) + return genomes if 
(genome is None or not all(x in genomes for x in genome)) else genome \ No newline at end of file diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index ada12cdb..38625f3e 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -1,5 +1,5 @@ -#pytest>=3.0.7 -#pytest-remotedata +pytest>=3.0.7 +pytest-remotedata pyyaml>=5 ubiquerg>=0.3 -#veracitools +veracitools diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 9f0c760d..1371faf1 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -1,2 +1,3 @@ coveralls>=1.1 pytest-cov==2.6.1 +pytest-remotedata diff --git a/tests/conftest.py b/tests/conftest.py index 72c0401a..6527b64a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,115 +1,123 @@ -# """ Test suite shared objects and setup """ -# -# import os -# import random -# import shutil -# import string -# import pytest -# import yaml -# from attmap import PathExAttMap -# from refgenconf import RefGenConf -# from refgenconf.const import * -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# IDX_BT2_VAL = "indexed_bowtie2" -# HG38_DATA = [ -# ("bowtie2", IDX_BT2_VAL), ("hisat2", "indexed_hisat2"), -# ("tss_annotation", "TSS.bed.gz"), ("gtf", "blah.gtf")] -# MM10_DATA = [("bowtie2", IDX_BT2_VAL), ("blacklist", "blacklist/mm10.bed")] -# MITO_DATA = [("bowtie2", IDX_BT2_VAL), ("bowtie", "indexed_bowtie")] -# -# -# REMOTE_ASSETS = { -# "mm10": {"bowtie2": ".tar", "kallisto": ".tar"}, -# "hg38": {"bowtie2": ".tar", "epilog": ".tgz", "kallisto": ".tar"}} -# REQUESTS = [(g, a) for g, ext_by_asset in REMOTE_ASSETS.items() -# for a in ext_by_asset] -# URL_BASE = "https://raw.githubusercontent.com/databio/refgenieserver/master/files" -# -# -# def _bind_to_path(kvs): -# return [(k, lift_into_path_pair(v)) for k, v in kvs] -# -# -# def lift_into_path_pair(name): -# return {"path": name} -# -# -# CONF_DATA 
= [(g, PathExAttMap(_bind_to_path(data))) for g, data in -# [("hg38", HG38_DATA), ("mm10", MM10_DATA), ("rCRSd", MITO_DATA)]] -# -# -# def get_conf_genomes(): -# """ -# Get the collection of reference genome assembly names used in test data. -# -# :return list[str]: collection of test data reference genome assembly names -# """ -# return list(list(zip(*CONF_DATA))[0]) -# -# -# @pytest.fixture -# def gencfg(temp_genome_config_file): -# """ Provide test case with copied version of test session's genome config. """ -# fn = "".join(random.choice(string.ascii_letters) for _ in range(15)) + ".yaml" -# fp = os.path.join(os.path.dirname(temp_genome_config_file), fn) -# assert not os.path.exists(fp) -# shutil.copy(temp_genome_config_file, fp) -# assert os.path.isfile(fp) -# return fp -# -# -# def get_get_url(genome, asset, base=URL_BASE): -# """ -# Create 3-arg function that determines URL from genome and asset names. -# -# :param str genome: the reference genome assembly ID, e.g. mm10 -# :param str asset: the name of the asset to use in the URL, e.g. bowtie2 -# :param str base: the base of the URL to create -# :return function(object, str, str): function with which to build URL -# based on reference genome assembly ID, asset name, and one unused -# positional argument -# """ -# return (lambda _, g, a: "{base}/{g}/{fn}".format( -# base=base, g=genome, fn=a + REMOTE_ASSETS[g][asset])) -# -# -# @pytest.fixture(scope="session") -# def made_genome_config_file(temp_genome_config_file): -# """ Make the test session's genome config file. 
""" -# genome_folder = os.path.dirname(temp_genome_config_file) -# extra_kv_lines = ["{}: {}".format(CFG_FOLDER_KEY, genome_folder), -# "{}: {}".format(CFG_SERVER_KEY, DEFAULT_SERVER), -# "{}:".format(CFG_GENOMES_KEY)] -# gen_data_lines = PathExAttMap(CONF_DATA).get_yaml_lines() -# fp = temp_genome_config_file -# with open(fp, 'w') as f: -# f.write("\n".join(extra_kv_lines + [" " + l for l in gen_data_lines])) -# return fp -# -# -# @pytest.fixture -# def rgc(made_genome_config_file): -# """ Provide test case with a genome config instance. """ -# with open(made_genome_config_file, 'r') as f: -# return RefGenConf(yaml.load(f, yaml.SafeLoader)) -# -# -# @pytest.fixture -# def remove_genome_folder(request): -# """ Remove a test case's folder for a particular genome. """ -# folder = request.getfixturevalue("rgc").genome_folder -# genome = request.getfixturevalue("genome") -# path = os.path.join(folder, genome) -# yield -# if os.path.exists(path): -# shutil.rmtree(path) -# -# -# @pytest.fixture(scope="session") -# def temp_genome_config_file(tmpdir_factory): -# """ The genome configuration file for the test suite. 
""" -# return tmpdir_factory.mktemp("data").join("refgenie.yaml").strpath +""" Test suite shared objects and setup """ + +import os +import random +import shutil +import string +import pytest +import yaml +from attmap import PathExAttMap +from refgenconf import __version__ as package_version +from refgenconf import RefGenConf +from refgenconf.const import * + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +IDX_BT2_VAL = "indexed_bowtie2" +HG38_DATA = [ + ("bowtie2", IDX_BT2_VAL), ("hisat2", "indexed_hisat2"), + ("tss_annotation", "TSS.bed.gz"), ("gtf", "blah.gtf")] +MM10_DATA = [("bowtie2", IDX_BT2_VAL), ("blacklist", "blacklist/mm10.bed")] +MITO_DATA = [("bowtie2", IDX_BT2_VAL), ("bowtie", "indexed_bowtie")] + + +REMOTE_ASSETS = { + "mm10": {"bowtie2": ".tar", "kallisto": ".tar"}, + "hg38": {"bowtie2": ".tar", "epilog": ".tgz", "kallisto": ".tar"}} +REQUESTS = [(g, a) for g, ext_by_asset in REMOTE_ASSETS.items() + for a in ext_by_asset] +URL_BASE = "https://raw.githubusercontent.com/databio/refgenieserver/master/files" + + +def _bind_to_path(kvs): + return [(k, lift_into_path_pair(v)) for k, v in kvs] + + +def lift_into_path_pair(name): + return {"path": name} + + +CONF_DATA = [ + (g, {CFG_ASSETS_KEY: PathExAttMap(_bind_to_path(data))}) for g, data + in [("hg38", HG38_DATA), ("mm10", MM10_DATA), ("rCRSd", MITO_DATA)] +] + + +def bind_to_assets(data): + return {CFG_ASSETS_KEY: data} + + +def get_conf_genomes(): + """ + Get the collection of reference genome assembly names used in test data. + + :return list[str]: collection of test data reference genome assembly names + """ + return list(list(zip(*CONF_DATA))[0]) + + +@pytest.fixture +def gencfg(temp_genome_config_file): + """ Provide test case with copied version of test session's genome config. 
""" + fn = "".join(random.choice(string.ascii_letters) for _ in range(15)) + ".yaml" + fp = os.path.join(os.path.dirname(temp_genome_config_file), fn) + assert not os.path.exists(fp) + shutil.copy(temp_genome_config_file, fp) + assert os.path.isfile(fp) + return fp + + +def get_get_url(genome, asset, base=URL_BASE): + """ + Create 3-arg function that determines URL from genome and asset names. + + :param str genome: the reference genome assembly ID, e.g. mm10 + :param str asset: the name of the asset to use in the URL, e.g. bowtie2 + :param str base: the base of the URL to create + :return function(object, str, str): function with which to build URL + based on reference genome assembly ID, asset name, and one unused + positional argument + """ + return (lambda _, g, a: "{base}/{g}/{fn}".format( + base=base, g=genome, fn=a + REMOTE_ASSETS[g][asset])) + + +@pytest.fixture(scope="session") +def made_genome_config_file(temp_genome_config_file): + """ Make the test session's genome config file. """ + genome_folder = os.path.dirname(temp_genome_config_file) + extra_kv_lines = ["{}: {}".format(CFG_FOLDER_KEY, genome_folder), + "{}: {}".format(CFG_SERVER_KEY, DEFAULT_SERVER), + "{}: {}".format(CFG_VERSION_KEY, package_version), + "{}:".format(CFG_GENOMES_KEY)] + gen_data_lines = PathExAttMap(CONF_DATA).get_yaml_lines() + fp = temp_genome_config_file + with open(fp, 'w') as f: + f.write("\n".join(extra_kv_lines + [" " + l for l in gen_data_lines])) + return fp + + +@pytest.fixture +def rgc(made_genome_config_file): + """ Provide test case with a genome config instance. """ + with open(made_genome_config_file, 'r') as f: + return RefGenConf(yaml.load(f, yaml.SafeLoader)) + + +@pytest.fixture +def remove_genome_folder(request): + """ Remove a test case's folder for a particular genome. 
""" + folder = request.getfixturevalue("rgc").genome_folder + genome = request.getfixturevalue("genome") + path = os.path.join(folder, genome) + yield + if os.path.exists(path): + shutil.rmtree(path) + + +@pytest.fixture(scope="session") +def temp_genome_config_file(tmpdir_factory): + """ The genome configuration file for the test suite. """ + return tmpdir_factory.mktemp("data").join("refgenie.yaml").strpath diff --git a/tests/test_assets_basic.py b/tests/test_assets_basic.py index 01fbdc2c..dee0dcde 100644 --- a/tests/test_assets_basic.py +++ b/tests/test_assets_basic.py @@ -1,71 +1,73 @@ -# """ Basic RGC asset tests """ -# -# from collections import OrderedDict -# from operator import itemgetter -# import pytest -# from tests.conftest import CONF_DATA, HG38_DATA, MM10_DATA, MITO_DATA -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# BT2_EXP = ["hg38", "mm10", "rCRSd"] -# BT1_EXP = ["rCRSd"] -# HISAT2_EXP = ["hg38"] -# BLACKLIST_EXP = ["mm10"] -# TSS_EXP = ["hg38"] -# GTF_EXP = ["hg38"] -# SORT_CONF_DATA = [(g, sorted(assets.keys())) for g, assets in -# sorted(CONF_DATA, key=itemgetter(0))] -# -# -# def _ord_exp_map(m): -# return OrderedDict([(k, sorted(m[k])) for k in sorted(m.keys())]) -# -# -# def test_assets_dict(rgc): -# """ Verify mapping of genome name to assets key-value collection. 
""" -# exp = _ord_exp_map({g: list(am.keys()) for g, am in CONF_DATA}) -# assert exp == rgc.assets_dict() -# -# -# @pytest.mark.parametrize( -# ["kwargs", "expected"], -# [({}, "\n".join(" " + "{}: {}".format(g, ", ".join(assets)) -# for g, assets in SORT_CONF_DATA)), -# ({"offset_text": ""}, -# "\n".join("{}: {}".format(g, ", ".join(assets)) -# for g, assets in SORT_CONF_DATA)), -# ({"asset_sep": ","}, -# "\n".join(" " + "{}: {}".format(g, ",".join(assets)) -# for g, assets in SORT_CONF_DATA)), -# ({"genome_assets_delim": " -- "}, -# "\n".join(" " + "{} -- {}".format(g, ", ".join(assets)) -# for g, assets in SORT_CONF_DATA))]) -# def test_assets_str(rgc, kwargs, expected): -# """ Verify text representation of the configuration instance's assets. """ -# print("kwargs: {}".format(kwargs)) -# assert expected == rgc.assets_str(**kwargs) -# -# -# @pytest.mark.parametrize(["gname", "expected"], [ -# ("hg38", sorted([a for a, _ in HG38_DATA])), -# ("mm10", sorted([a for a, _ in MM10_DATA])), -# ("rCRSd", sorted([a for a, _ in MITO_DATA])), -# (None, _ord_exp_map({g: list(assets.keys()) for g, assets in CONF_DATA})) -# ]) -# def test_list_assets_by_genome(rgc, gname, expected): -# """ Verify listing of asset name/key/type, possible for one/all genomes. """ -# assert expected == rgc.list_assets_by_genome(gname) -# -# -# @pytest.mark.parametrize(["asset", "expected"], [ -# (None, {"bowtie2": BT2_EXP, "bowtie": BT1_EXP, -# "hisat2": HISAT2_EXP, "blacklist": BLACKLIST_EXP, -# "tss_annotation": TSS_EXP, "gtf": GTF_EXP}), -# ("bowtie2", BT2_EXP), ("bowtie", BT1_EXP), ("hisat2", HISAT2_EXP), -# ("gtf", GTF_EXP), ("tss_annotation", TSS_EXP) -# ]) -# def test_list_genomes_by_asset(rgc, asset, expected): -# """ Veerify listing of genomes by asset name/key/type. 
""" -# assert expected == rgc.list_genomes_by_asset(asset) +""" Basic RGC asset tests """ + +from collections import OrderedDict +from operator import itemgetter +import pytest +from refgenconf import CFG_ASSETS_KEY +from tests.conftest import CONF_DATA, HG38_DATA, MM10_DATA, MITO_DATA + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +BT2_EXP = ["hg38", "mm10", "rCRSd"] +BT1_EXP = ["rCRSd"] +HISAT2_EXP = ["hg38"] +BLACKLIST_EXP = ["mm10"] +TSS_EXP = ["hg38"] +GTF_EXP = ["hg38"] +SORT_CONF_DATA = [(g, sorted(data[CFG_ASSETS_KEY].keys())) for g, data in + sorted(CONF_DATA, key=itemgetter(0))] + + +def _ord_exp_map(m): + return OrderedDict([(k, sorted(m[k])) for k in sorted(m.keys())]) + + +def test_assets_dict(rgc): + """ Verify mapping of genome name to assets key-value collection. """ + exp = _ord_exp_map({g: list(data[CFG_ASSETS_KEY].keys()) for g, data in CONF_DATA}) + assert exp == rgc.assets_dict() + + +@pytest.mark.parametrize( + ["kwargs", "expected"], + [({}, "\n".join(" " + "{}: {}".format(g, ", ".join(assets)) + for g, assets in SORT_CONF_DATA)), + ({"offset_text": ""}, + "\n".join("{}: {}".format(g, ", ".join(assets)) + for g, assets in SORT_CONF_DATA)), + ({"asset_sep": ","}, + "\n".join(" " + "{}: {}".format(g, ",".join(assets)) + for g, assets in SORT_CONF_DATA)), + ({"genome_assets_delim": " -- "}, + "\n".join(" " + "{} -- {}".format(g, ", ".join(assets)) + for g, assets in SORT_CONF_DATA))]) +def test_assets_str(rgc, kwargs, expected): + """ Verify text representation of the configuration instance's assets. 
""" + print("kwargs: {}".format(kwargs)) + assert expected == rgc.assets_str(**kwargs) + + +@pytest.mark.parametrize(["gname", "expected"], [ + ("hg38", sorted([a for a, _ in HG38_DATA])), + ("mm10", sorted([a for a, _ in MM10_DATA])), + ("rCRSd", sorted([a for a, _ in MITO_DATA])), + (None, _ord_exp_map({g: list(data[CFG_ASSETS_KEY].keys()) + for g, data in CONF_DATA})) +]) +def test_list_assets_by_genome(rgc, gname, expected): + """ Verify listing of asset name/key/type, possible for one/all genomes. """ + assert expected == rgc.list_assets_by_genome(gname) + + +@pytest.mark.parametrize(["asset", "expected"], [ + (None, {"bowtie2": BT2_EXP, "bowtie": BT1_EXP, + "hisat2": HISAT2_EXP, "blacklist": BLACKLIST_EXP, + "tss_annotation": TSS_EXP, "gtf": GTF_EXP}), + ("bowtie2", BT2_EXP), ("bowtie", BT1_EXP), ("hisat2", HISAT2_EXP), + ("gtf", GTF_EXP), ("tss_annotation", TSS_EXP) +]) +def test_list_genomes_by_asset(rgc, asset, expected): + """ Veerify listing of genomes by asset name/key/type. 
""" + assert expected == rgc.list_genomes_by_asset(asset) diff --git a/tests/test_config_constructor.py b/tests/test_config_constructor.py index a15f919c..8b0b0aa5 100644 --- a/tests/test_config_constructor.py +++ b/tests/test_config_constructor.py @@ -1,78 +1,78 @@ -# """ Tests for basic functionality of the RefGenConf constructor """ -# -# import os -# import pytest -# from attmap import PathExAttMap -# from refgenconf import RefGenConf, MissingConfigDataError -# from refgenconf.const import CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_SERVER_KEY, \ -# DEFAULT_SERVER -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# @pytest.mark.parametrize("present", [[], [(CFG_FOLDER_KEY, lambda d: d.strpath)]]) -# def test_missing_server_key(tmpdir, present): -# """ Omission of required config items causes expected exception """ -# data = {k: f(tmpdir) for k, f in present} -# with pytest.raises(MissingConfigDataError): -# RefGenConf(data) -# -# -# def test_genome_folder_is_pwd_if_no_folder_key_and_raw_entries_passed(rgc): -# data = PathExAttMap({k: v for k, v in rgc.items() if k != CFG_FOLDER_KEY}) -# new_rgc = RefGenConf(data) -# assert os.getcwd() == new_rgc[CFG_FOLDER_KEY] -# -# -# def test_genome_folder_is_config_file_folder_if_no_key_present( -# tmpdir, made_genome_config_file): -# conf_file = tmpdir.join("newconf.yaml").strpath -# assert not os.path.exists(conf_file) -# with open(conf_file, 'w') as fout, open(made_genome_config_file, 'r') as fin: -# for l in fin: -# if not l.startswith(CFG_FOLDER_KEY): -# fout.write(l) -# new_rgc = RefGenConf(conf_file) -# assert os.path.dirname(conf_file) == new_rgc[CFG_FOLDER_KEY] -# -# -# def test_genome_folder_is_value_from_config_file_if_key_present( -# tmpdir_factory, tmpdir, made_genome_config_file): -# conf_file = tmpdir_factory.mktemp("data2").join("refgenie.yaml").strpath -# expected = tmpdir.strpath -# with open(made_genome_config_file, 'r') as fin, open(conf_file, 'w') as fout: -# found = False -# for l 
in fin: -# if l.startswith(CFG_FOLDER_KEY): -# fout.write("{}: {}\n".format(CFG_FOLDER_KEY, expected)) -# else: -# fout.write(l) -# if l.startswith(CFG_SERVER_KEY): -# found = True -# if not found: -# fout.write("{}: {}".format(CFG_SERVER_KEY, DEFAULT_SERVER)) -# rgc = RefGenConf(conf_file) -# assert expected != os.path.dirname(conf_file) -# assert expected == rgc[CFG_FOLDER_KEY] -# -# -# def test_empty_rgc_is_false(): -# assert bool(RefGenConf({CFG_SERVER_KEY: DEFAULT_SERVER})) is False -# -# -# def test_nonempty_rgc_is_true(rgc): -# assert bool(rgc) is True -# -# -# @pytest.mark.parametrize( -# "genomes", [None, "genomes", 10] + [dt(["mm10", "hg38"]) for dt in [list, set, tuple]]) -# def test_illegal_genomes_mapping_type_gets_converted_to_empty_mapping(genomes, tmpdir): -# rgc = RefGenConf({ -# CFG_FOLDER_KEY: tmpdir.strpath, -# CFG_GENOMES_KEY: genomes, -# CFG_SERVER_KEY: DEFAULT_SERVER -# }) -# res = rgc[CFG_GENOMES_KEY] -# assert isinstance(res, PathExAttMap) -# assert 0 == len(res) +""" Tests for basic functionality of the RefGenConf constructor """ + +import os +import pytest +from attmap import PathExAttMap +from refgenconf import RefGenConf, MissingConfigDataError +from refgenconf.const import CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_SERVER_KEY, \ + DEFAULT_SERVER + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +@pytest.mark.parametrize("present", [[], [(CFG_FOLDER_KEY, lambda d: d.strpath)]]) +def test_missing_server_key(tmpdir, present): + """ Omission of required config items causes expected exception """ + data = {k: f(tmpdir) for k, f in present} + with pytest.raises(MissingConfigDataError): + RefGenConf(data) + + +def test_genome_folder_is_pwd_if_no_folder_key_and_raw_entries_passed(rgc): + data = PathExAttMap({k: v for k, v in rgc.items() if k != CFG_FOLDER_KEY}) + new_rgc = RefGenConf(data) + assert os.getcwd() == new_rgc[CFG_FOLDER_KEY] + + +def test_genome_folder_is_config_file_folder_if_no_key_present( + tmpdir, 
made_genome_config_file): + conf_file = tmpdir.join("newconf.yaml").strpath + assert not os.path.exists(conf_file) + with open(conf_file, 'w') as fout, open(made_genome_config_file, 'r') as fin: + for l in fin: + if not l.startswith(CFG_FOLDER_KEY): + fout.write(l) + new_rgc = RefGenConf(conf_file) + assert os.path.dirname(conf_file) == new_rgc[CFG_FOLDER_KEY] + + +def test_genome_folder_is_value_from_config_file_if_key_present( + tmpdir_factory, tmpdir, made_genome_config_file): + conf_file = tmpdir_factory.mktemp("data2").join("refgenie.yaml").strpath + expected = tmpdir.strpath + with open(made_genome_config_file, 'r') as fin, open(conf_file, 'w') as fout: + found = False + for l in fin: + if l.startswith(CFG_FOLDER_KEY): + fout.write("{}: {}\n".format(CFG_FOLDER_KEY, expected)) + else: + fout.write(l) + if l.startswith(CFG_SERVER_KEY): + found = True + if not found: + fout.write("{}: {}".format(CFG_SERVER_KEY, DEFAULT_SERVER)) + rgc = RefGenConf(conf_file) + assert expected != os.path.dirname(conf_file) + assert expected == rgc[CFG_FOLDER_KEY] + + +def test_empty_rgc_is_false(): + assert bool(RefGenConf({CFG_SERVER_KEY: DEFAULT_SERVER})) is False + + +def test_nonempty_rgc_is_true(rgc): + assert bool(rgc) is True + + +@pytest.mark.parametrize( + "genomes", [None, "genomes", 10] + [dt(["mm10", "hg38"]) for dt in [list, set, tuple]]) +def test_illegal_genomes_mapping_type_gets_converted_to_empty_mapping(genomes, tmpdir): + rgc = RefGenConf({ + CFG_FOLDER_KEY: tmpdir.strpath, + CFG_GENOMES_KEY: genomes, + CFG_SERVER_KEY: DEFAULT_SERVER + }) + res = rgc[CFG_GENOMES_KEY] + assert isinstance(res, PathExAttMap) + assert 0 == len(res) diff --git a/tests/test_config_unbound_env_vars.py b/tests/test_config_unbound_env_vars.py index 0e285533..6008ca9f 100644 --- a/tests/test_config_unbound_env_vars.py +++ b/tests/test_config_unbound_env_vars.py @@ -1,34 +1,34 @@ -# """ Tests regarding unboudn environment variables in a genome config file. 
""" -# -# import os -# import pytest -# from refgenconf import CFG_FOLDER_KEY, UnboundEnvironmentVariablesError as UEVErr -# from tests.conftest import get_get_url, REQUESTS -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# @pytest.mark.parametrize("evs", [["NOT_A_VAR"], ["NOT_A_VAR", "RANDNAME"]]) -# def test_missing_env_vars_in_genome_config_path_raises_exception( -# rgc, tmpdir, evs, genome, asset, gencfg, remove_genome_folder): -# """ Unbound env var(s) in genome folder path cause error. """ -# assert all(_is_unbound(v) for v in evs) -# path_parts = ["$" + v for v in [tmpdir.strpath] + evs] -# path = os.path.join(*path_parts) -# print("Genome folder path: {}".format(path)) -# rgc[CFG_FOLDER_KEY] = path -# assert path == rgc[CFG_FOLDER_KEY] -# assert not os.path.exists(path) -# with pytest.raises(UEVErr) as err_ctx: -# rgc.pull_asset(genome, asset, gencfg, -# get_main_url=get_get_url(genome, asset)) -# err_msg = str(err_ctx.value) -# print("Observed error message: {}".format(err_msg)) -# missing = [v for v in evs if v not in err_msg] -# assert [] == missing -# -# -# def _is_unbound(ev): -# return os.getenv(ev) is None and ev not in os.environ +""" Tests regarding unboudn environment variables in a genome config file. """ + +import os +import pytest +from refgenconf import CFG_FOLDER_KEY, UnboundEnvironmentVariablesError as UEVErr +from tests.conftest import get_get_url, REQUESTS + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +@pytest.mark.parametrize(["genome", "asset"], REQUESTS) +@pytest.mark.parametrize("evs", [["NOT_A_VAR"], ["NOT_A_VAR", "RANDNAME"]]) +def test_missing_env_vars_in_genome_config_path_raises_exception( + rgc, tmpdir, evs, genome, asset, gencfg, remove_genome_folder): + """ Unbound env var(s) in genome folder path cause error. 
""" + assert all(_is_unbound(v) for v in evs) + path_parts = ["$" + v for v in [tmpdir.strpath] + evs] + path = os.path.join(*path_parts) + print("Genome folder path: {}".format(path)) + rgc[CFG_FOLDER_KEY] = path + assert path == rgc[CFG_FOLDER_KEY] + assert not os.path.exists(path) + with pytest.raises(UEVErr) as err_ctx: + rgc.pull_asset(genome, asset, gencfg, + get_main_url=get_get_url(genome, asset)) + err_msg = str(err_ctx.value) + print("Observed error message: {}".format(err_msg)) + missing = [v for v in evs if v not in err_msg] + assert [] == missing + + +def _is_unbound(ev): + return os.getenv(ev) is None and ev not in os.environ diff --git a/tests/test_genome_config_format_error.py b/tests/test_genome_config_format_error.py index 8a10d9ea..f1b3b3dc 100644 --- a/tests/test_genome_config_format_error.py +++ b/tests/test_genome_config_format_error.py @@ -1,51 +1,53 @@ -# """ Tests for genome config format exception """ -# -# import pytest -# from refgenconf import * -# from refgenconf.exceptions import DOC_URL -# from ubiquerg import powerset -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# FIXED_KV_PAIRS = [ -# (CFG_ASSET_SIZE_KEY, "1G"), (CFG_ARCHIVE_SIZE_KEY, "2G"), -# (CFG_CHECKSUM_KEY, "dummy-checksum")] -# -# -# @pytest.fixture -# def base_rgc_data(tmpdir): -# return {CFG_FOLDER_KEY: tmpdir.strpath, CFG_SERVER_KEY: DEFAULT_SERVER} -# -# -# @pytest.fixture -# def rgc(base_rgc_data): -# return RefGenConf(base_rgc_data) -# -# -# @pytest.mark.parametrize( -# ["msg", "exp"], [(".", ". "), ("?", "? "), ("a", "a; ")]) -# @pytest.mark.parametrize( -# "check", [lambda m, e: m.startswith(e), lambda m, _: m.endswith(DOC_URL)]) -# def test_config_format_error_message_formatting(msg, exp, check): -# """ Check config format error message formatting and docs URL inclusion. 
""" -# msg = str(GenomeConfigFormatError(msg)) -# assert check(msg, exp) -# -# -# @pytest.mark.parametrize("genome", ["dm3", "mm10", "hg38"]) -# @pytest.mark.parametrize("asset", ["bowtie2_index", "chrom_sizes", "epilog"]) -# @pytest.mark.parametrize( -# ["data", "message_content"], -# [("just_text_no_path", "has raw string value")] + -# [(dict(c),"lacks a '{}' entry".format(CFG_ASSET_PATH_KEY)) -# for c in powerset(FIXED_KV_PAIRS, nonempty=True)]) -# @pytest.mark.parametrize("check_exist", [None, False, True]) -# def test_genome_config_format_raising_is_sensitive( -# rgc, genome, asset, data, message_content, check_exist): -# """ Check that config format error occurs in expected cases. """ -# rgc[CFG_GENOMES_KEY][genome] = {asset: data} -# with pytest.raises(GenomeConfigFormatError) as err_ctx: -# rgc.get_asset(genome, asset, strict_exists=check_exist) -# assert message_content in str(err_ctx.value) +""" Tests for genome config format exception """ + +import pytest +from refgenconf import * +from refgenconf.const import CFG_ASSETS_KEY +from refgenconf.exceptions import DOC_URL +from tests.conftest import bind_to_assets +from ubiquerg import powerset + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +FIXED_KV_PAIRS = [ + (CFG_ASSET_SIZE_KEY, "1G"), (CFG_ARCHIVE_SIZE_KEY, "2G"), + (CFG_CHECKSUM_KEY, "dummy-checksum")] + + +@pytest.fixture +def base_rgc_data(tmpdir): + return {CFG_FOLDER_KEY: tmpdir.strpath, CFG_SERVER_KEY: DEFAULT_SERVER} + + +@pytest.fixture +def rgc(base_rgc_data): + return RefGenConf(base_rgc_data) + + +@pytest.mark.parametrize( + ["msg", "exp"], [(".", ". "), ("?", "? "), ("a", "a; ")]) +@pytest.mark.parametrize( + "check", [lambda m, e: m.startswith(e), lambda m, _: m.endswith(DOC_URL)]) +def test_config_format_error_message_formatting(msg, exp, check): + """ Check config format error message formatting and docs URL inclusion. 
""" + msg = str(GenomeConfigFormatError(msg)) + assert check(msg, exp) + + +@pytest.mark.parametrize("genome", ["dm3", "mm10", "hg38"]) +@pytest.mark.parametrize("asset", ["bowtie2_index", "chrom_sizes", "epilog"]) +@pytest.mark.parametrize( + ["data", "message_content"], + [("just_text_no_path", "has raw string value")] + + [(dict(c),"lacks a '{}' entry".format(CFG_ASSET_PATH_KEY)) + for c in powerset(FIXED_KV_PAIRS, nonempty=True)]) +@pytest.mark.parametrize("check_exist", [None, False, True]) +def test_genome_config_format_raising_is_sensitive( + rgc, genome, asset, data, message_content, check_exist): + """ Check that config format error occurs in expected cases. """ + rgc[CFG_GENOMES_KEY][genome] = {CFG_ASSETS_KEY: {asset: data}} + with pytest.raises(GenomeConfigFormatError) as err_ctx: + rgc.get_asset(genome, asset, strict_exists=check_exist) + assert message_content in str(err_ctx.value) diff --git a/tests/test_genomes.py b/tests/test_genomes.py index 0aa1c462..ccde7f20 100644 --- a/tests/test_genomes.py +++ b/tests/test_genomes.py @@ -1,16 +1,16 @@ -# """ Tests for querying available reference genome assembly names """ -# -# from tests.conftest import get_conf_genomes -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# def test_genomes_list(rgc): -# """ List of available genomes is as expected. """ -# assert get_conf_genomes() == rgc.genomes_list() -# -# -# def test_genomes_str(rgc): -# """ Text of available genomes is as expected. """ -# assert ", ".join(get_conf_genomes()) == rgc.genomes_str() +""" Tests for querying available reference genome assembly names """ + +from tests.conftest import get_conf_genomes + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +def test_genomes_list(rgc): + """ List of available genomes is as expected. """ + assert get_conf_genomes() == rgc.genomes_list() + + +def test_genomes_str(rgc): + """ Text of available genomes is as expected. 
""" + assert ", ".join(get_conf_genomes()) == rgc.genomes_str() diff --git a/tests/test_get_asset.py b/tests/test_get_asset.py index 7b76010e..243340f9 100644 --- a/tests/test_get_asset.py +++ b/tests/test_get_asset.py @@ -1,149 +1,151 @@ -# """ Tests for ReferenceGenomeConfiguration.get_asset """ -# -# import os -# import pytest -# from refgenconf import * -# from tests.conftest import get_conf_genomes, lift_into_path_pair, CONF_DATA, \ -# HG38_DATA, MM10_DATA, MITO_DATA -# from veracitools import ExpectContext -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# @pytest.fixture -# def temp_asset_spec(tmpdir): -# """ Provide test case with a temp asset path. """ -# fn = "semaphore.txt" -# fp = tmpdir.join(fn).strpath -# assert not os.path.exists(fp) -# return fp -# -# -# @pytest.mark.parametrize( -# "gname", ["not-a-genome", "this_should_fail", "YoUrCrazeeOrganism"]) -# @pytest.mark.parametrize("aname", [ -# "kallisto", "hisat2", "tss_annotation", "gtf", "bowtie2", "blacklist", -# "bowtie", "star"]) -# def test_get_asset_missing_genome(rgc, gname, aname): -# """ Request for asset on a missing genome raises appropriate error. """ -# assert gname not in rgc -# with pytest.raises(MissingGenomeError): -# _get_asset(rgc, gname, aname) -# -# -# @pytest.mark.parametrize("gname", get_conf_genomes()) -# @pytest.mark.parametrize("aname", ["not-an-asset", "asset_fails"]) -# def test_get_asset_missing_asset(rgc, gname, aname): -# """ Request for unknown asset raises appropriate error. """ -# assert gname in rgc.genomes -# with pytest.raises(MissingAssetError): -# _get_asset(rgc, gname, aname) -# -# -# @pytest.mark.parametrize( -# ["gname", "aname", "exp"], -# [(g, k, v) for g, data in -# [("hg38", HG38_DATA), ("mm10", MM10_DATA), ("rCRSd", MITO_DATA)] -# for k, v in data]) -# def test_get_asset_accuracy(rgc, gname, aname, exp): -# """ Asset request for particular genome is accurate. 
""" -# assert exp == _get_asset(rgc, gname, aname) -# -# -# @pytest.mark.parametrize("check_exist", [lambda: True, lambda _1, _2: True]) -# @pytest.mark.parametrize( -# ["gname", "aname"], [(g, a) for g, data in CONF_DATA for a in data]) -# def test_check_exist_param_type(rgc, check_exist, gname, aname): -# """ The asset existence check must be a one-arg function. """ -# with pytest.raises(TypeError): -# rgc.get_asset(gname, aname, check_exist=check_exist) -# -# -# @pytest.mark.parametrize( -# ["strict", "ctxmgr", "error"], -# [(False, pytest.warns, RuntimeWarning), (True, pytest.raises, IOError)]) -# def test_existence_check_strictness(rgc, temp_asset_spec, strict, ctxmgr, error): -# """ Asset existence check behavior responds to strictness parameter. """ -# gname, aname = "tmpgen", "testasset" -# rgc.genomes[gname] = {aname: lift_into_path_pair(temp_asset_spec)} -# def fetch(): -# return _get_asset(rgc, gname, aname, strict_exists=strict) -# with ctxmgr(error): -# fetch() -# with open(temp_asset_spec, 'w'): -# pass -# try: -# fetch() -# except Exception as e: -# pytest.fail(str(e)) -# -# -# @pytest.mark.parametrize( -# ["check_exist", "get_exp_from_path"], -# [(os.path.isfile, lambda p: p), (os.path.isdir, lambda _: IOError)]) -# def test_existence_check_function( -# rgc, check_exist, get_exp_from_path, temp_asset_spec): -# """ Asset existence check behavior responds to existence checker. 
""" -# gname, aname = "tmpgen", "testasset" -# rgc.genomes[gname] = {aname: lift_into_path_pair(temp_asset_spec)} -# with open(temp_asset_spec, 'w'): -# pass -# with ExpectContext(get_exp_from_path(temp_asset_spec), _get_asset) as ctx: -# ctx(rgc, gname, aname, check_exist=check_exist, strict_exists=True) -# -# -# @pytest.mark.parametrize(["extension", "exp_in_msg"], [ -# (".tar", True), (".tar.gz", True), (".untar", False)]) -# @pytest.mark.parametrize(["strict", "ctx", "err", "get_msg"], [ -# (False, pytest.warns, RuntimeWarning, lambda r: str(r[0])), -# (True, pytest.raises, IOError, lambda r: str(r.value))]) -# def test_tar_check(rgc, temp_asset_spec, extension, strict, ctx, err, get_msg, -# exp_in_msg): -# """ Asset fetch checks for TAR variant of true asset path value. """ -# gname, aname = "tmpgen", "testasset" -# rgc.genomes[gname] = {aname: lift_into_path_pair(temp_asset_spec)} -# tarpath = temp_asset_spec + extension -# with open(tarpath, 'w'): -# pass -# with ctx(err) as rec: -# _get_asset(rgc, gname, aname, strict_exists=strict) -# assert (tarpath in get_msg(rec)) is exp_in_msg -# -# -# @pytest.mark.parametrize("strict_exists", [None, False, True]) -# def test_asset_already_exists(tmpdir, strict_exists): -# """ Asset path is joined to genome folder and returned if it exists. 
""" -# genome = "mm10" -# a_key = "chrom_sizes" -# a_path = "Mus_musculus.contig_lengths" -# cfgdat = { -# CFG_FOLDER_KEY: tmpdir.strpath, -# CFG_SERVER_KEY: DEFAULT_SERVER, -# CFG_GENOMES_KEY: {genome: {a_key: {CFG_ASSET_PATH_KEY: a_path}}}} -# rgc = RefGenConf(cfgdat) -# assert a_path == rgc[CFG_GENOMES_KEY][genome][a_key][CFG_ASSET_PATH_KEY] -# assert not os.path.exists(a_path) -# def folder(): -# return rgc[CFG_FOLDER_KEY] -# assert tmpdir.strpath == folder() -# fullpath = os.path.join(folder(), genome, a_path) -# if not os.path.exists(os.path.dirname(fullpath)): -# os.makedirs(os.path.dirname(fullpath)) -# print("Writing: {}".format(fullpath)) -# with open(fullpath, 'w'): -# assert os.path.isfile(fullpath) -# assert fullpath == rgc.get_asset(genome, a_key, strict_exists=strict_exists) -# -# -# def _get_asset(rgc, g, a, **kwargs): -# """ -# Call the asset fetch function. -# -# :param refgenconf.RefGenConf rgc: configuration instance -# :param str g: genome name -# :param str a: asset name -# """ -# kwds = {"strict_exists": None} -# kwds.update(kwargs) -# return rgc.get_asset(g, a, **kwds) +""" Tests for ReferenceGenomeConfiguration.get_asset """ + +import os +import pytest +from refgenconf import * +from refgenconf.const import CFG_ASSETS_KEY +from tests.conftest import bind_to_assets, get_conf_genomes, \ + lift_into_path_pair, CONF_DATA, \ + HG38_DATA, MM10_DATA, MITO_DATA +from veracitools import ExpectContext + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +@pytest.fixture +def temp_asset_spec(tmpdir): + """ Provide test case with a temp asset path. 
""" + fn = "semaphore.txt" + fp = tmpdir.join(fn).strpath + assert not os.path.exists(fp) + return fp + + +@pytest.mark.parametrize( + "gname", ["not-a-genome", "this_should_fail", "YoUrCrazeeOrganism"]) +@pytest.mark.parametrize("aname", [ + "kallisto", "hisat2", "tss_annotation", "gtf", "bowtie2", "blacklist", + "bowtie", "star"]) +def test_get_asset_missing_genome(rgc, gname, aname): + """ Request for asset on a missing genome raises appropriate error. """ + assert gname not in rgc + with pytest.raises(MissingGenomeError): + _get_asset(rgc, gname, aname) + + +@pytest.mark.parametrize("gname", get_conf_genomes()) +@pytest.mark.parametrize("aname", ["not-an-asset", "asset_fails"]) +def test_get_asset_missing_asset(rgc, gname, aname): + """ Request for unknown asset raises appropriate error. """ + assert gname in rgc.genomes + with pytest.raises(MissingAssetError): + _get_asset(rgc, gname, aname) + + +@pytest.mark.parametrize( + ["gname", "aname", "exp"], + [(g, k, v) for g, data in + [("hg38", HG38_DATA), ("mm10", MM10_DATA), ("rCRSd", MITO_DATA)] + for k, v in data]) +def test_get_asset_accuracy(rgc, gname, aname, exp): + """ Asset request for particular genome is accurate. """ + assert exp == _get_asset(rgc, gname, aname) + + +@pytest.mark.parametrize("check_exist", [lambda: True, lambda _1, _2: True]) +@pytest.mark.parametrize( + ["gname", "aname"], [(g, a) for g, data in CONF_DATA for a in data]) +def test_check_exist_param_type(rgc, check_exist, gname, aname): + """ The asset existence check must be a one-arg function. """ + with pytest.raises(TypeError): + rgc.get_asset(gname, aname, check_exist=check_exist) + + +@pytest.mark.parametrize( + ["strict", "ctxmgr", "error"], + [(False, pytest.warns, RuntimeWarning), (True, pytest.raises, IOError)]) +def test_existence_check_strictness(rgc, temp_asset_spec, strict, ctxmgr, error): + """ Asset existence check behavior responds to strictness parameter. 
""" + gname, aname = "tmpgen", "testasset" + rgc.genomes[gname] = bind_to_assets({aname: lift_into_path_pair(temp_asset_spec)}) + def fetch(): + return _get_asset(rgc, gname, aname, strict_exists=strict) + with ctxmgr(error): + fetch() + with open(temp_asset_spec, 'w'): + pass + try: + fetch() + except Exception as e: + pytest.fail(str(e)) + + +@pytest.mark.parametrize( + ["check_exist", "get_exp_from_path"], + [(os.path.isfile, lambda p: p), (os.path.isdir, lambda _: IOError)]) +def test_existence_check_function( + rgc, check_exist, get_exp_from_path, temp_asset_spec): + """ Asset existence check behavior responds to existence checker. """ + gname, aname = "tmpgen", "testasset" + rgc.genomes[gname] = bind_to_assets({aname: lift_into_path_pair(temp_asset_spec)}) + with open(temp_asset_spec, 'w'): + pass + with ExpectContext(get_exp_from_path(temp_asset_spec), _get_asset) as ctx: + ctx(rgc, gname, aname, check_exist=check_exist, strict_exists=True) + + +@pytest.mark.parametrize(["extension", "exp_in_msg"], [ + (".tar", True), (".tar.gz", True), (".untar", False)]) +@pytest.mark.parametrize(["strict", "ctx", "err", "get_msg"], [ + (False, pytest.warns, RuntimeWarning, lambda r: str(r[0])), + (True, pytest.raises, IOError, lambda r: str(r.value))]) +def test_tar_check(rgc, temp_asset_spec, extension, strict, ctx, err, get_msg, + exp_in_msg): + """ Asset fetch checks for TAR variant of true asset path value. """ + gname, aname = "tmpgen", "testasset" + rgc.genomes[gname] = bind_to_assets({aname: lift_into_path_pair(temp_asset_spec)}) + tarpath = temp_asset_spec + extension + with open(tarpath, 'w'): + pass + with ctx(err) as rec: + _get_asset(rgc, gname, aname, strict_exists=strict) + assert (tarpath in get_msg(rec)) is exp_in_msg + + +@pytest.mark.parametrize("strict_exists", [None, False, True]) +def test_asset_already_exists(tmpdir, strict_exists): + """ Asset path is joined to genome folder and returned if it exists. 
""" + genome = "mm10" + a_key = "chrom_sizes" + a_path = "Mus_musculus.contig_lengths" + cfgdat = { + CFG_FOLDER_KEY: tmpdir.strpath, + CFG_SERVER_KEY: DEFAULT_SERVER, + CFG_GENOMES_KEY: {genome: bind_to_assets({a_key: {CFG_ASSET_PATH_KEY: a_path}})}} + rgc = RefGenConf(cfgdat) + assert a_path == rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][a_key][CFG_ASSET_PATH_KEY] + assert not os.path.exists(a_path) + def folder(): + return rgc[CFG_FOLDER_KEY] + assert tmpdir.strpath == folder() + fullpath = os.path.join(folder(), genome, a_path) + if not os.path.exists(os.path.dirname(fullpath)): + os.makedirs(os.path.dirname(fullpath)) + print("Writing: {}".format(fullpath)) + with open(fullpath, 'w'): + assert os.path.isfile(fullpath) + assert fullpath == rgc.get_asset(genome, a_key, strict_exists=strict_exists) + + +def _get_asset(rgc, g, a, **kwargs): + """ + Call the asset fetch function. + + :param refgenconf.RefGenConf rgc: configuration instance + :param str g: genome name + :param str a: asset name + """ + kwds = {"strict_exists": None} + kwds.update(kwargs) + return rgc.get_asset(g, a, **kwds) diff --git a/tests/test_list_remote.py b/tests/test_list_remote.py index 7d36ba67..c4b3cafd 100644 --- a/tests/test_list_remote.py +++ b/tests/test_list_remote.py @@ -1,26 +1,26 @@ -# """ Tests for listing remotely available genomes and assets. """ -# -# import mock -# from refgenconf import RefGenConf, CFG_FOLDER_KEY, CFG_GENOMES_KEY, \ -# CFG_SERVER_KEY, DEFAULT_SERVER -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# def test_list_remote(rgc, tmpdir): -# """ Verify expected behavior of remote genome/asset listing. 
""" -# new_rgc = RefGenConf({CFG_FOLDER_KEY: tmpdir.strpath, -# CFG_SERVER_KEY: DEFAULT_SERVER, -# CFG_GENOMES_KEY: rgc[CFG_GENOMES_KEY]}) -# print("NEW RGC KEYS: {}".format(list(new_rgc.keys()))) -# with mock.patch("refgenconf.refgenconf._read_remote_data", -# return_value=rgc.genomes): -# genomes, assets = new_rgc.list_remote(get_url=lambda _: "irrelevant") -# _assert_eq_as_sets(rgc.genomes_str(), genomes) -# _assert_eq_as_sets(rgc.assets_str(), assets) -# -# -# def _assert_eq_as_sets(a, b): -# assert len(a) == len(b) -# assert set(a) == set(b) +""" Tests for listing remotely available genomes and assets. """ + +import mock +from refgenconf import RefGenConf, CFG_FOLDER_KEY, CFG_GENOMES_KEY, \ + CFG_SERVER_KEY, DEFAULT_SERVER + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +def test_list_remote(rgc, tmpdir): + """ Verify expected behavior of remote genome/asset listing. """ + new_rgc = RefGenConf({CFG_FOLDER_KEY: tmpdir.strpath, + CFG_SERVER_KEY: DEFAULT_SERVER, + CFG_GENOMES_KEY: rgc[CFG_GENOMES_KEY]}) + print("NEW RGC KEYS: {}".format(list(new_rgc.keys()))) + with mock.patch("refgenconf.refgenconf._read_remote_data", + return_value=rgc.genomes): + genomes, assets = new_rgc.list_remote() + _assert_eq_as_sets(rgc.genomes_str(), genomes) + + +def _assert_eq_as_sets(a, b): + """ Collections are equivalent as sets if they're equal in size and element's collective identity. """ + assert len(a) == len(b) + assert set(a) == set(b) diff --git a/tests/test_packaging.py b/tests/test_packaging.py index 71293ed7..04797fe3 100644 --- a/tests/test_packaging.py +++ b/tests/test_packaging.py @@ -1,32 +1,32 @@ -# """ Validate what's available directly on the top-level import. 
""" -# -# import pytest -# from inspect import isclass, isfunction -# from refgenconf.exceptions import RefgenconfError -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# def _is_custom_error(obj): -# return isinstance(obj, type) and issubclass(obj, RefgenconfError) -# -# -# @pytest.mark.parametrize( -# ["obj_name", "typecheck"], -# [("RefGenConf", isclass), ("select_genome_config", isfunction), -# ("DownloadJsonError", _is_custom_error), -# ("GenomeConfigFormatError", _is_custom_error), -# ("MissingAssetError", _is_custom_error), -# ("MissingConfigDataError", _is_custom_error), -# ("MissingGenomeError", _is_custom_error), -# ("UnboundEnvironmentVariablesError", _is_custom_error)]) -# def test_top_level_exports(obj_name, typecheck): -# """ At package level, validate object availability and type. """ -# import refgenconf -# try: -# obj = getattr(refgenconf, obj_name) -# except AttributeError: -# pytest.fail("Unavailable on {}: {}".format(refgenconf.__name__, obj_name)) -# else: -# assert typecheck(obj) +""" Validate what's available directly on the top-level import. """ + +import pytest +from inspect import isclass, isfunction +from refgenconf.exceptions import RefgenconfError + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +def _is_custom_error(obj): + return isinstance(obj, type) and issubclass(obj, RefgenconfError) + + +@pytest.mark.parametrize( + ["obj_name", "typecheck"], + [("RefGenConf", isclass), ("select_genome_config", isfunction), + ("DownloadJsonError", _is_custom_error), + ("GenomeConfigFormatError", _is_custom_error), + ("MissingAssetError", _is_custom_error), + ("MissingConfigDataError", _is_custom_error), + ("MissingGenomeError", _is_custom_error), + ("UnboundEnvironmentVariablesError", _is_custom_error)]) +def test_top_level_exports(obj_name, typecheck): + """ At package level, validate object availability and type. 
""" + import refgenconf + try: + obj = getattr(refgenconf, obj_name) + except AttributeError: + pytest.fail("Unavailable on {}: {}".format(refgenconf.__name__, obj_name)) + else: + assert typecheck(obj) diff --git a/tests/test_pull_asset.py b/tests/test_pull_asset.py index 909794bf..8012c954 100644 --- a/tests/test_pull_asset.py +++ b/tests/test_pull_asset.py @@ -1,303 +1,304 @@ -# """ Tests for asset pull """ -# -# import logging -# import mock -# import os -# import sys -# import time -# if sys.version_info.major < 3: -# from urllib2 import HTTPError -# ConnectionRefusedError = Exception -# else: -# from urllib.error import HTTPError -# import pytest -# from yacman import YacAttMap -# from tests.conftest import CONF_DATA, REMOTE_ASSETS, REQUESTS, \ -# get_get_url -# import refgenconf -# from refgenconf.const import * -# from refgenconf.exceptions import DownloadJsonError -# from refgenconf.refgenconf import _download_url_progress -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# DOWNLOAD_FUNCTION = \ -# "refgenconf.refgenconf.{}".format(_download_url_progress.__name__) -# -# -# @pytest.mark.parametrize( -# ["genome", "asset"], [(g, a) for g, assets in CONF_DATA for a in assets]) -# def test_no_unpack(rgc, genome, asset, temp_genome_config_file): -# """ Tarballs must be unpacked. """ -# with pytest.raises(NotImplementedError): -# rgc.pull_asset(genome, asset, temp_genome_config_file, unpack=False) -# -# -# @pytest.mark.remote_data -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# @pytest.mark.parametrize("exp_file_ext", [".tar", ".txt"]) -# def test_pull_asset_download(rgc, genome, asset, gencfg, exp_file_ext, -# remove_genome_folder): -# """ Verify download and unpacking of tarball asset. 
""" -# if sys.version_info.major < 3: -# pytest.xfail("pull_asset download tests fail on py2") -# exp_file = os.path.join(rgc.genome_folder, genome, asset + exp_file_ext) -# assert not os.path.exists(exp_file) -# with mock.patch.object( -# refgenconf.refgenconf, "_download_json", lambda _: { -# CFG_ARCHIVE_SIZE_KEY: "0GB", CFG_ASSET_PATH_KEY: exp_file}), \ -# mock.patch("refgenconf.refgenconf.query_yes_no", return_value=True): -# rgc.pull_asset(genome, asset, gencfg, -# get_main_url=get_get_url(genome, asset)) -# assert os.path.isfile(exp_file) -# os.unlink(exp_file) -# -# -# @pytest.mark.remote_data -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# def test_pull_asset_updates_genome_config( -# rgc, genome, asset, gencfg, remove_genome_folder): -# """ Verify asset pull's side-effect of updating the genome config file. """ -# try: -# del rgc.genomes[genome][asset] -# except KeyError: -# pass -# rgc.write(gencfg) -# old_data = YacAttMap(gencfg) -# assert asset not in old_data.genomes[genome] -# checksum_tmpval = "not-a-checksum" -# with mock.patch.object( -# refgenconf.refgenconf, "_download_json", -# return_value=YacAttMap({ -# CFG_CHECKSUM_KEY: checksum_tmpval, -# CFG_ARCHIVE_SIZE_KEY: "0 GB", -# CFG_ASSET_PATH_KEY: "testpath"})), \ -# mock.patch.object(refgenconf.refgenconf, "checksum", -# return_value=checksum_tmpval), \ -# mock.patch.object(refgenconf.refgenconf, "_download_url_progress", -# return_value=None), \ -# mock.patch.object(refgenconf.refgenconf, "_untar", return_value=None): -# rgc.pull_asset(genome, asset, gencfg, -# get_main_url=get_get_url(genome, asset)) -# new_data = YacAttMap(gencfg) -# assert asset in new_data.genomes[genome] -# assert "testpath" == new_data.genomes[genome][asset].path -# -# -# @pytest.mark.remote_data -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# def test_pull_asset_returns_key_value_pair( -# rgc, genome, asset, gencfg, remove_genome_folder): -# """ Verify asset pull returns asset name, and value if 
pulled. """ -# checksum_tmpval = "not-a-checksum" -# with mock.patch.object( -# refgenconf.refgenconf, "_download_json", -# return_value=YacAttMap({ -# CFG_CHECKSUM_KEY: checksum_tmpval, -# CFG_ARCHIVE_SIZE_KEY: "0 GB", -# CFG_ASSET_PATH_KEY: "testpath"})), \ -# mock.patch.object(refgenconf.refgenconf, "checksum", -# return_value=checksum_tmpval), \ -# mock.patch.object(refgenconf.refgenconf, "_download_url_progress"), \ -# mock.patch.object(refgenconf.refgenconf, "_untar"): -# res = rgc.pull_asset( -# genome, asset, gencfg, get_main_url=get_get_url(genome, asset)) -# key, val = _parse_single_pull(res) -# assert asset == key -# assert "testpath" == val -# -# -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# @pytest.mark.parametrize( -# "error", [ConnectionRefusedError, HTTPError, DownloadJsonError]) -# def test_pull_asset_pull_error( -# rgc, genome, asset, gencfg, remove_genome_folder, error): -# """ Error pulling asset is exceptional. """ -# args = (genome, asset, gencfg) -# kwargs = {"get_main_url": get_get_url(genome, asset)} -# if error is DownloadJsonError: -# def raise_error(*args, **kwargs): -# raise DownloadJsonError(None) -# with mock.patch("refgenconf.refgenconf._download_json", -# side_effect=raise_error), \ -# pytest.raises(DownloadJsonError): -# rgc.pull_asset(*args, **kwargs) -# else: -# class SubErr(error): -# def __init__(self): -# pass -# -# -# def __str__(self): -# return self.__class__.__name__ -# -# def raise_error(*args, **kwargs): -# raise SubErr() -# with mock.patch.object( -# refgenconf.refgenconf, "_download_json", -# return_value=YacAttMap({CFG_CHECKSUM_KEY: "not-a-checksum", -# CFG_ARCHIVE_SIZE_KEY: "0 GB"})), \ -# mock.patch(DOWNLOAD_FUNCTION, side_effect=raise_error): -# res = rgc.pull_asset(*args, **kwargs) -# key, val = _parse_single_pull(res) -# assert asset == key -# assert val is None -# -# -# @pytest.mark.parametrize(["genome", "asset"], [ -# (g, a) for g in REMOTE_ASSETS for a in [None, 1, -0.1]]) -# def 
test_pull_asset_illegal_asset_name( -# rgc, genome, asset, gencfg, remove_genome_folder): -# """ TypeError occurs if asset argument is not iterable. """ -# with pytest.raises(TypeError): -# rgc.pull_asset(genome, asset, gencfg, -# get_main_url=get_get_url(genome, asset)) -# -# -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# def test_pull_asset_checksum_mismatch( -# rgc, genome, asset, gencfg, remove_genome_folder): -# """ Checksum mismatch short-circuits asset pull, returning null value. """ -# with mock.patch.object( -# refgenconf.refgenconf, "_download_json", -# return_value=YacAttMap({CFG_CHECKSUM_KEY: "not-a-checksum", -# CFG_ARCHIVE_SIZE_KEY: "0 GB"})), \ -# mock.patch(DOWNLOAD_FUNCTION, side_effect=lambda _1, _2, _3: None), \ -# mock.patch.object( -# refgenconf.refgenconf, "checksum", return_value="checksum2"): -# res = rgc.pull_asset(genome, asset, gencfg, -# get_main_url=get_get_url(genome, asset)) -# key, val = _parse_single_pull(res) -# assert asset == key -# assert val is None -# -# -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# def test_negative_response_to_large_download_prompt( -# rgc, genome, asset, gencfg, remove_genome_folder): -# """ Test responsiveness to user abortion of pull request. """ -# with mock.patch.object( -# refgenconf.refgenconf, "_download_json", -# return_value=YacAttMap({CFG_CHECKSUM_KEY: "not-a-checksum", -# CFG_ARCHIVE_SIZE_KEY: "1M"})), \ -# mock.patch("refgenconf.refgenconf._is_large_archive", return_value=True), \ -# mock.patch("refgenconf.refgenconf.query_yes_no", return_value=False): -# res = rgc.pull_asset( -# genome, asset, gencfg, get_main_url=get_get_url(genome, asset)) -# key, val = _parse_single_pull(res) -# assert asset == key -# assert val is None -# -# -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# def test_download_interruption( -# rgc, genome, asset, gencfg, remove_genome_folder, caplog): -# """ Download interruption provides appropriate warning message and halts. 
""" -# import signal -# def kill_download(*args, **kwargs): -# os.kill(os.getpid(), signal.SIGINT) -# with mock.patch.object(refgenconf.refgenconf, "_download_json", -# return_value=YacAttMap({ -# CFG_CHECKSUM_KEY: "dummy", -# CFG_ARCHIVE_SIZE_KEY: "1M"})),\ -# mock.patch(DOWNLOAD_FUNCTION, side_effect=kill_download), \ -# caplog.at_level(logging.WARNING), \ -# pytest.raises(SystemExit): -# rgc.pull_asset(genome, asset, gencfg, get_main_url=get_get_url(genome, asset)) -# records = caplog.records -# assert 1 == len(records) -# r = records[0] -# assert "WARNING" == r.levelname -# assert "The download was interrupted" in r.msg -# -# -# class PreexistingAssetTests: -# """ Tests for asset pull when the asset path already exists. """ -# -# @staticmethod -# def _assert_result(res, exp_key, exp_val): -# """ Check the return key/value from the pull operation. """ -# k, v = _parse_single_pull(res) -# assert exp_key == k -# assert exp_val == v -# -# @staticmethod -# def _assert_single_message(log, levname, test_text): -# """ Verify presence of a log message with expected level and content. """ -# assert levname in dir(logging), "Not a logging level: {}".format(levname) -# msgs = [r.msg for r in log.records if r.levelname == levname] -# matched = list(filter(test_text, msgs)) -# assert 1 == len(matched) -# -# def _assert_preserved(self, rgc, genome, asset, res, init_time, log): -# """ Verify behavior expected if asset was preserved. """ -# exp_val = rgc.filepath(genome, asset) -# self._assert_result(res, asset, exp_val) -# assert init_time == os.path.getmtime(exp_val) -# self._assert_single_message( -# log, "DEBUG", lambda m: m == "Preserving existing: {}".format(exp_val)) -# -# def _assert_overwritten(self, rgc, genome, asset, res, init_time, log): -# """ Verify behavior expected if asset was overwritten. 
""" -# exp_val = rgc.filepath(genome, asset) -# self._assert_result(res, asset, exp_val) -# assert init_time < os.path.getmtime(exp_val) -# self._assert_single_message( -# log, "DEBUG", lambda m: m == "Overwriting: {}".format(exp_val)) -# -# @pytest.mark.parametrize(["genome", "asset"], REQUESTS) -# @pytest.mark.parametrize(["force", "exp_overwrite", "reply_patch"], [ -# (True, True, {"side_effect": lambda *args, **kwargs: pytest.fail( -# "Forced short-circuit failed")}), -# (None, True, {"return_value": True}), -# (False, False, {"side_effect": lambda *args, **kwargs: pytest.fail( -# "Forced short-circuit failed")}), -# (None, False, {"return_value": False})]) -# def test_asset_already_exists( -# self, rgc, genome, asset, gencfg, -# force, exp_overwrite, reply_patch, caplog, remove_genome_folder): -# """ Overwrite may be prespecified or determined by response to prompt. """ -# fp = rgc.filepath(genome, asset) -# assert not os.path.exists(fp) -# if not os.path.exists(os.path.dirname(fp)): -# os.makedirs(os.path.dirname(fp)) -# with open(fp, 'w'): -# print("Create empty file: {}".format(fp)) -# init_time = os.path.getmtime(fp) -# dummy_checksum_value = "fixed_value" -# def touch(*_args, **_kwargs): -# with open(fp, 'w'): -# print("Recreating: {}".format(fp)) -# -# time.sleep(0.01) -# assert os.path.isfile(fp) -# with mock.patch.object( -# refgenconf.refgenconf, "_download_json", return_value=YacAttMap({ -# CFG_CHECKSUM_KEY: "fixed_value", -# CFG_ARCHIVE_SIZE_KEY: "1M", -# CFG_ASSET_PATH_KEY: fp -# })), \ -# mock.patch.object(refgenconf.refgenconf, "query_yes_no", **reply_patch), \ -# mock.patch(DOWNLOAD_FUNCTION, side_effect=touch), \ -# mock.patch.object(refgenconf.refgenconf, "checksum", -# return_value=dummy_checksum_value), \ -# mock.patch.object(refgenconf.refgenconf, "_untar"), \ -# caplog.at_level(logging.DEBUG): -# res = rgc.pull_asset(genome, asset, gencfg, force=force, -# get_main_url=get_get_url(genome, asset)) -# assertion_arguments = (rgc, genome, 
asset, res, init_time, caplog) -# verify = self._assert_overwritten if exp_overwrite else self._assert_preserved -# verify(*assertion_arguments) -# -# -# def _parse_single_pull(result): -# """ Unpack asset pull result, expecting asset name and value. """ -# try: -# k, v = result[0] -# except (IndexError, ValueError): -# print("Single pull result should be a list with one pair; got {}". -# format(result)) -# raise -# return k, v +""" Tests for asset pull """ + +import logging +import mock +import os +import sys +import time +if sys.version_info.major < 3: + from urllib2 import HTTPError + ConnectionRefusedError = Exception +else: + from urllib.error import HTTPError +import pytest +from yacman import YacAttMap +from tests.conftest import CONF_DATA, REMOTE_ASSETS, REQUESTS, \ + get_get_url +import refgenconf +from refgenconf.const import * +from refgenconf.exceptions import DownloadJsonError +from refgenconf.refgenconf import _download_url_progress + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +DOWNLOAD_FUNCTION = \ + "refgenconf.refgenconf.{}".format(_download_url_progress.__name__) + + +@pytest.mark.parametrize( + ["genome", "asset"], [(g, a) for g, assets in CONF_DATA for a in assets]) +def test_no_unpack(rgc, genome, asset, temp_genome_config_file): + """ Tarballs must be unpacked. """ + with pytest.raises(NotImplementedError): + rgc.pull_asset(genome, asset, temp_genome_config_file, unpack=False) + + +@pytest.mark.remote_data +@pytest.mark.parametrize(["genome", "asset"], REQUESTS) +@pytest.mark.parametrize("exp_file_ext", [".tar", ".txt"]) +def test_pull_asset_download(rgc, genome, asset, gencfg, exp_file_ext, + remove_genome_folder): + """ Verify download and unpacking of tarball asset. 
""" + if sys.version_info.major < 3: + pytest.xfail("pull_asset download tests fail on py2") + exp_file = os.path.join(rgc.genome_folder, genome, asset + exp_file_ext) + assert not os.path.exists(exp_file) + with mock.patch.object( + refgenconf.refgenconf, "_download_json", lambda _: { + CFG_ARCHIVE_SIZE_KEY: "0GB", CFG_ASSET_PATH_KEY: exp_file}), \ + mock.patch("refgenconf.refgenconf.query_yes_no", return_value=True): + rgc.pull_asset(genome, asset, gencfg, + get_main_url=get_get_url(genome, asset)) + assert os.path.isfile(exp_file) + os.unlink(exp_file) + + +@pytest.mark.remote_data +@pytest.mark.parametrize(["genome", "asset"], REQUESTS) +def test_pull_asset_updates_genome_config( + rgc, genome, asset, gencfg, remove_genome_folder): + """ Verify asset pull's side-effect of updating the genome config file. """ + try: + del rgc.genomes[genome][asset] + except KeyError: + pass + rgc.write(gencfg) + old_data = YacAttMap(gencfg) + assert asset not in old_data.genomes[genome] + checksum_tmpval = "not-a-checksum" + with mock.patch.object( + refgenconf.refgenconf, "_download_json", + return_value=YacAttMap({ + CFG_CHECKSUM_KEY: checksum_tmpval, + CFG_ARCHIVE_SIZE_KEY: "0 GB", + CFG_ASSET_PATH_KEY: "testpath"})), \ + mock.patch.object(refgenconf.refgenconf, "checksum", + return_value=checksum_tmpval), \ + mock.patch.object(refgenconf.refgenconf, "_download_url_progress", + return_value=None), \ + mock.patch.object(refgenconf.refgenconf, "_untar", return_value=None): + rgc.pull_asset(genome, asset, gencfg, + get_main_url=get_get_url(genome, asset)) + new_data = YacAttMap(gencfg) + new_assets = new_data.genomes[genome][CFG_ASSETS_KEY] + assert asset in new_assets + assert "testpath" == new_assets[asset].path + + +@pytest.mark.remote_data +@pytest.mark.parametrize(["genome", "asset"], REQUESTS) +def test_pull_asset_returns_key_value_pair( + rgc, genome, asset, gencfg, remove_genome_folder): + """ Verify asset pull returns asset name, and value if pulled. 
""" + checksum_tmpval = "not-a-checksum" + with mock.patch.object( + refgenconf.refgenconf, "_download_json", + return_value=YacAttMap({ + CFG_CHECKSUM_KEY: checksum_tmpval, + CFG_ARCHIVE_SIZE_KEY: "0 GB", + CFG_ASSET_PATH_KEY: "testpath"})), \ + mock.patch.object(refgenconf.refgenconf, "checksum", + return_value=checksum_tmpval), \ + mock.patch.object(refgenconf.refgenconf, "_download_url_progress"), \ + mock.patch.object(refgenconf.refgenconf, "_untar"): + res = rgc.pull_asset( + genome, asset, gencfg, get_main_url=get_get_url(genome, asset)) + key, val = _parse_single_pull(res) + assert asset == key + assert "testpath" == val + + +@pytest.mark.parametrize(["genome", "asset"], REQUESTS) +@pytest.mark.parametrize( + "error", [ConnectionRefusedError, HTTPError, DownloadJsonError]) +def test_pull_asset_pull_error( + rgc, genome, asset, gencfg, remove_genome_folder, error): + """ Error pulling asset is exceptional. """ + args = (genome, asset, gencfg) + kwargs = {"get_main_url": get_get_url(genome, asset)} + if error is DownloadJsonError: + def raise_error(*args, **kwargs): + raise DownloadJsonError(None) + with mock.patch("refgenconf.refgenconf._download_json", + side_effect=raise_error), \ + pytest.raises(DownloadJsonError): + rgc.pull_asset(*args, **kwargs) + else: + class SubErr(error): + def __init__(self): + pass + + + def __str__(self): + return self.__class__.__name__ + + def raise_error(*args, **kwargs): + raise SubErr() + with mock.patch.object( + refgenconf.refgenconf, "_download_json", + return_value=YacAttMap({CFG_CHECKSUM_KEY: "not-a-checksum", + CFG_ARCHIVE_SIZE_KEY: "0 GB"})), \ + mock.patch(DOWNLOAD_FUNCTION, side_effect=raise_error): + res = rgc.pull_asset(*args, **kwargs) + key, val = _parse_single_pull(res) + assert asset == key + assert val is None + + +@pytest.mark.parametrize(["genome", "asset"], [ + (g, a) for g in REMOTE_ASSETS for a in [None, 1, -0.1]]) +def test_pull_asset_illegal_asset_name( + rgc, genome, asset, gencfg, 
remove_genome_folder): + """ TypeError occurs if asset argument is not iterable. """ + with pytest.raises(TypeError): + rgc.pull_asset(genome, asset, gencfg, + get_main_url=get_get_url(genome, asset)) + + +@pytest.mark.parametrize(["genome", "asset"], REQUESTS) +def test_pull_asset_checksum_mismatch( + rgc, genome, asset, gencfg, remove_genome_folder): + """ Checksum mismatch short-circuits asset pull, returning null value. """ + with mock.patch.object( + refgenconf.refgenconf, "_download_json", + return_value=YacAttMap({CFG_CHECKSUM_KEY: "not-a-checksum", + CFG_ARCHIVE_SIZE_KEY: "0 GB"})), \ + mock.patch(DOWNLOAD_FUNCTION, side_effect=lambda _1, _2, _3: None), \ + mock.patch.object( + refgenconf.refgenconf, "checksum", return_value="checksum2"): + res = rgc.pull_asset(genome, asset, gencfg, + get_main_url=get_get_url(genome, asset)) + key, val = _parse_single_pull(res) + assert asset == key + assert val is None + + +@pytest.mark.parametrize(["genome", "asset"], REQUESTS) +def test_negative_response_to_large_download_prompt( + rgc, genome, asset, gencfg, remove_genome_folder): + """ Test responsiveness to user abortion of pull request. """ + with mock.patch.object( + refgenconf.refgenconf, "_download_json", + return_value=YacAttMap({CFG_CHECKSUM_KEY: "not-a-checksum", + CFG_ARCHIVE_SIZE_KEY: "1M"})), \ + mock.patch("refgenconf.refgenconf._is_large_archive", return_value=True), \ + mock.patch("refgenconf.refgenconf.query_yes_no", return_value=False): + res = rgc.pull_asset( + genome, asset, gencfg, get_main_url=get_get_url(genome, asset)) + key, val = _parse_single_pull(res) + assert asset == key + assert val is None + + +@pytest.mark.parametrize(["genome", "asset"], REQUESTS) +def test_download_interruption( + rgc, genome, asset, gencfg, remove_genome_folder, caplog): + """ Download interruption provides appropriate warning message and halts. 
""" + import signal + def kill_download(*args, **kwargs): + os.kill(os.getpid(), signal.SIGINT) + with mock.patch.object(refgenconf.refgenconf, "_download_json", + return_value=YacAttMap({ + CFG_CHECKSUM_KEY: "dummy", + CFG_ARCHIVE_SIZE_KEY: "1M"})),\ + mock.patch(DOWNLOAD_FUNCTION, side_effect=kill_download), \ + caplog.at_level(logging.WARNING), \ + pytest.raises(SystemExit): + rgc.pull_asset(genome, asset, gencfg, get_main_url=get_get_url(genome, asset)) + records = caplog.records + assert 1 == len(records) + r = records[0] + assert "WARNING" == r.levelname + assert "The download was interrupted" in r.msg + + +class PreexistingAssetTests: + """ Tests for asset pull when the asset path already exists. """ + + @staticmethod + def _assert_result(res, exp_key, exp_val): + """ Check the return key/value from the pull operation. """ + k, v = _parse_single_pull(res) + assert exp_key == k + assert exp_val == v + + @staticmethod + def _assert_single_message(log, levname, test_text): + """ Verify presence of a log message with expected level and content. """ + assert levname in dir(logging), "Not a logging level: {}".format(levname) + msgs = [r.msg for r in log.records if r.levelname == levname] + matched = list(filter(test_text, msgs)) + assert 1 == len(matched) + + def _assert_preserved(self, rgc, genome, asset, res, init_time, log): + """ Verify behavior expected if asset was preserved. """ + exp_val = rgc.filepath(genome, asset) + self._assert_result(res, asset, exp_val) + assert init_time == os.path.getmtime(exp_val) + self._assert_single_message( + log, "DEBUG", lambda m: m == "Preserving existing: {}".format(exp_val)) + + def _assert_overwritten(self, rgc, genome, asset, res, init_time, log): + """ Verify behavior expected if asset was overwritten. 
""" + exp_val = rgc.filepath(genome, asset) + self._assert_result(res, asset, exp_val) + assert init_time < os.path.getmtime(exp_val) + self._assert_single_message( + log, "DEBUG", lambda m: m == "Overwriting: {}".format(exp_val)) + + @pytest.mark.parametrize(["genome", "asset"], REQUESTS) + @pytest.mark.parametrize(["force", "exp_overwrite", "reply_patch"], [ + (True, True, {"side_effect": lambda *args, **kwargs: pytest.fail( + "Forced short-circuit failed")}), + (None, True, {"return_value": True}), + (False, False, {"side_effect": lambda *args, **kwargs: pytest.fail( + "Forced short-circuit failed")}), + (None, False, {"return_value": False})]) + def test_asset_already_exists( + self, rgc, genome, asset, gencfg, + force, exp_overwrite, reply_patch, caplog, remove_genome_folder): + """ Overwrite may be prespecified or determined by response to prompt. """ + fp = rgc.filepath(genome, asset) + assert not os.path.exists(fp) + if not os.path.exists(os.path.dirname(fp)): + os.makedirs(os.path.dirname(fp)) + with open(fp, 'w'): + print("Create empty file: {}".format(fp)) + init_time = os.path.getmtime(fp) + dummy_checksum_value = "fixed_value" + def touch(*_args, **_kwargs): + with open(fp, 'w'): + print("Recreating: {}".format(fp)) + + time.sleep(0.01) + assert os.path.isfile(fp) + with mock.patch.object( + refgenconf.refgenconf, "_download_json", return_value=YacAttMap({ + CFG_CHECKSUM_KEY: "fixed_value", + CFG_ARCHIVE_SIZE_KEY: "1M", + CFG_ASSET_PATH_KEY: fp + })), \ + mock.patch.object(refgenconf.refgenconf, "query_yes_no", **reply_patch), \ + mock.patch(DOWNLOAD_FUNCTION, side_effect=touch), \ + mock.patch.object(refgenconf.refgenconf, "checksum", + return_value=dummy_checksum_value), \ + mock.patch.object(refgenconf.refgenconf, "_untar"), \ + caplog.at_level(logging.DEBUG): + res = rgc.pull_asset(genome, asset, gencfg, force=force, + get_main_url=get_get_url(genome, asset)) + assertion_arguments = (rgc, genome, asset, res, init_time, caplog) + verify = 
self._assert_overwritten if exp_overwrite else self._assert_preserved + verify(*assertion_arguments) + + +def _parse_single_pull(result): + """ Unpack asset pull result, expecting asset name and value. """ + try: + k, v = result[0] + except (IndexError, ValueError): + print("Single pull result should be a list with one pair; got {}". + format(result)) + raise + return k, v diff --git a/tests/test_select_genome_config.py b/tests/test_select_genome_config.py index f9fb4dae..6667db12 100644 --- a/tests/test_select_genome_config.py +++ b/tests/test_select_genome_config.py @@ -1,60 +1,60 @@ -# """ Tests for selection of genome configuration file """ -# -# import os -# import pytest -# from refgenconf import select_genome_config -# from refgenconf.const import CFG_ENV_VARS -# from ubiquerg import TmpEnv -# from veracitools import ExpectContext -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# def _touch(p): -# """ Ensure path existence, whether file or folder. """ -# if os.path.splitext(p)[1]: -# with open(p, 'w'): -# pass -# else: -# os.makedirs(p) -# return p -# -# -# def _check_no_env_vars(): -# """ Verify that none of the relevant env. var.'s are set. """ -# assert not any(os.getenv(v) for v in CFG_ENV_VARS) -# -# -# def test_select_null(): -# """ Test prioritized selection of genome configuration file. 
""" -# with TmpEnv(overwrite=True, **{ev: "" for ev in CFG_ENV_VARS}): -# _check_no_env_vars() -# assert select_genome_config(None) is None -# -# -# @pytest.mark.parametrize(["setup", "expect"], [ -# (lambda d: d.join("test-conf.yaml").strpath, lambda _: Exception), -# (lambda d: _touch(os.path.join(d.strpath, "test-conf")), lambda _: Exception), -# (lambda d: _touch(d.join("test-conf.yaml").strpath), lambda fp: fp) -# ]) -# def test_select_local_config_file(tmpdir, setup, expect): -# """ Selection of local filepath hinges on its existence as a file """ -# with TmpEnv(overwrite=True, **{ev: "" for ev in CFG_ENV_VARS}): -# _check_no_env_vars() -# path = setup(tmpdir) -# print("Path: {}".format(path)) -# with ExpectContext(expect(path), select_genome_config) as ctx: -# ctx(path) -# -# -# @pytest.mark.parametrize("env_var", CFG_ENV_VARS) -# def test_select_via_env_var_implicit(env_var, tmpdir): -# """ Config file selection can leverage default environmanent variables. """ -# conf_file = tmpdir.join("test-refgenconf-conf.yaml").strpath -# assert not os.path.exists(conf_file) -# with open(conf_file, 'w'): -# pass -# assert os.path.isfile(conf_file) -# with TmpEnv(overwrite=True, **{env_var: conf_file}): -# assert conf_file == select_genome_config(None) +""" Tests for selection of genome configuration file """ + +import os +import pytest +from refgenconf import select_genome_config +from refgenconf.const import CFG_ENV_VARS +from ubiquerg import TmpEnv +from veracitools import ExpectContext + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +def _touch(p): + """ Ensure path existence, whether file or folder. """ + if os.path.splitext(p)[1]: + with open(p, 'w'): + pass + else: + os.makedirs(p) + return p + + +def _check_no_env_vars(): + """ Verify that none of the relevant env. var.'s are set. """ + assert not any(os.getenv(v) for v in CFG_ENV_VARS) + + +def test_select_null(): + """ Test prioritized selection of genome configuration file. 
""" + with TmpEnv(overwrite=True, **{ev: "" for ev in CFG_ENV_VARS}): + _check_no_env_vars() + assert select_genome_config(None) is None + + +@pytest.mark.parametrize(["setup", "expect"], [ + (lambda d: d.join("test-conf.yaml").strpath, lambda _: Exception), + (lambda d: _touch(os.path.join(d.strpath, "test-conf")), lambda _: Exception), + (lambda d: _touch(d.join("test-conf.yaml").strpath), lambda fp: fp) +]) +def test_select_local_config_file(tmpdir, setup, expect): + """ Selection of local filepath hinges on its existence as a file """ + with TmpEnv(overwrite=True, **{ev: "" for ev in CFG_ENV_VARS}): + _check_no_env_vars() + path = setup(tmpdir) + print("Path: {}".format(path)) + with ExpectContext(expect(path), select_genome_config) as ctx: + ctx(path) + + +@pytest.mark.parametrize("env_var", CFG_ENV_VARS) +def test_select_via_env_var_implicit(env_var, tmpdir): + """ Config file selection can leverage default environmanent variables. """ + conf_file = tmpdir.join("test-refgenconf-conf.yaml").strpath + assert not os.path.exists(conf_file) + with open(conf_file, 'w'): + pass + assert os.path.isfile(conf_file) + with TmpEnv(overwrite=True, **{env_var: conf_file}): + assert conf_file == select_genome_config(None) diff --git a/tests/test_update_genomes.py b/tests/test_update_genomes.py index 42de4f69..fae7ca5f 100644 --- a/tests/test_update_genomes.py +++ b/tests/test_update_genomes.py @@ -1,87 +1,94 @@ -# """ Tests for updating a configuration object's genomes section """ -# -# import pytest -# from attmap import PathExAttMap -# from refgenconf import CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_SERVER_KEY, \ -# DEFAULT_SERVER, RefGenConf as RGC -# from tests.conftest import get_conf_genomes, CONF_DATA -# -# __author__ = "Vince Reuter" -# __email__ = "vreuter@virginia.edu" -# -# -# @pytest.fixture(scope="function") -# def rgc(tmpdir): -# """ Provide an RGC instance; avoid disk read/write and stay in memory. 
""" -# return RGC({CFG_GENOMES_KEY: dict(CONF_DATA), -# CFG_FOLDER_KEY: tmpdir.strpath, -# CFG_SERVER_KEY: DEFAULT_SERVER}) -# -# -# @pytest.mark.parametrize("assembly", ["dm3"]) -# @pytest.mark.parametrize("validate", [ -# lambda a, c: a in c[CFG_GENOMES_KEY], -# lambda a, c: isinstance(c[CFG_GENOMES_KEY][a], PathExAttMap)]) -# def test_new_genome(rgc, assembly, validate): -# """ update_genomes can insert new assembly. """ -# assert assembly not in rgc[CFG_GENOMES_KEY] -# rgc.update_assets(assembly) -# assert validate(assembly, rgc) -# -# -# @pytest.mark.parametrize("assembly", get_conf_genomes()) -# @pytest.mark.parametrize("asset", ["brand_new_asset", "align_index"]) -# @pytest.mark.parametrize("validate", [ -# lambda a, g, c: a in c[CFG_GENOMES_KEY][g], -# lambda a, g, c: isinstance(c[CFG_GENOMES_KEY][g][a], PathExAttMap)]) -# def test_new_asset(rgc, assembly, asset, validate): -# """ update_genomes can insert new asset for existing assembly. """ -# assert assembly in rgc[CFG_GENOMES_KEY] -# assert asset not in rgc[CFG_GENOMES_KEY][assembly] -# rgc.update_assets(assembly, asset) -# assert validate(asset, assembly, rgc) -# -# -# @pytest.mark.parametrize("assembly", ["dm3"]) -# @pytest.mark.parametrize("asset", ["brand_new_asset", "align_index"]) -# @pytest.mark.parametrize("validate", [ -# lambda _, g, c: g in c[CFG_GENOMES_KEY], -# lambda a, g, c: a in c[CFG_GENOMES_KEY][g], -# lambda a, g, c: isinstance(c[CFG_GENOMES_KEY][g], PathExAttMap), -# lambda a, g, c: isinstance(c[CFG_GENOMES_KEY][g][a], PathExAttMap) -# ]) -# def test_new_genome_and_asset(rgc, assembly, asset, validate): -# """ update_genomes can insert assembly and asset. 
""" -# assert assembly not in rgc[CFG_GENOMES_KEY] -# rgc.update_assets(assembly, asset) -# assert validate(asset, assembly, rgc) -# -# -# @pytest.mark.parametrize(["old_data", "new_data", "expected"], [ -# ({"size": "4G"}, {"path": "/home/res/gen/bt2.hg38"}, -# {"size": "4G", "path": "/home/res/gen/bt2.hg38"}), -# ({}, {"size": "4G"}, {"size": "4G"}), -# ({}, {"path": "/home/res/gen/bt2.hg38"}, {"path": "/home/res/gen/bt2.hg38"}), -# ({}, {"size": "4G", "path": "/home/res/gen/bt2.hg38"}, -# {"size": "4G", "path": "/home/res/gen/bt2.hg38"}), -# ({"size": "4G"}, {"size": "2G"}, {"size": "2G"}) -# ]) -# def test_update_asset_data(tmpdir, old_data, new_data, expected): -# """ update_genomes can modify data for existing assembly and asset. """ -# assembly = "hg38" -# asset = "idx_bt2" -# c = RGC({CFG_GENOMES_KEY: {assembly: {asset: old_data}}, -# CFG_FOLDER_KEY: tmpdir.strpath, -# CFG_SERVER_KEY: DEFAULT_SERVER}) -# assert expected != c[CFG_GENOMES_KEY][assembly][asset].to_dict() -# c.update_assets(assembly, asset, new_data) -# assert expected == c[CFG_GENOMES_KEY][assembly][asset].to_dict() -# -# -# @pytest.mark.parametrize("args", [ -# ("hg38", ["a1", "a2"]), (["g1", "g2"], "new_tool_index"), -# ("mm10", "align_index", "not_a_map")]) -# def test_illegal_argtype(rgc, args): -# """ update_genomes accurately restricts argument types. 
""" -# with pytest.raises(TypeError): -# rgc.update_assets(*args) +""" Tests for updating a configuration object's genomes section """ + +import pytest +from attmap import PathExAttMap +from refgenconf import CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_SERVER_KEY, \ + DEFAULT_SERVER, RefGenConf as RGC +from refgenconf.const import CFG_ASSETS_KEY +from tests.conftest import bind_to_assets, get_conf_genomes, CONF_DATA + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +def _asset_data_is_pxam(a, g, c): + return isinstance(c[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY][a], PathExAttMap) + + +@pytest.fixture(scope="function") +def rgc(tmpdir): + """ Provide an RGC instance; avoid disk read/write and stay in memory. """ + return RGC({CFG_GENOMES_KEY: dict(CONF_DATA), + CFG_FOLDER_KEY: tmpdir.strpath, + CFG_SERVER_KEY: DEFAULT_SERVER}) + + +@pytest.mark.parametrize("assembly", ["dm3"]) +@pytest.mark.parametrize("validate", [ + lambda g, c: g in c[CFG_GENOMES_KEY], + lambda g, c: isinstance(c[CFG_GENOMES_KEY], PathExAttMap)]) +def test_new_genome(rgc, assembly, validate): + """ update_genomes can insert new assembly. """ + assert assembly not in rgc[CFG_GENOMES_KEY] + rgc.update_assets(assembly) + assert validate(assembly, rgc) + + +@pytest.mark.parametrize("assembly", get_conf_genomes()) +@pytest.mark.parametrize("asset", ["brand_new_asset", "align_index"]) +@pytest.mark.parametrize("validate", [ + lambda a, g, c: a in c[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY], + _asset_data_is_pxam]) +def test_new_asset(rgc, assembly, asset, validate): + """ update_genomes can insert new asset for existing assembly. 
""" + assert assembly in rgc[CFG_GENOMES_KEY] + assert asset not in rgc[CFG_GENOMES_KEY][assembly][CFG_ASSETS_KEY] + rgc.update_assets(assembly, asset) + assert validate(asset, assembly, rgc) + + +@pytest.mark.parametrize("assembly", ["dm3"]) +@pytest.mark.parametrize("asset", ["brand_new_asset", "align_index"]) +@pytest.mark.parametrize("validate", [ + lambda _, g, c: g in c[CFG_GENOMES_KEY], + lambda a, g, c: a in c[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY], + lambda a, g, c: isinstance(c[CFG_GENOMES_KEY][g], PathExAttMap), + _asset_data_is_pxam +]) +def test_new_genome_and_asset(rgc, assembly, asset, validate): + """ update_genomes can insert assembly and asset. """ + assert assembly not in rgc[CFG_GENOMES_KEY] + rgc.update_assets(assembly, asset) + assert validate(asset, assembly, rgc) + + +@pytest.mark.parametrize(["old_data", "new_data", "expected"], [ + ({"size": "4G"}, {"path": "/home/res/gen/bt2.hg38"}, + {"size": "4G", "path": "/home/res/gen/bt2.hg38"}), + ({}, {"size": "4G"}, {"size": "4G"}), + ({}, {"path": "/home/res/gen/bt2.hg38"}, {"path": "/home/res/gen/bt2.hg38"}), + ({}, {"size": "4G", "path": "/home/res/gen/bt2.hg38"}, + {"size": "4G", "path": "/home/res/gen/bt2.hg38"}), + ({"size": "4G"}, {"size": "2G"}, {"size": "2G"}) +]) +def test_update_asset_data(tmpdir, old_data, new_data, expected): + """ update_genomes can modify data for existing assembly and asset. 
""" + assembly = "hg38" + asset = "idx_bt2" + c = RGC({CFG_GENOMES_KEY: {assembly: bind_to_assets({asset: old_data})}, + CFG_FOLDER_KEY: tmpdir.strpath, + CFG_SERVER_KEY: DEFAULT_SERVER}) + def get_asset_data(refgencfg, a_name): + return refgencfg[CFG_GENOMES_KEY][assembly][CFG_ASSETS_KEY][a_name].to_dict() + assert expected != get_asset_data(c, asset) + c.update_assets(assembly, asset, new_data) + assert expected == get_asset_data(c, asset) + + +@pytest.mark.parametrize("args", [ + ("hg38", ["a1", "a2"]), (["g1", "g2"], "new_tool_index"), + ("mm10", "align_index", "not_a_map")]) +def test_illegal_argtype(rgc, args): + """ update_genomes accurately restricts argument types. """ + with pytest.raises(TypeError): + rgc.update_assets(*args)