diff --git a/docs/changelog.md b/docs/changelog.md index dd85f62b..ab9b619f 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,16 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.6.1] - 2019-12-13 + +### Added +- `remove_asset_from_relatives` method for assets' relationship links removal +- `initialize_config_file` method + +### Changed +- `remove_assets` method removes the asset relatives links +- in `select_genome_config` function the `filename` argument is not required anymore; the `$REFGENIE` environment variable can be used instead + ## [0.6.0] - 2019-12-06 ### Added diff --git a/refgenconf/_version.py b/refgenconf/_version.py index 906d362f..43c4ab00 100644 --- a/refgenconf/_version.py +++ b/refgenconf/_version.py @@ -1 +1 @@ -__version__ = "0.6.0" +__version__ = "0.6.1" diff --git a/refgenconf/const.py b/refgenconf/const.py index 396f5739..ce165bf1 100644 --- a/refgenconf/const.py +++ b/refgenconf/const.py @@ -72,6 +72,8 @@ CFG_ASSET_CHECKSUM_KEY = "asset_digest" CFG_TAG_DESC_KEY = "tag_description" +CFG_ASSET_RELATIVES_KEYS = [CFG_ASSET_CHILDREN_KEY, CFG_ASSET_PARENTS_KEY] + CFG_TOP_LEVEL_KEYS = [ CFG_FOLDER_KEY, CFG_SERVER_KEY, CFG_SERVERS_KEY, CFG_ARCHIVE_KEY, CFG_GENOMES_KEY, CFG_VERSION_KEY] CFG_GENOME_KEYS = [ @@ -80,11 +82,14 @@ CFG_SINGLE_ASSET_SECTION_KEYS = [CFG_ASSET_PATH_KEY, CFG_ASSET_DESC_KEY, CFG_ASSET_SIZE_KEY, CFG_ARCHIVE_SIZE_KEY, CFG_ARCHIVE_CHECKSUM_KEY, CFG_SEEK_KEYS_KEY] +RGC_REQ_KEYS = [CFG_SERVERS_KEY, CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_VERSION_KEY] + CFG_KEY_NAMES = [ "CFG_FOLDER_KEY", "CFG_SERVER_KEY", "CFG_SERVERS_KEY", "CFG_GENOMES_KEY", "CFG_ASSET_PATH_KEY", "CFG_ASSET_DESC_KEY", "CFG_ARCHIVE_KEY", "CFG_ARCHIVE_SIZE_KEY", "CFG_SEEK_KEYS_KEY", "CFG_ASSET_SIZE_KEY", "CFG_CHECKSUM_KEY", "CFG_ARCHIVE_CHECKSUM_KEY", "CFG_VERSION_KEY", "CFG_ASSET_PARENTS_KEY", - "CFG_ASSET_CHILDREN_KEY", "CFG_TAG_DESC_KEY", 
"CFG_ASSET_CHECKSUM_KEY", "CFG_ASSET_TAGS_KEY"] + "CFG_ASSET_CHILDREN_KEY", "CFG_TAG_DESC_KEY", "CFG_ASSET_CHECKSUM_KEY", "CFG_ASSET_TAGS_KEY", + "CFG_ASSET_RELATIVES_KEYS"] # other consts @@ -97,8 +102,8 @@ __all__ = ["DEFAULT_SERVER", "CFG_ASSET_DEFAULT_TAG_KEY", "CFG_KEY_NAMES", "CFG_GENOME_DESC_KEY", "REQ_CFG_VERSION", "CFG_ASSETS_KEY", "CFG_GENOME_ATTRS_KEYS", "REFGENIE_BY_CFG", "DEFAULT_TAG", "ATTRS_COPY_PULL", - "REQ_TAG_ATTRS", "CUSTOM_BAR_FMT", "API_VERSION", "CONF_STRUCTURE", "OPERATION_IDS", "CUSTOM_PFX"] + \ - FILE_DIR_NAMES + CFG_CONST + CFG_KEY_NAMES + API_IDS + "RGC_REQ_KEYS", "REQ_TAG_ATTRS", "CUSTOM_BAR_FMT", "API_VERSION", "CONF_STRUCTURE", "OPERATION_IDS", + "CUSTOM_PFX"] + FILE_DIR_NAMES + CFG_CONST + CFG_KEY_NAMES + API_IDS CONF_STRUCTURE = """ # example genome configuration structure diff --git a/refgenconf/helpers.py b/refgenconf/helpers.py index 5af81ec6..a7251ba7 100644 --- a/refgenconf/helpers.py +++ b/refgenconf/helpers.py @@ -8,7 +8,7 @@ __all__ = ["select_genome_config"] -def select_genome_config(filename, conf_env_vars=None, **kwargs): +def select_genome_config(filename=None, conf_env_vars=CFG_ENV_VARS, **kwargs): """ Get path to genome configuration file. 
@@ -17,7 +17,7 @@ def select_genome_config(filename, conf_env_vars=None, **kwargs): consider; basically, a prioritized search list :return str: path to genome configuration file """ - return yacman.select_config(filename, conf_env_vars or CFG_ENV_VARS, **kwargs) + return yacman.select_config(filename, conf_env_vars, **kwargs) def unbound_env_vars(path): diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 5c55724f..5647d9f9 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -25,13 +25,13 @@ import shutil from attmap import PathExAttMap as PXAM -from ubiquerg import checksum, is_url, query_yes_no, parse_registry_path as prp, untar +from ubiquerg import checksum, is_url, query_yes_no, parse_registry_path as prp, untar, is_writable from tqdm import tqdm import yacman from .const import * -from .helpers import unbound_env_vars, asciify_json_dict +from .helpers import unbound_env_vars, asciify_json_dict, select_genome_config from .exceptions import * @@ -70,6 +70,10 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10): item is missing :raise ValueError: if entries is given as a string and is not a file """ + + def _missing_key_msg(key, value): + _LOGGER.debug("Config lacks '{}' key. 
Setting to: {}".format(key, value)) + super(RefGenConf, self).__init__(filepath=filepath, entries=entries, writable=writable, wait_max=wait_max) genomes = self.setdefault(CFG_GENOMES_KEY, PXAM()) if not isinstance(genomes, PXAM): @@ -79,10 +83,12 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10): self[CFG_GENOMES_KEY] = PXAM() if CFG_FOLDER_KEY not in self: self[CFG_FOLDER_KEY] = os.path.dirname(entries) if isinstance(entries, str) else os.getcwd() + _missing_key_msg(CFG_FOLDER_KEY, self[CFG_FOLDER_KEY]) try: version = self[CFG_VERSION_KEY] except KeyError: - _LOGGER.warning("Config lacks version key: {}".format(CFG_VERSION_KEY)) + _missing_key_msg(CFG_VERSION_KEY, REQ_CFG_VERSION) + self[CFG_VERSION_KEY] = REQ_CFG_VERSION else: try: version = float(version) @@ -109,14 +115,41 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10): self[CFG_SERVERS_KEY] = self[CFG_SERVERS_KEY].rstrip("/") self[CFG_SERVERS_KEY] = [self[CFG_SERVERS_KEY]] except KeyError: - raise MissingConfigDataError(CFG_SERVER_KEY) + _missing_key_msg(CFG_SERVERS_KEY, str([DEFAULT_SERVER])) + self[CFG_SERVERS_KEY] = [DEFAULT_SERVER] def __bool__(self): - minkeys = set(self.keys()) == {CFG_SERVERS_KEY, CFG_FOLDER_KEY, CFG_GENOMES_KEY} + minkeys = set(self.keys()) == set(RGC_REQ_KEYS) return not minkeys or bool(self[CFG_GENOMES_KEY]) __nonzero__ = __bool__ + def initialize_config_file(self, filepath=None): + """ + Initialize genome configuration file on disk + + :param str filepath: a valid path where the configuration file should be initialized + :return str: the filepath the file was initialized at + :raise OSError: in case the file could not be initialized due to insufficient permissions or pre-existence + :raise TypeError: if no valid filepath cat be determined + """ + def _write_fail_err(reason): + raise OSError("Can't initialize, {}: {} ".format(reason, filepath)) + + filepath = select_genome_config(filepath, check_exist=False) + if not 
isinstance(filepath, str): + raise TypeError("Could not determine a valid path to " + "initialize a configuration file: {}".format(str(filepath))) + if os.path.exists(filepath): + _write_fail_err("file exists") + if not is_writable(filepath, check_exist=False): + _write_fail_err("insufficient permissions") + self.make_writable(filepath) + self.write() + self.make_readonly() + _LOGGER.info("Initialized genome configuration file: {}".format(filepath)) + return filepath + def assets_dict(self, genome=None, order=None, include_tags=False): """ Map each assembly name to a list of available asset names. @@ -385,8 +418,8 @@ def tag_asset(self, genome, asset, tag, new_tag): asset_mapping = self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset] if tag is None: raise ValueError("You must explicitly specify the tag of the asset " - "you want to reassign. \nCurrently defined " - "tags for '{}/{}' are: {}".format(genome, asset,", ".join(get_asset_tags(asset_mapping)))) + "you want to reassign. Currently defined " + "tags for '{}/{}' are: {}".format(genome, asset, ", ".join(get_asset_tags(asset_mapping)))) if new_tag in asset_mapping[CFG_ASSET_TAGS_KEY]: if not query_yes_no("You already have a '{}' asset tagged as '{}', do you wish to override?". 
format(asset, new_tag)): @@ -446,9 +479,10 @@ def _update_relatives_tags(self, genome, asset, tag, new_tag, relatives, update_ ori_relative_data = prp(relative) if ori_relative_data["item"] == asset and ori_relative_data["tag"] == tag: ori_relative_data["tag"] = new_tag - updated_relatives.append("{}:{}".format(asset, new_tag)) + updated_relatives.append("{}/{}:{}".format(genome, asset, new_tag)) else: - updated_relatives.append("{}:{}".format(ori_relative_data["item"], ori_relative_data["tag"])) + updated_relatives.append("{}/{}:{}".format(ori_relative_data["namespace"], + ori_relative_data["item"], ori_relative_data["tag"])) self.update_relatives_assets(genome, r_data["item"], r_data["tag"], updated_relatives, update_children) self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][r_data["item"]][CFG_ASSET_TAGS_KEY][r_data["tag"]]\ [relative_key] = updated_relatives @@ -601,12 +635,14 @@ def msg_overwrite(): _LOGGER.info("Extracting asset tarball and saving to: {}".format(tag_dir)) tmpdir = tempfile.mkdtemp(dir=genome_dir_path) # TODO: use context manager here when we drop support for py2 untar(filepath, tmpdir) - # here we suspect the unarchived asset to be an asset-named directory with the asset data inside + # here we suspect the unarchived asset to be an asset-named directory + # the asset data inside # and we transfer it to the tag-named subdirectory shutil.move(os.path.join(tmpdir, asset), tag_dir) shutil.rmtree(tmpdir) if os.path.isfile(filepath): os.remove(filepath) + with self as rgc: [rgc.chk_digest_update_child(gat[0], x, "{}/{}:{}".format(*gat), server_url) for x in archive_data[CFG_ASSET_PARENTS_KEY] if CFG_ASSET_PARENTS_KEY in archive_data] @@ -615,6 +651,29 @@ def msg_overwrite(): rgc.set_default_pointer(*gat) return gat, archive_data, server_url + def remove_asset_from_relatives(self, genome, asset, tag): + """ + Remove any relationship links associated with the selected asset + + :param str genome: genome to be removed from its relatives' relatives list + 
:param str asset: asset to be removed from its relatives' relatives list + :param str tag: tag to be removed from its relatives' relatives list + """ + to_remove = "{}/{}:{}".format(genome, asset, tag) + for rel_type in CFG_ASSET_RELATIVES_KEYS: + tmp = CFG_ASSET_RELATIVES_KEYS[len(CFG_ASSET_RELATIVES_KEYS) - 1 - CFG_ASSET_RELATIVES_KEYS.index(rel_type)] + tag_data = self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag] + if rel_type not in tag_data: + continue + for rel in tag_data[rel_type]: + parsed = prp(rel) + _LOGGER.debug("Removing '{}' from '{}' {}".format(to_remove, rel, tmp)) + try: + self[CFG_GENOMES_KEY][parsed["namespace"] or genome][CFG_ASSETS_KEY][parsed["item"]]\ + [CFG_ASSET_TAGS_KEY][parsed["tag"]][tmp].remove(to_remove) + except (KeyError, ValueError): + pass + def update_relatives_assets(self, genome, asset, tag=None, data=None, children=False): """ A convenience method which wraps the update assets and uses it to update the asset relatives of an asset. @@ -696,7 +755,7 @@ def update_assets(self, genome, asset=None, data=None): self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset].update(data) return self - def remove_assets(self, genome, asset, tag=None): + def remove_assets(self, genome, asset, tag=None, relationships=True): """ Remove data associated with a specified genome:asset:tag combination. If no tags are specified, the entire asset is removed from the genome. 
@@ -709,6 +768,8 @@ def remove_assets(self, genome, asset, tag=None): :param str genome: genome to be removed :param str asset: asset package to be removed :param str tag: tag to be removed + :param bool relationships: whether the asset being removed should + be removed from its relatives as well :raise TypeError: if genome argument type is not a list or str :return RefGenConf: updated object """ @@ -733,6 +794,8 @@ def _del_if_empty(obj, attr, alt=None): if _check_insert_data(genome, str, "genome"): if _check_insert_data(asset, str, "asset"): if _check_insert_data(tag, str, "tag"): + if relationships: + self.remove_asset_from_relatives(genome, asset, tag) del self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag] _del_if_empty(self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset], CFG_ASSET_TAGS_KEY, [self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY], asset]) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index a4301996..8d17ea5f 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,5 +3,5 @@ pyyaml requests tqdm>=4.38.0 ubiquerg>=0.5.0 -yacman>=0.6.5 +yacman>=0.6.6 future \ No newline at end of file diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 38625f3e..fada5340 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -2,4 +2,4 @@ pytest>=3.0.7 pytest-remotedata pyyaml>=5 ubiquerg>=0.3 -veracitools +veracitools \ No newline at end of file diff --git a/tests/test_config_constructor.py b/tests/test_config_constructor.py index a73cf3c9..f321e60f 100644 --- a/tests/test_config_constructor.py +++ b/tests/test_config_constructor.py @@ -3,9 +3,9 @@ import os import pytest from attmap import PathExAttMap -from refgenconf import RefGenConf, MissingConfigDataError, ConfigNotCompliantError +from refgenconf import RefGenConf, ConfigNotCompliantError from refgenconf.const import CFG_FOLDER_KEY, 
CFG_GENOMES_KEY, CFG_SERVERS_KEY, \ - DEFAULT_SERVER + DEFAULT_SERVER, RGC_REQ_KEYS __author__ = "Vince Reuter" __email__ = "vreuter@virginia.edu" @@ -15,12 +15,8 @@ class TestRefGenConf: def test_reads_file(self, cfg_file): assert isinstance(RefGenConf(cfg_file), RefGenConf) - @pytest.mark.parametrize("present", [[], [(CFG_FOLDER_KEY, lambda d: d.strpath)]]) - def test_missing_server_key(self, tmpdir, present): - """ Omission of required config items causes expected exception """ - data = {k: f(tmpdir) for k, f in present} - with pytest.raises(MissingConfigDataError): - RefGenConf(entries=data) + def test_creation_of_empty_object_sets_req_attrs(self): + assert all([k in RefGenConf() for k in RGC_REQ_KEYS]) def test_genome_folder_is_pwd_if_no_folder_key_and_raw_entries_passed(self, ro_rgc): data = PathExAttMap({k: v for k, v in ro_rgc.items() if k != CFG_FOLDER_KEY}) @@ -45,12 +41,6 @@ def test_genome_folder_is_value_from_config_file_if_key_present(self, tmpdir_fac assert expected != os.path.dirname(conf_file) assert expected == rgc[CFG_FOLDER_KEY] - def test_empty_rgc_is_false(self): - assert bool(RefGenConf(entries={CFG_SERVERS_KEY: DEFAULT_SERVER})) is False - - def test_nonempty_rgc_is_true(self, rgc): - assert bool(rgc) is True - @pytest.mark.parametrize("genomes", [None, "genomes", 10] + [dt(["mm10", "hg38"]) for dt in [list, set, tuple]]) def test_illegal_genomes_mapping_type_gets_converted_to_empty_mapping(self, genomes, tmpdir): rgc = RefGenConf(entries={