Skip to content

Commit

Permalink
Merge pull request #79 from databio/dev
Browse files Browse the repository at this point in the history
v0.6.1
  • Loading branch information
stolarczyk authored Dec 13, 2019
2 parents 85bccee + 0ec577f commit 644f95f
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 33 deletions.
10 changes: 10 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.6.1] - 2019-12-13

### Added
- `remove_asset_from_relatives` method for assets' relationship links removal
- `initialize_config_file` method

### Changed
- `remove_assets` method removes the asset relatives links
- in `select_genome_config` function the `filename` argument is not required anymore; the `$REFGENIE` environment variable can be used instead

## [0.6.0] - 2019-12-06

### Added
Expand Down
2 changes: 1 addition & 1 deletion refgenconf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.6.0"
__version__ = "0.6.1"
11 changes: 8 additions & 3 deletions refgenconf/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
CFG_ASSET_CHECKSUM_KEY = "asset_digest"
CFG_TAG_DESC_KEY = "tag_description"

CFG_ASSET_RELATIVES_KEYS = [CFG_ASSET_CHILDREN_KEY, CFG_ASSET_PARENTS_KEY]

CFG_TOP_LEVEL_KEYS = [
CFG_FOLDER_KEY, CFG_SERVER_KEY, CFG_SERVERS_KEY, CFG_ARCHIVE_KEY, CFG_GENOMES_KEY, CFG_VERSION_KEY]
CFG_GENOME_KEYS = [
Expand All @@ -80,11 +82,14 @@
CFG_SINGLE_ASSET_SECTION_KEYS = [CFG_ASSET_PATH_KEY, CFG_ASSET_DESC_KEY, CFG_ASSET_SIZE_KEY, CFG_ARCHIVE_SIZE_KEY,
CFG_ARCHIVE_CHECKSUM_KEY, CFG_SEEK_KEYS_KEY]

RGC_REQ_KEYS = [CFG_SERVERS_KEY, CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_VERSION_KEY]

CFG_KEY_NAMES = [
"CFG_FOLDER_KEY", "CFG_SERVER_KEY", "CFG_SERVERS_KEY", "CFG_GENOMES_KEY",
"CFG_ASSET_PATH_KEY", "CFG_ASSET_DESC_KEY", "CFG_ARCHIVE_KEY", "CFG_ARCHIVE_SIZE_KEY", "CFG_SEEK_KEYS_KEY",
"CFG_ASSET_SIZE_KEY", "CFG_CHECKSUM_KEY", "CFG_ARCHIVE_CHECKSUM_KEY", "CFG_VERSION_KEY", "CFG_ASSET_PARENTS_KEY",
"CFG_ASSET_CHILDREN_KEY", "CFG_TAG_DESC_KEY", "CFG_ASSET_CHECKSUM_KEY", "CFG_ASSET_TAGS_KEY"]
"CFG_ASSET_CHILDREN_KEY", "CFG_TAG_DESC_KEY", "CFG_ASSET_CHECKSUM_KEY", "CFG_ASSET_TAGS_KEY",
"CFG_ASSET_RELATIVES_KEYS"]


# other consts
Expand All @@ -97,8 +102,8 @@

__all__ = ["DEFAULT_SERVER", "CFG_ASSET_DEFAULT_TAG_KEY", "CFG_KEY_NAMES", "CFG_GENOME_DESC_KEY", "REQ_CFG_VERSION",
"CFG_ASSETS_KEY", "CFG_GENOME_ATTRS_KEYS", "REFGENIE_BY_CFG", "DEFAULT_TAG", "ATTRS_COPY_PULL",
"REQ_TAG_ATTRS", "CUSTOM_BAR_FMT", "API_VERSION", "CONF_STRUCTURE", "OPERATION_IDS", "CUSTOM_PFX"] + \
FILE_DIR_NAMES + CFG_CONST + CFG_KEY_NAMES + API_IDS
"RGC_REQ_KEYS", "REQ_TAG_ATTRS", "CUSTOM_BAR_FMT", "API_VERSION", "CONF_STRUCTURE", "OPERATION_IDS",
"CUSTOM_PFX"] + FILE_DIR_NAMES + CFG_CONST + CFG_KEY_NAMES + API_IDS

CONF_STRUCTURE = """
# example genome configuration structure
Expand Down
4 changes: 2 additions & 2 deletions refgenconf/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
__all__ = ["select_genome_config"]


def select_genome_config(filename, conf_env_vars=None, **kwargs):
def select_genome_config(filename=None, conf_env_vars=CFG_ENV_VARS, **kwargs):
"""
Get path to genome configuration file.
Expand All @@ -17,7 +17,7 @@ def select_genome_config(filename, conf_env_vars=None, **kwargs):
consider; basically, a prioritized search list
:return str: path to genome configuration file
"""
return yacman.select_config(filename, conf_env_vars or CFG_ENV_VARS, **kwargs)
return yacman.select_config(filename, conf_env_vars, **kwargs)


def unbound_env_vars(path):
Expand Down
85 changes: 74 additions & 11 deletions refgenconf/refgenconf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@
import shutil

from attmap import PathExAttMap as PXAM
from ubiquerg import checksum, is_url, query_yes_no, parse_registry_path as prp, untar
from ubiquerg import checksum, is_url, query_yes_no, parse_registry_path as prp, untar, is_writable
from tqdm import tqdm

import yacman

from .const import *
from .helpers import unbound_env_vars, asciify_json_dict
from .helpers import unbound_env_vars, asciify_json_dict, select_genome_config
from .exceptions import *


Expand Down Expand Up @@ -70,6 +70,10 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10):
item is missing
:raise ValueError: if entries is given as a string and is not a file
"""

def _missing_key_msg(key, value):
_LOGGER.debug("Config lacks '{}' key. Setting to: {}".format(key, value))

super(RefGenConf, self).__init__(filepath=filepath, entries=entries, writable=writable, wait_max=wait_max)
genomes = self.setdefault(CFG_GENOMES_KEY, PXAM())
if not isinstance(genomes, PXAM):
Expand All @@ -79,10 +83,12 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10):
self[CFG_GENOMES_KEY] = PXAM()
if CFG_FOLDER_KEY not in self:
self[CFG_FOLDER_KEY] = os.path.dirname(entries) if isinstance(entries, str) else os.getcwd()
_missing_key_msg(CFG_FOLDER_KEY, self[CFG_FOLDER_KEY])
try:
version = self[CFG_VERSION_KEY]
except KeyError:
_LOGGER.warning("Config lacks version key: {}".format(CFG_VERSION_KEY))
_missing_key_msg(CFG_VERSION_KEY, REQ_CFG_VERSION)
self[CFG_VERSION_KEY] = REQ_CFG_VERSION
else:
try:
version = float(version)
Expand All @@ -109,14 +115,41 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=10):
self[CFG_SERVERS_KEY] = self[CFG_SERVERS_KEY].rstrip("/")
self[CFG_SERVERS_KEY] = [self[CFG_SERVERS_KEY]]
except KeyError:
raise MissingConfigDataError(CFG_SERVER_KEY)
_missing_key_msg(CFG_SERVERS_KEY, str([DEFAULT_SERVER]))
self[CFG_SERVERS_KEY] = [DEFAULT_SERVER]

def __bool__(self):
minkeys = set(self.keys()) == {CFG_SERVERS_KEY, CFG_FOLDER_KEY, CFG_GENOMES_KEY}
minkeys = set(self.keys()) == set(RGC_REQ_KEYS)
return not minkeys or bool(self[CFG_GENOMES_KEY])

__nonzero__ = __bool__

def initialize_config_file(self, filepath=None):
    """
    Initialize genome configuration file on disk

    The target path is resolved with select_genome_config, so when no
    filepath is given the path selection is delegated to that function.

    :param str filepath: a valid path where the configuration file should be initialized
    :return str: the filepath the file was initialized at
    :raise OSError: in case the file could not be initialized due to insufficient permissions or pre-existence
    :raise TypeError: if no valid filepath can be determined
    """
    def _write_fail_err(reason):
        # reads the enclosing 'filepath', i.e. the resolved path at call time
        raise OSError("Can't initialize, {}: {} ".format(reason, filepath))

    # check_exist=False: the file is about to be created, so it need not exist yet
    filepath = select_genome_config(filepath, check_exist=False)
    if not isinstance(filepath, str):
        raise TypeError("Could not determine a valid path to "
                        "initialize a configuration file: {}".format(str(filepath)))
    # never overwrite an existing file
    if os.path.exists(filepath):
        _write_fail_err("file exists")
    if not is_writable(filepath, check_exist=False):
        _write_fail_err("insufficient permissions")
    self.make_writable(filepath)
    try:
        self.write()
    finally:
        # always return to read-only mode, even if the write failed, so the
        # object is not left in a writable state after an error
        self.make_readonly()
    _LOGGER.info("Initialized genome configuration file: {}".format(filepath))
    return filepath

def assets_dict(self, genome=None, order=None, include_tags=False):
"""
Map each assembly name to a list of available asset names.
Expand Down Expand Up @@ -385,8 +418,8 @@ def tag_asset(self, genome, asset, tag, new_tag):
asset_mapping = self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset]
if tag is None:
raise ValueError("You must explicitly specify the tag of the asset "
"you want to reassign. \nCurrently defined "
"tags for '{}/{}' are: {}".format(genome, asset,", ".join(get_asset_tags(asset_mapping))))
"you want to reassign. Currently defined "
"tags for '{}/{}' are: {}".format(genome, asset, ", ".join(get_asset_tags(asset_mapping))))
if new_tag in asset_mapping[CFG_ASSET_TAGS_KEY]:
if not query_yes_no("You already have a '{}' asset tagged as '{}', do you wish to override?".
format(asset, new_tag)):
Expand Down Expand Up @@ -446,9 +479,10 @@ def _update_relatives_tags(self, genome, asset, tag, new_tag, relatives, update_
ori_relative_data = prp(relative)
if ori_relative_data["item"] == asset and ori_relative_data["tag"] == tag:
ori_relative_data["tag"] = new_tag
updated_relatives.append("{}:{}".format(asset, new_tag))
updated_relatives.append("{}/{}:{}".format(genome, asset, new_tag))
else:
updated_relatives.append("{}:{}".format(ori_relative_data["item"], ori_relative_data["tag"]))
updated_relatives.append("{}/{}:{}".format(ori_relative_data["namespace"],
ori_relative_data["item"], ori_relative_data["tag"]))
self.update_relatives_assets(genome, r_data["item"], r_data["tag"], updated_relatives, update_children)
self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][r_data["item"]][CFG_ASSET_TAGS_KEY][r_data["tag"]]\
[relative_key] = updated_relatives
Expand Down Expand Up @@ -601,12 +635,14 @@ def msg_overwrite():
_LOGGER.info("Extracting asset tarball and saving to: {}".format(tag_dir))
tmpdir = tempfile.mkdtemp(dir=genome_dir_path) # TODO: use context manager here when we drop support for py2
untar(filepath, tmpdir)
# here we suspect the unarchived asset to be an asset-named directory with the asset data inside
# here we suspect the unarchived asset to be an asset-named directory
# with the asset data inside
# and we transfer it to the tag-named subdirectory
shutil.move(os.path.join(tmpdir, asset), tag_dir)
shutil.rmtree(tmpdir)
if os.path.isfile(filepath):
os.remove(filepath)

with self as rgc:
[rgc.chk_digest_update_child(gat[0], x, "{}/{}:{}".format(*gat), server_url)
for x in archive_data[CFG_ASSET_PARENTS_KEY] if CFG_ASSET_PARENTS_KEY in archive_data]
Expand All @@ -615,6 +651,29 @@ def msg_overwrite():
rgc.set_default_pointer(*gat)
return gat, archive_data, server_url

def remove_asset_from_relatives(self, genome, asset, tag):
    """
    Remove any relationship links associated with the selected asset

    For every relative listed under the asset's tag, the asset's own
    registry path is removed from the relative's list of the opposite
    relationship type. Relatives that are missing from the config, or that
    do not list this asset, are silently skipped.

    :param str genome: genome to be removed from its relatives' relatives list
    :param str asset: asset to be removed from its relatives' relatives list
    :param str tag: tag to be removed from its relatives' relatives list
    """
    own_path = "{}/{}:{}".format(genome, asset, tag)
    genomes = self[CFG_GENOMES_KEY]
    own_tag_data = genomes[genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag]
    # pair each relationship type with the one at the mirrored position in
    # the list: an entry in this asset's 'rel_type' list points at a relative
    # that records this asset under 'opposite_type'
    for rel_type, opposite_type in zip(CFG_ASSET_RELATIVES_KEYS, reversed(CFG_ASSET_RELATIVES_KEYS)):
        if rel_type not in own_tag_data:
            continue
        for relative in own_tag_data[rel_type]:
            rel_parts = prp(relative)
            _LOGGER.debug("Removing '{}' from '{}' {}".format(own_path, relative, opposite_type))
            try:
                # a relative without an explicit namespace is looked up in this genome
                rel_assets = genomes[rel_parts["namespace"] or genome][CFG_ASSETS_KEY]
                rel_tag_data = rel_assets[rel_parts["item"]][CFG_ASSET_TAGS_KEY][rel_parts["tag"]]
                rel_tag_data[opposite_type].remove(own_path)
            except (KeyError, ValueError):
                # best effort: the relative is absent or does not list this asset
                pass

def update_relatives_assets(self, genome, asset, tag=None, data=None, children=False):
"""
A convenience method which wraps the update assets and uses it to update the asset relatives of an asset.
Expand Down Expand Up @@ -696,7 +755,7 @@ def update_assets(self, genome, asset=None, data=None):
self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset].update(data)
return self

def remove_assets(self, genome, asset, tag=None):
def remove_assets(self, genome, asset, tag=None, relationships=True):
"""
Remove data associated with a specified genome:asset:tag combination.
If no tags are specified, the entire asset is removed from the genome.
Expand All @@ -709,6 +768,8 @@ def remove_assets(self, genome, asset, tag=None):
:param str genome: genome to be removed
:param str asset: asset package to be removed
:param str tag: tag to be removed
:param bool relationships: whether the asset being removed should
be removed from its relatives as well
:raise TypeError: if genome argument type is not a list or str
:return RefGenConf: updated object
"""
Expand All @@ -733,6 +794,8 @@ def _del_if_empty(obj, attr, alt=None):
if _check_insert_data(genome, str, "genome"):
if _check_insert_data(asset, str, "asset"):
if _check_insert_data(tag, str, "tag"):
if relationships:
self.remove_asset_from_relatives(genome, asset, tag)
del self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag]
_del_if_empty(self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset], CFG_ASSET_TAGS_KEY,
[self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY], asset])
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-all.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ pyyaml
requests
tqdm>=4.38.0
ubiquerg>=0.5.0
yacman>=0.6.5
yacman>=0.6.6
future
2 changes: 1 addition & 1 deletion requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ pytest>=3.0.7
pytest-remotedata
pyyaml>=5
ubiquerg>=0.3
veracitools
veracitools
18 changes: 4 additions & 14 deletions tests/test_config_constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import os
import pytest
from attmap import PathExAttMap
from refgenconf import RefGenConf, MissingConfigDataError, ConfigNotCompliantError
from refgenconf import RefGenConf, ConfigNotCompliantError
from refgenconf.const import CFG_FOLDER_KEY, CFG_GENOMES_KEY, CFG_SERVERS_KEY, \
DEFAULT_SERVER
DEFAULT_SERVER, RGC_REQ_KEYS

__author__ = "Vince Reuter"
__email__ = "[email protected]"
Expand All @@ -15,12 +15,8 @@ class TestRefGenConf:
def test_reads_file(self, cfg_file):
assert isinstance(RefGenConf(cfg_file), RefGenConf)

@pytest.mark.parametrize("present", [[], [(CFG_FOLDER_KEY, lambda d: d.strpath)]])
def test_missing_server_key(self, tmpdir, present):
""" Omission of required config items causes expected exception """
data = {k: f(tmpdir) for k, f in present}
with pytest.raises(MissingConfigDataError):
RefGenConf(entries=data)
def test_creation_of_empty_object_sets_req_attrs(self):
    """ A RefGenConf created with no arguments contains every required key """
    for required_key in RGC_REQ_KEYS:
        assert required_key in RefGenConf()

def test_genome_folder_is_pwd_if_no_folder_key_and_raw_entries_passed(self, ro_rgc):
data = PathExAttMap({k: v for k, v in ro_rgc.items() if k != CFG_FOLDER_KEY})
Expand All @@ -45,12 +41,6 @@ def test_genome_folder_is_value_from_config_file_if_key_present(self, tmpdir_fac
assert expected != os.path.dirname(conf_file)
assert expected == rgc[CFG_FOLDER_KEY]

def test_empty_rgc_is_false(self):
assert bool(RefGenConf(entries={CFG_SERVERS_KEY: DEFAULT_SERVER})) is False

def test_nonempty_rgc_is_true(self, rgc):
assert bool(rgc) is True

@pytest.mark.parametrize("genomes", [None, "genomes", 10] + [dt(["mm10", "hg38"]) for dt in [list, set, tuple]])
def test_illegal_genomes_mapping_type_gets_converted_to_empty_mapping(self, genomes, tmpdir):
rgc = RefGenConf(entries={
Expand Down

0 comments on commit 644f95f

Please sign in to comment.