Skip to content

Commit

Permalink
Merge pull request #49 from databio/dev
Browse files Browse the repository at this point in the history
release 0.4.0
  • Loading branch information
nsheff authored Aug 2, 2019
2 parents ad4cc28 + 6ef9f54 commit a22385e
Show file tree
Hide file tree
Showing 18 changed files with 1,147 additions and 1,062 deletions.
6 changes: 2 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@ os:
install:
- pip install .
- pip install -r requirements/requirements-dev.txt
# - pip install -r requirements/requirements-test.txt
#script: pytest --remote-data --cov=refgenconf
script:
- echo "skipping tests"
- pip install -r requirements/requirements-test.txt
script: pytest --remote-data --cov=refgenconf
branches:
only:
- dev
Expand Down
10 changes: 10 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format.

## [0.4.0] - unreleased

### Added
- `remove_assets` method
- Local and remote listing can be restricted by genome. The following methods accept an optional `genome` argument:
- `list_local`
- `list_remote`
- `assets_dict`
- `assets_str`

## [0.3.0] - 2019-07-11
### Changed
- Favor asset path relative to genome config rather than local folder in case both exist.
Expand Down
2 changes: 1 addition & 1 deletion refgenconf/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.0"
__version__ = "0.4.0"
118 changes: 86 additions & 32 deletions refgenconf/refgenconf.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
install_aliases()
from inspect import getargspec as finspect
from urllib2 import HTTPError
from urllib import ContentTooShortError
from urllib.error import ContentTooShortError
ConnectionRefusedError = Exception

import urllib.request
Expand Down Expand Up @@ -75,11 +75,22 @@ def __init__(self, entries=None):
self[CFG_GENOMES_KEY] = PXAM()
if CFG_FOLDER_KEY not in self:
self[CFG_FOLDER_KEY] = os.path.dirname(entries) if isinstance(entries, str) else os.getcwd()
if CFG_VERSION_KEY in self and float(self[CFG_VERSION_KEY]) < REQ_CFG_VERSION:
msg = "This genome config (v{}) is not compliant with v{} standards. To use it, please downgrade " \
"refgenie: 'pip install refgenie==0.4.4'.".format(self[CFG_VERSION_KEY], str(REQ_CFG_VERSION))
raise ConfigNotCompliantError(msg)
_LOGGER.debug("Config version is correct: {}".format(self[CFG_VERSION_KEY]))
try:
version = self[CFG_VERSION_KEY]
except KeyError:
_LOGGER.warning("Config lacks version key: {}".format(CFG_VERSION_KEY))
else:
try:
version = float(version)
except ValueError:
_LOGGER.warning("Cannot parse as numeric: {}".format(version))
else:
if version < REQ_CFG_VERSION:
msg = "This genome config (v{}) is not compliant with v{} standards. To use it, please downgrade " \
"refgenie: 'pip install refgenie==0.4.4'.".format(self[CFG_VERSION_KEY], str(REQ_CFG_VERSION))
raise ConfigNotCompliantError(msg)
else:
_LOGGER.debug("Config version is compliant: {}".format(version))
try:
self[CFG_SERVER_KEY] = self[CFG_SERVER_KEY].rstrip("/")
except KeyError:
Expand All @@ -91,18 +102,20 @@ def __bool__(self):

__nonzero__ = __bool__

def assets_dict(self, order=None):
def assets_dict(self, genome=None, order=None):
"""
Map each assembly name to a list of available asset names.
:param order: function(str) -> object how to key genome IDs for sort
:param function(str) -> object order: how to key genome IDs for sort
:param list[str] | str genome: genomes that the assets should be found for
:return Mapping[str, Iterable[str]]: mapping from assembly name to
collection of available asset names.
"""
refgens = sorted(self[CFG_GENOMES_KEY].keys(), key=order)
return OrderedDict([(g, sorted(list(self[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY].keys()), key=order)) for g in refgens])
refgens = _select_genomes(sorted(self[CFG_GENOMES_KEY].keys(), key=order), genome)
return OrderedDict([(g, sorted(list(self[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY].keys()), key=order))
for g in refgens])

def assets_str(self, offset_text=" ", asset_sep=", ", genome_assets_delim=": ", order=None):
def assets_str(self, offset_text=" ", asset_sep=", ", genome_assets_delim=": ", genome=None, order=None):
"""
Create a block of text representing genome-to-asset mapping.
Expand All @@ -112,20 +125,21 @@ def assets_str(self, offset_text=" ", asset_sep=", ", genome_assets_delim=": ",
within each genome line
:param str genome_assets_delim: the delimiter to place between
reference genome assembly name and its list of asset names
:param list[str] | str genome: genomes that the assets should be found for
:param order: function(str) -> object how to key genome IDs and asset
names for sort
:return str: text representing genome-to-asset mapping
"""
make_line = partial(_make_genome_assets_line, offset_text=offset_text,
genome_assets_delim=genome_assets_delim, asset_sep=asset_sep, order=order)
refgens = sorted(self[CFG_GENOMES_KEY].keys(), key=order)
refgens = _select_genomes(sorted(self[CFG_GENOMES_KEY].keys(), key=order), genome)
make_line = partial(_make_genome_assets_line, offset_text=offset_text, genome_assets_delim=genome_assets_delim,
asset_sep=asset_sep, order=order)
return "\n".join([make_line(g, self[CFG_GENOMES_KEY][g][CFG_ASSETS_KEY]) for g in refgens])

def filepath(self, genome, asset, ext=".tar"):
"""
Determine path to a particular asset for a particular genome.
:param str genome: reference genome iD
:param str genome: reference genome ID
:param str asset: asset name
:param str ext: file extension
:return str: path to asset for given genome and asset kind/name
Expand Down Expand Up @@ -213,8 +227,7 @@ def list_assets_by_genome(self, genome=None, order=None):
one is provided, else the full mapping between assembly ID and
collection available asset type names
"""
return self.assets_dict(order) if genome is None \
else sorted(list(self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys()), key=order)
return self.assets_dict(genome, order)[genome] if genome is not None else self.assets_dict(order)

def list_genomes_by_asset(self, asset=None, order=None):
"""
Expand All @@ -230,32 +243,36 @@ def list_genomes_by_asset(self, asset=None, order=None):
will be returned.
"""
return self._invert_genomes(order) if not asset else \
sorted([g for g, am in self[CFG_GENOMES_KEY].items() if asset in am], key=order)
sorted([g for g, data in self[CFG_GENOMES_KEY].items()
if asset in data.get(CFG_ASSETS_KEY)], key=order)

def list_local(self, order=None):
def list_local(self, genome=None, order=None):
"""
List locally available reference genome IDs and assets by ID.
:param list[str] | str genome: genomes that the assets should be found for
:param order: function(str) -> object how to key genome IDs and asset
names for sort
:return str, str: text reps of locally available genomes and assets
"""
return self.genomes_str(order=order), self.assets_str(order=order)
genomes_str = self.genomes_str(order=order) if genome is None \
else ", ".join(_select_genomes(sorted(self[CFG_GENOMES_KEY].keys(), key=order), genome))
return genomes_str, self.assets_str(genome=genome, order=order)

def list_remote(self, get_url=lambda rgc: "{}/assets".format(rgc.genome_server),
order=None):
def list_remote(self, get_url=lambda rgc: "{}/assets".format(rgc.genome_server), genome=None, order=None):
"""
List genomes and assets available remotely.
:param function(refgenconf.RefGenConf) -> str get_url: how to determine
URL request, given RefGenConf instance
:param list[str] | str genome: genomes that the assets should be found for
:param order: function(str) -> object how to key genome IDs and asset
names for sort
:return str, str: text reps of remotely available genomes and assets
"""
url = get_url(self)
_LOGGER.info("Querying available assets from server: {}".format(url))
genomes, assets = _list_remote(url, order)
genomes, assets = _list_remote(url, genome, order)
return genomes, assets

def pull_asset(self, genome, assets, genome_config, unpack=True, force=None,
Expand Down Expand Up @@ -318,6 +335,7 @@ def raise_unpack_error():
def preserve():
_LOGGER.debug("Preserving existing: {}".format(filepath))
return asset, filepath

def msg_overwrite():
_LOGGER.debug("Overwriting: {}".format(filepath))
if force is False:
Expand Down Expand Up @@ -404,6 +422,28 @@ def update_assets(self, genome, asset=None, data=None):
self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset].update(data)
return self

def remove_assets(self, genome, assets):
    """
    Remove assets. If no more assets are defined for the selected genome after asset removal,
    the genome key will be removed as well

    :param str genome: genome whose assets are to be removed
    :param str | list[str] assets: assets to be removed
    :raise TypeError: if assets argument type is not a list or str, or if
        genome or any single asset name is not a str
    :raise KeyError: presumably, if the genome or one of the assets is not
        defined in the config (the failed lookup is not caught here)
    :return RefGenConf: updated object
    """
    assets = [assets] if isinstance(assets, str) else assets
    if not isinstance(assets, list):
        raise TypeError("assets arg has to be a str or list[str]")
    # Validate everything before mutating, so a bad argument cannot leave
    # the config partially modified.
    _check_insert_data(genome, str, "genome")
    for asset in assets:
        _check_insert_data(asset, str, "asset")
    # No setdefault here: a removal must never create a genome entry.
    for asset in assets:
        del self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset]
    # Drop the genome itself once its last asset is gone.
    if len(self[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY]) == 0:
        del self[CFG_GENOMES_KEY][genome]
    return self

def update_genomes(self, genome, data=None):
"""
Updates the genomes in RefGenConf object at any level.
Expand Down Expand Up @@ -534,11 +574,10 @@ def _is_large_archive(size):
:return bool: the decision
"""
_LOGGER.debug("Checking archive size: '{}'".format(size))
return size.endswith("TB") or (
size.endswith("GB") and float("".join(c for c in size if c in '0123456789.')) > 5)
return size.endswith("TB") or (size.endswith("GB") and float("".join(c for c in size if c in '0123456789.')) > 5)


def _list_remote(url, order=None):
def _list_remote(url, genome, order=None):
"""
List genomes and assets available remotely.
Expand All @@ -548,13 +587,13 @@ def _list_remote(url, order=None):
:return str, str: text reps of remotely available genomes and assets
"""
genomes_data = _read_remote_data(url)
refgens = sorted(genomes_data.keys(), key=order)
return ", ".join(refgens), "\n".join([_make_genome_assets_line(g, genomes_data[g], order=order) for g in refgens])
refgens = _select_genomes(sorted(genomes_data.keys(), key=order), genome)
asset_texts = [_make_genome_assets_line(g, genomes_data[g], order=order) for g in refgens]
return ", ".join(refgens), "\n".join(asset_texts)


def _make_genome_assets_line(
gen, assets, offset_text=" ", genome_assets_delim=": ", asset_sep=", ",
order=None):
gen, assets, offset_text=" ", genome_assets_delim=": ", asset_sep=", ", order=None):
"""
Build a line of text for display of assets by genome
Expand All @@ -567,8 +606,7 @@ def _make_genome_assets_line(
:param order: function(str) -> object how to key asset names for sort
:return str: text representation of a single assembly's name and assets
"""
return offset_text + "{}{}{}".format(
gen, genome_assets_delim, asset_sep.join(sorted(list(assets), key=order)))
return offset_text + "{}{}{}".format(gen, genome_assets_delim, asset_sep.join(sorted(list(assets), key=order)))


def _read_remote_data(url):
Expand Down Expand Up @@ -604,3 +642,19 @@ def _check_insert_data(obj, datatype, name):
raise TypeError("{} must be {}; got {}".format(
name, datatype.__name__, type(obj).__name__))
return True


def _select_genomes(genomes, genome=None):
"""
Safely select a subset of genomes
:param list[str] | str genome: genomes that the assets should be found for
:raise TypeError: if genome argument type is not a list or str
:return list: selected subset of genomes
"""
if genome:
if isinstance(genome, str):
genome = [genome]
elif not isinstance(genome, list) or not all(isinstance(i, str) for i in genome):
raise TypeError("genome has to be a list[str] or a str, got '{}'".format(genome.__class__.__name__))
return genomes if (genome is None or not all(x in genomes for x in genome)) else genome
6 changes: 3 additions & 3 deletions requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pytest>=3.0.7
#pytest-remotedata
pytest>=3.0.7
pytest-remotedata
pyyaml>=5
ubiquerg>=0.3
#veracitools
veracitools
1 change: 1 addition & 0 deletions requirements/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
coveralls>=1.1
pytest-cov==2.6.1
pytest-remotedata
Loading

0 comments on commit a22385e

Please sign in to comment.