From 2cc85b1f35fff5a1c0a65f28153f755eecc3964b Mon Sep 17 00:00:00 2001 From: nsheff Date: Sat, 27 Jun 2020 16:05:52 -0400 Subject: [PATCH 01/13] notify on preserve existing. See refgenie/refgenie#184 --- refgenconf/refgenconf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 96700bcb..13d42285 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -655,7 +655,7 @@ def _raise_unpack_error(): # check if the genome/asset:tag exists and get request user decision if os.path.exists(tag_dir): def preserve(): - _LOGGER.debug("Preserving existing: {}".format(tag_dir)) + _LOGGER.info("Preserving existing: {}".format(tag_dir)) return _null_return() def msg_overwrite(): From 96094bd1d1697b09ce31e7fa97f4c2fef4a778a6 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 28 Jun 2020 18:52:02 -0400 Subject: [PATCH 02/13] update RefGenConf signature --- refgenconf/refgenconf.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 13d42285..88ad9c1a 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -49,7 +49,8 @@ def handle(sig, frame): class RefGenConf(yacman.YacAttMap): """ A sort of oracle of available reference genome assembly assets """ - def __init__(self, filepath=None, entries=None, writable=False, wait_max=60): + def __init__(self, filepath=None, entries=None, writable=False, wait_max=60, + skip_read_lock=False): """ Create the config instance by with a filepath or key-value pairs. 
@@ -57,7 +58,10 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=60): :param Iterable[(str, object)] | Mapping[str, object] entries: config filepath or collection of key-value pairs :param bool writable: whether to create the object with write capabilities - :param int wait_max: how long to wait for creating an object when the file that data will be read from is locked + :param int wait_max: how long to wait for creating an object when the + file that data will be read from is locked + :param bool skip_read_lock: whether the file should not be locked for + reading when object is created in read only mode :raise refgenconf.MissingConfigDataError: if a required configuration item is missing :raise ValueError: if entries is given as a string and is not a file @@ -66,7 +70,9 @@ def __init__(self, filepath=None, entries=None, writable=False, wait_max=60): def _missing_key_msg(key, value): _LOGGER.debug("Config lacks '{}' key. Setting to: {}".format(key, value)) - super(RefGenConf, self).__init__(filepath=filepath, entries=entries, writable=writable, wait_max=wait_max) + super(RefGenConf, self).__init__(filepath=filepath, entries=entries, + writable=writable, wait_max=wait_max, + skip_read_lock=skip_read_lock) genomes = self.setdefault(CFG_GENOMES_KEY, PXAM()) if not isinstance(genomes, PXAM): if genomes: From 54225f7155cb7daff5c200aed0359390dfeec26a Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 28 Jun 2020 18:54:32 -0400 Subject: [PATCH 03/13] update yacman requirement --- .travis.yml | 3 +++ refgenconf/refgenconf.py | 2 ++ requirements/requirements-all.txt | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 65e609c6..aebc0db8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,9 @@ python: - "3.8" os: - linux +before_install: + - pip install git+https://github.com/pepkit/ubiguerg.git@dev + - pip install git+https://github.com/databio/yacman.git@dev install: - pip install . 
- pip install -r requirements/requirements-dev.txt diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 88ad9c1a..087a7bd3 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -667,6 +667,8 @@ def preserve(): def msg_overwrite(): _LOGGER.debug("Overwriting: {}".format(tag_dir)) shutil.rmtree(tag_dir) + with self as rgc: + rgc.cfg_remove_assets(*gat) if force is False: return preserve() elif force is None: diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 99c62f2c..8592b1b8 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,5 +3,5 @@ pyyaml requests tqdm>=4.38.0 ubiquerg>=0.6.0 -yacman>=0.6.8 +yacman>=0.6.9-dev future \ No newline at end of file From ae53cc11c49f9f42bc787f51e83b3f8891ffceed Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 28 Jun 2020 19:32:58 -0400 Subject: [PATCH 04/13] typo --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index aebc0db8..4333c36b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ python: os: - linux before_install: - - pip install git+https://github.com/pepkit/ubiguerg.git@dev + - pip install git+https://github.com/pepkit/ubiquerg.git@dev - pip install git+https://github.com/databio/yacman.git@dev install: - pip install . 
From 788fd0daefd120b04f0a1fcadb6254a1369e5072 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 29 Jun 2020 08:34:46 -0400 Subject: [PATCH 05/13] switch to production server url for testing --- tests/conftest.py | 2 +- tests/data/genomes.yaml | 26 +++++++++++++------------- tests/data/genomes_v2.yaml | 2 +- tests/test_list_remote.py | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bd4ab91e..87563539 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -127,7 +127,7 @@ def made_genome_config_file(temp_genome_config_file): """ Make the test session's genome config file. """ genome_folder = os.path.dirname(temp_genome_config_file) extra_kv_lines = ["{}: {}".format(CFG_FOLDER_KEY, genome_folder), - "{}: {}".format(CFG_SERVERS_KEY, "http://staging.refgenomes.databio.org/"), + "{}: {}".format(CFG_SERVERS_KEY, "https://refgenomes.databio.org/"), "{}: {}".format(CFG_VERSION_KEY, package_version), "{}:".format(CFG_GENOMES_KEY)] gen_data_lines = PathExAttMap(CONF_DATA).get_yaml_lines() diff --git a/tests/data/genomes.yaml b/tests/data/genomes.yaml index f72ff849..d27e59e9 100644 --- a/tests/data/genomes.yaml +++ b/tests/data/genomes.yaml @@ -2,19 +2,6 @@ config_version: 0.3 genome_folder: /tmp genome_servers: ['http://refgenomes.databio.org'] genomes: - human_repeats: - assets: - fasta: - tags: - default: - seek_keys: - fasta: human_repeats.fa - fai: human_repeats.fa.fai - chrom_sizes: human_repeats.chrom.sizes - asset_parents: [] - asset_path: fasta - asset_digest: 4a749d4e74b057d0efa0c8398ebcb871 - default_tag: default mouse_chrM2x: assets: bwa_index: @@ -48,3 +35,16 @@ genomes: asset_path: fasta asset_digest: 4eb430296bc02ed7e4006624f1d5ac53 default_tag: default + human_repeats: + assets: + fasta: + tags: + default: + seek_keys: + fasta: human_repeats.fa + fai: human_repeats.fa.fai + chrom_sizes: human_repeats.chrom.sizes + asset_parents: [] + asset_path: fasta + asset_digest: 
4a749d4e74b057d0efa0c8398ebcb871 + default_tag: default diff --git a/tests/data/genomes_v2.yaml b/tests/data/genomes_v2.yaml index d922200f..29deb065 100644 --- a/tests/data/genomes_v2.yaml +++ b/tests/data/genomes_v2.yaml @@ -1,6 +1,6 @@ config_version: 0.2 genome_folder: /tmp -genome_server: http://staging.refgenomes.databio.org +genome_server: https://refgenomes.databio.org/ genomes: rCRSd: assets: diff --git a/tests/test_list_remote.py b/tests/test_list_remote.py index 5c66d48f..88e424a4 100644 --- a/tests/test_list_remote.py +++ b/tests/test_list_remote.py @@ -13,7 +13,7 @@ def test_list_remote(rgc, tmpdir): new_rgc = RefGenConf(entries={CFG_FOLDER_KEY: tmpdir.strpath, CFG_SERVERS_KEY: DEFAULT_SERVER, CFG_GENOMES_KEY: rgc[CFG_GENOMES_KEY]}) - new_rgc[CFG_SERVERS_KEY] = "http://staging.refgenomes.databio.org" + new_rgc[CFG_SERVERS_KEY] = "https://refgenomes.databio.org/" print("NEW RGC KEYS: {}".format(list(new_rgc.keys()))) with mock.patch("refgenconf.refgenconf._read_remote_data", return_value=rgc.genomes): From 961a203ec4d5eee2a3bfa008e0a3b5e107cf0b08 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 29 Jun 2020 08:48:57 -0400 Subject: [PATCH 06/13] dont leave lock behind in test --- tests/test_1pull_asset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_1pull_asset.py b/tests/test_1pull_asset.py index 3fba59e6..91ab89dc 100644 --- a/tests/test_1pull_asset.py +++ b/tests/test_1pull_asset.py @@ -120,6 +120,8 @@ def test_pull_asset_updates_genome_config(cfg_file, gname, aname, tname): def test_pull_asset_works_with_nonwritable_and_writable_rgc(cfg_file, gname, aname, tname, state): rgc = RefGenConf(filepath=cfg_file, writable=state) remove_asset_and_file(rgc, gname, aname, tname) + print("\nPulling; genome: {}, asset: {}, tag: {}\n".format(gname, aname, tname)) with mock.patch("refgenconf.refgenconf.query_yes_no", return_value=True): - print("\nPulling; genome: {}, asset: {}, tag: {}\n".format(gname, aname, tname)) 
rgc.pull(gname, aname, tname) + if state: + rgc.make_readonly() From 2eae0e1f50cd3c3e67f3798f6b200cd78b5e5059 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 30 Jun 2020 16:50:07 -0400 Subject: [PATCH 07/13] improve messaging --- refgenconf/refgenconf.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 087a7bd3..2ed698f1 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -645,7 +645,7 @@ def _raise_unpack_error(): no_asset_json.append(server_url) if num_servers == len(self[CFG_SERVERS_KEY]): _LOGGER.error("Asset '{}/{}:{}' not available on any of the following servers: {}". - format(genome, asset, determined_tag, ", ".join(no_asset_json))) + format(genome, asset, determined_tag, ", ".join(self[CFG_SERVERS_KEY]))) return _null_return() continue @@ -725,7 +725,8 @@ def msg_overwrite(): new_checksum = checksum(filepath) old_checksum = archive_data and archive_data.get(CFG_ARCHIVE_CHECKSUM_KEY) if old_checksum and new_checksum != old_checksum: - _LOGGER.error("Checksum mismatch: ({}, {})".format(new_checksum, old_checksum)) + _LOGGER.error("Downloaded archive ('{}') checksum mismatch: ({}, {})". 
+ format(filepath, new_checksum, old_checksum)) return _null_return() else: _LOGGER.debug("Matched checksum: '{}'".format(old_checksum)) From 2e9e802e00768da7a3455d4ad2d01868e0a821a3 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 30 Jun 2020 18:56:52 -0400 Subject: [PATCH 08/13] resolve https://github.com/refgenie/refgenie/issues/185 don't remove asset after overwrite decision, wait for the new download to finish --- refgenconf/refgenconf.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/refgenconf/refgenconf.py b/refgenconf/refgenconf.py index 2ed698f1..2344fb47 100755 --- a/refgenconf/refgenconf.py +++ b/refgenconf/refgenconf.py @@ -663,21 +663,15 @@ def _raise_unpack_error(): def preserve(): _LOGGER.info("Preserving existing: {}".format(tag_dir)) return _null_return() - - def msg_overwrite(): - _LOGGER.debug("Overwriting: {}".format(tag_dir)) - shutil.rmtree(tag_dir) - with self as rgc: - rgc.cfg_remove_assets(*gat) if force is False: return preserve() elif force is None: if not query_yes_no("Replace existing ({})?".format(tag_dir), "no"): return preserve() else: - msg_overwrite() + _LOGGER.debug("Overwriting: {}".format(tag_dir)) else: - msg_overwrite() + _LOGGER.debug("Overwriting: {}".format(tag_dir)) # check asset digests local-server match for each parent [self._chk_digest_if_avail(genome, x, server_url) @@ -738,6 +732,9 @@ def msg_overwrite(): # directory with the asset data inside and we transfer it # to the tag-named subdirectory untar(filepath, tmpdir) + if os.path.isdir(tag_dir): + shutil.rmtree(tag_dir) + _LOGGER.info("Removed existing directory: {}".format(tag_dir)) shutil.move(os.path.join(tmpdir, asset), tag_dir) if os.path.isfile(filepath): os.remove(filepath) From 06dd3bc0605a4a653858992cd6aff9465818fc33 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 30 Jun 2020 19:06:27 -0400 Subject: [PATCH 09/13] changelog and version --- docs/changelog.md | 6 ++++++ refgenconf/_version.py | 2 +- 2 
files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index 800f3f32..c4304a41 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,12 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.9.0] - unreleased + +### Changed +- `pull` so it does not remove asset after overwrite decision, wait for the archive download to finish +- file locking mechanism enhancements + ## [0.8.0] - 2020-06-25 ### Added diff --git a/refgenconf/_version.py b/refgenconf/_version.py index 777f190d..3e2f46a3 100644 --- a/refgenconf/_version.py +++ b/refgenconf/_version.py @@ -1 +1 @@ -__version__ = "0.8.0" +__version__ = "0.9.0" From b9393dcc7988a30918f6217aca8e39bbf686983c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 30 Jun 2020 19:10:10 -0400 Subject: [PATCH 10/13] set up test workflow, requirements --- .github/workflows/run-pytest.yml | 41 ++++++++++++++++++++++++++++++ requirements/requirements-all.txt | 4 +-- requirements/requirements-dev.txt | 7 ++--- requirements/requirements-test.txt | 5 ++-- 4 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/run-pytest.yml diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml new file mode 100644 index 00000000..e72a5006 --- /dev/null +++ b/.github/workflows/run-pytest.yml @@ -0,0 +1,41 @@ +name: Run pytests + +on: + push: + branches: [master, dev] + pull_request: + branches: [master, dev] + +jobs: + pytest: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dev dependancies + run: if [ -f requirements/requirements-dev.txt ]; then pip install -r 
requirements/requirements-dev.txt; fi + + - name: Install test dependancies + run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi + + - name: Install package + run: python -m pip install . + + - name: Run pytest tests + run: pytest tests --remote-data --cov=./ --cov-report=xml + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml + name: py-${{ matrix.python-version }}-${{ matrix.os }} diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 8592b1b8..264c3221 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,6 +2,6 @@ attmap>=0.12.5 pyyaml requests tqdm>=4.38.0 -ubiquerg>=0.6.0 -yacman>=0.6.9-dev +#ubiquerg>=0.6.1 +#yacman>=0.6.9 future \ No newline at end of file diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index fada5340..58ac9581 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -1,5 +1,2 @@ -pytest>=3.0.7 -pytest-remotedata -pyyaml>=5 -ubiquerg>=0.3 -veracitools \ No newline at end of file +git+git://github.com/databio/yacman@dev#yacman +git+git://github.com/pepkit/ubiquerg@dev#ubiquerg \ No newline at end of file diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 62fd5580..f5393f9f 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -1,4 +1,5 @@ -coveralls>=1.1 -pytest-cov==2.6.1 +pytest-cov +pytest pytest-remotedata +veracitools git+git://github.com/databio/refgenie_myplugin@master#egg=refgenie_myplugin From e5a50a5337ac20dda2a57270cb88b6df12f9e002 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 30 Jun 2020 19:11:51 -0400 Subject: [PATCH 11/13] add mock to test reqs --- requirements/requirements-test.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/requirements-test.txt 
b/requirements/requirements-test.txt index f5393f9f..e92ee43f 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -1,5 +1,6 @@ pytest-cov pytest pytest-remotedata +mock veracitools git+git://github.com/databio/refgenie_myplugin@master#egg=refgenie_myplugin From 6f79c121f32783477c7f636476a8f89e3274a942 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 30 Jun 2020 19:17:06 -0400 Subject: [PATCH 12/13] disable travis --- .travis.yml | 22 ---------------------- README.md | 6 +++--- 2 files changed, 3 insertions(+), 25 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 4333c36b..00000000 --- a/.travis.yml +++ /dev/null @@ -1,22 +0,0 @@ -language: python -python: - - "3.5" - - "3.6" - - "3.7" - - "3.8" -os: - - linux -before_install: - - pip install git+https://github.com/pepkit/ubiquerg.git@dev - - pip install git+https://github.com/databio/yacman.git@dev -install: - - pip install . 
- - pip install -r requirements/requirements-dev.txt - - pip install -r requirements/requirements-test.txt -script: pytest --remote-data --cov=refgenconf -after_success: - - coveralls -branches: - only: - - dev - - master diff --git a/README.md b/README.md index a3a96368..52f1d601 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # refgenconf -[![Build Status](https://travis-ci.org/databio/refgenconf.svg?branch=master)](https://travis-ci.org/databio/refgenconf) -[![Coverage Status](https://coveralls.io/repos/github/refgenie/refgenconf/badge.svg?branch=master)](https://coveralls.io/github/refgenie/refgenconf?branch=master) +![Run pytests](https://github.com/refgenie/refgenconf/workflows/Run%20pytests/badge.svg) +[![codecov](https://codecov.io/gh/refgenie/refgenconf/branch/master/graph/badge.svg)](https://codecov.io/gh/refgenie/refgenconf) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/refgenconf/README.html) -Configuration object for refgenie *et al.* +Configuration object for [refgenie](https://doi.org/10.1093/gigascience/giz149) *et al.* Documentation for `refgenconf` can be found with the [primary documentation for refgenie](http://refgenie.databio.org). From 514db67357d1daf5e9498c8346720d5e62a1afb1 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 1 Jul 2020 11:08:10 -0400 Subject: [PATCH 13/13] prep release --- docs/changelog.md | 2 +- requirements/requirements-all.txt | 3 +-- requirements/requirements-dev.txt | 2 -- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index c4304a41..5b57e609 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,7 +2,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. 
-## [0.9.0] - unreleased +## [0.9.0] - 2020-07-01 ### Changed - `pull` so it does not remove asset after overwrite decision, wait for the archive download to finish diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 264c3221..309245da 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,6 +2,5 @@ attmap>=0.12.5 pyyaml requests tqdm>=4.38.0 -#ubiquerg>=0.6.1 -#yacman>=0.6.9 +yacman>=0.6.9 future \ No newline at end of file diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 58ac9581..e69de29b 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -1,2 +0,0 @@ -git+git://github.com/databio/yacman@dev#yacman -git+git://github.com/pepkit/ubiquerg@dev#ubiquerg \ No newline at end of file