From 0f2292dd4e9a49cc68a2f70b1e3a7b4452347648 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Thu, 2 May 2019 16:37:16 -0400 Subject: [PATCH 1/9] recover dev updates --- .gitignore | 3 ++- divvy/utils.py | 53 -------------------------------------------------- 2 files changed, 2 insertions(+), 54 deletions(-) diff --git a/.gitignore b/.gitignore index 6d08cdd..63862ee 100644 --- a/.gitignore +++ b/.gitignore @@ -73,8 +73,9 @@ open_pipelines/ *RESERVE* # Build-related stuff +build/ dist/ -peppy.egg-info/ +divvy.egg-info/ #ipynm checkpoints *ipynb_checkpoints* diff --git a/divvy/utils.py b/divvy/utils.py index 19645e8..ccf6bc7 100644 --- a/divvy/utils.py +++ b/divvy/utils.py @@ -7,7 +7,6 @@ import random import re import string -import subprocess as sp import sys if sys.version_info < (3, 0): from urlparse import urlparse @@ -21,58 +20,6 @@ _LOGGER = logging.getLogger(__name__) -def add_project_sample_constants(sample, project): - """ - Update a Sample with constants declared by a Project. - - :param Sample sample: sample instance for which to update constants - based on Project - :param Project project: Project with which to update Sample; it - may or may not declare constants. If not, no update occurs. - :return Sample: Updates Sample instance, according to any and all - constants declared by the Project. - """ - sample.update(project.constants) - return sample - - -def check_bam(bam, o): - """ - Check reads in BAM file for read type and lengths. - - :param str bam: BAM file path. - :param int o: Number of reads to look at for estimation. 
- """ - try: - p = sp.Popen(['samtools', 'view', bam], stdout=sp.PIPE) - # Count paired alignments - paired = 0 - read_lengths = defaultdict(int) - while o > 0: # Count down number of lines - line = p.stdout.readline().decode().split("\t") - flag = int(line[1]) - read_lengths[len(line[9])] += 1 - if 1 & flag: # check decimal flag contains 1 (paired) - paired += 1 - o -= 1 - p.kill() - except OSError: - reason = "Note (samtools not in path): For NGS inputs, " \ - "pep needs samtools to auto-populate " \ - "'read_length' and 'read_type' attributes; " \ - "these attributes were not populated." - raise OSError(reason) - - _LOGGER.debug("Read lengths: {}".format(read_lengths)) - _LOGGER.debug("paired: {}".format(paired)) - return read_lengths, paired - - -def check_fastq(fastq, o): - raise NotImplementedError("Detection of read type/length for " - "fastq input is not yet implemented.") - - def check_sample_sheet_row_count(sheet, filepath): """ Quick-and-dirt proxy for Sample count validation. From fafab50d338be44ebc8649c8b3569b7533ec8b18 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Wed, 8 May 2019 10:10:56 -0400 Subject: [PATCH 2/9] remove more utils --- divvy/utils.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/divvy/utils.py b/divvy/utils.py index ccf6bc7..c673353 100644 --- a/divvy/utils.py +++ b/divvy/utils.py @@ -51,38 +51,6 @@ def copy(self): return obj -def expandpath(path): - """ - Expand a filesystem path that may or may not contain user/env vars. - - :param str path: path to expand - :return str: expanded version of input path - """ - return os.path.expandvars(os.path.expanduser(path)).replace("//", "/") - - -def get_file_size(filename): - """ - Get size of all files in gigabytes (Gb). - - :param str | collections.Iterable[str] filename: A space-separated - string or list of space-separated strings of absolute file paths. - :return float: size of file(s), in gigabytes. 
- """ - if filename is None: - return float(0) - if type(filename) is list: - return float(sum([get_file_size(x) for x in filename])) - try: - total_bytes = sum([float(os.stat(f).st_size) - for f in filename.split(" ") if f is not '']) - except OSError: - # File not found - return 0.0 - else: - return float(total_bytes) / (1024 ** 3) - - def import_from_source(module_filepath): """ Import a module from a particular filesystem location. From 41c396f1a45600d11297d5aab328563a08992ef7 Mon Sep 17 00:00:00 2001 From: Vince Reuter Date: Wed, 8 May 2019 11:22:55 -0400 Subject: [PATCH 3/9] remove parse_ftype, now in ngstk --- divvy/utils.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/divvy/utils.py b/divvy/utils.py index c673353..6843371 100644 --- a/divvy/utils.py +++ b/divvy/utils.py @@ -121,26 +121,6 @@ def parse_config_file(conf_file): return env_settings -def parse_ftype(input_file): - """ - Checks determine filetype from extension. - - :param str input_file: String to check. - :return str: filetype (extension without dot prefix) - :raises TypeError: if file does not appear of a supported type - """ - if input_file.endswith(".bam"): - return "bam" - elif input_file.endswith(".fastq") or \ - input_file.endswith(".fq") or \ - input_file.endswith(".fq.gz") or \ - input_file.endswith(".fastq.gz"): - return "fastq" - else: - raise TypeError("Type of input file ends in neither '.bam' " - "nor '.fastq' [file: '" + input_file + "']") - - def parse_text_data(lines_or_path, delimiter=os.linesep): """ Interpret input argument as lines of data. 
This is intended to support From eec8293b207911479ee08c3f819e7a3bc59c5270 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 9 May 2019 11:13:59 -0400 Subject: [PATCH 4/9] ignore built docs --- docs/autodoc_build/.gitignore | 2 + docs/autodoc_build/divvy.md | 207 ---------------------------------- 2 files changed, 2 insertions(+), 207 deletions(-) create mode 100644 docs/autodoc_build/.gitignore delete mode 100644 docs/autodoc_build/divvy.md diff --git a/docs/autodoc_build/.gitignore b/docs/autodoc_build/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/docs/autodoc_build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/docs/autodoc_build/divvy.md b/docs/autodoc_build/divvy.md deleted file mode 100644 index a821974..0000000 --- a/docs/autodoc_build/divvy.md +++ /dev/null @@ -1,207 +0,0 @@ -# Package divvy Documentation - -## Class ComputingConfiguration -Represents computing configuration objects. - -The ComputingConfiguration class provides a computing configuration object -that is an *in memory* representation of a `divvy` computing configuration -file. This object has various functions to allow a user to activate, modify, -and retrieve computing configuration files, and use these values to populate -job submission script templates. - -**Parameters:** - -- `config_file` -- `str`: YAML file specifying computing package data (The`DIVCFG` file). -- `no_env_error` -- `type`: type of exception to raise if divvysettings can't be established, optional; if null (the default), a warning message will be logged, and no exception will be raised. -- `no_compute_exception` -- `type`: type of exception to raise if computesettings can't be established, optional; if null (the default), a warning message will be logged, and no exception will be raised. - - -### activate\_package -Activates a compute package. 
- -This copies the computing attributes from the configuration file into -the `compute` attribute, where the class stores current compute -settings. -```python -def activate_package(self, package_name) -``` - -**Parameters:** - -- `package_name` -- `str`: name for non-resource compute bundle,the name of a subsection in an environment configuration file - - -**Returns:** - -`bool`: success flag for attempt to establish compute settings - - - - -### clean\_start -Clear current active settings and then activate the given package. -```python -def clean_start(self, package_name) -``` - -**Parameters:** - -- `package_name` -- `str`: name of the resource package to activate - - -**Returns:** - -`bool`: success flag - - - - -### compute\_env\_var -Environment variable through which to access compute settings. -```python -def compute_env_var(self) -``` - -**Returns:** - -`list[str]`: names of candidate environment variables, for whichvalue may be path to compute settings file; first found is used. - - - - -### default\_config\_file -Path to default compute environment settings file. -```python -def default_config_file(self) -``` - -**Returns:** - -`str`: Path to default compute settings file - - - - -### get\_active\_package -Returns settings for the currently active compute package -```python -def get_active_package(self) -``` - -**Returns:** - -`PathExAttMap`: data defining the active compute package - - - - -### list\_compute\_packages -Returns a list of available compute packages. -```python -def list_compute_packages(self) -``` - -**Returns:** - -`set[str]`: names of available compute packages - - - - -### reset\_active\_settings -Clear out current compute settings. -```python -def reset_active_settings(self) -``` - -**Returns:** - -`bool`: success flag - - - - -### template -Get the currently active submission template. 
-```python -def template(self) -``` - -**Returns:** - -`str`: submission script content template for current state - - - - -### templates\_folder -Path to folder with default submission templates. -```python -def templates_folder(self) -``` - -**Returns:** - -`str`: path to folder with default submission templates - - - - -### update\_packages -Parse data from divvy configuration file. - -Given a divvy configuration file, this function will update (not -overwrite) existing compute packages with existing values. It does not -affect any currently active settings. -```python -def update_packages(self, config_file) -``` - -**Parameters:** - -- `config_file` -- `str`: path to file withnew divvy configuration data - - - - -### write\_script -Given currently active settings, populate the active template to write a submission script. -```python -def write_script(self, output_path, extra_vars=None) -``` - -**Parameters:** - -- `output_path` -- `str`: Path to file to write as submission script -- `extra_vars` -- `Iterable[Mapping]`: A list of Dict objects with key-value pairswith which to populate template fields. These will override any values in the currently active compute package. - - -**Returns:** - -`str`: Path to the submission script file - - - - -### write\_submit\_script -Write a submission script by populating a template with data. -```python -def write_submit_script(fp, content, data) -``` - -**Parameters:** - -- `fp` -- `str`: Path to the file to which to create/write submissions script. 
-- `content` -- `str`: Template for submission script, defining keys thatwill be filled by given data -- `data` -- `Mapping`: a "pool" from which values are available to replacekeys in the template - - -**Returns:** - -`str`: Path to the submission script - - - - - -**Version Information**: `divvy` v0.3dev, generated by `lucidoc` v0.3 \ No newline at end of file From f772800b448c50b81aa04baf1f8f2d62523d6cda Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 9 May 2019 11:14:34 -0400 Subject: [PATCH 5/9] update url --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 8e03e3f..15b661f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,6 +1,6 @@ site_name: Divvy site_logo: img/divvy_logo_dark.svg -site_url: http://code.databio.org/divvy/ +site_url: http://divvy.databio.org/ repo_url: http://github.com/databio/divvy pypi_name: divvy From 8ec32f0b5028cc505e1f7d262b3aa0f7f8f2ba0a Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 9 May 2019 11:47:32 -0400 Subject: [PATCH 6/9] update docs --- docs/configuration.md | 26 ++++++++++++-------------- docs/containers.md | 8 ++++---- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 201f3e6..31aa0b0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,17 +1,8 @@ +# The divvy configuration file -# Divvy configuration files +At the heart of `divvy` is the *divvy configuration file*, or `DIVCFG` for short. This is a `yaml` file that specifies a user's available *compute packages*. Each compute package represents a computing resource; for example, by default we have a package (called `local`) that populates templates to simply run jobs in the local console, and another package (called `slurm`) with a generic template to submit jobs to a SLURM cluster resource manager. Users can customize compute packages as much as needed.
Here is an example `divvy` configuration file: -## The DIVCFG environment variable - -At the heart of `divvy` is a `yaml` configuration file that specifies your available `compute_package`s. Each package represents a computing resource; for example, by default we have 1 package (called `local`) that populates templates to simple run jobs in the local console, and another package (called `slurm`) with a generic template to submit jobs to a SLURM cluster resource manager. By just choosing `local` or `slurm` you can change where your job is run. You can customize your compute packages as much as you need. - -The file specifying the compute packages is called the `DIVCFG` file. If using `divvy` from within `python`, you can pass a configuration file when you construct a new `ComputingConfiguration` object. If you don't specify one, `divvy` will first look for a file in the `$DIVCFG` environment variable. If it cannot find one there, then it will load a default configuration file with a few basic compute packages. - -## The DIVCFG file - -The DIVCFG file is a `yaml` file listing different *compute packages*. Here is an example `divvy` configuration file: - -```{yaml} +```{console} compute_packages: default: submission_template: templates/local_template.sub @@ -44,7 +35,7 @@ Each compute package specifies a path to a template file (`submission_template`) ## Resources -You may notice that the compute config file does not specify resources to request (like memory, CPUs, or time). Yet, these are required in order to submit a job to a cluster. **Resources are not handled by the divcfg file** because they not relative to a particular computing environment; instead they vary by pipeline and sample. As such, these items should be defined at other stages. +You may notice that the compute config file does not specify resources to request (like memory, CPUs, or time). Yet, these are required in order to submit a job to a cluster. 
**Resources are not handled by the divcfg file** because they are not specific to a particular computing environment; instead they vary by pipeline and sample. As such, these items should be provided elsewhere. ## Template files @@ -69,4 +60,11 @@ srun {CODE} Template files use variables (*e.g.* `{VARIABLE}`), which will be populated independently for each job. -`Divvy` comes with a few commonly used templates (in the [submit_templates](/https://github.com/pepkit/divvy/tree/master/divvy/submit_templates) folder). Many users will not need to tweak the template files, but if you need to, you can also create your own templates, giving `divvy` ultimate flexibility to work with any compute infrastructure in any environment. To create a custom template, just follow the examples and put together what you need. Then, point to your custom template in the `submission_template` attribute of a compute package in your `DIVCFG` config file. +`Divvy` comes with a few commonly used templates (in the [submit_templates](https://github.com/pepkit/divvy/tree/master/divvy/submit_templates) folder). Many users will not need to tweak the template files, but if you need to, you can also create your own templates, giving `divvy` ultimate flexibility to work with any compute infrastructure in any environment. To create a custom template, just follow the examples and put together what you need. Then, point to your custom template in the `submission_template` attribute of a compute package in your `DIVCFG` config file. + + + +## Configuration file priority lookup + + When `divvy` starts, it checks a few places for the `DIVCFG` file. First, the user may specify a `DIVCFG` file when invoking `divvy` either from the command line or from within python. If the file is not provided, `divvy` will next look for a file path in the `$DIVCFG` environment variable. If it cannot find one there, then it will load a default configuration file with a few basic compute packages.
For most users, we recommend setting the `DIVCFG` environment variable, as it is the most convenient option. + diff --git a/docs/containers.md b/docs/containers.md index 2bfc0a5..2240ee9 100644 --- a/docs/containers.md +++ b/docs/containers.md @@ -1,11 +1,11 @@ # Configuring containers with divvy -The `DIVCFG` framework is a natural way to run commands in a container, for example, using `docker` or `singularity`. All we need to do is 1) design a template that will run the job in the container, instead of natively; and 2) create a new compute package that will use that template. +The divvy template framework is a natural way to run commands in a container, for example, using `docker` or `singularity`. All we need to do is 1) design a template that will run the job in the container, instead of natively; and 2) create a new compute package that will use that template. ## A template for container runs -[The divcfg repository](http://github.com/pepkit/pepenv) includes [templates](https://github.com/pepkit/pepenv/tree/master/templates) for the following scenarios: +[The divcfg repository](http://github.com/pepkit/divcfg) includes [templates](https://github.com/pepkit/pepenv/tree/master/templates) for the following scenarios: - singularity on SLURM - singularity on localhost @@ -20,11 +20,11 @@ srun singularity exec instance://{JOBNAME}_image {CODE} singularity instance.stop {JOBNAME}_image ``` -This template uses a few of the automatic variables defined earlier (`JOBNAME` and `CODE`) but adds two more: `{SINGULARITY_ARGS}` and `{SINGULARITY_IMAGE}`. For the *image*, this should point to a singularity image that could vary by pipeline, so it makes most sense to define this variable in the `pipeline_interface.yaml` file. So, any pipeline that provides a container should probably include a `singularity_image` attribute providing a place to point to the appropriate container image.
+This particular template uses some variables provided by [looper](http://looper.databio.org) (`JOBNAME` and `CODE`), as well as a few singularity-specific arguments: `{SINGULARITY_ARGS}` and `{SINGULARITY_IMAGE}`. These arguments could be defined in different places. For example, the *image* variable should point to a singularity image that could vary by pipeline, so it makes most sense to define this variable individually for each pipeline. So, any pipeline that provides a container should probably include a `singularity_image` attribute providing a place to point to the appropriate container image. Of course, you will also need to make sure that you have access to `singularity` command from the compute nodes; on some clusters, you may need to add a `module load singularity` (or some variation) to enable it. -The `{SINGULARITY_ARGS}` variable comes just right after the `instance.start` command, and can be used to pass any command-line arguments to singularity. We use these, for example, to bind host disk paths into the container. **It is critical that you explicitly bind any file systems with data necessary for the pipeline so the running container can see those files**. The [singularity documentation](https://singularity.lbl.gov/docs-mount#specifying-bind-paths) explains this, and you can find other arguments detailed there. Because this setting describes something about the computing environment (rather than an individual pipeline or sample), it makes most sense to put it in the `DIVCFG` environment configuration file. The next section includes examples of how to use `singularity_args`. +The `{SINGULARITY_ARGS}` variable comes right after the `instance.start` command, and can be used to pass any command-line arguments to singularity. We use these, for example, to bind host disk paths into the container. **It is critical that you explicitly bind any file systems with data necessary for the pipeline so the running container can see those files**.
The [singularity documentation](https://singularity.lbl.gov/docs-mount#specifying-bind-paths) explains this, and you can find other arguments detailed there. Because this setting describes something about the computing environment (rather than an individual pipeline or sample), it makes most sense to put it in the `DIVCFG` file for a particular compute package. The next section includes examples of how to use `singularity_args`. ## Adding compute packages for container templates From df670b8b85349f4d0c9e238ced0225575758b928 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 9 May 2019 12:04:12 -0400 Subject: [PATCH 7/9] standardize version numbers --- docs/changelog.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index d923c0b..09052d2 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -6,7 +6,7 @@ ### Changed - Minor updates to improve interface with looper and peppy -## divvy [0.3] - 2019-04-19 +## divvy [0.3.0] - 2019-04-19 ### Changed - Safer use of `yaml` - Reduced verbosity for clearer usage @@ -16,13 +16,13 @@ ### Changed - For environment variable population, use updated version of `attmap` -## divvy [0.2] - 2019-03-13 +## divvy [0.2.0] - 2019-03-13 ### Added - Implemented a `divvy` command-line interface ### Changed - reduced verbosity -## divvy [0.1] - 2019-03-04 +## divvy [0.1.0] - 2019-03-04 ### Changed - `divvy` looks for computing configuration file path in both `$DIVCFG` and `$PEPENV` environment variables and the former is given priority - `ComputingConfiguration` class extends `AttMap` class, not `AttributeDict` From bb80f5080ea9febead79d0b3b6925f84e805e504 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 9 May 2019 12:09:02 -0400 Subject: [PATCH 8/9] fix list subparser --- divvy/compute.py | 1 + 1 file changed, 1 insertion(+) diff --git a/divvy/compute.py b/divvy/compute.py index aac2be1..b321c1d 100644 --- a/divvy/compute.py +++ b/divvy/compute.py @@ -312,6 +312,7 @@ def 
add_subparser(cmd): cmd, description=msg_by_cmd[cmd], help=msg_by_cmd[cmd]) write_subparser = add_subparser("write") + list_subparser = add_subparser("list") write_subparser.add_argument( "-S", "--settings", From f1a2e78c07b886ce0715676df948bd50d8b81df7 Mon Sep 17 00:00:00 2001 From: nsheff Date: Thu, 9 May 2019 12:11:15 -0400 Subject: [PATCH 9/9] changelog, version --- divvy/_version.py | 2 +- docs/changelog.md | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/divvy/_version.py b/divvy/_version.py index 8b0856f..e9ba10a 100644 --- a/divvy/_version.py +++ b/divvy/_version.py @@ -1,2 +1,2 @@ -__version__ = "0.4dev" +__version__ = "0.3.2" diff --git a/docs/changelog.md b/docs/changelog.md index 09052d2..6b9d6fa 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,27 +2,31 @@ ## Unreleased -## divvy [0.3.1] - 2019-04-24 +## [0.3.2] -- 2019-05-09 +### Changed +- Fixed `divvy list` command + +## [0.3.1] -- 2019-04-24 ### Changed - Minor updates to improve interface with looper and peppy -## divvy [0.3.0] - 2019-04-19 +## [0.3.0] -- 2019-04-19 ### Changed - Safer use of `yaml` - Reduced verbosity for clearer usage - The CLI now uses `divvy write` and `divvy list` subcommands -## divvy [0.2.1] - 2019-03-19 +## [0.2.1] -- 2019-03-19 ### Changed - For environment variable population, use updated version of `attmap` -## divvy [0.2.0] - 2019-03-13 +## [0.2.0] -- 2019-03-13 ### Added - Implemented a `divvy` command-line interface ### Changed - reduced verbosity -## divvy [0.1.0] - 2019-03-04 +## [0.1.0] -- 2019-03-04 ### Changed - `divvy` looks for computing configuration file path in both `$DIVCFG` and `$PEPENV` environment variables and the former is given priority - `ComputingConfiguration` class extends `AttMap` class, not `AttributeDict`