Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 0.3 #28

Merged
merged 20 commits into from
Apr 20, 2019
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion divvy/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "0.2.1"
__version__ = "0.3"

94 changes: 51 additions & 43 deletions divvy/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
import argparse
import logging
import os
from sys import stdout
import sys
import yaml
from yaml import SafeLoader

from attmap import PathExAttMap
from .const import \
COMPUTE_SETTINGS_VARNAME, \
DEFAULT_COMPUTE_RESOURCES_NAME
from .utils import write_submit_script, get_first_env_var
from . import __version__
from .const import COMPUTE_SETTINGS_VARNAME, DEFAULT_COMPUTE_RESOURCES_NAME, \
NEW_COMPUTE_KEY
from .utils import parse_config_file, write_submit_script, get_first_env_var
from . import __version__

_LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -215,36 +215,26 @@ def update_packages(self, config_file):
overwrite) existing compute packages with existing values. It does not
affect any currently active settings.

:param str config_file: path to file with
new divvy configuration data
:param str config_file: path to file with new divvy configuration data
"""
with open(config_file, 'r') as f:
_LOGGER.info("Loading divvy config file: %s", config_file)
env_settings = yaml.load(f)
_LOGGER.debug("Parsed environment settings: %s",
str(env_settings))

# Any compute.submission_template variables should be made
# absolute, relative to current divvy configuration file.
if "compute" in env_settings:
_LOGGER.warning("In your divvy config file, please use 'compute_packages' instead of 'compute'")
env_settings["compute_packages"] = env_settings["compute"]

loaded_packages = env_settings["compute_packages"]
for key, value in loaded_packages.items():
if type(loaded_packages[key]) is dict:
for key2, value2 in loaded_packages[key].items():
if key2 == "submission_template":
if not os.path.isabs(loaded_packages[key][key2]):
loaded_packages[key][key2] = os.path.join(
os.path.dirname(config_file),
loaded_packages[key][key2])

if self.compute_packages is None:
self.compute_packages = PathExAttMap(loaded_packages)
else:
self.compute_packages.add_entries(loaded_packages)
_LOGGER.info("Available packages: {}".format(', '.join(self.list_compute_packages())))
env_settings = parse_config_file(config_file)
loaded_packages = env_settings[NEW_COMPUTE_KEY]
for key, value in loaded_packages.items():
if type(loaded_packages[key]) is dict:
for key2, value2 in loaded_packages[key].items():
if key2 == "submission_template":
if not os.path.isabs(loaded_packages[key][key2]):
loaded_packages[key][key2] = os.path.join(
os.path.dirname(config_file),
loaded_packages[key][key2])

if self.compute_packages is None:
self.compute_packages = PathExAttMap(loaded_packages)
else:
self.compute_packages.add_entries(loaded_packages)

_LOGGER.debug("Available divvy packages: {}".
format(', '.join(self.list_compute_packages())))
self.config_file = config_file

def write_script(self, output_path, extra_vars=None):
Expand All @@ -271,7 +261,7 @@ def write_script(self, output_path, extra_vars=None):
def _handle_missing_env_attrs(self, config_file, when_missing):
""" Default environment settings aren't required; warn, though. """
missing_env_attrs = \
[attr for attr in ["compute_packages", "config_file"]
[attr for attr in [NEW_COMPUTE_KEY, "config_file"]
if not hasattr(self, attr) or getattr(self, attr) is None]
if not missing_env_attrs:
return
Expand All @@ -289,10 +279,11 @@ def format_help(self):
return "version: {}\n".format(__version__) + \
super(_VersionInHelpParser, self).format_help()


def main():
""" Primary workflow """

banner = "%(prog)s - write compute jobs that can be submitted to any computing resource"
banner = "%(prog)s - write compute job scripts that can be submitted to any computing resource"
additional_description = "\nhttps://github.com/pepkit/divvy"

parser = _VersionInHelpParser(
Expand All @@ -308,33 +299,50 @@ def main():
"-C", "--config",
help="Divvy configuration file.")

parser.add_argument(
subparsers = parser.add_subparsers(dest="command")

# Individual subcommands
msg_by_cmd = {
"list": "List available compute packages",
"write": "Write a submit script"
}

def add_subparser(cmd):
return subparsers.add_parser(cmd, description=msg_by_cmd[cmd], help=msg_by_cmd[cmd])

write_subparser = add_subparser("write")

write_subparser.add_argument(
"-S", "--settings",
help="YAML file with job settings to populate the template.")

parser.add_argument(
write_subparser.add_argument(
"-P", "--package", default="default",
help="Compute package")

parser.add_argument(
write_subparser.add_argument(
"-O", "--outfile", required=True,
help="Output filepath")


args, remaining_args = parser.parse_known_args()

keys = [str.replace(x, "--", "") for x in remaining_args[::2]]
custom_vars = dict(zip(keys, remaining_args[1::2]))
dcc = ComputingConfiguration(args.config)

if args.command == "list":
_LOGGER.info("Available compute packages: {}".format(', '.join(dcc.list_compute_packages())))
sys.exit(1)

dcc.activate_package(args.package)
if args.settings:
with open(args.settings, 'r') as f:
_LOGGER.info("Loading yaml settings file: %s", args.settings)
yaml_vars = yaml.load(f)
yaml_vars = yaml.load(f, SafeLoader)
dcc.write_script(args.outfile, [custom_vars, yaml_vars])
else:
dcc.write_script(args.outfile, custom_vars)


if __name__ == '__main__':
try:
sys.exit(main())
Expand Down
2 changes: 2 additions & 0 deletions divvy/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
# Compute-related
COMPUTE_SETTINGS_VARNAME = ["DIVCFG", "PEPENV"]
DEFAULT_COMPUTE_RESOURCES_NAME = "default"
OLD_COMPUTE_KEY = "compute"
NEW_COMPUTE_KEY = "compute_packages"
COMPUTE_CONSTANTS = ["COMPUTE_SETTINGS_VARNAME",
"DEFAULT_COMPUTE_RESOURCES_NAME"]

Expand Down
49 changes: 23 additions & 26 deletions divvy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,14 @@
else:
from urllib.parse import urlparse
import warnings


import yaml

from .const import GENERIC_PROTOCOL_KEY, SAMPLE_INDEPENDENT_PROJECT_SECTIONS
from .const import GENERIC_PROTOCOL_KEY, NEW_COMPUTE_KEY, OLD_COMPUTE_KEY, \
SAMPLE_INDEPENDENT_PROJECT_SECTIONS


_LOGGER = logging.getLogger(__name__)



def add_project_sample_constants(sample, project):
"""
Update a Sample with constants declared by a Project.
Expand All @@ -40,7 +37,6 @@ def add_project_sample_constants(sample, project):
return sample



def alpha_cased(text, lower=False):
"""
Filter text to just letters and homogenize case.
Expand All @@ -54,7 +50,6 @@ def alpha_cased(text, lower=False):
return text.lower() if lower else text.upper()



def check_bam(bam, o):
"""
Check reads in BAM file for read type and lengths.
Expand Down Expand Up @@ -87,13 +82,11 @@ def check_bam(bam, o):
return read_lengths, paired



def check_fastq(fastq, o):
raise NotImplementedError("Detection of read type/length for "
"fastq input is not yet implemented.")



def check_sample_sheet_row_count(sheet, filepath):
"""
Quick-and-dirt proxy for Sample count validation.
Expand All @@ -113,7 +106,6 @@ def check_sample_sheet_row_count(sheet, filepath):
return len(sheet) == len(lines) - deduction



def copy(obj):
def copy(self):
"""
Expand All @@ -126,7 +118,6 @@ def copy(self):
return obj



def expandpath(path):
"""
Expand a filesystem path that may or may not contain user/env vars.
Expand All @@ -137,7 +128,6 @@ def expandpath(path):
return os.path.expandvars(os.path.expanduser(path)).replace("//", "/")



def get_file_size(filename):
"""
Get size of all files in gigabytes (Gb).
Expand All @@ -160,7 +150,6 @@ def get_file_size(filename):
return float(total_bytes) / (1024 ** 3)



def fetch_samples(proj, inclusion=None, exclusion=None):
"""
Collect samples of particular protocol(s).
Expand Down Expand Up @@ -217,7 +206,6 @@ def keep(s):
return list(filter(keep, proj.samples))



def grab_project_data(prj):
"""
From the given Project, grab Sample-independent data.
Expand Down Expand Up @@ -246,7 +234,6 @@ def grab_project_data(prj):
return data



def import_from_source(module_filepath):
"""
Import a module from a particular filesystem location.
Expand Down Expand Up @@ -285,7 +272,6 @@ def import_from_source(module_filepath):
return mod



def is_url(maybe_url):
"""
Determine whether a path is a URL.
Expand All @@ -296,6 +282,27 @@ def is_url(maybe_url):
return urlparse(maybe_url).scheme != ""


def parse_config_file(conf_file):
"""
Parse a divvy configuration file.

:param str conf_file: path to divvy configuration file
:return Mapping: compute settings as declared in config file
"""
with open(conf_file, 'r') as f:
_LOGGER.info("Loading divvy config file: %s", conf_file)
env_settings = yaml.load(f, yaml.SafeLoader)
_LOGGER.debug("Parsed environment settings: %s",
str(env_settings))
# Any compute.submission_template variables should be made
# absolute, relative to current divvy configuration file.
if OLD_COMPUTE_KEY in env_settings:
warnings.warn("Divvy compute configuration '{}' section changed "
"to '{}'".format(OLD_COMPUTE_KEY, NEW_COMPUTE_KEY),
DeprecationWarning)
env_settings[NEW_COMPUTE_KEY] = env_settings[OLD_COMPUTE_KEY]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It bothers me that this still allows subsequent mutations to have the keys get out of sync, but this is used in small enough fashion that I don't think it's a big deal. This sort of thing motivates pepkit/attmap#20.
Same as #28 (comment)

return env_settings


def parse_ftype(input_file):
"""
Expand All @@ -317,7 +324,6 @@ def parse_ftype(input_file):
"nor '.fastq' [file: '" + input_file + "']")



def parse_text_data(lines_or_path, delimiter=os.linesep):
"""
Interpret input argument as lines of data. This is intended to support
Expand Down Expand Up @@ -346,7 +352,6 @@ def parse_text_data(lines_or_path, delimiter=os.linesep):
format(lines_or_path, type(lines_or_path)))



def sample_folder(prj, sample):
"""
Get the path to this Project's root folder for the given Sample.
Expand All @@ -360,7 +365,6 @@ def sample_folder(prj, sample):
sample["sample_name"])



def write_submit_script(fp, content, data):
"""
Write a submission script by populating a template with data.
Expand Down Expand Up @@ -390,7 +394,6 @@ def write_submit_script(fp, content, data):
return fp



@contextlib.contextmanager
def standard_stream_redirector(stream):
"""
Expand All @@ -411,7 +414,6 @@ def standard_stream_redirector(stream):
sys.stdout, sys.stderr = genuine_stdout, genuine_stderr



def warn_derived_cols():
_warn_cols_to_attrs("derived")

Expand All @@ -425,7 +427,6 @@ def _warn_cols_to_attrs(prefix):
"as {pfx}_attributes".format(pfx=prefix), DeprecationWarning)



class CommandChecker(object):
"""
Validate PATH availability of executables referenced by a config file.
Expand All @@ -440,7 +441,6 @@ class CommandChecker(object):
the check names parameter, but for specific sections to skip.
"""


def __init__(self, path_conf_file,
sections_to_check=None, sections_to_skip=None):

Expand Down Expand Up @@ -506,7 +506,6 @@ def __init__(self, path_conf_file,
self._logger.debug("Command '%s': %s", command,
"SUCCESS" if success else "FAILURE")


def _store_status(self, section, command, name):
"""
Based on new command execution attempt, update instance's
Expand All @@ -522,7 +521,6 @@ def _store_status(self, section, command, name):
self.failures.add(command)
return succeeded


@property
def failed(self):
"""
Expand All @@ -540,7 +538,6 @@ def failed(self):
return 0 == len(self.failures)



def is_command_callable(command, name=""):
"""
Check if command can be called.
Expand Down
Loading