From 61e906bd8ce0c106a648272ab201f685786c6656 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 25 Nov 2024 10:43:24 -0500 Subject: [PATCH 1/9] fix: get hpc modules out of dict --- src/ccbr_tools/pipeline/nextflow.py | 9 +++++---- tests/test_hpc.py | 3 +++ tests/test_nextflow.py | 13 ++++++++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/ccbr_tools/pipeline/nextflow.py b/src/ccbr_tools/pipeline/nextflow.py index 7a8343d..2281e1c 100755 --- a/src/ccbr_tools/pipeline/nextflow.py +++ b/src/ccbr_tools/pipeline/nextflow.py @@ -34,6 +34,7 @@ def run( nextflow_command = ["nextflow", "run", nextfile_path] hpc = get_hpc() + print("HPC", hpc.name) if mode == "slurm" and not hpc: raise ValueError("mode is 'slurm' but no HPC environment was detected") # add any additional Nextflow commands @@ -60,10 +61,8 @@ def run( ): # only add to the profiles if there are any. there are none when champagne is run on GitHub Actions. args_dict["-profile"] = ",".join(sorted(profiles)) nextflow_command += list(f"{k} {v}" for k, v in args_dict.items()) - - # Print nextflow command + # turn command into a string nextflow_command = " ".join(str(nf) for nf in nextflow_command) - msg_box("Nextflow command", errmsg=nextflow_command) if mode == "slurm": slurm_filename = "submit_slurm.sh" @@ -83,11 +82,13 @@ def run( msg_box("Slurm batch job", errmsg=run_command) elif mode == "local": if hpc: - nextflow_command = f'bash -c "module load {hpc.modules} && {hpc.env_vars} && {nextflow_command}"' + hpc_modules = hpc.modules["nxf"] + nextflow_command = f'bash -c "module load {hpc_modules} && {hpc.env_vars} && {nextflow_command}"' run_command = nextflow_command else: raise ValueError(f"mode {mode} not recognized") # Run Nextflow + msg_box("Nextflow command", errmsg=nextflow_command) if not debug: shell_run(run_command, capture_output=False) diff --git a/tests/test_hpc.py b/tests/test_hpc.py index 1eae843..ae50837 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -27,3 +27,6 @@ def test_hpc_frce(): def test_hpc_none(): hpc = get_hpc(debug=" ") assert not any([hpc, hpc.name, *hpc.modules.values(), hpc.singularity_sif_dir]) + + +print(get_hpc()) diff --git a/tests/test_nextflow.py b/tests/test_nextflow.py index fa7728d..589b748 100644 --- a/tests/test_nextflow.py +++ b/tests/test_nextflow.py @@ -1,8 +1,15 @@ from ccbr_tools.pipeline.nextflow import run +from ccbr_tools.pipeline.hpc import get_hpc from ccbr_tools.shell import exec_in_context def test_nextflow_basic(): - assert "nextflow run CCBR/CHAMPAGNE" in exec_in_context( - run, "CCBR/CHAMPAGNE", debug=True - ) + output = exec_in_context(run, "CCBR/CHAMPAGNE", debug=True) + assert "nextflow run CCBR/CHAMPAGNE" in output + + +def test_nextflow_hpc(): + hpc = get_hpc() + if hpc.name == "biowulf" or hpc.name == "frce": + output = exec_in_context(run, "CCBR/CHAMPAGNE", debug=True) + assert "module load" in output From 9195e49d1e584da332aeb916ffa90a8b8ad854df Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 25 Nov 2024 11:14:49 -0500 Subject: [PATCH 2/9] test: allow which_vpn.sh to fail --- scripts/which_vpn.sh | 3 ++- tests/test_scripts.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/which_vpn.sh b/scripts/which_vpn.sh index d12fe7d..0aa0084 100755 --- a/scripts/which_vpn.sh +++ b/scripts/which_vpn.sh @@ -1,5 +1,6 @@ -#!/bin/bash +#!/usr/bin/env bash # trying to find out which VPN you are connected to?? +set -euo pipefail if [[ "$HOSTNAME" == "biowulf.nih.gov" ]] then diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 800a832..b63f7d4 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -1,5 +1,6 @@ +import pytest from ccbr_tools.shell import shell_run -from ccbr_tools.pipeline.util import get_hpcname +from ccbr_tools.pipeline.hpc import get_hpcname def test_scripts_help(): @@ -9,7 +10,7 @@ def test_scripts_help(): def test_which_vpn(): - which_vpn = shell_run("which_vpn.sh") + which_vpn = shell_run("which_vpn.sh", check=False) hpc = get_hpcname() if hpc == "biowulf": assert "DO NOT RUN THIS ON a BIOWULF interactive node!" in which_vpn From f42f68828d1d6225433baec2958ad0f40a343694 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 25 Nov 2024 11:48:20 -0500 Subject: [PATCH 3/9] refactor: move hpcname & scontrol to hpc module --- src/ccbr_tools/pipeline/hpc.py | 54 +++++++++++++++++++++++++++-- src/ccbr_tools/pipeline/nextflow.py | 1 - src/ccbr_tools/pipeline/util.py | 34 +----------------- tests/test_cli.py | 2 +- tests/test_hpc.py | 5 ++- 5 files changed, 57 insertions(+), 39 deletions(-) diff --git a/src/ccbr_tools/pipeline/hpc.py b/src/ccbr_tools/pipeline/hpc.py index fe5cde1..aea9ff8 100755 --- a/src/ccbr_tools/pipeline/hpc.py +++ b/src/ccbr_tools/pipeline/hpc.py @@ -5,8 +5,8 @@ which contains default attributes for supported clusters. """ -from .util import get_hpcname from .cache import get_singularity_cachedir +from ..shell import shell_run class Cluster: @@ -53,8 +53,12 @@ def __init__(self): super().__init__() self.name = "biowulf" self.modules = { - "nxf": "ccbrpipeliner nextflow", - "smk": "ccbrpipeliner snakemake/7 singularity", + "nxf": "nextflow" + "" + if is_loaded(module="ccbrpipeliner") + else " ccbrpipeliner", + "smk": "snakemake/7 singularity" + "" + if is_loaded(module="ccbrpipeliner") + else " ccbrpipeliner", } self.singularity_sif_dir = "/data/CCBR_Pipeliner/SIFs" @@ -102,3 +106,47 @@ def get_hpc(debug=False): hpc_options = {"biowulf": Biowulf, "frce": FRCE} hpc_name = get_hpcname() if not debug else debug return hpc_options.get(hpc_name, Cluster)() + + +def get_hpcname(): + """ + Get the HPC name using scontrol + + Returns: + hpcname (str): The HPC name (biowulf, frce, or an empty string) + """ + scontrol_out = scontrol_show() + hpc = scontrol_out["ClusterName"] if "ClusterName" in scontrol_out.keys() else "" + if hpc == "fnlcr": + hpc = "frce" + return hpc + + +def scontrol_show(): + """ + Run `scontrol show config` and parse the output as a dictionary + + Returns: + scontrol_dict (dict): dictionary containing the output of `scontrol show config` + """ + scontrol_dict = dict() + scontrol_out = shell_run( + "scontrol show config", shell=True, capture_output=True, text=True + ) + if len(scontrol_out) > 0: + for line in scontrol_out.split("\n"): + line_split = line.split("=") + if len(line_split) > 1: + scontrol_dict[line_split[0].strip()] = line_split[1].strip() + return scontrol_dict + + +def is_loaded(module="ccbrpipeliner"): + """ + Check whether the ccbrpipeliner module is loaded + + Returns: + is_loaded (bool): True if the module is loaded, False otherwise + """ + output = shell_run("bash -c 'module list'") + return module in output diff --git a/src/ccbr_tools/pipeline/nextflow.py b/src/ccbr_tools/pipeline/nextflow.py index 2281e1c..aae82b8 100755 --- a/src/ccbr_tools/pipeline/nextflow.py +++ b/src/ccbr_tools/pipeline/nextflow.py @@ -34,7 +34,6 @@ def run( nextflow_command = ["nextflow", "run", nextfile_path] hpc = get_hpc() - print("HPC", hpc.name) if mode == "slurm" and not hpc: raise ValueError("mode is 'slurm' but no HPC environment was detected") # add any additional Nextflow commands diff --git a/src/ccbr_tools/pipeline/util.py b/src/ccbr_tools/pipeline/util.py index 1f2f25d..0600532 100755 --- a/src/ccbr_tools/pipeline/util.py +++ b/src/ccbr_tools/pipeline/util.py @@ -17,39 +17,7 @@ import yaml from ..pkg_util import repo_base, msg - - -def scontrol_show(): - """ - Run `scontrol show config` and parse the output as a dictionary - - Returns: - scontrol_dict (dict): dictionary containing the output of `scontrol show config` - """ - scontrol_dict = dict() - scontrol_out = subprocess.run( - "scontrol show config", shell=True, capture_output=True, text=True - ).stdout - if len(scontrol_out) > 0: - for line in scontrol_out.split("\n"): - line_split = line.split("=") - if len(line_split) > 1: - scontrol_dict[line_split[0].strip()] = line_split[1].strip() - return scontrol_dict - - -def get_hpcname(): - """ - Get the HPC name using scontrol - - Returns: - hpcname (str): The HPC name (biowulf, frce, or an empty string) - """ - scontrol_out = scontrol_show() - hpc = scontrol_out["ClusterName"] if "ClusterName" in scontrol_out.keys() else "" - if hpc == "fnlcr": - hpc = "frce" - return hpc +from .hpc import get_hpcname def get_tmp_dir(tmp_dir, outdir, hpc=get_hpcname()): diff --git a/tests/test_cli.py b/tests/test_cli.py index ccca48e..6c41b8f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,5 @@ from ccbr_tools.shell import shell_run -from ccbr_tools.pipeline.util import get_hpcname +from ccbr_tools.pipeline.hpc import get_hpcname def test_version(): diff --git a/tests/test_hpc.py b/tests/test_hpc.py index ae50837..0aa767c 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -1,4 +1,6 @@ from ccbr_tools.pipeline.hpc import get_hpc +from ccbr_tools.shell import shell_run +import subprocess def test_hpc_biowulf(): @@ -29,4 +31,5 @@ def test_hpc_none(): assert not any([hpc, hpc.name, *hpc.modules.values(), hpc.singularity_sif_dir]) -print(get_hpc()) +if __name__ == "__main__": + print(subprocess.run("bash -c 'module list'", shell=True)) From ce171a29c438f33cb6330aa9046c3883cf043cba Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:53:38 +0000 Subject: [PATCH 4/9] =?UTF-8?q?ci:=20=F0=9F=A4=96=20black=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/ccbr_tools/pipeline/hpc.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/ccbr_tools/pipeline/hpc.py b/src/ccbr_tools/pipeline/hpc.py index aea9ff8..f777e11 100755 --- a/src/ccbr_tools/pipeline/hpc.py +++ b/src/ccbr_tools/pipeline/hpc.py @@ -53,12 +53,16 @@ def __init__(self): super().__init__() self.name = "biowulf" self.modules = { - "nxf": "nextflow" + "" - if is_loaded(module="ccbrpipeliner") - else " ccbrpipeliner", - "smk": "snakemake/7 singularity" + "" - if is_loaded(module="ccbrpipeliner") - else " ccbrpipeliner", + "nxf": ( + "nextflow" + "" + if is_loaded(module="ccbrpipeliner") + else " ccbrpipeliner" + ), + "smk": ( + "snakemake/7 singularity" + "" + if is_loaded(module="ccbrpipeliner") + else " ccbrpipeliner" + ), } self.singularity_sif_dir = "/data/CCBR_Pipeliner/SIFs" From 04fa8499e459052e694b222ae69cc9371e055a58 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 25 Nov 2024 11:54:00 -0500 Subject: [PATCH 5/9] chore: update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c6569e..774e654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ## Tools development version - use major & minor version for docs website subdirectories. (#15, @kelly-sovacool) +- Fig bug where `nextflow.run()` did not import the correct HPC modules. (#20, @kelly-sovacool) ## Tools 0.1.1 From 02625898c32dbf4c37366a016d75e4ec97a8c974 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 25 Nov 2024 11:57:17 -0500 Subject: [PATCH 6/9] fix: do not fail if scontrol fails --- src/ccbr_tools/pipeline/hpc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ccbr_tools/pipeline/hpc.py b/src/ccbr_tools/pipeline/hpc.py index f777e11..cb74366 100755 --- a/src/ccbr_tools/pipeline/hpc.py +++ b/src/ccbr_tools/pipeline/hpc.py @@ -135,7 +135,7 @@ def scontrol_show(): """ scontrol_dict = dict() scontrol_out = shell_run( - "scontrol show config", shell=True, capture_output=True, text=True + "scontrol show config", shell=True, capture_output=True, text=True, check=False ) if len(scontrol_out) > 0: for line in scontrol_out.split("\n"): From a9bf1ee02a3f11c4dd48ad58def8c0865e1dff27 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 25 Nov 2024 12:04:20 -0500 Subject: [PATCH 7/9] fix: do not fail if "module list" command is not available --- src/ccbr_tools/pipeline/hpc.py | 2 +- tests/test_hpc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ccbr_tools/pipeline/hpc.py b/src/ccbr_tools/pipeline/hpc.py index cb74366..aa7ea55 100755 --- a/src/ccbr_tools/pipeline/hpc.py +++ b/src/ccbr_tools/pipeline/hpc.py @@ -152,5 +152,5 @@ def is_loaded(module="ccbrpipeliner"): Returns: is_loaded (bool): True if the module is loaded, False otherwise """ - output = shell_run("bash -c 'module list'") + output = shell_run("bash -c 'module list'", check=False) return module in output diff --git a/tests/test_hpc.py b/tests/test_hpc.py index 0aa767c..d2c7594 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -32,4 +32,4 @@ def test_hpc_none(): if __name__ == "__main__": - print(subprocess.run("bash -c 'module list'", shell=True)) + print(get_hpc(debug="biowulf")) From 42567638346e1052ad63b18943db1ff0a491a706 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 25 Nov 2024 12:14:09 -0500 Subject: [PATCH 8/9] test: write test for get_tmp_dir() --- tests/test_pipeline_util.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/test_pipeline_util.py diff --git a/tests/test_pipeline_util.py b/tests/test_pipeline_util.py new file mode 100644 index 0000000..8efe905 --- /dev/null +++ b/tests/test_pipeline_util.py @@ -0,0 +1,11 @@ +from ccbr_tools.pipeline.util import get_tmp_dir + + +def test_tmp_dir(): + assert all( + [ + get_tmp_dir("", "./out", hpc="biowulf").startswith("/lscratch"), + get_tmp_dir("", "./out", hpc="frce").startswith("./out"), + get_tmp_dir("", "./out", hpc="none") == None, + ] + ) From 48c97410d700d5b969e1fde780916e9ae3f7ea10 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Mon, 25 Nov 2024 12:21:58 -0500 Subject: [PATCH 9/9] test: slurm mode --- src/ccbr_tools/pipeline/nextflow.py | 2 +- tests/test_nextflow.py | 28 +++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/ccbr_tools/pipeline/nextflow.py b/src/ccbr_tools/pipeline/nextflow.py index aae82b8..2495334 100755 --- a/src/ccbr_tools/pipeline/nextflow.py +++ b/src/ccbr_tools/pipeline/nextflow.py @@ -19,6 +19,7 @@ def run( mode="local", pipeline_name=None, debug=False, + hpc=get_hpc(), ): """ Run a Nextflow workflow @@ -33,7 +34,6 @@ def run( """ nextflow_command = ["nextflow", "run", nextfile_path] - hpc = get_hpc() if mode == "slurm" and not hpc: raise ValueError("mode is 'slurm' but no HPC environment was detected") # add any additional Nextflow commands diff --git a/tests/test_nextflow.py b/tests/test_nextflow.py index 589b748..0dfe241 100644 --- a/tests/test_nextflow.py +++ b/tests/test_nextflow.py @@ -1,7 +1,9 @@ from ccbr_tools.pipeline.nextflow import run -from ccbr_tools.pipeline.hpc import get_hpc +from ccbr_tools.pipeline.hpc import get_hpc, Biowulf, FRCE from ccbr_tools.shell import exec_in_context +import tempfile + def test_nextflow_basic(): output = exec_in_context(run, "CCBR/CHAMPAGNE", debug=True) @@ -9,7 +11,23 @@ def test_nextflow_basic(): def test_nextflow_hpc(): - hpc = get_hpc() - if hpc.name == "biowulf" or hpc.name == "frce": - output = exec_in_context(run, "CCBR/CHAMPAGNE", debug=True) - assert "module load" in output + assert all( + [ + "module load" + in exec_in_context(run, "CCBR/CHAMPAGNE", debug=True, hpc=Biowulf()), + "module load" + in exec_in_context(run, "CCBR/CHAMPAGNE", debug=True, hpc=FRCE()), + ] + ) + + +def test_nextflow_slurm(): + assert "sbatch submit_slurm.sh" in exec_in_context( + run, "CCBR/CHAMPAGNE", debug=True, hpc=Biowulf(), mode="slurm" + ) + + +if __name__ == "__main__": + print( + exec_in_context(run, "CCBR/CHAMPAGNE", debug=True, hpc=Biowulf(), mode="slurm") + )