Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: cgroup confinement #23

Merged
merged 17 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion snakemake_executor_plugin_slurm_jobstep/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
JobExecutorInterface,
)
from snakemake_interface_executor_plugins.settings import ExecMode, CommonSettings
from snakemake_interface_common.exceptions import WorkflowError


# Required:
Expand Down Expand Up @@ -111,7 +112,15 @@ def run_job(self, job: JobExecutorInterface):
# The -n1 is important to avoid that srun executes the given command
# multiple times, depending on the relation between
# cpus per task and the number of CPU cores.
call = f"srun -n1 --cpu-bind=q {self.format_job_exec(job)}"

# as of v22.11.0, the --cpu-per-task flag is needed to ensure that
# the job can utilize the c-group's resources.
# We set the limitation accordingly, assuming the submit executor
# has set the resources correctly.

call = "srun -n1 --cpu-bind=q "
call += f"--cpus-per-task {get_cpus_per_task(job)} "
call += f"{self.format_job_exec(job)}"

self.logger.debug(f"This job is a group job: {job.is_group()}")
self.logger.debug(f"The call for this job is: {call}")
Expand Down Expand Up @@ -144,3 +153,16 @@ def cores(self):

def get_exec_mode(self) -> ExecMode:
return ExecMode.REMOTE


def get_cpus_per_task(job: JobExecutorInterface):
cpus_per_task = job.threads
if job.resources.get("cpus_per_task"):
if not isinstance(cpus_per_task, int):
raise WorkflowError(
f"cpus_per_task must be an integer, but is {cpus_per_task}"
)
cpus_per_task = job.resources.cpus_per_task
# ensure that at least 1 cpu is requested
# because 0 is not allowed by slurm
return max(1, cpus_per_task)
10 changes: 10 additions & 0 deletions tests/test_github_issue41/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
rule all:
input: "1.out"

rule test1:
output: "1.out"
#threads: 2
resources:
cpus_per_task=1
shell: "touch $SLURM_CPUS_PER_TASK.out"

Empty file.
4 changes: 4 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ def get_executor(self) -> str:
def get_executor_settings(self) -> Optional[ExecutorSettingsBase]:
# instatiate ExecutorSettings of this plugin as appropriate
return None


# def test_issue_41():
# run(dpath("test_github_issue41"))
Loading