Skip to content

Commit

Permalink
Add exclusive flag
Browse files (browse the repository at this point in the history)
Signed-off-by: Igor Gitman <[email protected]>
  • Loading branch information
Kipok committed Dec 20, 2024
1 parent 786cac3 commit 8626fd5
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 1 deletion.
2 changes: 2 additions & 0 deletions nemo_skills/pipeline/check_contamination.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def check_contamination(
help="Can specify a custom location for slurm logs. "
"If not specified, will be inside `ssh_tunnel.job_dir` part of your cluster config.",
),
exclusive: bool = typer.Option(False, help="If True, will use --exclusive flag for slurm"),
):
"""Check contamination between train/test via an LLM call.
Expand Down Expand Up @@ -149,6 +150,7 @@ def check_contamination(
task_dependencies=prev_tasks,
run_after=run_after,
reuse_code_exp=reuse_code_exp,
slurm_kwargs={"exclusive": exclusive} if exclusive else None,
)
prev_tasks = [new_task]
run_exp(exp, cluster_config)
Expand Down
4 changes: 3 additions & 1 deletion nemo_skills/pipeline/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,8 @@ def convert(
"Can provide an experiment name or an experiment object if running from code.",
),
config_dir: str = typer.Option(None, help="Can customize where we search for cluster configs"),
log_dir: str = typer.Option(None, help="Can specify a custom location for slurm logs. "),
log_dir: str = typer.Option(None, help="Can specify a custom location for slurm logs."),
exclusive: bool = typer.Option(False, help="If True, will use --exclusive flag for slurm"),
):
"""Convert a checkpoint from one format to another.
Expand Down Expand Up @@ -252,6 +253,7 @@ def convert(
time_min=time_min,
run_after=run_after,
reuse_code_exp=reuse_code_exp,
slurm_kwargs={"exclusive": exclusive} if exclusive else None,
)
run_exp(exp, cluster_config)

Expand Down
2 changes: 2 additions & 0 deletions nemo_skills/pipeline/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def eval(
help="Path to a custom dataset folder that will be searched in addition to the main one. "
"Can also specify through NEMO_SKILLS_EXTRA_DATASETS.",
),
exclusive: bool = typer.Option(False, help="If True, will use --exclusive flag for slurm"),
):
"""Evaluate a model on specified benchmarks.
Expand Down Expand Up @@ -230,6 +231,7 @@ def eval(
reuse_code_exp=reuse_code_exp,
extra_package_dirs=[extra_datasets] if extra_datasets else None,
get_server_command=get_server_command,
slurm_kwargs={"exclusive": exclusive} if exclusive else None,
)
run_exp(exp, cluster_config)

Expand Down
3 changes: 3 additions & 0 deletions nemo_skills/pipeline/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def generate(
),
config_dir: str = typer.Option(None, help="Can customize where we search for cluster configs"),
log_dir: str = typer.Option(None, help="Can specify a custom location for slurm logs."),
exclusive: bool = typer.Option(False, help="If True, will use --exclusive flag for slurm"),
):
"""Generate LLM completions for a given input file.
Expand Down Expand Up @@ -274,6 +275,7 @@ def generate(
reuse_code_exp=reuse_code_exp,
task_dependencies=prev_tasks,
get_server_command=get_server_command,
slurm_kwargs={"exclusive": exclusive} if exclusive else None,
)
prev_tasks = [new_task]
else:
Expand Down Expand Up @@ -316,6 +318,7 @@ def generate(
reuse_code_exp=reuse_code_exp,
task_dependencies=prev_tasks,
get_server_command=get_server_command,
slurm_kwargs={"exclusive": exclusive} if exclusive else None,
)
prev_tasks = [new_task]
run_exp(exp, cluster_config)
Expand Down
2 changes: 2 additions & 0 deletions nemo_skills/pipeline/start_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def start_server(
help="Can specify a custom location for slurm logs. "
"If not specified, will be inside `ssh_tunnel.job_dir` part of your cluster config.",
),
exclusive: bool = typer.Option(False, help="If True, will use --exclusive flag for slurm"),
):
"""Self-host a model server."""
setup_logging(disable_hydra_logs=False)
Expand Down Expand Up @@ -87,6 +88,7 @@ def start_server(
time_min=time_min,
server_config=server_config,
with_sandbox=with_sandbox,
slurm_kwargs={"exclusive": exclusive} if exclusive else None,
)
# we don't want to detach in this case even on slurm, so not using run_exp
exp.run(detach=False, tail_logs=True)
Expand Down
3 changes: 3 additions & 0 deletions nemo_skills/pipeline/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def train(
),
config_dir: str = typer.Option(None, help="Can customize where we search for cluster configs"),
log_dir: str = typer.Option(None, help="Can specify a custom location for slurm logs. "),
exclusive: bool = typer.Option(False, help="If True, will use --exclusive flag for slurm"),
):
"""Train (SFT or DPO) an LLM model.
Expand Down Expand Up @@ -316,6 +317,7 @@ def train(
run_after=run_after,
reuse_code_exp=reuse_code_exp,
task_dependencies=[prev_task] if prev_task is not None else None,
slurm_kwargs={"exclusive": exclusive} if exclusive else None,
)

cmd = get_avg_checkpoints_cmd(
Expand All @@ -340,6 +342,7 @@ def train(
run_after=run_after,
reuse_code_exp=reuse_code_exp,
task_dependencies=[prev_task] if prev_task is not None else None,
slurm_kwargs={"exclusive": exclusive} if exclusive else None,
)

# explicitly setting sequential to False since we set dependencies directly
Expand Down

0 comments on commit 8626fd5

Please sign in to comment.