Skip to content

Commit

Permalink
Merge pull request #35 from glotzerlab/constraints
Browse files Browse the repository at this point in the history
Add submit options to the cluster configuration
  • Loading branch information
joaander authored Aug 15, 2024
2 parents 8b89dc4 + 68d9976 commit cb3bc5b
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 0 deletions.
5 changes: 5 additions & 0 deletions doc/src/clusters/cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ be one of:
* `"slurm"`
* `"bash"`

## submit_options

`cluster.submit_options`: **array** of **strings** - Scheduler submission options that
are passed to every job on this cluster.

## partition

`cluster.partition`: **array** of **tables** - Define the scheduler partitions that
Expand Down
3 changes: 3 additions & 0 deletions doc/src/release-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* Edit links to documentation pages.
* New arguments to `show status` display actions that are in the requested states:
`--completed`, `--eligible`, `--submitted`, and `--waiting`.
* `cluster.submit_options` configuration option in `clusters.toml`.

*Changed:*

Expand All @@ -15,6 +16,8 @@
* `show status` hides actions with 0 directories by default. Pass `--all` to show all
actions.
* `clean` now cleans all caches by default.
* Submit jobs with `--constraint="scratch"` by default on Delta.
* Submit jobs with `--constraint="nvme"` by default on Frontier.

*Fixed:*

Expand Down
6 changes: 6 additions & 0 deletions src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ fn andes() -> Cluster {
name: "andes".into(),
identify: IdentificationMethod::ByEnvironment("LMOD_SYSTEM_NAME".into(), "andes".into()),
scheduler: SchedulerType::Slurm,
submit_options: Vec::new(),
partition: vec![
// Auto-detected partitions: batch
Partition {
Expand All @@ -92,6 +93,7 @@ fn anvil() -> Cluster {
name: "anvil".into(),
identify: IdentificationMethod::ByEnvironment("RCAC_CLUSTER".into(), "anvil".into()),
scheduler: SchedulerType::Slurm,
submit_options: Vec::new(),
partition: vec![
// Auto-detected partitions: shared | wholenode | gpu
Partition {
Expand Down Expand Up @@ -149,6 +151,7 @@ fn delta() -> Cluster {
name: "delta".into(),
identify: IdentificationMethod::ByEnvironment("LMOD_SYSTEM_NAME".into(), "Delta".into()),
scheduler: SchedulerType::Slurm,
submit_options: vec!["--constraint=\"scratch\"".to_string()],
partition: vec![
// Auto-detected partitions: cpu | gpuA100x4
Partition {
Expand Down Expand Up @@ -206,6 +209,7 @@ fn frontier() -> Cluster {
name: "frontier".into(),
identify: IdentificationMethod::ByEnvironment("LMOD_SYSTEM_NAME".into(), "frontier".into()),
scheduler: SchedulerType::Slurm,
submit_options: vec!["--constraint=\"nvme\"".to_string()],
partition: vec![
// Auto-detected partitions: batch
Partition {
Expand All @@ -225,6 +229,7 @@ fn greatlakes() -> Cluster {
name: "greatlakes".into(),
identify: IdentificationMethod::ByEnvironment("CLUSTER_NAME".into(), "greatlakes".into()),
scheduler: SchedulerType::Slurm,
submit_options: Vec::new(),
partition: vec![
// Auto-detected partitions: standard | gpu_mig40,gpu | gpu.
Partition {
Expand Down Expand Up @@ -295,6 +300,7 @@ fn none() -> Cluster {
name: "none".into(),
identify: IdentificationMethod::Always(true),
scheduler: SchedulerType::Bash,
submit_options: Vec::new(),
partition: vec![Partition {
name: "none".into(),
..Partition::default()
Expand Down
13 changes: 13 additions & 0 deletions src/cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ pub struct Cluster {

/// The partitions in the cluster's queue.
pub partition: Vec<Partition>,

/// Submit options to include in every job submitted to this cluster.
#[serde(default)]
pub submit_options: Vec<String>,
}

/// Methods to identify clusters.
Expand Down Expand Up @@ -400,30 +404,35 @@ mod tests {
identify: IdentificationMethod::Always(false),
scheduler: SchedulerType::Bash,
partition: Vec::new(),
submit_options: Vec::new(),
},
Cluster {
name: "cluster1".into(),
identify: IdentificationMethod::ByEnvironment("_row_select".into(), "a".into()),
scheduler: SchedulerType::Bash,
partition: Vec::new(),
submit_options: Vec::new(),
},
Cluster {
name: "cluster2".into(),
identify: IdentificationMethod::ByEnvironment("_row_select".into(), "b".into()),
scheduler: SchedulerType::Bash,
partition: Vec::new(),
submit_options: Vec::new(),
},
Cluster {
name: "cluster3".into(),
identify: IdentificationMethod::Always(true),
scheduler: SchedulerType::Bash,
partition: Vec::new(),
submit_options: Vec::new(),
},
Cluster {
name: "cluster4".into(),
identify: IdentificationMethod::ByEnvironment("_row_Select".into(), "b".into()),
scheduler: SchedulerType::Bash,
partition: Vec::new(),
submit_options: Vec::new(),
},
];
let cluster_configuration = Configuration { cluster: clusters };
Expand Down Expand Up @@ -591,6 +600,7 @@ mod tests {
identify: IdentificationMethod::Always(true),
scheduler: SchedulerType::Bash,
partition: partitions,
submit_options: Vec::new(),
};

let cpu_resources = Resources {
Expand Down Expand Up @@ -728,6 +738,7 @@ name = "b"
assert_eq!(cluster.name, "a");
assert_eq!(cluster.identify, IdentificationMethod::Always(true));
assert_eq!(cluster.scheduler, SchedulerType::Bash);
assert!(cluster.submit_options.is_empty());
assert_eq!(
cluster.partition,
vec![Partition {
Expand All @@ -748,6 +759,7 @@ name = "b"
name = "a"
identify.by_environment = ["b", "c"]
scheduler = "slurm"
submit_options = ["option1", "option2"]
[[cluster.partition]]
name = "d"
Expand Down Expand Up @@ -777,6 +789,7 @@ account_suffix = "-gpu"
IdentificationMethod::ByEnvironment("b".into(), "c".into())
);
assert_eq!(cluster.scheduler, SchedulerType::Slurm);
assert_eq!(cluster.submit_options, vec!["option1", "option2"]);
assert_eq!(
cluster.partition,
vec![Partition {
Expand Down
1 change: 1 addition & 0 deletions src/scheduler/bash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ mod tests {
scheduler: SchedulerType::Bash,
identify: IdentificationMethod::Always(false),
partition: Vec::new(),
submit_options: Vec::new(),
};
let script = Bash::new(cluster, launchers)
.make_script(&action, &directories)
Expand Down
31 changes: 31 additions & 0 deletions src/scheduler/slurm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ impl Scheduler for Slurm {
let minutes = (total + 59) / 60;
let _ = writeln!(preamble, "#SBATCH --time={minutes}");

// Add global cluster submit options first so that users can override them.
for option in &self.cluster.submit_options {
let _ = writeln!(preamble, "#SBATCH {option}");
}

// Use provided submission options
if let Some(submit_options) = action.submit_options.get(&self.cluster.name) {
if let Some(ref account) = submit_options.account {
Expand Down Expand Up @@ -299,6 +304,7 @@ mod tests {
identify: IdentificationMethod::Always(false),
scheduler: SchedulerType::Slurm,
partition: vec![Partition::default()],
submit_options: Vec::new(),
};

let slurm = Slurm::new(cluster, launchers.by_cluster("cluster"));
Expand All @@ -323,6 +329,27 @@ mod tests {
assert!(script.contains("#SBATCH --time=180"));
}

/// Cluster-level `submit_options` are written into the generated script as `#SBATCH` lines.
///
/// A single cluster-wide option is set on the scheduler returned by `setup()`. The test then
/// checks that the option is present in the script, and that the other preamble lines match
/// what the fixture is expected to produce without any cluster options.
#[test]
#[parallel]
fn cluster_submit_options() {
    let (action, directories, mut slurm) = setup();
    slurm.cluster.submit_options = vec![String::from("--option=value")];

    let generated = slurm
        .make_script(&action, &directories)
        .expect("valid script");
    // Echo the script so it can be inspected in the captured test output.
    println!("{generated}");

    // Preamble lines expected from the fixture.
    assert!(generated.contains("#SBATCH --job-name=action"));
    assert!(generated.contains("#SBATCH --ntasks=1"));
    assert!(!generated.contains("#SBATCH --account"));
    assert!(generated.contains("#SBATCH --partition=partition"));
    assert!(!generated.contains("#SBATCH --cpus-per-task"));
    assert!(!generated.contains("#SBATCH --gpus-per-task"));
    assert!(generated.contains("#SBATCH --time=180"));

    // The cluster-wide option must be emitted as its own directive.
    assert!(generated.contains("#SBATCH --option=value"));
}

#[test]
#[parallel]
fn ntasks() {
Expand Down Expand Up @@ -421,6 +448,7 @@ mod tests {
name: "cluster".into(),
identify: IdentificationMethod::Always(false),
scheduler: SchedulerType::Slurm,
submit_options: Vec::new(),
partition: vec![Partition {
memory_per_cpu: Some("a".into()),
..Partition::default()
Expand All @@ -447,6 +475,7 @@ mod tests {
name: "cluster".into(),
identify: IdentificationMethod::Always(false),
scheduler: SchedulerType::Slurm,
submit_options: Vec::new(),
partition: vec![Partition {
memory_per_gpu: Some("b".into()),
..Partition::default()
Expand Down Expand Up @@ -475,6 +504,7 @@ mod tests {
name: "cluster".into(),
identify: IdentificationMethod::Always(false),
scheduler: SchedulerType::Slurm,
submit_options: Vec::new(),
partition: vec![Partition {
cpus_per_node: Some(10),
..Partition::default()
Expand Down Expand Up @@ -503,6 +533,7 @@ mod tests {
name: "cluster".into(),
identify: IdentificationMethod::Always(false),
scheduler: SchedulerType::Slurm,
submit_options: Vec::new(),
partition: vec![Partition {
gpus_per_node: Some(5),
..Partition::default()
Expand Down

0 comments on commit cb3bc5b

Please sign in to comment.