Skip to content

Commit

Permalink
Merge pull request #341 from ComputeCanada/general_specs
Browse files Browse the repository at this point in the history
Generalize definition of instance's specs
  • Loading branch information
cmd-ntrf authored Jan 15, 2025
2 parents e8a6582 + ee8df9a commit bfd92ea
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 21 deletions.
6 changes: 2 additions & 4 deletions aws/infrastructure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,11 @@ locals {
local_ip = aws_network_interface.nic[x].private_ip
prefix = values.prefix
tags = values.tags
specs = {
specs = merge({
cpus = data.aws_ec2_instance_type.instance_type[values.prefix].default_vcpus
ram = data.aws_ec2_instance_type.instance_type[values.prefix].memory_size
gpus = try(one(data.aws_ec2_instance_type.instance_type[values.prefix].gpus).count, 0)
mig = lookup(values, "mig", null)
shard = lookup(values, "shard", null)
}
}, values.specs)
volumes = contains(keys(module.design.volume_per_instance), x) ? {
for pv_key, pv_values in var.volumes:
pv_key => {
Expand Down
6 changes: 2 additions & 4 deletions azure/infrastructure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -155,13 +155,11 @@ locals {
local_ip = azurerm_network_interface.nic[x].private_ip_address
prefix = values.prefix
tags = values.tags
specs = {
specs = merge({
cpus = local.vmsizes[values.type].vcpus
ram = local.vmsizes[values.type].ram
gpus = local.vmsizes[values.type].gpus
mig = lookup(values, "mig", null)
shard = lookup(values, "shard", null)
}
}, values.specs)
volumes = contains(keys(module.design.volume_per_instance), x) ? {
for pv_key, pv_values in var.volumes:
pv_key => {
Expand Down
7 changes: 5 additions & 2 deletions common/design/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ locals {
for prefix, attrs in var.instances : [
for i in range(lookup(attrs, "count", 1)) : {
(format("%s%d", prefix, i + 1)) = merge(
{ for attr, value in attrs : attr => value if attr != "count" },
{ prefix = prefix }
{ for attr, value in attrs : attr => value if ! contains(["count"], attr) },
{
prefix = prefix,
specs = { for attr, value in attrs : attr => value if ! contains(["count", "tags", "image"], attr) }
},
)
}
]
Expand Down
9 changes: 8 additions & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,13 @@ the operating system and service software
and with x86-64 processors (see [NVIDIA/mig-parted issue #30](https://github.com/NVIDIA/mig-parted/issues/30)).
6. `shard`: total number of [Sharding](https://slurm.schedmd.com/gres.html#Sharding) on the node. Sharding allows sharing the same GPU on multiple jobs. The total number of shards is evenly distributed across all GPUs on the node.
The instance specifications are retrieved from the cloud provider data source, but it is possible to explicitly specify them.
7. `cpus`: number of logical processors on the node - [`CPUs` in slurm.conf](https://slurm.schedmd.com/slurm.conf.html#OPT_CPUs)
8. `ram`: size of real memory on the node in megabytes - [`RealMemory` in slurm.conf](https://slurm.schedmd.com/slurm.conf.html#OPT_RealMemory)
9. `gpus`: number of graphics processors (GPUs) on the node - [`Gres=gpu:<gpus>` in slurm.conf](https://slurm.schedmd.com/slurm.conf.html#OPT_Gres_1)
10. `gpu_type`: type of graphics processor (GPU) on the node - [`Gres=gpu:<gpu_type>:<gpus>` in slurm.conf](https://slurm.schedmd.com/slurm.conf.html#OPT_Gres_1)
For some cloud providers, it is possible to define additional attributes.
The following sections present the available attributes per provider.
Expand Down Expand Up @@ -585,7 +592,7 @@ For more information on these attributes, refer to
- `gpu_type`: name of the GPU model to attach to the instance. Refer to
[Google Cloud documentation](https://cloud.google.com/compute/docs/gpus) for the list of
available models per region
- `gpu_count`: number of GPUs of the `gpu_type` model to attach to the instance
- `gpus`: number of GPUs of the `gpu_type` model to attach to the instance
#### 4.7.3 Post build modification effect
Expand Down
2 changes: 1 addition & 1 deletion examples/gcp/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ module "gcp" {
type = "n1-standard-2",
tags = ["node"],
count = 1,
gpus = 1
gpu_type = "nvidia-tesla-t4",
gpu_count = 1
}
}

Expand Down
8 changes: 3 additions & 5 deletions gcp/infrastructure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,11 @@ locals {
local_ip = google_compute_address.nic[x].address
prefix = values.prefix
tags = values.tags
specs = {
specs = merge({
cpus = data.external.machine_type[values["prefix"]].result["vcpus"]
ram = data.external.machine_type[values["prefix"]].result["ram"]
gpus = try(data.external.machine_type[values["prefix"]].result["gpus"], lookup(values, "gpu_count", 0))
mig = lookup(values, "mig", null)
shard = lookup(values, "shard", null)
}
gpus = try(data.external.machine_type[values["prefix"]].result["gpus"], 0)
}, values.specs)
volumes = contains(keys(module.design.volume_per_instance), x) ? {
for pv_key, pv_values in var.volumes:
pv_key => {
Expand Down
6 changes: 2 additions & 4 deletions openstack/infrastructure.tf
Original file line number Diff line number Diff line change
Expand Up @@ -120,16 +120,14 @@ locals {
local_ip = openstack_networking_port_v2.nic[x].all_fixed_ips[0]
prefix = values.prefix
tags = values.tags
specs = {
specs = merge({
cpus = data.openstack_compute_flavor_v2.flavors[values.prefix].vcpus
ram = data.openstack_compute_flavor_v2.flavors[values.prefix].ram
gpus = sum([
parseint(lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "resources:VGPU", "0"), 10),
parseint(split(":", lookup(data.openstack_compute_flavor_v2.flavors[values.prefix].extra_specs, "pci_passthrough:alias", "gpu:0"))[1], 10)
])
mig = lookup(values, "mig", null)
shard = lookup(values, "shard", null)
}
}, values.specs)
volumes = contains(keys(module.design.volume_per_instance), x) ? {
for pv_key, pv_values in var.volumes:
pv_key => {
Expand Down

0 comments on commit bfd92ea

Please sign in to comment.