Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Commit

Permalink
Fix affinity injection logic if partition size is not provided
Browse files Browse the repository at this point in the history
Signed-off-by: Jeev B <[email protected]>
  • Loading branch information
jeevb committed Aug 17, 2023
1 parent 3d0ceeb commit 3c6545f
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 38 deletions.
14 changes: 14 additions & 0 deletions go/tasks/pluginmachinery/flytek8s/pod_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ func ApplyNodeSelectors(podSpec *v1.PodSpec, selectors ...*core.Selector) {
podSpec.Affinity = &v1.Affinity{}
}

Check warning on line 111 in go/tasks/pluginmachinery/flytek8s/pod_helper.go

View check run for this annotation

Codecov / codecov/patch

go/tasks/pluginmachinery/flytek8s/pod_helper.go#L110-L111

Added lines #L110 - L111 were not covered by tests

gpuPartitionSizeSpecified := false
for _, selector := range selectors {
var ns v1.NodeSelectorRequirement
switch selector.GetSelection().(type) {
Expand All @@ -120,6 +121,7 @@ func ApplyNodeSelectors(podSpec *v1.PodSpec, selectors ...*core.Selector) {
Values: []string{selector.GetGpuDevice()},
}
case *core.Selector_GpuPartitionSize:
gpuPartitionSizeSpecified = true
ns = v1.NodeSelectorRequirement{
Key: config.GetK8sPluginConfig().GpuPartitionSizeNodeLabel,
Operator: v1.NodeSelectorOpIn,
Expand All @@ -133,6 +135,18 @@ func ApplyNodeSelectors(podSpec *v1.PodSpec, selectors ...*core.Selector) {
AddRequiredNodeSelectorRequirements(podSpec.Affinity, ns)
}
}

// If no GPU partition size selector was specified, we assume that the user
// wants full, unpartitioned GPUs, so we require that the GPU partition size
// node label is absent from the node.
if !gpuPartitionSizeSpecified {
AddRequiredNodeSelectorRequirements(
podSpec.Affinity,
v1.NodeSelectorRequirement{
Key: config.GetK8sPluginConfig().GpuPartitionSizeNodeLabel,
Operator: v1.NodeSelectorOpDoesNotExist,
},
)
}
}

// UpdatePod updates the base pod spec used to execute tasks. This is configured with plugins and task metadata-specific options
Expand Down
116 changes: 78 additions & 38 deletions go/tasks/pluginmachinery/flytek8s/pod_helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,56 +417,96 @@ func TestApplyInterruptibleNodeAffinity(t *testing.T) {
}

func TestApplyNodeSelectors(t *testing.T) {
podSpec := &v1.PodSpec{
Affinity: &v1.Affinity{},
}
ApplyNodeSelectors(
podSpec,
&core.Selector{
Selection: &core.Selector_GpuDevice{
GpuDevice: "nvidia-tesla-a100",
t.Run("with gpu device and partition size", func(t *testing.T) {
podSpec := &v1.PodSpec{
Affinity: &v1.Affinity{},
}
ApplyNodeSelectors(
podSpec,
&core.Selector{
Selection: &core.Selector_GpuDevice{
GpuDevice: "nvidia-tesla-a100",
},
},
},
&core.Selector{
Selection: &core.Selector_GpuPartitionSize{
GpuPartitionSize: "1g.5gb",
&core.Selector{
Selection: &core.Selector_GpuPartitionSize{
GpuPartitionSize: "1g.5gb",
},
OnlyPreferred: true,
},
OnlyPreferred: true,
},
)
assert.EqualValues(
t,
[]v1.NodeSelectorTerm{
v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
v1.NodeSelectorRequirement{
Key: config.GetK8sPluginConfig().GpuDeviceNodeLabel,
Operator: v1.NodeSelectorOpIn,
Values: []string{"nvidia-tesla-a100"},
)
assert.EqualValues(
t,
[]v1.NodeSelectorTerm{
v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
v1.NodeSelectorRequirement{
Key: config.GetK8sPluginConfig().GpuDeviceNodeLabel,
Operator: v1.NodeSelectorOpIn,
Values: []string{"nvidia-tesla-a100"},
},
},
},
},
},
podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms,
)
assert.EqualValues(
t,
[]v1.PreferredSchedulingTerm{
v1.PreferredSchedulingTerm{
Weight: 10,
Preference: v1.NodeSelectorTerm{
podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms,
)
assert.EqualValues(
t,
[]v1.PreferredSchedulingTerm{
v1.PreferredSchedulingTerm{
Weight: 10,
Preference: v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
v1.NodeSelectorRequirement{
Key: config.GetK8sPluginConfig().GpuPartitionSizeNodeLabel,
Operator: v1.NodeSelectorOpIn,
Values: []string{"1g.5gb"},
},
},
},
},
},
podSpec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution,
)
})

t.Run("with gpu device but without partition size", func(t *testing.T) {
podSpec := &v1.PodSpec{
Affinity: &v1.Affinity{},
}
ApplyNodeSelectors(
podSpec,
&core.Selector{
Selection: &core.Selector_GpuDevice{
GpuDevice: "nvidia-tesla-a100",
},
},
)
assert.EqualValues(
t,
[]v1.NodeSelectorTerm{
v1.NodeSelectorTerm{
MatchExpressions: []v1.NodeSelectorRequirement{
v1.NodeSelectorRequirement{
Key: config.GetK8sPluginConfig().GpuPartitionSizeNodeLabel,
Key: config.GetK8sPluginConfig().GpuDeviceNodeLabel,
Operator: v1.NodeSelectorOpIn,
Values: []string{"1g.5gb"},
Values: []string{"nvidia-tesla-a100"},
},
v1.NodeSelectorRequirement{
Key: config.GetK8sPluginConfig().GpuPartitionSizeNodeLabel,
Operator: v1.NodeSelectorOpDoesNotExist,
},
},
},
},
},
podSpec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution,
)
podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms,
)
assert.EqualValues(
t,
0,
len(podSpec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution),
)
})
}

func updatePod(t *testing.T) {
Expand Down

0 comments on commit 3c6545f

Please sign in to comment.