Skip to content

Commit

Permalink
Restore setting RoCE configuration
Browse files Browse the repository at this point in the history
The issue breaking RoCE config was in the previous call to setpci
that set an incorrect value subsequently breaking the runtime config
for the device until the next reboot

Signed-off-by: Alexander Maslennikov <[email protected]>
  • Loading branch information
almaslennikov committed Oct 15, 2024
1 parent f923b80 commit a6a3609
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 104 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ spec:
nodeSelector: {{- toYaml .Values.operator.nodeSelector | nindent 8 }}
serviceAccountName: {{ include "nic-configuration-operator.serviceAccountName" . }}
terminationGracePeriodSeconds: 10
hostNetwork: true
hostPID: true
priorityClassName: system-node-critical
containers:
Expand Down
25 changes: 11 additions & 14 deletions pkg/host/configvalidation.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,7 @@ func (v *configValidationImpl) AdvancedPCISettingsEnabled(currentConfig map[stri
func (v *configValidationImpl) RuntimeConfigApplied(device *v1alpha1.NicDevice) (bool, error) {
ports := device.Status.Ports

// TODO uncomment after a fix to mlnx_qos command
//desiredMaxReadReqSize, desiredTrust, desiredPfc := v.CalculateDesiredRuntimeConfig(device)
desiredMaxReadReqSize, _, _ := v.CalculateDesiredRuntimeConfig(device)
desiredMaxReadReqSize, desiredTrust, desiredPfc := v.CalculateDesiredRuntimeConfig(device)

if desiredMaxReadReqSize != 0 {
for _, port := range ports {
Expand All @@ -262,17 +260,16 @@ func (v *configValidationImpl) RuntimeConfigApplied(device *v1alpha1.NicDevice)
}
}

// TODO uncomment after a fix to mlnx_qos command
//for _, port := range ports {
// actualTrust, actualPfc, err := v.utils.GetTrustAndPFC(port.NetworkInterface)
// if err != nil {
// log.Log.Error(err, "can't validate QoS settings", "device", device.Name)
// return false, err
// }
// if actualTrust != desiredTrust || actualPfc != desiredPfc {
// return false, nil
// }
//}
for _, port := range ports {
actualTrust, actualPfc, err := v.utils.GetTrustAndPFC(port.NetworkInterface)
if err != nil {
log.Log.Error(err, "can't validate QoS settings", "device", device.Name)
return false, err
}
if actualTrust != desiredTrust || actualPfc != desiredPfc {
return false, nil
}
}

return true, nil
}
Expand Down
148 changes: 72 additions & 76 deletions pkg/host/configvalidation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -723,44 +723,42 @@ var _ = Describe("ConfigValidationImpl", func() {
})
})

// TODO uncomment after a fix to mlnx_qos command
//Context("when trust setting does not match on the first port", func() {
// BeforeEach(func() {
// desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)
//
// mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
// mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)
//
// mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil)
// // The second port should not be called since the first port already fails
// })
//
// It("should return false with no error", func() {
// applied, err = validator.RuntimeConfigApplied(device)
// Expect(err).NotTo(HaveOccurred())
// Expect(applied).To(BeFalse())
// })
//})
//
// TODO uncomment after a fix to mlnx_qos command
//Context("when PFC setting does not match on the second port", func() {
// BeforeEach(func() {
// desiredMaxReadReqSize, desiredTrust, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)
//
// mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
// mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)
//
// mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil)
//
// mockHostUtils.On("GetTrustAndPFC", "interface1").Return(desiredTrust, "differentPfc", nil)
// })
//
// It("should return false with no error", func() {
// applied, err = validator.RuntimeConfigApplied(device)
// Expect(err).NotTo(HaveOccurred())
// Expect(applied).To(BeFalse())
// })
//})
Context("when trust setting does not match on the first port", func() {
BeforeEach(func() {
desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)

mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)

mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil)
// The second port should not be called since the first port already fails
})

It("should return false with no error", func() {
applied, err = validator.RuntimeConfigApplied(device)
Expect(err).NotTo(HaveOccurred())
Expect(applied).To(BeFalse())
})
})

Context("when PFC setting does not match on the second port", func() {
BeforeEach(func() {
desiredMaxReadReqSize, desiredTrust, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)

mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)

mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil)

mockHostUtils.On("GetTrustAndPFC", "interface1").Return(desiredTrust, "differentPfc", nil)
})

It("should return false with no error", func() {
applied, err = validator.RuntimeConfigApplied(device)
Expect(err).NotTo(HaveOccurred())
Expect(applied).To(BeFalse())
})
})

Context("when GetMaxReadRequestSize returns an error", func() {
BeforeEach(func() {
Expand Down Expand Up @@ -789,24 +787,23 @@ var _ = Describe("ConfigValidationImpl", func() {
})
})

// TODO uncomment after a fix to mlnx_qos command
//Context("when GetTrustAndPFC returns an error on the first port", func() {
// BeforeEach(func() {
// desiredMaxReadReqSize, _, _ := validator.CalculateDesiredRuntimeConfig(device)
//
// mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
// mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)
//
// mockHostUtils.On("GetTrustAndPFC", "interface0").Return("", "", fmt.Errorf("failed to get trust and pfc"))
// })
//
// It("should return false with the error", func() {
// applied, err = validator.RuntimeConfigApplied(device)
// Expect(err).To(HaveOccurred())
// Expect(err.Error()).To(ContainSubstring("failed to get trust and pfc"))
// Expect(applied).To(BeFalse())
// })
//})
Context("when GetTrustAndPFC returns an error on the first port", func() {
BeforeEach(func() {
desiredMaxReadReqSize, _, _ := validator.CalculateDesiredRuntimeConfig(device)

mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil)

mockHostUtils.On("GetTrustAndPFC", "interface0").Return("", "", fmt.Errorf("failed to get trust and pfc"))
})

It("should return false with the error", func() {
applied, err = validator.RuntimeConfigApplied(device)
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring("failed to get trust and pfc"))
Expect(applied).To(BeFalse())
})
})

Context("when device has a single port and all settings are applied correctly", func() {
BeforeEach(func() {
Expand All @@ -828,25 +825,24 @@ var _ = Describe("ConfigValidationImpl", func() {
})
})

// TODO uncomment after a fix to mlnx_qos command
//Context("when device has a single port and trust setting does not match", func() {
// BeforeEach(func() {
// device := device
// device.Status.Ports = []v1alpha1.NicDevicePortSpec{
// {PCI: "0000:03:00.0", NetworkInterface: "interface0"},
// }
//
// desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)
//
// mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
// mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil)
// })
//
// It("should return false with no error", func() {
// applied, err = validator.RuntimeConfigApplied(device)
// Expect(err).NotTo(HaveOccurred())
// Expect(applied).To(BeFalse())
// })
//})
Context("when device has a single port and trust setting does not match", func() {
BeforeEach(func() {
device := device
device.Status.Ports = []v1alpha1.NicDevicePortSpec{
{PCI: "0000:03:00.0", NetworkInterface: "interface0"},
}

desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device)

mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil)
mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil)
})

It("should return false with no error", func() {
applied, err = validator.RuntimeConfigApplied(device)
Expect(err).NotTo(HaveOccurred())
Expect(applied).To(BeFalse())
})
})
})
})
19 changes: 8 additions & 11 deletions pkg/host/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,7 @@ func (h hostManager) ApplyDeviceRuntimeSpec(device *v1alpha1.NicDevice) error {
return nil
}

// TODO uncomment after a fix to mlnx_qos command
//desiredMaxReadReqSize, desiredTrust, desiredPfc := h.configValidation.CalculateDesiredRuntimeConfig(device)
desiredMaxReadReqSize, _, _ := h.configValidation.CalculateDesiredRuntimeConfig(device)
desiredMaxReadReqSize, desiredTrust, desiredPfc := h.configValidation.CalculateDesiredRuntimeConfig(device)

ports := device.Status.Ports

Expand All @@ -309,14 +307,13 @@ func (h hostManager) ApplyDeviceRuntimeSpec(device *v1alpha1.NicDevice) error {
}
}

// TODO uncomment after a fix to mlnx_qos command
//for _, port := range ports {
// err = h.hostUtils.SetTrustAndPFC(port.NetworkInterface, desiredTrust, desiredPfc)
// if err != nil {
// log.Log.Error(err, "failed to apply runtime configuration", "device", device)
// return err
// }
//}
for _, port := range ports {
err = h.hostUtils.SetTrustAndPFC(port.NetworkInterface, desiredTrust, desiredPfc)
if err != nil {
log.Log.Error(err, "failed to apply runtime configuration", "device", device)
return err
}
}

return nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/host/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ func (h *hostUtils) SetMaxReadRequestSize(pciAddr string, maxReadRequestSize int
return err
}

cmd := h.execInterface.Command("setpci", "-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:FFFF", valueToApply))
cmd := h.execInterface.Command("setpci", "-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:F000", valueToApply))
_, err := cmd.Output()
if err != nil {
log.Log.Error(err, "SetMaxReadRequestSize(): Failed to run setpci")
Expand Down
4 changes: 2 additions & 2 deletions pkg/host/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ Device type: ConnectX4

fakeExec.CommandScript = append(fakeExec.CommandScript, func(cmd string, args ...string) exec.Cmd {
Expect(cmd).To(Equal("setpci"))
Expect(args).To(Equal([]string{"-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:FFFF", valueToApply)}))
Expect(args).To(Equal([]string{"-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:F000", valueToApply)}))
return fakeCmd
})

Expand Down Expand Up @@ -670,7 +670,7 @@ Device type: ConnectX4

fakeExec.CommandScript = append(fakeExec.CommandScript, func(cmd string, args ...string) exec.Cmd {
Expect(cmd).To(Equal("setpci"))
Expect(args).To(Equal([]string{"-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:FFFF", valueToApply)}))
Expect(args).To(Equal([]string{"-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:F000", valueToApply)}))
return fakeCmd
})

Expand Down

0 comments on commit a6a3609

Please sign in to comment.