From a6a3609dd33c9095f7c44e5f77e3abe555dcf8b3 Mon Sep 17 00:00:00 2001 From: Alexander Maslennikov Date: Tue, 15 Oct 2024 12:32:25 +0200 Subject: [PATCH] Restore setting RoCE configuration The issue breaking RoCE config was in the previous call to setpci that set an incorrect value subsequently breaking the runtime config for the device until the next reboot Signed-off-by: Alexander Maslennikov --- .../templates/config-daemon.yaml | 1 + pkg/host/configvalidation.go | 25 ++- pkg/host/configvalidation_test.go | 148 +++++++++--------- pkg/host/host.go | 19 +-- pkg/host/utils.go | 2 +- pkg/host/utils_test.go | 4 +- 6 files changed, 95 insertions(+), 104 deletions(-) diff --git a/deployment/nic-configuration-operator-chart/templates/config-daemon.yaml b/deployment/nic-configuration-operator-chart/templates/config-daemon.yaml index 38ff24b..9715cd0 100644 --- a/deployment/nic-configuration-operator-chart/templates/config-daemon.yaml +++ b/deployment/nic-configuration-operator-chart/templates/config-daemon.yaml @@ -24,6 +24,7 @@ spec: nodeSelector: {{- toYaml .Values.operator.nodeSelector | nindent 8 }} serviceAccountName: {{ include "nic-configuration-operator.serviceAccountName" . }} terminationGracePeriodSeconds: 10 + hostNetwork: true hostPID: true priorityClassName: system-node-critical containers: diff --git a/pkg/host/configvalidation.go b/pkg/host/configvalidation.go index c928fa7..7c1afd9 100644 --- a/pkg/host/configvalidation.go +++ b/pkg/host/configvalidation.go @@ -245,9 +245,7 @@ func (v *configValidationImpl) AdvancedPCISettingsEnabled(currentConfig map[stri func (v *configValidationImpl) RuntimeConfigApplied(device *v1alpha1.NicDevice) (bool, error) { ports := device.Status.Ports - // TODO uncomment after a fix to mlnx_qos command - //desiredMaxReadReqSize, desiredTrust, desiredPfc := v.CalculateDesiredRuntimeConfig(device) - desiredMaxReadReqSize, _, _ := v.CalculateDesiredRuntimeConfig(device) + desiredMaxReadReqSize, desiredTrust, desiredPfc := v.CalculateDesiredRuntimeConfig(device) if desiredMaxReadReqSize != 0 { for _, port := range ports { @@ -262,17 +260,16 @@ func (v *configValidationImpl) RuntimeConfigApplied(device *v1alpha1.NicDevice) } } - // TODO uncomment after a fix to mlnx_qos command - //for _, port := range ports { - // actualTrust, actualPfc, err := v.utils.GetTrustAndPFC(port.NetworkInterface) - // if err != nil { - // log.Log.Error(err, "can't validate QoS settings", "device", device.Name) - // return false, err - // } - // if actualTrust != desiredTrust || actualPfc != desiredPfc { - // return false, nil - // } - //} + for _, port := range ports { + actualTrust, actualPfc, err := v.utils.GetTrustAndPFC(port.NetworkInterface) + if err != nil { + log.Log.Error(err, "can't validate QoS settings", "device", device.Name) + return false, err + } + if actualTrust != desiredTrust || actualPfc != desiredPfc { + return false, nil + } + } return true, nil } diff --git a/pkg/host/configvalidation_test.go b/pkg/host/configvalidation_test.go index 0468e74..6f44b6b 100644 --- a/pkg/host/configvalidation_test.go +++ b/pkg/host/configvalidation_test.go @@ -723,44 +723,42 @@ var _ = Describe("ConfigValidationImpl", func() { }) }) - // TODO uncomment after a fix to mlnx_qos command - //Context("when trust setting does not match on the first port", func() { - // BeforeEach(func() { - // desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device) - // - // mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil) - // mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil) - // - // mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil) - // // The second port should not be called since the first port already fails - // }) - // - // It("should return false with no error", func() { - // applied, err = validator.RuntimeConfigApplied(device) - // Expect(err).NotTo(HaveOccurred()) - // Expect(applied).To(BeFalse()) - // }) - //}) - // - // TODO uncomment after a fix to mlnx_qos command - //Context("when PFC setting does not match on the second port", func() { - // BeforeEach(func() { - // desiredMaxReadReqSize, desiredTrust, desiredPfc := validator.CalculateDesiredRuntimeConfig(device) - // - // mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil) - // mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil) - // - // mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil) - // - // mockHostUtils.On("GetTrustAndPFC", "interface1").Return(desiredTrust, "differentPfc", nil) - // }) - // - // It("should return false with no error", func() { - // applied, err = validator.RuntimeConfigApplied(device) - // Expect(err).NotTo(HaveOccurred()) - // Expect(applied).To(BeFalse()) - // }) - //}) + Context("when trust setting does not match on the first port", func() { + BeforeEach(func() { + desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device) + + mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil) + mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil) + + mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil) + // The second port should not be called since the first port already fails + }) + + It("should return false with no error", func() { + applied, err = validator.RuntimeConfigApplied(device) + Expect(err).NotTo(HaveOccurred()) + Expect(applied).To(BeFalse()) + }) + }) + + Context("when PFC setting does not match on the second port", func() { + BeforeEach(func() { + desiredMaxReadReqSize, desiredTrust, desiredPfc := validator.CalculateDesiredRuntimeConfig(device) + + mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil) + mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil) + + mockHostUtils.On("GetTrustAndPFC", "interface0").Return(desiredTrust, desiredPfc, nil) + + mockHostUtils.On("GetTrustAndPFC", "interface1").Return(desiredTrust, "differentPfc", nil) + }) + + It("should return false with no error", func() { + applied, err = validator.RuntimeConfigApplied(device) + Expect(err).NotTo(HaveOccurred()) + Expect(applied).To(BeFalse()) + }) + }) Context("when GetMaxReadRequestSize returns an error", func() { BeforeEach(func() { @@ -789,24 +787,23 @@ var _ = Describe("ConfigValidationImpl", func() { }) }) - // TODO uncomment after a fix to mlnx_qos command - //Context("when GetTrustAndPFC returns an error on the first port", func() { - // BeforeEach(func() { - // desiredMaxReadReqSize, _, _ := validator.CalculateDesiredRuntimeConfig(device) - // - // mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil) - // mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil) - // - // mockHostUtils.On("GetTrustAndPFC", "interface0").Return("", "", fmt.Errorf("failed to get trust and pfc")) - // }) - // - // It("should return false with the error", func() { - // applied, err = validator.RuntimeConfigApplied(device) - // Expect(err).To(HaveOccurred()) - // Expect(err.Error()).To(ContainSubstring("failed to get trust and pfc")) - // Expect(applied).To(BeFalse()) - // }) - //}) + Context("when GetTrustAndPFC returns an error on the first port", func() { + BeforeEach(func() { + desiredMaxReadReqSize, _, _ := validator.CalculateDesiredRuntimeConfig(device) + + mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil) + mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.1").Return(desiredMaxReadReqSize, nil) + + mockHostUtils.On("GetTrustAndPFC", "interface0").Return("", "", fmt.Errorf("failed to get trust and pfc")) + }) + + It("should return false with the error", func() { + applied, err = validator.RuntimeConfigApplied(device) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to get trust and pfc")) + Expect(applied).To(BeFalse()) + }) + }) Context("when device has a single port and all settings are applied correctly", func() { BeforeEach(func() { @@ -828,25 +825,24 @@ var _ = Describe("ConfigValidationImpl", func() { }) }) - // TODO uncomment after a fix to mlnx_qos command - //Context("when device has a single port and trust setting does not match", func() { - // BeforeEach(func() { - // device := device - // device.Status.Ports = []v1alpha1.NicDevicePortSpec{ - // {PCI: "0000:03:00.0", NetworkInterface: "interface0"}, - // } - // - // desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device) - // - // mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil) - // mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil) - // }) - // - // It("should return false with no error", func() { - // applied, err = validator.RuntimeConfigApplied(device) - // Expect(err).NotTo(HaveOccurred()) - // Expect(applied).To(BeFalse()) - // }) - //}) + Context("when device has a single port and trust setting does not match", func() { + BeforeEach(func() { + device := device + device.Status.Ports = []v1alpha1.NicDevicePortSpec{ + {PCI: "0000:03:00.0", NetworkInterface: "interface0"}, + } + + desiredMaxReadReqSize, _, desiredPfc := validator.CalculateDesiredRuntimeConfig(device) + + mockHostUtils.On("GetMaxReadRequestSize", "0000:03:00.0").Return(desiredMaxReadReqSize, nil) + mockHostUtils.On("GetTrustAndPFC", "interface0").Return("differentTrust", desiredPfc, nil) + }) + + It("should return false with no error", func() { + applied, err = validator.RuntimeConfigApplied(device) + Expect(err).NotTo(HaveOccurred()) + Expect(applied).To(BeFalse()) + }) + }) }) }) diff --git a/pkg/host/host.go b/pkg/host/host.go index 28ec623..5b3e871 100644 --- a/pkg/host/host.go +++ b/pkg/host/host.go @@ -293,9 +293,7 @@ func (h hostManager) ApplyDeviceRuntimeSpec(device *v1alpha1.NicDevice) error { return nil } - // TODO uncomment after a fix to mlnx_qos command - //desiredMaxReadReqSize, desiredTrust, desiredPfc := h.configValidation.CalculateDesiredRuntimeConfig(device) - desiredMaxReadReqSize, _, _ := h.configValidation.CalculateDesiredRuntimeConfig(device) + desiredMaxReadReqSize, desiredTrust, desiredPfc := h.configValidation.CalculateDesiredRuntimeConfig(device) ports := device.Status.Ports @@ -309,14 +307,13 @@ func (h hostManager) ApplyDeviceRuntimeSpec(device *v1alpha1.NicDevice) error { } } - // TODO uncomment after a fix to mlnx_qos command - //for _, port := range ports { - // err = h.hostUtils.SetTrustAndPFC(port.NetworkInterface, desiredTrust, desiredPfc) - // if err != nil { - // log.Log.Error(err, "failed to apply runtime configuration", "device", device) - // return err - // } - //} + for _, port := range ports { + err = h.hostUtils.SetTrustAndPFC(port.NetworkInterface, desiredTrust, desiredPfc) + if err != nil { + log.Log.Error(err, "failed to apply runtime configuration", "device", device) + return err + } + } return nil } diff --git a/pkg/host/utils.go b/pkg/host/utils.go index 23190f2..6770b85 100644 --- a/pkg/host/utils.go +++ b/pkg/host/utils.go @@ -544,7 +544,7 @@ func (h *hostUtils) SetMaxReadRequestSize(pciAddr string, maxReadRequestSize int return err } - cmd := h.execInterface.Command("setpci", "-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:FFFF", valueToApply)) + cmd := h.execInterface.Command("setpci", "-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:F000", valueToApply)) _, err := cmd.Output() if err != nil { log.Log.Error(err, "SetMaxReadRequestSize(): Failed to run setpci") diff --git a/pkg/host/utils_test.go b/pkg/host/utils_test.go index 0e3333b..5d4ba5a 100644 --- a/pkg/host/utils_test.go +++ b/pkg/host/utils_test.go @@ -634,7 +634,7 @@ Device type: ConnectX4 fakeExec.CommandScript = append(fakeExec.CommandScript, func(cmd string, args ...string) exec.Cmd { Expect(cmd).To(Equal("setpci")) - Expect(args).To(Equal([]string{"-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:FFFF", valueToApply)})) + Expect(args).To(Equal([]string{"-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:F000", valueToApply)})) return fakeCmd }) @@ -670,7 +670,7 @@ Device type: ConnectX4 fakeExec.CommandScript = append(fakeExec.CommandScript, func(cmd string, args ...string) exec.Cmd { Expect(cmd).To(Equal("setpci")) - Expect(args).To(Equal([]string{"-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:FFFF", valueToApply)})) + Expect(args).To(Equal([]string{"-s", pciAddr, fmt.Sprintf("CAP_EXP+08.w=%d000:F000", valueToApply)})) return fakeCmd })