Skip to content

Commit

Permalink
cnf-tests: Add debug information for flake tests
Browse files Browse the repository at this point in the history
Test case
```
[sriov] NUMA node alignment [BeforeAll] Validate the creation of a pod with excludeTopology set to False and an SRIOV interface in a different NUMA node than the pod
```

flakes with the following error:
```
Can't find a suitable node for testing: node [cnfdu3] has no NUMA0 devices, node [cnfdu4] has no NUMA0 devices,
```

Dump `lspci` command output when failing.
Use a Gomega assertion instead of calling `ginkgo.Fail` directly, so that the failure goes through the fail handler and triggers a k8sreporter archive.

Signed-off-by: Andrea Panattoni <[email protected]>
  • Loading branch information
zeeke committed Nov 20, 2024
1 parent 734a648 commit aa314d7
Showing 1 changed file with 19 additions and 4 deletions.
23 changes: 19 additions & 4 deletions cnf-tests/testsuites/e2esuite/dpdk/numa_node_sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog/v2"
"k8s.io/klog"

mcv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
kubeletconfigv1beta1 "k8s.io/kubelet/config/v1beta1"
Expand Down Expand Up @@ -450,19 +450,34 @@ func findTestingNodeWithDevicesInTwoNUMANodes(sriovCapableNodes *sriovcluster.En

numa0Devices := findDevicesOnNUMANode(testingNode, sriovDevices, "0")
if len(numa0Devices) == 0 {
failureMessage += fmt.Sprintf("node [%s] has no NUMA0 devices, ", nodeName)
failureMessage += fmt.Sprintf("node [%s] has no NUMA0 devices\n%s\n", nodeName, dumpLspciForEthernetDevices(testingNode))
continue
}

numa1Devices := findDevicesOnNUMANode(testingNode, sriovDevices, "1")
if len(numa1Devices) == 0 {
failureMessage += fmt.Sprintf("node [%s] has no NUMA1 devices, ", nodeName)
failureMessage += fmt.Sprintf("node [%s] has no NUMA1 devices\n%s\n", nodeName, dumpLspciForEthernetDevices(testingNode))
continue
}

return testingNode, numa0Devices, numa1Devices
}

Fail("Can't find a suitable node for testing: " + failureMessage)
// Invoking ginkgo.Fail directly does not trigger the FailHandler, which generates k8sreporter archives.
Expect(false).To(BeTrue(), "Can't find a suitable node for testing: "+failureMessage)
return nil, nil, nil
}

// dumpLspciForEthernetDevices returns a debug listing of the NUMA node that
// sysfs reports for the network devices of the given node.
//
// Despite its name, it does not invoke lspci. It runs a shell loop on the node
// via testnode.ExecCommandOnNodeViaSriovDaemon and prints one
// "<sysfs path> <numa_node value>" line for every path matching
// /sys/class/net/*/device/numa_node.
//
// The result is intended to be embedded in a test failure message, so no error
// is returned: if the command fails, a description of the failure (including
// whatever output was collected) is returned in place of the listing.
func dumpLspciForEthernetDevices(node *corev1.Node) string {
	// printf is used instead of `echo -n` because the handling of -n by echo is
	// implementation-defined under sh; the path is quoted so it reaches cat
	// verbatim.
	const listNUMANodes = `for d in /sys/class/net/*/device/numa_node; do printf '%s ' "$d"; cat "$d"; done`

	out, err := testnode.ExecCommandOnNodeViaSriovDaemon(client.Client, node, []string{
		"sh", "-c", listNUMANodes,
	})
	if err != nil {
		return fmt.Sprintf("failed to dump PCI devices: out(%s) err(%s)", string(out), err.Error())
	}

	return string(out)
}

0 comments on commit aa314d7

Please sign in to comment.