From 7b03913c55e578c3507ff4ab12e9a304dc5a87bd Mon Sep 17 00:00:00 2001 From: Evan Wies Date: Wed, 24 Jan 2024 14:48:56 -0500 Subject: [PATCH] Add num_psuedo, some XDP pre-work, model/vendor cleanup --- README.md | 11 ++++++---- internal/onload_device/fingerprint.go | 29 ++++++++++++++++++--------- internal/onload_device/plugin.go | 16 ++++++++++++--- internal/onload_device/probes.go | 25 +++++++++++++++++++++-- 4 files changed, 63 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 91e5c05..7972dac 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # nomad-onload +** This is still experimental! ** + `nomad-onload` is tooling to integrate [Nomad](https://www.nomadproject.io) and [OpenOnload](https://github.com/Xilinx-CNS/onload). It provides a [Nomad Device Plugin](https://developer.hashicorp.com/nomad/docs/concepts/plugins/devices) that exposes OpenOnload capabilites to Nomad via virtual devices. This enables kernel-bypass of the networking stack of any Docker-driver Nomad Job. In addition to TCP and UDP acceleration, facilities like `epoll` and pipes are brought to userspace as well. @@ -68,18 +70,18 @@ task { ---- -Here are how Onload devices are made fingerprinted: +Here is how Onload devices are fingerprinted: * If Onload/TCPDirect is not installed, there are no devices available. 
* Each "SFC interface" (Solarflare/Xilinx/AMD Network Card) is discovered with Vendor `amd` * If there are no SFC interfaces found, we create a fake one called `none` So if we have both Onload and TCPDirect installed along with two SFC interfces `eth0` and `eth1`, we'd have the following devices available to a Nomad Client: - * `amd/eth0/onload` `amd/eth0/zf` `amd/eth0/onloadzf` - * `amd/eth1/onload` `amd/eth1/zf` `amd/eth1/onloadzf` + * `amd/onload/eth0` `amd/zf/eth0` `amd/onloadzf/eth0` + * `amd/onload/eth1` `amd/zf/eth1` `amd/onloadzf/eth1` Or similarly, with Onload and TCPDirect installed, but without SFC interfaces: - * `amd/none/onload` `amd/none/zf` `amd/none/onloadzf` + * `amd/onload/none` `amd/zf/none` `amd/onloadzf/none` Nomad allows devices to be selected per this [device name](https://developer.hashicorp.com/nomad/docs/job-specification/device#name): @@ -104,6 +106,7 @@ If `mount_onload` is enables mounting of all the files and paths configured belo |:-----|:----:|:-------:|:------------| | `set_preload` | `bool` | `true` | Should the Device Plugin set the `LD_PRELOAD` environment variable in the Nomad Task? | | `mount_onload` | `bool` | `true` | Should the Device Plugin mount Onload files into the Nomad Task? | +| `num_psuedo` | `number` | `10` | Number of pseudo-devices per Interface, limiting the number of simultaneous Onloaded Jobs | | `ignored_interfaces` | `list(string)` | `[]` | List of interfaces to ignore. 
Include `none` to prevent that pseudo-devices creation | | `task_device_path` | `string` | `"/dev"` | Path to place device files in the Nomad Task | | `host_device_path` | `string` | `"/dev"` | Path to find device files on the Host | diff --git a/internal/onload_device/fingerprint.go b/internal/onload_device/fingerprint.go index 233b1b7..c22421b 100644 --- a/internal/onload_device/fingerprint.go +++ b/internal/onload_device/fingerprint.go @@ -30,11 +30,12 @@ type FingerprintDeviceData struct { Interface string // also its Name DeviceType string Vendor string + Model string PCIBusID string } func (d *FingerprintDeviceData) GroupNameKey() string { - return fmt.Sprintf("%s/%s/%s", d.Vendor, d.DeviceType, d.Interface) + return fmt.Sprintf("%s/%s/%s", d.Vendor, d.DeviceType, d.Model) } // FingerprintData represets attributes of driver/devices @@ -66,7 +67,11 @@ func (d *OnloadDevicePlugin) getFingerprintData() (*FingerprintData, error) { // Onload can be used without it, so we publish // a fake device called "none" to still allow Onload enablement // via the "" name configuration - nics = append(nics, NICInfo{deviceName_none, ""}) + nics = append(nics, NICInfo{ + Interface: deviceName_None, + Vendor: vendor_None, + PCIBusID: "", + }) } // list of eligble device types @@ -82,12 +87,18 @@ func (d *OnloadDevicePlugin) getFingerprintData() (*FingerprintData, error) { devices := make([]*FingerprintDeviceData, 0, len(deviceTypes)*len(nics)) for _, nic := range nics { for _, deviceType := range deviceTypes { - devices = append(devices, &FingerprintDeviceData{ - Interface: nic.Interface, - DeviceType: deviceType, - Vendor: vendor_SFC, - PCIBusID: nic.PCIBusID, - }) + // create the configured number (num_psuedo) of pseudo-devices per interface for non-exclusive access + devicesPerInterface := d.config.NumPsuedoDevices + for i := 0; i < devicesPerInterface; i++ { + deviceID := fmt.Sprintf("%s-%d", nic.Interface, i) + devices = append(devices, &FingerprintDeviceData{ + Interface: deviceID, + Model: nic.Interface, // hard to know actual Model, so 
allow Interface as specifier + DeviceType: deviceType, + Vendor: nic.Vendor, + PCIBusID: nic.PCIBusID, + }) + } } } @@ -152,7 +163,7 @@ func (d *OnloadDevicePlugin) writeFingerprintToChannel(devices chan<- *device.Fi for _, device := range fingerprintData.Devices { key := device.GroupNameKey() if key == "" { - key = groupName_notAvailable + key = groupName_NotAvailable } deviceListByGroupNameKey[key] = append(deviceListByGroupNameKey[key], device) } diff --git a/internal/onload_device/plugin.go b/internal/onload_device/plugin.go index b037c11..6dca9ef 100644 --- a/internal/onload_device/plugin.go +++ b/internal/onload_device/plugin.go @@ -33,16 +33,18 @@ const ( // SFC devices were created by Solaflare, which was acquired by Xilinx, // which was acquired by AMD. We will assign "amd" to "sfc" devices. vendor_SFC = "amd" - // XDP driver discovery? + // XDP driver discovery vendor_XDP = "xdp" + // No vendor, but not blank + vendor_None = "none" // deviceType is the "type" of device being returned deviceType_Onload = "onload" deviceType_ZF = "zf" deviceType_OnloadZF = "onloadzf" - groupName_notAvailable = "NA" - deviceName_none = "none" + groupName_NotAvailable = "NA" + deviceName_None = "none" // attribute names attr_OnloadVersion = "onload_version" @@ -64,6 +66,7 @@ type configDesc struct { type OnloadDevicePluginConfig struct { SetPreload bool `codec:"set_preload"` MountOnload bool `codec:"mount_onload"` + NumPsuedoDevices int `codec:"num_psuedo"` IgnoredInterfaces []string `codec:"ignored_interfaces"` TaskDevicePath string `codec:"task_device_path"` HostDevicePath string `codec:"host_device_path"` @@ -90,9 +93,11 @@ var ( } // configDescriptions is converted into configSpec, the specification of the schema for this plugin's config. 
+ // Config Defaults are stored here configDescriptions = []configDesc{ {"set_preload", "bool", false, `true`, "Should the Device Plugin set the LD_PRELOAD environment variable in the Nomad Task?"}, {"mount_onload", "bool", false, `true`, "Should the Device Plugin mount Onload files into the Nomad Task?"}, + {"num_psuedo", "number", false, `10`, "Number of psuedo-devices per Interface, limiting the number of simultaneous Onloaded Jobs"}, {"ignored_interfaces", "list(string)", false, `[]`, "List of interfaces to ignore. Include `none` to prevent that pseudo-devices creation"}, {"task_device_path", "string", false, `"/dev"`, "Path to place device files in the Nomad Task"}, {"host_device_path", "string", false, `"/dev"`, "Path to find device files on the Host"}, @@ -190,6 +195,11 @@ func (d *OnloadDevicePlugin) SetConfig(c *base.Config) error { // save the configuration to the plugin d.config = config + // Fixup NumPsuedoDevices + if d.config.NumPsuedoDevices < 0 { + d.config.NumPsuedoDevices = 0 + } + // convert the fingerprint poll period from an HCL string into a time.Duration period, err := time.ParseDuration(config.FingerprintPeriod) if err != nil { diff --git a/internal/onload_device/probes.go b/internal/onload_device/probes.go index d2709f2..2ad29d3 100644 --- a/internal/onload_device/probes.go +++ b/internal/onload_device/probes.go @@ -5,6 +5,7 @@ package onload_device import ( "bufio" + "errors" "fmt" "os" "os/exec" @@ -71,13 +72,23 @@ func ProbeZFVersion(binPath string) (string, error) { // SPDX-License-Identifier: MIT // SPDX-FileCopyrightText: (c) Copyright 2023 Advanced Micro Devices, Inc. +// While the Nomad DeviceGroup has a Model concept, that is hard to extract. +// We use the Interface name instead. 
type NICInfo struct { Interface string + Vendor string PCIBusID string } -// Returns a list of the Solarflare interfaces present on the node +// ProbeOnloadNics returns the Onload-enabled interfaces present on the node: SFC interfaces followed by XDP interfaces, with both probe errors joined func ProbeOnloadNics() ([]NICInfo, error) { + onloadNics, onloadErr := ProbeOnloadSFCNics() + xdpNics, xdpErr := ProbeOnloadXDPNics() + return append(onloadNics, xdpNics...), errors.Join(onloadErr, xdpErr) +} + +// Returns a list of the Solarflare (SFC) interfaces present on the node +func ProbeOnloadSFCNics() ([]NICInfo, error) { // Takes the output from lshw and returns the device name for each Solarflare device. // "lshw -businfo -class network" sample output: @@ -111,7 +122,11 @@ func ProbeOnloadNics() ([]NICInfo, error) { m := r.FindStringSubmatch(line) if len(m) == 3 { iface, busid := m[2], m[1] - nics = append(nics, NICInfo{iface, busid}) + nics = append(nics, NICInfo{ + Interface: iface, + Vendor: vendor_SFC, + PCIBusID: busid, + }) } } @@ -120,3 +135,9 @@ func ProbeOnloadNics() ([]NICInfo, error) { } return nics, nil } + +// Returns a list of the Onload-XDP interfaces present on the node +func ProbeOnloadXDPNics() ([]NICInfo, error) { + // TODO: probe it... use vendor_XDP + return nil, nil +}