diff --git a/zeusd/README.md b/zeusd/README.md
index ebdc5e8b..36c5d7d3 100644
--- a/zeusd/README.md
+++ b/zeusd/README.md
@@ -31,7 +31,7 @@ Full help message:
 
 ```console
 $ zeusd --help
-The Zeus daemon runs with elevated provileges and communicates with unprivileged Zeus clients over a Unix domain socket to allow them to interact with and control compute devices on the node
+The Zeus daemon runs with elevated provileges and communicates with unprivileged Zeus clients to allow them to interact with and control compute devices on the node
 
 Usage: zeusd [OPTIONS]
 
diff --git a/zeusd/src/config.rs b/zeusd/src/config.rs
index 93393c53..aa0fe192 100644
--- a/zeusd/src/config.rs
+++ b/zeusd/src/config.rs
@@ -1,11 +1,11 @@
-//! Configuration.
+//! Zeus daemon configuration.
 
 use anyhow::Context;
 use clap::{Parser, ValueEnum};
 
 /// The Zeus daemon runs with elevated provileges and communicates with
-/// unprivileged Zeus clients over a Unix domain socket to allow them to
-/// interact with and control compute devices on the node.
+/// unprivileged Zeus clients to allow them to interact with and control
+/// compute devices on the node.
 #[derive(Parser, Debug)]
 #[command(version)]
 pub struct Config {
@@ -50,10 +50,6 @@ impl Config {
     }
 }
 
-pub fn get_config() -> Config {
-    Config::parse()
-}
-
 /// The mode of connection to use for the daemon.
 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
 pub enum ConnectionMode {
@@ -62,3 +58,8 @@ pub enum ConnectionMode {
     /// TCP.
     TCP,
 }
+
+/// Parse command line arguments and return the resulting configuration object.
+pub fn get_config() -> Config {
+    Config::parse()
+}
diff --git a/zeusd/src/devices/gpu/linux.rs b/zeusd/src/devices/gpu/linux.rs
index 69061ac4..f31b0262 100644
--- a/zeusd/src/devices/gpu/linux.rs
+++ b/zeusd/src/devices/gpu/linux.rs
@@ -2,8 +2,8 @@
 //!
 //! Note that NVML is only available on Linux.
 
-use nvml_wrapper::enums::device::GpuLockedClocksSetting;
 use nvml_wrapper::{Device, Nvml};
+use nvml_wrapper::enums::device::GpuLockedClocksSetting;
 
 use crate::devices::gpu::GpuManager;
 use crate::error::ZeusdError;
diff --git a/zeusd/src/devices/gpu/macos.rs b/zeusd/src/devices/gpu/macos.rs
index 2ada9c0b..02869139 100644
--- a/zeusd/src/devices/gpu/macos.rs
+++ b/zeusd/src/devices/gpu/macos.rs
@@ -1,7 +1,7 @@
 //! Fake `NvmlGpu` implementation to allow development and testing on MacOS.
 
-use crate::devices::gpu::GpuManager;
 use crate::error::ZeusdError;
+use crate::devices::gpu::GpuManager;
 
 pub struct NvmlGpu;
 
diff --git a/zeusd/src/devices/gpu/mod.rs b/zeusd/src/devices/gpu/mod.rs
index 10953c07..1cfdc3c5 100644
--- a/zeusd/src/devices/gpu/mod.rs
+++ b/zeusd/src/devices/gpu/mod.rs
@@ -1,283 +1,17 @@
-//! GPU management module that interfaces with NVML
+//! GPU management
 
+// NVIDIA GPUs
+mod nvml;
+pub use nvml::*;
+
+// Real NVML interface.
 #[cfg(target_os = "linux")]
 mod linux;
-
 #[cfg(target_os = "linux")]
-pub use linux::NvmlGpu; // Real NVML interface.
+pub use linux::NvmlGpu;
 
+// Fake NVML interface for dev and testing on macOS.
 #[cfg(target_os = "macos")]
 mod macos;
-
 #[cfg(target_os = "macos")]
-pub use macos::NvmlGpu; // Fake NVML interface for dev and testing on MacOS.
-
-use std::time::Instant;
-use tokio::sync::mpsc::{Sender, UnboundedReceiver, UnboundedSender};
-use tracing::Span;
-
-use crate::error::ZeusdError;
-
-/// A trait for structs that manage one GPU.
-///
-/// This trait can be used to abstract over different GPU management libraries.
-/// Currently, this was done to facilitate testing.
-pub trait GpuManager {
-    fn device_count() -> Result<u32, ZeusdError>
-    where
-        Self: Sized;
-    fn set_persistent_mode(&mut self, enabled: bool) -> Result<(), ZeusdError>;
-    fn set_power_management_limit(&mut self, power_limit: u32) -> Result<(), ZeusdError>;
-    fn set_gpu_locked_clocks(
-        &mut self,
-        min_clock_mhz: u32,
-        max_clock_mhz: u32,
-    ) -> Result<(), ZeusdError>;
-    fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError>;
-    fn set_mem_locked_clocks(
-        &mut self,
-        min_clock_mhz: u32,
-        max_clock_mhz: u32,
-    ) -> Result<(), ZeusdError>;
-    fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError>;
-}
-
-/// A request to execute a GPU command.
-///
-/// This is the type that is sent to the GPU management background task.
-/// The optional `Sender` is used to send a response back to the caller if the
-/// user wanted to block until the command is executed.
-/// The `Span` is used to propagate tracing context starting from the request.
-pub type GpuCommandRequest = (
-    GpuCommand,
-    Option<Sender<Result<(), ZeusdError>>>,
-    Instant,
-    Span,
-);
-
-/// A collection of GPU management tasks.
-///
-/// This struct is used to send commands to the GPU management tasks.
-/// It's also application state that gets cloned and passed to request handlers by actix-web.
-#[derive(Clone)]
-pub struct GpuManagementTasks {
-    // Senders to the GPU management tasks. index is the GPU ID.
-    senders: Vec<UnboundedSender<GpuCommandRequest>>,
-}
-
-impl GpuManagementTasks {
-    /// Start GPU management tasks for the given GPUs.
-    /// It's generic over the type of GPU manager to allow for testing.
-    pub fn start<T>(gpus: Vec<T>) -> anyhow::Result<Self>
-    where
-        T: GpuManager + Send + 'static,
-    {
-        let mut senders = Vec::with_capacity(gpus.len());
-        for (gpu_id, gpu) in gpus.into_iter().enumerate() {
-            // Channel to send commands to the GPU management task.
-            let (tx, rx) = tokio::sync::mpsc::unbounded_channel();
-            senders.push(tx);
-            // The GPU management task will automatically terminate
-            // when the server terminates and the last sender is dropped.
-            tokio::spawn(gpu_management_task(gpu, rx));
-            tracing::info!("Background task for GPU {} successfully spawned", gpu_id);
-        }
-        Ok(Self { senders })
-    }
-
-    /// Send a command to the corresponding GPU management task and immediately return
-    /// without checking the result. Results will be logged via tracing.
-    /// Returns `Ok(())` if the command was *sent* successfully.
-    pub fn send_command_nonblocking(
-        &self,
-        gpu_id: usize,
-        command: GpuCommand,
-        request_start_time: Instant,
-    ) -> Result<(), ZeusdError> {
-        if gpu_id >= self.senders.len() {
-            return Err(ZeusdError::GpuNotFoundError(gpu_id));
-        }
-        self.senders[gpu_id]
-            .send((command, None, request_start_time, Span::current()))
-            .map_err(|e| e.into())
-    }
-
-    /// Send a command to the corresponding GPU management task and wait for completion.
-    /// Returns `Ok(())` if the command was *executed* successfully.
-    pub async fn send_command_blocking(
-        &self,
-        gpu_id: usize,
-        command: GpuCommand,
-        request_start_time: Instant,
-    ) -> Result<(), ZeusdError> {
-        let (tx, mut rx) = tokio::sync::mpsc::channel(1);
-        self.senders[gpu_id]
-            .send((command, Some(tx), request_start_time, Span::current()))
-            .map_err(ZeusdError::from)?;
-        match rx.recv().await {
-            Some(result) => result,
-            None => Err(ZeusdError::GpuManagementTaskTerminatedError(gpu_id)),
-        }
-    }
-}
-
-/// A asynchronous Tokio background task that manages one GPU.
-///
-/// Listens for commands on a channel and executes them on the GPU it manages.
-async fn gpu_management_task<T: GpuManager>(
-    mut gpu: T,
-    mut rx: UnboundedReceiver<GpuCommandRequest>,
-) {
-    while let Some((command, response, start_time, span)) = rx.recv().await {
-        let _span_guard = span.enter();
-        let result = command.execute(&mut gpu, start_time);
-        if let Some(response) = response {
-            if response.send(result).await.is_err() {
-                tracing::error!("Failed to send response to caller");
-            }
-        }
-    }
-}
-
-/// A GPU command that can be executed on a GPU.
-#[derive(Debug)]
-pub enum GpuCommand {
-    /// Enable or disable persistent mode.
-    SetPersistentMode { enabled: bool },
-    /// Set the power management limit in milliwatts.
-    SetPowerLimit { power_limit_mw: u32 },
-    /// Set the GPU's locked clock range in MHz.
-    SetGpuLockedClocks {
-        min_clock_mhz: u32,
-        max_clock_mhz: u32,
-    },
-    /// Reset the GPU's locked clocks.
-    ResetGpuLockedClocks,
-    /// Set the GPU's memory locked clock range in MHz.
-    SetMemLockedClocks {
-        min_clock_mhz: u32,
-        max_clock_mhz: u32,
-    },
-    /// Reset the GPU's memory locked clocks.
-    ResetMemLockedClocks,
-}
-
-impl GpuCommand {
-    fn execute<T>(&self, device: &mut T, request_start_time: Instant) -> Result<(), ZeusdError>
-    where
-        T: GpuManager,
-    {
-        match *self {
-            Self::SetPersistentMode { enabled } => {
-                let result = device.set_persistent_mode(enabled);
-                if result.is_ok() {
-                    tracing::info!(
-                        "Persistent mode {} (took {:?})",
-                        if enabled { "enabled" } else { "disabled" },
-                        request_start_time.elapsed()
-                    );
-                } else {
-                    tracing::warn!(
-                        "Cannot {} persistent mode (took {:?})",
-                        if enabled { "enable" } else { "disable" },
-                        request_start_time.elapsed()
-                    );
-                }
-                result
-            }
-            Self::SetPowerLimit {
-                power_limit_mw: power_limit,
-            } => {
-                let result = device.set_power_management_limit(power_limit);
-                if result.is_ok() {
-                    tracing::info!(
-                        "Power limit set to {} W (took {:?})",
-                        power_limit / 1000,
-                        request_start_time.elapsed()
-                    );
-                } else {
-                    tracing::warn!(
-                        "Cannot set power limit to {} W (took {:?}",
-                        power_limit / 1000,
-                        request_start_time.elapsed()
-                    );
-                }
-                result
-            }
-            Self::SetGpuLockedClocks {
-                min_clock_mhz,
-                max_clock_mhz,
-            } => {
-                let result = device.set_gpu_locked_clocks(min_clock_mhz, max_clock_mhz);
-                if result.is_ok() {
-                    tracing::info!(
-                        "GPU frequency set to [{}, {}] MHz (took {:?})",
-                        min_clock_mhz,
-                        max_clock_mhz,
-                        request_start_time.elapsed()
-                    );
-                } else {
-                    tracing::warn!(
-                        "Cannot set GPU frequency to [{}, {}] MHz (took {:?})",
-                        min_clock_mhz,
-                        max_clock_mhz,
-                        request_start_time.elapsed()
-                    );
-                }
-                result
-            }
-            Self::ResetGpuLockedClocks => {
-                let result = device.reset_gpu_locked_clocks();
-                if result.is_ok() {
-                    tracing::info!(
-                        "GPU locked clocks reset (took {:?})",
-                        request_start_time.elapsed()
-                    );
-                } else {
-                    tracing::warn!(
-                        "Cannot reset GPU locked clocks (took {:?})",
-                        request_start_time.elapsed()
-                    );
-                }
-                result
-            }
-            Self::SetMemLockedClocks {
-                min_clock_mhz,
-                max_clock_mhz,
-            } => {
-                let result = device.set_mem_locked_clocks(min_clock_mhz, max_clock_mhz);
-                if result.is_ok() {
-                    tracing::info!(
-                        "Memory locked clocks set to [{}, {}] MHz (took {:?})",
-                        min_clock_mhz,
-                        max_clock_mhz,
-                        request_start_time.elapsed()
-                    );
-                } else {
-                    tracing::warn!(
-                        "Cannot set memory locked clocks to [{}, {}] MHz (took {:?})",
-                        min_clock_mhz,
-                        max_clock_mhz,
-                        request_start_time.elapsed()
-                    );
-                }
-                result
-            }
-            Self::ResetMemLockedClocks => {
-                let result = device.reset_mem_locked_clocks();
-                if result.is_ok() {
-                    tracing::info!(
-                        "Memory locked clocks reset (took {:?})",
-                        request_start_time.elapsed()
-                    );
-                } else {
-                    tracing::warn!(
-                        "Cannot reset memory locked clocks (took {:?})",
-                        request_start_time.elapsed()
-                    );
-                }
-                result
-            }
-        }
-    }
-}
+pub use macos::NvmlGpu;
diff --git a/zeusd/src/devices/gpu/nvml.rs b/zeusd/src/devices/gpu/nvml.rs
new file mode 100644
index 00000000..998c703e
--- /dev/null
+++ b/zeusd/src/devices/gpu/nvml.rs
@@ -0,0 +1,272 @@
+//! GPU management for NVIDIA GPUs using NVML.
+
+use std::time::Instant;
+use tokio::sync::mpsc::{Sender, UnboundedReceiver, UnboundedSender};
+use tracing::Span;
+
+use crate::error::ZeusdError;
+
+/// A trait for structs that manage one GPU.
+///
+/// This trait can be used to abstract over different GPU management libraries.
+/// Currently, this was done to facilitate testing.
+pub trait GpuManager {
+    fn device_count() -> Result<u32, ZeusdError>
+    where
+        Self: Sized;
+    fn set_persistent_mode(&mut self, enabled: bool) -> Result<(), ZeusdError>;
+    fn set_power_management_limit(&mut self, power_limit: u32) -> Result<(), ZeusdError>;
+    fn set_gpu_locked_clocks(
+        &mut self,
+        min_clock_mhz: u32,
+        max_clock_mhz: u32,
+    ) -> Result<(), ZeusdError>;
+    fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError>;
+    fn set_mem_locked_clocks(
+        &mut self,
+        min_clock_mhz: u32,
+        max_clock_mhz: u32,
+    ) -> Result<(), ZeusdError>;
+    fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError>;
+}
+
+/// A request to execute a GPU command.
+///
+/// This is the type that is sent to the GPU management background task.
+/// The optional `Sender` is used to send a response back to the caller if the
+/// user wanted to block until the command is executed.
+/// The `Span` is used to propagate tracing context starting from the request.
+pub type GpuCommandRequest = (
+    GpuCommand,
+    Option<Sender<Result<(), ZeusdError>>>,
+    Instant,
+    Span,
+);
+
+/// A collection of GPU management tasks.
+///
+/// This struct is used to send commands to the GPU management tasks.
+/// It's also application state that gets cloned and passed to request handlers by actix-web.
+#[derive(Clone)]
+pub struct GpuManagementTasks {
+    // Senders to the GPU management tasks. index is the GPU ID.
+    senders: Vec<UnboundedSender<GpuCommandRequest>>,
+}
+
+impl GpuManagementTasks {
+    /// Start GPU management tasks for the given GPUs.
+    /// It's generic over the type of GPU manager to allow for testing.
+    pub fn start<T>(gpus: Vec<T>) -> anyhow::Result<Self>
+    where
+        T: GpuManager + Send + 'static,
+    {
+        let mut senders = Vec::with_capacity(gpus.len());
+        for (gpu_id, gpu) in gpus.into_iter().enumerate() {
+            // Channel to send commands to the GPU management task.
+            let (tx, rx) = tokio::sync::mpsc::unbounded_channel();
+            senders.push(tx);
+            // The GPU management task will automatically terminate
+            // when the server terminates and the last sender is dropped.
+            tokio::spawn(gpu_management_task(gpu, rx));
+            tracing::info!("Background task for GPU {} successfully spawned", gpu_id);
+        }
+        Ok(Self { senders })
+    }
+
+    /// Send a command to the corresponding GPU management task and immediately return
+    /// without checking the result. Results will be logged via tracing.
+    /// Returns `Ok(())` if the command was *sent* successfully.
+    pub fn send_command_nonblocking(
+        &self,
+        gpu_id: usize,
+        command: GpuCommand,
+        request_start_time: Instant,
+    ) -> Result<(), ZeusdError> {
+        if gpu_id >= self.senders.len() {
+            return Err(ZeusdError::GpuNotFoundError(gpu_id));
+        }
+        self.senders[gpu_id]
+            .send((command, None, request_start_time, Span::current()))
+            .map_err(|e| e.into())
+    }
+
+    /// Send a command to the corresponding GPU management task and wait for completion.
+    /// Returns `Ok(())` if the command was *executed* successfully.
+    pub async fn send_command_blocking(
+        &self,
+        gpu_id: usize,
+        command: GpuCommand,
+        request_start_time: Instant,
+    ) -> Result<(), ZeusdError> {
+        let (tx, mut rx) = tokio::sync::mpsc::channel(1);
+        self.senders[gpu_id]
+            .send((command, Some(tx), request_start_time, Span::current()))
+            .map_err(ZeusdError::from)?;
+        match rx.recv().await {
+            Some(result) => result,
+            None => Err(ZeusdError::GpuManagementTaskTerminatedError(gpu_id)),
+        }
+    }
+}
+
+/// An asynchronous Tokio background task that manages one GPU.
+///
+/// Listens for commands on a channel and executes them on the GPU it manages.
+async fn gpu_management_task<T: GpuManager>(
+    mut gpu: T,
+    mut rx: UnboundedReceiver<GpuCommandRequest>,
+) {
+    while let Some((command, response, start_time, span)) = rx.recv().await {
+        let _span_guard = span.enter();
+        let result = command.execute(&mut gpu, start_time);
+        if let Some(response) = response {
+            if response.send(result).await.is_err() {
+                tracing::error!("Failed to send response to caller");
+            }
+        }
+    }
+}
+
+/// A GPU command that can be executed on a GPU.
+#[derive(Debug)]
+pub enum GpuCommand {
+    /// Enable or disable persistent mode.
+    SetPersistentMode { enabled: bool },
+    /// Set the power management limit in milliwatts.
+    SetPowerLimit { power_limit_mw: u32 },
+    /// Set the GPU's locked clock range in MHz.
+    SetGpuLockedClocks {
+        min_clock_mhz: u32,
+        max_clock_mhz: u32,
+    },
+    /// Reset the GPU's locked clocks.
+    ResetGpuLockedClocks,
+    /// Set the GPU's memory locked clock range in MHz.
+    SetMemLockedClocks {
+        min_clock_mhz: u32,
+        max_clock_mhz: u32,
+    },
+    /// Reset the GPU's memory locked clocks.
+    ResetMemLockedClocks,
+}
+
+impl GpuCommand {
+    fn execute<T>(&self, device: &mut T, request_start_time: Instant) -> Result<(), ZeusdError>
+    where
+        T: GpuManager,
+    {
+        match *self {
+            Self::SetPersistentMode { enabled } => {
+                let result = device.set_persistent_mode(enabled);
+                if result.is_ok() {
+                    tracing::info!(
+                        "Persistent mode {} (took {:?})",
+                        if enabled { "enabled" } else { "disabled" },
+                        request_start_time.elapsed()
+                    );
+                } else {
+                    tracing::warn!(
+                        "Cannot {} persistent mode (took {:?})",
+                        if enabled { "enable" } else { "disable" },
+                        request_start_time.elapsed()
+                    );
+                }
+                result
+            }
+            Self::SetPowerLimit {
+                power_limit_mw: power_limit,
+            } => {
+                let result = device.set_power_management_limit(power_limit);
+                if result.is_ok() {
+                    tracing::info!(
+                        "Power limit set to {} W (took {:?})",
+                        power_limit / 1000,
+                        request_start_time.elapsed()
+                    );
+                } else {
+                    tracing::warn!(
+                        "Cannot set power limit to {} W (took {:?})",
+                        power_limit / 1000,
+                        request_start_time.elapsed()
+                    );
+                }
+                result
+            }
+            Self::SetGpuLockedClocks {
+                min_clock_mhz,
+                max_clock_mhz,
+            } => {
+                let result = device.set_gpu_locked_clocks(min_clock_mhz, max_clock_mhz);
+                if result.is_ok() {
+                    tracing::info!(
+                        "GPU frequency set to [{}, {}] MHz (took {:?})",
+                        min_clock_mhz,
+                        max_clock_mhz,
+                        request_start_time.elapsed()
+                    );
+                } else {
+                    tracing::warn!(
+                        "Cannot set GPU frequency to [{}, {}] MHz (took {:?})",
+                        min_clock_mhz,
+                        max_clock_mhz,
+                        request_start_time.elapsed()
+                    );
+                }
+                result
+            }
+            Self::ResetGpuLockedClocks => {
+                let result = device.reset_gpu_locked_clocks();
+                if result.is_ok() {
+                    tracing::info!(
+                        "GPU locked clocks reset (took {:?})",
+                        request_start_time.elapsed()
+                    );
+                } else {
+                    tracing::warn!(
+                        "Cannot reset GPU locked clocks (took {:?})",
+                        request_start_time.elapsed()
+                    );
+                }
+                result
+            }
+            Self::SetMemLockedClocks {
+                min_clock_mhz,
+                max_clock_mhz,
+            } => {
+                let result = device.set_mem_locked_clocks(min_clock_mhz, max_clock_mhz);
+                if result.is_ok() {
+                    tracing::info!(
+                        "Memory locked clocks set to [{}, {}] MHz (took {:?})",
+                        min_clock_mhz,
+                        max_clock_mhz,
+                        request_start_time.elapsed()
+                    );
+                } else {
+                    tracing::warn!(
+                        "Cannot set memory locked clocks to [{}, {}] MHz (took {:?})",
+                        min_clock_mhz,
+                        max_clock_mhz,
+                        request_start_time.elapsed()
+                    );
+                }
+                result
+            }
+            Self::ResetMemLockedClocks => {
+                let result = device.reset_mem_locked_clocks();
+                if result.is_ok() {
+                    tracing::info!(
+                        "Memory locked clocks reset (took {:?})",
+                        request_start_time.elapsed()
+                    );
+                } else {
+                    tracing::warn!(
+                        "Cannot reset memory locked clocks (took {:?})",
+                        request_start_time.elapsed()
+                    );
+                }
+                result
+            }
+        }
+    }
+}
+
diff --git a/zeusd/src/error.rs b/zeusd/src/error.rs
index 2cbbe13e..0733dacd 100644
--- a/zeusd/src/error.rs
+++ b/zeusd/src/error.rs
@@ -2,6 +2,9 @@
 //!
 //! This module defines the `ZeusdError` enum, which is used to represent errors
 //! that can occur when handling requests to the Zeus daemon.
+//!
+//! Note that errors that occur during the initialization of the daemon are
+//! handled with `anyhow` and eventually end up terminating the process.
 
 use actix_web::http::StatusCode;
 use actix_web::ResponseError;
@@ -22,6 +25,7 @@ pub enum ZeusdError {
     GpuManagementTaskTerminatedError(usize),
 }
 
+/// This allows us to return a custom HTTP status code for each error variant.
 impl ResponseError for ZeusdError {
     fn status_code(&self) -> StatusCode {
         match self {
diff --git a/zeusd/src/main.rs b/zeusd/src/main.rs
index 413d3fce..78d9af40 100644
--- a/zeusd/src/main.rs
+++ b/zeusd/src/main.rs
@@ -1,4 +1,4 @@
-//! Entry point for the daemon.
+//! Entry point for the Zeus daemon.
 
 use std::net::TcpListener;
 
@@ -20,7 +20,7 @@ async fn main() -> anyhow::Result<()> {
     }
 
     let device_tasks = start_device_tasks()?;
-    tracing::info!("Started device tasks");
+    tracing::info!("Started all device tasks");
 
     let num_workers = config.num_workers.unwrap_or_else(|| {
         std::thread::available_parallelism()
diff --git a/zeusd/src/routes/gpu.rs b/zeusd/src/routes/gpu.rs
index c2387db7..6f9b3825 100644
--- a/zeusd/src/routes/gpu.rs
+++ b/zeusd/src/routes/gpu.rs
@@ -10,36 +10,35 @@ use crate::error::ZeusdError;
 
 /// Macro to generate a handler for a GPU command.
 ///
 /// This macro takes
-/// - the action (set, reset, etc.),
-/// - the API name (power_limit, persistent_mode, etc.),
+/// - the API name (set_power_limit, set_persistent_mode, etc.),
 /// - the method and path for the request handler,
 /// - and a list of `field name <field type>` pairs of the corresponding `GpuCommand` variant.
 ///
 /// Gien this, the macro generates
-/// - a request payload struct named action + API name (e.g., SetPowerLimit) and all the
+/// - a request payload struct named after the API name (e.g., SetPowerLimit) and all the
 ///   fields specified plus `block: bool` to indicate whether the request should block,
 /// - an implementation of `From` for the payload struct to convert it to the
 /// - a handler function that takes the request payload, converts it to a `GpuCommand` variant,
 ///   and sends it to the `GpuManagementTasks` actor.
 ///
 /// Assumptions:
-/// - The `GpuCommand` variant name is a concatenation of the action and API name
-///   (e.g., set and power_limit -> SetPowerLimit).
+/// - The `GpuCommand` variant name is the same as the API name, but the former is camel case
+///   and the latter is snake case (e.g., SetPowerLimit vs. set_power_limit).
 macro_rules! impl_handler_for_gpu_command {
-    ($action:ident, $api:ident, $path:expr, $($field:ident <$ftype:ty>,)*) => {
+    ($api:ident, $path:expr, $($field:ident <$ftype:ty>,)*) => {
         paste! {
             // Request payload structure.
             #[derive(Serialize, Deserialize, Debug)]
-            pub struct [<$action:camel $api:camel>] {
+            pub struct [<$api:camel>] {
                 $(pub $field: $ftype,)*
                 pub block: bool,
             }
 
             // Implement conversion to the GpuCommand variant.
-            impl From<[<$action:camel $api:camel>]> for GpuCommand {
+            impl From<[<$api:camel>]> for GpuCommand {
                 // Prefixing with underscore to avoid lint errors when $field is empty.
-                fn from(_request: [<$action:camel $api:camel>]) -> Self {
-                    GpuCommand::[<$action:camel $api:camel>] {
+                fn from(_request: [<$api:camel>]) -> Self {
+                    GpuCommand::[<$api:camel>] {
                         $($field: _request.$field),*
                     }
                 }
@@ -48,34 +47,34 @@ macro_rules! impl_handler_for_gpu_command {
             // Generate the request handler.
             #[actix_web::$path]
             #[tracing::instrument(
-                skip(gpu, request, device_tasks),
+                skip(gpu_id, request, device_tasks),
                 fields(
-                    gpu_id = %gpu,
+                    gpu_id = %gpu_id,
                     block = %request.block,
                     $($field = %request.$field),*
                 )
             )]
-            pub async fn [<$action:snake _ $api:snake _handler>](
-                gpu: web::Path<usize>,
-                request: web::Json<[<$action:camel $api:camel>]>,
+            async fn [<$api:snake _handler>](
+                gpu_id: web::Path<usize>,
+                request: web::Json<[<$api:camel>]>,
                 device_tasks: web::Data<GpuManagementTasks>,
             ) -> Result<HttpResponse, ZeusdError> {
                 let now = std::time::Instant::now();
 
-                let gpu = gpu.into_inner();
+                let gpu_id = gpu_id.into_inner();
                 let request = request.into_inner();
 
                 tracing::info!(
-                    "Received reqeust to GPU {} ({:?})",
-                    gpu,
+                    "Received request for GPU {} ({:?})",
+                    gpu_id,
                     request,
                 );
 
                 if request.block {
                     device_tasks
-                        .send_command_blocking(gpu, request.into(), now)
+                        .send_command_blocking(gpu_id, request.into(), now)
                         .await?;
                 } else {
-                    device_tasks.send_command_nonblocking(gpu, request.into(), now)?;
+                    device_tasks.send_command_nonblocking(gpu_id, request.into(), now)?;
                 }
 
                 Ok(HttpResponse::Ok().finish())
@@ -85,44 +84,38 @@ macro_rules! impl_handler_for_gpu_command {
 }
 
 impl_handler_for_gpu_command!(
-    set,
-    persistent_mode,
+    set_persistent_mode,
     post("/{gpu_id}/set_persistent_mode"),
     enabled,
 );
 
 impl_handler_for_gpu_command!(
-    set,
-    power_limit,
+    set_power_limit,
     post("/{gpu_id}/set_power_limit"),
     power_limit_mw,
 );
 
 impl_handler_for_gpu_command!(
-    set,
-    gpu_locked_clocks,
+    set_gpu_locked_clocks,
     post("/{gpu_id}/set_gpu_locked_clocks"),
     min_clock_mhz,
     max_clock_mhz,
 );
 
 impl_handler_for_gpu_command!(
-    reset,
-    gpu_locked_clocks,
+    reset_gpu_locked_clocks,
     post("/{gpu_id}/reset_gpu_locked_clocks"),
 );
 
 impl_handler_for_gpu_command!(
-    set,
-    mem_locked_clocks,
+    set_mem_locked_clocks,
     post("/{gpu_id}/set_mem_locked_clocks"),
     min_clock_mhz,
     max_clock_mhz,
 );
 
 impl_handler_for_gpu_command!(
-    reset,
-    mem_locked_clocks,
+    reset_mem_locked_clocks,
    post("/{gpu_id}/reset_mem_locked_clocks"),
 );
diff --git a/zeusd/src/startup.rs b/zeusd/src/startup.rs
index f958f8cf..1036430b 100644
--- a/zeusd/src/startup.rs
+++ b/zeusd/src/startup.rs
@@ -15,6 +15,7 @@ use tracing_subscriber::{EnvFilter, Registry};
 use crate::devices::gpu::{GpuManagementTasks, GpuManager, NvmlGpu};
 use crate::routes::gpu_routes;
 
+/// Initialize tracing with the given sink to write logs to.
 pub fn init_tracing<S>(sink: S) -> anyhow::Result<()>
 where
     S: for<'a> MakeWriter<'a> + Send + Sync + 'static,
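
Reviewer note: the `GpuManager` trait is kept precisely so the management tasks can be exercised without real hardware. Below is a minimal, hypothetical sketch (not part of this patch) of driving `GpuManagementTasks` with a fake GPU. It assumes the crate exposes `zeusd::devices::gpu` and `zeusd::error` as library modules, that `device_count` returns `Result<u32, ZeusdError>` as reconstructed above, and a Tokio runtime with the `macros` and `rt-multi-thread` features enabled.

```rust
use std::time::Instant;

// Assumed library paths; adjust if the crate layout differs.
use zeusd::devices::gpu::{GpuCommand, GpuManagementTasks, GpuManager};
use zeusd::error::ZeusdError;

/// Hypothetical fake GPU that accepts every command and always succeeds.
struct FakeGpu;

impl GpuManager for FakeGpu {
    fn device_count() -> Result<u32, ZeusdError> {
        Ok(1)
    }
    fn set_persistent_mode(&mut self, _enabled: bool) -> Result<(), ZeusdError> {
        Ok(())
    }
    fn set_power_management_limit(&mut self, _power_limit: u32) -> Result<(), ZeusdError> {
        Ok(())
    }
    fn set_gpu_locked_clocks(&mut self, _min: u32, _max: u32) -> Result<(), ZeusdError> {
        Ok(())
    }
    fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError> {
        Ok(())
    }
    fn set_mem_locked_clocks(&mut self, _min: u32, _max: u32) -> Result<(), ZeusdError> {
        Ok(())
    }
    fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError> {
        Ok(())
    }
}

#[tokio::main]
async fn main() {
    // One background task is spawned per GPU; here, a single fake GPU with ID 0.
    let tasks = GpuManagementTasks::start(vec![FakeGpu]).expect("failed to start GPU tasks");

    // Send a command and wait until the (fake) device has executed it,
    // mirroring what the generated HTTP handlers do when `block: true`.
    let result = tasks
        .send_command_blocking(
            0,
            GpuCommand::SetPowerLimit { power_limit_mw: 200_000 },
            Instant::now(),
        )
        .await;
    assert!(result.is_ok());
}
```

The same pattern with `send_command_nonblocking` covers the fire-and-forget path, where results are only logged via tracing.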