From ae08f0cb417b7946753179468f9ed989377a7fa0 Mon Sep 17 00:00:00 2001
From: Jae-Won Chung
Date: Mon, 27 May 2024 19:32:16 -0400
Subject: [PATCH] Allow zeusd dev on MacOS

---
Notes (ignored by `git am`):

* `devices/gpu.rs` becomes `devices/gpu/mod.rs`; it keeps the `GpuManager`
  trait and re-exports exactly one `NvmlGpu`, selected with
  `#[cfg(target_os = ...)]`.
* `gpu/linux.rs` receives the NVML-backed `NvmlGpu` that used to live in
  `gpu.rs`.
* `gpu/macos.rs` adds a stub `NvmlGpu` so the crate builds on macOS:
  `device_count()` reports one device and every setter/resetter returns
  `Ok(())` without touching any hardware.

 zeusd/src/devices/gpu/linux.rs           | 75 ++++++++++++++++++++++
 zeusd/src/devices/gpu/macos.rs           | 48 ++++++++++++++
 zeusd/src/devices/{gpu.rs => gpu/mod.rs} | 82 ++++--------------------
 3 files changed, 135 insertions(+), 70 deletions(-)
 create mode 100644 zeusd/src/devices/gpu/linux.rs
 create mode 100644 zeusd/src/devices/gpu/macos.rs
 rename zeusd/src/devices/{gpu.rs => gpu/mod.rs} (82%)

diff --git a/zeusd/src/devices/gpu/linux.rs b/zeusd/src/devices/gpu/linux.rs
new file mode 100644
index 00000000..bfb5bb55
--- /dev/null
+++ b/zeusd/src/devices/gpu/linux.rs
@@ -0,0 +1,75 @@
+use nvml_wrapper::enums::device::GpuLockedClocksSetting;
+use nvml_wrapper::{Device, Nvml};
+
+use crate::devices::gpu::GpuManager;
+use crate::error::ZeusdError;
+
+#[cfg(target_os = "linux")]
+pub struct NvmlGpu<'n> {
+    _nvml: &'static Nvml,
+    device: Device<'n>,
+}
+
+#[cfg(target_os = "linux")]
+impl NvmlGpu<'static> {
+    pub fn init(index: u32) -> Result<Self, ZeusdError> {
+        // `Device` needs to hold a reference to `Nvml`, meaning that `Nvml` must outlive `Device`.
+        // We can achieve this by leaking a `Box` containing `Nvml` and holding a reference to it.
+        // `Nvml` will actually live until the server terminates inside the GPU management task.
+        let _nvml = Box::leak(Box::new(Nvml::init()?));
+        let device = _nvml.device_by_index(index)?;
+        Ok(Self { _nvml, device })
+    }
+}
+
+#[cfg(target_os = "linux")]
+impl GpuManager for NvmlGpu<'static> {
+    fn device_count() -> Result<u32, ZeusdError> {
+        let nvml = Nvml::init()?;
+        Ok(nvml.device_count()?)
+    }
+
+    #[inline]
+    fn set_persistent_mode(&mut self, enabled: bool) -> Result<(), ZeusdError> {
+        Ok(self.device.set_persistent(enabled)?)
+    }
+
+    #[inline]
+    fn set_power_management_limit(&mut self, power_limit_mw: u32) -> Result<(), ZeusdError> {
+        Ok(self.device.set_power_management_limit(power_limit_mw)?)
+    }
+
+    #[inline]
+    fn set_gpu_locked_clocks(
+        &mut self,
+        min_clock_mhz: u32,
+        max_clock_mhz: u32,
+    ) -> Result<(), ZeusdError> {
+        let setting = GpuLockedClocksSetting::Numeric {
+            min_clock_mhz,
+            max_clock_mhz,
+        };
+        Ok(self.device.set_gpu_locked_clocks(setting)?)
+    }
+
+    #[inline]
+    fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError> {
+        Ok(self.device.reset_gpu_locked_clocks()?)
+    }
+
+    #[inline]
+    fn set_mem_locked_clocks(
+        &mut self,
+        min_clock_mhz: u32,
+        max_clock_mhz: u32,
+    ) -> Result<(), ZeusdError> {
+        Ok(self
+            .device
+            .set_mem_locked_clocks(min_clock_mhz, max_clock_mhz)?)
+    }
+
+    #[inline]
+    fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError> {
+        Ok(self.device.reset_mem_locked_clocks()?)
+    }
+}
diff --git a/zeusd/src/devices/gpu/macos.rs b/zeusd/src/devices/gpu/macos.rs
new file mode 100644
index 00000000..4bf98650
--- /dev/null
+++ b/zeusd/src/devices/gpu/macos.rs
@@ -0,0 +1,48 @@
+use crate::devices::gpu::GpuManager;
+use crate::error::ZeusdError;
+
+pub struct NvmlGpu;
+
+impl NvmlGpu {
+    pub fn init(_index: u32) -> Result<Self, ZeusdError> {
+        Ok(Self)
+    }
+}
+
+impl GpuManager for NvmlGpu {
+    fn device_count() -> Result<u32, ZeusdError> {
+        Ok(1)
+    }
+
+    fn set_persistent_mode(&mut self, _enabled: bool) -> Result<(), ZeusdError> {
+        Ok(())
+    }
+
+    fn set_power_management_limit(&mut self, _power_limit_mw: u32) -> Result<(), ZeusdError> {
+        Ok(())
+    }
+
+    fn set_gpu_locked_clocks(
+        &mut self,
+        _min_clock_mhz: u32,
+        _max_clock_mhz: u32,
+    ) -> Result<(), ZeusdError> {
+        Ok(())
+    }
+
+    fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError> {
+        Ok(())
+    }
+
+    fn set_mem_locked_clocks(
+        &mut self,
+        _min_clock_mhz: u32,
+        _max_clock_mhz: u32,
+    ) -> Result<(), ZeusdError> {
+        Ok(())
+    }
+
+    fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError> {
+        Ok(())
+    }
+}
diff --git a/zeusd/src/devices/gpu.rs b/zeusd/src/devices/gpu/mod.rs
similarity index 82%
rename from zeusd/src/devices/gpu.rs
rename to zeusd/src/devices/gpu/mod.rs
index 9ee77a35..4745f2e3 100644
--- a/zeusd/src/devices/gpu.rs
+++ b/zeusd/src/devices/gpu/mod.rs
@@ -1,9 +1,18 @@
 //! GPU management module that interfaces with NVML
 
-use std::time::Instant;
+#[cfg(target_os = "linux")]
+mod linux;
+
+#[cfg(target_os = "linux")]
+pub use linux::NvmlGpu;
+
+#[cfg(target_os = "macos")]
+mod macos;
 
-use nvml_wrapper::enums::device::GpuLockedClocksSetting;
-use nvml_wrapper::{Device, Nvml};
+#[cfg(target_os = "macos")]
+pub use macos::NvmlGpu;
+
+use std::time::Instant;
 use tokio::sync::mpsc::{Sender, UnboundedReceiver, UnboundedSender};
 use tracing::Span;
 
@@ -33,73 +42,6 @@ pub trait GpuManager {
     fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError>;
 }
 
-pub struct NvmlGpu<'n> {
-    _nvml: &'static Nvml,
-    device: Device<'n>,
-}
-
-impl NvmlGpu<'static> {
-    pub fn init(index: u32) -> Result<Self, ZeusdError> {
-        // `Device` needs to hold a reference to `Nvml`, meaning that `Nvml` must outlive `Device`.
-        // We can achieve this by leaking a `Box` containing `Nvml` and holding a reference to it.
-        // `Nvml` will actually live until the server terminates inside the GPU management task.
-        let _nvml = Box::leak(Box::new(Nvml::init()?));
-        let device = _nvml.device_by_index(index)?;
-        Ok(Self { _nvml, device })
-    }
-}
-
-impl GpuManager for NvmlGpu<'static> {
-    fn device_count() -> Result<u32, ZeusdError> {
-        let nvml = Nvml::init()?;
-        Ok(nvml.device_count()?)
-    }
-
-    #[inline]
-    fn set_persistent_mode(&mut self, enabled: bool) -> Result<(), ZeusdError> {
-        Ok(self.device.set_persistent(enabled)?)
-    }
-
-    #[inline]
-    fn set_power_management_limit(&mut self, power_limit_mw: u32) -> Result<(), ZeusdError> {
-        Ok(self.device.set_power_management_limit(power_limit_mw)?)
-    }
-
-    #[inline]
-    fn set_gpu_locked_clocks(
-        &mut self,
-        min_clock_mhz: u32,
-        max_clock_mhz: u32,
-    ) -> Result<(), ZeusdError> {
-        let setting = GpuLockedClocksSetting::Numeric {
-            min_clock_mhz,
-            max_clock_mhz,
-        };
-        Ok(self.device.set_gpu_locked_clocks(setting)?)
-    }
-
-    #[inline]
-    fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError> {
-        Ok(self.device.reset_gpu_locked_clocks()?)
-    }
-
-    #[inline]
-    fn set_mem_locked_clocks(
-        &mut self,
-        min_clock_mhz: u32,
-        max_clock_mhz: u32,
-    ) -> Result<(), ZeusdError> {
-        Ok(self
-            .device
-            .set_mem_locked_clocks(min_clock_mhz, max_clock_mhz)?)
-    }
-
-    #[inline]
-    fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError> {
-        Ok(self.device.reset_mem_locked_clocks()?)
-    }
-}
-
 /// A request to execute a GPU command.
 ///
 /// This is the type that is sent to the GPU management background task.