Skip to content

Commit

Permalink
Allow zeusd dev on MacOS
Browse files Browse the repository at this point in the history
  • Loading branch information
jaywonchung committed May 27, 2024
1 parent b2e3f55 commit ae08f0c
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 70 deletions.
75 changes: 75 additions & 0 deletions zeusd/src/devices/gpu/linux.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
use nvml_wrapper::enums::device::GpuLockedClocksSetting;
use nvml_wrapper::error::{Device, Nvml};

use crate::devices::gpu::GpuManager;
use crate::error::ZeusdError;

/// A single GPU controlled through NVML (Linux build only).
///
/// Pairs an NVML library instance with a device handle obtained from it.
#[cfg(target_os = "linux")]
pub struct NvmlGpu<'n> {
// NVML instance the device handle borrows from; held as a `'static` reference.
_nvml: &'static Nvml,
// NVML device handle that the `GpuManager` methods operate on.
device: Device<'n>,
}

#[cfg(target_os = "linux")]
impl NvmlGpu<'static> {
pub fn init(index: u32) -> Result<Self, ZeusdError> {
// `Device` needs to hold a reference to `Nvml`, meaning that `Nvml` must outlive `Device`.
// We can achieve this by leaking a `Box` containing `Nvml` and holding a reference to it.
// `Nvml` will actually live until the server terminates inside the GPU management task.
let _nvml = Box::leak(Box::new(Nvml::init()?));
let device = _nvml.device_by_index(index)?;
Ok(Self { _nvml, device })
}
}

#[cfg(target_os = "linux")]
impl GpuManager for NvmlGpu<'static> {
fn device_count() -> Result<u32, ZeusdError> {
let nvml = Nvml::init()?;
Ok(nvml.device_count()?)
}

#[inline]
fn set_persistent_mode(&mut self, enabled: bool) -> Result<(), ZeusdError> {
Ok(self.device.set_persistent(enabled)?)
}

#[inline]
fn set_power_management_limit(&mut self, power_limit_mw: u32) -> Result<(), ZeusdError> {
Ok(self.device.set_power_management_limit(power_limit_mw)?)
}

#[inline]
fn set_gpu_locked_clocks(
&mut self,
min_clock_mhz: u32,
max_clock_mhz: u32,
) -> Result<(), ZeusdError> {
let setting = GpuLockedClocksSetting::Numeric {
min_clock_mhz,
max_clock_mhz,
};
Ok(self.device.set_gpu_locked_clocks(setting)?)
}

#[inline]
fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError> {
Ok(self.device.reset_gpu_locked_clocks()?)
}

#[inline]
fn set_mem_locked_clocks(
&mut self,
min_clock_mhz: u32,
max_clock_mhz: u32,
) -> Result<(), ZeusdError> {
Ok(self
.device
.set_mem_locked_clocks(min_clock_mhz, max_clock_mhz)?)
}

#[inline]
fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError> {
Ok(self.device.reset_mem_locked_clocks()?)
}
}
48 changes: 48 additions & 0 deletions zeusd/src/devices/gpu/macos.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use crate::devices::gpu::GpuManager;
use crate::error::ZeusdError;

/// Stand-in GPU type for this platform module.
///
/// Carries no state; it exists so code written against `NvmlGpu` still has a type
/// to use when the NVML-backed implementation is not compiled in.
pub struct NvmlGpu;

impl NvmlGpu {
/// Creates the stub GPU. The index is ignored and this never fails.
pub fn init(_index: u32) -> Result<Self, ZeusdError> {
Ok(Self)
}
}

/// No-op `GpuManager` implementation: every setter accepts its arguments, touches
/// nothing, and reports success.
impl GpuManager for NvmlGpu {
    /// Always reports exactly one device.
    fn device_count() -> Result<u32, ZeusdError> {
        // Fixed value returned by the stub; no hardware is queried.
        const STUB_DEVICE_COUNT: u32 = 1;
        Ok(STUB_DEVICE_COUNT)
    }

    /// Does nothing with `_enabled` and returns `Ok(())`.
    fn set_persistent_mode(&mut self, _enabled: bool) -> Result<(), ZeusdError> {
        Ok(())
    }

    /// Does nothing with `_power_limit_mw` and returns `Ok(())`.
    fn set_power_management_limit(&mut self, _power_limit_mw: u32) -> Result<(), ZeusdError> {
        Ok(())
    }

    /// Does nothing with the requested clock range and returns `Ok(())`.
    fn set_gpu_locked_clocks(
        &mut self,
        _min_clock_mhz: u32,
        _max_clock_mhz: u32,
    ) -> Result<(), ZeusdError> {
        Ok(())
    }

    /// Returns `Ok(())` without doing anything.
    fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError> {
        Ok(())
    }

    /// Does nothing with the requested clock range and returns `Ok(())`.
    fn set_mem_locked_clocks(
        &mut self,
        _min_clock_mhz: u32,
        _max_clock_mhz: u32,
    ) -> Result<(), ZeusdError> {
        Ok(())
    }

    /// Returns `Ok(())` without doing anything.
    fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError> {
        Ok(())
    }
}
82 changes: 12 additions & 70 deletions zeusd/src/devices/gpu.rs → zeusd/src/devices/gpu/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
//! GPU management module that interfaces with NVML
use std::time::Instant;
#[cfg(target_os = "linux")]
mod linux;

#[cfg(target_os = "linux")]
pub use linux::NvmlGpu;

#[cfg(target_os = "macos")]
mod macos;

use nvml_wrapper::enums::device::GpuLockedClocksSetting;
use nvml_wrapper::{Device, Nvml};
#[cfg(target_os = "macos")]
pub use macos::NvmlGpu;

use std::time::Instant;
use tokio::sync::mpsc::{Sender, UnboundedReceiver, UnboundedSender};
use tracing::Span;

Expand Down Expand Up @@ -33,73 +42,6 @@ pub trait GpuManager {
fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError>;
}

/// A single GPU controlled through NVML.
///
/// Pairs an NVML library instance with a device handle obtained from it.
pub struct NvmlGpu<'n> {
// NVML instance the device handle borrows from; held as a `'static` reference.
_nvml: &'static Nvml,
// NVML device handle that the `GpuManager` methods operate on.
device: Device<'n>,
}

impl NvmlGpu<'static> {
pub fn init(index: u32) -> Result<Self, ZeusdError> {
// `Device` needs to hold a reference to `Nvml`, meaning that `Nvml` must outlive `Device`.
// We can achieve this by leaking a `Box` containing `Nvml` and holding a reference to it.
// `Nvml` will actually live until the server terminates inside the GPU management task.
let _nvml = Box::leak(Box::new(Nvml::init()?));
let device = _nvml.device_by_index(index)?;
Ok(Self { _nvml, device })
}
}

impl GpuManager for NvmlGpu<'static> {
fn device_count() -> Result<u32, ZeusdError> {
let nvml = Nvml::init()?;
Ok(nvml.device_count()?)
}

#[inline]
fn set_persistent_mode(&mut self, enabled: bool) -> Result<(), ZeusdError> {
Ok(self.device.set_persistent(enabled)?)
}

#[inline]
fn set_power_management_limit(&mut self, power_limit_mw: u32) -> Result<(), ZeusdError> {
Ok(self.device.set_power_management_limit(power_limit_mw)?)
}

#[inline]
fn set_gpu_locked_clocks(
&mut self,
min_clock_mhz: u32,
max_clock_mhz: u32,
) -> Result<(), ZeusdError> {
let setting = GpuLockedClocksSetting::Numeric {
min_clock_mhz,
max_clock_mhz,
};
Ok(self.device.set_gpu_locked_clocks(setting)?)
}

#[inline]
fn reset_gpu_locked_clocks(&mut self) -> Result<(), ZeusdError> {
Ok(self.device.reset_gpu_locked_clocks()?)
}

#[inline]
fn set_mem_locked_clocks(
&mut self,
min_clock_mhz: u32,
max_clock_mhz: u32,
) -> Result<(), ZeusdError> {
Ok(self
.device
.set_mem_locked_clocks(min_clock_mhz, max_clock_mhz)?)
}

#[inline]
fn reset_mem_locked_clocks(&mut self) -> Result<(), ZeusdError> {
Ok(self.device.reset_mem_locked_clocks()?)
}
}

/// A request to execute a GPU command.
///
/// This is the type that is sent to the GPU management background task.
Expand Down

0 comments on commit ae08f0c

Please sign in to comment.