From b75e89204594837da0fbe7f6ddf0da246d44b1d0 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Sat, 18 Jan 2025 18:22:57 -0500 Subject: [PATCH 01/19] Add ZeusdRAPLCPU class --- zeus/device/cpu/rapl.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index 20e0e552..9ccd618b 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -261,6 +261,33 @@ def supportsGetDramEnergyConsumption(self) -> bool: return self.dram is not None +class ZeusdRAPLCPU(RAPLCPU): + """Add description.""" + + def __init__( + self, + cpu_index: int, + rapl_dir: str, + zeusd_sock_path: str = "/var/run/zeusd.sock", + ) -> None: + """Add description.""" + super().__init__(cpu_index, rapl_dir) + self.zeusd_sock_path = zeusd_sock_path + + self._client = httpx.Client(transport=httpx.HTTPTransport(uds=zeusd_sock_path)) + self._url_prefix = f"http://zeusd/gpu/{gpu_index}" + + def getTotalEnergyConsumption(self) -> CpuDramMeasurement: + """Add description.""" + # TODO: finish + pass + + def supportsGetDramEnergyConsumption(self) -> bool: + """Add description.""" + # TODO: finish + pass + + class RAPLCPUs(cpu_common.CPUs): """RAPL CPU Manager object, containing individual RAPLCPU objects, abstracting RAPL calls and handling related exceptions.""" From 995971263d6d96c076cd1cfc0740b81f5484257e Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Mon, 20 Jan 2025 13:37:28 -0500 Subject: [PATCH 02/19] Change ZeusdRAPLCPU to make requests to Zeusd --- zeus/device/cpu/rapl.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index 9ccd618b..c0067431 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -271,21 +271,32 @@ def __init__( zeusd_sock_path: str = "/var/run/zeusd.sock", ) -> None: """Add description.""" + self.cpu_index = cpu_index super().__init__(cpu_index, rapl_dir) - self.zeusd_sock_path = zeusd_sock_path 
self._client = httpx.Client(transport=httpx.HTTPTransport(uds=zeusd_sock_path)) self._url_prefix = f"http://zeusd/gpu/{gpu_index}" def getTotalEnergyConsumption(self) -> CpuDramMeasurement: """Add description.""" - # TODO: finish - pass + resp = self._client.post( + self._url_prefix + f"/{self.cpu_index}/get_index_energy", + json={ + "cpu": True, + "dram": True, + }, + ) + if resp.status_code != 200: + raise ZeusdError(f"Failed to get total energy consumption: {resp.text}") + data = resp.json() + cpu_mj = data.get("cpu_energy_uj") / 1000 + dram_mj = data.get("dram_energy_uj") / 1000 + return CpuDramMeasurement(cpu_mj=cpu_mj, dram_mj=dram_mj) def supportsGetDramEnergyConsumption(self) -> bool: """Add description.""" # TODO: finish - pass + return super().supportsGetDramEnergyConsumption() class RAPLCPUs(cpu_common.CPUs): From bf67e56a7819c1515746e625c0f17b5052501b5f Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Mon, 20 Jan 2025 14:09:26 -0500 Subject: [PATCH 03/19] Change RAPLCPUs to use ZeusdRAPLCPU if ZEUSD_SOCK_PATH is set --- zeus/device/cpu/rapl.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index c0067431..0fda6d59 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -319,13 +319,32 @@ def _init_cpus(self) -> None: """Initialize all Intel CPUs.""" self._cpus = [] + cpu_indices = [] def sort_key(dir): return int(dir.split(":")[1]) - for dir in sorted(glob(f"{self.rapl_dir}/intel-rapl:*"), key=sort_key): parts = dir.split(":") if len(parts) > 1 and parts[1].isdigit(): - self._cpus.append(RAPLCPU(int(parts[1]), self.rapl_dir)) + cpu_indices.append(int(parts[1])) + + # If `ZEUSD_SOCK_PATH` is set, always use ZeusdRAPLCPU + if (sock_path := os.environ.get("ZEUSD_SOCK_PATH")) is not None: + if not Path(sock_path).exists(): + raise ZeusdError( + f"ZEUSD_SOCK_PATH points to non-existent file: {sock_path}" + ) + if not Path(sock_path).is_socket(): + raise 
ZeusdError(f"ZEUSD_SOCK_PATH is not a socket: {sock_path}") + if not os.access(sock_path, os.W_OK): + raise ZeusdError(f"ZEUSD_SOCK_PATH is not writable: {sock_path}") + self._cpus = [ + ZeusdRAPLCPU(cpu_index, self.rapl_dir, sock_path) for cpu_index in cpu_indices + ] + else: + self._cpus = [ + RAPLCPU(cpu_index, self.rapl_dir) for cpu_index in cpu_indices + ] + def __del__(self) -> None: """Shuts down the Intel CPU monitoring.""" From c57c02aeb49b31af54a1e8028550fdcdfb359391 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Wed, 22 Jan 2025 18:39:42 -0500 Subject: [PATCH 04/19] Add pathlib import --- zeus/device/cpu/rapl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index 0fda6d59..44349f70 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -18,6 +18,7 @@ import os import time import warnings +from pathlib import Path from functools import lru_cache from glob import glob from multiprocessing.sharedctypes import Synchronized From c9440f31253f60eb832a9fead5945697861b99bf Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Wed, 22 Jan 2025 18:40:30 -0500 Subject: [PATCH 05/19] Add handler to Zeusd for checking if CPU supports DRAM energy --- zeusd/src/devices/cpu/mod.rs | 12 ++++++++++++ zeusd/src/routes/cpu.rs | 25 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/zeusd/src/devices/cpu/mod.rs b/zeusd/src/devices/cpu/mod.rs index f79ee1ee..58e4b50b 100644 --- a/zeusd/src/devices/cpu/mod.rs +++ b/zeusd/src/devices/cpu/mod.rs @@ -35,6 +35,11 @@ pub struct RaplResponse { pub dram_energy_uj: Option, } +#[derive(Serialize, Deserialize, Debug)] +pub struct DramResponse { + pub dram_available: bool, +} + pub trait CpuManager { /// Get the number of CPUs available. 
fn device_count() -> Result; @@ -128,6 +133,8 @@ impl CpuManagementTasks { pub enum CpuCommand { /// Get the CPU and DRAM energy measurement for the CPU index GetIndexEnergy { cpu: bool, dram: bool }, + /// Return if the specified CPU supports DRAM energy measurement + SupportsDramEnergy, /// Stop the monitoring task for CPU and DRAM if they have been started. StopMonitoring, } @@ -177,6 +184,11 @@ impl CpuCommand { dram_energy_uj, }) } + Self::SupportsDramEnergy {} => { + Ok(DramResponse { + dram_available: device.is_dram_available(), + }) + } Self::StopMonitoring {} => { device.stop_monitoring(); Ok(RaplResponse { diff --git a/zeusd/src/routes/cpu.rs b/zeusd/src/routes/cpu.rs index 092c4c74..f2e23005 100644 --- a/zeusd/src/routes/cpu.rs +++ b/zeusd/src/routes/cpu.rs @@ -48,6 +48,31 @@ async fn get_index_energy_handler( Ok(HttpResponse::Ok().json(measurement)) } + +#[actix_web::get("/{cpu_id}/supportsDramEnergy")] +#[tracing::instrument( + skip(cpu_id, _device_tasks), + fields( + cpu_id = %cpu_id, + ) +)] +async fn supports_dram_energy_handler( + cpu_id: web::Path, + _device_tasks: web::Data, +) -> Result { + let now = Instant::now(); + tracing::info!("Received request"); + let cpu_id = cpu_id.into_inner(); + + let answer = _device_tasks + .send_command_blocking(cpu_id, CpuCommand::SupportsDramEnergy, now) + .await?; + + Ok(HttpResponse::Ok().json(answer)) +} + + pub fn cpu_routes(cfg: &mut web::ServiceConfig) { cfg.service(get_index_energy_handler); + cfg.service(supports_dram_energy_handler); } From 465c88e180121b8d913691c0b5ff1895c9dd61f4 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Wed, 22 Jan 2025 19:15:35 -0500 Subject: [PATCH 06/19] Fix return type errors --- zeusd/src/devices/cpu/mod.rs | 31 ++++++++++++++++++++----------- zeusd/src/routes/cpu.rs | 16 +++++++++++++--- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/zeusd/src/devices/cpu/mod.rs b/zeusd/src/devices/cpu/mod.rs index 58e4b50b..5ff7f95f 100644 --- 
a/zeusd/src/devices/cpu/mod.rs +++ b/zeusd/src/devices/cpu/mod.rs @@ -40,6 +40,13 @@ pub struct DramResponse { pub dram_available: bool, } +/// Unified CPU response type +#[derive(Serialize, Deserialize, Debug)] +pub enum CpuResponse { + Rapl(RaplResponse), + Dram(DramResponse), +} + pub trait CpuManager { /// Get the number of CPUs available. fn device_count() -> Result; @@ -60,7 +67,7 @@ pub trait CpuManager { pub type CpuCommandRequest = ( CpuCommand, - Option>>, + Option>>, Instant, Span, ); @@ -94,7 +101,7 @@ impl CpuManagementTasks { cpu_id: usize, command: CpuCommand, request_start_time: Instant, - ) -> Result { + ) -> Result { if cpu_id >= self.senders.len() { return Err(ZeusdError::CpuNotFoundError(cpu_id)); } @@ -163,7 +170,7 @@ impl CpuCommand { &self, device: &mut T, _request_arrival_time: Instant, - ) -> Result + ) -> Result where T: CpuManager, { @@ -179,22 +186,24 @@ impl CpuCommand { } else { None }; - Ok(RaplResponse { + // Wrap the RaplResponse in CpuResponse::Rapl + Ok(CpuResponse::Rapl(RaplResponse { cpu_energy_uj, dram_energy_uj, - }) + })) } - Self::SupportsDramEnergy {} => { - Ok(DramResponse { + Self::SupportsDramEnergy => { + // Wrap the DramResponse in CpuResponse::Dram + Ok(CpuResponse::Dram(DramResponse { dram_available: device.is_dram_available(), - }) + })) } - Self::StopMonitoring {} => { + Self::StopMonitoring => { device.stop_monitoring(); - Ok(RaplResponse { + Ok(CpuResponse::Rapl(RaplResponse { cpu_energy_uj: Some(0), dram_energy_uj: Some(0), - }) + })) } } } diff --git a/zeusd/src/routes/cpu.rs b/zeusd/src/routes/cpu.rs index f2e23005..c6ae5f47 100644 --- a/zeusd/src/routes/cpu.rs +++ b/zeusd/src/routes/cpu.rs @@ -4,7 +4,7 @@ use actix_web::{web, HttpResponse}; use serde::{Deserialize, Serialize}; use std::time::Instant; -use crate::devices::cpu::{CpuCommand, CpuManagementTasks}; +use crate::devices::cpu::{CpuCommand, CpuManagementTasks, CpuResponse}; use crate::error::ZeusdError; #[derive(Serialize, Deserialize, Debug)] @@ -45,7 
+45,12 @@ async fn get_index_energy_handler( .send_command_blocking(cpu_id, request.into(), now) .await?; - Ok(HttpResponse::Ok().json(measurement)) + let response = match measurement { + CpuResponse::Rapl(r) => serde_json::to_value(r)?, + CpuResponse::Dram(d) => serde_json::to_value(d)?, + }; + + Ok(HttpResponse::Ok().json(response)) } @@ -68,7 +73,12 @@ async fn supports_dram_energy_handler( .send_command_blocking(cpu_id, CpuCommand::SupportsDramEnergy, now) .await?; - Ok(HttpResponse::Ok().json(answer)) + let response = match answer { + CpuResponse::Rapl(r) => serde_json::to_value(r)?, + CpuResponse::Dram(d) => serde_json::to_value(d)?, + }; + + Ok(HttpResponse::Ok().json(response)) } From 863d802e639f85d1c2b9d20bdd1ad7b3ae98780d Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Wed, 22 Jan 2025 19:34:39 -0500 Subject: [PATCH 07/19] Fix API response shape --- zeusd/src/devices/cpu/mod.rs | 1 + zeusd/src/routes/cpu.rs | 18 ++++-------------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/zeusd/src/devices/cpu/mod.rs b/zeusd/src/devices/cpu/mod.rs index 5ff7f95f..a328551a 100644 --- a/zeusd/src/devices/cpu/mod.rs +++ b/zeusd/src/devices/cpu/mod.rs @@ -42,6 +42,7 @@ pub struct DramResponse { /// Unified CPU response type #[derive(Serialize, Deserialize, Debug)] +#[serde(untagged)] pub enum CpuResponse { Rapl(RaplResponse), Dram(DramResponse), diff --git a/zeusd/src/routes/cpu.rs b/zeusd/src/routes/cpu.rs index c6ae5f47..d64b6a3d 100644 --- a/zeusd/src/routes/cpu.rs +++ b/zeusd/src/routes/cpu.rs @@ -4,7 +4,7 @@ use actix_web::{web, HttpResponse}; use serde::{Deserialize, Serialize}; use std::time::Instant; -use crate::devices::cpu::{CpuCommand, CpuManagementTasks, CpuResponse}; +use crate::devices::cpu::{CpuCommand, CpuManagementTasks}; use crate::error::ZeusdError; #[derive(Serialize, Deserialize, Debug)] @@ -45,12 +45,7 @@ async fn get_index_energy_handler( .send_command_blocking(cpu_id, request.into(), now) .await?; - let response = match 
measurement { - CpuResponse::Rapl(r) => serde_json::to_value(r)?, - CpuResponse::Dram(d) => serde_json::to_value(d)?, - }; - - Ok(HttpResponse::Ok().json(response)) + Ok(HttpResponse::Ok().json(measurement)) } @@ -73,16 +68,11 @@ async fn supports_dram_energy_handler( .send_command_blocking(cpu_id, CpuCommand::SupportsDramEnergy, now) .await?; - let response = match answer { - CpuResponse::Rapl(r) => serde_json::to_value(r)?, - CpuResponse::Dram(d) => serde_json::to_value(d)?, - }; - - Ok(HttpResponse::Ok().json(response)) + Ok(HttpResponse::Ok().json(answer)) } pub fn cpu_routes(cfg: &mut web::ServiceConfig) { cfg.service(get_index_energy_handler); cfg.service(supports_dram_energy_handler); -} +} \ No newline at end of file From 9dff2367a469cc29051dad1d1f594f1204a9165f Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Wed, 22 Jan 2025 19:52:42 -0500 Subject: [PATCH 08/19] Fix errors in rapl.py --- zeus/device/cpu/rapl.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index 44349f70..8a86b1f8 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -24,6 +24,8 @@ from multiprocessing.sharedctypes import Synchronized from typing import Sequence +import httpx + import zeus.device.cpu.common as cpu_common from zeus.device.cpu.common import CpuDramMeasurement from zeus.device.exception import ZeusBaseCPUError @@ -273,15 +275,14 @@ def __init__( ) -> None: """Add description.""" self.cpu_index = cpu_index - super().__init__(cpu_index, rapl_dir) self._client = httpx.Client(transport=httpx.HTTPTransport(uds=zeusd_sock_path)) - self._url_prefix = f"http://zeusd/gpu/{gpu_index}" + self._url_prefix = f"http://zeusd/cpu/{cpu_index}" def getTotalEnergyConsumption(self) -> CpuDramMeasurement: """Add description.""" resp = self._client.post( - self._url_prefix + f"/{self.cpu_index}/get_index_energy", + self._url_prefix + f"/get_index_energy", json={ "cpu": True, "dram": 
True, @@ -289,15 +290,24 @@ def getTotalEnergyConsumption(self) -> CpuDramMeasurement: ) if resp.status_code != 200: raise ZeusdError(f"Failed to get total energy consumption: {resp.text}") + data = resp.json() - cpu_mj = data.get("cpu_energy_uj") / 1000 - dram_mj = data.get("dram_energy_uj") / 1000 + cpu_uj = data.get("cpu_energy_uj") + dram_uj = data.get("dram_energy_uj") + cpu_mj = None if cpu_uj is None else cpu_uj / 1000 + dram_mj = None if dram_uj is None else dram_uj / 1000 + return CpuDramMeasurement(cpu_mj=cpu_mj, dram_mj=dram_mj) def supportsGetDramEnergyConsumption(self) -> bool: """Add description.""" - # TODO: finish - return super().supportsGetDramEnergyConsumption() + resp = self._client.get( + self._url_prefix + f"/supportsDramEnergy", + ) + if resp.status_code != 200: + raise ZeusdError(f"Failed to get whether DRAM energy is supported: {resp.text}") + data = resp.json() + return data.get("dram_available") class RAPLCPUs(cpu_common.CPUs): From f6b0dd1ce2f3805fa86d5ae66f93b5389d268cc8 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Fri, 24 Jan 2025 11:30:55 -0500 Subject: [PATCH 09/19] Add docstrings and minor cleanups --- zeus/device/cpu/rapl.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index 8a86b1f8..1598e783 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -265,22 +265,30 @@ def supportsGetDramEnergyConsumption(self) -> bool: class ZeusdRAPLCPU(RAPLCPU): - """Add description.""" + """A RAPLCPU that interfaces with RAPL via zeusd. + + The parent RAPLCPU class requires root privileges to interface with RAPL. + ZeusdRAPLCPU (this class) overrides RAPLCPU's methods so that they instead send requests + to the Zeus daemon, which will interface with RAPL on behalf of ZeusdRAPLCPU. As a result, + ZeusdRAPLCPU does not need root privileges to monitor CPU and DRAM energy consumption. 
+ + See [here](https://ml.energy/zeus/getting_started/#system-privileges) + for details on system privileges required. + """ def __init__( self, cpu_index: int, - rapl_dir: str, zeusd_sock_path: str = "/var/run/zeusd.sock", ) -> None: - """Add description.""" + """Initialize the Intel CPU with a specified index.""" self.cpu_index = cpu_index self._client = httpx.Client(transport=httpx.HTTPTransport(uds=zeusd_sock_path)) self._url_prefix = f"http://zeusd/cpu/{cpu_index}" def getTotalEnergyConsumption(self) -> CpuDramMeasurement: - """Add description.""" + """Returns the total energy consumption of the specified powerzone. Units: mJ.""" resp = self._client.post( self._url_prefix + f"/get_index_energy", json={ @@ -300,14 +308,17 @@ def getTotalEnergyConsumption(self) -> CpuDramMeasurement: return CpuDramMeasurement(cpu_mj=cpu_mj, dram_mj=dram_mj) def supportsGetDramEnergyConsumption(self) -> bool: - """Add description.""" + """Returns True if the specified CPU powerzone supports retrieving the subpackage energy consumption.""" resp = self._client.get( self._url_prefix + f"/supportsDramEnergy", ) if resp.status_code != 200: raise ZeusdError(f"Failed to get whether DRAM energy is supported: {resp.text}") data = resp.json() - return data.get("dram_available") + dram_available = data.get("dram_available") + if dram_available is None: + raise ZeusdError(f"Failed to get whether DRAM energy is supported.") + return dram_available class RAPLCPUs(cpu_common.CPUs): @@ -349,7 +360,7 @@ def sort_key(dir): if not os.access(sock_path, os.W_OK): raise ZeusdError(f"ZEUSD_SOCK_PATH is not writable: {sock_path}") self._cpus = [ - ZeusdRAPLCPU(cpu_index, self.rapl_dir, sock_path) for cpu_index in cpu_indices + ZeusdRAPLCPU(cpu_index, sock_path) for cpu_index in cpu_indices ] else: self._cpus = [ From 557f6c77c677fb51b7f74676bc55a596a8dc5d2a Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Sun, 26 Jan 2025 10:30:53 -0500 Subject: [PATCH 10/19] Add newline at end of file --- 
zeusd/src/routes/cpu.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zeusd/src/routes/cpu.rs b/zeusd/src/routes/cpu.rs index d64b6a3d..f2e23005 100644 --- a/zeusd/src/routes/cpu.rs +++ b/zeusd/src/routes/cpu.rs @@ -75,4 +75,4 @@ async fn supports_dram_energy_handler( pub fn cpu_routes(cfg: &mut web::ServiceConfig) { cfg.service(get_index_energy_handler); cfg.service(supports_dram_energy_handler); -} \ No newline at end of file +} From db2d474c3c3622cd3f7cd05579ad5253e7eb5d5b Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Sun, 26 Jan 2025 10:31:50 -0500 Subject: [PATCH 11/19] Add test for supportsDramEnergy route --- zeusd/tests/cpu.rs | 18 ++++++++++++++++++ zeusd/tests/helpers/mod.rs | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/zeusd/tests/cpu.rs b/zeusd/tests/cpu.rs index 35c5a92b..2be91f71 100644 --- a/zeusd/tests/cpu.rs +++ b/zeusd/tests/cpu.rs @@ -154,3 +154,21 @@ async fn test_invalid_requests() { .expect("Failed to send request"); assert_eq!(resp.status(), 400); } + +#[tokio::test] +async fn test_supports_dram_energy() { + let app = TestApp::start().await; + let url = format!("http://127.0.0.1:{}/cpu/0/supportsDramEnergy", app.port); + let client = reqwest::Client::new(); + + let resp = client + .get(url) + .send() + .await + .expect("Failed to send request"); + assert_eq!(resp.status(), 200); + + let dram_response: DramResponse = serde_json::from_str(&resp.text().await.unwrap()) + .expect("Failed to deserialize response body"); + assert_eq!(dram_response.dram_available, true); +} diff --git a/zeusd/tests/helpers/mod.rs b/zeusd/tests/helpers/mod.rs index 610f1ca3..9a563ad3 100644 --- a/zeusd/tests/helpers/mod.rs +++ b/zeusd/tests/helpers/mod.rs @@ -266,7 +266,7 @@ impl_zeusd_request_cpu!(GetIndexEnergy); /// A test application that starts a server over TCP and provides helper methods /// for sending requests and fetching what happened to the fake GPUs. 
pub struct TestApp { - port: u16, + pub port: u16, observers: Vec, cpu_injectors: Vec, } From ce89581e2a36317db9f557ac35991ac168967c39 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Sun, 26 Jan 2025 10:44:27 -0500 Subject: [PATCH 12/19] Add missing import --- zeusd/tests/cpu.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/zeusd/tests/cpu.rs b/zeusd/tests/cpu.rs index 2be91f71..2455f0de 100644 --- a/zeusd/tests/cpu.rs +++ b/zeusd/tests/cpu.rs @@ -1,6 +1,7 @@ mod helpers; use zeusd::devices::cpu::RaplResponse; +use zeusd::devices::cpu::DramResponse; use zeusd::routes::cpu::GetIndexEnergy; use crate::helpers::{TestApp, ZeusdRequest}; From 5e579546adc40f4faa9b816222e330604192028f Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Sun, 26 Jan 2025 14:52:05 -0500 Subject: [PATCH 13/19] Apply linting --- zeus/device/cpu/rapl.py | 9 ++++++--- zeusd/src/routes/cpu.rs | 2 -- zeusd/tests/cpu.rs | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index 1598e783..e5dee992 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -298,7 +298,7 @@ def getTotalEnergyConsumption(self) -> CpuDramMeasurement: ) if resp.status_code != 200: raise ZeusdError(f"Failed to get total energy consumption: {resp.text}") - + data = resp.json() cpu_uj = data.get("cpu_energy_uj") dram_uj = data.get("dram_energy_uj") @@ -313,7 +313,9 @@ def supportsGetDramEnergyConsumption(self) -> bool: self._url_prefix + f"/supportsDramEnergy", ) if resp.status_code != 200: - raise ZeusdError(f"Failed to get whether DRAM energy is supported: {resp.text}") + raise ZeusdError( + f"Failed to get whether DRAM energy is supported: {resp.text}" + ) data = resp.json() dram_available = data.get("dram_available") if dram_available is None: @@ -342,8 +344,10 @@ def _init_cpus(self) -> None: self._cpus = [] cpu_indices = [] + def sort_key(dir): return int(dir.split(":")[1]) + for dir in sorted(glob(f"{self.rapl_dir}/intel-rapl:*"), 
key=sort_key): parts = dir.split(":") if len(parts) > 1 and parts[1].isdigit(): @@ -367,7 +371,6 @@ def sort_key(dir): RAPLCPU(cpu_index, self.rapl_dir) for cpu_index in cpu_indices ] - def __del__(self) -> None: """Shuts down the Intel CPU monitoring.""" pass diff --git a/zeusd/src/routes/cpu.rs b/zeusd/src/routes/cpu.rs index f2e23005..bf4f53ec 100644 --- a/zeusd/src/routes/cpu.rs +++ b/zeusd/src/routes/cpu.rs @@ -48,7 +48,6 @@ async fn get_index_energy_handler( Ok(HttpResponse::Ok().json(measurement)) } - #[actix_web::get("/{cpu_id}/supportsDramEnergy")] #[tracing::instrument( skip(cpu_id, _device_tasks), @@ -71,7 +70,6 @@ async fn supports_dram_energy_handler( Ok(HttpResponse::Ok().json(answer)) } - pub fn cpu_routes(cfg: &mut web::ServiceConfig) { cfg.service(get_index_energy_handler); cfg.service(supports_dram_energy_handler); diff --git a/zeusd/tests/cpu.rs b/zeusd/tests/cpu.rs index 2455f0de..e55bd23d 100644 --- a/zeusd/tests/cpu.rs +++ b/zeusd/tests/cpu.rs @@ -1,7 +1,7 @@ mod helpers; -use zeusd::devices::cpu::RaplResponse; use zeusd::devices::cpu::DramResponse; +use zeusd::devices::cpu::RaplResponse; use zeusd::routes::cpu::GetIndexEnergy; use crate::helpers::{TestApp, ZeusdRequest}; @@ -161,7 +161,7 @@ async fn test_supports_dram_energy() { let app = TestApp::start().await; let url = format!("http://127.0.0.1:{}/cpu/0/supportsDramEnergy", app.port); let client = reqwest::Client::new(); - + let resp = client .get(url) .send() @@ -170,6 +170,6 @@ async fn test_supports_dram_energy() { assert_eq!(resp.status(), 200); let dram_response: DramResponse = serde_json::from_str(&resp.text().await.unwrap()) - .expect("Failed to deserialize response body"); + .expect("Failed to deserialize response body"); assert_eq!(dram_response.dram_available, true); } From ea2d8ee4802321e426e316369e14e04615b3a7e2 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Mon, 27 Jan 2025 12:57:01 -0500 Subject: [PATCH 14/19] Fix ruff issues --- zeus/device/cpu/rapl.py | 7 ++++--- 1 file 
changed, 4 insertions(+), 3 deletions(-) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index e5dee992..fbe8d9c0 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -29,6 +29,7 @@ import zeus.device.cpu.common as cpu_common from zeus.device.cpu.common import CpuDramMeasurement from zeus.device.exception import ZeusBaseCPUError +from zeus.device.exception import ZeusdError from zeus.utils.logging import get_logger logger = get_logger(name=__name__) @@ -290,7 +291,7 @@ def __init__( def getTotalEnergyConsumption(self) -> CpuDramMeasurement: """Returns the total energy consumption of the specified powerzone. Units: mJ.""" resp = self._client.post( - self._url_prefix + f"/get_index_energy", + self._url_prefix + "/get_index_energy", json={ "cpu": True, "dram": True, @@ -310,7 +311,7 @@ def getTotalEnergyConsumption(self) -> CpuDramMeasurement: def supportsGetDramEnergyConsumption(self) -> bool: """Returns True if the specified CPU powerzone supports retrieving the subpackage energy consumption.""" resp = self._client.get( - self._url_prefix + f"/supportsDramEnergy", + self._url_prefix + "/supportsDramEnergy", ) if resp.status_code != 200: raise ZeusdError( @@ -319,7 +320,7 @@ def supportsGetDramEnergyConsumption(self) -> bool: data = resp.json() dram_available = data.get("dram_available") if dram_available is None: - raise ZeusdError(f"Failed to get whether DRAM energy is supported.") + raise ZeusdError("Failed to get whether DRAM energy is supported.") return dram_available From 97d9f0cd05200a5bc8c3d43467503b6aa737b559 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Mon, 27 Jan 2025 22:58:08 -0500 Subject: [PATCH 15/19] Remove incorrect assumption about cpu_energy_uj None possibility --- zeus/device/cpu/rapl.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index fbe8d9c0..7697b60a 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -301,10 
+301,9 @@ def getTotalEnergyConsumption(self) -> CpuDramMeasurement: raise ZeusdError(f"Failed to get total energy consumption: {resp.text}") data = resp.json() - cpu_uj = data.get("cpu_energy_uj") + cpu_mj = data["cpu_energy_uj"] / 1000 dram_uj = data.get("dram_energy_uj") - cpu_mj = None if cpu_uj is None else cpu_uj / 1000 - dram_mj = None if dram_uj is None else dram_uj / 1000 + dram_mj = dram_uj / 1000 if dram_uj is not None else None return CpuDramMeasurement(cpu_mj=cpu_mj, dram_mj=dram_mj) From 552fafab2a062819b6ea9b093f54b2cbf36836cd Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Mon, 27 Jan 2025 23:08:54 -0500 Subject: [PATCH 16/19] Rename endpoint to follow consistent casing --- zeus/device/cpu/rapl.py | 2 +- zeusd/src/routes/cpu.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index 7697b60a..a8bfb7c1 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -310,7 +310,7 @@ def getTotalEnergyConsumption(self) -> CpuDramMeasurement: def supportsGetDramEnergyConsumption(self) -> bool: """Returns True if the specified CPU powerzone supports retrieving the subpackage energy consumption.""" resp = self._client.get( - self._url_prefix + "/supportsDramEnergy", + self._url_prefix + "/supports_dram_energy", ) if resp.status_code != 200: raise ZeusdError( diff --git a/zeusd/src/routes/cpu.rs b/zeusd/src/routes/cpu.rs index bf4f53ec..df798636 100644 --- a/zeusd/src/routes/cpu.rs +++ b/zeusd/src/routes/cpu.rs @@ -48,7 +48,7 @@ async fn get_index_energy_handler( Ok(HttpResponse::Ok().json(measurement)) } -#[actix_web::get("/{cpu_id}/supportsDramEnergy")] +#[actix_web::get("/{cpu_id}/supports_dram_energy")] #[tracing::instrument( skip(cpu_id, _device_tasks), fields( From 4ea715c0a0a72bd4dd07f4425a8658a25899c50e Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Mon, 27 Jan 2025 23:12:29 -0500 Subject: [PATCH 17/19] Rename DramResponse to DramAvailabilityResponse --- 
zeusd/src/devices/cpu/mod.rs | 8 ++++---- zeusd/tests/cpu.rs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/zeusd/src/devices/cpu/mod.rs b/zeusd/src/devices/cpu/mod.rs index a328551a..fbfc6c44 100644 --- a/zeusd/src/devices/cpu/mod.rs +++ b/zeusd/src/devices/cpu/mod.rs @@ -36,7 +36,7 @@ pub struct RaplResponse { } #[derive(Serialize, Deserialize, Debug)] -pub struct DramResponse { +pub struct DramAvailabilityResponse { pub dram_available: bool, } @@ -45,7 +45,7 @@ pub struct DramResponse { #[serde(untagged)] pub enum CpuResponse { Rapl(RaplResponse), - Dram(DramResponse), + Dram(DramAvailabilityResponse), } pub trait CpuManager { @@ -194,8 +194,8 @@ impl CpuCommand { })) } Self::SupportsDramEnergy => { - // Wrap the DramResponse in CpuResponse::Dram - Ok(CpuResponse::Dram(DramResponse { + // Wrap the DramAvailabilityResponse in CpuResponse::Dram + Ok(CpuResponse::Dram(DramAvailabilityResponse { dram_available: device.is_dram_available(), })) } diff --git a/zeusd/tests/cpu.rs b/zeusd/tests/cpu.rs index e55bd23d..a1bc4591 100644 --- a/zeusd/tests/cpu.rs +++ b/zeusd/tests/cpu.rs @@ -1,6 +1,6 @@ mod helpers; -use zeusd::devices::cpu::DramResponse; +use zeusd::devices::cpu::DramAvailabilityResponse; use zeusd::devices::cpu::RaplResponse; use zeusd::routes::cpu::GetIndexEnergy; @@ -169,7 +169,7 @@ async fn test_supports_dram_energy() { .expect("Failed to send request"); assert_eq!(resp.status(), 200); - let dram_response: DramResponse = serde_json::from_str(&resp.text().await.unwrap()) + let dram_response: DramAvailabilityResponse = serde_json::from_str(&resp.text().await.unwrap()) .expect("Failed to deserialize response body"); assert_eq!(dram_response.dram_available, true); } From 71189a8e704d3f140fe517e0943ac8e4a4ec26e1 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Mon, 27 Jan 2025 23:14:34 -0500 Subject: [PATCH 18/19] Merge imports for Zeus exceptions --- zeus/device/cpu/rapl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/zeus/device/cpu/rapl.py b/zeus/device/cpu/rapl.py index a8bfb7c1..2b8c5176 100644 --- a/zeus/device/cpu/rapl.py +++ b/zeus/device/cpu/rapl.py @@ -28,8 +28,7 @@ import zeus.device.cpu.common as cpu_common from zeus.device.cpu.common import CpuDramMeasurement -from zeus.device.exception import ZeusBaseCPUError -from zeus.device.exception import ZeusdError +from zeus.device.exception import ZeusBaseCPUError, ZeusdError from zeus.utils.logging import get_logger logger = get_logger(name=__name__) From 4cbe58f8e834e179f5f76aefb5cedbedc67bdda8 Mon Sep 17 00:00:00 2001 From: Jisang Ahn Date: Mon, 27 Jan 2025 23:36:54 -0500 Subject: [PATCH 19/19] Fix incorrect path name in test --- zeusd/tests/cpu.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zeusd/tests/cpu.rs b/zeusd/tests/cpu.rs index a1bc4591..49359e48 100644 --- a/zeusd/tests/cpu.rs +++ b/zeusd/tests/cpu.rs @@ -159,7 +159,7 @@ async fn test_invalid_requests() { #[tokio::test] async fn test_supports_dram_energy() { let app = TestApp::start().await; - let url = format!("http://127.0.0.1:{}/cpu/0/supportsDramEnergy", app.port); + let url = format!("http://127.0.0.1:{}/cpu/0/supports_dram_energy", app.port); let client = reqwest::Client::new(); let resp = client