diff --git a/CHANGELOG.md b/CHANGELOG.md index 071ac4f2..7757192d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,10 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Changes -- **Added Pytest functionality to test amdsmi API calls in Python**. +- **Added Pytest functionality to test amdsmi API calls in Python**. + +- **Changed the `power` parameter in `amdsmi_get_energy_count()` to `energy_accumulator`**. +Changes propagate forwards into the python interface as well; however, we are maintaining backwards compatibility and keeping the `power` field in the python API until ROCm 6.4. ### Removals @@ -28,7 +31,7 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Upcoming changes -- N/A +- **Python API for `amdsmi_get_energy_count()` will deprecate the `power` field in ROCm 6.4 and use `energy_accumulator` field instead**. ## amd_smi_lib for ROCm 6.2.1 @@ -46,9 +49,9 @@ Guest VMs can view enabled/disabled ras features that are on Host cards. ### Fixes -- **Fixed TypeError in `amd-smi process -G`**. +- **Fixed TypeError in `amd-smi process -G`**. -- **Updated CLI error strings to handle empty and invalid GPU/CPU inputs**. +- **Updated CLI error strings to handle empty and invalid GPU/CPU inputs**. - **Fixed Guest VM showing passthrough options**. 
diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 63e4298f..284b093d 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -1884,7 +1884,7 @@ def metric_gpu(self, args, multiple_devices=False, watching_output=False, gpu=No try: energy_dict = amdsmi_interface.amdsmi_get_energy_count(args.gpu) - energy = energy_dict['power'] * round(energy_dict['counter_resolution'], 1) + energy = round(energy_dict["energy_accumulator"] * energy_dict["counter_resolution"], 3) energy /= 1000000 energy = round(energy, 3) diff --git a/docs/how-to/using-amdsmi-for-python.md b/docs/how-to/using-amdsmi-for-python.md index 5ab542f9..6611d7b4 100644 --- a/docs/how-to/using-amdsmi-for-python.md +++ b/docs/how-to/using-amdsmi-for-python.md @@ -1534,7 +1534,8 @@ except AmdSmiException as e: ### amdsmi_get_energy_count -Description: Get the energy accumulator counter of the device. +Description: Get the energy accumulator counter information of the device. +energy_accumulator * counter_resolution = total_energy_consumption in micro-Joules It is not supported on virtual machine guest Input parameters: @@ -1545,7 +1546,8 @@ Output: Dictionary with fields Field | Content ---|--- -`power` | power +`power` | counter for energy accumulation since last restart/gpu reset (Deprecating in 6.4) +`energy_accumulator` | counter for energy accumulation since last restart/gpu reset `counter_resolution` | counter resolution `timestamp` | timestamp @@ -1564,8 +1566,8 @@ try: print("No GPUs on machine") else: for device in devices: - power = amdsmi_get_energy_count(device) - print(power) + energy_dict = amdsmi_get_energy_count(device) + print(energy_dict) except AmdSmiException as e: print(e) ``` diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index 8be2c18b..2268b303 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -2382,14 +2382,13 @@ amdsmi_status_t amdsmi_set_gpu_pci_bandwidth(amdsmi_processor_handle processor_h * 
@p power, and a pointer to a uint64_t @p timestamp, this function will write * amount of energy consumed to the uint64_t pointed to by @p power, * and the timestamp to the uint64_t pointed to by @p timestamp. - * The amdsmi_get_power_ave() is an average of a short time. This function - * accumulates all energy consumed. + * This function accumulates all energy consumed. * * @param[in] processor_handle a processor handle * @param[in,out] counter_resolution resolution of the counter @p power in * micro Joules * - * @param[in,out] power a pointer to uint64_t to which the energy + * @param[in,out] energy_accumulator a pointer to uint64_t to which the energy * counter will be written * If this parameter is nullptr, this function will return * ::AMDSMI_STATUS_INVAL if the function is supported with the provided, @@ -2402,7 +2401,7 @@ amdsmi_status_t amdsmi_set_gpu_pci_bandwidth(amdsmi_processor_handle processor_h * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail */ amdsmi_status_t -amdsmi_get_energy_count(amdsmi_processor_handle processor_handle, uint64_t *power, +amdsmi_get_energy_count(amdsmi_processor_handle processor_handle, uint64_t *energy_accumulator, float *counter_resolution, uint64_t *timestamp); /** @} End PowerQuer */ diff --git a/py-interface/README.md b/py-interface/README.md index 5ab542f9..3bbaa7d4 100644 --- a/py-interface/README.md +++ b/py-interface/README.md @@ -1534,7 +1534,8 @@ except AmdSmiException as e: ### amdsmi_get_energy_count -Description: Get the energy accumulator counter of the device. +Description: Get the energy accumulator counter information of the device. 
+energy_accumulator * counter_resolution = total_energy_consumption in micro-Joules It is not supported on virtual machine guest Input parameters: @@ -1545,7 +1546,8 @@ Output: Dictionary with fields Field | Content ---|--- -`power` | power +`power` | counter for energy accumulation since last restart/gpu reset (Deprecating in 6.4) +`energy_accumulator` | counter for energy accumulation since last restart/gpu reset `counter_resolution` | counter resolution `timestamp` | timestamp @@ -1564,8 +1566,8 @@ try: print("No GPUs on machine") else: for device in devices: - power = amdsmi_get_energy_count(device) - print(power) + energy_dict = amdsmi_get_energy_count(device) + print(energy_dict) except AmdSmiException as e: print(e) ``` @@ -1608,7 +1610,7 @@ except AmdSmiException as e: ### amdsmi_set_gpu_od_clk_info -Description: This function sets the clock frequency information +Description: This function sets the clock frequency information. It is not supported on virtual machine guest Input parameters: diff --git a/py-interface/amdsmi_interface.py b/py-interface/amdsmi_interface.py index 8c001011..4c80f9d8 100644 --- a/py-interface/amdsmi_interface.py +++ b/py-interface/amdsmi_interface.py @@ -3105,17 +3105,18 @@ def amdsmi_get_energy_count(processor_handle: amdsmi_wrapper.amdsmi_processor_ha processor_handle, amdsmi_wrapper.amdsmi_processor_handle ) - power = ctypes.c_uint64() + energy_accumulator= ctypes.c_uint64() counter_resolution = ctypes.c_float() timestamp = ctypes.c_uint64() _check_res( amdsmi_wrapper.amdsmi_get_energy_count(processor_handle, ctypes.byref( - power), ctypes.byref(counter_resolution), ctypes.byref(timestamp)) + energy_accumulator), ctypes.byref(counter_resolution), ctypes.byref(timestamp)) ) return { - 'power': power.value, + 'power': energy_accumulator.value, # deprecating in 6.4 + 'energy_accumulator': energy_accumulator.value, 'counter_resolution': counter_resolution.value, 'timestamp': timestamp.value, } diff --git a/src/amd_smi/amd_smi.cc 
b/src/amd_smi/amd_smi.cc index 80ee92cb..d2c84fd1 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -1591,9 +1591,9 @@ amdsmi_status_t amdsmi_get_utilization_count(amdsmi_processor_handle processor_h } amdsmi_status_t amdsmi_get_energy_count(amdsmi_processor_handle processor_handle, - uint64_t *power, float *counter_resolution, uint64_t *timestamp) { + uint64_t *energy_accumulator, float *counter_resolution, uint64_t *timestamp) { return rsmi_wrapper(rsmi_dev_energy_count_get, processor_handle, - power, counter_resolution, timestamp); + energy_accumulator, counter_resolution, timestamp); } amdsmi_status_t amdsmi_get_gpu_bdf_id( diff --git a/tests/amd_smi_test/functional/metrics_counter_read.cc b/tests/amd_smi_test/functional/metrics_counter_read.cc index 868f046d..5e6be28a 100644 --- a/tests/amd_smi_test/functional/metrics_counter_read.cc +++ b/tests/amd_smi_test/functional/metrics_counter_read.cc @@ -104,10 +104,10 @@ void TestMetricsCounterRead::Run(void) { std::cout << "\t**GPU METRICS ENERGY COUNTER:\n"; } - uint64_t power; + uint64_t energy_accumulator; uint64_t timestamp; float counter_resolution; - err = amdsmi_get_energy_count(processor_handles_[i], &power, &counter_resolution, &timestamp); + err = amdsmi_get_energy_count(processor_handles_[i], &energy_accumulator, &counter_resolution, &timestamp); if (err != AMDSMI_STATUS_SUCCESS) { if (err == AMDSMI_STATUS_NOT_SUPPORTED) { IF_VERB(STANDARD) { @@ -119,10 +119,10 @@ void TestMetricsCounterRead::Run(void) { } else { CHK_ERR_ASRT(err); IF_VERB(STANDARD) { - std::cout << std::dec << "power counter=" - << power << '\n'; - std::cout << "power in uJ=" - << (double)(power * counter_resolution) << '\n'; + std::cout << std::dec << "energy_accumulator counter=" + << energy_accumulator << '\n'; + std::cout << "energy_accumulator in uJ=" + << (double)(energy_accumulator * counter_resolution) << '\n'; std::cout << std::dec << "timestamp=" << timestamp << '\n'; }