Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

telemetry: systick info #8737

Merged
merged 6 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions app/boards/intel_adsp_ace15_mtpm.conf
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ CONFIG_INTEL_ADSP_TIMER=y
CONFIG_MM_DRV_INTEL_ADSP_TLB_REMAP_UNUSED_RAM=y
CONFIG_AMS=y
CONFIG_COUNTER=y
CONFIG_SOF_TELEMETRY=y

CONFIG_HEAP_MEM_POOL_SIZE=8192
CONFIG_L3_HEAP=y
Expand Down Expand Up @@ -78,6 +79,8 @@ CONFIG_INTEL_ADSP_IPC=y
CONFIG_WATCHDOG=y
CONFIG_LL_WATCHDOG=y

CONFIG_MEMORY_WIN_2_SIZE=12288

# Temporary disabled options
CONFIG_TRACE=n
CONFIG_COMP_KPB=y
Expand Down
4 changes: 4 additions & 0 deletions app/boards/intel_adsp_ace20_lnl.conf
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ CONFIG_INTEL_ADSP_TIMER=y
CONFIG_MM_DRV_INTEL_ADSP_TLB_REMAP_UNUSED_RAM=y
CONFIG_AMS=y
CONFIG_COUNTER=y
CONFIG_SOF_TELEMETRY=y

tobonex marked this conversation as resolved.
Show resolved Hide resolved
CONFIG_HEAP_MEM_POOL_SIZE=8192
CONFIG_L3_HEAP=y
Expand Down Expand Up @@ -73,6 +74,9 @@ CONFIG_INTEL_ADSP_IPC=y
CONFIG_PROBE=y
CONFIG_PROBE_DMA_MAX=2

CONFIG_MEMORY_WIN_2_SIZE=12288


# Temporary disabled options
CONFIG_TRACE=n
CONFIG_COMP_KPB=y
Expand Down
2 changes: 2 additions & 0 deletions src/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ rsource "ipc/Kconfig"
rsource "math/Kconfig"

rsource "library_manager/Kconfig"

rsource "debug/telemetry/Kconfig"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be in the previous patch "telemetry: Add systick_info part of telemetry". No functional impact as this commit is the first user, but just noting.

Copy link
Collaborator

@marc-hb marc-hb Apr 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However you must make sure every commit can compile and run basic (not telemetry) tests

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every commit here does compile

91 changes: 88 additions & 3 deletions src/audio/base_fw.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@
#if defined(CONFIG_SOC_SERIES_INTEL_ADSP_ACE)
#include <intel_adsp_hda.h>
#endif
#include <sof/audio/module_adapter/module/generic.h>
#include <sof/schedule/dp_schedule.h>
#include <sof/schedule/ll_schedule.h>
#include <sof/debug/telemetry/telemetry.h>
/* FIXME:
* Builds for some platforms like tgl fail because their defines related to memory windows are
* already defined somewhere else. Remove this ifdef after it's cleaned up
*/
#ifdef CONFIG_SOF_TELEMETRY
#include "mem_window.h"
#include "adsp_debug_window.h"
#endif
tobonex marked this conversation as resolved.
Show resolved Hide resolved

#if CONFIG_ACE_V1X_ART_COUNTER || CONFIG_ACE_V1X_RTC_COUNTER
#include <zephyr/device.h>
Expand Down Expand Up @@ -455,16 +467,85 @@ static int basefw_set_fw_config(bool first_block,
return 0;
}

int schedulers_info_get(uint32_t *data_off_size,
char *data,
uint32_t core_id)
{
/* TODO
* Core id parameter is not yet used. For now we only get scheduler info from current core
* Other cores info can be added by implementing idc request for this data.
* Do this if Schedulers info get ipc has uses for accurate info per core
*/
tobonex marked this conversation as resolved.
Show resolved Hide resolved

struct scheduler_props *scheduler_props;
/* the internal structs have irregular sizes so we cannot use indexing, and have to
* reassign pointers for each element
*/
struct schedulers_info *schedulers_info = (struct schedulers_info *)data;

schedulers_info->scheduler_count = 0;

/* smallest response possible is just zero schedulers count
* here we replace max_len from data_off_size to serve as output size
*/
*data_off_size = sizeof(struct schedulers_info);

/* ===================== LL_TIMER SCHEDULER INFO ============================ */
schedulers_info->scheduler_count++;
scheduler_props = (struct scheduler_props *)(data + *data_off_size);
scheduler_get_task_info_ll(scheduler_props, data_off_size);

/* ===================== DP SCHEDULER INFO ============================ */
#if CONFIG_ZEPHYR_DP_SCHEDULER
schedulers_info->scheduler_count++;
scheduler_props = (struct scheduler_props *)(data + *data_off_size);
kv2019i marked this conversation as resolved.
Show resolved Hide resolved
scheduler_get_task_info_dp(scheduler_props, data_off_size);
#endif
return 0;
}

int set_perf_meas_state(const char *data)
{
#ifdef CONFIG_SOF_TELEMETRY
enum ipc4_perf_measurements_state_set state = *data;

struct telemetry_wnd_data *wnd_data =
(struct telemetry_wnd_data *)ADSP_DW->slots[SOF_DW_TELEMETRY_SLOT];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to a bit wrong direction in base_fw.c w.r.t. keeping in platform neutral (see #7549 and #8391 ). OTOH, the file is not really vendor neutral now and we don't have framework how to handle this, so I can't really point to alternative solution here. But this does add tech debt we need to address later. I think the telemetry backend should be abstracted better. E.g. Intel platforms would use a specific memory window, but other platforms may want to use another transport even if the telemetry data and objects are the same and same IPC commands are used to control telemetry operation.

struct system_tick_info *systick_info =
(struct system_tick_info *)wnd_data->system_tick_info;

switch (state) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably want to print the state change for telemetry in the logs as it may have impact on performance or on issue reproducibility. i.e. a developer will know telemetry state when debugging.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This made me think a bit. This IPC doesn't do much as of yet. It does some additional things in reference firmware, but nothing directly related to systick measurement (tests still use this one so we need it). It seems that systick measurement in reference FW was on all the time, same here. This does not seem like it would take a lot of time but still...
I could modify this IPC and add a bool so that systick measurement is stopped unless it's triggered by this IPC.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, this would just be informational to the developer. They would at least know if telemetry was on/off when an issue occurred.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've had a LOT of "heisenbugs" in the past...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got some more info, systick info measurement works all the time by design. So there's no on/off switch. Still, what I can do is adding the log at the init of telemetry indicating that systick info measurement is active.

case IPC4_PERF_MEASUREMENTS_DISABLED:
break;
case IPC4_PERF_MEASUREMENTS_STOPPED:
for (int i = 0; i < CONFIG_MAX_CORE_COUNT; i++)
systick_info[i].peak_utilization = 0;
tobonex marked this conversation as resolved.
Show resolved Hide resolved
break;
case IPC4_PERF_MEASUREMENTS_STARTED:
case IPC4_PERF_MEASUREMENTS_PAUSED:
break;
default:
return -EINVAL;
}
#endif
return IPC4_SUCCESS;
}

static int basefw_get_large_config(struct comp_dev *dev,
uint32_t param_id,
bool first_block,
bool last_block,
uint32_t *data_offset,
char *data)
{
/* We can use extended param id for both extended and standard param id */
union ipc4_extended_param_id extended_param_id;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tobonex I assume the vendor config get means that it is flexible and can be used to get/set any custom data for any vendor ? If so, is this the right structure, i.e. the telemetry data will be ABI standard data and part of a generic telemetry ABI.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is just a special case of large_config_get that reference FW used. Scheduler_info_get IPC uses this format. Normal param_id uses one byte out of 4 and extended one uses the remaining bytes as space for additional argument.


extended_param_id.full = param_id;

uint32_t ret = -EINVAL;

switch (param_id) {
switch (extended_param_id.part.parameter_type) {
case IPC4_PERF_MEASUREMENTS_STATE:
case IPC4_GLOBAL_PERF_DATA:
break;
Expand All @@ -473,7 +554,7 @@ static int basefw_get_large_config(struct comp_dev *dev,
return -EINVAL;
}

switch (param_id) {
switch (extended_param_id.part.parameter_type) {
case IPC4_FW_CONFIG:
return basefw_config(data_offset, data);
case IPC4_HW_CONFIG_GET:
Expand All @@ -493,13 +574,15 @@ static int basefw_get_large_config(struct comp_dev *dev,
break;
case IPC4_POWER_STATE_INFO_GET:
return basefw_power_state_info_get(data_offset, data);
case IPC4_SCHEDULERS_INFO_GET:
return schedulers_info_get(data_offset, data,
extended_param_id.part.parameter_instance);
/* TODO: add more support */
case IPC4_DSP_RESOURCE_STATE:
case IPC4_NOTIFICATION_MASK:
case IPC4_MODULES_INFO_GET:
case IPC4_PIPELINE_LIST_INFO_GET:
case IPC4_PIPELINE_PROPS_GET:
case IPC4_SCHEDULERS_INFO_GET:
case IPC4_GATEWAYS_INFO_GET:
case IPC4_LIBRARIES_INFO_GET:
case IPC4_PERF_MEASUREMENTS_STATE:
Expand All @@ -522,6 +605,8 @@ static int basefw_set_large_config(struct comp_dev *dev,
switch (param_id) {
case IPC4_FW_CONFIG:
return basefw_set_fw_config(first_block, last_block, data_offset, data);
case IPC4_PERF_MEASUREMENTS_STATE:
return set_perf_meas_state(data);
case IPC4_SYSTEM_TIME:
return basefw_set_system_time(param_id, first_block,
last_block, data_offset, data);
Expand Down
3 changes: 3 additions & 0 deletions src/debug/telemetry/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SPDX-License-Identifier: BSD-3-Clause

add_local_sources_ifdef(CONFIG_SOF_TELEMETRY sof telemetry.c)
11 changes: 11 additions & 0 deletions src/debug/telemetry/Kconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# SPDX-License-Identifier: BSD-3-Clause

config SOF_TELEMETRY
bool "enable telemetry"
default n
help
Enables telemetry. Enables performance measurements and debug utilities
that use memory window 2 (debug window) as interface. Measurements include
systick_info measurement which measures scheduler task performance and may
slightly affect overall performance.

151 changes: 151 additions & 0 deletions src/debug/telemetry/telemetry.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// SPDX-License-Identifier: BSD-3-Clause
tobonex marked this conversation as resolved.
Show resolved Hide resolved
//
// Copyright(c) 2024 Intel Corporation. All rights reserved.
//
// Author: Tobiasz Dryjanski <[email protected]>

#include <zephyr/debug/sparse.h>

#include <sof/audio/module_adapter/module/generic.h>
#include <sof/debug/telemetry/telemetry.h>

#include <ipc/trace.h>

#include <adsp_debug_window.h>
#include <errno.h>
#include <limits.h>
#include <mem_window.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

LOG_MODULE_DECLARE(ipc, CONFIG_SOF_LOG_LEVEL);

/* Systic variables, one set per core */
static int telemetry_systick_counter[CONFIG_MAX_CORE_COUNT];
static int telemetry_prev_ccount[CONFIG_MAX_CORE_COUNT];
static int telemetry_perf_period_sum[CONFIG_MAX_CORE_COUNT];
static int telemetry_perf_period_cnt[CONFIG_MAX_CORE_COUNT];
static struct telemetry_perf_queue telemetry_perf_queue[CONFIG_MAX_CORE_COUNT];

static void telemetry_perf_queue_append(struct telemetry_perf_queue *q, size_t element)
{
if (!q->full) {
q->elements[q->index] = element;
q->sum += element;
q->index++;
q->size++;
if (q->index >= SOF_AVG_PERF_MEAS_DEPTH) {
q->index = 0;
q->size = SOF_AVG_PERF_MEAS_DEPTH;
q->full = true;
}
} else {
/* no space, pop tail */
lyakh marked this conversation as resolved.
Show resolved Hide resolved
q->sum -= q->elements[q->index];
/* replace tail */
q->elements[q->index] = element;
q->sum += element;
/* move tail */
q->index++;
if (q->index >= SOF_AVG_PERF_MEAS_DEPTH)
q->index = 0;
}
}

static size_t telemetry_perf_queue_avg(struct telemetry_perf_queue *q)
{
if (!q->size)
return 0;
return q->sum / q->size;
}

int telemetry_init(void)
{
/* systick_init */
uint8_t slot_num = SOF_DW_TELEMETRY_SLOT;
volatile struct adsp_debug_window *window = ADSP_DW;
struct telemetry_wnd_data *wnd_data = (struct telemetry_wnd_data *)ADSP_DW->slots[slot_num];
struct system_tick_info *systick_info =
(struct system_tick_info *)wnd_data->system_tick_info;

tr_info(&ipc_tr, "Telemetry enabled. May affect performance");

window->descs[slot_num].type = ADSP_DW_SLOT_TELEMETRY;
window->descs[slot_num].resource_id = 0;
wnd_data->separator_1 = 0x0000C0DE;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

some magic number ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Separators were defined in reference FW. Those are only used to identify telemetry structs when looking at the memory directly (e.g. using RWE), so those can be anything really.

Copy link
Member

@lgirdwood lgirdwood Jan 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, so to be scalable (where FW can add and modify data payloads in a way that wont break host based tooling) we would need to put a small header as the separator to convey extra information instead of just a magic number e.g.

struct telem_hdr {
    uint16_t magic; // 0xc0de
    uint16_t type; // type of C struct, i.e. what am I.
    uint16_t words;  // size of data elem in words
    uint16_t elems; // > 1 if payload is array
} __packed;
// payload follows here

This could convey all the info tooling needs and would allow FW to grow data and not break tooling along teh way. i.e. if tooling does not know about a type then it can go to the next header (and so on).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The structure of telemetry data in memory window 2 here is the same as in reference firmware. If we want to be compatible it would be best to leave it as it is.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do need to maintain compatability but we also need to scale and grow as new features upstream. We can wrap any legacy/reference structure as-is, but they would need to be pre-fixed with above header so that new data could be added (without impacting any legacy data).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still not sure if we can just change the separators. Pre-fixing with the header would also move the fields from their designated places in the MW slot, and things would just stop working.

About scaling: memory window 2 is pretty scalable by itself. We can still extend the window to get more slots where we can add new structures.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we have to live with the fixed location (slot + slot offset + separator) to keep compatibility.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a debug mechanism that has to grow and scale as FW features, use cases and capabilities grow and scale (and they will), its way simpler and cheaper to fix the debug tooling (i.e. look for a header to find start, all other data is the same) than to continue with an ABI that cant grow with the FW.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tobonex the only way to scale/grow over time is to say that

window->descs[slot_num].type = ADSP_DW_SLOT_TELEMETRY;
window->descs[slot_num].resource_id = 0;

is the compatibility mode, and then changing

window->descs[slot_num].type = ADSP_DW_SLOT_TELEMETRY_V2;
window->descs[slot_num].resource_id = SOME_NEW_RESOURCE_ID;

to support changes in the ABI payloads.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it would look something like this. More details:

We have a few slots available, what data is in what slot is known by reading the descriptors. Here we set descriptor to ADSP_DW_SLOT_TELEMETRY to reserve the slot for telemetry. Data in this slot is telemetry_wnd_data structure. Only systick data is here for now, but there are more structures not ported yet, currently commented out. I didn't count the actual space needed, but this slot will be probably filled whole after the rest is ported. So this slot would only contain the legacy telemetry data.

We can mark another slot as ADSP_DW_SLOT_TELEMETRY_2 for example and do basically anything in this another slot.


/* Zero values per core */
for (int i = 0; i < CONFIG_MAX_CORE_COUNT; i++) {
systick_info[i].count = 0;
systick_info[i].last_time_elapsed = 0;
systick_info[i].max_time_elapsed = 0;
systick_info[i].last_ccount = 0;
systick_info[i].avg_utilization = 0;
systick_info[i].peak_utilization = 0;
systick_info[i].peak_utilization_4k = 0;
systick_info[i].peak_utilization_8k = 0;
}
return 0;
}

void telemetry_update(uint32_t begin_stamp, uint32_t current_stamp)
{
int prid = cpu_get_id();

++telemetry_systick_counter[prid];

struct telemetry_wnd_data *wnd_data =
(struct telemetry_wnd_data *)ADSP_DW->slots[SOF_DW_TELEMETRY_SLOT];
struct system_tick_info *systick_info =
(struct system_tick_info *)wnd_data->system_tick_info;

systick_info[prid].count = telemetry_systick_counter[prid];
systick_info[prid].last_time_elapsed = current_stamp - begin_stamp;
systick_info[prid].max_time_elapsed =
MAX(current_stamp - begin_stamp,
systick_info[prid].max_time_elapsed);
systick_info[prid].last_ccount = current_stamp;

#ifdef SOF_PERFORMANCE_MEASUREMENTS
const size_t measured_systick = begin_stamp - telemetry_prev_ccount[prid];

telemetry_prev_ccount[prid] = begin_stamp;
if (telemetry_systick_counter[prid] > 2) {
telemetry_perf_period_sum[prid] += measured_systick;
telemetry_perf_period_cnt[prid] =
(telemetry_perf_period_cnt[prid] + 1) % SOF_AVG_PERF_MEAS_PERIOD;
if (telemetry_perf_period_cnt[prid] == 0) {
/* Append average of last SOF_AVG_PERF_MEAS_PERIOD runs */
telemetry_perf_queue_append(&telemetry_perf_queue[prid],
telemetry_perf_period_sum[prid]
/ SOF_AVG_PERF_MEAS_PERIOD);
telemetry_perf_period_sum[prid] = 0;
/* Calculate average from all buckets */
systick_info[prid].avg_utilization =
telemetry_perf_queue_avg(&telemetry_perf_queue[prid]);
}

systick_info[prid].peak_utilization =
MAX(systick_info[prid].peak_utilization,
measured_systick);
systick_info[prid].peak_utilization_4k =
MAX(systick_info[prid].peak_utilization_4k,
measured_systick);
systick_info[prid].peak_utilization_8k =
MAX(systick_info[prid].peak_utilization_8k,
measured_systick);

/* optimized: counter % 0x1000 */
if ((telemetry_systick_counter[prid] & 0xfff) == 0)
systick_info[prid].peak_utilization_4k = 0;
/* optimized: counter % 0x2000 */
if ((telemetry_systick_counter[prid] & 0x1fff) == 0)
systick_info[prid].peak_utilization_8k = 0;
}
#endif
}

/* init telemetry using Zephyr*/
SYS_INIT(telemetry_init, APPLICATION, CONFIG_APPLICATION_INIT_PRIORITY);

30 changes: 30 additions & 0 deletions src/include/ipc4/base_fw.h
Original file line number Diff line number Diff line change
Expand Up @@ -671,4 +671,34 @@ enum ipc4_power_state_type {
IPC4_CORE_KCPS = 1,
};

/* Scheduler info get structures */

struct task_props {
/* Unique ID of task. */
uint32_t task_id;
/* Specifies number of items in module_instance_id array. */
uint32_t module_instance_count;
/* Array of IDs of module instances running inside the task. */
uint32_t module_instance_id[];
} __packed __aligned(4);

struct scheduler_props {
/* Processing domain, one of:
* COMP_PROCESSING_DOMAIN_LL or COMP_PROCESSING_DOMAIN_DP
*/
uint32_t processing_domain;
/* ID of core that scheduler is running on. */
uint32_t core_id;
/* Specifies number of items in task_info array. */
uint32_t task_count;
struct task_props task_info[];
} __packed __aligned(4);

struct schedulers_info {
/* Specifies number of items in scheduler_info array. */
uint32_t scheduler_count;
/* Array of scheduler properties. */
struct scheduler_props scheduler_info[];
} __packed __aligned(4);

#endif /* __SOF_IPC4_BASE_FW_H__ */
Loading
Loading