intel/perf: allow metric sets to be loaded with only OA reports

A bunch of performance counters rely on register snapshots on top of
the OA reports. Those are already conditional on the query mode in the
equations:

   availability="true $QueryMode &&"

This change allows disabling counters that are only available with
additional register snapshots. This will be useful if you only want to
use OA reports to extract performance counter values.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Antonio Caggiano <antonio.caggiano@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10216>
This commit is contained in:
Lionel Landwerlin
2021-04-29 11:28:30 +03:00
committed by Marge Bot
parent fba189a349
commit 5d95aa3964
6 changed files with 77 additions and 52 deletions

View File

@@ -102,7 +102,8 @@ iris_monitor_init_metrics(struct iris_screen *screen)
iris_perf_init_vtbl(perf_cfg);
intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd,
true /* pipeline stats*/);
true /* pipeline stats*/,
true /* register snapshots */);
return perf_cfg->n_counters > 0;
}

View File

@@ -66,7 +66,9 @@ iris_init_perf_query_info(struct pipe_context *pipe)
iris_perf_init_vtbl(perf_cfg);
intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, true /* pipeline_statistics */);
intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd,
true /* pipeline_statistics */,
true /* register snapshots */);
intel_perf_init_context(ice->perf_ctx,
perf_cfg,

View File

@@ -401,7 +401,9 @@ compute_topology_builtins(struct intel_perf_config *perf,
}
static bool
init_oa_sys_vars(struct intel_perf_config *perf, const struct intel_device_info *devinfo)
init_oa_sys_vars(struct intel_perf_config *perf,
const struct intel_device_info *devinfo,
bool use_register_snapshots)
{
uint64_t min_freq_mhz = 0, max_freq_mhz = 0;
@@ -421,7 +423,7 @@ init_oa_sys_vars(struct intel_perf_config *perf, const struct intel_device_info
perf->sys_vars.gt_max_freq = max_freq_mhz * 1000000;
perf->sys_vars.timestamp_frequency = devinfo->timestamp_frequency;
perf->sys_vars.revision = devinfo->revision;
perf->sys_vars.query_mode = true;
perf->sys_vars.query_mode = use_register_snapshots;
compute_topology_builtins(perf, devinfo);
return true;
@@ -708,7 +710,8 @@ build_unique_counter_list(struct intel_perf_config *perf)
static bool
oa_metrics_available(struct intel_perf_config *perf, int fd,
const struct intel_device_info *devinfo)
const struct intel_device_info *devinfo,
bool use_register_snapshots)
{
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
bool i915_perf_oa_available = false;
@@ -745,7 +748,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
return i915_perf_oa_available &&
oa_register &&
get_sysfs_dev_dir(perf, fd) &&
init_oa_sys_vars(perf, devinfo);
init_oa_sys_vars(perf, devinfo, use_register_snapshots);
}
static void
@@ -1297,7 +1300,8 @@ add_query_register(struct intel_perf_query_field_layout *layout,
static void
intel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
const struct intel_device_info *devinfo)
const struct intel_device_info *devinfo,
bool use_register_snapshots)
{
struct intel_perf_query_field_layout *layout = &perf_cfg->query_layout;
@@ -1311,49 +1315,51 @@ intel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC,
0, 256, 0);
if (devinfo->ver <= 11) {
struct intel_perf_query_field *field =
if (use_register_snapshots) {
if (devinfo->ver <= 11) {
struct intel_perf_query_field *field =
add_query_register(layout,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
PERF_CNT_1_DW0, 8, 0);
field->mask = PERF_CNT_VALUE_MASK;
field = add_query_register(layout,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
PERF_CNT_2_DW0, 8, 1);
field->mask = PERF_CNT_VALUE_MASK;
}
if (devinfo->ver == 8 && !devinfo->is_cherryview) {
add_query_register(layout,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
PERF_CNT_1_DW0, 8, 0);
field->mask = PERF_CNT_VALUE_MASK;
field = add_query_register(layout,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
PERF_CNT_2_DW0, 8, 1);
field->mask = PERF_CNT_VALUE_MASK;
}
if (devinfo->ver == 8 && !devinfo->is_cherryview) {
add_query_register(layout,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
GFX7_RPSTAT1, 4, 0);
}
GFX7_RPSTAT1, 4, 0);
}
if (devinfo->ver >= 9) {
add_query_register(layout,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
GFX9_RPSTAT0, 4, 0);
}
if (devinfo->ver >= 9) {
add_query_register(layout,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
GFX9_RPSTAT0, 4, 0);
}
if (!can_use_mi_rpc_bc_counters(devinfo)) {
if (devinfo->ver >= 8 && devinfo->ver <= 11) {
for (uint32_t i = 0; i < GFX8_N_OA_PERF_B32; i++) {
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
GFX8_OA_PERF_B32(i), 4, i);
}
for (uint32_t i = 0; i < GFX8_N_OA_PERF_C32; i++) {
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
GFX8_OA_PERF_C32(i), 4, i);
}
} else if (devinfo->ver == 12) {
for (uint32_t i = 0; i < GFX12_N_OAG_PERF_B32; i++) {
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
GFX12_OAG_PERF_B32(i), 4, i);
}
for (uint32_t i = 0; i < GFX12_N_OAG_PERF_C32; i++) {
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
GFX12_OAG_PERF_C32(i), 4, i);
if (!can_use_mi_rpc_bc_counters(devinfo)) {
if (devinfo->ver >= 8 && devinfo->ver <= 11) {
for (uint32_t i = 0; i < GFX8_N_OA_PERF_B32; i++) {
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
GFX8_OA_PERF_B32(i), 4, i);
}
for (uint32_t i = 0; i < GFX8_N_OA_PERF_C32; i++) {
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
GFX8_OA_PERF_C32(i), 4, i);
}
} else if (devinfo->ver == 12) {
for (uint32_t i = 0; i < GFX12_N_OAG_PERF_B32; i++) {
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
GFX12_OAG_PERF_B32(i), 4, i);
}
for (uint32_t i = 0; i < GFX12_N_OAG_PERF_C32; i++) {
add_query_register(layout, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
GFX12_OAG_PERF_C32(i), 4, i);
}
}
}
}
@@ -1368,16 +1374,18 @@ void
intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
const struct intel_device_info *devinfo,
int drm_fd,
bool include_pipeline_statistics)
bool include_pipeline_statistics,
bool use_register_snapshots)
{
intel_perf_init_query_fields(perf_cfg, devinfo);
intel_perf_init_query_fields(perf_cfg, devinfo, use_register_snapshots);
if (include_pipeline_statistics) {
load_pipeline_statistic_metrics(perf_cfg, devinfo);
intel_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
}
bool oa_metrics = oa_metrics_available(perf_cfg, drm_fd, devinfo);
bool oa_metrics = oa_metrics_available(perf_cfg, drm_fd, devinfo,
use_register_snapshots);
if (oa_metrics)
load_oa_metrics(perf_cfg, drm_fd, devinfo);

View File

@@ -388,10 +388,21 @@ struct intel_perf_counter_pass {
uint32_t pass;
};
/** Initialize the intel_perf_config object for a given device.
*
* include_pipeline_statistics : Whether to add a pipeline statistic query
* intel_perf_query_info object
*
* use_register_snapshots : Whether the queries should include counters
* that rely on register snapshots using command
* streamer instructions (not possible when using
* only the OA buffer data).
*/
void intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
const struct intel_device_info *devinfo,
int drm_fd,
bool include_pipeline_statistics);
bool include_pipeline_statistics,
bool use_register_snapshots);
/** Query i915 for a metric id using guid.
*/

View File

@@ -48,7 +48,9 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
struct intel_perf_config *perf = intel_perf_new(NULL);
intel_perf_init_metrics(perf, &device->info, fd, false /* pipeline statistics */);
intel_perf_init_metrics(perf, &device->info, fd,
false /* pipeline statistics */,
true /* register snapshots */);
if (!perf->n_queries) {
if (perf->platform_supported) {

View File

@@ -507,7 +507,8 @@ brw_init_perf_query_info(struct gl_context *ctx)
perf_cfg->vtbl.bo_busy = (bo_busy_t)brw_bo_busy;
intel_perf_init_metrics(perf_cfg, devinfo, brw->screen->fd,
true /* pipeline stats */);
true /* pipeline stats */,
true /* register snapshots */);
intel_perf_init_context(perf_ctx, perf_cfg, brw->mem_ctx, brw, brw->bufmgr,
devinfo, brw->hw_ctx, brw->screen->fd);