intel/perf: Add and use a function to return platform OA format

The platform version check used to select the OA format was duplicated
in a few places, so add a function for it and drop the duplication.

While at it, make it future-proof for Xe KMD support and split the
i915-specific code into its own file.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29077>
This commit is contained in:
José Roberto de Souza
2024-05-03 13:07:46 -07:00
committed by Marge Bot
parent b98538d54c
commit d27dcb815e
9 changed files with 53 additions and 13 deletions

View File

@@ -0,0 +1,20 @@
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "perf/i915/intel_perf.h"
#include "perf/intel_perf.h"
#include "drm-uapi/i915_drm.h"
/**
 * Select the i915 OA report format for the device described by \p perf.
 *
 * The choice depends only on perf->devinfo->verx10:
 *   - verx10 <= 75        -> I915_OA_FORMAT_A45_B8_C8
 *   - 75 < verx10 <= 120  -> I915_OA_FORMAT_A32u40_A4u32_B8_C8
 *   - verx10 > 120        -> I915_OA_FORMAT_A24u40_A14u32_B8_C8
 */
uint64_t
i915_perf_get_oa_format(struct intel_perf_config *perf)
{
   /* Check from the newest range down so each test is a simple guard. */
   if (perf->devinfo->verx10 > 120)
      return I915_OA_FORMAT_A24u40_A14u32_B8_C8;

   if (perf->devinfo->verx10 > 75)
      return I915_OA_FORMAT_A32u40_A4u32_B8_C8;

   return I915_OA_FORMAT_A45_B8_C8;
}

View File

@@ -0,0 +1,12 @@
/*
* Copyright 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#pragma once
#include <stdint.h>
struct intel_perf_config;
/**
 * Returns the I915_OA_FORMAT_* value selected from perf->devinfo->verx10.
 */
uint64_t i915_perf_get_oa_format(struct intel_perf_config *perf);

View File

@@ -41,6 +41,7 @@
#include "dev/intel_debug.h" #include "dev/intel_debug.h"
#include "dev/intel_device_info.h" #include "dev/intel_device_info.h"
#include "perf/i915/intel_perf.h"
#include "perf/intel_perf.h" #include "perf/intel_perf.h"
#include "perf/intel_perf_regs.h" #include "perf/intel_perf_regs.h"
#include "perf/intel_perf_mdapi.h" #include "perf/intel_perf_mdapi.h"
@@ -1567,3 +1568,15 @@ intel_perf_free(struct intel_perf_config *perf_cfg)
{ {
ralloc_free(perf_cfg); ralloc_free(perf_cfg);
} }
/**
 * Return the OA report format to use for the device behind \p perf_cfg.
 *
 * The lookup is delegated to the backend matching
 * perf_cfg->devinfo->kmd_type. Only INTEL_KMD_TYPE_I915 is handled here;
 * any other KMD type hits unreachable().
 */
uint64_t
intel_perf_get_oa_format(struct intel_perf_config *perf_cfg)
{
   uint64_t oa_format = 0;

   switch (perf_cfg->devinfo->kmd_type) {
   case INTEL_KMD_TYPE_I915:
      oa_format = i915_perf_get_oa_format(perf_cfg);
      break;
   default:
      unreachable("missing");
      break;
   }

   return oa_format;
}

View File

@@ -534,6 +534,8 @@ intel_perf_new(void *ctx)
void intel_perf_free(struct intel_perf_config *perf_cfg); void intel_perf_free(struct intel_perf_config *perf_cfg);
/**
 * Returns the OA format for the device behind perf_cfg, chosen by the
 * backend that matches perf_cfg->devinfo->kmd_type.
 */
uint64_t intel_perf_get_oa_format(struct intel_perf_config *perf_cfg);
/** Whether we have the ability to hold off preemption on a batch so we don't /** Whether we have the ability to hold off preemption on a batch so we don't
* have to look at the OA buffer to subtract unrelated workloads off the * have to look at the OA buffer to subtract unrelated workloads off the
* values captured through MI_* commands. * values captured through MI_* commands.

View File

@@ -243,7 +243,6 @@ intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
switch (devinfo->ver) { switch (devinfo->ver) {
case 7: { case 7: {
query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7); query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7);
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
struct gfx7_mdapi_metrics metric_data; struct gfx7_mdapi_metrics metric_data;
query->data_size = sizeof(metric_data); query->data_size = sizeof(metric_data);
@@ -268,7 +267,6 @@ intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
} }
case 8: { case 8: {
query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16); query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16);
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
struct gfx8_mdapi_metrics metric_data; struct gfx8_mdapi_metrics metric_data;
query->data_size = sizeof(metric_data); query->data_size = sizeof(metric_data);
@@ -305,7 +303,6 @@ intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
case 11: case 11:
case 12: { case 12: {
query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
struct gfx9_mdapi_metrics metric_data; struct gfx9_mdapi_metrics metric_data;
query->data_size = sizeof(metric_data); query->data_size = sizeof(metric_data);
@@ -349,6 +346,7 @@ intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
break; break;
} }
query->oa_format = intel_perf_get_oa_format(perf);
query->kind = INTEL_PERF_QUERY_TYPE_RAW; query->kind = INTEL_PERF_QUERY_TYPE_RAW;
query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
query->guid = INTEL_PERF_QUERY_GUID_MDAPI; query->guid = INTEL_PERF_QUERY_GUID_MDAPI;

View File

@@ -38,10 +38,10 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters)
query->n_counters = 0; query->n_counters = 0;
query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */ query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */
query->counters = rzalloc_array(query, struct intel_perf_query_counter, ncounters); query->counters = rzalloc_array(query, struct intel_perf_query_counter, ncounters);
query->oa_format = intel_perf_get_oa_format(perf);
/* Accumulation buffer offsets... */ /* Accumulation buffer offsets... */
if (perf->devinfo->verx10 <= 75) { if (perf->devinfo->verx10 <= 75) {
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
query->gpu_time_offset = 0; query->gpu_time_offset = 0;
query->a_offset = query->gpu_time_offset + 1; query->a_offset = query->gpu_time_offset + 1;
query->b_offset = query->a_offset + 45; query->b_offset = query->a_offset + 45;
@@ -49,7 +49,6 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters)
query->perfcnt_offset = query->c_offset + 8; query->perfcnt_offset = query->c_offset + 8;
query->rpstat_offset = query->perfcnt_offset + 2; query->rpstat_offset = query->perfcnt_offset + 2;
} else if (perf->devinfo->verx10 <= 120) { } else if (perf->devinfo->verx10 <= 120) {
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
query->gpu_time_offset = 0; query->gpu_time_offset = 0;
query->gpu_clock_offset = query->gpu_time_offset + 1; query->gpu_clock_offset = query->gpu_time_offset + 1;
query->a_offset = query->gpu_clock_offset + 1; query->a_offset = query->gpu_clock_offset + 1;
@@ -58,7 +57,6 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters)
query->perfcnt_offset = query->c_offset + 8; query->perfcnt_offset = query->c_offset + 8;
query->rpstat_offset = query->perfcnt_offset + 2; query->rpstat_offset = query->perfcnt_offset + 2;
} else { } else {
query->oa_format = I915_OA_FORMAT_A24u40_A14u32_B8_C8;
query->gpu_time_offset = 0; query->gpu_time_offset = 0;
query->gpu_clock_offset = query->gpu_time_offset + 1; query->gpu_clock_offset = query->gpu_time_offset + 1;
query->a_offset = query->gpu_clock_offset + 1; query->a_offset = query->gpu_clock_offset + 1;

View File

@@ -17,6 +17,8 @@ foreach hw : intel_hw_metrics
endforeach endforeach
intel_perf_sources = [ intel_perf_sources = [
'i915/intel_perf.c',
'i915/intel_perf.h',
'intel_perf.c', 'intel_perf.c',
'intel_perf_query.c', 'intel_perf_query.c',
'intel_perf_mdapi.c', 'intel_perf_mdapi.c',

View File

@@ -107,10 +107,7 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
properties[p++] = metric_id; properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
properties[p++] = properties[p++] = intel_perf_get_oa_format(device->physical->perf);
device->info->verx10 >= 125 ?
I915_OA_FORMAT_A24u40_A14u32_B8_C8 :
I915_OA_FORMAT_A32u40_A4u32_B8_C8;
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */ properties[p++] = 31; /* slowest sampling period */

View File

@@ -115,9 +115,7 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
properties[p++] = metric_id; properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
properties[p++] = device->info->ver >= 8 ? properties[p++] = intel_perf_get_oa_format(device->physical->perf);
I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
I915_OA_FORMAT_A45_B8_C8;
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */ properties[p++] = 31; /* slowest sampling period */