intel/perf: Add INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29899>
This commit is contained in:
José Roberto de Souza
2024-05-24 13:03:59 -07:00
committed by Marge Bot
parent 6e1852981b
commit 0a6fe638f3
7 changed files with 36 additions and 6 deletions

View File

@@ -1208,6 +1208,8 @@ query_accumulator_offset(const struct intel_perf_query_info *query,
return query->b_offset + index;
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
return query->c_offset + index;
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC:
return query->pec_offset + index;
default:
unreachable("Invalid register type");
return 0;
@@ -1300,6 +1302,11 @@ intel_perf_query_result_print_fields(const struct intel_perf_query_info *query,
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
fprintf(stderr, "C%u: 0x%08x\n", field->index, *value32);
break;
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: {
const uint64_t *value64 = data + field->location;
fprintf(stderr, "PEC%u: 0x%" PRIx64 "\n", field->index, *value64);
break;
}
default:
break;
}
@@ -1315,12 +1322,13 @@ intel_perf_compare_query_names(const void *v1, const void *v2)
return strcmp(q1->name, q2->name);
}
#define MAX_QUERY_FIELDS(devinfo) (5 + 16)
/* Xe2: (64 x PEC) + SRM_RPSTAT + MI_RPC */
#define MAX_QUERY_FIELDS(devinfo) (devinfo->verx10 >= 200 ? (64 + 2) : (5 + 16))
static inline struct intel_perf_query_field *
add_query_register(struct intel_perf_config *perf_cfg,
enum intel_perf_query_field_type type,
uint16_t offset,
uint32_t offset,
uint16_t size,
uint8_t index)
{
@@ -1423,6 +1431,11 @@ intel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
add_query_register(perf_cfg, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
GFX12_OAG_PERF_C32(i), 4, i);
}
} else if (devinfo->verx10 >= 200) {
for (uint32_t i = 0; i < XE2_N_OAG_PERF_PEC; i++) {
add_query_register(perf_cfg, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC,
XE2_OAG_PERF_PEC(i), 8, i);
}
}
}
}

View File

@@ -118,10 +118,12 @@ struct intel_pipeline_stat {
* 1 timestamp, 45 A counters, 8 B counters and 8 C counters.
* For Gfx8+
* 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
* For Xe2:
* 1 timestamp, 1 clock, 64 PEC counters
*
* Plus 2 PERF_CNT registers and 1 RPSTAT register.
*/
#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1)
#define MAX_OA_REPORT_COUNTERS (2 + 64 + 3)
/*
* We currently allocate only one page for pipeline statistics queries. Here
@@ -283,7 +285,7 @@ struct intel_perf_query_field_layout {
struct intel_perf_query_field {
/* MMIO location of this register */
uint16_t mmio_offset;
uint32_t mmio_offset;
/* Location of this register in the storage */
uint16_t location;
@@ -298,6 +300,7 @@ struct intel_perf_query_field_layout {
INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C,
INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC,
} type;
/* Index of register in the given type (for instance A31 or B2,

View File

@@ -705,6 +705,7 @@ snapshot_query_layout(struct intel_perf_context *perf_ctx,
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC:
perf_cfg->vtbl.store_register_mem(perf_ctx->ctx, query->oa.bo,
field->mmio_offset, field->size,
offset + field->location);

View File

@@ -74,6 +74,9 @@
#define GFX125_OAG_PERF_A36 (0xdb20)
#define GFX125_OAG_PERF_A37 (0xdb28)
/* Xe2 OAG PEC registers: XE2_N_OAG_PERF_PEC registers laid out 8 bytes
 * apart starting at MMIO offset 0x14200; XE2_OAG_PERF_PEC(idx) yields the
 * offset of register idx.
 */
#define XE2_N_OAG_PERF_PEC 64
#define XE2_OAG_PERF_PEC(idx) (0x14200 + (idx) * 8)
/* Pipeline statistic counters */
#define IA_VERTICES_COUNT 0x2310
#define IA_PRIMITIVES_COUNT 0x2318

View File

@@ -41,7 +41,13 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters)
query->oa_format = intel_perf_get_oa_format(perf);
/* Accumulation buffer offsets... */
if (perf->devinfo->verx10 >= 125) {
if (perf->devinfo->verx10 >= 200) {
query->gpu_time_offset = 0;
query->gpu_clock_offset = query->gpu_time_offset + 1;
query->pec_offset = query->gpu_clock_offset + 1;
query->perfcnt_offset = query->pec_offset + 64;
query->rpstat_offset = query->perfcnt_offset + 2;
} else if (perf->devinfo->verx10 >= 125) {
query->gpu_time_offset = 0;
query->gpu_clock_offset = query->gpu_time_offset + 1;
query->a_offset = query->gpu_clock_offset + 1;

View File

@@ -72,6 +72,7 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC:
device->n_perf_query_commands += field->size / 4;
break;
default:

View File

@@ -998,7 +998,8 @@ emit_perf_intel_query(struct anv_cmd_buffer *cmd_buffer,
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: {
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: {
struct anv_address addr = anv_address_add(data_addr, field->location);
struct mi_value src = field->size == 8 ?
mi_reg64(field->mmio_offset) :
@@ -1190,6 +1191,7 @@ void genX(CmdBeginQueryIndexedEXT)(
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC:
dws =
anv_batch_emitn(&cmd_buffer->batch,
GENX(MI_STORE_REGISTER_MEM_length),
@@ -1353,6 +1355,7 @@ void genX(CmdEndQueryIndexedEXT)(
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC:
dws =
anv_batch_emitn(&cmd_buffer->batch,
GENX(MI_STORE_REGISTER_MEM_length),