From 0a6fe638f3623fd1feaf26a9c1b3eefc9cf09e26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 24 May 2024 13:03:59 -0700 Subject: [PATCH] intel/perf: Add INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Lionel Landwerlin Signed-off-by: José Roberto de Souza Part-of: --- src/intel/perf/intel_perf.c | 17 +++++++++++++++-- src/intel/perf/intel_perf.h | 7 +++++-- src/intel/perf/intel_perf_query.c | 1 + src/intel/perf/intel_perf_regs.h | 3 +++ src/intel/perf/intel_perf_setup.h | 8 +++++++- src/intel/vulkan/anv_perf.c | 1 + src/intel/vulkan/genX_query.c | 5 ++++- 7 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/intel/perf/intel_perf.c b/src/intel/perf/intel_perf.c index 0255cc3046e..9c93db92e3c 100644 --- a/src/intel/perf/intel_perf.c +++ b/src/intel/perf/intel_perf.c @@ -1208,6 +1208,8 @@ query_accumulator_offset(const struct intel_perf_query_info *query, return query->b_offset + index; case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: return query->c_offset + index; + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: + return query->pec_offset + index; default: unreachable("Invalid register type"); return 0; @@ -1300,6 +1302,11 @@ intel_perf_query_result_print_fields(const struct intel_perf_query_info *query, case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: fprintf(stderr, "C%u: 0x%08x\n", field->index, *value32); break; + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: { + const uint64_t *value64 = data + field->location; + fprintf(stderr, "PEC%u: 0x%" PRIx64 "\n", field->index, *value64); + break; + } default: break; } @@ -1315,12 +1322,13 @@ intel_perf_compare_query_names(const void *v1, const void *v2) return strcmp(q1->name, q2->name); } -#define MAX_QUERY_FIELDS(devinfo) (5 + 16) +/* Xe2: (64 x PEC) + SRM_RPSTAT + MI_RPC */ +#define MAX_QUERY_FIELDS(devinfo) (devinfo->verx10 >= 200 ? (64 + 2) : (5 + 16)) static inline struct intel_perf_query_field * add_query_register(struct intel_perf_config *perf_cfg, enum intel_perf_query_field_type type, - uint16_t offset, + uint32_t offset, uint16_t size, uint8_t index) { @@ -1423,6 +1431,11 @@ intel_perf_init_query_fields(struct intel_perf_config *perf_cfg, add_query_register(perf_cfg, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C, GFX12_OAG_PERF_C32(i), 4, i); } + } else if (devinfo->verx10 >= 200) { + for (uint32_t i = 0; i < XE2_N_OAG_PERF_PEC; i++) { + add_query_register(perf_cfg, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC, + XE2_OAG_PERF_PEC(i), 8, i); + } } } } diff --git a/src/intel/perf/intel_perf.h b/src/intel/perf/intel_perf.h index 515ac98bf85..dc5bd7c8996 100644 --- a/src/intel/perf/intel_perf.h +++ b/src/intel/perf/intel_perf.h @@ -118,10 +118,12 @@ struct intel_pipeline_stat { * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. * For Gfx8+ * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters + * For Xe2: + * 1 timestamp, 1 clock, 64 PEC counters * * Plus 2 PERF_CNT registers and 1 RPSTAT register. */ -#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1) +#define MAX_OA_REPORT_COUNTERS (2 + 64 + 3) /* * When currently allocate only one page for pipeline statistics queries. Here @@ -283,7 +285,7 @@ struct intel_perf_query_field_layout { struct intel_perf_query_field { /* MMIO location of this register */ - uint16_t mmio_offset; + uint32_t mmio_offset; /* Location of this register in the storage */ uint16_t location; @@ -298,6 +300,7 @@ struct intel_perf_query_field_layout { INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B, INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C, + INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC, } type; /* Index of register in the given type (for instance A31 or B2, diff --git a/src/intel/perf/intel_perf_query.c b/src/intel/perf/intel_perf_query.c index 6070809ed0d..5f0d0b24d24 100644 --- a/src/intel/perf/intel_perf_query.c +++ b/src/intel/perf/intel_perf_query.c @@ -705,6 +705,7 @@ snapshot_query_layout(struct intel_perf_context *perf_ctx, case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: perf_cfg->vtbl.store_register_mem(perf_ctx->ctx, query->oa.bo, field->mmio_offset, field->size, offset + field->location); diff --git a/src/intel/perf/intel_perf_regs.h b/src/intel/perf/intel_perf_regs.h index ae406cb9c2b..cb224c74ecd 100644 --- a/src/intel/perf/intel_perf_regs.h +++ b/src/intel/perf/intel_perf_regs.h @@ -74,6 +74,9 @@ #define GFX125_OAG_PERF_A36 (0xdb20) #define GFX125_OAG_PERF_A37 (0xdb28) +#define XE2_N_OAG_PERF_PEC 64 +#define XE2_OAG_PERF_PEC(idx) (0x14200 + (idx) * 8) + /* Pipeline statistic counters */ #define IA_VERTICES_COUNT 0x2310 #define IA_PRIMITIVES_COUNT 0x2318 diff --git a/src/intel/perf/intel_perf_setup.h b/src/intel/perf/intel_perf_setup.h index e6f19317d8c..e513cb1b287 100644 --- a/src/intel/perf/intel_perf_setup.h +++ b/src/intel/perf/intel_perf_setup.h @@ -41,7 +41,13 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters) query->oa_format = intel_perf_get_oa_format(perf); /* Accumulation buffer offsets... */ - if (perf->devinfo->verx10 >= 125) { + if (perf->devinfo->verx10 >= 200) { + query->gpu_time_offset = 0; + query->gpu_clock_offset = query->gpu_time_offset + 1; + query->pec_offset = query->gpu_clock_offset + 1; + query->perfcnt_offset = query->pec_offset + 64; + query->rpstat_offset = query->perfcnt_offset + 2; + } else if (perf->devinfo->verx10 >= 125) { query->gpu_time_offset = 0; query->gpu_clock_offset = query->gpu_time_offset + 1; query->a_offset = query->gpu_clock_offset + 1; diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c index 0e9a959c872..73ad30f2f06 100644 --- a/src/intel/vulkan/anv_perf.c +++ b/src/intel/vulkan/anv_perf.c @@ -72,6 +72,7 @@ anv_physical_device_init_perf(struct anv_physical_device *device, int fd) case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: device->n_perf_query_commands += field->size / 4; break; default: diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c index ff685a20129..c086d12f6ec 100644 --- a/src/intel/vulkan/genX_query.c +++ b/src/intel/vulkan/genX_query.c @@ -998,7 +998,8 @@ emit_perf_intel_query(struct anv_cmd_buffer *cmd_buffer, case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: - case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: { + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: { struct anv_address addr = anv_address_add(data_addr, field->location); struct mi_value src = field->size == 8 ? mi_reg64(field->mmio_offset) : @@ -1190,6 +1191,7 @@ void genX(CmdBeginQueryIndexedEXT)( case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: dws = anv_batch_emitn(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM_length), @@ -1353,6 +1355,7 @@ void genX(CmdEndQueryIndexedEXT)( case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B: case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C: + case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: dws = anv_batch_emitn(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM_length),