intel/perf: use the new OA format for Gfx12.5+

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18893>
This commit is contained in:
Lionel Landwerlin
2021-06-25 13:08:47 +03:00
committed by Marge Bot
parent 02608c25ec
commit fdf602a545
4 changed files with 81 additions and 1 deletions

View File

@@ -964,6 +964,8 @@ def main():
if gen.chipset == "hsw": if gen.chipset == "hsw":
c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters)) c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters))
elif gen.chipset.startswith("acm"):
c("struct intel_perf_query_info *query = xehp_query_alloc(perf, %u);\n" % len(counters))
else: else:
c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters)) c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters))
c("\n") c("\n")

View File

@@ -1108,6 +1108,66 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
result->reports_accumulated++; result->reports_accumulated++;
switch (query->oa_format) { switch (query->oa_format) {
case I915_OA_FORMAT_A24u40_A14u32_B8_C8:
result->accumulator[query->gpu_time_offset] =
intel_perf_report_timestamp(query, end) -
intel_perf_report_timestamp(query, start);
accumulate_uint32(start + 3, end + 3,
result->accumulator + query->gpu_clock_offset); /* clock */
/* A0-A3 counters are 32bits */
for (i = 0; i < 4; i++) {
accumulate_uint32(start + 4 + i, end + 4 + i,
result->accumulator + query->a_offset + i);
}
/* A4-A23 counters are 40bits */
for (i = 4; i < 24; i++) {
accumulate_uint40(i, start, end,
result->accumulator + query->a_offset + i);
}
/* A24-27 counters are 32bits */
for (i = 0; i < 4; i++) {
accumulate_uint32(start + 28 + i, end + 28 + i,
result->accumulator + query->a_offset + 24 + i);
}
/* A28-31 counters are 40bits */
for (i = 28; i < 32; i++) {
accumulate_uint40(i, start, end,
result->accumulator + query->a_offset + i);
}
/* A32-35 counters are 32bits */
for (i = 0; i < 4; i++) {
accumulate_uint32(start + 36 + i, end + 36 + i,
result->accumulator + query->a_offset + 32 + i);
}
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) ||
!query->perf->sys_vars.query_mode) {
/* A36-37 counters are 32bits */
accumulate_uint32(start + 40, end + 40,
result->accumulator + query->a_offset + 36);
accumulate_uint32(start + 46, end + 46,
result->accumulator + query->a_offset + 37);
/* 8x 32bit B counters */
for (i = 0; i < 8; i++) {
accumulate_uint32(start + 48 + i, end + 48 + i,
result->accumulator + query->b_offset + i);
}
/* 8x 32bit C counters... */
for (i = 0; i < 8; i++) {
accumulate_uint32(start + 56 + i, end + 56 + i,
result->accumulator + query->c_offset + i);
}
}
break;
case I915_OA_FORMAT_A32u40_A4u32_B8_C8: case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
result->accumulator[query->gpu_time_offset] = result->accumulator[query->gpu_time_offset] =
intel_perf_report_timestamp(query, end) - intel_perf_report_timestamp(query, end) -

View File

@@ -72,6 +72,21 @@ bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
return query; return query;
} }
/**
 * Allocate a query description laid out for the
 * I915_OA_FORMAT_A24u40_A14u32_B8_C8 OA report format.
 *
 * The query itself comes from intel_query_alloc(perf, ncounters); this helper
 * then records the OA format and hands out consecutive offsets to each
 * counter group, in this order:
 *
 *    GPU time (1 slot), GPU clock (1 slot), A counters (38 slots),
 *    B counters (8 slots), C counters (8 slots), perfcnt (2 slots),
 *    rpstat (starts right after perfcnt).
 *
 * Returns the query handed back by intel_query_alloc().
 */
static struct intel_perf_query_info *
xehp_query_alloc(struct intel_perf_config *perf, int ncounters)
{
   struct intel_perf_query_info *info = intel_query_alloc(perf, ncounters);
   int next = 0; /* running offset of the next free slot */

   info->oa_format = I915_OA_FORMAT_A24u40_A14u32_B8_C8;

   info->gpu_time_offset = next;
   next += 1;

   info->gpu_clock_offset = next;
   next += 1;

   /* 24 + 14 A counters in this format */
   info->a_offset = next;
   next += 38;

   info->b_offset = next;
   next += 8;

   info->c_offset = next;
   next += 8;

   info->perfcnt_offset = next;
   next += 2;

   info->rpstat_offset = next;

   return info;
}
struct intel_perf_query_counter_data { struct intel_perf_query_counter_data {
uint32_t name_idx; uint32_t name_idx;
uint32_t desc_idx; uint32_t desc_idx;

View File

@@ -109,7 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
properties[p++] = metric_id; properties[p++] = metric_id;
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
properties[p++] = I915_OA_FORMAT_A32u40_A4u32_B8_C8; properties[p++] =
device->info->verx10 >= 125 ?
I915_OA_FORMAT_A24u40_A14u32_B8_C8 :
I915_OA_FORMAT_A32u40_A4u32_B8_C8;
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = 31; /* slowest sampling period */ properties[p++] = 31; /* slowest sampling period */