intel/perf: use the new OA format for Gfx12.5+
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18893>
This commit is contained in:

committed by
Marge Bot

parent
02608c25ec
commit
fdf602a545
@@ -964,6 +964,8 @@ def main():
|
||||
|
||||
if gen.chipset == "hsw":
|
||||
c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters))
|
||||
elif gen.chipset.startswith("acm"):
|
||||
c("struct intel_perf_query_info *query = xehp_query_alloc(perf, %u);\n" % len(counters))
|
||||
else:
|
||||
c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters))
|
||||
c("\n")
|
||||
|
@@ -1108,6 +1108,66 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
||||
result->reports_accumulated++;
|
||||
|
||||
switch (query->oa_format) {
|
||||
case I915_OA_FORMAT_A24u40_A14u32_B8_C8:
|
||||
result->accumulator[query->gpu_time_offset] =
|
||||
intel_perf_report_timestamp(query, end) -
|
||||
intel_perf_report_timestamp(query, start);
|
||||
|
||||
accumulate_uint32(start + 3, end + 3,
|
||||
result->accumulator + query->gpu_clock_offset); /* clock */
|
||||
|
||||
/* A0-A3 counters are 32bits */
|
||||
for (i = 0; i < 4; i++) {
|
||||
accumulate_uint32(start + 4 + i, end + 4 + i,
|
||||
result->accumulator + query->a_offset + i);
|
||||
}
|
||||
|
||||
/* A4-A23 counters are 40bits */
|
||||
for (i = 4; i < 24; i++) {
|
||||
accumulate_uint40(i, start, end,
|
||||
result->accumulator + query->a_offset + i);
|
||||
}
|
||||
|
||||
/* A24-27 counters are 32bits */
|
||||
for (i = 0; i < 4; i++) {
|
||||
accumulate_uint32(start + 28 + i, end + 28 + i,
|
||||
result->accumulator + query->a_offset + 24 + i);
|
||||
}
|
||||
|
||||
/* A28-31 counters are 40bits */
|
||||
for (i = 28; i < 32; i++) {
|
||||
accumulate_uint40(i, start, end,
|
||||
result->accumulator + query->a_offset + i);
|
||||
}
|
||||
|
||||
/* A32-35 counters are 32bits */
|
||||
for (i = 0; i < 4; i++) {
|
||||
accumulate_uint32(start + 36 + i, end + 36 + i,
|
||||
result->accumulator + query->a_offset + 32 + i);
|
||||
}
|
||||
|
||||
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) ||
|
||||
!query->perf->sys_vars.query_mode) {
|
||||
/* A36-37 counters are 32bits */
|
||||
accumulate_uint32(start + 40, end + 40,
|
||||
result->accumulator + query->a_offset + 36);
|
||||
accumulate_uint32(start + 46, end + 46,
|
||||
result->accumulator + query->a_offset + 37);
|
||||
|
||||
/* 8x 32bit B counters */
|
||||
for (i = 0; i < 8; i++) {
|
||||
accumulate_uint32(start + 48 + i, end + 48 + i,
|
||||
result->accumulator + query->b_offset + i);
|
||||
}
|
||||
|
||||
/* 8x 32bit C counters... */
|
||||
for (i = 0; i < 8; i++) {
|
||||
accumulate_uint32(start + 56 + i, end + 56 + i,
|
||||
result->accumulator + query->c_offset + i);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
|
||||
result->accumulator[query->gpu_time_offset] =
|
||||
intel_perf_report_timestamp(query, end) -
|
||||
|
@@ -72,6 +72,21 @@ bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
|
||||
return query;
|
||||
}
|
||||
|
||||
static struct intel_perf_query_info *
|
||||
xehp_query_alloc(struct intel_perf_config *perf, int ncounters)
|
||||
{
|
||||
struct intel_perf_query_info *query = intel_query_alloc(perf, ncounters);
|
||||
query->oa_format = I915_OA_FORMAT_A24u40_A14u32_B8_C8;
|
||||
query->gpu_time_offset = 0;
|
||||
query->gpu_clock_offset = query->gpu_time_offset + 1;
|
||||
query->a_offset = query->gpu_clock_offset + 1;
|
||||
query->b_offset = query->a_offset + 38;
|
||||
query->c_offset = query->b_offset + 8;
|
||||
query->perfcnt_offset = query->c_offset + 8;
|
||||
query->rpstat_offset = query->perfcnt_offset + 2;
|
||||
return query;
|
||||
}
|
||||
|
||||
struct intel_perf_query_counter_data {
|
||||
uint32_t name_idx;
|
||||
uint32_t desc_idx;
|
||||
|
@@ -109,7 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
|
||||
properties[p++] = metric_id;
|
||||
|
||||
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
|
||||
properties[p++] = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||
properties[p++] =
|
||||
device->info->verx10 >= 125 ?
|
||||
I915_OA_FORMAT_A24u40_A14u32_B8_C8 :
|
||||
I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||
|
||||
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
|
||||
properties[p++] = 31; /* slowest sampling period */
|
||||
|
Reference in New Issue
Block a user