intel/perf: use the new OA format for Gfx12.5+
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18893>
This commit is contained in:

committed by
Marge Bot

parent
02608c25ec
commit
fdf602a545
@@ -964,6 +964,8 @@ def main():
|
|||||||
|
|
||||||
if gen.chipset == "hsw":
|
if gen.chipset == "hsw":
|
||||||
c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters))
|
c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters))
|
||||||
|
elif gen.chipset.startswith("acm"):
|
||||||
|
c("struct intel_perf_query_info *query = xehp_query_alloc(perf, %u);\n" % len(counters))
|
||||||
else:
|
else:
|
||||||
c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters))
|
c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters))
|
||||||
c("\n")
|
c("\n")
|
||||||
|
@@ -1108,6 +1108,66 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
|||||||
result->reports_accumulated++;
|
result->reports_accumulated++;
|
||||||
|
|
||||||
switch (query->oa_format) {
|
switch (query->oa_format) {
|
||||||
|
case I915_OA_FORMAT_A24u40_A14u32_B8_C8:
|
||||||
|
result->accumulator[query->gpu_time_offset] =
|
||||||
|
intel_perf_report_timestamp(query, end) -
|
||||||
|
intel_perf_report_timestamp(query, start);
|
||||||
|
|
||||||
|
accumulate_uint32(start + 3, end + 3,
|
||||||
|
result->accumulator + query->gpu_clock_offset); /* clock */
|
||||||
|
|
||||||
|
/* A0-A3 counters are 32bits */
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
accumulate_uint32(start + 4 + i, end + 4 + i,
|
||||||
|
result->accumulator + query->a_offset + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A4-A23 counters are 40bits */
|
||||||
|
for (i = 4; i < 24; i++) {
|
||||||
|
accumulate_uint40(i, start, end,
|
||||||
|
result->accumulator + query->a_offset + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A24-27 counters are 32bits */
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
accumulate_uint32(start + 28 + i, end + 28 + i,
|
||||||
|
result->accumulator + query->a_offset + 24 + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A28-31 counters are 40bits */
|
||||||
|
for (i = 28; i < 32; i++) {
|
||||||
|
accumulate_uint40(i, start, end,
|
||||||
|
result->accumulator + query->a_offset + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A32-35 counters are 32bits */
|
||||||
|
for (i = 0; i < 4; i++) {
|
||||||
|
accumulate_uint32(start + 36 + i, end + 36 + i,
|
||||||
|
result->accumulator + query->a_offset + 32 + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) ||
|
||||||
|
!query->perf->sys_vars.query_mode) {
|
||||||
|
/* A36-37 counters are 32bits */
|
||||||
|
accumulate_uint32(start + 40, end + 40,
|
||||||
|
result->accumulator + query->a_offset + 36);
|
||||||
|
accumulate_uint32(start + 46, end + 46,
|
||||||
|
result->accumulator + query->a_offset + 37);
|
||||||
|
|
||||||
|
/* 8x 32bit B counters */
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
accumulate_uint32(start + 48 + i, end + 48 + i,
|
||||||
|
result->accumulator + query->b_offset + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 8x 32bit C counters... */
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
accumulate_uint32(start + 56 + i, end + 56 + i,
|
||||||
|
result->accumulator + query->c_offset + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
|
case I915_OA_FORMAT_A32u40_A4u32_B8_C8:
|
||||||
result->accumulator[query->gpu_time_offset] =
|
result->accumulator[query->gpu_time_offset] =
|
||||||
intel_perf_report_timestamp(query, end) -
|
intel_perf_report_timestamp(query, end) -
|
||||||
|
@@ -72,6 +72,21 @@ bdw_query_alloc(struct intel_perf_config *perf, int ncounters)
|
|||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct intel_perf_query_info *
|
||||||
|
xehp_query_alloc(struct intel_perf_config *perf, int ncounters)
|
||||||
|
{
|
||||||
|
struct intel_perf_query_info *query = intel_query_alloc(perf, ncounters);
|
||||||
|
query->oa_format = I915_OA_FORMAT_A24u40_A14u32_B8_C8;
|
||||||
|
query->gpu_time_offset = 0;
|
||||||
|
query->gpu_clock_offset = query->gpu_time_offset + 1;
|
||||||
|
query->a_offset = query->gpu_clock_offset + 1;
|
||||||
|
query->b_offset = query->a_offset + 38;
|
||||||
|
query->c_offset = query->b_offset + 8;
|
||||||
|
query->perfcnt_offset = query->c_offset + 8;
|
||||||
|
query->rpstat_offset = query->perfcnt_offset + 2;
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
struct intel_perf_query_counter_data {
|
struct intel_perf_query_counter_data {
|
||||||
uint32_t name_idx;
|
uint32_t name_idx;
|
||||||
uint32_t desc_idx;
|
uint32_t desc_idx;
|
||||||
|
@@ -109,7 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
|
|||||||
properties[p++] = metric_id;
|
properties[p++] = metric_id;
|
||||||
|
|
||||||
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
|
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
|
||||||
properties[p++] = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
properties[p++] =
|
||||||
|
device->info->verx10 >= 125 ?
|
||||||
|
I915_OA_FORMAT_A24u40_A14u32_B8_C8 :
|
||||||
|
I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||||
|
|
||||||
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
|
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
|
||||||
properties[p++] = 31; /* slowest sampling period */
|
properties[p++] = 31; /* slowest sampling period */
|
||||||
|
Reference in New Issue
Block a user