diff --git a/src/intel/perf/gen_perf.py b/src/intel/perf/gen_perf.py index 2b3150592e5..dd158ffa8c1 100644 --- a/src/intel/perf/gen_perf.py +++ b/src/intel/perf/gen_perf.py @@ -964,6 +964,8 @@ def main(): if gen.chipset == "hsw": c("struct intel_perf_query_info *query = hsw_query_alloc(perf, %u);\n" % len(counters)) + elif gen.chipset.startswith("acm"): + c("struct intel_perf_query_info *query = xehp_query_alloc(perf, %u);\n" % len(counters)) else: c("struct intel_perf_query_info *query = bdw_query_alloc(perf, %u);\n" % len(counters)) c("\n") diff --git a/src/intel/perf/intel_perf.c b/src/intel/perf/intel_perf.c index 07e6c0fe512..2ac968712af 100644 --- a/src/intel/perf/intel_perf.c +++ b/src/intel/perf/intel_perf.c @@ -1108,6 +1108,66 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result, result->reports_accumulated++; switch (query->oa_format) { + case I915_OA_FORMAT_A24u40_A14u32_B8_C8: + result->accumulator[query->gpu_time_offset] = + intel_perf_report_timestamp(query, end) - + intel_perf_report_timestamp(query, start); + + accumulate_uint32(start + 3, end + 3, + result->accumulator + query->gpu_clock_offset); /* GPU clock (report index 3) */ + + /* A0-A3 counters are 32bits (report indices 4-7) */ + for (i = 0; i < 4; i++) { + accumulate_uint32(start + 4 + i, end + 4 + i, + result->accumulator + query->a_offset + i); + } + + /* A4-A23 counters are 40bits */ + for (i = 4; i < 24; i++) { + accumulate_uint40(i, start, end, + result->accumulator + query->a_offset + i); + } + + /* A24-A27 counters are 32bits (report indices 28-31) */ + for (i = 0; i < 4; i++) { + accumulate_uint32(start + 28 + i, end + 28 + i, + result->accumulator + query->a_offset + 24 + i); + } + + /* A28-A31 counters are 40bits */ + for (i = 28; i < 32; i++) { + accumulate_uint40(i, start, end, + result->accumulator + query->a_offset + i); + } + + /* A32-A35 counters are 32bits (report indices 36-39) */ + for (i = 0; i < 4; i++) { + accumulate_uint32(start + 36 + i, end + 36 + i, + result->accumulator + query->a_offset + 32 + i); + } + + if 
(can_use_mi_rpc_bc_counters(&query->perf->devinfo) || + !query->perf->sys_vars.query_mode) { + /* A36-A37 counters are 32bits, at non-adjacent report indices 40 and 46 */ + accumulate_uint32(start + 40, end + 40, + result->accumulator + query->a_offset + 36); + accumulate_uint32(start + 46, end + 46, + result->accumulator + query->a_offset + 37); + + /* 8x 32bit B counters (report indices 48-55) */ + for (i = 0; i < 8; i++) { + accumulate_uint32(start + 48 + i, end + 48 + i, + result->accumulator + query->b_offset + i); + } + + /* 8x 32bit C counters (report indices 56-63) */ + for (i = 0; i < 8; i++) { + accumulate_uint32(start + 56 + i, end + 56 + i, + result->accumulator + query->c_offset + i); + } + } + break; + case I915_OA_FORMAT_A32u40_A4u32_B8_C8: result->accumulator[query->gpu_time_offset] = intel_perf_report_timestamp(query, end) - diff --git a/src/intel/perf/intel_perf_setup.h b/src/intel/perf/intel_perf_setup.h index a5a97df5179..cfa5a3c75d7 100644 --- a/src/intel/perf/intel_perf_setup.h +++ b/src/intel/perf/intel_perf_setup.h @@ -72,6 +72,21 @@ bdw_query_alloc(struct intel_perf_config *perf, int ncounters) return query; } +static struct intel_perf_query_info * +xehp_query_alloc(struct intel_perf_config *perf, int ncounters) +{ + struct intel_perf_query_info *query = intel_query_alloc(perf, ncounters); + query->oa_format = I915_OA_FORMAT_A24u40_A14u32_B8_C8; + query->gpu_time_offset = 0; + query->gpu_clock_offset = query->gpu_time_offset + 1; + query->a_offset = query->gpu_clock_offset + 1; + query->b_offset = query->a_offset + 38; /* 38 A counters: 24 40bit + 14 32bit */ + query->c_offset = query->b_offset + 8; /* 8 B counters */ + query->perfcnt_offset = query->c_offset + 8; /* 8 C counters */ + query->rpstat_offset = query->perfcnt_offset + 2; + return query; +} + struct intel_perf_query_counter_data { uint32_t name_idx; uint32_t desc_idx; diff --git a/src/intel/vulkan/anv_perf.c b/src/intel/vulkan/anv_perf.c index 3e970b2fb03..d6a82be3a18 100644 --- a/src/intel/vulkan/anv_perf.c +++ b/src/intel/vulkan/anv_perf.c @@ -109,7 +109,10 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id) 
properties[p++] = metric_id; properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT; - properties[p++] = I915_OA_FORMAT_A32u40_A4u32_B8_C8; + properties[p++] = + device->info->verx10 >= 125 ? + I915_OA_FORMAT_A24u40_A14u32_B8_C8 : + I915_OA_FORMAT_A32u40_A4u32_B8_C8; properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT; properties[p++] = 31; /* slowest sampling period */