intel/perf: Implement intel_perf_query_result_accumulate() for gfx 20+
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29529>
This commit is contained in:

committed by
Marge Bot

parent
5a8f6ea35c
commit
18775827bd
@@ -206,7 +206,7 @@ std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_
|
|||||||
// Report is next to the header
|
// Report is next to the header
|
||||||
const uint32_t *report = reinterpret_cast<const uint32_t *>(header + 1);
|
const uint32_t *report = reinterpret_cast<const uint32_t *>(header + 1);
|
||||||
uint64_t gpu_timestamp_ldw =
|
uint64_t gpu_timestamp_ldw =
|
||||||
intel_perf_report_timestamp(selected_query, report);
|
intel_perf_report_timestamp(selected_query, &perf->devinfo, report);
|
||||||
|
|
||||||
/* Our HW only provides us with the lower 32 bits of the 36bits
|
/* Our HW only provides us with the lower 32 bits of the 36bits
|
||||||
* timestamp counter value. If we haven't captured the top bits yet,
|
* timestamp counter value. If we haven't captured the top bits yet,
|
||||||
|
@@ -684,8 +684,11 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
|
|||||||
|
|
||||||
perf->enable_all_metrics = debug_get_bool_option("INTEL_EXTENDED_METRICS", false);
|
perf->enable_all_metrics = debug_get_bool_option("INTEL_EXTENDED_METRICS", false);
|
||||||
|
|
||||||
/* TODO: We should query this from i915 */
|
/* TODO: We should query this from i915?
|
||||||
if (devinfo->verx10 >= 125)
|
* Looks like Xe2 platforms don't need it but don't have a spec quote to
|
||||||
|
* back it.
|
||||||
|
*/
|
||||||
|
if (devinfo->verx10 == 125)
|
||||||
perf->oa_timestamp_shift = 1;
|
perf->oa_timestamp_shift = 1;
|
||||||
|
|
||||||
perf->oa_timestamp_mask =
|
perf->oa_timestamp_mask =
|
||||||
@@ -992,6 +995,15 @@ accumulate_uint40(int a_index,
|
|||||||
*accumulator += delta;
|
*accumulator += delta;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Accumulate 64bits OA counters */
|
||||||
|
static inline void
|
||||||
|
accumulate_uint64(const uint32_t *report0,
|
||||||
|
const uint32_t *report1,
|
||||||
|
uint64_t *accumulator)
|
||||||
|
{
|
||||||
|
*accumulator += *((const uint64_t *)report1) - *((const uint64_t *)report0);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
gfx8_read_report_clock_ratios(const uint32_t *report,
|
gfx8_read_report_clock_ratios(const uint32_t *report,
|
||||||
uint64_t *slice_freq_hz,
|
uint64_t *slice_freq_hz,
|
||||||
@@ -1054,8 +1066,14 @@ can_use_mi_rpc_bc_counters(const struct intel_device_info *devinfo)
|
|||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
intel_perf_report_timestamp(const struct intel_perf_query_info *query,
|
intel_perf_report_timestamp(const struct intel_perf_query_info *query,
|
||||||
|
const struct intel_device_info *devinfo,
|
||||||
const uint32_t *report)
|
const uint32_t *report)
|
||||||
{
|
{
|
||||||
|
if (query->perf->devinfo->verx10 >= 200) {
|
||||||
|
uint64_t data_u64 = *((const uint64_t *)&report[2]);
|
||||||
|
return data_u64 >> query->perf->oa_timestamp_shift;
|
||||||
|
}
|
||||||
|
|
||||||
return report[1] >> query->perf->oa_timestamp_shift;
|
return report[1] >> query->perf->oa_timestamp_shift;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1065,25 +1083,43 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
|||||||
const uint32_t *start,
|
const uint32_t *start,
|
||||||
const uint32_t *end)
|
const uint32_t *end)
|
||||||
{
|
{
|
||||||
|
const struct intel_device_info *devinfo = query->perf->devinfo;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (result->hw_id == INTEL_PERF_INVALID_CTX_ID &&
|
if (query->perf->devinfo->verx10 >= 200) {
|
||||||
start[2] != INTEL_PERF_INVALID_CTX_ID)
|
if (result->hw_id == INTEL_PERF_INVALID_CTX_ID &&
|
||||||
result->hw_id = start[2];
|
start[4] != INTEL_PERF_INVALID_CTX_ID)
|
||||||
|
result->hw_id = start[4];
|
||||||
|
} else {
|
||||||
|
if (result->hw_id == INTEL_PERF_INVALID_CTX_ID &&
|
||||||
|
start[2] != INTEL_PERF_INVALID_CTX_ID)
|
||||||
|
result->hw_id = start[2];
|
||||||
|
}
|
||||||
|
|
||||||
if (result->reports_accumulated == 0)
|
if (result->reports_accumulated == 0)
|
||||||
result->begin_timestamp = intel_perf_report_timestamp(query, start);
|
result->begin_timestamp = intel_perf_report_timestamp(query, devinfo, start);
|
||||||
result->end_timestamp = intel_perf_report_timestamp(query, end);
|
result->end_timestamp = intel_perf_report_timestamp(query, devinfo, end);
|
||||||
result->reports_accumulated++;
|
result->reports_accumulated++;
|
||||||
|
|
||||||
/* oa format handling needs to match with platform version returned in
|
/* oa format handling needs to match with platform version returned in
|
||||||
* intel_perf_get_oa_format()
|
* intel_perf_get_oa_format()
|
||||||
*/
|
*/
|
||||||
assert(intel_perf_get_oa_format(query->perf) == query->oa_format);
|
assert(intel_perf_get_oa_format(query->perf) == query->oa_format);
|
||||||
if (query->perf->devinfo->verx10 >= 125) {
|
if (query->perf->devinfo->verx10 >= 200) {
|
||||||
|
/* PEC64u64 */
|
||||||
|
result->accumulator[query->gpu_time_offset] =
|
||||||
|
intel_perf_report_timestamp(query, devinfo, end) -
|
||||||
|
intel_perf_report_timestamp(query, devinfo, start);
|
||||||
|
accumulate_uint64(start + 6, end + 6, &result->accumulator[query->gpu_clock_offset]);
|
||||||
|
|
||||||
|
for (i = 0; i < 64; i++)
|
||||||
|
accumulate_uint64(start + 8 + (2 * i), end + 8 + (2 * i),
|
||||||
|
&result->accumulator[query->pec_offset + i]);
|
||||||
|
} else if (query->perf->devinfo->verx10 >= 125) {
|
||||||
/* I915_OA_FORMAT_A24u40_A14u32_B8_C8 */
|
/* I915_OA_FORMAT_A24u40_A14u32_B8_C8 */
|
||||||
result->accumulator[query->gpu_time_offset] =
|
result->accumulator[query->gpu_time_offset] =
|
||||||
intel_perf_report_timestamp(query, end) -
|
intel_perf_report_timestamp(query, devinfo, end) -
|
||||||
intel_perf_report_timestamp(query, start);
|
intel_perf_report_timestamp(query, devinfo, start);
|
||||||
|
|
||||||
accumulate_uint32(start + 3, end + 3,
|
accumulate_uint32(start + 3, end + 3,
|
||||||
result->accumulator + query->gpu_clock_offset); /* clock */
|
result->accumulator + query->gpu_clock_offset); /* clock */
|
||||||
@@ -1141,8 +1177,8 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
|||||||
} else if (query->perf->devinfo->verx10 >= 120) {
|
} else if (query->perf->devinfo->verx10 >= 120) {
|
||||||
/* I915_OA_FORMAT_A32u40_A4u32_B8_C8 */
|
/* I915_OA_FORMAT_A32u40_A4u32_B8_C8 */
|
||||||
result->accumulator[query->gpu_time_offset] =
|
result->accumulator[query->gpu_time_offset] =
|
||||||
intel_perf_report_timestamp(query, end) -
|
intel_perf_report_timestamp(query, devinfo, end) -
|
||||||
intel_perf_report_timestamp(query, start);
|
intel_perf_report_timestamp(query, devinfo, start);
|
||||||
|
|
||||||
accumulate_uint32(start + 3, end + 3,
|
accumulate_uint32(start + 3, end + 3,
|
||||||
result->accumulator + query->gpu_clock_offset); /* clock */
|
result->accumulator + query->gpu_clock_offset); /* clock */
|
||||||
@@ -1176,8 +1212,8 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
|||||||
} else {
|
} else {
|
||||||
/* I915_OA_FORMAT_A24u40_A14u32_B8_C8 */
|
/* I915_OA_FORMAT_A24u40_A14u32_B8_C8 */
|
||||||
result->accumulator[query->gpu_time_offset] =
|
result->accumulator[query->gpu_time_offset] =
|
||||||
intel_perf_report_timestamp(query, end) -
|
intel_perf_report_timestamp(query, devinfo, end) -
|
||||||
intel_perf_report_timestamp(query, start);
|
intel_perf_report_timestamp(query, devinfo, start);
|
||||||
|
|
||||||
for (i = 0; i < 61; i++) {
|
for (i = 0; i < 61; i++) {
|
||||||
accumulate_uint32(start + 3 + i, end + 3 + i,
|
accumulate_uint32(start + 3 + i, end + 3 + i,
|
||||||
|
@@ -510,6 +510,7 @@ void intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
|||||||
/** Read the timestamp value in a report.
|
/** Read the timestamp value in a report.
|
||||||
*/
|
*/
|
||||||
uint64_t intel_perf_report_timestamp(const struct intel_perf_query_info *query,
|
uint64_t intel_perf_report_timestamp(const struct intel_perf_query_info *query,
|
||||||
|
const struct intel_device_info *devinfo,
|
||||||
const uint32_t *report);
|
const uint32_t *report);
|
||||||
|
|
||||||
/** Accumulate the delta between 2 snapshots of OA perf registers (layout
|
/** Accumulate the delta between 2 snapshots of OA perf registers (layout
|
||||||
|
Reference in New Issue
Block a user