intel/perf: simplify the processing of OA reports
This is a more accurate description of what happens in processing the OA reports. Previously we only had a somewhat difficult to parse state machine tracking the context ID. What we really only need to do to decide if the delta between 2 reports (r0 & r1) should be accumulated in the query result is : * whether the r0 is tagged with the context ID relevant to us * if r0 is not tagged with our context ID and r1 is: does r0 have a invalid context id? If not then we're in a case where i915 has resubmitted the same context for execution through the execlist submission port v2: Update comment (Ken) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: <mesa-stable@lists.freedesktop.org> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -2221,6 +2221,17 @@ discard_all_queries(struct gen_perf_context *perf_ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Looks for the validity bit of context ID (dword 2) of an OA report. */
|
||||||
|
static bool
|
||||||
|
oa_report_ctx_id_valid(const struct gen_device_info *devinfo,
|
||||||
|
const uint32_t *report)
|
||||||
|
{
|
||||||
|
assert(devinfo->gen >= 8);
|
||||||
|
if (devinfo->gen == 8)
|
||||||
|
return (report[0] & (1 << 25)) != 0;
|
||||||
|
return (report[0] & (1 << 16)) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accumulate raw OA counter values based on deltas between pairs of
|
* Accumulate raw OA counter values based on deltas between pairs of
|
||||||
* OA reports.
|
* OA reports.
|
||||||
@@ -2248,7 +2259,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
|||||||
uint32_t *last;
|
uint32_t *last;
|
||||||
uint32_t *end;
|
uint32_t *end;
|
||||||
struct exec_node *first_samples_node;
|
struct exec_node *first_samples_node;
|
||||||
bool in_ctx = true;
|
bool last_report_ctx_match = true;
|
||||||
int out_duration = 0;
|
int out_duration = 0;
|
||||||
|
|
||||||
assert(query->oa.map != NULL);
|
assert(query->oa.map != NULL);
|
||||||
@@ -2302,6 +2313,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
|||||||
switch (header->type) {
|
switch (header->type) {
|
||||||
case DRM_I915_PERF_RECORD_SAMPLE: {
|
case DRM_I915_PERF_RECORD_SAMPLE: {
|
||||||
uint32_t *report = (uint32_t *)(header + 1);
|
uint32_t *report = (uint32_t *)(header + 1);
|
||||||
|
bool report_ctx_match = true;
|
||||||
bool add = true;
|
bool add = true;
|
||||||
|
|
||||||
/* Ignore reports that come before the start marker.
|
/* Ignore reports that come before the start marker.
|
||||||
@@ -2330,35 +2342,30 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
|||||||
* of OA counters while any other context is acctive.
|
* of OA counters while any other context is acctive.
|
||||||
*/
|
*/
|
||||||
if (devinfo->gen >= 8) {
|
if (devinfo->gen >= 8) {
|
||||||
if (in_ctx && report[2] != query->oa.result.hw_id) {
|
/* Consider that the current report matches our context only if
|
||||||
DBG("i915 perf: Switch AWAY (observed by ID change)\n");
|
* the report says the report ID is valid.
|
||||||
in_ctx = false;
|
*/
|
||||||
|
report_ctx_match = oa_report_ctx_id_valid(devinfo, report) &&
|
||||||
|
report[2] == start[2];
|
||||||
|
if (report_ctx_match)
|
||||||
out_duration = 0;
|
out_duration = 0;
|
||||||
} else if (in_ctx == false && report[2] == query->oa.result.hw_id) {
|
else
|
||||||
DBG("i915 perf: Switch TO\n");
|
|
||||||
in_ctx = true;
|
|
||||||
|
|
||||||
/* From experimentation in IGT, we found that the OA unit
|
|
||||||
* might label some report as "idle" (using an invalid
|
|
||||||
* context ID), right after a report for a given context.
|
|
||||||
* Deltas generated by those reports actually belong to the
|
|
||||||
* previous context, even though they're not labelled as
|
|
||||||
* such.
|
|
||||||
*
|
|
||||||
* We didn't *really* Switch AWAY in the case that we e.g.
|
|
||||||
* saw a single periodic report while idle...
|
|
||||||
*/
|
|
||||||
if (out_duration >= 1)
|
|
||||||
add = false;
|
|
||||||
} else if (in_ctx) {
|
|
||||||
assert(report[2] == query->oa.result.hw_id);
|
|
||||||
DBG("i915 perf: Continuation IN\n");
|
|
||||||
} else {
|
|
||||||
assert(report[2] != query->oa.result.hw_id);
|
|
||||||
DBG("i915 perf: Continuation OUT\n");
|
|
||||||
add = false;
|
|
||||||
out_duration++;
|
out_duration++;
|
||||||
}
|
|
||||||
|
/* Only add the delta between <last, report> if the last report
|
||||||
|
* was clearly identified as our context, or if we have at most
|
||||||
|
* 1 report without a matching ID.
|
||||||
|
*
|
||||||
|
* The OA unit will sometimes label reports with an invalid
|
||||||
|
* context ID when i915 rewrites the execlist submit register
|
||||||
|
* with the same context as the one currently running. This
|
||||||
|
* happens when i915 wants to notify the HW of ringbuffer tail
|
||||||
|
* register update. We have to consider this report as part of
|
||||||
|
* our context as the 3d pipeline behind the OACS unit is still
|
||||||
|
* processing the operations started at the previous execlist
|
||||||
|
* submission.
|
||||||
|
*/
|
||||||
|
add = last_report_ctx_match && out_duration < 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (add) {
|
if (add) {
|
||||||
@@ -2368,6 +2375,7 @@ accumulate_oa_reports(struct gen_perf_context *perf_ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
last = report;
|
last = report;
|
||||||
|
last_report_ctx_match = report_ctx_match;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user