intel/perf: query register descriptions

This will be useful when we implement queries using a series of MI_SRM
instead of MI_RPC.

Unfortunately on Gen12, the MI_RPC command sources its values from the OAR
unit, which has a series of registers similar to the OAG unit's, but some
of the HW configuration doesn't reach OAR, so we have to snapshot OAG
manually instead.

v2: Fix comments
    Use const

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6518>
This commit is contained in:
Lionel Landwerlin
2020-09-03 10:52:34 +03:00
committed by Marge Bot
parent a6e980e9bf
commit f32d1bf529
3 changed files with 203 additions and 2 deletions

View File

@@ -1130,6 +1130,74 @@ gen_perf_query_result_read_perfcnts(struct gen_perf_query_result *result,
}
}
/* Map a (field type, register index) pair to its slot in the result
 * accumulator of the given query.
 *
 * Only SRM_PERFCNT fields are handled here: their slot is
 * query->perfcnt_offset plus the register index. Any other type is treated
 * as unreachable.
 */
static uint32_t
query_accumulator_offset(const struct gen_perf_query_info *query,
                         enum gen_perf_query_field_type type,
                         uint8_t index)
{
   if (type == GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT)
      return query->perfcnt_offset + index;

   unreachable("Invalid register type");
   return 0;
}
void
gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
const struct gen_perf_query_info *query,
const struct gen_device_info *devinfo,
const void *start,
const void *end,
bool no_oa_accumulate)
{
struct gen_perf_query_field_layout *layout = &query->perf->query_layout;
for (uint32_t r = 0; r < layout->n_fields; r++) {
struct gen_perf_query_field *field = &layout->fields[r];
if (field->type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC) {
gen_perf_query_result_read_frequencies(result, devinfo,
start + field->location,
end + field->location);
/* no_oa_accumulate=true is used when doing GL perf queries, we
* manually parse the OA reports from the OA buffer and substract
* unrelated deltas, so don't accumulate the begin/end reports here.
*/
if (!no_oa_accumulate) {
gen_perf_query_result_accumulate(result, query,
start + field->location,
end + field->location);
}
} else {
uint64_t v0, v1;
if (field->size == 4) {
v0 = *(const uint32_t *)(start + field->location);
v1 = *(const uint32_t *)(end + field->location);
} else {
assert(field->size == 8);
v0 = *(const uint64_t *)(start + field->location);
v1 = *(const uint64_t *)(end + field->location);
}
if (field->mask) {
v0 = field->mask & v0;
v1 = field->mask & v1;
}
/* RPSTAT is a bit of a special case because its begin/end values
* represent frequencies. We store it in a separate location.
*/
if (field->type == GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT)
gen_perf_query_result_read_gt_frequency(result, devinfo, v0, v1);
else
result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0;
}
}
}
void
gen_perf_query_result_clear(struct gen_perf_query_result *result)
{
@@ -1146,12 +1214,86 @@ gen_perf_compare_query_names(const void *v1, const void *v2)
return strcmp(q1->name, q2->name);
}
/* Append one field to the layout and return a pointer to the new entry.
 *
 * The field is placed at the current end of the layout (after alignment),
 * and the layout size grows by the size of the field. The mask of the new
 * entry is left at 0; callers set it afterwards when needed.
 */
static inline struct gen_perf_query_field *
add_query_register(struct gen_perf_query_field_layout *layout,
                   enum gen_perf_query_field_type type,
                   uint16_t offset,
                   uint16_t size,
                   uint8_t index)
{
   /* MI_RPC needs a 64 byte alignment (HW requirement). Registers whose
    * size is a multiple of 8 bytes are aligned to 8 bytes (shows up nicely
    * in the debugger).
    */
   if (type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC) {
      layout->size = align(layout->size, 64);
   } else if ((size % 8) == 0) {
      layout->size = align(layout->size, 8);
   }

   struct gen_perf_query_field *slot = &layout->fields[layout->n_fields];

   *slot = (struct gen_perf_query_field) {
      .mmio_offset = offset,
      .location = layout->size,
      .type = type,
      .index = index,
      .size = size,
   };

   layout->n_fields++;
   layout->size += size;

   return slot;
}
/* Build perf_cfg->query_layout: the list of fields making up one query
 * snapshot on the given device.
 *
 * The layout always starts with a 256 byte MI_RPC report, followed by the
 * two PERF_CNT registers on Gen11 and earlier, and one RPSTAT register on
 * Gen8 (except Cherryview) and Gen9+. At most 4 fields are added.
 *
 * NOTE(review): layout->fields is not allocated here; presumably storage
 * for the entries is provided elsewhere before this runs — confirm.
 */
static void
gen_perf_init_query_fields(struct gen_perf_config *perf_cfg,
                           const struct gen_device_info *devinfo)
{
   struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout;

   /* Start from an empty layout. Field locations are derived from
    * layout->size, so it has to be reset together with n_fields, otherwise
    * the MI_RPC report would not land at location 0.
    */
   layout->n_fields = 0;
   layout->size = 0;

   /* MI_RPC requires a 64byte alignment. */
   layout->alignment = 64;

   add_query_register(layout, GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
                      0, 256, 0);

   if (devinfo->gen <= 11) {
      struct gen_perf_query_field *field =
         add_query_register(layout,
                            GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
                            PERF_CNT_1_DW0, 8, 0);
      field->mask = PERF_CNT_VALUE_MASK;

      field = add_query_register(layout,
                                 GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
                                 PERF_CNT_2_DW0, 8, 1);
      field->mask = PERF_CNT_VALUE_MASK;
   }

   if (devinfo->gen == 8 && !devinfo->is_cherryview) {
      add_query_register(layout,
                         GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
                         GEN7_RPSTAT1, 4, 0);
   }

   if (devinfo->gen >= 9) {
      add_query_register(layout,
                         GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
                         GEN9_RPSTAT0, 4, 0);
   }

   /* Align the whole package to 64bytes so that 2 snapshots can be put
    * together without extra alignment for the user.
    */
   layout->size = align(layout->size, 64);
}
void
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
const struct gen_device_info *devinfo,
int drm_fd,
bool include_pipeline_statistics)
{
gen_perf_init_query_fields(perf_cfg, devinfo);
if (include_pipeline_statistics) {
load_pipeline_statistic_metrics(perf_cfg, devinfo);
gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);

View File

@@ -109,9 +109,9 @@ struct gen_pipeline_stat {
* For Gen8+
* 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
*
* Plus 2 PERF_CNT registers.
* Plus 2 PERF_CNT registers and 1 RPSTAT register.
*/
#define MAX_OA_REPORT_COUNTERS (62 + 2)
#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1)
/*
* We currently allocate only one page for pipeline statistics queries. Here
@@ -234,10 +234,54 @@ struct gen_perf_query_info {
int b_offset;
int c_offset;
int perfcnt_offset;
int rpstat_offset;
struct gen_perf_registers config;
};
/* When not using the MI_RPC command, this structure describes the list of
 * register offsets as well as their storage location so that they can be
 * stored through a series of MI_SRM commands and accumulated with
 * gen_perf_query_result_accumulate_fields().
 */
struct gen_perf_query_field_layout {
/* Alignment for the layout */
uint32_t alignment;
/* Size of the whole layout */
uint32_t size;
/* Number of valid entries in fields */
uint32_t n_fields;
struct gen_perf_query_field {
/* MMIO location of this register */
uint16_t mmio_offset;
/* Location of this register in the storage (offset from the start of a
 * snapshot)
 */
uint16_t location;
/* Type of register, for accumulation (see gen_perf_query_info:*_offset
 * fields)
 */
enum gen_perf_query_field_type {
GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT
} type;
/* Index of register in the given type (for instance A31 or B2,
 * etc...)
 */
uint8_t index;
/* Size of the field in the storage: 4, 8 or 256 */
uint16_t size;
/* If not 0, mask to apply to the register value. */
uint64_t mask;
} *fields;
};
struct gen_perf_query_counter_info {
struct gen_perf_query_counter *counter;
@@ -269,6 +313,8 @@ struct gen_perf_config {
struct gen_perf_query_counter_info *counter_infos;
int n_counters;
struct gen_perf_query_field_layout query_layout;
/* Variables referenced in the XML meta data for OA performance
* counters, e.g in the normalization equations.
*
@@ -387,6 +433,17 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
const struct gen_perf_query_info *query,
const uint32_t *start,
const uint32_t *end);
/** Accumulate the delta between 2 snapshots of OA perf registers (layout
 * should match the description held in the gen_perf_query_field_layout of
 * the query's gen_perf_config).
 *
 * start and end point to the beginning of the begin/end snapshots. When
 * no_oa_accumulate is true, the MI_RPC begin/end reports are not
 * accumulated (their frequencies are still read).
 */
void gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
const struct gen_perf_query_info *query,
const struct gen_device_info *devinfo,
const void *start,
const void *end,
bool no_oa_accumulate);
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
static inline size_t

View File

@@ -743,6 +743,7 @@ def main():
query->b_offset = query->a_offset + 45;
query->c_offset = query->b_offset + 8;
query->perfcnt_offset = query->c_offset + 8;
query->rpstat_offset = query->perfcnt_offset + 2;
"""))
else:
c(textwrap.dedent("""\
@@ -754,6 +755,7 @@ def main():
query->b_offset = query->a_offset + 36;
query->c_offset = query->b_offset + 8;
query->perfcnt_offset = query->c_offset + 8;
query->rpstat_offset = query->perfcnt_offset + 2;
"""))