intel/perf: query register descriptions
This will be useful when we implement queries using a series of MI_SRM commands instead of MI_RPC. Unfortunately, on Gen12 the MI_RPC command sources its values from the OAR unit, which has a similar series of registers to the OAG unit, but some of the HW configuration doesn't reach OAR, so we have to snapshot OAG manually instead. v2: Fix comments; use const. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6518>
This commit is contained in:

committed by
Marge Bot

parent
a6e980e9bf
commit
f32d1bf529
@@ -1130,6 +1130,74 @@ gen_perf_query_result_read_perfcnts(struct gen_perf_query_result *result,
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
query_accumulator_offset(const struct gen_perf_query_info *query,
|
||||
enum gen_perf_query_field_type type,
|
||||
uint8_t index)
|
||||
{
|
||||
switch (type) {
|
||||
case GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
|
||||
return query->perfcnt_offset + index;
|
||||
default:
|
||||
unreachable("Invalid register type");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
const struct gen_device_info *devinfo,
|
||||
const void *start,
|
||||
const void *end,
|
||||
bool no_oa_accumulate)
|
||||
{
|
||||
struct gen_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||
|
||||
for (uint32_t r = 0; r < layout->n_fields; r++) {
|
||||
struct gen_perf_query_field *field = &layout->fields[r];
|
||||
|
||||
if (field->type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC) {
|
||||
gen_perf_query_result_read_frequencies(result, devinfo,
|
||||
start + field->location,
|
||||
end + field->location);
|
||||
/* no_oa_accumulate=true is used when doing GL perf queries, we
|
||||
* manually parse the OA reports from the OA buffer and substract
|
||||
* unrelated deltas, so don't accumulate the begin/end reports here.
|
||||
*/
|
||||
if (!no_oa_accumulate) {
|
||||
gen_perf_query_result_accumulate(result, query,
|
||||
start + field->location,
|
||||
end + field->location);
|
||||
}
|
||||
} else {
|
||||
uint64_t v0, v1;
|
||||
|
||||
if (field->size == 4) {
|
||||
v0 = *(const uint32_t *)(start + field->location);
|
||||
v1 = *(const uint32_t *)(end + field->location);
|
||||
} else {
|
||||
assert(field->size == 8);
|
||||
v0 = *(const uint64_t *)(start + field->location);
|
||||
v1 = *(const uint64_t *)(end + field->location);
|
||||
}
|
||||
|
||||
if (field->mask) {
|
||||
v0 = field->mask & v0;
|
||||
v1 = field->mask & v1;
|
||||
}
|
||||
|
||||
/* RPSTAT is a bit of a special case because its begin/end values
|
||||
* represent frequencies. We store it in a separate location.
|
||||
*/
|
||||
if (field->type == GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT)
|
||||
gen_perf_query_result_read_gt_frequency(result, devinfo, v0, v1);
|
||||
else
|
||||
result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gen_perf_query_result_clear(struct gen_perf_query_result *result)
|
||||
{
|
||||
@@ -1146,12 +1214,86 @@ gen_perf_compare_query_names(const void *v1, const void *v2)
|
||||
return strcmp(q1->name, q2->name);
|
||||
}
|
||||
|
||||
static inline struct gen_perf_query_field *
|
||||
add_query_register(struct gen_perf_query_field_layout *layout,
|
||||
enum gen_perf_query_field_type type,
|
||||
uint16_t offset,
|
||||
uint16_t size,
|
||||
uint8_t index)
|
||||
{
|
||||
/* Align MI_RPC to 64bytes (HW requirement) & 64bit registers to 8bytes
|
||||
* (shows up nicely in the debugger).
|
||||
*/
|
||||
if (type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC)
|
||||
layout->size = align(layout->size, 64);
|
||||
else if (size % 8 == 0)
|
||||
layout->size = align(layout->size, 8);
|
||||
|
||||
layout->fields[layout->n_fields++] = (struct gen_perf_query_field) {
|
||||
.mmio_offset = offset,
|
||||
.location = layout->size,
|
||||
.type = type,
|
||||
.index = index,
|
||||
.size = size,
|
||||
};
|
||||
layout->size += size;
|
||||
|
||||
return &layout->fields[layout->n_fields - 1];
|
||||
}
|
||||
|
||||
static void
|
||||
gen_perf_init_query_fields(struct gen_perf_config *perf_cfg,
|
||||
const struct gen_device_info *devinfo)
|
||||
{
|
||||
struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout;
|
||||
|
||||
layout->n_fields = 0;
|
||||
|
||||
/* MI_RPC requires a 64byte alignment. */
|
||||
layout->alignment = 64;
|
||||
|
||||
add_query_register(layout, GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
|
||||
0, 256, 0);
|
||||
|
||||
if (devinfo->gen <= 11) {
|
||||
struct gen_perf_query_field *field =
|
||||
add_query_register(layout,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
|
||||
PERF_CNT_1_DW0, 8, 0);
|
||||
field->mask = PERF_CNT_VALUE_MASK;
|
||||
|
||||
field = add_query_register(layout,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
|
||||
PERF_CNT_2_DW0, 8, 1);
|
||||
field->mask = PERF_CNT_VALUE_MASK;
|
||||
}
|
||||
|
||||
if (devinfo->gen == 8 && !devinfo->is_cherryview) {
|
||||
add_query_register(layout,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
|
||||
GEN7_RPSTAT1, 4, 0);
|
||||
}
|
||||
|
||||
if (devinfo->gen >= 9) {
|
||||
add_query_register(layout,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
|
||||
GEN9_RPSTAT0, 4, 0);
|
||||
}
|
||||
|
||||
/* Align the whole package to 64bytes so that 2 snapshots can be put
|
||||
* together without extract alignment for the user.
|
||||
*/
|
||||
layout->size = align(layout->size, 64);
|
||||
}
|
||||
|
||||
void
|
||||
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
|
||||
const struct gen_device_info *devinfo,
|
||||
int drm_fd,
|
||||
bool include_pipeline_statistics)
|
||||
{
|
||||
gen_perf_init_query_fields(perf_cfg, devinfo);
|
||||
|
||||
if (include_pipeline_statistics) {
|
||||
load_pipeline_statistic_metrics(perf_cfg, devinfo);
|
||||
gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
|
||||
|
@@ -109,9 +109,9 @@ struct gen_pipeline_stat {
|
||||
* For Gen8+
|
||||
* 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
|
||||
*
|
||||
* Plus 2 PERF_CNT registers.
|
||||
* Plus 2 PERF_CNT registers and 1 RPSTAT register.
|
||||
*/
|
||||
#define MAX_OA_REPORT_COUNTERS (62 + 2)
|
||||
#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1)
|
||||
|
||||
/*
|
||||
* We currently allocate only one page for pipeline statistics queries. Here
|
||||
@@ -234,10 +234,54 @@ struct gen_perf_query_info {
|
||||
int b_offset;
|
||||
int c_offset;
|
||||
int perfcnt_offset;
|
||||
int rpstat_offset;
|
||||
|
||||
struct gen_perf_registers config;
|
||||
};
|
||||
|
||||
/* When not using the MI_RPC command, this structure describes the list of
|
||||
* register offsets as well as their storage location so that they can be
|
||||
* stored through a series of MI_SRM commands and accumulated with
|
||||
* gen_perf_query_result_accumulate_fields().
|
||||
*/
|
||||
struct gen_perf_query_field_layout {
|
||||
/* Alignment for the layout, in bytes */
|
||||
uint32_t alignment;
|
||||
|
||||
/* Size of the whole layout, in bytes */
|
||||
uint32_t size;
|
||||
|
||||
/* Number of entries described in fields */
uint32_t n_fields;
|
||||
|
||||
struct gen_perf_query_field {
|
||||
/* MMIO location of this register */
|
||||
uint16_t mmio_offset;
|
||||
|
||||
/* Location of this register in the storage, as a byte offset from the
* start of a snapshot
*/
|
||||
uint16_t location;
|
||||
|
||||
/* Type of register, for accumulation (see gen_perf_query_info:*_offset
|
||||
* fields)
|
||||
*/
|
||||
enum gen_perf_query_field_type {
|
||||
GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
|
||||
GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT
|
||||
} type;
|
||||
|
||||
/* Index of register in the given type (for instance A31 or B2,
|
||||
* etc...)
|
||||
*/
|
||||
uint8_t index;
|
||||
|
||||
/* Size of this field in the storage, in bytes: 4, 8 or 256 */
|
||||
uint16_t size;
|
||||
|
||||
/* If not 0, mask to apply to the register value. */
|
||||
uint64_t mask;
|
||||
} *fields;
|
||||
};
|
||||
|
||||
struct gen_perf_query_counter_info {
|
||||
struct gen_perf_query_counter *counter;
|
||||
|
||||
@@ -269,6 +313,8 @@ struct gen_perf_config {
|
||||
struct gen_perf_query_counter_info *counter_infos;
|
||||
int n_counters;
|
||||
|
||||
struct gen_perf_query_field_layout query_layout;
|
||||
|
||||
/* Variables referenced in the XML meta data for OA performance
|
||||
* counters, e.g in the normalization equations.
|
||||
*
|
||||
@@ -387,6 +433,17 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
const uint32_t *start,
|
||||
const uint32_t *end);
|
||||
|
||||
/** Accumulate the delta between 2 snapshots of OA perf registers (layout
|
||||
* should match the description specified through gen_perf_query_field_layout).
|
||||
*/
|
||||
void gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
||||
const struct gen_perf_query_info *query,
|
||||
const struct gen_device_info *devinfo,
|
||||
const void *start,
|
||||
const void *end,
|
||||
bool no_oa_accumulate);
|
||||
|
||||
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
|
||||
|
||||
static inline size_t
|
||||
|
@@ -743,6 +743,7 @@ def main():
|
||||
query->b_offset = query->a_offset + 45;
|
||||
query->c_offset = query->b_offset + 8;
|
||||
query->perfcnt_offset = query->c_offset + 8;
|
||||
query->rpstat_offset = query->perfcnt_offset + 2;
|
||||
"""))
|
||||
else:
|
||||
c(textwrap.dedent("""\
|
||||
@@ -754,6 +755,7 @@ def main():
|
||||
query->b_offset = query->a_offset + 36;
|
||||
query->c_offset = query->b_offset + 8;
|
||||
query->perfcnt_offset = query->c_offset + 8;
|
||||
query->rpstat_offset = query->perfcnt_offset + 2;
|
||||
"""))
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user