intel/perf: query register descriptions
This will be useful when we implement queries using a series of MI_SRM commands instead of MI_RPC. Unfortunately, on Gen12 the MI_RPC command sources its values from the OAR unit, which has a series of registers similar to those of the OAG unit, but some of the HW configuration doesn't reach OAR, so we have to snapshot OAG manually instead. v2: Fix comments; use const. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6518>
This commit is contained in:

committed by
Marge Bot

parent
a6e980e9bf
commit
f32d1bf529
@@ -1130,6 +1130,74 @@ gen_perf_query_result_read_perfcnts(struct gen_perf_query_result *result,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
query_accumulator_offset(const struct gen_perf_query_info *query,
|
||||||
|
enum gen_perf_query_field_type type,
|
||||||
|
uint8_t index)
|
||||||
|
{
|
||||||
|
switch (type) {
|
||||||
|
case GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
|
||||||
|
return query->perfcnt_offset + index;
|
||||||
|
default:
|
||||||
|
unreachable("Invalid register type");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
||||||
|
const struct gen_perf_query_info *query,
|
||||||
|
const struct gen_device_info *devinfo,
|
||||||
|
const void *start,
|
||||||
|
const void *end,
|
||||||
|
bool no_oa_accumulate)
|
||||||
|
{
|
||||||
|
struct gen_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||||
|
|
||||||
|
for (uint32_t r = 0; r < layout->n_fields; r++) {
|
||||||
|
struct gen_perf_query_field *field = &layout->fields[r];
|
||||||
|
|
||||||
|
if (field->type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC) {
|
||||||
|
gen_perf_query_result_read_frequencies(result, devinfo,
|
||||||
|
start + field->location,
|
||||||
|
end + field->location);
|
||||||
|
/* no_oa_accumulate=true is used when doing GL perf queries, we
|
||||||
|
* manually parse the OA reports from the OA buffer and substract
|
||||||
|
* unrelated deltas, so don't accumulate the begin/end reports here.
|
||||||
|
*/
|
||||||
|
if (!no_oa_accumulate) {
|
||||||
|
gen_perf_query_result_accumulate(result, query,
|
||||||
|
start + field->location,
|
||||||
|
end + field->location);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
uint64_t v0, v1;
|
||||||
|
|
||||||
|
if (field->size == 4) {
|
||||||
|
v0 = *(const uint32_t *)(start + field->location);
|
||||||
|
v1 = *(const uint32_t *)(end + field->location);
|
||||||
|
} else {
|
||||||
|
assert(field->size == 8);
|
||||||
|
v0 = *(const uint64_t *)(start + field->location);
|
||||||
|
v1 = *(const uint64_t *)(end + field->location);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (field->mask) {
|
||||||
|
v0 = field->mask & v0;
|
||||||
|
v1 = field->mask & v1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* RPSTAT is a bit of a special case because its begin/end values
|
||||||
|
* represent frequencies. We store it in a separate location.
|
||||||
|
*/
|
||||||
|
if (field->type == GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT)
|
||||||
|
gen_perf_query_result_read_gt_frequency(result, devinfo, v0, v1);
|
||||||
|
else
|
||||||
|
result->accumulator[query_accumulator_offset(query, field->type, field->index)] = v1 - v0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
gen_perf_query_result_clear(struct gen_perf_query_result *result)
|
gen_perf_query_result_clear(struct gen_perf_query_result *result)
|
||||||
{
|
{
|
||||||
@@ -1146,12 +1214,86 @@ gen_perf_compare_query_names(const void *v1, const void *v2)
|
|||||||
return strcmp(q1->name, q2->name);
|
return strcmp(q1->name, q2->name);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline struct gen_perf_query_field *
|
||||||
|
add_query_register(struct gen_perf_query_field_layout *layout,
|
||||||
|
enum gen_perf_query_field_type type,
|
||||||
|
uint16_t offset,
|
||||||
|
uint16_t size,
|
||||||
|
uint8_t index)
|
||||||
|
{
|
||||||
|
/* Align MI_RPC to 64bytes (HW requirement) & 64bit registers to 8bytes
|
||||||
|
* (shows up nicely in the debugger).
|
||||||
|
*/
|
||||||
|
if (type == GEN_PERF_QUERY_FIELD_TYPE_MI_RPC)
|
||||||
|
layout->size = align(layout->size, 64);
|
||||||
|
else if (size % 8 == 0)
|
||||||
|
layout->size = align(layout->size, 8);
|
||||||
|
|
||||||
|
layout->fields[layout->n_fields++] = (struct gen_perf_query_field) {
|
||||||
|
.mmio_offset = offset,
|
||||||
|
.location = layout->size,
|
||||||
|
.type = type,
|
||||||
|
.index = index,
|
||||||
|
.size = size,
|
||||||
|
};
|
||||||
|
layout->size += size;
|
||||||
|
|
||||||
|
return &layout->fields[layout->n_fields - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
gen_perf_init_query_fields(struct gen_perf_config *perf_cfg,
|
||||||
|
const struct gen_device_info *devinfo)
|
||||||
|
{
|
||||||
|
struct gen_perf_query_field_layout *layout = &perf_cfg->query_layout;
|
||||||
|
|
||||||
|
layout->n_fields = 0;
|
||||||
|
|
||||||
|
/* MI_RPC requires a 64byte alignment. */
|
||||||
|
layout->alignment = 64;
|
||||||
|
|
||||||
|
add_query_register(layout, GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
|
||||||
|
0, 256, 0);
|
||||||
|
|
||||||
|
if (devinfo->gen <= 11) {
|
||||||
|
struct gen_perf_query_field *field =
|
||||||
|
add_query_register(layout,
|
||||||
|
GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
|
||||||
|
PERF_CNT_1_DW0, 8, 0);
|
||||||
|
field->mask = PERF_CNT_VALUE_MASK;
|
||||||
|
|
||||||
|
field = add_query_register(layout,
|
||||||
|
GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
|
||||||
|
PERF_CNT_2_DW0, 8, 1);
|
||||||
|
field->mask = PERF_CNT_VALUE_MASK;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (devinfo->gen == 8 && !devinfo->is_cherryview) {
|
||||||
|
add_query_register(layout,
|
||||||
|
GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
|
||||||
|
GEN7_RPSTAT1, 4, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (devinfo->gen >= 9) {
|
||||||
|
add_query_register(layout,
|
||||||
|
GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT,
|
||||||
|
GEN9_RPSTAT0, 4, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Align the whole package to 64bytes so that 2 snapshots can be put
|
||||||
|
* together without extract alignment for the user.
|
||||||
|
*/
|
||||||
|
layout->size = align(layout->size, 64);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
|
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
|
||||||
const struct gen_device_info *devinfo,
|
const struct gen_device_info *devinfo,
|
||||||
int drm_fd,
|
int drm_fd,
|
||||||
bool include_pipeline_statistics)
|
bool include_pipeline_statistics)
|
||||||
{
|
{
|
||||||
|
gen_perf_init_query_fields(perf_cfg, devinfo);
|
||||||
|
|
||||||
if (include_pipeline_statistics) {
|
if (include_pipeline_statistics) {
|
||||||
load_pipeline_statistic_metrics(perf_cfg, devinfo);
|
load_pipeline_statistic_metrics(perf_cfg, devinfo);
|
||||||
gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
|
gen_perf_register_mdapi_statistic_query(perf_cfg, devinfo);
|
||||||
|
@@ -109,9 +109,9 @@ struct gen_pipeline_stat {
|
|||||||
* For Gen8+
|
* For Gen8+
|
||||||
* 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
|
* 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters
|
||||||
*
|
*
|
||||||
* Plus 2 PERF_CNT registers.
|
* Plus 2 PERF_CNT registers and 1 RPSTAT register.
|
||||||
*/
|
*/
|
||||||
#define MAX_OA_REPORT_COUNTERS (62 + 2)
|
#define MAX_OA_REPORT_COUNTERS (62 + 2 + 1)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We currently allocate only one page for pipeline statistics queries. Here
|
* We currently allocate only one page for pipeline statistics queries. Here
|
||||||
@@ -234,10 +234,54 @@ struct gen_perf_query_info {
|
|||||||
int b_offset;
|
int b_offset;
|
||||||
int c_offset;
|
int c_offset;
|
||||||
int perfcnt_offset;
|
int perfcnt_offset;
|
||||||
|
int rpstat_offset;
|
||||||
|
|
||||||
struct gen_perf_registers config;
|
struct gen_perf_registers config;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Type of register, for accumulation (see gen_perf_query_info:*_offset
 * fields).
 */
enum gen_perf_query_field_type {
   GEN_PERF_QUERY_FIELD_TYPE_MI_RPC,
   GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT,
   GEN_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT
};

/* Description of one field of a query snapshot. */
struct gen_perf_query_field {
   /* MMIO location of this register */
   uint16_t mmio_offset;

   /* Location of this register in the storage */
   uint16_t location;

   /* How the field is accumulated */
   enum gen_perf_query_field_type type;

   /* Index of register in the given type (for instance A31 or B2,
    * etc...)
    */
   uint8_t index;

   /* 4, 8 or 256 */
   uint16_t size;

   /* If not 0, mask to apply to the register value. */
   uint64_t mask;
};

/* When not using the MI_RPC command, this structure describes the list of
 * register offsets as well as their storage location so that they can be
 * stored through a series of MI_SRM commands and accumulated with
 * gen_perf_query_result_accumulate_fields().
 */
struct gen_perf_query_field_layout {
   /* Alignment for the layout */
   uint32_t alignment;

   /* Size of the whole layout */
   uint32_t size;

   /* Number of valid entries in fields */
   uint32_t n_fields;

   /* Field descriptions, n_fields entries */
   struct gen_perf_query_field *fields;
};
|
||||||
|
|
||||||
struct gen_perf_query_counter_info {
|
struct gen_perf_query_counter_info {
|
||||||
struct gen_perf_query_counter *counter;
|
struct gen_perf_query_counter *counter;
|
||||||
|
|
||||||
@@ -269,6 +313,8 @@ struct gen_perf_config {
|
|||||||
struct gen_perf_query_counter_info *counter_infos;
|
struct gen_perf_query_counter_info *counter_infos;
|
||||||
int n_counters;
|
int n_counters;
|
||||||
|
|
||||||
|
struct gen_perf_query_field_layout query_layout;
|
||||||
|
|
||||||
/* Variables referenced in the XML meta data for OA performance
|
/* Variables referenced in the XML meta data for OA performance
|
||||||
* counters, e.g in the normalization equations.
|
* counters, e.g in the normalization equations.
|
||||||
*
|
*
|
||||||
@@ -387,6 +433,17 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
|
|||||||
const struct gen_perf_query_info *query,
|
const struct gen_perf_query_info *query,
|
||||||
const uint32_t *start,
|
const uint32_t *start,
|
||||||
const uint32_t *end);
|
const uint32_t *end);
|
||||||
|
|
||||||
|
/** Accumulate the delta between 2 snapshots of OA perf registers (layout
|
||||||
|
* should match description specified through gen_perf_query_field_layout).
|
||||||
|
*/
|
||||||
|
void gen_perf_query_result_accumulate_fields(struct gen_perf_query_result *result,
|
||||||
|
const struct gen_perf_query_info *query,
|
||||||
|
const struct gen_device_info *devinfo,
|
||||||
|
const void *start,
|
||||||
|
const void *end,
|
||||||
|
bool no_oa_accumulate);
|
||||||
|
|
||||||
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
|
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
|
||||||
|
|
||||||
static inline size_t
|
static inline size_t
|
||||||
|
@@ -743,6 +743,7 @@ def main():
|
|||||||
query->b_offset = query->a_offset + 45;
|
query->b_offset = query->a_offset + 45;
|
||||||
query->c_offset = query->b_offset + 8;
|
query->c_offset = query->b_offset + 8;
|
||||||
query->perfcnt_offset = query->c_offset + 8;
|
query->perfcnt_offset = query->c_offset + 8;
|
||||||
|
query->rpstat_offset = query->perfcnt_offset + 2;
|
||||||
"""))
|
"""))
|
||||||
else:
|
else:
|
||||||
c(textwrap.dedent("""\
|
c(textwrap.dedent("""\
|
||||||
@@ -754,6 +755,7 @@ def main():
|
|||||||
query->b_offset = query->a_offset + 36;
|
query->b_offset = query->a_offset + 36;
|
||||||
query->c_offset = query->b_offset + 8;
|
query->c_offset = query->b_offset + 8;
|
||||||
query->perfcnt_offset = query->c_offset + 8;
|
query->perfcnt_offset = query->c_offset + 8;
|
||||||
|
query->rpstat_offset = query->perfcnt_offset + 2;
|
||||||
"""))
|
"""))
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user