intel/perf: move initialization of pipeline statistics metrics to gen_perf
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -61,6 +61,11 @@
|
|||||||
#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0
|
#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0
|
||||||
#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0)
|
#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0)
|
||||||
|
|
||||||
|
#define GEN6_SO_PRIM_STORAGE_NEEDED 0x2280
|
||||||
|
#define GEN7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
|
||||||
|
#define GEN6_SO_NUM_PRIMS_WRITTEN 0x2288
|
||||||
|
#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
|
||||||
|
|
||||||
#define MAP_READ (1 << 0)
|
#define MAP_READ (1 << 0)
|
||||||
#define MAP_WRITE (1 << 1)
|
#define MAP_WRITE (1 << 1)
|
||||||
|
|
||||||
@@ -165,13 +170,32 @@ read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf,
|
|||||||
return read_file_uint64(buf, value);
|
return read_file_uint64(buf, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline struct gen_perf_query_info *
|
||||||
|
append_query_info(struct gen_perf_config *perf, int max_counters)
|
||||||
|
{
|
||||||
|
struct gen_perf_query_info *query;
|
||||||
|
|
||||||
|
perf->queries = reralloc(perf, perf->queries,
|
||||||
|
struct gen_perf_query_info,
|
||||||
|
++perf->n_queries);
|
||||||
|
query = &perf->queries[perf->n_queries - 1];
|
||||||
|
memset(query, 0, sizeof(*query));
|
||||||
|
|
||||||
|
if (max_counters > 0) {
|
||||||
|
query->max_counters = max_counters;
|
||||||
|
query->counters =
|
||||||
|
rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
|
||||||
|
}
|
||||||
|
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
register_oa_config(struct gen_perf_config *perf,
|
register_oa_config(struct gen_perf_config *perf,
|
||||||
const struct gen_perf_query_info *query,
|
const struct gen_perf_query_info *query,
|
||||||
uint64_t config_id)
|
uint64_t config_id)
|
||||||
{
|
{
|
||||||
struct gen_perf_query_info *registred_query =
|
struct gen_perf_query_info *registred_query = append_query_info(perf, 0);
|
||||||
gen_perf_query_append_query_info(perf, 0);
|
|
||||||
|
|
||||||
*registred_query = *query;
|
*registred_query = *query;
|
||||||
registred_query->oa_metrics_set_id = config_id;
|
registred_query->oa_metrics_set_id = config_id;
|
||||||
@@ -395,8 +419,123 @@ get_register_queries_function(const struct gen_device_info *devinfo)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
static inline void
|
||||||
gen_perf_load_oa_metrics(struct gen_perf_config *perf, int fd,
|
add_stat_reg(struct gen_perf_query_info *query, uint32_t reg,
|
||||||
|
uint32_t numerator, uint32_t denominator,
|
||||||
|
const char *name, const char *description)
|
||||||
|
{
|
||||||
|
struct gen_perf_query_counter *counter;
|
||||||
|
|
||||||
|
assert(query->n_counters < query->max_counters);
|
||||||
|
|
||||||
|
counter = &query->counters[query->n_counters];
|
||||||
|
counter->name = name;
|
||||||
|
counter->desc = description;
|
||||||
|
counter->type = GEN_PERF_COUNTER_TYPE_RAW;
|
||||||
|
counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
|
||||||
|
counter->offset = sizeof(uint64_t) * query->n_counters;
|
||||||
|
counter->pipeline_stat.reg = reg;
|
||||||
|
counter->pipeline_stat.numerator = numerator;
|
||||||
|
counter->pipeline_stat.denominator = denominator;
|
||||||
|
|
||||||
|
query->n_counters++;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
add_basic_stat_reg(struct gen_perf_query_info *query,
|
||||||
|
uint32_t reg, const char *name)
|
||||||
|
{
|
||||||
|
add_stat_reg(query, reg, 1, 1, name, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
|
||||||
|
const struct gen_device_info *devinfo)
|
||||||
|
{
|
||||||
|
struct gen_perf_query_info *query =
|
||||||
|
append_query_info(perf_cfg, MAX_STAT_COUNTERS);
|
||||||
|
|
||||||
|
query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
|
||||||
|
query->name = "Pipeline Statistics Registers";
|
||||||
|
|
||||||
|
add_basic_stat_reg(query, IA_VERTICES_COUNT,
|
||||||
|
"N vertices submitted");
|
||||||
|
add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
|
||||||
|
"N primitives submitted");
|
||||||
|
add_basic_stat_reg(query, VS_INVOCATION_COUNT,
|
||||||
|
"N vertex shader invocations");
|
||||||
|
|
||||||
|
if (devinfo->gen == 6) {
|
||||||
|
add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
|
||||||
|
"SO_PRIM_STORAGE_NEEDED",
|
||||||
|
"N geometry shader stream-out primitives (total)");
|
||||||
|
add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
|
||||||
|
"SO_NUM_PRIMS_WRITTEN",
|
||||||
|
"N geometry shader stream-out primitives (written)");
|
||||||
|
} else {
|
||||||
|
add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
|
||||||
|
"SO_PRIM_STORAGE_NEEDED (Stream 0)",
|
||||||
|
"N stream-out (stream 0) primitives (total)");
|
||||||
|
add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
|
||||||
|
"SO_PRIM_STORAGE_NEEDED (Stream 1)",
|
||||||
|
"N stream-out (stream 1) primitives (total)");
|
||||||
|
add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
|
||||||
|
"SO_PRIM_STORAGE_NEEDED (Stream 2)",
|
||||||
|
"N stream-out (stream 2) primitives (total)");
|
||||||
|
add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
|
||||||
|
"SO_PRIM_STORAGE_NEEDED (Stream 3)",
|
||||||
|
"N stream-out (stream 3) primitives (total)");
|
||||||
|
add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
|
||||||
|
"SO_NUM_PRIMS_WRITTEN (Stream 0)",
|
||||||
|
"N stream-out (stream 0) primitives (written)");
|
||||||
|
add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
|
||||||
|
"SO_NUM_PRIMS_WRITTEN (Stream 1)",
|
||||||
|
"N stream-out (stream 1) primitives (written)");
|
||||||
|
add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
|
||||||
|
"SO_NUM_PRIMS_WRITTEN (Stream 2)",
|
||||||
|
"N stream-out (stream 2) primitives (written)");
|
||||||
|
add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
|
||||||
|
"SO_NUM_PRIMS_WRITTEN (Stream 3)",
|
||||||
|
"N stream-out (stream 3) primitives (written)");
|
||||||
|
}
|
||||||
|
|
||||||
|
add_basic_stat_reg(query, HS_INVOCATION_COUNT,
|
||||||
|
"N TCS shader invocations");
|
||||||
|
add_basic_stat_reg(query, DS_INVOCATION_COUNT,
|
||||||
|
"N TES shader invocations");
|
||||||
|
|
||||||
|
add_basic_stat_reg(query, GS_INVOCATION_COUNT,
|
||||||
|
"N geometry shader invocations");
|
||||||
|
add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
|
||||||
|
"N geometry shader primitives emitted");
|
||||||
|
|
||||||
|
add_basic_stat_reg(query, CL_INVOCATION_COUNT,
|
||||||
|
"N primitives entering clipping");
|
||||||
|
add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
|
||||||
|
"N primitives leaving clipping");
|
||||||
|
|
||||||
|
if (devinfo->is_haswell || devinfo->gen == 8) {
|
||||||
|
add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
|
||||||
|
"N fragment shader invocations",
|
||||||
|
"N fragment shader invocations");
|
||||||
|
} else {
|
||||||
|
add_basic_stat_reg(query, PS_INVOCATION_COUNT,
|
||||||
|
"N fragment shader invocations");
|
||||||
|
}
|
||||||
|
|
||||||
|
add_basic_stat_reg(query, PS_DEPTH_COUNT,
|
||||||
|
"N z-pass fragments");
|
||||||
|
|
||||||
|
if (devinfo->gen >= 7) {
|
||||||
|
add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||||
|
"N compute shader invocations");
|
||||||
|
}
|
||||||
|
|
||||||
|
query->data_size = sizeof(uint64_t) * query->n_counters;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
load_oa_metrics(struct gen_perf_config *perf, int fd,
|
||||||
const struct gen_device_info *devinfo)
|
const struct gen_device_info *devinfo)
|
||||||
{
|
{
|
||||||
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
|
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
|
||||||
@@ -582,6 +721,62 @@ gen_perf_query_result_clear(struct gen_perf_query_result *result)
|
|||||||
result->hw_id = 0xffffffff; /* invalid */
|
result->hw_id = 0xffffffff; /* invalid */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
gen_perf_query_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
|
||||||
|
const struct gen_device_info *devinfo)
|
||||||
|
{
|
||||||
|
if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
|
||||||
|
return;
|
||||||
|
|
||||||
|
struct gen_perf_query_info *query =
|
||||||
|
append_query_info(perf_cfg, MAX_STAT_COUNTERS);
|
||||||
|
|
||||||
|
query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
|
||||||
|
query->name = "Intel_Raw_Pipeline_Statistics_Query";
|
||||||
|
|
||||||
|
/* The order has to match mdapi_pipeline_metrics. */
|
||||||
|
add_basic_stat_reg(query, IA_VERTICES_COUNT,
|
||||||
|
"N vertices submitted");
|
||||||
|
add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
|
||||||
|
"N primitives submitted");
|
||||||
|
add_basic_stat_reg(query, VS_INVOCATION_COUNT,
|
||||||
|
"N vertex shader invocations");
|
||||||
|
add_basic_stat_reg(query, GS_INVOCATION_COUNT,
|
||||||
|
"N geometry shader invocations");
|
||||||
|
add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
|
||||||
|
"N geometry shader primitives emitted");
|
||||||
|
add_basic_stat_reg(query, CL_INVOCATION_COUNT,
|
||||||
|
"N primitives entering clipping");
|
||||||
|
add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
|
||||||
|
"N primitives leaving clipping");
|
||||||
|
if (devinfo->is_haswell || devinfo->gen == 8) {
|
||||||
|
add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
|
||||||
|
"N fragment shader invocations",
|
||||||
|
"N fragment shader invocations");
|
||||||
|
} else {
|
||||||
|
add_basic_stat_reg(query, PS_INVOCATION_COUNT,
|
||||||
|
"N fragment shader invocations");
|
||||||
|
}
|
||||||
|
add_basic_stat_reg(query, HS_INVOCATION_COUNT,
|
||||||
|
"N TCS shader invocations");
|
||||||
|
add_basic_stat_reg(query, DS_INVOCATION_COUNT,
|
||||||
|
"N TES shader invocations");
|
||||||
|
if (devinfo->gen >= 7) {
|
||||||
|
add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||||
|
"N compute shader invocations");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (devinfo->gen >= 10) {
|
||||||
|
/* Reuse existing CS invocation register until we can expose this new
|
||||||
|
* one.
|
||||||
|
*/
|
||||||
|
add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
||||||
|
"Reserved1");
|
||||||
|
}
|
||||||
|
|
||||||
|
query->data_size = sizeof(uint64_t) * query->n_counters;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
|
fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
|
||||||
const char *name,
|
const char *name,
|
||||||
@@ -618,9 +813,9 @@ fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
|
|||||||
sizeof(struct_name.field_name[0]), \
|
sizeof(struct_name.field_name[0]), \
|
||||||
GEN_PERF_COUNTER_DATA_TYPE_##type_name)
|
GEN_PERF_COUNTER_DATA_TYPE_##type_name)
|
||||||
|
|
||||||
void
|
static void
|
||||||
gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
|
register_mdapi_oa_query(const struct gen_device_info *devinfo,
|
||||||
struct gen_perf_config *perf)
|
struct gen_perf_config *perf)
|
||||||
{
|
{
|
||||||
struct gen_perf_query_info *query = NULL;
|
struct gen_perf_query_info *query = NULL;
|
||||||
|
|
||||||
@@ -632,7 +827,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
|
|||||||
|
|
||||||
switch (devinfo->gen) {
|
switch (devinfo->gen) {
|
||||||
case 7: {
|
case 7: {
|
||||||
query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7);
|
query = append_query_info(perf, 1 + 45 + 16 + 7);
|
||||||
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
|
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
|
||||||
|
|
||||||
struct gen7_mdapi_metrics metric_data;
|
struct gen7_mdapi_metrics metric_data;
|
||||||
@@ -657,7 +852,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case 8: {
|
case 8: {
|
||||||
query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16);
|
query = append_query_info(perf, 2 + 36 + 16 + 16);
|
||||||
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||||
|
|
||||||
struct gen8_mdapi_metrics metric_data;
|
struct gen8_mdapi_metrics metric_data;
|
||||||
@@ -694,7 +889,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
|
|||||||
case 9:
|
case 9:
|
||||||
case 10:
|
case 10:
|
||||||
case 11: {
|
case 11: {
|
||||||
query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
|
query = append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
|
||||||
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||||
|
|
||||||
struct gen9_mdapi_metrics metric_data;
|
struct gen9_mdapi_metrics metric_data;
|
||||||
@@ -756,62 +951,6 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo,
|
|
||||||
struct gen_perf_config *perf)
|
|
||||||
{
|
|
||||||
if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
|
|
||||||
return;
|
|
||||||
|
|
||||||
struct gen_perf_query_info *query =
|
|
||||||
gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS);
|
|
||||||
|
|
||||||
query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
|
|
||||||
query->name = "Intel_Raw_Pipeline_Statistics_Query";
|
|
||||||
|
|
||||||
/* The order has to match mdapi_pipeline_metrics. */
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
|
|
||||||
"N vertices submitted");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
|
|
||||||
"N primitives submitted");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
|
|
||||||
"N vertex shader invocations");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
|
|
||||||
"N geometry shader invocations");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
|
|
||||||
"N geometry shader primitives emitted");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
|
|
||||||
"N primitives entering clipping");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
|
|
||||||
"N primitives leaving clipping");
|
|
||||||
if (devinfo->is_haswell || devinfo->gen == 8) {
|
|
||||||
gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
|
|
||||||
"N fragment shader invocations",
|
|
||||||
"N fragment shader invocations");
|
|
||||||
} else {
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
|
|
||||||
"N fragment shader invocations");
|
|
||||||
}
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
|
|
||||||
"N TCS shader invocations");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
|
|
||||||
"N TES shader invocations");
|
|
||||||
if (devinfo->gen >= 7) {
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
|
||||||
"N compute shader invocations");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (devinfo->gen >= 10) {
|
|
||||||
/* Reuse existing CS invocation register until we can expose this new
|
|
||||||
* one.
|
|
||||||
*/
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
|
||||||
"Reserved1");
|
|
||||||
}
|
|
||||||
|
|
||||||
query->data_size = sizeof(uint64_t) * query->n_counters;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
gen_perf_query_get_metric_id(struct gen_perf_config *perf,
|
gen_perf_query_get_metric_id(struct gen_perf_config *perf,
|
||||||
const struct gen_perf_query_info *query)
|
const struct gen_perf_query_info *query)
|
||||||
@@ -1011,6 +1150,17 @@ gen_perf_dec_n_users(struct gen_perf_context *perf_ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
|
||||||
|
const struct gen_device_info *devinfo,
|
||||||
|
int drm_fd)
|
||||||
|
{
|
||||||
|
load_pipeline_statistic_metrics(perf_cfg, devinfo);
|
||||||
|
gen_perf_query_register_mdapi_statistic_query(perf_cfg, devinfo);
|
||||||
|
if (load_oa_metrics(perf_cfg, drm_fd, devinfo))
|
||||||
|
register_mdapi_oa_query(devinfo, perf_cfg);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
||||||
struct gen_perf_config *perf_cfg,
|
struct gen_perf_config *perf_cfg,
|
||||||
|
@@ -505,6 +505,9 @@ struct gen_perf_context {
|
|||||||
int n_query_instances;
|
int n_query_instances;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
|
||||||
|
const struct gen_device_info *devinfo,
|
||||||
|
int drm_fd);
|
||||||
void gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
void gen_perf_init_context(struct gen_perf_context *perf_ctx,
|
||||||
struct gen_perf_config *perf_cfg,
|
struct gen_perf_config *perf_cfg,
|
||||||
void * ctx, /* driver context (eg, brw_context) */
|
void * ctx, /* driver context (eg, brw_context) */
|
||||||
@@ -532,58 +535,6 @@ gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline struct gen_perf_query_info *
|
|
||||||
gen_perf_query_append_query_info(struct gen_perf_config *perf, int max_counters)
|
|
||||||
{
|
|
||||||
struct gen_perf_query_info *query;
|
|
||||||
|
|
||||||
perf->queries = reralloc(perf, perf->queries,
|
|
||||||
struct gen_perf_query_info,
|
|
||||||
++perf->n_queries);
|
|
||||||
query = &perf->queries[perf->n_queries - 1];
|
|
||||||
memset(query, 0, sizeof(*query));
|
|
||||||
|
|
||||||
if (max_counters > 0) {
|
|
||||||
query->max_counters = max_counters;
|
|
||||||
query->counters =
|
|
||||||
rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
|
|
||||||
}
|
|
||||||
|
|
||||||
return query;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query,
|
|
||||||
uint32_t reg,
|
|
||||||
uint32_t numerator,
|
|
||||||
uint32_t denominator,
|
|
||||||
const char *name,
|
|
||||||
const char *description)
|
|
||||||
{
|
|
||||||
struct gen_perf_query_counter *counter;
|
|
||||||
|
|
||||||
assert(query->n_counters < query->max_counters);
|
|
||||||
|
|
||||||
counter = &query->counters[query->n_counters];
|
|
||||||
counter->name = name;
|
|
||||||
counter->desc = description;
|
|
||||||
counter->type = GEN_PERF_COUNTER_TYPE_RAW;
|
|
||||||
counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
|
|
||||||
counter->offset = sizeof(uint64_t) * query->n_counters;
|
|
||||||
counter->pipeline_stat.reg = reg;
|
|
||||||
counter->pipeline_stat.numerator = numerator;
|
|
||||||
counter->pipeline_stat.denominator = denominator;
|
|
||||||
|
|
||||||
query->n_counters++;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query,
|
|
||||||
uint32_t reg, const char *name)
|
|
||||||
{
|
|
||||||
gen_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline struct gen_perf_config *
|
static inline struct gen_perf_config *
|
||||||
gen_perf_new(void *ctx)
|
gen_perf_new(void *ctx)
|
||||||
{
|
{
|
||||||
@@ -591,8 +542,6 @@ gen_perf_new(void *ctx)
|
|||||||
return perf;
|
return perf;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool gen_perf_load_oa_metrics(struct gen_perf_config *perf, int fd,
|
|
||||||
const struct gen_device_info *devinfo);
|
|
||||||
bool gen_perf_load_metric_id(struct gen_perf_config *perf, const char *guid,
|
bool gen_perf_load_metric_id(struct gen_perf_config *perf, const char *guid,
|
||||||
uint64_t *metric_id);
|
uint64_t *metric_id);
|
||||||
|
|
||||||
@@ -605,10 +554,6 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
|
|||||||
const uint32_t *start,
|
const uint32_t *start,
|
||||||
const uint32_t *end);
|
const uint32_t *end);
|
||||||
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
|
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
|
||||||
void gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo,
|
|
||||||
struct gen_perf_config *perf);
|
|
||||||
void gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
|
|
||||||
struct gen_perf_config *perf);
|
|
||||||
uint64_t gen_perf_query_get_metric_id(struct gen_perf_config *perf,
|
uint64_t gen_perf_query_get_metric_id(struct gen_perf_config *perf,
|
||||||
const struct gen_perf_query_info *query);
|
const struct gen_perf_query_info *query);
|
||||||
struct oa_sample_buf * gen_perf_get_free_sample_buf(struct gen_perf_context *perf);
|
struct oa_sample_buf * gen_perf_get_free_sample_buf(struct gen_perf_context *perf);
|
||||||
|
@@ -420,94 +420,6 @@ brw_delete_perf_query(struct gl_context *ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
static void
|
|
||||||
init_pipeline_statistic_query_registers(struct brw_context *brw)
|
|
||||||
{
|
|
||||||
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
|
||||||
struct gen_perf_config *perf = brw->perf_ctx.perf;
|
|
||||||
struct gen_perf_query_info *query =
|
|
||||||
gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS);
|
|
||||||
|
|
||||||
query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
|
|
||||||
query->name = "Pipeline Statistics Registers";
|
|
||||||
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
|
|
||||||
"N vertices submitted");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
|
|
||||||
"N primitives submitted");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
|
|
||||||
"N vertex shader invocations");
|
|
||||||
|
|
||||||
if (devinfo->gen == 6) {
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
|
|
||||||
"SO_PRIM_STORAGE_NEEDED",
|
|
||||||
"N geometry shader stream-out primitives (total)");
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
|
|
||||||
"SO_NUM_PRIMS_WRITTEN",
|
|
||||||
"N geometry shader stream-out primitives (written)");
|
|
||||||
} else {
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
|
|
||||||
"SO_PRIM_STORAGE_NEEDED (Stream 0)",
|
|
||||||
"N stream-out (stream 0) primitives (total)");
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
|
|
||||||
"SO_PRIM_STORAGE_NEEDED (Stream 1)",
|
|
||||||
"N stream-out (stream 1) primitives (total)");
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
|
|
||||||
"SO_PRIM_STORAGE_NEEDED (Stream 2)",
|
|
||||||
"N stream-out (stream 2) primitives (total)");
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
|
|
||||||
"SO_PRIM_STORAGE_NEEDED (Stream 3)",
|
|
||||||
"N stream-out (stream 3) primitives (total)");
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
|
|
||||||
"SO_NUM_PRIMS_WRITTEN (Stream 0)",
|
|
||||||
"N stream-out (stream 0) primitives (written)");
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
|
|
||||||
"SO_NUM_PRIMS_WRITTEN (Stream 1)",
|
|
||||||
"N stream-out (stream 1) primitives (written)");
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
|
|
||||||
"SO_NUM_PRIMS_WRITTEN (Stream 2)",
|
|
||||||
"N stream-out (stream 2) primitives (written)");
|
|
||||||
gen_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
|
|
||||||
"SO_NUM_PRIMS_WRITTEN (Stream 3)",
|
|
||||||
"N stream-out (stream 3) primitives (written)");
|
|
||||||
}
|
|
||||||
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
|
|
||||||
"N TCS shader invocations");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
|
|
||||||
"N TES shader invocations");
|
|
||||||
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
|
|
||||||
"N geometry shader invocations");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
|
|
||||||
"N geometry shader primitives emitted");
|
|
||||||
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
|
|
||||||
"N primitives entering clipping");
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
|
|
||||||
"N primitives leaving clipping");
|
|
||||||
|
|
||||||
if (devinfo->is_haswell || devinfo->gen == 8) {
|
|
||||||
gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
|
|
||||||
"N fragment shader invocations",
|
|
||||||
"N fragment shader invocations");
|
|
||||||
} else {
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
|
|
||||||
"N fragment shader invocations");
|
|
||||||
}
|
|
||||||
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, PS_DEPTH_COUNT,
|
|
||||||
"N z-pass fragments");
|
|
||||||
|
|
||||||
if (devinfo->gen >= 7) {
|
|
||||||
gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
|
|
||||||
"N compute shader invocations");
|
|
||||||
}
|
|
||||||
|
|
||||||
query->data_size = sizeof(uint64_t) * query->n_counters;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* gen_device_info will have incorrect default topology values for unsupported kernels.
|
/* gen_device_info will have incorrect default topology values for unsupported kernels.
|
||||||
* verify kernel support to ensure OA metrics are accurate.
|
* verify kernel support to ensure OA metrics are accurate.
|
||||||
*/
|
*/
|
||||||
@@ -620,12 +532,10 @@ brw_init_perf_query_info(struct gl_context *ctx)
|
|||||||
gen_perf_init_context(perf_ctx, perf_cfg, brw, brw->bufmgr, devinfo,
|
gen_perf_init_context(perf_ctx, perf_cfg, brw, brw->bufmgr, devinfo,
|
||||||
brw->hw_ctx, brw->screen->driScrnPriv->fd);
|
brw->hw_ctx, brw->screen->driScrnPriv->fd);
|
||||||
|
|
||||||
init_pipeline_statistic_query_registers(brw);
|
if (!oa_metrics_kernel_support(perf_ctx->drm_fd, devinfo))
|
||||||
gen_perf_query_register_mdapi_statistic_query(devinfo, perf_cfg);
|
return 0;
|
||||||
|
|
||||||
if ((oa_metrics_kernel_support(perf_ctx->drm_fd, devinfo)) &&
|
gen_perf_init_metrics(perf_cfg, devinfo, perf_ctx->drm_fd);
|
||||||
(gen_perf_load_oa_metrics(perf_cfg, perf_ctx->drm_fd, devinfo)))
|
|
||||||
gen_perf_query_register_mdapi_oa_query(devinfo, perf_cfg);
|
|
||||||
|
|
||||||
return perf_cfg->n_queries;
|
return perf_cfg->n_queries;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user