intel/perf: move perf-related state into gen_perf_context
To move more operations into intel/perf, several state items are needed.
Save references to that state in the gen_perf_context, rather than passing
them in for every operation. This commit includes an initializer for
gen_perf_context, to set those references and also to encapsulate the
initialization of the sample buffer state.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
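For context, the new initializer lets a driver wire its state into the perf
context once, instead of threading bufmgr, hw_ctx, and drm_fd through every
call. A minimal usage sketch, mirroring the i965 call site in the final hunk
below (perf_cfg is assumed to be an already-populated gen_perf_config):

   /* One-time setup: hand driver state to intel/perf. */
   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
   gen_perf_init_context(perf_ctx, perf_cfg,
                         brw,                            /* driver context */
                         brw->bufmgr,                    /* driver buffer manager */
                         &brw->screen->devinfo,
                         brw->hw_ctx,
                         brw->screen->driScrnPriv->fd);  /* DRM fd */

Subsequent perf operations then read these references back out of perf_ctx,
as the brw_begin_perf_query / brw_end_perf_query hunks below show.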
@@ -991,3 +991,39 @@ gen_perf_dec_n_users(struct gen_perf_context *perf_ctx)
       DBG("WARNING: Error disabling gen perf stream: %m\n");
    }
 }
+
+void
+gen_perf_init_context(struct gen_perf_context *perf_ctx,
+                      struct gen_perf_config *perf_cfg,
+                      void * ctx,  /* driver context (eg, brw_context) */
+                      void * bufmgr,  /* eg brw_bufmgr */
+                      const struct gen_device_info *devinfo,
+                      uint32_t hw_ctx,
+                      int drm_fd)
+{
+   perf_ctx->perf = perf_cfg;
+   perf_ctx->ctx = ctx;
+   perf_ctx->bufmgr = bufmgr;
+   perf_ctx->drm_fd = drm_fd;
+   perf_ctx->hw_ctx = hw_ctx;
+   perf_ctx->devinfo = devinfo;
+
+   perf_ctx->unaccumulated =
+      ralloc_array(ctx, struct gen_perf_query_object *, 2);
+   perf_ctx->unaccumulated_elements = 0;
+   perf_ctx->unaccumulated_array_size = 2;
+
+   exec_list_make_empty(&perf_ctx->sample_buffers);
+   exec_list_make_empty(&perf_ctx->free_sample_buffers);
+
+   /* It's convenient to guarantee that this linked list of sample
+    * buffers is never empty so we add an empty head so when we
+    * Begin an OA query we can always take a reference on a buffer
+    * in this list.
+    */
+   struct oa_sample_buf *buf = gen_perf_get_free_sample_buf(perf_ctx);
+   exec_list_push_head(&perf_ctx->sample_buffers, &buf->link);
+
+   perf_ctx->oa_stream_fd = -1;
+   perf_ctx->next_query_start_report_id = 1000;
+}
@@ -428,6 +428,13 @@ struct gen_perf_query_object
 struct gen_perf_context {
    struct gen_perf_config *perf;
 
+   void * ctx;  /* driver context (eg, brw_context) */
+   void * bufmgr;
+   const struct gen_device_info *devinfo;
+
+   uint32_t hw_ctx;
+   int drm_fd;
+
    /* The i915 perf stream we open to setup + enable the OA counters */
    int oa_stream_fd;
 
@@ -483,6 +490,14 @@ struct gen_perf_context {
    int n_query_instances;
 };
 
+void gen_perf_init_context(struct gen_perf_context *perf_ctx,
+                           struct gen_perf_config *perf_cfg,
+                           void * ctx,  /* driver context (eg, brw_context) */
+                           void * bufmgr,  /* eg brw_bufmgr */
+                           const struct gen_device_info *devinfo,
+                           uint32_t hw_ctx,
+                           int drm_fd);
+
 static inline size_t
 gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
 {
@@ -706,7 +706,7 @@ brw_begin_perf_query(struct gl_context *ctx,
     * This is our Begin synchronization point to drain current work on the
     * GPU before we capture our first counter snapshot...
     */
-   perf_cfg->vtbl.emit_mi_flush(brw);
+   perf_cfg->vtbl.emit_mi_flush(perf_ctx->ctx);
 
    switch (query->kind) {
    case GEN_PERF_QUERY_TYPE_OA:
@@ -734,8 +734,7 @@ brw_begin_perf_query(struct gl_context *ctx,
       /* If the OA counters aren't already on, enable them. */
 
       if (perf_ctx->oa_stream_fd == -1) {
-         __DRIscreen *screen = brw->screen->driScrnPriv;
-         const struct gen_device_info *devinfo = &brw->screen->devinfo;
+         const struct gen_device_info *devinfo = perf_ctx->devinfo;
 
          /* The period_exponent gives a sampling period as follows:
          *   sample_period = timestamp_period * 2^(period_exponent + 1)
@@ -791,7 +790,8 @@ brw_begin_perf_query(struct gl_context *ctx,
                prev_sample_period / 1000000ul);
 
          if (!gen_perf_open(perf_ctx, metric_id, query->oa_format,
-                            period_exponent, screen->fd, brw->hw_ctx))
+                            period_exponent, perf_ctx->drm_fd,
+                            perf_ctx->hw_ctx))
             return false;
       } else {
          assert(perf_ctx->current_oa_metrics_set_id == metric_id &&
@@ -808,15 +808,14 @@ brw_begin_perf_query(struct gl_context *ctx,
          obj->oa.bo = NULL;
       }
 
-      obj->oa.bo =
-         brw->perf_ctx.perf->vtbl.bo_alloc(brw->bufmgr,
+      obj->oa.bo = perf_cfg->vtbl.bo_alloc(perf_ctx->bufmgr,
                                            "perf. query OA MI_RPC bo",
                                            MI_RPC_BO_SIZE);
 #ifdef DEBUG
       /* Pre-filling the BO helps debug whether writes landed. */
-      void *map = brw->perf_ctx.perf->vtbl.bo_map(brw, obj->oa.bo, MAP_WRITE);
+      void *map = perf_cfg->vtbl.bo_map(perf_ctx->ctx, obj->oa.bo, MAP_WRITE);
       memset(map, 0x80, MI_RPC_BO_SIZE);
-      brw->perf_ctx.perf->vtbl.bo_unmap(obj->oa.bo);
+      perf_cfg->vtbl.bo_unmap(obj->oa.bo);
 #endif
 
       obj->oa.begin_report_id = perf_ctx->next_query_start_report_id;
@@ -828,12 +827,12 @@ brw_begin_perf_query(struct gl_context *ctx,
    * scheduler to load a new request into the hardware. This is manifested in
    * tools like frameretrace by spikes in the "GPU Core Clocks" counter.
    */
-   perf_cfg->vtbl.batchbuffer_flush(brw, __FILE__, __LINE__);
+   perf_cfg->vtbl.batchbuffer_flush(perf_ctx->ctx, __FILE__, __LINE__);
 
    /* Take a starting OA counter snapshot. */
-   perf_cfg->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
+   perf_cfg->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, obj->oa.bo, 0,
                                             obj->oa.begin_report_id);
-   perf_cfg->vtbl.capture_frequency_stat_register(brw, obj->oa.bo,
+   perf_cfg->vtbl.capture_frequency_stat_register(perf_ctx->ctx, obj->oa.bo,
                                                   MI_FREQ_START_OFFSET_BYTES);
 
    ++perf_ctx->n_active_oa_queries;
@@ -858,23 +857,23 @@ brw_begin_perf_query(struct gl_context *ctx,
       gen_perf_query_result_clear(&obj->oa.result);
       obj->oa.results_accumulated = false;
 
-      add_to_unaccumulated_query_list(brw, obj);
+      add_to_unaccumulated_query_list(perf_ctx->ctx, obj);
       break;
    }
 
    case GEN_PERF_QUERY_TYPE_PIPELINE:
      if (obj->pipeline_stats.bo) {
-         brw->perf_ctx.perf->vtbl.bo_unreference(obj->pipeline_stats.bo);
+         perf_cfg->vtbl.bo_unreference(obj->pipeline_stats.bo);
          obj->pipeline_stats.bo = NULL;
       }
 
       obj->pipeline_stats.bo =
-         brw->perf_ctx.perf->vtbl.bo_alloc(brw->bufmgr,
+         perf_cfg->vtbl.bo_alloc(perf_ctx->bufmgr,
                                  "perf. query pipeline stats bo",
                                  STATS_BO_SIZE);
 
       /* Take starting snapshots. */
-      gen_perf_snapshot_statistics_registers(brw, perf_cfg, obj, 0);
+      gen_perf_snapshot_statistics_registers(perf_ctx->ctx, perf_cfg, obj, 0);
 
       ++perf_ctx->n_active_pipeline_stats_queries;
       break;
@@ -911,7 +910,7 @@ brw_end_perf_query(struct gl_context *ctx,
    * For more details see comment in brw_begin_perf_query for
    * corresponding flush.
    */
-   perf_cfg->vtbl.emit_mi_flush(brw);
+   perf_cfg->vtbl.emit_mi_flush(perf_ctx->ctx);
 
    switch (obj->queryinfo->kind) {
    case GEN_PERF_QUERY_TYPE_OA:
@@ -924,9 +923,9 @@ brw_end_perf_query(struct gl_context *ctx,
       */
      if (!obj->oa.results_accumulated) {
         /* Take an ending OA counter snapshot. */
-        perf_cfg->vtbl.capture_frequency_stat_register(brw, obj->oa.bo,
+        perf_cfg->vtbl.capture_frequency_stat_register(perf_ctx->ctx, obj->oa.bo,
                                                        MI_FREQ_END_OFFSET_BYTES);
-        brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
+        brw->vtbl.emit_mi_report_perf_count(perf_ctx->ctx, obj->oa.bo,
                                             MI_RPC_BO_END_OFFSET_BYTES,
                                             obj->oa.begin_report_id + 1);
      }
@@ -1127,7 +1126,7 @@ get_pipeline_stats_data(struct brw_context *brw,
    int n_counters = obj->queryinfo->n_counters;
    uint8_t *p = data;
 
-   uint64_t *start = perf_cfg->vtbl.bo_map(brw, obj->pipeline_stats.bo, MAP_READ);
+   uint64_t *start = perf_cfg->vtbl.bo_map(perf_ctx->ctx, obj->pipeline_stats.bo, MAP_READ);
    uint64_t *end = start + (STATS_BO_END_OFFSET_BYTES / sizeof(uint64_t));
 
    for (int i = 0; i < n_counters; i++) {
@@ -1471,7 +1470,6 @@ brw_init_perf_query_info(struct gl_context *ctx)
 {
    struct brw_context *brw = brw_context(ctx);
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   __DRIscreen *screen = brw->screen->driScrnPriv;
 
    struct gen_perf_context *perf_ctx = &brw->perf_ctx;
    if (perf_ctx->perf)
@@ -1493,34 +1491,16 @@ brw_init_perf_query_info(struct gl_context *ctx)
    perf_cfg->vtbl.store_register_mem64 =
       (store_register_mem64_t) brw_store_register_mem64;
 
+   gen_perf_init_context(perf_ctx, perf_cfg, brw, brw->bufmgr, devinfo,
+                         brw->hw_ctx, brw->screen->driScrnPriv->fd);
+
    init_pipeline_statistic_query_registers(brw);
-   gen_perf_query_register_mdapi_statistic_query(&brw->screen->devinfo,
-                                                 brw->perf_ctx.perf);
+   gen_perf_query_register_mdapi_statistic_query(devinfo, perf_cfg);
 
-   if ((oa_metrics_kernel_support(screen->fd, devinfo)) &&
-       (gen_perf_load_oa_metrics(perf_cfg, screen->fd, devinfo)))
+   if ((oa_metrics_kernel_support(perf_ctx->drm_fd, devinfo)) &&
+       (gen_perf_load_oa_metrics(perf_cfg, perf_ctx->drm_fd, devinfo)))
       gen_perf_query_register_mdapi_oa_query(devinfo, perf_cfg);
 
-   perf_ctx->unaccumulated =
-      ralloc_array(brw, struct gen_perf_query_object *, 2);
-   perf_ctx->unaccumulated_elements = 0;
-   perf_ctx->unaccumulated_array_size = 2;
-
-   exec_list_make_empty(&perf_ctx->sample_buffers);
-   exec_list_make_empty(&perf_ctx->free_sample_buffers);
-
-   /* It's convenient to guarantee that this linked list of sample
-    * buffers is never empty so we add an empty head so when we
-    * Begin an OA query we can always take a reference on a buffer
-    * in this list.
-    */
-   struct oa_sample_buf *buf = gen_perf_get_free_sample_buf(&brw->perf_ctx);
-   exec_list_push_head(&perf_ctx->sample_buffers, &buf->link);
-
-   perf_ctx->oa_stream_fd = -1;
-
-   perf_ctx->next_query_start_report_id = 1000;
-
    return perf_cfg->n_queries;
 }