mesa/glthread: add glthread "perf" counters and pass them to gallium HUD

These counters are for HUD integration in following commits. This valuable profiling data
will allow us to see on the HUD how well glthread is able to utilize
parallelism. This is better than benchmarking, because you can see
exactly what's happening and you don't have to be CPU-bound.

u_threaded_context has the same counters.

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
This commit is contained in:
Marek Olšák
2017-06-21 20:45:38 +02:00
parent 833f3c1c31
commit 5fa69be3c8
10 changed files with 65 additions and 7 deletions

View File

@@ -1694,3 +1694,11 @@ hud_destroy(struct hud_context *hud)
pipe_resource_reference(&hud->font.texture, NULL);
FREE(hud);
}
void
hud_add_queue_for_monitoring(struct hud_context *hud,
struct util_queue_monitoring *queue_info)
{
assert(!hud->monitored_queue);
hud->monitored_queue = queue_info;
}

View File

@@ -32,6 +32,7 @@ struct hud_context;
struct cso_context;
struct pipe_context;
struct pipe_resource;
struct util_queue_monitoring;
struct hud_context *
hud_create(struct pipe_context *pipe, struct cso_context *cso);
@@ -42,4 +43,8 @@ hud_destroy(struct hud_context *hud);
void
hud_draw(struct hud_context *hud, struct pipe_resource *tex);
void
hud_add_queue_for_monitoring(struct hud_context *hud,
struct util_queue_monitoring *queue_info);
#endif

View File

@@ -40,6 +40,8 @@ struct hud_context {
struct hud_batch_query_context *batch_query;
struct list_head pane_list;
struct util_queue_monitoring *monitored_queue;
/* states */
struct pipe_blend_state no_blend, alpha_blend;
struct pipe_depth_stencil_alpha_state dsa;

View File

@@ -179,6 +179,7 @@ enum st_manager_param {
struct pipe_context;
struct pipe_resource;
struct pipe_fence_handle;
struct util_queue_monitoring;
/**
* Used in st_context_iface->get_resource_for_egl_image.
@@ -474,7 +475,8 @@ struct st_manager
* Call the loader function setBackgroundContext. Called from the worker
* thread.
*/
void (*set_background_context)(struct st_context_iface *stctxi);
void (*set_background_context)(struct st_context_iface *stctxi,
struct util_queue_monitoring *queue_info);
};
/**

View File

@@ -447,7 +447,8 @@ dri_postprocessing_init(struct dri_screen *screen)
}
static void
dri_set_background_context(struct st_context_iface *st)
dri_set_background_context(struct st_context_iface *st,
struct util_queue_monitoring *queue_info)
{
struct dri_context *ctx = (struct dri_context *)st->st_manager_private;
const __DRIbackgroundCallableExtension *backgroundCallable =
@@ -459,6 +460,9 @@ dri_set_background_context(struct st_context_iface *st)
*/
assert(backgroundCallable);
backgroundCallable->setBackgroundContext(ctx->cPriv->loaderPrivate);
if (ctx->hud)
hud_add_queue_for_monitoring(ctx->hud, queue_info);
}
unsigned

View File

@@ -50,6 +50,7 @@ struct gl_shader_program;
struct gl_texture_image;
struct gl_texture_object;
struct gl_memory_info;
struct util_queue_monitoring;
/* GL_ARB_vertex_buffer_object */
/* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
@@ -1039,7 +1040,8 @@ struct dd_function_table {
*
* Mesa will only call this function if GL multithreading is enabled.
*/
void (*SetBackgroundContext)(struct gl_context *ctx);
void (*SetBackgroundContext)(struct gl_context *ctx,
struct util_queue_monitoring *queue_info);
/**
* \name GL_ARB_sparse_buffer interface

View File

@@ -36,6 +36,7 @@
#include "main/glthread.h"
#include "main/marshal.h"
#include "main/marshal_generated.h"
#include "util/u_atomic.h"
#include "util/u_thread.h"
@@ -60,7 +61,7 @@ glthread_thread_initialization(void *job, int thread_index)
{
struct gl_context *ctx = (struct gl_context*)job;
ctx->Driver.SetBackgroundContext(ctx);
ctx->Driver.SetBackgroundContext(ctx, &ctx->GLThread->stats);
_glapi_set_context(ctx);
}
@@ -90,6 +91,7 @@ _mesa_glthread_init(struct gl_context *ctx)
util_queue_fence_init(&glthread->batches[i].fence);
}
glthread->stats.queue = &glthread->queue;
ctx->CurrentClientDispatch = ctx->MarshalExec;
ctx->GLThread = glthread;
@@ -159,6 +161,8 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
return;
}
p_atomic_add(&glthread->stats.num_offloaded_items, next->used);
util_queue_add_job(&glthread->queue, next, &next->fence,
glthread_unmarshal_batch, NULL);
glthread->last = glthread->next;
@@ -188,16 +192,29 @@ _mesa_glthread_finish(struct gl_context *ctx)
struct glthread_batch *last = &glthread->batches[glthread->last];
struct glthread_batch *next = &glthread->batches[glthread->next];
bool synced = false;
if (!util_queue_fence_is_signalled(&last->fence))
if (!util_queue_fence_is_signalled(&last->fence)) {
util_queue_fence_wait(&last->fence);
synced = true;
}
if (next->used) {
p_atomic_add(&glthread->stats.num_direct_items, next->used);
/* Since glthread_unmarshal_batch changes the dispatch to direct,
* restore it after it's done.
*/
struct _glapi_table *dispatch = _glapi_get_dispatch();
glthread_unmarshal_batch(next, 0);
_glapi_set_dispatch(dispatch);
/* It's not a sync because we don't enqueue partial batches, but
* it would be a sync if we did. So count it anyway.
*/
synced = true;
}
if (synced)
p_atomic_inc(&glthread->stats.num_syncs);
}

View File

@@ -65,6 +65,9 @@ struct glthread_state
/** Multithreaded queue. */
struct util_queue queue;
/** This is sent to the driver for framebuffer overlay / HUD. */
struct util_queue_monitoring stats;
/** The ring of batches in memory. */
struct glthread_batch batches[MARSHAL_MAX_BATCHES];

View File

@@ -629,14 +629,15 @@ st_emit_string_marker(struct gl_context *ctx, const GLchar *string, GLsizei len)
}
static void
st_set_background_context(struct gl_context *ctx)
st_set_background_context(struct gl_context *ctx,
struct util_queue_monitoring *queue_info)
{
struct st_context *st = ctx->st;
struct st_manager *smapi =
(struct st_manager*)st->iface.st_context_private;
assert(smapi->set_background_context);
smapi->set_background_context(&st->iface);
smapi->set_background_context(&st->iface, queue_info);
}
void st_init_driver_functions(struct pipe_screen *screen,

View File

@@ -115,6 +115,20 @@ util_queue_fence_is_signalled(struct util_queue_fence *fence)
return fence->signalled != 0;
}
/* Convenient structure for monitoring the queue externally and passing
 * the structure between Mesa components. The queue doesn't use it directly.
 */
struct util_queue_monitoring
{
   /* For querying the thread busyness. */
   struct util_queue *queue;

   /* Counters updated by the user of the queue.
    * NOTE(review): updates appear to use p_atomic_* in callers — these are
    * plain unsigned here, so readers may see torn/stale values; confirm
    * that approximate values are acceptable for HUD display.
    */
   unsigned num_offloaded_items; /* items handed off to the queue thread */
   unsigned num_direct_items;    /* items executed synchronously by the caller */
   unsigned num_syncs;           /* times the caller had to wait on the queue */
};
#ifdef __cplusplus
}
#endif