vc4: Implement job shuffling

Track rendering to each FBO independently and flush rendering only when
necessary.  This lets us avoid the overhead of storing and loading the
frame when an application momentarily switches to rendering to some other
texture in order to continue rendering the main scene.

Improves glmark -b desktop:effect=shadow:windows=4 by 27%
Improves glmark -b
    desktop:blur-radius=5:effect=blur:passes=1:separable=true:windows=4
    by 17%

While I haven't tested other apps, this should help X rendering a lot, and
I've heard GLBenchmark needed it too.
Author: Eric Anholt
Date:   2016-09-08 12:56:11 -07:00
Parent: f473348468
Commit: f597ac3966

8 changed files with 332 additions and 193 deletions
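The core of the change is a get-or-create lookup: instead of a single job implicitly tied to the bound framebuffer, the context keeps a table of jobs keyed by the FBO's color and depth/stencil surfaces, and each draw fetches the job for the current FBO on demand. The fragment below is a minimal standalone sketch of that pattern, not the driver code: fbo_key, job and get_job are made-up names, and a small linear array stands in for Mesa's hash table.

/* Standalone sketch only (not driver code).  Jobs are looked up by the
 * surfaces they render to, so switching FBOs parks the old job instead of
 * flushing it.  A tiny linear table stands in for Mesa's hash table.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct fbo_key { void *cbuf, *zsbuf; };           /* color + depth/stencil */
struct job { struct fbo_key key; int draw_calls_queued; };

#define MAX_JOBS 16
static struct job *jobs[MAX_JOBS];

static struct job *
get_job(void *cbuf, void *zsbuf)
{
        struct fbo_key key = { cbuf, zsbuf };

        /* Return the existing job for this FBO if we have one. */
        for (int i = 0; i < MAX_JOBS; i++)
                if (jobs[i] && memcmp(&jobs[i]->key, &key, sizeof(key)) == 0)
                        return jobs[i];

        /* Otherwise start a new one (a real driver would flush on overflow). */
        for (int i = 0; i < MAX_JOBS; i++) {
                if (!jobs[i]) {
                        jobs[i] = calloc(1, sizeof(*jobs[i]));
                        jobs[i]->key = key;
                        return jobs[i];
                }
        }
        abort();
}

int main(void)
{
        int fbo_a, fbo_b;                         /* stand-ins for surfaces */

        struct job *a = get_job(&fbo_a, NULL);
        a->draw_calls_queued++;
        get_job(&fbo_b, NULL);                    /* momentary switch to FBO B */

        struct job *back = get_job(&fbo_a, NULL);
        printf("FBO A kept its job: %s (draws still queued: %d)\n",
               back == a ? "yes" : "no", back->draw_calls_queued);
        return 0;
}

Because the job for FBO A survives a momentary switch to FBO B, its tile store/load happens once at submit time instead of on every switch, which is where the glmark numbers above come from.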

vc4_blit.c

@@ -51,10 +51,6 @@ static bool
 vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
-        bool old_msaa = job->msaa;
-        int old_tile_width = job->tile_width;
-        int old_tile_height = job->tile_height;
         bool msaa = (info->src.resource->nr_samples > 1 ||
                      info->dst.resource->nr_samples > 1);
         int tile_width = msaa ? 32 : 64;
@@ -115,8 +111,6 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
         if (info->dst.resource->format != info->src.resource->format)
                 return false;
 
-        vc4_flush(pctx);
-
         if (false) {
                 fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
                         info->src.box.x,
@@ -132,11 +126,19 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
         struct pipe_surface *src_surf =
                 vc4_get_blit_surface(pctx, info->src.resource, info->src.level);
 
+        vc4_flush_jobs_reading_resource(vc4, info->src.resource);
+        struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL);
+
         pipe_surface_reference(&job->color_read, src_surf);
-        if (dst_surf->texture->nr_samples > 1)
-                pipe_surface_reference(&job->msaa_color_write, dst_surf);
-        else
-                pipe_surface_reference(&job->color_write, dst_surf);
+
+        /* If we're resolving from MSAA to single sample, we still need to run
+         * the engine in MSAA mode for the load.
+         */
+        if (!job->msaa && info->src.resource->nr_samples > 1) {
+                job->msaa = true;
+                job->tile_width = 32;
+                job->tile_height = 32;
+        }
 
         job->draw_min_x = info->dst.box.x;
         job->draw_min_y = info->dst.box.y;
@@ -153,10 +155,6 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
 
         vc4_job_submit(vc4, job);
 
-        job->msaa = old_msaa;
-        job->tile_width = old_tile_width;
-        job->tile_height = old_tile_height;
-
         pipe_surface_reference(&dst_surf, NULL);
         pipe_surface_reference(&src_surf, NULL);

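The one subtle bit in the blit change is the tile-size fixup: the hardware walks 32x32 tiles in MSAA mode and 64x64 tiles otherwise, and a resolve still has to load its MSAA source in MSAA mode even though the destination is single-sampled. A small illustrative helper (not part of the patch) restating that rule:

/* Illustrative only: tile dimensions for a blit job.  An MSAA source forces
 * MSAA-sized tiles even when resolving to a single-sampled destination,
 * matching the vc4_tile_blit logic above.
 */
struct tile_size { int width, height; };

struct tile_size
blit_tile_size(unsigned dst_samples, unsigned src_samples)
{
        int msaa = dst_samples > 1 || src_samples > 1;
        struct tile_size size = { msaa ? 32 : 64, msaa ? 32 : 64 };
        return size;
}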
vc4_context.c

@@ -41,38 +41,12 @@ void
 vc4_flush(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
-        struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
-        struct vc4_job *job = vc4->job;
-
-        if (cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) {
-                if (cbuf->texture->nr_samples > 1) {
-                        pipe_surface_reference(&job->msaa_color_write, cbuf);
-                } else {
-                        pipe_surface_reference(&job->color_write, cbuf);
-                }
-                pipe_surface_reference(&job->color_read, cbuf);
-        }
-
-        if (zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
-                if (zsbuf->texture->nr_samples > 1) {
-                        pipe_surface_reference(&job->msaa_zs_write, zsbuf);
-                } else {
-                        pipe_surface_reference(&job->zs_write, zsbuf);
-                }
-                pipe_surface_reference(&job->zs_read, zsbuf);
-        }
-
-        vc4_job_submit(vc4, job);
-
-        /* We have no hardware context saved between our draw calls, so we
-         * need to flag the next draw as needing all state emitted. Emitting
-         * all state at the start of our draws is also what ensures that we
-         * return to the state we need after a previous tile has finished.
-         */
-        vc4->dirty = ~0;
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc4->jobs, entry) {
+                struct vc4_job *job = entry->data;
+                vc4_job_submit(vc4, job);
+        }
 }
 
 static void
@@ -92,64 +66,18 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
         }
 }
 
-/**
- * Flushes the current command lists if they reference the given BO.
- *
- * This helps avoid flushing the command buffers when unnecessary.
- */
-bool
-vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo,
-                     bool include_reads)
-{
-        struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
-
-        if (!job->needs_flush)
-                return false;
-
-        /* Walk all the referenced BOs in the drawing command list to see if
-         * they match.
-         */
-        if (include_reads) {
-                struct vc4_bo **referenced_bos = job->bo_pointers.base;
-                for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
-                        if (referenced_bos[i] == bo) {
-                                return true;
-                        }
-                }
-        }
-
-        /* Also check for the Z/color buffers, since the references to those
-         * are only added immediately before submit.
-         */
-        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
-        if (csurf) {
-                struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
-                if (ctex->bo == bo) {
-                        return true;
-                }
-        }
-
-        struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
-        if (zsurf) {
-                struct vc4_resource *ztex =
-                        vc4_resource(zsurf->base.texture);
-                if (ztex->bo == bo) {
-                        return true;
-                }
-        }
-
-        return false;
-}
-
 static void
 vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct pipe_surface *zsurf = vc4->framebuffer.zsbuf;
+        struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
+                                                           prsc);
+        if (!entry)
+                return;
 
-        if (zsurf && zsurf->texture == prsc)
-                vc4->job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
+        struct vc4_job *job = entry->data;
+        if (job->key.zsbuf && job->key.zsbuf->texture == prsc)
+                job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
 }
 
 static void
@@ -157,6 +85,8 @@ vc4_context_destroy(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
 
+        vc4_flush(pctx);
+
         if (vc4->blitter)
                 util_blitter_destroy(vc4->blitter);
@@ -205,8 +135,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
         vc4_query_init(pctx);
         vc4_resource_context_init(pctx);
 
-        vc4->job = rzalloc(vc4, struct vc4_job);
-        vc4_job_init(vc4->job);
+        vc4_job_init(vc4);
 
         vc4->fd = screen->fd;

vc4_context.h

@@ -190,6 +190,12 @@ struct vc4_vertex_stateobj {
         unsigned num_elements;
 };
 
+/* Hash table key for vc4->jobs */
+struct vc4_job_key {
+        struct pipe_surface *cbuf;
+        struct pipe_surface *zsbuf;
+};
+
 /**
  * A complete bin/render job.
  *
@@ -266,6 +272,8 @@ struct vc4_job {
          * the current job.
          */
         uint32_t draw_calls_queued;
+
+        struct vc4_job_key key;
 };
 
 struct vc4_context {
@@ -274,9 +282,21 @@ struct vc4_context {
         int fd;
         struct vc4_screen *screen;
 
-        /** The render job for the currently bound FBO. */
+        /** The 3D rendering job for the currently bound FBO. */
         struct vc4_job *job;
 
+        /* Map from struct vc4_job_key to the job for that FBO.
+         */
+        struct hash_table *jobs;
+
+        /**
+         * Map from vc4_resource to a job writing to that resource.
+         *
+         * Primarily for flushing jobs rendering to textures that are now
+         * being read from.
+         */
+        struct hash_table *write_jobs;
+
         struct slab_mempool transfer_pool;
         struct blitter_context *blitter;
@@ -404,7 +424,8 @@ void vc4_program_fini(struct pipe_context *pctx);
 void vc4_query_init(struct pipe_context *pctx);
 void vc4_simulator_init(struct vc4_screen *screen);
 int vc4_simulator_flush(struct vc4_context *vc4,
-                        struct drm_vc4_submit_cl *args);
+                        struct drm_vc4_submit_cl *args,
+                        struct vc4_job *job);
 
 void vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader);
 void vc4_write_uniforms(struct vc4_context *vc4,
@@ -413,11 +434,17 @@ void vc4_write_uniforms(struct vc4_context *vc4,
                         struct vc4_texture_stateobj *texstate);
 
 void vc4_flush(struct pipe_context *pctx);
-void vc4_job_init(struct vc4_job *job);
+void vc4_job_init(struct vc4_context *vc4);
+struct vc4_job *vc4_get_job(struct vc4_context *vc4,
+                            struct pipe_surface *cbuf,
+                            struct pipe_surface *zsbuf);
+struct vc4_job *vc4_get_job_for_fbo(struct vc4_context *vc4);
 void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job);
-void vc4_job_reset(struct vc4_job *job);
-bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo,
-                          bool include_reads);
+void vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
+                                     struct pipe_resource *prsc);
+void vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
+                                     struct pipe_resource *prsc);
 void vc4_emit_state(struct pipe_context *pctx);
 void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c);
 struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c);

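Two hash tables drive the tracking declared above: vc4->jobs maps a vc4_job_key (the pair of surfaces) to its job, and vc4->write_jobs maps a resource to the one job currently writing it. Because the key is nothing but two pointers, it can be hashed and compared as raw bytes. The sketch below illustrates that idea with a plain FNV-1a hash; it is not the driver code, which uses _mesa_hash_data and a memcmp-based compare (see vc4_job.c further down).

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Mirrors struct vc4_job_key: two pointers, no padding between them. */
struct job_key_sketch {
        void *cbuf;
        void *zsbuf;
};

/* Hash the key by its raw bytes (FNV-1a, illustrative). */
uint32_t
job_key_hash(const void *key)
{
        const uint8_t *bytes = key;
        uint32_t hash = 2166136261u;
        for (size_t i = 0; i < sizeof(struct job_key_sketch); i++)
                hash = (hash ^ bytes[i]) * 16777619u;
        return hash;
}

/* Equality is plain bytewise comparison, as in the driver. */
bool
job_key_equal(const void *a, const void *b)
{
        return memcmp(a, b, sizeof(struct job_key_sketch)) == 0;
}

Hashing the raw bytes is only safe because the key has no internal padding; a key with mixed-size fields would need to be zero-initialized or compared field by field.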
vc4_draw.c

@@ -116,9 +116,11 @@ vc4_start_draw(struct vc4_context *vc4, int vert_count)
 }
 
 static void
-vc4_update_shadow_textures(struct pipe_context *pctx,
-                           struct vc4_texture_stateobj *stage_tex)
+vc4_predraw_check_textures(struct pipe_context *pctx,
+                           struct vc4_texture_stateobj *stage_tex)
 {
+        struct vc4_context *vc4 = vc4_context(pctx);
+
         for (int i = 0; i < stage_tex->num_textures; i++) {
                 struct pipe_sampler_view *view = stage_tex->textures[i];
                 if (!view)
@@ -126,6 +128,8 @@ vc4_update_shadow_textures(struct pipe_context *pctx,
                 struct vc4_resource *rsc = vc4_resource(view->texture);
                 if (rsc->shadow_parent)
                         vc4_update_shadow_baselevel_texture(pctx, view);
+
+                vc4_flush_jobs_writing_resource(vc4, view->texture);
         }
 }
@@ -263,12 +267,12 @@ static void
 vc4_hw_2116_workaround(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
+        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
 
         if (job->draw_calls_queued == 0x1ef0) {
                 perf_debug("Flushing batch due to HW-2116 workaround "
                            "(too many draw calls per scene\n");
-                vc4_flush(pctx);
+                vc4_job_submit(vc4, job);
         }
 }
@@ -276,7 +280,6 @@ static void
 vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
 
         if (info->mode >= PIPE_PRIM_QUADS) {
                 util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
@@ -288,11 +291,13 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         }
 
         /* Before setting up the draw, do any fixup blits necessary. */
-        vc4_update_shadow_textures(pctx, &vc4->verttex);
-        vc4_update_shadow_textures(pctx, &vc4->fragtex);
+        vc4_predraw_check_textures(pctx, &vc4->verttex);
+        vc4_predraw_check_textures(pctx, &vc4->fragtex);
 
         vc4_hw_2116_workaround(pctx);
 
+        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
+
         vc4_get_draw_cl_space(job, info->count);
 
         if (vc4->prim_mode != info->mode) {
@@ -466,14 +471,15 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
           const union pipe_color_union *color, double depth, unsigned stencil)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
+        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
 
         /* We can't flag new buffers for clearing once we've queued draws. We
          * could avoid this by using the 3d engine to clear.
         */
         if (job->draw_calls_queued) {
                 perf_debug("Flushing rendering to process new clear.\n");
-                vc4_flush(pctx);
+                vc4_job_submit(vc4, job);
+                job = vc4_get_job_for_fbo(vc4);
         }
 
         /* Clearing ZS will clear both Z and stencil, so if we're trying to

vc4_job.c

@@ -28,49 +28,239 @@
 #include <xf86drm.h>
 
 #include "vc4_context.h"
+#include "util/hash_table.h"
 
-void
-vc4_job_init(struct vc4_job *job)
-{
-        vc4_init_cl(job, &job->bcl);
-        vc4_init_cl(job, &job->shader_rec);
-        vc4_init_cl(job, &job->uniforms);
-        vc4_init_cl(job, &job->bo_handles);
-        vc4_init_cl(job, &job->bo_pointers);
-        vc4_job_reset(job);
-}
-
-void
-vc4_job_reset(struct vc4_job *job)
-{
-        struct vc4_bo **referenced_bos = job->bo_pointers.base;
-        for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
-                vc4_bo_unreference(&referenced_bos[i]);
-        }
-
-        vc4_reset_cl(&job->bcl);
-        vc4_reset_cl(&job->shader_rec);
-        vc4_reset_cl(&job->uniforms);
-        vc4_reset_cl(&job->bo_handles);
-        vc4_reset_cl(&job->bo_pointers);
-        job->shader_rec_count = 0;
-        job->needs_flush = false;
-        job->draw_calls_queued = 0;
-        job->resolve = 0;
-        job->cleared = 0;
-
-        job->draw_min_x = ~0;
-        job->draw_min_y = ~0;
-        job->draw_max_x = 0;
-        job->draw_max_y = 0;
-
-        pipe_surface_reference(&job->color_write, NULL);
-        pipe_surface_reference(&job->color_read, NULL);
-        pipe_surface_reference(&job->msaa_color_write, NULL);
-        pipe_surface_reference(&job->zs_write, NULL);
-        pipe_surface_reference(&job->zs_read, NULL);
-        pipe_surface_reference(&job->msaa_zs_write, NULL);
-}
+static void
+remove_from_ht(struct hash_table *ht, void *key)
+{
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        _mesa_hash_table_remove(ht, entry);
+}
+
+static void
+vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
+{
+        struct vc4_bo **referenced_bos = job->bo_pointers.base;
+        for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
+                vc4_bo_unreference(&referenced_bos[i]);
+        }
+
+        remove_from_ht(vc4->jobs, &job->key);
+
+        if (job->color_write) {
+                remove_from_ht(vc4->write_jobs, job->color_write->texture);
+                pipe_surface_reference(&job->color_write, NULL);
+        }
+        if (job->msaa_color_write) {
+                remove_from_ht(vc4->write_jobs, job->msaa_color_write->texture);
+                pipe_surface_reference(&job->msaa_color_write, NULL);
+        }
+        if (job->zs_write) {
+                remove_from_ht(vc4->write_jobs, job->zs_write->texture);
+                pipe_surface_reference(&job->zs_write, NULL);
+        }
+        if (job->msaa_zs_write) {
+                remove_from_ht(vc4->write_jobs, job->msaa_zs_write->texture);
+                pipe_surface_reference(&job->msaa_zs_write, NULL);
+        }
+
+        pipe_surface_reference(&job->color_read, NULL);
+        pipe_surface_reference(&job->zs_read, NULL);
+
+        if (vc4->job == job)
+                vc4->job = NULL;
+
+        ralloc_free(job);
+}
+
+static struct vc4_job *
+vc4_job_create(struct vc4_context *vc4)
+{
+        struct vc4_job *job = rzalloc(vc4, struct vc4_job);
+
+        vc4_init_cl(job, &job->bcl);
+        vc4_init_cl(job, &job->shader_rec);
+        vc4_init_cl(job, &job->uniforms);
+        vc4_init_cl(job, &job->bo_handles);
+        vc4_init_cl(job, &job->bo_pointers);
+
+        job->draw_min_x = ~0;
+        job->draw_min_y = ~0;
+        job->draw_max_x = 0;
+        job->draw_max_y = 0;
+
+        return job;
+}
+
+void
+vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
+                                struct pipe_resource *prsc)
+{
+        struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
+                                                           prsc);
+        if (entry) {
+                struct vc4_job *job = entry->data;
+                vc4_job_submit(vc4, job);
+        }
+}
+
+void
+vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
+                                struct pipe_resource *prsc)
+{
+        struct vc4_resource *rsc = vc4_resource(prsc);
+
+        vc4_flush_jobs_writing_resource(vc4, prsc);
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc4->jobs, entry) {
+                struct vc4_job *job = entry->data;
+
+                struct vc4_bo **referenced_bos = job->bo_pointers.base;
+                for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
+                        if (referenced_bos[i] == rsc->bo) {
+                                vc4_job_submit(vc4, job);
+                                continue;
+                        }
+                }
+
+                /* Also check for the Z/color buffers, since the references to
+                 * those are only added immediately before submit.
+                 */
+                if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
+                        struct vc4_resource *ctex =
+                                vc4_resource(job->color_read->texture);
+                        if (ctex->bo == rsc->bo) {
+                                vc4_job_submit(vc4, job);
+                                continue;
+                        }
+                }
+
+                if (job->zs_read && !(job->cleared &
+                                      (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+                        struct vc4_resource *ztex =
+                                vc4_resource(job->zs_read->texture);
+                        if (ztex->bo == rsc->bo) {
+                                vc4_job_submit(vc4, job);
+                                continue;
+                        }
+                }
+        }
+}
+
+/**
+ * Returns a vc4_job struture for tracking V3D rendering to a particular FBO.
+ *
+ * If we've already started rendering to this FBO, then return old same job,
+ * otherwise make a new one.  If we're beginning rendering to an FBO, make
+ * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
+ * have been flushed.
+ */
+struct vc4_job *
+vc4_get_job(struct vc4_context *vc4,
+            struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
+{
+        /* Return the existing job for this FBO if we have one */
+        struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
+        struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
+                                                           &local_key);
+        if (entry)
+                return entry->data;
+
+        /* Creating a new job. Make sure that any previous jobs reading or
+         * writing these buffers are flushed.
+         */
+        if (cbuf)
+                vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
+        if (zsbuf)
+                vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
+
+        struct vc4_job *job = vc4_job_create(vc4);
+
+        if (cbuf) {
+                if (cbuf->texture->nr_samples > 1) {
+                        job->msaa = true;
+                        pipe_surface_reference(&job->msaa_color_write, cbuf);
+                } else {
+                        pipe_surface_reference(&job->color_write, cbuf);
+                }
+        }
+
+        if (zsbuf) {
+                if (zsbuf->texture->nr_samples > 1) {
+                        job->msaa = true;
+                        pipe_surface_reference(&job->msaa_zs_write, zsbuf);
+                } else {
+                        pipe_surface_reference(&job->zs_write, zsbuf);
+                }
+        }
+
+        if (job->msaa) {
+                job->tile_width = 32;
+                job->tile_height = 32;
+        } else {
+                job->tile_width = 64;
+                job->tile_height = 64;
+        }
+
+        if (cbuf)
+                _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
+        if (zsbuf)
+                _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
+
+        job->key.cbuf = cbuf;
+        job->key.zsbuf = zsbuf;
+        _mesa_hash_table_insert(vc4->jobs, &job->key, job);
+
+        return job;
+}
+
+struct vc4_job *
+vc4_get_job_for_fbo(struct vc4_context *vc4)
+{
+        if (vc4->job)
+                return vc4->job;
+
+        struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
+        struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
+        struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
+
+        /* The dirty flags are tracking what's been updated while vc4->job has
+         * been bound, so set them all to ~0 when switching between jobs. We
+         * also need to reset all state at the start of rendering.
+         */
+        vc4->dirty = ~0;
+
+        /* Set up the read surfaces in the job. If they aren't actually
+         * getting read (due to a clear starting the frame), job->cleared will
+         * mask out the read.
+         */
+        pipe_surface_reference(&job->color_read, cbuf);
+        pipe_surface_reference(&job->zs_read, zsbuf);
+
+        /* If we're binding to uninitialized buffers, no need to load their
+         * contents before drawing.
+         */
+        if (cbuf) {
+                struct vc4_resource *rsc = vc4_resource(cbuf->texture);
+                if (!rsc->writes)
+                        job->cleared |= PIPE_CLEAR_COLOR0;
+        }
+
+        if (zsbuf) {
+                struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
+                if (!rsc->writes)
+                        job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+        }
+
+        job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
+                                         job->tile_width);
+        job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
+                                         job->tile_height);
+
+        vc4->job = job;
+
+        return job;
+}
 
 static void
@@ -166,15 +356,14 @@ void
 vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
 {
         if (!job->needs_flush)
-                return;
+                goto done;
 
         /* The RCL setup would choke if the draw bounds cause no drawing, so
          * just drop the drawing if that's the case.
          */
         if (job->draw_max_x <= job->draw_min_x ||
             job->draw_max_y <= job->draw_min_y) {
-                vc4_job_reset(job);
-                return;
+                goto done;
         }
 
         if (vc4_debug & VC4_DEBUG_CL) {
@@ -275,7 +464,7 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
 #ifndef USE_VC4_SIMULATOR
         ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
 #else
-        ret = vc4_simulator_flush(vc4, &submit);
+        ret = vc4_simulator_flush(vc4, &submit, job);
 #endif
         static bool warned = false;
         if (ret && !warned) {
@@ -304,5 +493,30 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
                 }
         }
 
-        vc4_job_reset(vc4->job);
+done:
+        vc4_job_free(vc4, job);
 }
+
+static bool
+vc4_job_compare(const void *a, const void *b)
+{
+        return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
+}
+
+static uint32_t
+vc4_job_hash(const void *key)
+{
+        return _mesa_hash_data(key, sizeof(struct vc4_job_key));
+}
+
+void
+vc4_job_init(struct vc4_context *vc4)
+{
+        vc4->jobs = _mesa_hash_table_create(vc4,
+                                            vc4_job_hash,
+                                            vc4_job_compare);
+
+        vc4->write_jobs = _mesa_hash_table_create(vc4,
+                                                  _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
+}

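The two flush helpers above are what keep the per-FBO reordering correct: before anything reads a resource, the job writing it has to be submitted, and creating a job for an FBO first flushes any job still reading its surfaces. The standalone sketch below shows just the write-tracking half of that discipline; resource, job and submit are stand-ins, not the driver's types.

#include <stdio.h>

struct job;
struct resource { struct job *write_job; };   /* like vc4->write_jobs, per resource */
struct job { const char *name; int submitted; };

static void
submit(struct job *job)
{
        if (!job->submitted) {
                printf("submitting %s\n", job->name);
                job->submitted = 1;
        }
}

/* Before reading a resource, flush the job that writes it (read-after-write). */
static void
flush_jobs_writing(struct resource *rsc)
{
        if (rsc->write_job) {
                submit(rsc->write_job);
                rsc->write_job = NULL;
        }
}

int main(void)
{
        struct job shadow_pass = { "shadow-map job", 0 };
        struct job scene_pass = { "main-scene job", 0 };
        struct resource shadow_map = { &shadow_pass };

        /* The main scene samples the shadow map, so its producer lands first. */
        flush_jobs_writing(&shadow_map);
        submit(&scene_pass);
        return 0;
}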
vc4_resource.c

@@ -115,7 +115,6 @@ vc4_resource_transfer_unmap(struct pipe_context *pctx,
                 blit.filter = PIPE_TEX_FILTER_NEAREST;
                 pctx->blit(pctx, &blit);
-                vc4_flush(pctx);
 
                 pipe_resource_reference(&trans->ss_resource, NULL);
         }
@@ -178,20 +177,20 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
                         if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
                                 vc4->dirty |= VC4_DIRTY_VTXBUF;
                 } else {
-                        /* If we failed to reallocate, flush everything so
-                         * that we don't violate any syncing requirements.
+                        /* If we failed to reallocate, flush users so that we
+                         * don't violate any syncing requirements.
                          */
-                        vc4_flush(pctx);
+                        vc4_flush_jobs_reading_resource(vc4, prsc);
                 }
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                 /* If we're writing and the buffer is being used by the CL, we
                  * have to flush the CL first. If we're only reading, we need
                  * to flush if the CL has written our buffer.
                  */
-                if (vc4_cl_references_bo(pctx, rsc->bo,
-                                         usage & PIPE_TRANSFER_WRITE)) {
-                        vc4_flush(pctx);
-                }
+                if (usage & PIPE_TRANSFER_WRITE)
+                        vc4_flush_jobs_reading_resource(vc4, prsc);
+                else
+                        vc4_flush_jobs_writing_resource(vc4, prsc);
         }
 
         if (usage & PIPE_TRANSFER_WRITE)
@@ -245,7 +244,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
                 blit.filter = PIPE_TEX_FILTER_NEAREST;
                 pctx->blit(pctx, &blit);
-                vc4_flush(pctx);
+                vc4_flush_jobs_writing_resource(vc4, blit.dst.resource);
         }
 
         /* The rest of the mapping process should use our temporary. */

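The transfer-map hunks encode a simple policy: mapping for a CPU write has to wait for every job still reading or writing the buffer, while mapping for a CPU read only has to wait for the job writing it (flushing "readers" covers the writer too, because vc4_flush_jobs_reading_resource calls vc4_flush_jobs_writing_resource first). Restated as a helper using the functions this commit adds; the helper itself is illustrative and not part of the patch:

#include <stdbool.h>

#include "vc4_context.h"

/* Illustrative only: which jobs must land before the CPU maps a buffer. */
static void
flush_for_map(struct vc4_context *vc4, struct pipe_resource *prsc,
              bool cpu_write)
{
        if (cpu_write) {
                /* CPU write: GPU readers and the writer must all finish. */
                vc4_flush_jobs_reading_resource(vc4, prsc);
        } else {
                /* CPU read: only the job writing the buffer matters. */
                vc4_flush_jobs_writing_resource(vc4, prsc);
        }
}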
vc4_simulator.c

@@ -74,11 +74,10 @@ drm_gem_cma_create(struct drm_device *dev, size_t size)
 }
 
 static int
-vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
+vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_job *job,
+                      struct vc4_exec_info *exec)
 {
         struct drm_vc4_submit_cl *args = exec->args;
-        struct vc4_context *vc4 = dev->vc4;
-        struct vc4_job *job = vc4->job;
         struct vc4_bo **bos = job->bo_pointers.base;
 
         exec->bo_count = args->bo_handle_count;
@@ -220,7 +219,8 @@ vc4_dump_to_file(struct vc4_exec_info *exec)
 }
 
 int
-vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
+vc4_simulator_flush(struct vc4_context *vc4,
+                    struct drm_vc4_submit_cl *args, struct vc4_job *job)
 {
         struct vc4_screen *screen = vc4->screen;
         struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
@@ -257,7 +257,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
         exec.args = args;
 
-        ret = vc4_simulator_pin_bos(dev, &exec);
+        ret = vc4_simulator_pin_bos(dev, job, &exec);
         if (ret)
                 return ret;

vc4_state.c

@@ -406,11 +406,10 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
                           const struct pipe_framebuffer_state *framebuffer)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
         struct pipe_framebuffer_state *cso = &vc4->framebuffer;
         unsigned i;
 
-        vc4_flush(pctx);
+        vc4->job = NULL;
 
         for (i = 0; i < framebuffer->nr_cbufs; i++)
                 pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
@@ -424,23 +423,6 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
         cso->width = framebuffer->width;
         cso->height = framebuffer->height;
 
-        /* If we're binding to uninitialized buffers, no need to load their
-         * contents before drawing..
-         */
-        if (cso->cbufs[0]) {
-                struct vc4_resource *rsc =
-                        vc4_resource(cso->cbufs[0]->texture);
-                if (!rsc->writes)
-                        job->cleared |= PIPE_CLEAR_COLOR0;
-        }
-
-        if (cso->zsbuf) {
-                struct vc4_resource *rsc =
-                        vc4_resource(cso->zsbuf->texture);
-                if (!rsc->writes)
-                        job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
-        }
-
         /* Nonzero texture mipmap levels are laid out as if they were in
          * power-of-two-sized spaces. The renderbuffer config infers its
          * stride from the width parameter, so we need to configure our
@@ -461,22 +443,6 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
                                             rsc->cpp);
         }
 
-        job->msaa = false;
-        if (cso->cbufs[0])
-                job->msaa = cso->cbufs[0]->texture->nr_samples > 1;
-        else if (cso->zsbuf)
-                job->msaa = cso->zsbuf->texture->nr_samples > 1;
-
-        if (job->msaa) {
-                job->tile_width = 32;
-                job->tile_height = 32;
-        } else {
-                job->tile_width = 64;
-                job->tile_height = 64;
-        }
-
-        job->draw_tiles_x = DIV_ROUND_UP(cso->width, job->tile_width);
-        job->draw_tiles_y = DIV_ROUND_UP(cso->height, job->tile_height);
-
         vc4->dirty |= VC4_DIRTY_FRAMEBUFFER;
 }