zink: batch mem barrier hooks
Memory barriers are often redundant, so batch them and apply them based on actual usage to be slightly more efficient. Reviewed-by: Dave Airlie <airlied@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12576>
This commit is contained in:

committed by
Marge Bot

parent
115935cc53
commit
3674839d11
@@ -109,6 +109,7 @@ struct zink_batch {
|
||||
unsigned work_count;
|
||||
|
||||
bool has_work;
|
||||
bool last_was_compute;
|
||||
bool in_rp; //renderpass is currently active
|
||||
};
|
||||
|
||||
|
@@ -2696,71 +2696,80 @@ zink_texture_barrier(struct pipe_context *pctx, unsigned flags)
|
||||
}
|
||||
|
||||
static inline void
|
||||
mem_barrier(struct zink_batch *batch, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src, VkAccessFlags dst)
|
||||
mem_barrier(struct zink_context *ctx, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src, VkAccessFlags dst)
|
||||
{
|
||||
struct zink_batch *batch = &ctx->batch;
|
||||
VkMemoryBarrier mb;
|
||||
mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
|
||||
mb.pNext = NULL;
|
||||
mb.srcAccessMask = src;
|
||||
mb.dstAccessMask = dst;
|
||||
zink_end_render_pass(ctx, batch);
|
||||
vkCmdPipelineBarrier(batch->state->cmdbuf, src_stage, dst_stage, 0, 1, &mb, 0, NULL, 0, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
zink_flush_memory_barrier(struct zink_context *ctx, bool is_compute)
|
||||
{
|
||||
const VkPipelineStageFlags gfx_flags = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||
const VkPipelineStageFlags cs_flags = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||
VkPipelineStageFlags src = ctx->batch.last_was_compute ? cs_flags : gfx_flags;
|
||||
VkPipelineStageFlags dst = is_compute ? cs_flags : gfx_flags;
|
||||
|
||||
if (ctx->memory_barrier & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_IMAGE))
|
||||
mem_barrier(ctx, src, dst, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||
|
||||
if (ctx->memory_barrier & PIPE_BARRIER_CONSTANT_BUFFER)
|
||||
mem_barrier(ctx, src, dst,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_UNIFORM_READ_BIT);
|
||||
|
||||
if (!is_compute) {
|
||||
if (ctx->memory_barrier & PIPE_BARRIER_INDIRECT_BUFFER)
|
||||
mem_barrier(ctx, src, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
|
||||
if (ctx->memory_barrier & PIPE_BARRIER_VERTEX_BUFFER)
|
||||
mem_barrier(ctx, gfx_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT);
|
||||
|
||||
if (ctx->memory_barrier & PIPE_BARRIER_INDEX_BUFFER)
|
||||
mem_barrier(ctx, gfx_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_INDEX_READ_BIT);
|
||||
if (ctx->memory_barrier & PIPE_BARRIER_FRAMEBUFFER)
|
||||
zink_texture_barrier(&ctx->base, 0);
|
||||
if (ctx->memory_barrier & PIPE_BARRIER_STREAMOUT_BUFFER)
|
||||
mem_barrier(ctx, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
|
||||
VK_ACCESS_SHADER_READ_BIT,
|
||||
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
|
||||
VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT);
|
||||
}
|
||||
ctx->memory_barrier = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
zink_memory_barrier(struct pipe_context *pctx, unsigned flags)
|
||||
{
|
||||
struct zink_context *ctx = zink_context(pctx);
|
||||
|
||||
VkPipelineStageFlags all_flags = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
|
||||
|
||||
if (!(flags & ~PIPE_BARRIER_UPDATE))
|
||||
flags &= ~PIPE_BARRIER_UPDATE;
|
||||
if (!flags)
|
||||
return;
|
||||
|
||||
struct zink_batch *batch = &ctx->batch;
|
||||
zink_end_render_pass(ctx, batch);
|
||||
|
||||
if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
|
||||
/* TODO: this should flush all persistent buffers in use as I think */
|
||||
flags &= ~PIPE_BARRIER_MAPPED_BUFFER;
|
||||
}
|
||||
|
||||
if (flags & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_IMAGE))
|
||||
mem_barrier(batch, all_flags, all_flags, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
|
||||
|
||||
if (flags & PIPE_BARRIER_VERTEX_BUFFER)
|
||||
mem_barrier(batch, all_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT);
|
||||
|
||||
if (flags & PIPE_BARRIER_INDEX_BUFFER)
|
||||
mem_barrier(batch, all_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_INDEX_READ_BIT);
|
||||
|
||||
if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
|
||||
mem_barrier(batch, all_flags, all_flags,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_UNIFORM_READ_BIT);
|
||||
|
||||
if (flags & PIPE_BARRIER_INDIRECT_BUFFER)
|
||||
mem_barrier(batch, all_flags, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
|
||||
|
||||
if (flags & PIPE_BARRIER_FRAMEBUFFER)
|
||||
zink_texture_barrier(pctx, 0);
|
||||
if (flags & PIPE_BARRIER_STREAMOUT_BUFFER)
|
||||
mem_barrier(batch, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
|
||||
VK_ACCESS_SHADER_READ_BIT,
|
||||
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
|
||||
VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT);
|
||||
ctx->memory_barrier = flags;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -300,6 +300,7 @@ struct zink_context {
|
||||
struct set *need_barriers[2]; //gfx, compute
|
||||
struct set update_barriers[2][2]; //[gfx, compute][current, next]
|
||||
uint8_t barrier_set_idx[2];
|
||||
unsigned memory_barrier;
|
||||
|
||||
uint32_t num_so_targets;
|
||||
struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_OUTPUTS];
|
||||
@@ -417,6 +418,8 @@ zink_pipeline_flags_from_pipe_stage(enum pipe_shader_type pstage)
|
||||
void
|
||||
zink_rebind_all_buffers(struct zink_context *ctx);
|
||||
|
||||
void
|
||||
zink_flush_memory_barrier(struct zink_context *ctx, bool is_compute);
|
||||
void
|
||||
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen);
|
||||
void
|
||||
|
@@ -430,6 +430,7 @@ zink_draw_vbo(struct pipe_context *pctx,
|
||||
unsigned work_count = ctx->batch.work_count;
|
||||
enum pipe_prim_type mode = (enum pipe_prim_type)dinfo->mode;
|
||||
|
||||
zink_flush_memory_barrier(ctx, false);
|
||||
update_barriers(ctx, false);
|
||||
|
||||
if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter)) {
|
||||
@@ -756,6 +757,7 @@ zink_draw_vbo(struct pipe_context *pctx,
|
||||
screen->vk.CmdEndTransformFeedbackEXT(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
|
||||
}
|
||||
batch->has_work = true;
|
||||
batch->last_was_compute = false;
|
||||
ctx->batch.work_count = work_count;
|
||||
/* flush if there's >100k draws */
|
||||
if (unlikely(work_count >= 30000) || ctx->oom_flush)
|
||||
@@ -771,6 +773,7 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
|
||||
struct zink_batch *batch = &ctx->batch;
|
||||
|
||||
update_barriers(ctx, true);
|
||||
zink_flush_memory_barrier(ctx, true);
|
||||
|
||||
if (zink_program_has_descriptors(&ctx->curr_compute->base))
|
||||
screen->descriptors_update(ctx, true);
|
||||
@@ -804,6 +807,7 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
|
||||
} else
|
||||
vkCmdDispatch(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
|
||||
batch->has_work = true;
|
||||
batch->last_was_compute = true;
|
||||
/* flush if there's >100k computes */
|
||||
if (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush)
|
||||
pctx->flush(pctx, NULL, 0);
|
||||
|
Reference in New Issue
Block a user