zink: batch mem barrier hooks

memory barriers are often redundant, so batch them and apply them based on
actual usage to be slightly more efficient

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12576>
Author: Mike Blumenkrantz
Date: 2021-07-16 09:44:51 -04:00
Committed by: Marge Bot
Parent: 115935cc53
Commit: 3674839d11

4 changed files with 63 additions and 46 deletions
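
To make the shape of the change easier to see before reading the diff, here is a minimal, self-contained sketch of the pattern: the frontend-facing memory_barrier hook only records the requested flags, and a flush helper translates them into actual barriers right before the next draw or dispatch that could consume them. All names and flag values below are hypothetical stand-ins, not the zink API; only the control flow mirrors the patch.

#include <stdbool.h>
#include <stdio.h>

/* hypothetical barrier flags standing in for PIPE_BARRIER_* */
enum {
   BARRIER_SHADER_BUFFER   = 1 << 0,
   BARRIER_CONSTANT_BUFFER = 1 << 1,
   BARRIER_VERTEX_BUFFER   = 1 << 2,
};

struct ctx {
   unsigned pending_barriers; /* plays the role of ctx->memory_barrier */
   bool last_was_compute;     /* plays the role of batch->last_was_compute */
};

/* the frontend hook: record the request, emit nothing yet */
static void
memory_barrier(struct ctx *c, unsigned flags)
{
   c->pending_barriers = flags; /* the patch assigns rather than accumulates */
}

/* called on the draw/dispatch path: emit only what the next job can consume */
static void
flush_memory_barrier(struct ctx *c, bool is_compute)
{
   const char *src = c->last_was_compute ? "compute" : "gfx";
   const char *dst = is_compute ? "compute" : "gfx";

   if (c->pending_barriers & (BARRIER_SHADER_BUFFER | BARRIER_CONSTANT_BUFFER))
      printf("barrier: %s shader writes -> %s shader reads\n", src, dst);
   /* graphics-only barriers are skipped entirely for compute consumers */
   if (!is_compute && (c->pending_barriers & BARRIER_VERTEX_BUFFER))
      printf("barrier: %s shader writes -> vertex input reads\n", src);

   c->pending_barriers = 0;
}

static void
draw(struct ctx *c)
{
   flush_memory_barrier(c, false); /* apply pending barriers just in time */
   /* ...record the draw... */
   c->last_was_compute = false;
}

int
main(void)
{
   struct ctx c = {0};
   /* back-to-back frontend barriers no longer each hit the command buffer */
   memory_barrier(&c, BARRIER_SHADER_BUFFER);
   memory_barrier(&c, BARRIER_SHADER_BUFFER | BARRIER_VERTEX_BUFFER);
   draw(&c); /* one flush covers both requests */
   draw(&c); /* nothing pending: no barrier emitted at all */
   return 0;
}

The flush also scopes each barrier to the pipe that produced the last work and the pipe about to run, which is what the gfx_flags/cs_flags selection in the real patch does.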

@@ -109,6 +109,7 @@ struct zink_batch {
    unsigned work_count;
    bool has_work;
+   bool last_was_compute;
    bool in_rp; //renderpass is currently active
 };

@@ -2696,71 +2696,80 @@ zink_texture_barrier(struct pipe_context *pctx, unsigned flags)
 }
 
 static inline void
-mem_barrier(struct zink_batch *batch, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src, VkAccessFlags dst)
+mem_barrier(struct zink_context *ctx, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlags src, VkAccessFlags dst)
 {
+   struct zink_batch *batch = &ctx->batch;
    VkMemoryBarrier mb;
    mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    mb.pNext = NULL;
    mb.srcAccessMask = src;
    mb.dstAccessMask = dst;
+   zink_end_render_pass(ctx, batch);
    vkCmdPipelineBarrier(batch->state->cmdbuf, src_stage, dst_stage, 0, 1, &mb, 0, NULL, 0, NULL);
 }
 
+void
+zink_flush_memory_barrier(struct zink_context *ctx, bool is_compute)
+{
+   const VkPipelineStageFlags gfx_flags = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                                          VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+                                          VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+                                          VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+                                          VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+   const VkPipelineStageFlags cs_flags = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+   VkPipelineStageFlags src = ctx->batch.last_was_compute ? cs_flags : gfx_flags;
+   VkPipelineStageFlags dst = is_compute ? cs_flags : gfx_flags;
+
+   if (ctx->memory_barrier & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_IMAGE))
+      mem_barrier(ctx, src, dst, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
+
+   if (ctx->memory_barrier & PIPE_BARRIER_CONSTANT_BUFFER)
+      mem_barrier(ctx, src, dst,
+                  VK_ACCESS_SHADER_WRITE_BIT,
+                  VK_ACCESS_UNIFORM_READ_BIT);
+
+   if (!is_compute) {
+      if (ctx->memory_barrier & PIPE_BARRIER_INDIRECT_BUFFER)
+         mem_barrier(ctx, src, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
+                     VK_ACCESS_SHADER_WRITE_BIT,
+                     VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
+      if (ctx->memory_barrier & PIPE_BARRIER_VERTEX_BUFFER)
+         mem_barrier(ctx, gfx_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+                     VK_ACCESS_SHADER_WRITE_BIT,
+                     VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT);
+      if (ctx->memory_barrier & PIPE_BARRIER_INDEX_BUFFER)
+         mem_barrier(ctx, gfx_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+                     VK_ACCESS_SHADER_WRITE_BIT,
+                     VK_ACCESS_INDEX_READ_BIT);
+      if (ctx->memory_barrier & PIPE_BARRIER_FRAMEBUFFER)
+         zink_texture_barrier(&ctx->base, 0);
+      if (ctx->memory_barrier & PIPE_BARRIER_STREAMOUT_BUFFER)
+         mem_barrier(ctx, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                          VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+                          VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT,
+                     VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
+                     VK_ACCESS_SHADER_READ_BIT,
+                     VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
+                     VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT);
+   }
+   ctx->memory_barrier = 0;
+}
+
 static void
 zink_memory_barrier(struct pipe_context *pctx, unsigned flags)
 {
    struct zink_context *ctx = zink_context(pctx);
 
-   VkPipelineStageFlags all_flags = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
-                                    VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
-                                    VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
-                                    VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
-                                    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
-                                    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-   if (!(flags & ~PIPE_BARRIER_UPDATE))
+   flags &= ~PIPE_BARRIER_UPDATE;
+   if (!flags)
       return;
 
-   struct zink_batch *batch = &ctx->batch;
-   zink_end_render_pass(ctx, batch);
-
    if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
       /* TODO: this should flush all persistent buffers in use as I think */
       flags &= ~PIPE_BARRIER_MAPPED_BUFFER;
    }
-   if (flags & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_IMAGE))
-      mem_barrier(batch, all_flags, all_flags, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
-   if (flags & PIPE_BARRIER_VERTEX_BUFFER)
-      mem_barrier(batch, all_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                  VK_ACCESS_SHADER_WRITE_BIT,
-                  VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT);
-   if (flags & PIPE_BARRIER_INDEX_BUFFER)
-      mem_barrier(batch, all_flags, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                  VK_ACCESS_SHADER_WRITE_BIT,
-                  VK_ACCESS_INDEX_READ_BIT);
-   if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
-      mem_barrier(batch, all_flags, all_flags,
-                  VK_ACCESS_SHADER_WRITE_BIT,
-                  VK_ACCESS_UNIFORM_READ_BIT);
-   if (flags & PIPE_BARRIER_INDIRECT_BUFFER)
-      mem_barrier(batch, all_flags, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
-                  VK_ACCESS_SHADER_WRITE_BIT,
-                  VK_ACCESS_INDIRECT_COMMAND_READ_BIT);
-   if (flags & PIPE_BARRIER_FRAMEBUFFER)
-      zink_texture_barrier(pctx, 0);
-   if (flags & PIPE_BARRIER_STREAMOUT_BUFFER)
-      mem_barrier(batch, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
-                         VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
-                         VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT,
-                  VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT,
-                  VK_ACCESS_SHADER_READ_BIT,
-                  VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT |
-                  VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT);
+   ctx->memory_barrier = flags;
 }
 
 static void
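
The zink_context.c change splits barrier handling in two: zink_memory_barrier now only strips PIPE_BARRIER_UPDATE (and the still-TODO PIPE_BARRIER_MAPPED_BUFFER case) and records the remaining flags on the context, while zink_flush_memory_barrier later turns those flags into vkCmdPipelineBarrier calls. Because the flush knows whether the upcoming job is graphics or compute (is_compute) and what the previous job was (last_was_compute), it can use narrow stage masks instead of the old all_flags constant, and it skips the graphics-only barriers (indirect, vertex/index, framebuffer, streamout) entirely when the consumer is a compute dispatch.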

@@ -300,6 +300,7 @@ struct zink_context {
    struct set *need_barriers[2]; //gfx, compute
    struct set update_barriers[2][2]; //[gfx, compute][current, next]
    uint8_t barrier_set_idx[2];
+   unsigned memory_barrier;
 
    uint32_t num_so_targets;
    struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_OUTPUTS];
@@ -417,6 +418,8 @@ zink_pipeline_flags_from_pipe_stage(enum pipe_shader_type pstage)
 void
 zink_rebind_all_buffers(struct zink_context *ctx);
 
+void
+zink_flush_memory_barrier(struct zink_context *ctx, bool is_compute);
+
 void
 zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen);

@@ -430,6 +430,7 @@ zink_draw_vbo(struct pipe_context *pctx,
    unsigned work_count = ctx->batch.work_count;
    enum pipe_prim_type mode = (enum pipe_prim_type)dinfo->mode;
 
+   zink_flush_memory_barrier(ctx, false);
    update_barriers(ctx, false);
 
    if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter)) {
@@ -756,6 +757,7 @@ zink_draw_vbo(struct pipe_context *pctx,
       screen->vk.CmdEndTransformFeedbackEXT(batch->state->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
    }
    batch->has_work = true;
+   batch->last_was_compute = false;
    ctx->batch.work_count = work_count;
    /* flush if there's >100k draws */
    if (unlikely(work_count >= 30000) || ctx->oom_flush)
@@ -771,6 +773,7 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
    struct zink_batch *batch = &ctx->batch;
 
    update_barriers(ctx, true);
+   zink_flush_memory_barrier(ctx, true);
 
    if (zink_program_has_descriptors(&ctx->curr_compute->base))
       screen->descriptors_update(ctx, true);
@@ -804,6 +807,7 @@ zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
    } else
       vkCmdDispatch(batch->state->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
    batch->has_work = true;
+   batch->last_was_compute = true;
    /* flush if there's >100k computes */
    if (unlikely(ctx->batch.work_count >= 30000) || ctx->oom_flush)
       pctx->flush(pctx, NULL, 0);
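
The zink_draw.cpp hooks carry the bookkeeping for the scheme: each path flushes any pending barrier flags for its own pipe before recording work, then marks itself in batch->last_was_compute afterward, so the next flush can scope its source stage mask to whichever shader stages actually produced the preceding work.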