From 4194774edf5998e3e0126ab1b058278da978fdaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 22 Aug 2024 13:29:08 -0400 Subject: [PATCH] radeonsi: move barriers out of si_launch_grid_internal_ssbos Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/gfx11_query.c | 11 +++++++--- .../drivers/radeonsi/si_compute_blit.c | 20 +++++++++++++++---- src/gallium/drivers/radeonsi/si_query.c | 8 +++++++- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/gfx11_query.c b/src/gallium/drivers/radeonsi/gfx11_query.c index ef33b4b2e40..154f492d6cc 100644 --- a/src/gallium/drivers/radeonsi/gfx11_query.c +++ b/src/gallium/drivers/radeonsi/gfx11_query.c @@ -392,9 +392,14 @@ static void gfx11_sh_query_get_result_resource(struct si_context *sctx, struct s /* ssbo[2] is either tmp_buffer or resource */ assert(ssbo[2].buffer); - si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader, - SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER, - 3, ssbo, (1 << 2) | (ssbo[1].buffer ? 1 << 1 : 0)); + + unsigned op_flags = SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER; + unsigned writable_bitmask = (1 << 2) | (ssbo[1].buffer ? 1 << 1 : 0); + + si_barrier_before_internal_op(sctx, op_flags, 3, ssbo, writable_bitmask, 0, NULL); + si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader, op_flags, 3, ssbo, + writable_bitmask); + si_barrier_after_internal_op(sctx, op_flags, 3, ssbo, writable_bitmask, 0, NULL); if (qbuf == query->last) break; diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index 52597247856..a7bdf5cc3ba 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -223,11 +223,9 @@ void si_launch_grid_internal_ssbos(struct si_context *sctx, struct pipe_grid_inf writeable_bitmask, true /* don't update bind_history to prevent unnecessary syncs later */); - si_barrier_before_internal_op(sctx, flags, num_buffers, buffers, writeable_bitmask, 0, NULL); si_compute_begin_internal(sctx, flags); si_launch_grid_internal(sctx, info, shader); si_compute_end_internal(sctx); - si_barrier_after_internal_op(sctx, flags, num_buffers, buffers, writeable_bitmask, 0, NULL); /* Restore states. */ sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, num_buffers, saved_sb, @@ -284,7 +282,9 @@ void si_compute_clear_buffer_rmw(struct si_context *sctx, struct pipe_resource * if (!sctx->cs_clear_buffer_rmw) sctx->cs_clear_buffer_rmw = si_create_clear_buffer_rmw_cs(sctx); + si_barrier_before_internal_op(sctx, flags, 1, &sb, 0x1, 0, NULL); si_launch_grid_internal_ssbos(sctx, &info, sctx->cs_clear_buffer_rmw, flags, 1, &sb, 0x1); + si_barrier_after_internal_op(sctx, flags, 1, &sb, 0x1, 0, NULL); } /** @@ -356,8 +356,12 @@ bool si_compute_clear_copy_buffer(struct si_context *sctx, struct pipe_resource struct pipe_grid_info grid = {}; set_work_size(&grid, dispatch.workgroup_size, 1, 1, dispatch.num_threads, 1, 1); + unsigned writable_bitmask = is_copy ? 0x2 : 0x1; + + si_barrier_before_internal_op(sctx, flags, dispatch.num_ssbos, sb, writable_bitmask, 0, NULL); si_launch_grid_internal_ssbos(sctx, &grid, shader, flags, dispatch.num_ssbos, sb, - is_copy ? 0x2 : 0x1); + writable_bitmask); + si_barrier_after_internal_op(sctx, flags, dispatch.num_ssbos, sb, writable_bitmask, 0, NULL); return true; } @@ -455,7 +459,9 @@ void si_compute_shorten_ubyte_buffer(struct si_context *sctx, struct pipe_resour sb[1].buffer_offset = src_offset; sb[1].buffer_size = count; + si_barrier_before_internal_op(sctx, flags, 2, sb, 0x1, 0, NULL); si_launch_grid_internal_ssbos(sctx, &info, sctx->cs_ubyte_to_ushort, flags, 2, sb, 0x1); + si_barrier_after_internal_op(sctx, flags, 2, sb, 0x1, 0, NULL); } static void si_compute_save_and_bind_images(struct si_context *sctx, unsigned num_images, @@ -540,7 +546,11 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex) struct pipe_grid_info info = {}; set_work_size(&info, 8, 8, 1, width, height, 1); - si_launch_grid_internal_ssbos(sctx, &info, *shader, SI_OP_SYNC_BEFORE, 1, &sb, 0x1); + unsigned flags = SI_OP_SYNC_BEFORE; + + si_barrier_before_internal_op(sctx, flags, 1, &sb, 0x1, 0, NULL); + si_launch_grid_internal_ssbos(sctx, &info, *shader, flags, 1, &sb, 0x1); + si_barrier_after_internal_op(sctx, flags, 1, &sb, 0x1, 0, NULL); /* Don't flush caches. L2 will be flushed by the kernel fence. */ } @@ -585,7 +595,9 @@ void gfx9_clear_dcc_msaa(struct si_context *sctx, struct pipe_resource *res, uin struct pipe_grid_info info = {}; set_work_size(&info, 8, 8, 1, width, height, depth); + si_barrier_before_internal_op(sctx, flags, 1, &sb, 0x1, 0, NULL); si_launch_grid_internal_ssbos(sctx, &info, *shader, flags, 1, &sb, 0x1); + si_barrier_after_internal_op(sctx, flags, 1, &sb, 0x1, 0, NULL); } /* Expand FMASK to make it identity, so that image stores can ignore it. */ diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c index 316a7bfa83e..5376b88ef95 100644 --- a/src/gallium/drivers/radeonsi/si_query.c +++ b/src/gallium/drivers/radeonsi/si_query.c @@ -1644,8 +1644,14 @@ static void si_query_hw_get_result_resource(struct si_context *sctx, struct si_q si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x80000000, 0x80000000, WAIT_REG_MEM_EQUAL); } + + unsigned op_flags = SI_OP_SYNC_AFTER; + unsigned writable_bitmask = 0x4; + + si_barrier_before_internal_op(sctx, op_flags, 3, ssbo, writable_bitmask, 0, NULL); si_launch_grid_internal_ssbos(sctx, &grid, sctx->query_result_shader, - SI_OP_SYNC_AFTER, 3, ssbo, 0x4); + op_flags, 3, ssbo, writable_bitmask); + si_barrier_after_internal_op(sctx, op_flags, 3, ssbo, writable_bitmask, 0, NULL); } si_restore_qbo_state(sctx, &saved_state);