ac/llvm: fix the remaining s_barriers for LLVM 15
LLVM 15 doesn't insert s_waitcnt before barriers.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16304>
This commit is contained in:
@@ -4025,6 +4025,7 @@ void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan
|
||||
/* Emit the complete wave scan in one call: the "top" half, then an
 * s_waitcnt followed by an s_barrier for ws->stage, then the "bottom" half.
 */
void ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
|
||||
{
|
||||
ac_build_wg_wavescan_top(ctx, ws);
|
||||
/* LLVM 15 doesn't insert s_waitcnt before barriers, so emit it explicitly. */
ac_build_waitcnt(ctx, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(ctx, ws->stage);
|
||||
ac_build_wg_wavescan_bottom(ctx, ws);
|
||||
}
|
||||
@@ -4087,6 +4088,7 @@ void ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
|
||||
/* Emit the complete scan in one call: the "top" half, then an s_waitcnt
 * followed by an s_barrier for ws->stage, then the "bottom" half.
 */
void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
|
||||
{
|
||||
ac_build_wg_scan_top(ctx, ws);
|
||||
/* LLVM 15 doesn't insert s_waitcnt before barriers, so emit it explicitly. */
ac_build_waitcnt(ctx, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(ctx, ws->stage);
|
||||
ac_build_wg_scan_bottom(ctx, ws);
|
||||
}
|
||||
|
@@ -1338,6 +1338,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||
/* Wait for GS stores to finish. */
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring, get_thread_id_in_tg(ctx));
|
||||
@@ -1384,6 +1385,7 @@ gfx10_ngg_gs_emit_prologue(struct radv_shader_context *ctx)
|
||||
LLVMBuildBr(ctx->ac.builder, merge_block);
|
||||
LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
|
||||
@@ -1459,6 +1461,7 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef tmp, tmp2;
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
const LLVMValueRef tid = get_thread_id_in_tg(ctx);
|
||||
@@ -1565,6 +1568,7 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
|
||||
}
|
||||
ac_build_endif(&ctx->ac, 5130);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
/* Export primitive data */
|
||||
@@ -2150,6 +2154,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
||||
* and contains a barrier, it will wait there and then
|
||||
* reach s_endpgm.
|
||||
*/
|
||||
ac_build_waitcnt(&ctx.ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx.ac, shaders[shader_idx]->info.stage);
|
||||
}
|
||||
|
||||
|
@@ -500,6 +500,7 @@ static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout
|
||||
}
|
||||
}
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
/* Fetch the per-buffer offsets and per-stream emit counts in all waves. */
|
||||
@@ -1023,6 +1024,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
builder, packed_data,
|
||||
ac_build_gep0(&ctx->ac, es_vtxptr, LLVMConstInt(ctx->ac.i32, lds_packed_data, 0)));
|
||||
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
|
||||
@@ -1142,6 +1145,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
cull_primitive(ctx, pos, clipdist_accepted, gs_accepted, gs_vtxptr);
|
||||
}
|
||||
ac_build_endif(&ctx->ac, 16002);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
gs_accepted = LLVMBuildLoad(builder, gs_accepted, "");
|
||||
@@ -1172,6 +1177,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
}
|
||||
ac_build_endif(&ctx->ac, 16008);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
/* Load the vertex masks and compute the new ES thread count. */
|
||||
@@ -1263,6 +1269,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
ac_build_s_endpgm(&ctx->ac);
|
||||
}
|
||||
ac_build_endif(&ctx->ac, 19202);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
/* Send the final vertex and primitive counts. */
|
||||
@@ -1408,8 +1416,10 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
||||
|
||||
/* These two also use LDS. */
|
||||
if (gfx10_ngg_writes_user_edgeflags(shader) ||
|
||||
(ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id))
|
||||
(ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.mono.u.vs_export_prim_id)) {
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
|
||||
ctx->return_value = ret;
|
||||
}
|
||||
@@ -1512,8 +1522,10 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
||||
assert(!unterminated_es_if_block);
|
||||
|
||||
/* Streamout already inserted the barrier, so don't insert it again. */
|
||||
if (!ctx->so.num_outputs)
|
||||
if (!ctx->so.num_outputs) {
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
|
||||
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
|
||||
/* Load edge flags from ES threads and store them into VGPRs in GS threads. */
|
||||
@@ -1536,8 +1548,10 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
||||
assert(!unterminated_es_if_block);
|
||||
|
||||
/* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
|
||||
if (ctx->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
|
||||
if (ctx->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader)) {
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
|
||||
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
|
||||
/* Extract the PROVOKING_VTX_INDEX field. */
|
||||
@@ -1630,7 +1644,8 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
||||
outputs[i].vertex_streams = 0;
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
||||
/* Wait for GS stores to finish. */
|
||||
/* Wait for LDS stores to finish. */
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
tmp = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
|
||||
@@ -1862,6 +1877,7 @@ void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx)
|
||||
}
|
||||
ac_build_endif(&ctx->ac, 15090);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
|
||||
@@ -1926,6 +1942,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
|
||||
|
||||
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
const LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx);
|
||||
@@ -2003,8 +2020,10 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
|
||||
LLVMValueRef prim_enable = LLVMBuildAnd(builder, live, is_emit, "");
|
||||
|
||||
/* Wait for streamout to finish before we kill primitives. */
|
||||
if (ctx->so.num_outputs)
|
||||
if (ctx->so.num_outputs) {
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
|
||||
ac_build_ifcc(&ctx->ac, prim_enable, 0);
|
||||
{
|
||||
@@ -2062,6 +2081,8 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
|
||||
ac_build_endif(&ctx->ac, 0);
|
||||
}
|
||||
ac_build_endif(&ctx->ac, 0);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
|
||||
@@ -2131,6 +2152,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
|
||||
}
|
||||
ac_build_endif(&ctx->ac, 5130);
|
||||
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
|
||||
/* Export primitive data */
|
||||
|
@@ -1015,10 +1015,13 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
||||
/* We need the barrier only if TCS inputs are read from LDS. */
|
||||
if (!shader->key.ge.opt.same_patch_vertices ||
|
||||
shader->selector->info.base.inputs_read &
|
||||
~shader->selector->info.tcs_vgpr_only_inputs)
|
||||
~shader->selector->info.tcs_vgpr_only_inputs) {
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
/* gfx10_ngg_gs_emit_prologue inserts the barrier for NGG. */
|
||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM);
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user