From bdf3797aeb77a8346c1af6d86f77a7a834cca777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 15 May 2022 02:17:20 -0400 Subject: [PATCH] ac,radeonsi: don't export null from PS if it has no effect on gfx10+ We just need to pass the uses_discard flag to the epilog. The hw skips the export anyway. This will hang if SPI registers declare an output format or KILL_ENABLE is set because those cases require an export with done=1. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/llvm/ac_llvm_build.c | 8 +++++++- src/amd/llvm/ac_llvm_build.h | 2 +- src/amd/vulkan/radv_nir_to_llvm.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 1 + src/gallium/drivers/radeonsi/si_shader.h | 9 +++++++++ src/gallium/drivers/radeonsi/si_shader_llvm_ps.c | 2 +- src/gallium/drivers/radeonsi/si_state_shaders.cpp | 5 +---- 7 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index 9401ed27a52..100ed9fa251 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -1996,10 +1996,16 @@ void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a) } } -void ac_build_export_null(struct ac_llvm_context *ctx) +void ac_build_export_null(struct ac_llvm_context *ctx, bool uses_discard) { struct ac_export_args args; + /* Gfx10+ doesn't need to export anything if we don't need to export the EXEC mask + * for discard. + */ + if (ctx->gfx_level >= GFX10 && !uses_discard) + return; + args.enabled_channels = 0x0; /* enabled channels */ args.valid_mask = 1; /* whether the EXEC mask is valid */ args.done = 1; /* DONE bit */ diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 0b871e78fc5..afe01985c16 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -342,7 +342,7 @@ struct ac_export_args { void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a); -void ac_build_export_null(struct ac_llvm_context *ctx); +void ac_build_export_null(struct ac_llvm_context *ctx, bool uses_discard); enum ac_image_opcode { diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 8a5c2d783e0..9aaf6287edf 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -1797,7 +1797,7 @@ handle_fs_outputs_post(struct radv_shader_context *ctx) if (depth || stencil || samplemask) radv_export_mrt_z(ctx, depth, stencil, samplemask); else if (!index) - ac_build_export_null(&ctx->ac); + ac_build_export_null(&ctx->ac, ctx->shader_info->ps.can_discard); } static void diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0fb7abd37a3..68c61ec4305 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2222,6 +2222,7 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke struct si_shader_info *info = &shader->selector->info; memset(key, 0, sizeof(*key)); key->ps_epilog.wave32 = shader->wave_size == 32; + key->ps_epilog.uses_discard = si_shader_uses_discard(shader); key->ps_epilog.colors_written = info->colors_written; key->ps_epilog.color_types = info->output_color_types; key->ps_epilog.writes_z = info->writes_z; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 2eee541dfc1..c86e2f27f2a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -642,6 +642,7 @@ union si_shader_part_key { struct { struct si_ps_epilog_bits states; unsigned wave32 : 1; + unsigned uses_discard : 1; unsigned colors_written : 8; unsigned color_types : 16; unsigned writes_z : 1; @@ -1053,6 +1054,14 @@ static inline bool si_shader_uses_streamout(struct si_shader *shader) !shader->key.ge.opt.remove_streamout; } +static inline bool si_shader_uses_discard(struct si_shader *shader) +{ + /* Changes to this should also update ps_modifies_zs. */ + return shader->selector->info.base.fs.uses_discard || + shader->key.ps.part.prolog.poly_stipple || + shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS; +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index a3f4c7fd184..282c32ac094 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -946,7 +946,7 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part for (unsigned i = 0; i < exp.num; i++) ac_build_export(&ctx->ac, &exp.args[i]); } else { - ac_build_export_null(&ctx->ac); + ac_build_export_null(&ctx->ac, key->ps_epilog.uses_discard); } /* Compile. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index fc2bbb727f9..d2901c61939 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1871,10 +1871,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) S_02880C_Z_EXPORT_ENABLE(info->writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->writes_stencil) | S_02880C_MASK_EXPORT_ENABLE(info->writes_samplemask) | - /* Changes KILL_ENABLE should also update ps_modifies_zs. */ - S_02880C_KILL_ENABLE(info->base.fs.uses_discard || - shader->key.ps.part.prolog.poly_stipple || - shader->key.ps.part.epilog.alpha_func != PIPE_FUNC_ALWAYS); + S_02880C_KILL_ENABLE(si_shader_uses_discard(shader)); switch (info->base.fs.depth_layout) { case FRAG_DEPTH_LAYOUT_GREATER: