radeonsi,radv/llvm: fix amdgpu-color/depth-export with epilogs

The main shader wouldn't use ac_build_export(), and the discard exit would
have no export.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Fixes: 1174ab6d56 ("ac/llvm: use amdgpu-color-export/amdgpu-depth-export")
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7991
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20482>
This commit is contained in:
Rhys Perry
2023-01-02 18:05:14 +00:00
committed by Marge Bot
parent c68530bedb
commit 1825ad134b
7 changed files with 40 additions and 20 deletions

View File

@@ -59,7 +59,7 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *
enum amd_gfx_level gfx_level, enum radeon_family family, enum amd_gfx_level gfx_level, enum radeon_family family,
bool has_3d_cube_border_color_mipmap, bool has_3d_cube_border_color_mipmap,
enum ac_float_mode float_mode, unsigned wave_size, enum ac_float_mode float_mode, unsigned wave_size,
unsigned ballot_mask_bits) unsigned ballot_mask_bits, bool exports_color_null, bool exports_mrtz)
{ {
ctx->context = LLVMContextCreate(); ctx->context = LLVMContextCreate();
@@ -69,6 +69,8 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *
ctx->wave_size = wave_size; ctx->wave_size = wave_size;
ctx->ballot_mask_bits = ballot_mask_bits; ctx->ballot_mask_bits = ballot_mask_bits;
ctx->float_mode = float_mode; ctx->float_mode = float_mode;
ctx->exports_color_null = exports_color_null;
ctx->exports_mrtz = exports_mrtz;
ctx->module = ac_create_module(compiler->tm, ctx->context); ctx->module = ac_create_module(compiler->tm, ctx->context);
ctx->builder = ac_create_builder(ctx->context, float_mode); ctx->builder = ac_create_builder(ctx->context, float_mode);
@@ -2028,15 +2030,6 @@ void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", ctx->voidt, args, 8, 0); ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", ctx->voidt, args, 8, 0);
} }
if (LLVM_VERSION_MAJOR >= 15 && a->target == V_008DFC_SQ_EXP_MRTZ) {
LLVMAddTargetDependentFunctionAttr(ctx->main_function.value, "amdgpu-depth-export", "1");
} else if (LLVM_VERSION_MAJOR >= 15 && a->target <= V_008DFC_SQ_EXP_NULL) {
/* We need this attribute even for NULL targets, so that an export is created for full-wave
* discards on GFX10+.
*/
LLVMAddTargetDependentFunctionAttr(ctx->main_function.value, "amdgpu-color-export", "1");
}
} }
void ac_build_export_null(struct ac_llvm_context *ctx, bool uses_discard) void ac_build_export_null(struct ac_llvm_context *ctx, bool uses_discard)
@@ -4630,8 +4623,10 @@ struct ac_llvm_pointer ac_build_main(const struct ac_shader_args *args, struct a
"preserve-sign,preserve-sign"); "preserve-sign,preserve-sign");
if (LLVM_VERSION_MAJOR >= 15 && convention == AC_LLVM_AMDGPU_PS) { if (LLVM_VERSION_MAJOR >= 15 && convention == AC_LLVM_AMDGPU_PS) {
LLVMAddTargetDependentFunctionAttr(main_function, "amdgpu-depth-export", "0"); LLVMAddTargetDependentFunctionAttr(main_function, "amdgpu-depth-export",
LLVMAddTargetDependentFunctionAttr(main_function, "amdgpu-color-export", "0"); ctx->exports_mrtz ? "1" : "0");
LLVMAddTargetDependentFunctionAttr(main_function, "amdgpu-color-export",
ctx->exports_color_null ? "1" : "0");
} }
return ctx->main_function; return ctx->main_function;

View File

@@ -156,6 +156,9 @@ struct ac_llvm_context {
unsigned float_mode; unsigned float_mode;
bool exports_color_null;
bool exports_mrtz;
struct ac_llvm_pointer lds; struct ac_llvm_pointer lds;
}; };
@@ -163,7 +166,7 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *
enum amd_gfx_level gfx_level, enum radeon_family family, enum amd_gfx_level gfx_level, enum radeon_family family,
bool has_3d_cube_border_color_mipmap, bool has_3d_cube_border_color_mipmap,
enum ac_float_mode float_mode, unsigned wave_size, enum ac_float_mode float_mode, unsigned wave_size,
unsigned ballot_mask_bits); unsigned ballot_mask_bits, bool exports_color_null, bool exports_mrtz);
void ac_llvm_context_dispose(struct ac_llvm_context *ctx); void ac_llvm_context_dispose(struct ac_llvm_context *ctx);

View File

@@ -1243,9 +1243,16 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO; float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
} }
bool exports_mrtz = false;
bool exports_color_null = false;
if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT) {
exports_mrtz = info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask;
exports_color_null = !exports_mrtz || (shaders[0]->info.outputs_written & (0xffu << FRAG_RESULT_DATA0));
}
ac_llvm_context_init(&ctx.ac, ac_llvm, options->gfx_level, options->family, ac_llvm_context_init(&ctx.ac, ac_llvm, options->gfx_level, options->family,
options->has_3d_cube_border_color_mipmap, options->has_3d_cube_border_color_mipmap,
float_mode, info->wave_size, info->ballot_bit_size); float_mode, info->wave_size, info->ballot_bit_size, exports_color_null, exports_mrtz);
ctx.context = ctx.ac.context; ctx.context = ctx.ac.context;
ctx.max_workgroup_size = info->workgroup_size; ctx.max_workgroup_size = info->workgroup_size;

View File

@@ -2176,6 +2176,8 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
struct si_shader shader = {}; struct si_shader shader = {};
shader.selector = &sel; shader.selector = &sel;
bool wave32 = false; bool wave32 = false;
bool exports_color_null = false;
bool exports_mrtz = false;
switch (stage) { switch (stage) {
case MESA_SHADER_VERTEX: case MESA_SHADER_VERTEX:
@@ -2196,6 +2198,11 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
} else { } else {
shader.key.ps.part.epilog = key->ps_epilog.states; shader.key.ps.part.epilog = key->ps_epilog.states;
wave32 = key->ps_epilog.wave32; wave32 = key->ps_epilog.wave32;
exports_color_null = key->ps_epilog.colors_written;
exports_mrtz = key->ps_epilog.writes_z || key->ps_epilog.writes_stencil ||
key->ps_epilog.writes_samplemask;
if (!exports_mrtz && !exports_color_null)
exports_color_null = key->ps_epilog.uses_discard || sscreen->info.gfx_level < GFX10;
} }
break; break;
default: default:
@@ -2203,7 +2210,7 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
} }
struct si_shader_context ctx; struct si_shader_context ctx;
si_llvm_context_init(&ctx, sscreen, compiler, wave32 ? 32 : 64); si_llvm_context_init(&ctx, sscreen, compiler, wave32 ? 32 : 64, exports_color_null, exports_mrtz);
ctx.shader = &shader; ctx.shader = &shader;
ctx.stage = stage; ctx.stage = stage;

View File

@@ -199,7 +199,8 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
struct ac_llvm_context *ac, struct util_debug_callback *debug, struct ac_llvm_context *ac, struct util_debug_callback *debug,
gl_shader_stage stage, const char *name, bool less_optimized); gl_shader_stage stage, const char *name, bool less_optimized);
void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen, void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
struct ac_llvm_compiler *compiler, unsigned wave_size); struct ac_llvm_compiler *compiler, unsigned wave_size,
bool exports_color_null, bool exports_mrtz);
void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types, void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
unsigned num_return_elems, unsigned max_workgroup_size); unsigned num_return_elems, unsigned max_workgroup_size);
void si_llvm_create_main_func(struct si_shader_context *ctx); void si_llvm_create_main_func(struct si_shader_context *ctx);

View File

@@ -127,14 +127,16 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
} }
void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen, void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
struct ac_llvm_compiler *compiler, unsigned wave_size) struct ac_llvm_compiler *compiler, unsigned wave_size,
bool exports_color_null, bool exports_mrtz)
{ {
memset(ctx, 0, sizeof(*ctx)); memset(ctx, 0, sizeof(*ctx));
ctx->screen = sscreen; ctx->screen = sscreen;
ctx->compiler = compiler; ctx->compiler = compiler;
ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.gfx_level, sscreen->info.family, ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.gfx_level, sscreen->info.family,
sscreen->info.has_3d_cube_border_color_mipmap, AC_FLOAT_MODE_DEFAULT_OPENGL, wave_size, 64); sscreen->info.has_3d_cube_border_color_mipmap, AC_FLOAT_MODE_DEFAULT_OPENGL,
wave_size, 64, exports_color_null, exports_mrtz);
} }
void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types, void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
@@ -1101,7 +1103,12 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
struct si_shader_selector *sel = shader->selector; struct si_shader_selector *sel = shader->selector;
struct si_shader_context ctx; struct si_shader_context ctx;
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size); bool exports_color_null = sel->info.colors_written;
bool exports_mrtz = sel->info.writes_z || sel->info.writes_stencil || sel->info.writes_samplemask;
if (!exports_mrtz && !exports_color_null)
exports_color_null = si_shader_uses_discard(shader) || sscreen->info.gfx_level < GFX10;
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size, exports_color_null, exports_mrtz);
ctx.so = *so; ctx.so = *so;
ctx.args = args; ctx.args = args;

View File

@@ -449,7 +449,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
shader->info.vs_output_param_mask |= BITFIELD64_BIT(i); shader->info.vs_output_param_mask |= BITFIELD64_BIT(i);
} }
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size); si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size, false, false);
ctx.shader = shader; ctx.shader = shader;
ctx.stage = MESA_SHADER_VERTEX; ctx.stage = MESA_SHADER_VERTEX;
ctx.so = *so; ctx.so = *so;