radeonsi,radv/llvm: fix amdgpu-color/depth-export with epilogs
The main shader wouldn't use ac_build_export(), and the discard exit would
have no export.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Fixes: 1174ab6d56
("ac/llvm: use amdgpu-color-export/amdgpu-depth-export")
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7991
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20482>
This commit is contained in:
@@ -59,7 +59,7 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *
|
|||||||
enum amd_gfx_level gfx_level, enum radeon_family family,
|
enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||||
bool has_3d_cube_border_color_mipmap,
|
bool has_3d_cube_border_color_mipmap,
|
||||||
enum ac_float_mode float_mode, unsigned wave_size,
|
enum ac_float_mode float_mode, unsigned wave_size,
|
||||||
unsigned ballot_mask_bits)
|
unsigned ballot_mask_bits, bool exports_color_null, bool exports_mrtz)
|
||||||
{
|
{
|
||||||
ctx->context = LLVMContextCreate();
|
ctx->context = LLVMContextCreate();
|
||||||
|
|
||||||
@@ -69,6 +69,8 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *
|
|||||||
ctx->wave_size = wave_size;
|
ctx->wave_size = wave_size;
|
||||||
ctx->ballot_mask_bits = ballot_mask_bits;
|
ctx->ballot_mask_bits = ballot_mask_bits;
|
||||||
ctx->float_mode = float_mode;
|
ctx->float_mode = float_mode;
|
||||||
|
ctx->exports_color_null = exports_color_null;
|
||||||
|
ctx->exports_mrtz = exports_mrtz;
|
||||||
ctx->module = ac_create_module(compiler->tm, ctx->context);
|
ctx->module = ac_create_module(compiler->tm, ctx->context);
|
||||||
ctx->builder = ac_create_builder(ctx->context, float_mode);
|
ctx->builder = ac_create_builder(ctx->context, float_mode);
|
||||||
|
|
||||||
@@ -2028,15 +2030,6 @@ void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
|
|||||||
|
|
||||||
ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", ctx->voidt, args, 8, 0);
|
ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", ctx->voidt, args, 8, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (LLVM_VERSION_MAJOR >= 15 && a->target == V_008DFC_SQ_EXP_MRTZ) {
|
|
||||||
LLVMAddTargetDependentFunctionAttr(ctx->main_function.value, "amdgpu-depth-export", "1");
|
|
||||||
} else if (LLVM_VERSION_MAJOR >= 15 && a->target <= V_008DFC_SQ_EXP_NULL) {
|
|
||||||
/* We need this attribute even for NULL targets, so that an export is created for full-wave
|
|
||||||
* discards on GFX10+.
|
|
||||||
*/
|
|
||||||
LLVMAddTargetDependentFunctionAttr(ctx->main_function.value, "amdgpu-color-export", "1");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_build_export_null(struct ac_llvm_context *ctx, bool uses_discard)
|
void ac_build_export_null(struct ac_llvm_context *ctx, bool uses_discard)
|
||||||
@@ -4630,8 +4623,10 @@ struct ac_llvm_pointer ac_build_main(const struct ac_shader_args *args, struct a
|
|||||||
"preserve-sign,preserve-sign");
|
"preserve-sign,preserve-sign");
|
||||||
|
|
||||||
if (LLVM_VERSION_MAJOR >= 15 && convention == AC_LLVM_AMDGPU_PS) {
|
if (LLVM_VERSION_MAJOR >= 15 && convention == AC_LLVM_AMDGPU_PS) {
|
||||||
LLVMAddTargetDependentFunctionAttr(main_function, "amdgpu-depth-export", "0");
|
LLVMAddTargetDependentFunctionAttr(main_function, "amdgpu-depth-export",
|
||||||
LLVMAddTargetDependentFunctionAttr(main_function, "amdgpu-color-export", "0");
|
ctx->exports_mrtz ? "1" : "0");
|
||||||
|
LLVMAddTargetDependentFunctionAttr(main_function, "amdgpu-color-export",
|
||||||
|
ctx->exports_color_null ? "1" : "0");
|
||||||
}
|
}
|
||||||
|
|
||||||
return ctx->main_function;
|
return ctx->main_function;
|
||||||
|
@@ -156,6 +156,9 @@ struct ac_llvm_context {
|
|||||||
|
|
||||||
unsigned float_mode;
|
unsigned float_mode;
|
||||||
|
|
||||||
|
bool exports_color_null;
|
||||||
|
bool exports_mrtz;
|
||||||
|
|
||||||
struct ac_llvm_pointer lds;
|
struct ac_llvm_pointer lds;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -163,7 +166,7 @@ void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *
|
|||||||
enum amd_gfx_level gfx_level, enum radeon_family family,
|
enum amd_gfx_level gfx_level, enum radeon_family family,
|
||||||
bool has_3d_cube_border_color_mipmap,
|
bool has_3d_cube_border_color_mipmap,
|
||||||
enum ac_float_mode float_mode, unsigned wave_size,
|
enum ac_float_mode float_mode, unsigned wave_size,
|
||||||
unsigned ballot_mask_bits);
|
unsigned ballot_mask_bits, bool exports_color_null, bool exports_mrtz);
|
||||||
|
|
||||||
void ac_llvm_context_dispose(struct ac_llvm_context *ctx);
|
void ac_llvm_context_dispose(struct ac_llvm_context *ctx);
|
||||||
|
|
||||||
|
@@ -1243,9 +1243,16 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||||||
float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
|
float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool exports_mrtz = false;
|
||||||
|
bool exports_color_null = false;
|
||||||
|
if (shaders[0]->info.stage == MESA_SHADER_FRAGMENT) {
|
||||||
|
exports_mrtz = info->ps.writes_z || info->ps.writes_stencil || info->ps.writes_sample_mask;
|
||||||
|
exports_color_null = !exports_mrtz || (shaders[0]->info.outputs_written & (0xffu << FRAG_RESULT_DATA0));
|
||||||
|
}
|
||||||
|
|
||||||
ac_llvm_context_init(&ctx.ac, ac_llvm, options->gfx_level, options->family,
|
ac_llvm_context_init(&ctx.ac, ac_llvm, options->gfx_level, options->family,
|
||||||
options->has_3d_cube_border_color_mipmap,
|
options->has_3d_cube_border_color_mipmap,
|
||||||
float_mode, info->wave_size, info->ballot_bit_size);
|
float_mode, info->wave_size, info->ballot_bit_size, exports_color_null, exports_mrtz);
|
||||||
ctx.context = ctx.ac.context;
|
ctx.context = ctx.ac.context;
|
||||||
|
|
||||||
ctx.max_workgroup_size = info->workgroup_size;
|
ctx.max_workgroup_size = info->workgroup_size;
|
||||||
|
@@ -2176,6 +2176,8 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
|
|||||||
struct si_shader shader = {};
|
struct si_shader shader = {};
|
||||||
shader.selector = &sel;
|
shader.selector = &sel;
|
||||||
bool wave32 = false;
|
bool wave32 = false;
|
||||||
|
bool exports_color_null = false;
|
||||||
|
bool exports_mrtz = false;
|
||||||
|
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case MESA_SHADER_VERTEX:
|
case MESA_SHADER_VERTEX:
|
||||||
@@ -2196,6 +2198,11 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
|
|||||||
} else {
|
} else {
|
||||||
shader.key.ps.part.epilog = key->ps_epilog.states;
|
shader.key.ps.part.epilog = key->ps_epilog.states;
|
||||||
wave32 = key->ps_epilog.wave32;
|
wave32 = key->ps_epilog.wave32;
|
||||||
|
exports_color_null = key->ps_epilog.colors_written;
|
||||||
|
exports_mrtz = key->ps_epilog.writes_z || key->ps_epilog.writes_stencil ||
|
||||||
|
key->ps_epilog.writes_samplemask;
|
||||||
|
if (!exports_mrtz && !exports_color_null)
|
||||||
|
exports_color_null = key->ps_epilog.uses_discard || sscreen->info.gfx_level < GFX10;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -2203,7 +2210,7 @@ si_get_shader_part(struct si_screen *sscreen, struct si_shader_part **list,
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct si_shader_context ctx;
|
struct si_shader_context ctx;
|
||||||
si_llvm_context_init(&ctx, sscreen, compiler, wave32 ? 32 : 64);
|
si_llvm_context_init(&ctx, sscreen, compiler, wave32 ? 32 : 64, exports_color_null, exports_mrtz);
|
||||||
|
|
||||||
ctx.shader = &shader;
|
ctx.shader = &shader;
|
||||||
ctx.stage = stage;
|
ctx.stage = stage;
|
||||||
|
@@ -199,7 +199,8 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
|
|||||||
struct ac_llvm_context *ac, struct util_debug_callback *debug,
|
struct ac_llvm_context *ac, struct util_debug_callback *debug,
|
||||||
gl_shader_stage stage, const char *name, bool less_optimized);
|
gl_shader_stage stage, const char *name, bool less_optimized);
|
||||||
void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
|
void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
|
||||||
struct ac_llvm_compiler *compiler, unsigned wave_size);
|
struct ac_llvm_compiler *compiler, unsigned wave_size,
|
||||||
|
bool exports_color_null, bool exports_mrtz);
|
||||||
void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
|
void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
|
||||||
unsigned num_return_elems, unsigned max_workgroup_size);
|
unsigned num_return_elems, unsigned max_workgroup_size);
|
||||||
void si_llvm_create_main_func(struct si_shader_context *ctx);
|
void si_llvm_create_main_func(struct si_shader_context *ctx);
|
||||||
|
@@ -127,14 +127,16 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
|
void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
|
||||||
struct ac_llvm_compiler *compiler, unsigned wave_size)
|
struct ac_llvm_compiler *compiler, unsigned wave_size,
|
||||||
|
bool exports_color_null, bool exports_mrtz)
|
||||||
{
|
{
|
||||||
memset(ctx, 0, sizeof(*ctx));
|
memset(ctx, 0, sizeof(*ctx));
|
||||||
ctx->screen = sscreen;
|
ctx->screen = sscreen;
|
||||||
ctx->compiler = compiler;
|
ctx->compiler = compiler;
|
||||||
|
|
||||||
ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.gfx_level, sscreen->info.family,
|
ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.gfx_level, sscreen->info.family,
|
||||||
sscreen->info.has_3d_cube_border_color_mipmap, AC_FLOAT_MODE_DEFAULT_OPENGL, wave_size, 64);
|
sscreen->info.has_3d_cube_border_color_mipmap, AC_FLOAT_MODE_DEFAULT_OPENGL,
|
||||||
|
wave_size, 64, exports_color_null, exports_mrtz);
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
|
void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
|
||||||
@@ -1101,7 +1103,12 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
|||||||
struct si_shader_selector *sel = shader->selector;
|
struct si_shader_selector *sel = shader->selector;
|
||||||
struct si_shader_context ctx;
|
struct si_shader_context ctx;
|
||||||
|
|
||||||
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
|
bool exports_color_null = sel->info.colors_written;
|
||||||
|
bool exports_mrtz = sel->info.writes_z || sel->info.writes_stencil || sel->info.writes_samplemask;
|
||||||
|
if (!exports_mrtz && !exports_color_null)
|
||||||
|
exports_color_null = si_shader_uses_discard(shader) || sscreen->info.gfx_level < GFX10;
|
||||||
|
|
||||||
|
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size, exports_color_null, exports_mrtz);
|
||||||
ctx.so = *so;
|
ctx.so = *so;
|
||||||
ctx.args = args;
|
ctx.args = args;
|
||||||
|
|
||||||
|
@@ -449,7 +449,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||||||
shader->info.vs_output_param_mask |= BITFIELD64_BIT(i);
|
shader->info.vs_output_param_mask |= BITFIELD64_BIT(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
|
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size, false, false);
|
||||||
ctx.shader = shader;
|
ctx.shader = shader;
|
||||||
ctx.stage = MESA_SHADER_VERTEX;
|
ctx.stage = MESA_SHADER_VERTEX;
|
||||||
ctx.so = *so;
|
ctx.so = *so;
|
||||||
|
Reference in New Issue
Block a user