radeonsi: remove the gl_SampleMask FS output if MSAA is disabled
Removing the output is better than what the previous code did, which was only to clear MASK_EXPORT_ENABLE in DB_SHADER_CONTROL.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23216>
This commit is contained in:
@@ -298,6 +298,7 @@ typedef struct {
|
||||
/* OpenGL only */
|
||||
bool clamp_color;
|
||||
bool alpha_to_one;
|
||||
bool kill_samplemask;
|
||||
enum pipe_compare_func alpha_func;
|
||||
unsigned broadcast_last_cbuf;
|
||||
|
||||
|
@@ -348,6 +348,8 @@ emit_ps_color_clamp_and_alpha_test(nir_builder *b, lower_ps_state *s)
|
||||
static void
|
||||
emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
|
||||
{
|
||||
uint64_t outputs_written = b->shader->info.outputs_written;
|
||||
|
||||
nir_ssa_def *mrtz_alpha = NULL;
|
||||
if (s->options->alpha_to_coverage_via_mrtz) {
|
||||
mrtz_alpha = s->outputs[FRAG_RESULT_COLOR][3] ?
|
||||
@@ -359,11 +361,15 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
|
||||
nir_ssa_def *stencil = s->outputs[FRAG_RESULT_STENCIL][0];
|
||||
nir_ssa_def *sample_mask = s->outputs[FRAG_RESULT_SAMPLE_MASK][0];
|
||||
|
||||
if (s->options->kill_samplemask) {
|
||||
sample_mask = NULL;
|
||||
outputs_written &= ~BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
|
||||
}
|
||||
|
||||
/* skip mrtz export if no one has written to any of them */
|
||||
if (!depth && !stencil && !sample_mask && !mrtz_alpha)
|
||||
return;
|
||||
|
||||
uint64_t outputs_written = b->shader->info.outputs_written;
|
||||
/* use outputs_written to determine export format as we use it to set
|
||||
* R_028710_SPI_SHADER_Z_FORMAT instead of relying on the real store output,
|
||||
* because store output may be optimized out.
|
||||
|
@@ -704,7 +704,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
|
||||
num_returns =
|
||||
num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 +
|
||||
shader->selector->info.writes_z + shader->selector->info.writes_stencil +
|
||||
shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */;
|
||||
shader->ps.writes_samplemask + 1 /* SampleMaskIn */;
|
||||
|
||||
for (i = 0; i < num_return_sgprs; i++)
|
||||
ac_add_return(&args->ac, AC_ARG_SGPR);
|
||||
@@ -1149,7 +1149,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad
|
||||
num_outputs = util_bitcount(shader->selector->info.colors_written) +
|
||||
(shader->selector->info.writes_z ||
|
||||
shader->selector->info.writes_stencil ||
|
||||
shader->selector->info.writes_samplemask);
|
||||
shader->ps.writes_samplemask);
|
||||
}
|
||||
|
||||
util_debug_message(debug, SHADER_INFO,
|
||||
@@ -1394,6 +1394,8 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
||||
fprintf(f, " epilog.alpha_to_coverage_via_mrtz = %u\n", key->ps.part.epilog.alpha_to_coverage_via_mrtz);
|
||||
fprintf(f, " epilog.clamp_color = %u\n", key->ps.part.epilog.clamp_color);
|
||||
fprintf(f, " epilog.dual_src_blend_swizzle = %u\n", key->ps.part.epilog.dual_src_blend_swizzle);
|
||||
fprintf(f, " epilog.rbplus_depth_only_opt = %u\n", key->ps.part.epilog.rbplus_depth_only_opt);
|
||||
fprintf(f, " epilog.kill_samplemask = %u\n", key->ps.part.epilog.kill_samplemask);
|
||||
fprintf(f, " mono.poly_line_smoothing = %u\n", key->ps.mono.poly_line_smoothing);
|
||||
fprintf(f, " mono.point_smoothing = %u\n", key->ps.mono.point_smoothing);
|
||||
fprintf(f, " mono.interpolate_at_sample_force_center = %u\n",
|
||||
@@ -2232,6 +2234,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
|
||||
.alpha_to_one = key->ps.part.epilog.alpha_to_one,
|
||||
.alpha_func = key->ps.part.epilog.alpha_func,
|
||||
.broadcast_last_cbuf = key->ps.part.epilog.last_cbuf,
|
||||
.kill_samplemask = key->ps.part.epilog.kill_samplemask,
|
||||
|
||||
.bc_optimize_for_persp = key->ps.part.prolog.bc_optimize_for_persp,
|
||||
.bc_optimize_for_linear = key->ps.part.prolog.bc_optimize_for_linear,
|
||||
@@ -3076,7 +3079,8 @@ void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *ke
|
||||
key->ps_epilog.color_types = info->output_color_types;
|
||||
key->ps_epilog.writes_z = info->writes_z;
|
||||
key->ps_epilog.writes_stencil = info->writes_stencil;
|
||||
key->ps_epilog.writes_samplemask = info->writes_samplemask;
|
||||
key->ps_epilog.writes_samplemask = info->writes_samplemask &&
|
||||
!shader->key.ps.part.epilog.kill_samplemask;
|
||||
key->ps_epilog.states = shader->key.ps.part.epilog;
|
||||
}
|
||||
|
||||
@@ -3153,6 +3157,11 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key);
|
||||
|
||||
if (sel->stage == MESA_SHADER_FRAGMENT) {
|
||||
shader->ps.writes_samplemask = sel->info.writes_samplemask &&
|
||||
!shader->key.ps.part.epilog.kill_samplemask;
|
||||
}
|
||||
|
||||
/* LS, ES, VS are compiled on demand if the main part hasn't been
|
||||
* compiled for that stage.
|
||||
*
|
||||
|
@@ -606,6 +606,7 @@ struct si_ps_epilog_bits {
|
||||
unsigned clamp_color : 1;
|
||||
unsigned dual_src_blend_swizzle : 1; /* gfx11+ */
|
||||
unsigned rbplus_depth_only_opt:1;
|
||||
unsigned kill_samplemask:1;
|
||||
};
|
||||
|
||||
union si_shader_part_key {
|
||||
@@ -966,6 +967,7 @@ struct si_shader {
|
||||
unsigned cb_shader_mask;
|
||||
unsigned db_shader_control;
|
||||
unsigned num_interp;
|
||||
bool writes_samplemask;
|
||||
} ps;
|
||||
};
|
||||
|
||||
|
@@ -1042,11 +1042,15 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
struct si_shader_context ctx;
|
||||
enum ac_float_mode float_mode = nir->info.stage == MESA_SHADER_KERNEL ? AC_FLOAT_MODE_DEFAULT : AC_FLOAT_MODE_DEFAULT_OPENGL;
|
||||
bool exports_color_null = false;
|
||||
bool exports_mrtz = false;
|
||||
|
||||
bool exports_color_null = sel->info.colors_written;
|
||||
bool exports_mrtz = sel->info.writes_z || sel->info.writes_stencil || sel->info.writes_samplemask;
|
||||
if (!exports_mrtz && !exports_color_null)
|
||||
exports_color_null = si_shader_uses_discard(shader) || sscreen->info.gfx_level < GFX10;
|
||||
if (sel->stage == MESA_SHADER_FRAGMENT) {
|
||||
exports_color_null = sel->info.colors_written;
|
||||
exports_mrtz = sel->info.writes_z || sel->info.writes_stencil || shader->ps.writes_samplemask;
|
||||
if (!exports_mrtz && !exports_color_null)
|
||||
exports_color_null = si_shader_uses_discard(shader) || sscreen->info.gfx_level < GFX10;
|
||||
}
|
||||
|
||||
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size, exports_color_null, exports_mrtz,
|
||||
float_mode);
|
||||
|
@@ -1150,7 +1150,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
|
||||
rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state;
|
||||
|
||||
if (old_rs->multisample_enable != rs->multisample_enable) {
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
|
||||
|
||||
/* Update the small primitive filter workaround if necessary. */
|
||||
@@ -1507,7 +1506,6 @@ void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
|
||||
|
||||
static void si_emit_db_render_state(struct si_context *sctx)
|
||||
{
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
unsigned db_shader_control, db_render_control, db_count_control, vrs_override_cntl = 0;
|
||||
|
||||
/* DB_RENDER_CONTROL */
|
||||
@@ -1575,10 +1573,6 @@ static void si_emit_db_render_state(struct si_context *sctx)
|
||||
|
||||
db_shader_control = sctx->ps_db_shader_control;
|
||||
|
||||
/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
|
||||
if (!rs->multisample_enable)
|
||||
db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
|
||||
|
||||
if (sctx->screen->info.has_export_conflict_bug &&
|
||||
sctx->queued.named.blend->blend_enable_4bit &&
|
||||
si_get_num_coverage_samples(sctx) == 1) {
|
||||
|
@@ -1803,7 +1803,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
||||
/* DB_SHADER_CONTROL */
|
||||
shader->ps.db_shader_control = S_02880C_Z_EXPORT_ENABLE(info->writes_z) |
|
||||
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->writes_stencil) |
|
||||
S_02880C_MASK_EXPORT_ENABLE(info->writes_samplemask) |
|
||||
S_02880C_MASK_EXPORT_ENABLE(shader->ps.writes_samplemask) |
|
||||
S_02880C_KILL_ENABLE(si_shader_uses_discard(shader));
|
||||
|
||||
switch (info->base.fs.depth_layout) {
|
||||
@@ -1887,7 +1887,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
||||
shader->ps.spi_ps_input_addr = shader->config.spi_ps_input_addr;
|
||||
shader->ps.num_interp = si_get_ps_num_interp(shader);
|
||||
shader->ps.spi_shader_z_format =
|
||||
ac_get_spi_shader_z_format(info->writes_z, info->writes_stencil, info->writes_samplemask,
|
||||
ac_get_spi_shader_z_format(info->writes_z, info->writes_stencil, shader->ps.writes_samplemask,
|
||||
shader->key.ps.part.epilog.alpha_to_coverage_via_mrtz);
|
||||
|
||||
/* Ensure that some export memory is always allocated, for two reasons:
|
||||
@@ -1907,7 +1907,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
|
||||
*
|
||||
* RB+ depth-only rendering requires SPI_SHADER_32_R.
|
||||
*/
|
||||
bool has_mrtz = info->writes_z || info->writes_stencil || info->writes_samplemask;
|
||||
bool has_mrtz = info->writes_z || info->writes_stencil || shader->ps.writes_samplemask;
|
||||
|
||||
if (!shader->ps.spi_shader_col_format) {
|
||||
if (shader->key.ps.part.epilog.rbplus_depth_only_opt) {
|
||||
@@ -2217,6 +2217,13 @@ void si_ps_key_update_framebuffer_blend_rasterizer(struct si_context *sctx)
|
||||
sctx->gfx_level >= GFX11 && alpha_to_coverage &&
|
||||
(sel->info.writes_z || sel->info.writes_stencil || sel->info.writes_samplemask);
|
||||
|
||||
/* Remove the gl_SampleMask fragment shader output if MSAA is disabled.
|
||||
* This is required for correctness and it's also an optimization.
|
||||
*/
|
||||
key->ps.part.epilog.kill_samplemask = sel->info.writes_samplemask &&
|
||||
(sctx->framebuffer.nr_samples <= 1 ||
|
||||
!rs->multisample_enable);
|
||||
|
||||
/* If alpha-to-coverage isn't exported via MRTZ, set that we need to export alpha
|
||||
* through MRT0.
|
||||
*/
|
||||
|
Reference in New Issue
Block a user