From 55d81214c9d800c0667337808a82143ebab17c1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 20 Nov 2023 07:06:29 -0500 Subject: [PATCH] radeonsi: replace gl_FrontFacing with a constant if one side is always culled Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- .../drivers/radeonsi/si_nir_lower_abi.c | 5 ++++ src/gallium/drivers/radeonsi/si_shader.c | 6 +++- src/gallium/drivers/radeonsi/si_shader.h | 3 ++ src/gallium/drivers/radeonsi/si_state.c | 15 ++++++++-- src/gallium/drivers/radeonsi/si_state.h | 1 + .../drivers/radeonsi/si_state_shaders.cpp | 30 +++++++++++++++---- 6 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index fdd29fdbbc5..e57d9590be6 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -587,6 +587,11 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s case nir_intrinsic_load_alpha_reference_amd: replacement = ac_nir_load_arg(b, &args->ac, args->alpha_reference); break; + case nir_intrinsic_load_front_face: + if (!key->ps.opt.force_front_face_input) + return false; + replacement = nir_imm_bool(b, key->ps.opt.force_front_face_input == 1); + break; case nir_intrinsic_load_barycentric_optimize_amd: { nir_def *prim_mask = ac_nir_load_arg(b, &args->ac, args->ac.prim_mask); /* enabled when bit 31 is set */ diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1f24c78a57e..89c4c7a93f1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2645,6 +2645,10 @@ si_set_spi_ps_input_config(struct si_shader *shader) const struct si_shader_info *info = &sel->info; const union si_shader_key *key = &shader->key; + /* TODO: This should be determined from the final NIR instead of the input NIR, + * otherwise LLVM will have a 
performance advantage here because it determines + * VGPR inputs for each shader variant after LLVM optimizations. + */ shader->config.spi_ps_input_ena = S_0286CC_PERSP_CENTER_ENA(info->uses_persp_center) | S_0286CC_PERSP_CENTROID_ENA(info->uses_persp_centroid) | @@ -2652,7 +2656,7 @@ si_set_spi_ps_input_config(struct si_shader *shader) S_0286CC_LINEAR_CENTER_ENA(info->uses_linear_center) | S_0286CC_LINEAR_CENTROID_ENA(info->uses_linear_centroid) | S_0286CC_LINEAR_SAMPLE_ENA(info->uses_linear_sample) | - S_0286CC_FRONT_FACE_ENA(info->uses_frontface) | + S_0286CC_FRONT_FACE_ENA(info->uses_frontface && !key->ps.opt.force_front_face_input) | S_0286CC_SAMPLE_COVERAGE_ENA(info->reads_samplemask) | S_0286CC_ANCILLARY_ENA(info->uses_sampleid || info->uses_layer_id); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index edf054999be..f7dcc961c76 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -797,6 +797,9 @@ struct si_shader_key_ps { unsigned prefer_mono : 1; unsigned inline_uniforms:1; + /* This eliminates the FRONT_FACE input VGPR as well as shader code using it. */ + int force_front_face_input : 2; /* 0 = gl_FrontFacing, 1 = true, -1 = false */ + /* This must be kept last to limit the number of variants * depending only on the uniform values. */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 7671a66fcc4..2c633cf55e1 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1061,6 +1061,14 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast } } + /* Force gl_FrontFacing to true or false if the other face is culled. 
*/ + if (util_bitcount(state->cull_face) == 1) { + if (state->cull_face & PIPE_FACE_FRONT) + rs->force_front_face_input = -1; + else + rs->force_front_face_input = 1; + } + unsigned spi_interp_control_0 = S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) | @@ -1256,8 +1264,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) if (old_rs->multisample_enable != rs->multisample_enable) si_ps_key_update_framebuffer_blend_rasterizer(sctx); - if (old_rs->two_side != rs->two_side || - old_rs->flatshade != rs->flatshade || + if (old_rs->flatshade != rs->flatshade || old_rs->clamp_fragment_color != rs->clamp_fragment_color) si_ps_key_update_rasterizer(sctx); @@ -1276,7 +1283,9 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) old_rs->line_smooth != rs->line_smooth || old_rs->poly_smooth != rs->poly_smooth || old_rs->polygon_mode_is_points != rs->polygon_mode_is_points || - old_rs->poly_stipple_enable != rs->poly_stipple_enable) + old_rs->poly_stipple_enable != rs->poly_stipple_enable || + old_rs->two_side != rs->two_side || + old_rs->force_front_face_input != rs->force_front_face_input) si_vs_ps_key_update_rast_prim_smooth_stipple(sctx); /* Used by si_get_vs_key_outputs in si_update_shaders: */ diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 9441459cf2c..8b8f5dcaf11 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -85,6 +85,7 @@ struct si_state_rasterizer { unsigned polygon_mode_is_points : 1; unsigned perpendicular_end_caps : 1; unsigned bottom_edge_rule : 1; + int force_front_face_input : 2; }; struct si_dsa_stencil_ref_part { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index e52f799fcc8..ef7672368eb 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ 
-1872,6 +1872,14 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) assert(!shader->key.ps.part.prolog.force_linear_sample_interp || (!G_0286CC_LINEAR_CENTER_ENA(input_ena) && !G_0286CC_LINEAR_CENTROID_ENA(input_ena))); + /* color_two_side always enables FRONT_FACE. Since st/mesa disables two-side colors if the back + * face is culled, the only case when both color_two_side and force_front_face_input can be set + * is when the front face is culled (which means force_front_face_input == -1). + */ + assert(!shader->key.ps.opt.force_front_face_input || !G_0286CC_FRONT_FACE_ENA(input_ena) || + (shader->key.ps.part.prolog.color_two_side && + shader->key.ps.opt.force_front_face_input == -1)); + /* Validate cases when the optimizations are off (read as implications). */ assert(shader->key.ps.part.prolog.bc_optimize_for_persp || !G_0286CC_PERSP_CENTER_ENA(input_ena) || !G_0286CC_PERSP_CENTROID_ENA(input_ena)); @@ -2241,7 +2249,9 @@ void si_update_ps_inputs_read_or_disabled(struct si_context *sctx) void si_vs_ps_key_update_rast_prim_smooth_stipple(struct si_context *sctx) { struct si_shader_ctx_state *hw_vs = si_get_vs(sctx); - if (!hw_vs->cso) + struct si_shader_selector *ps = sctx->shader.ps.cso; + + if (!hw_vs->cso || !ps) return; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; @@ -2249,33 +2259,44 @@ void si_vs_ps_key_update_rast_prim_smooth_stipple(struct si_context *sctx) union si_shader_key *ps_key = &sctx->shader.ps.key; bool old_kill_pointsize = vs_key->ge.opt.kill_pointsize; + bool old_color_two_side = ps_key->ps.part.prolog.color_two_side; bool old_poly_stipple = ps_key->ps.part.prolog.poly_stipple; bool old_poly_line_smoothing = ps_key->ps.mono.poly_line_smoothing; bool old_point_smoothing = ps_key->ps.mono.point_smoothing; + int old_force_front_face_input = ps_key->ps.opt.force_front_face_input; if (sctx->current_rast_prim == MESA_PRIM_POINTS) { vs_key->ge.opt.kill_pointsize = 0; + 
ps_key->ps.part.prolog.color_two_side = 0; ps_key->ps.part.prolog.poly_stipple = 0; ps_key->ps.mono.poly_line_smoothing = 0; ps_key->ps.mono.point_smoothing = rs->point_smooth; + ps_key->ps.opt.force_front_face_input = ps->info.uses_frontface; } else if (util_prim_is_lines(sctx->current_rast_prim)) { vs_key->ge.opt.kill_pointsize = hw_vs->cso->info.writes_psize; + ps_key->ps.part.prolog.color_two_side = 0; ps_key->ps.part.prolog.poly_stipple = 0; ps_key->ps.mono.poly_line_smoothing = rs->line_smooth && sctx->framebuffer.nr_samples <= 1; ps_key->ps.mono.point_smoothing = 0; + ps_key->ps.opt.force_front_face_input = ps->info.uses_frontface; } else { /* Triangles. */ vs_key->ge.opt.kill_pointsize = hw_vs->cso->info.writes_psize && !rs->polygon_mode_is_points; + ps_key->ps.part.prolog.color_two_side = rs->two_side && ps->info.colors_read; ps_key->ps.part.prolog.poly_stipple = rs->poly_stipple_enable; ps_key->ps.mono.poly_line_smoothing = rs->poly_smooth && sctx->framebuffer.nr_samples <= 1; ps_key->ps.mono.point_smoothing = 0; + ps_key->ps.opt.force_front_face_input = + ps->info.uses_frontface ? rs->force_front_face_input : 0; /* keep the sign: -1 = false, "&&" would turn it into 1 */ } if (vs_key->ge.opt.kill_pointsize != old_kill_pointsize || + ps_key->ps.part.prolog.color_two_side != old_color_two_side || ps_key->ps.part.prolog.poly_stipple != old_poly_stipple || ps_key->ps.mono.poly_line_smoothing != old_poly_line_smoothing || - ps_key->ps.mono.point_smoothing != old_point_smoothing) + ps_key->ps.mono.point_smoothing != old_point_smoothing || + ps_key->ps.opt.force_front_face_input != old_force_front_face_input) sctx->do_update_shaders = true; } @@ -2486,16 +2507,13 @@ void si_ps_key_update_rasterizer(struct si_context *sctx) if (!sel) return; - bool old_color_two_side = key->ps.part.prolog.color_two_side; bool old_flatshade_colors = key->ps.part.prolog.flatshade_colors; bool old_clamp_color = key->ps.part.epilog.clamp_color; - key->ps.part.prolog.color_two_side = rs->two_side && sel->info.colors_read; 
key->ps.part.prolog.flatshade_colors = rs->flatshade && sel->info.uses_interp_color; key->ps.part.epilog.clamp_color = rs->clamp_fragment_color; - if (key->ps.part.prolog.color_two_side != old_color_two_side || - key->ps.part.prolog.flatshade_colors != old_flatshade_colors || + if (key->ps.part.prolog.flatshade_colors != old_flatshade_colors || key->ps.part.epilog.clamp_color != old_clamp_color) sctx->do_update_shaders = true; }