r300: nir fcsel/CMP lowering pass for R500

Right now this is done in the backend so move it up to NIR. Doing this
in the backend is easier, as at that time we can have a better idea
about when we hit the hardware limits of three different TMP sources,
however moving this to NIR allows for some optimizations. Specifically,
at this time if we decide we actually have to lower we still have the
info if we have plain fcsel for which we can save the comparison and
emit flrp only. During translation to TGSI all of fcsel, fcsel_gt, and
fcsel_ge translate to CMP so at that point the comparison is always needed.

Shader-db RV530:
total instructions in shared programs: 126057 -> 125823 (-0.19%)
instructions in affected programs: 11359 -> 11125 (-2.06%)
helped: 68
HURT: 12
total temps in shared programs: 17043 -> 17023 (-0.12%)
temps in affected programs: 459 -> 439 (-4.36%)
helped: 32
HURT: 12
total cycles in shared programs: 191604 -> 191294 (-0.16%)
cycles in affected programs: 11834 -> 11524 (-2.62%)
helped: 68
HURT: 12

The hurt shaders are some GTK shaders where there is some bad
interaction with nir_move_vec_src_uses_to_dest. This is known and might
be improved later by tweaking the pass more.

Reviewed-by: Filip Gawin <filip.gawin@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26816>
This commit is contained in:
Pavel Ondračka
2023-09-21 11:59:07 +02:00
parent 75e7790ee5
commit 024491f60f
4 changed files with 140 additions and 0 deletions

View File

@@ -27,6 +27,7 @@
#include "compiler/nir/nir_worklist.h"
#include "nir_to_rc.h"
#include "r300_nir.h"
#include "r300_screen.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_dump.h"
@@ -2372,6 +2373,7 @@ const void *nir_to_rc_options(struct nir_shader *s,
{
struct ntr_compile *c;
const void *tgsi_tokens;
bool is_r500 = r300_screen(screen)->caps.is_r500;
nir_variable_mode no_indirects_mask = ntr_no_indirects_mask(s, screen);
/* Lower array indexing on FS inputs. Since we don't set
@@ -2446,6 +2448,10 @@ const void *nir_to_rc_options(struct nir_shader *s,
!options->lower_cmp && !options->lower_fabs);
/* bool_to_float generates MOVs for b2f32 that we want to clean up. */
NIR_PASS_V(s, nir_copy_prop);
if (s->info.stage == MESA_SHADER_VERTEX) {
if (is_r500)
NIR_PASS_V(s, r300_nir_lower_fcsel_r500);
}
NIR_PASS_V(s, nir_opt_dce);
nir_move_options move_all =

View File

@@ -81,4 +81,6 @@ extern bool r300_nir_clean_double_fneg(struct nir_shader *shader);
extern bool r300_nir_post_integer_lowering(struct nir_shader *shader);
extern bool r300_nir_lower_fcsel_r500(nir_shader *shader);
#endif /* R300_NIR_H */

View File

@@ -0,0 +1,131 @@
#include <stdbool.h>
#include "r300_nir.h"
#include "nir_builder.h"
/*
 * Walk up through fneg/fabs ALU instructions starting at instr and report
 * which SSA def the source ultimately comes from.
 *
 * Returns the index of the def that is counted as a separate tmp, or -1
 * when the producer is not counted as one.
 */
static int
follow_modifiers(nir_instr *instr)
{
   for (;;) {
      switch (instr->type) {
      case nir_instr_type_intrinsic: {
         /* There is no texturing here, so apart from ALU instructions a
          * source can only come from load input, load ubo or a phi. The
          * loads can be copy propagated in most cases; that is not
          * guaranteed with an indirect ubo load or in the presence of
          * control flow, so those cases are counted as a separate tmp to
          * be safe.
          */
         nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
         bool is_plain_load =
            load->intrinsic == nir_intrinsic_load_ubo_vec4 ||
            load->intrinsic == nir_intrinsic_load_constant ||
            load->intrinsic == nir_intrinsic_load_input;

         if (is_plain_load) {
            /* Checking whether any of the uses is in a phi should be
             * enough to detect the control flow case.
             */
            nir_foreach_use(use, &load->def) {
               if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
                  return load->def.index;
            }
         }

         /* Indirect ubo load: the offset source is not a constant. */
         if (load->intrinsic == nir_intrinsic_load_ubo_vec4 &&
             !nir_src_is_const(load->src[1]))
            return load->def.index;

         return -1;
      }

      case nir_instr_type_phi:
         /* Assume the worst when we see a phi. */
         return nir_instr_as_phi(instr)->def.index;

      case nir_instr_type_alu: {
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         if (alu->op != nir_op_fneg && alu->op != nir_op_fabs)
            return alu->def.index;

         /* fneg/fabs: keep looking at whatever feeds the modifier. */
         instr = alu->src[0].src.ssa->parent_instr;
         continue;
      }

      default:
         return -1;
      }
   }
}
/*
 * Check whether the three sources of an fcsel-like instruction resolve,
 * via follow_modifiers(), to three pairwise different defs.
 *
 * Returns false as soon as any source resolves to -1, or when at least
 * two of the sources resolve to the same def index.
 */
static bool
has_three_different_tmp_sources(nir_alu_instr *fcsel)
{
   unsigned tmp_index[3];

   for (unsigned s = 0; s < 3; s++) {
      int resolved = follow_modifiers(fcsel->src[s].src.ssa->parent_instr);

      /* A source that is not counted as a tmp means there cannot be
       * three different ones.
       */
      if (resolved == -1)
         return false;

      tmp_index[s] = resolved;
   }

   bool any_shared = tmp_index[0] == tmp_index[1] ||
                     tmp_index[0] == tmp_index[2] ||
                     tmp_index[1] == tmp_index[2];

   return !any_shared;
}
/*
 * Return true when instr is an ALU instruction whose opcode is one of
 * sge, slt, seq or sne; false for everything else.
 */
static bool
is_comparison(nir_instr *instr)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *cmp = nir_instr_as_alu(instr);

   return cmp->op == nir_op_sge ||
          cmp->op == nir_op_slt ||
          cmp->op == nir_op_seq ||
          cmp->op == nir_op_sne;
}
/*
 * Per-instruction callback: replace an fcsel, fcsel_ge or fcsel_gt that
 * reads three different tmp sources with an flrp.
 *
 * b     - builder used to emit the replacement instructions
 * instr - instruction being visited
 * data  - unused
 *
 * Returns true when the instruction was replaced, false when it was left
 * untouched.
 */
static bool
r300_nir_lower_fcsel_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *sel = nir_instr_as_alu(instr);
   bool is_select = sel->op == nir_op_fcsel ||
                    sel->op == nir_op_fcsel_ge ||
                    sel->op == nir_op_fcsel_gt;
   if (!is_select)
      return false;

   /* Only selects with three different tmp sources get lowered. */
   if (!has_three_different_tmp_sources(sel))
      return false;

   b->cursor = nir_before_instr(&sel->instr);

   /* Lower to LRP.
    * At this point there are no fcsels as all bcsels were converted to
    * fcsel_gt by nir_lower_bool_to_float, however the slt can be saved
    * even for nir_op_fcsel_gt if the source is 0 or 1 anyway.
    */
   nir_instr *cond_parent = sel->src[0].src.ssa->parent_instr;
   nir_def *factor;

   if (sel->op == nir_op_fcsel ||
       (sel->op == nir_op_fcsel_gt && is_comparison(cond_parent))) {
      /* The condition is used directly as the interpolation factor. */
      factor = nir_ssa_for_alu_src(b, sel, 0);
   } else if (sel->op == nir_op_fcsel_ge) {
      /* factor = (src0 >= 0.0) */
      factor = nir_sge(b, nir_ssa_for_alu_src(b, sel, 0), nir_imm_float(b, 0.0));
   } else {
      /* fcsel_gt on an arbitrary value: factor = (-src0 < 0.0) */
      factor = nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, sel, 0)),
                       nir_imm_float(b, 0.0));
   }

   nir_def *lrp = nir_flrp(b, nir_ssa_for_alu_src(b, sel, 2),
                           nir_ssa_for_alu_src(b, sel, 1), factor);

   /* Point every user at the flrp and drop the original select. */
   nir_def_rewrite_uses(&sel->def, lrp);
   nir_instr_remove(&sel->instr);

   return true;
}
/*
 * Run r300_nir_lower_fcsel_instr over every instruction of the shader via
 * nir_shader_instructions_pass.
 *
 * Returns the progress value reported by nir_shader_instructions_pass.
 */
bool
r300_nir_lower_fcsel_r500(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, r300_nir_lower_fcsel_instr,
                                       nir_metadata_block_index |
                                          nir_metadata_dominance,
                                       NULL);
}

View File

@@ -77,6 +77,7 @@ files_r300 = files(
'compiler/r500_fragprog.h',
'compiler/r300_nir.c',
'compiler/r300_nir.h',
'compiler/r500_nir_lower_fcsel.c',
'compiler/radeon_code.c',
'compiler/radeon_code.h',
'compiler/radeon_compiler.c',