diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c b/src/gallium/drivers/r300/compiler/nir_to_rc.c index 18b9f17272d..a54cd0c2f0d 100644 --- a/src/gallium/drivers/r300/compiler/nir_to_rc.c +++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c @@ -27,6 +27,7 @@ #include "compiler/nir/nir_worklist.h" #include "nir_to_rc.h" #include "r300_nir.h" +#include "r300_screen.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" #include "tgsi/tgsi_dump.h" @@ -2372,6 +2373,7 @@ const void *nir_to_rc_options(struct nir_shader *s, { struct ntr_compile *c; const void *tgsi_tokens; + bool is_r500 = r300_screen(screen)->caps.is_r500; nir_variable_mode no_indirects_mask = ntr_no_indirects_mask(s, screen); /* Lower array indexing on FS inputs. Since we don't set @@ -2446,6 +2448,10 @@ const void *nir_to_rc_options(struct nir_shader *s, !options->lower_cmp && !options->lower_fabs); /* bool_to_float generates MOVs for b2f32 that we want to clean up. */ NIR_PASS_V(s, nir_copy_prop); + if (s->info.stage == MESA_SHADER_VERTEX) { + if (is_r500) + NIR_PASS_V(s, r300_nir_lower_fcsel_r500); + } NIR_PASS_V(s, nir_opt_dce); nir_move_options move_all = diff --git a/src/gallium/drivers/r300/compiler/r300_nir.h b/src/gallium/drivers/r300/compiler/r300_nir.h index fcf815f3c41..b546a35077d 100644 --- a/src/gallium/drivers/r300/compiler/r300_nir.h +++ b/src/gallium/drivers/r300/compiler/r300_nir.h @@ -81,4 +81,6 @@ extern bool r300_nir_clean_double_fneg(struct nir_shader *shader); extern bool r300_nir_post_integer_lowering(struct nir_shader *shader); +extern bool r300_nir_lower_fcsel_r500(nir_shader *shader); + #endif /* R300_NIR_H */ diff --git a/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c b/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c new file mode 100644 index 00000000000..ac280870850 --- /dev/null +++ b/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c @@ -0,0 +1,131 @@ +#include +#include "r300_nir.h" +#include "nir_builder.h" + +static int +follow_modifiers(nir_instr *instr) +{ + /* We don't have texturing so the only other options besides alus are + * just load input, load ubo or phi. We can copy propagate the first two + * in most cases. The cases when the copy propagate is not guaranteed + * to work is with indirect ubo load and in the presence of control flow. + * So just be safe and count this as a separate tmp. + */ + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + /* It should be enough to check if any of the uses is in phi. */ + if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 || + intrin->intrinsic == nir_intrinsic_load_constant || + intrin->intrinsic == nir_intrinsic_load_input) { + nir_foreach_use(use, &intrin->def) { + if (nir_src_parent_instr(use)->type == nir_instr_type_phi) + return intrin->def.index; + } + } + if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 && + !nir_src_is_const(intrin->src[1])) + return intrin->def.index; + } + /* Assume the worst when we see a phi. */ + if (instr->type == nir_instr_type_phi) + return nir_instr_as_phi(instr)->def.index; + + if (instr->type != nir_instr_type_alu) + return -1; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + if (alu->op == nir_op_fneg || alu->op == nir_op_fabs) { + return follow_modifiers(alu->src[0].src.ssa->parent_instr); + } + return alu->def.index; +} + +static bool +has_three_different_tmp_sources(nir_alu_instr *fcsel) +{ + unsigned src_def_index[3]; + for (unsigned i = 0; i < 3; i++) { + int index = follow_modifiers(fcsel->src[i].src.ssa->parent_instr); + if (index == -1) + return false; + else + src_def_index[i] = index; + } + return src_def_index[0] != src_def_index[1] && + src_def_index[0] != src_def_index[2] && + src_def_index[1] != src_def_index[2]; +} + +static bool +is_comparison(nir_instr *instr) +{ + if (instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + switch (alu->op) { + case nir_op_sge: + case nir_op_slt: + case nir_op_seq: + case nir_op_sne: + return true; + default: + return false; + } +} + +static bool +r300_nir_lower_fcsel_instr(nir_builder *b, nir_instr *instr, void *data) +{ + if (instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + if (alu->op != nir_op_fcsel && alu->op != nir_op_fcsel_ge && alu->op != nir_op_fcsel_gt) + return false; + + if (has_three_different_tmp_sources(alu)) { + nir_def *lrp; + b->cursor = nir_before_instr(&alu->instr); + /* Lower to LRP. + * At this point there are no fcsels as all bcsels were converted to + * fcsel_gt by nir_lower_bool_to_float, however we can save on the slt + * even for nir_op_fcsel_gt if the source is 0 or 1 anyway. + */ + nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr; + if (alu->op == nir_op_fcsel || + (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) { + lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), + nir_ssa_for_alu_src(b, alu, 1), + nir_ssa_for_alu_src(b, alu, 0)); + } else if (alu->op == nir_op_fcsel_ge) { + nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), nir_imm_float(b, 0.0)); + lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), + nir_ssa_for_alu_src(b, alu, 1), sge); + } else { + nir_def *slt = nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)), + nir_imm_float(b, 0.0)); + lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2), + nir_ssa_for_alu_src(b, alu, 1), slt); + } + + nir_def_rewrite_uses(&alu->def, lrp); + nir_instr_remove(&alu->instr); + return true; + } + return false; +} + +bool +r300_nir_lower_fcsel_r500(nir_shader *shader) +{ + bool progress = nir_shader_instructions_pass(shader, + r300_nir_lower_fcsel_instr, + nir_metadata_block_index | + nir_metadata_dominance, + NULL); + return progress; +} diff --git a/src/gallium/drivers/r300/meson.build b/src/gallium/drivers/r300/meson.build index ec675f4ade8..e47aea43e2e 100644 --- a/src/gallium/drivers/r300/meson.build +++ b/src/gallium/drivers/r300/meson.build @@ -77,6 +77,7 @@ files_r300 = files( 'compiler/r500_fragprog.h', 'compiler/r300_nir.c', 'compiler/r300_nir.h', + 'compiler/r500_nir_lower_fcsel.c', 'compiler/radeon_code.c', 'compiler/radeon_code.h', 'compiler/radeon_compiler.c',