diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index 3aef36e91be..c23f59e30a9 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -884,6 +884,21 @@ setup_nir(isel_context *ctx, nir_shader *nir) NIR_PASS(more_algebraic, nir, nir_opt_algebraic); } + /* Do late algebraic optimization to turn add(a, neg(b)) back into + * subs, then the mandatory cleanup after algebraic. Note that it may + * produce fnegs, and if so then we need to keep running to squash + * fneg(fneg(a)). + */ + bool more_late_algebraic = true; + while (more_late_algebraic) { + more_late_algebraic = false; + NIR_PASS(more_late_algebraic, nir, nir_opt_algebraic_late); + NIR_PASS_V(nir, nir_opt_constant_folding); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_opt_cse); + } + /* cleanup passes */ nir_lower_load_const_to_scalar(nir); nir_opt_shrink_load(nir);