diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index f0176106de2..6bb753ed356 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2990,6 +2990,21 @@ apply_sgprs(opt_ctx& ctx, aco_ptr& instr) } } +bool +interp_can_become_fma(opt_ctx& ctx, aco_ptr& instr) +{ + if (instr->opcode != aco_opcode::v_interp_p2_f32_inreg) + return false; + + instr->opcode = aco_opcode::v_fma_f32; + instr->format = Format::VOP3; + bool dpp_allowed = can_use_DPP(ctx.program->gfx_level, instr, false); + instr->opcode = aco_opcode::v_interp_p2_f32_inreg; + instr->format = Format::VINTERP_INREG; + + return dpp_allowed; +} + void interp_p2_f32_inreg_to_fma_dpp(aco_ptr& instr) { @@ -3020,9 +3035,8 @@ apply_omod_clamp(opt_ctx& ctx, aco_ptr& instr) return false; /* SDWA omod is GFX9+. */ - bool can_use_omod = - (can_vop3 || ctx.program->gfx_level >= GFX9) && !instr->isVOP3P() && - (!instr->isVINTERP_INREG() || instr->opcode == aco_opcode::v_interp_p2_f32_inreg); + bool can_use_omod = (can_vop3 || ctx.program->gfx_level >= GFX9) && !instr->isVOP3P() && + (!instr->isVINTERP_INREG() || interp_can_become_fma(ctx, instr)); ssa_info& def_info = ctx.info[instr->definitions[0].tempId()]; @@ -3581,7 +3595,7 @@ combine_output_conversion(opt_ctx& ctx, aco_ptr& instr) if (conv->usesModifiers()) return false; - if (instr->opcode == aco_opcode::v_interp_p2_f32_inreg) + if (interp_can_become_fma(ctx, instr)) interp_p2_f32_inreg_to_fma_dpp(instr); if (!can_use_mad_mix(ctx, instr))