From ad5fbc440767ee020ebf761bd7f3aaba0895c92d Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Thu, 8 Feb 2024 14:51:25 +0100 Subject: [PATCH] aco: use fmamk/ak instead of fma with inline constant for more VOPD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB navi31, forced wave32: Totals from 24438 (31.29% of 78112) affected shaders: Instrs: 21632788 -> 21551766 (-0.37%); split: -0.38%, +0.01% CodeSize: 126181860 -> 126083848 (-0.08%); split: -0.10%, +0.02% Latency: 162491062 -> 162516234 (+0.02%); split: -0.05%, +0.07% InvThroughput: 31121194 -> 31002125 (-0.38%); split: -0.40%, +0.02% VClause: 420176 -> 420169 (-0.00%); split: -0.00%, +0.00% SClause: 791844 -> 791762 (-0.01%); split: -0.01%, +0.00% Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer_postRA.cpp | 35 +++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index cee1fd0cb57..25be6300ee3 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -708,6 +708,39 @@ try_reassign_split_vector(pr_opt_ctx& ctx, aco_ptr& instr) } } +void +try_convert_fma_to_vop2(pr_opt_ctx& ctx, aco_ptr& instr) +{ + /* We convert v_fma_f32 with inline constant to fmamk/fmaak. + * This is only beneficial if it allows more VOPD. 
+ */ + if (ctx.program->gfx_level < GFX11 || ctx.program->wave_size != 32 || + instr->opcode != aco_opcode::v_fma_f32 || instr->usesModifiers()) + return; + + int constant_idx = -1; + int vgpr_idx = -1; + for (int i = 0; i < 3; i++) { + const Operand& op = instr->operands[i]; + if (op.isConstant() && !op.isLiteral()) + constant_idx = i; + else if (op.isOfType(RegType::vgpr)) + vgpr_idx = i; + else + return; + } + + if (constant_idx < 0 || vgpr_idx < 0) + return; + + std::swap(instr->operands[constant_idx], instr->operands[2]); + if (constant_idx == 0 || vgpr_idx == 0) + std::swap(instr->operands[0], instr->operands[1]); + instr->operands[2] = Operand::literal32(instr->operands[2].constantValue()); + instr->opcode = constant_idx == 2 ? aco_opcode::v_fmaak_f32 : aco_opcode::v_fmamk_f32; + instr->format = Format::VOP2; +} + void process_instruction(pr_opt_ctx& ctx, aco_ptr& instr) { @@ -726,6 +759,8 @@ process_instruction(pr_opt_ctx& ctx, aco_ptr& instr) try_reassign_split_vector(ctx, instr); + try_convert_fma_to_vop2(ctx, instr); + if (instr) save_reg_writes(ctx, instr);