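aco/gfx11: support vinterp as fma_mix

combine_output_conversion() now rewrites a v_interp_p2_f32_inreg as an FMA
with DPP before the can_use_mad_mix() check, so that an f32->f16 conversion
of the interpolation result can be folded into a single v_fma_mixlo_f16.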

Totals from 718 (0.94% of 76572) affected shaders:
Instrs: 657897 -> 654219 (-0.56%)
CodeSize: 3471668 -> 3457352 (-0.41%); split: -0.41%, +0.00%
VGPRs: 34200 -> 34164 (-0.11%)
Latency: 11687698 -> 11677030 (-0.09%); split: -0.10%, +0.00%
InvThroughput: 1455371 -> 1451537 (-0.26%); split: -0.26%, +0.00%
VClause: 7598 -> 7600 (+0.03%)
SClause: 18293 -> 18241 (-0.28%); split: -0.44%, +0.15%
Copies: 34641 -> 34644 (+0.01%); split: -0.05%, +0.06%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25220>
This commit is contained in:
Georg Lehmann
2023-09-14 13:25:07 +02:00
committed by Marge Bot
parent 7d7657ef74
commit 7b4f0e714c
2 changed files with 13 additions and 4 deletions

View File

@@ -3993,15 +3993,18 @@ combine_output_conversion(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return false; return false;
Instruction* conv = def_info.instr; Instruction* conv = def_info.instr;
if (!can_use_mad_mix(ctx, instr) || ctx.uses[instr->definitions[0].tempId()] != 1) if (!ctx.uses[conv->definitions[0].tempId()] || ctx.uses[instr->definitions[0].tempId()] != 1)
return false;
if (!ctx.uses[conv->definitions[0].tempId()])
return false; return false;
if (conv->usesModifiers()) if (conv->usesModifiers())
return false; return false;
if (instr->opcode == aco_opcode::v_interp_p2_f32_inreg)
interp_p2_f32_inreg_to_fma_dpp(instr);
if (!can_use_mad_mix(ctx, instr))
return false;
if (!instr->isVOP3P()) if (!instr->isVOP3P())
to_mad_mix(ctx, instr); to_mad_mix(ctx, instr);

View File

@@ -2225,5 +2225,11 @@ BEGIN_TEST(optimize.vinterp_inreg_output_modifiers)
tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp); tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp);
writeout(3, tmp); writeout(3, tmp);
//! v2b: %res4 = v_fma_mixlo_f16 %c, %b, %a quad_perm:[2,2,2,2] fi
//! p_unit_test 4, %res4
tmp = bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, bld.def(v1), inputs[2], inputs[1],
inputs[0]);
writeout(4, f2f16(tmp));
finish_opt_test(); finish_opt_test();
END_TEST END_TEST