From 254b178d5bb66e30b5566858e6450e8d0acb32f3 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 11 Nov 2022 19:38:38 +0000 Subject: [PATCH] aco: disallow SGPRS/constants with interpolation instructions https://reviews.llvm.org/D137575 The VINTRP format cannot encode anything except VGPRs. Reading VINTERPInstructions.td, looks like it's the same for GFX11. No fossil-db changes. Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 31 ++++++++++++++++++++--- src/amd/compiler/tests/test_assembler.cpp | 26 +++++++++---------- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index c4bcbdd2a60..54d6a333686 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -597,6 +597,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr& instr, Temp temp, unsi bool can_apply_sgprs(opt_ctx& ctx, aco_ptr& instr) { + assert(instr->isVALU()); if (instr->isSDWA() && ctx.program->gfx_level < GFX9) return false; return instr->opcode != aco_opcode::v_readfirstlane_b32 && @@ -605,7 +606,20 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr& instr) instr->opcode != aco_opcode::v_writelane_b32 && instr->opcode != aco_opcode::v_writelane_b32_e64 && instr->opcode != aco_opcode::v_permlane16_b32 && - instr->opcode != aco_opcode::v_permlanex16_b32; + instr->opcode != aco_opcode::v_permlanex16_b32 && + instr->opcode != aco_opcode::v_interp_p1_f32 && + instr->opcode != aco_opcode::v_interp_p2_f32 && + instr->opcode != aco_opcode::v_interp_mov_f32 && + instr->opcode != aco_opcode::v_interp_p1ll_f16 && + instr->opcode != aco_opcode::v_interp_p1lv_f16 && + instr->opcode != aco_opcode::v_interp_p2_legacy_f16 && + instr->opcode != aco_opcode::v_interp_p2_f16 && + instr->opcode != aco_opcode::v_interp_p10_f32_inreg && + instr->opcode != aco_opcode::v_interp_p2_f32_inreg && + instr->opcode != aco_opcode::v_interp_p10_f16_f32_inreg && + instr->opcode != aco_opcode::v_interp_p2_f16_f32_inreg && + instr->opcode != aco_opcode::v_interp_p10_rtz_f16_f32_inreg && + instr->opcode != aco_opcode::v_interp_p2_rtz_f16_f32_inreg; } void @@ -658,7 +672,6 @@ bool alu_can_accept_constant(aco_opcode opcode, unsigned operand) { switch (opcode) { - case aco_opcode::v_interp_p2_f32: case aco_opcode::v_mac_f32: case aco_opcode::v_writelane_b32: case aco_opcode::v_writelane_b32_e64: @@ -677,7 +690,19 @@ alu_can_accept_constant(aco_opcode opcode, unsigned operand) case aco_opcode::p_bpermute_gfx10w64: case aco_opcode::p_bpermute_gfx11w64: case aco_opcode::p_interp_gfx11: - case aco_opcode::p_dual_src_export_gfx11: return false; + case aco_opcode::p_dual_src_export_gfx11: + case aco_opcode::v_interp_p1_f32: + case aco_opcode::v_interp_p2_f32: + case aco_opcode::v_interp_mov_f32: + case aco_opcode::v_interp_p1ll_f16: + case aco_opcode::v_interp_p1lv_f16: + case aco_opcode::v_interp_p2_legacy_f16: + case aco_opcode::v_interp_p10_f32_inreg: + case aco_opcode::v_interp_p2_f32_inreg: + case aco_opcode::v_interp_p10_f16_f32_inreg: + case aco_opcode::v_interp_p2_f16_f32_inreg: + case aco_opcode::v_interp_p10_rtz_f16_f32_inreg: + case aco_opcode::v_interp_p2_rtz_f16_f32_inreg: return false; default: return true; } } diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp index e46ee13954e..533f69c6ba7 100644 --- a/src/amd/compiler/tests/test_assembler.cpp +++ b/src/amd/compiler/tests/test_assembler.cpp @@ -735,40 +735,40 @@ BEGIN_TEST(assembler.gfx11.vinterp) Operand op1(bld.tmp(v1)); op1.setFixed(PhysReg(256 + 20)); - Operand op2(bld.tmp(s1)); - op2.setFixed(PhysReg(30)); + Operand op2(bld.tmp(v1)); + op2.setFixed(PhysReg(256 + 30)); - //>> v_interp_p10_f32 v42, v10, v20, s30 wait_exp:7 ; cd00072a 007a290a + //>> v_interp_p10_f32 v42, v10, v20, v30 wait_exp:7 ; cd00072a 047a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2); - //! v_interp_p10_f32 v42, v10, v20, s30 wait_exp:6 ; cd00062a 007a290a + //! v_interp_p10_f32 v42, v10, v20, v30 wait_exp:6 ; cd00062a 047a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6); - //! v_interp_p2_f32 v42, v10, v20, s30 ; cd01002a 007a290a + //! v_interp_p2_f32 v42, v10, v20, v30 ; cd01002a 047a290a bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0); - //! v_interp_p10_f32 v42, -v10, v20, s30 ; cd00002a 207a290a + //! v_interp_p10_f32 v42, -v10, v20, v30 ; cd00002a 247a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[0] = true; - //! v_interp_p10_f32 v42, v10, -v20, s30 ; cd00002a 407a290a + //! v_interp_p10_f32 v42, v10, -v20, v30 ; cd00002a 447a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[1] = true; - //! v_interp_p10_f32 v42, v10, v20, -s30 ; cd00002a 807a290a + //! v_interp_p10_f32 v42, v10, v20, -v30 ; cd00002a 847a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[2] = true; - //! v_interp_p10_f16_f32 v42, v10, v20, s30 op_sel:[1,0,0,0] ; cd02082a 007a290a + //! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0] ; cd02082a 047a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1); - //! v_interp_p2_f16_f32 v42, v10, v20, s30 op_sel:[0,1,0,0] ; cd03102a 007a290a + //! v_interp_p2_f16_f32 v42, v10, v20, v30 op_sel:[0,1,0,0] ; cd03102a 047a290a bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, dst, op0, op1, op2, 0, 0x2); - //! v_interp_p10_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,1,0] ; cd04202a 007a290a + //! v_interp_p10_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,1,0] ; cd04202a 047a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x4); - //! v_interp_p2_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,0,1] ; cd05402a 007a290a + //! v_interp_p2_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,0,1] ; cd05402a 047a290a bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8); - //! v_interp_p10_f32 v42, v10, v20, s30 clamp ; cd00802a 007a290a + //! v_interp_p10_f32 v42, v10, v20, v30 clamp ; cd00802a 047a290a bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().clamp = true; finish_assembler_test();