aco: disallow SGPRs/constants with interpolation instructions

https://reviews.llvm.org/D137575

The VINTRP format cannot encode anything except VGPRs.

Judging by VINTERPInstructions.td, the same appears to hold for GFX11.

No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20251>
This commit is contained in:
Rhys Perry
2022-11-11 19:38:38 +00:00
committed by Marge Bot
parent 5af891a747
commit 254b178d5b
2 changed files with 41 additions and 16 deletions

View File

@@ -597,6 +597,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
bool
can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
assert(instr->isVALU());
if (instr->isSDWA() && ctx.program->gfx_level < GFX9)
return false;
return instr->opcode != aco_opcode::v_readfirstlane_b32 &&
@@ -605,7 +606,20 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
instr->opcode != aco_opcode::v_writelane_b32 &&
instr->opcode != aco_opcode::v_writelane_b32_e64 &&
instr->opcode != aco_opcode::v_permlane16_b32 &&
instr->opcode != aco_opcode::v_permlanex16_b32;
instr->opcode != aco_opcode::v_permlanex16_b32 &&
instr->opcode != aco_opcode::v_interp_p1_f32 &&
instr->opcode != aco_opcode::v_interp_p2_f32 &&
instr->opcode != aco_opcode::v_interp_mov_f32 &&
instr->opcode != aco_opcode::v_interp_p1ll_f16 &&
instr->opcode != aco_opcode::v_interp_p1lv_f16 &&
instr->opcode != aco_opcode::v_interp_p2_legacy_f16 &&
instr->opcode != aco_opcode::v_interp_p2_f16 &&
instr->opcode != aco_opcode::v_interp_p10_f32_inreg &&
instr->opcode != aco_opcode::v_interp_p2_f32_inreg &&
instr->opcode != aco_opcode::v_interp_p10_f16_f32_inreg &&
instr->opcode != aco_opcode::v_interp_p2_f16_f32_inreg &&
instr->opcode != aco_opcode::v_interp_p10_rtz_f16_f32_inreg &&
instr->opcode != aco_opcode::v_interp_p2_rtz_f16_f32_inreg;
}
void
@@ -658,7 +672,6 @@ bool
alu_can_accept_constant(aco_opcode opcode, unsigned operand)
{
switch (opcode) {
case aco_opcode::v_interp_p2_f32:
case aco_opcode::v_mac_f32:
case aco_opcode::v_writelane_b32:
case aco_opcode::v_writelane_b32_e64:
@@ -677,7 +690,19 @@ alu_can_accept_constant(aco_opcode opcode, unsigned operand)
case aco_opcode::p_bpermute_gfx10w64:
case aco_opcode::p_bpermute_gfx11w64:
case aco_opcode::p_interp_gfx11:
case aco_opcode::p_dual_src_export_gfx11: return false;
case aco_opcode::p_dual_src_export_gfx11:
case aco_opcode::v_interp_p1_f32:
case aco_opcode::v_interp_p2_f32:
case aco_opcode::v_interp_mov_f32:
case aco_opcode::v_interp_p1ll_f16:
case aco_opcode::v_interp_p1lv_f16:
case aco_opcode::v_interp_p2_legacy_f16:
case aco_opcode::v_interp_p10_f32_inreg:
case aco_opcode::v_interp_p2_f32_inreg:
case aco_opcode::v_interp_p10_f16_f32_inreg:
case aco_opcode::v_interp_p2_f16_f32_inreg:
case aco_opcode::v_interp_p10_rtz_f16_f32_inreg:
case aco_opcode::v_interp_p2_rtz_f16_f32_inreg: return false;
default: return true;
}
}

View File

@@ -735,40 +735,40 @@ BEGIN_TEST(assembler.gfx11.vinterp)
Operand op1(bld.tmp(v1));
op1.setFixed(PhysReg(256 + 20));
Operand op2(bld.tmp(s1));
op2.setFixed(PhysReg(30));
Operand op2(bld.tmp(v1));
op2.setFixed(PhysReg(256 + 30));
//>> v_interp_p10_f32 v42, v10, v20, s30 wait_exp:7 ; cd00072a 007a290a
//>> v_interp_p10_f32 v42, v10, v20, v30 wait_exp:7 ; cd00072a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2);
//! v_interp_p10_f32 v42, v10, v20, s30 wait_exp:6 ; cd00062a 007a290a
//! v_interp_p10_f32 v42, v10, v20, v30 wait_exp:6 ; cd00062a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6);
//! v_interp_p2_f32 v42, v10, v20, s30 ; cd01002a 007a290a
//! v_interp_p2_f32 v42, v10, v20, v30 ; cd01002a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0);
//! v_interp_p10_f32 v42, -v10, v20, s30 ; cd00002a 207a290a
//! v_interp_p10_f32 v42, -v10, v20, v30 ; cd00002a 247a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[0] = true;
//! v_interp_p10_f32 v42, v10, -v20, s30 ; cd00002a 407a290a
//! v_interp_p10_f32 v42, v10, -v20, v30 ; cd00002a 447a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[1] = true;
//! v_interp_p10_f32 v42, v10, v20, -s30 ; cd00002a 807a290a
//! v_interp_p10_f32 v42, v10, v20, -v30 ; cd00002a 847a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[2] = true;
//! v_interp_p10_f16_f32 v42, v10, v20, s30 op_sel:[1,0,0,0] ; cd02082a 007a290a
//! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0] ; cd02082a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1);
//! v_interp_p2_f16_f32 v42, v10, v20, s30 op_sel:[0,1,0,0] ; cd03102a 007a290a
//! v_interp_p2_f16_f32 v42, v10, v20, v30 op_sel:[0,1,0,0] ; cd03102a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, dst, op0, op1, op2, 0, 0x2);
//! v_interp_p10_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,1,0] ; cd04202a 007a290a
//! v_interp_p10_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,1,0] ; cd04202a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x4);
//! v_interp_p2_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,0,1] ; cd05402a 007a290a
//! v_interp_p2_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,0,1] ; cd05402a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8);
//! v_interp_p10_f32 v42, v10, v20, s30 clamp ; cd00802a 007a290a
//! v_interp_p10_f32 v42, v10, v20, v30 clamp ; cd00802a 047a290a
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().clamp = true;
finish_assembler_test();