aco: disallow SGPRs/constants with interpolation instructions
See https://reviews.llvm.org/D137575: the VINTRP format cannot encode anything except VGPRs. Judging by VINTERPInstructions.td, it looks like the same is true for GFX11. No fossil-db changes. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20251>
This commit is contained in:
@@ -597,6 +597,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
|
||||
bool
|
||||
can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
assert(instr->isVALU());
|
||||
if (instr->isSDWA() && ctx.program->gfx_level < GFX9)
|
||||
return false;
|
||||
return instr->opcode != aco_opcode::v_readfirstlane_b32 &&
|
||||
@@ -605,7 +606,20 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
instr->opcode != aco_opcode::v_writelane_b32 &&
|
||||
instr->opcode != aco_opcode::v_writelane_b32_e64 &&
|
||||
instr->opcode != aco_opcode::v_permlane16_b32 &&
|
||||
instr->opcode != aco_opcode::v_permlanex16_b32;
|
||||
instr->opcode != aco_opcode::v_permlanex16_b32 &&
|
||||
instr->opcode != aco_opcode::v_interp_p1_f32 &&
|
||||
instr->opcode != aco_opcode::v_interp_p2_f32 &&
|
||||
instr->opcode != aco_opcode::v_interp_mov_f32 &&
|
||||
instr->opcode != aco_opcode::v_interp_p1ll_f16 &&
|
||||
instr->opcode != aco_opcode::v_interp_p1lv_f16 &&
|
||||
instr->opcode != aco_opcode::v_interp_p2_legacy_f16 &&
|
||||
instr->opcode != aco_opcode::v_interp_p2_f16 &&
|
||||
instr->opcode != aco_opcode::v_interp_p10_f32_inreg &&
|
||||
instr->opcode != aco_opcode::v_interp_p2_f32_inreg &&
|
||||
instr->opcode != aco_opcode::v_interp_p10_f16_f32_inreg &&
|
||||
instr->opcode != aco_opcode::v_interp_p2_f16_f32_inreg &&
|
||||
instr->opcode != aco_opcode::v_interp_p10_rtz_f16_f32_inreg &&
|
||||
instr->opcode != aco_opcode::v_interp_p2_rtz_f16_f32_inreg;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -658,7 +672,6 @@ bool
|
||||
alu_can_accept_constant(aco_opcode opcode, unsigned operand)
|
||||
{
|
||||
switch (opcode) {
|
||||
case aco_opcode::v_interp_p2_f32:
|
||||
case aco_opcode::v_mac_f32:
|
||||
case aco_opcode::v_writelane_b32:
|
||||
case aco_opcode::v_writelane_b32_e64:
|
||||
@@ -677,7 +690,19 @@ alu_can_accept_constant(aco_opcode opcode, unsigned operand)
|
||||
case aco_opcode::p_bpermute_gfx10w64:
|
||||
case aco_opcode::p_bpermute_gfx11w64:
|
||||
case aco_opcode::p_interp_gfx11:
|
||||
case aco_opcode::p_dual_src_export_gfx11: return false;
|
||||
case aco_opcode::p_dual_src_export_gfx11:
|
||||
case aco_opcode::v_interp_p1_f32:
|
||||
case aco_opcode::v_interp_p2_f32:
|
||||
case aco_opcode::v_interp_mov_f32:
|
||||
case aco_opcode::v_interp_p1ll_f16:
|
||||
case aco_opcode::v_interp_p1lv_f16:
|
||||
case aco_opcode::v_interp_p2_legacy_f16:
|
||||
case aco_opcode::v_interp_p10_f32_inreg:
|
||||
case aco_opcode::v_interp_p2_f32_inreg:
|
||||
case aco_opcode::v_interp_p10_f16_f32_inreg:
|
||||
case aco_opcode::v_interp_p2_f16_f32_inreg:
|
||||
case aco_opcode::v_interp_p10_rtz_f16_f32_inreg:
|
||||
case aco_opcode::v_interp_p2_rtz_f16_f32_inreg: return false;
|
||||
default: return true;
|
||||
}
|
||||
}
|
||||
|
@@ -735,40 +735,40 @@ BEGIN_TEST(assembler.gfx11.vinterp)
|
||||
Operand op1(bld.tmp(v1));
|
||||
op1.setFixed(PhysReg(256 + 20));
|
||||
|
||||
Operand op2(bld.tmp(s1));
|
||||
op2.setFixed(PhysReg(30));
|
||||
Operand op2(bld.tmp(v1));
|
||||
op2.setFixed(PhysReg(256 + 30));
|
||||
|
||||
//>> v_interp_p10_f32 v42, v10, v20, s30 wait_exp:7 ; cd00072a 007a290a
|
||||
//>> v_interp_p10_f32 v42, v10, v20, v30 wait_exp:7 ; cd00072a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2);
|
||||
|
||||
//! v_interp_p10_f32 v42, v10, v20, s30 wait_exp:6 ; cd00062a 007a290a
|
||||
//! v_interp_p10_f32 v42, v10, v20, v30 wait_exp:6 ; cd00062a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 6);
|
||||
|
||||
//! v_interp_p2_f32 v42, v10, v20, s30 ; cd01002a 007a290a
|
||||
//! v_interp_p2_f32 v42, v10, v20, v30 ; cd01002a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0);
|
||||
|
||||
//! v_interp_p10_f32 v42, -v10, v20, s30 ; cd00002a 207a290a
|
||||
//! v_interp_p10_f32 v42, -v10, v20, v30 ; cd00002a 247a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[0] = true;
|
||||
|
||||
//! v_interp_p10_f32 v42, v10, -v20, s30 ; cd00002a 407a290a
|
||||
//! v_interp_p10_f32 v42, v10, -v20, v30 ; cd00002a 447a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[1] = true;
|
||||
|
||||
//! v_interp_p10_f32 v42, v10, v20, -s30 ; cd00002a 807a290a
|
||||
//! v_interp_p10_f32 v42, v10, v20, -v30 ; cd00002a 847a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[2] = true;
|
||||
|
||||
//! v_interp_p10_f16_f32 v42, v10, v20, s30 op_sel:[1,0,0,0] ; cd02082a 007a290a
|
||||
//! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0] ; cd02082a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1);
|
||||
|
||||
//! v_interp_p2_f16_f32 v42, v10, v20, s30 op_sel:[0,1,0,0] ; cd03102a 007a290a
|
||||
//! v_interp_p2_f16_f32 v42, v10, v20, v30 op_sel:[0,1,0,0] ; cd03102a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, dst, op0, op1, op2, 0, 0x2);
|
||||
|
||||
//! v_interp_p10_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,1,0] ; cd04202a 007a290a
|
||||
//! v_interp_p10_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,1,0] ; cd04202a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x4);
|
||||
|
||||
//! v_interp_p2_rtz_f16_f32 v42, v10, v20, s30 op_sel:[0,0,0,1] ; cd05402a 007a290a
|
||||
//! v_interp_p2_rtz_f16_f32 v42, v10, v20, v30 op_sel:[0,0,0,1] ; cd05402a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8);
|
||||
|
||||
//! v_interp_p10_f32 v42, v10, v20, s30 clamp ; cd00802a 007a290a
|
||||
//! v_interp_p10_f32 v42, v10, v20, v30 clamp ; cd00802a 047a290a
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().clamp = true;
|
||||
|
||||
finish_assembler_test();
|
||||
|
Reference in New Issue
Block a user