# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# Files touched:
#   1. .pick_status.json
#      The entry described as "aco: fix >8 byte linear vgpr copies" has its "resolution"
#      field changed from 0 to 1.
#      NOTE(review): the meaning of resolution value 1 is not visible here - confirm against
#      the tool that maintains this file.
#
#   2. src/amd/compiler/aco_lower_to_hw_instr.cpp (hunk inside handle_operands)
#      In the `it->second.bytes > 8` branch a copy is split into a low 8-byte part and a
#      high remainder placed at register offset +2:
#        - the high-part RegClass for both definition and operand is now derived with
#          regClass().resize(bytes() - 8) instead of RegClass(type(), size() - 2);
#        - the low-part operand/definition now use regClass().resize(8) instead of picking
#          the hard-coded s2 / v2 classes from the register type.
#      NOTE(review): presumably resize() keeps the other attributes of the original class
#      (e.g. linear-ness), which the old constructor call and the s2/v2 constants did not;
#      the new test below expects lv2/lv1 results - confirm in the RegClass definition.
#
#   3. src/amd/compiler/tests/test_to_hw_instr.cpp
#      Adds the test to_hw_instr.copy_linear_vgpr_v3. It sets up a GFX10 compute shader,
#      emits a p_parallelcopy of a v3.as_linear() value from reg_v4 (PhysReg 256 + 4) to
#      reg_v0 (PhysReg 256) with scratch_sgpr = m0, and expects:
#        - v_lshrrev_b64 0 copying v[4-5] into v[0-1] as lv2, emitted twice with an
#          s_not_b64 of exec after each;
#        - v_mov_b32 copying v[6] into v[2] as lv1, emitted twice with an s_not_b64 of
#          exec after each.
#      NOTE(review): presumably each copy is repeated with exec inverted so that both
#      active and inactive lanes are covered - confirm against the linear-VGPR copy code.
#
# NOTE(review): in this copy of the patch the line breaks inside the hunks appear to have
# been collapsed into spaces, and "std::map& copy_map" in the second hunk header looks like
# it lost its template arguments. It will presumably not apply as-is; verify against the
# upstream commit before use.
diff --git a/.pick_status.json b/.pick_status.json index 95c04c06286..6b1136d4942 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -284,7 +284,7 @@ "description": "aco: fix >8 byte linear vgpr copies", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index edadca72bb9..657aa1a2f77 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1817,17 +1817,15 @@ handle_operands(std::map& copy_map, lower_context* ctx, if (it->second.bytes > 8) { assert(!it->second.op.isConstant()); assert(!it->second.def.regClass().is_subdword()); - RegClass rc = RegClass(it->second.def.regClass().type(), it->second.def.size() - 2); + RegClass rc = it->second.def.regClass().resize(it->second.def.bytes() - 8); Definition hi_def = Definition(PhysReg{it->first + 2}, rc); - rc = RegClass(it->second.op.regClass().type(), it->second.op.size() - 2); + rc = it->second.op.regClass().resize(it->second.op.bytes() - 8); Operand hi_op = Operand(PhysReg{it->second.op.physReg() + 2}, rc); copy_operation copy = {hi_op, hi_def, it->second.bytes - 8}; copy_map[hi_def.physReg()] = copy; assert(it->second.op.physReg().byte() == 0 && it->second.def.physReg().byte() == 0); - it->second.op = Operand(it->second.op.physReg(), - it->second.op.regClass().type() == RegType::sgpr ? s2 : v2); - it->second.def = Definition(it->second.def.physReg(), - it->second.def.regClass().type() == RegType::sgpr ? 
s2 : v2); + it->second.op = Operand(it->second.op.physReg(), it->second.op.regClass().resize(8)); + it->second.def = Definition(it->second.def.physReg(), it->second.def.regClass().resize(8)); it->second.bytes = 8; } diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index c067c83fe63..73084fa6652 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -809,6 +809,32 @@ BEGIN_TEST(to_hw_instr.swap_linear_vgpr) finish_to_hw_instr_test(); END_TEST +BEGIN_TEST(to_hw_instr.copy_linear_vgpr_v3) + if (!setup_cs(NULL, GFX10)) + return; + + PhysReg reg_v0{256}; + PhysReg reg_v4{256 + 4}; + RegClass v3_linear = v3.as_linear(); + + //>> p_unit_test 0 + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); + + //! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] + //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec + //! lv2: %0:v[0-1] = v_lshrrev_b64 0, %0:v[4-5] + //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec + //! lv1: %0:v[2] = v_mov_b32 %0:v[6] + //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec + //! lv1: %0:v[2] = v_mov_b32 %0:v[6] + //! s2: %0:exec, s1: %0:scc = s_not_b64 %0:exec + Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v3_linear), + Operand(reg_v4, v3_linear)); + instr->pseudo().scratch_sgpr = m0; + + finish_to_hw_instr_test(); +END_TEST + BEGIN_TEST(to_hw_instr.pack2x16_constant) PhysReg v0_lo{256}; PhysReg v0_hi{256};