diff --git a/.pick_status.json b/.pick_status.json index d1785690af7..5e34f402f6c 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1687,7 +1687,7 @@ "description": "aco: Don't use opsel for p_insert.", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "54292e99c7844500314bfd623469c65adef954c5" }, diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index a3f8555e15e..94fbfabded4 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -3245,21 +3245,13 @@ apply_insert(opt_ctx& ctx, aco_ptr& instr) SubdwordSel sel = parse_insert(def_info.instr); assert(sel); - if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() && - can_use_opsel(ctx.program->gfx_level, instr->opcode, -1)) { - if (instr->vop3().opsel & (1 << 3)) - return false; - if (sel.offset()) - instr->vop3().opsel |= 1 << 3; - } else { - if (!can_use_SDWA(ctx.program->gfx_level, instr, true)) - return false; + if (!can_use_SDWA(ctx.program->gfx_level, instr, true)) + return false; - to_SDWA(ctx, instr); - if (instr->sdwa().dst_sel.size() != 4) - return false; - static_cast(instr.get())->dst_sel = sel; - } + to_SDWA(ctx, instr); + if (instr->sdwa().dst_sel.size() != 4) + return false; + static_cast(instr.get())->dst_sel = sel; instr->definitions[0].swapTemp(def_info.instr->definitions[0]); ctx.info[instr->definitions[0].tempId()].label = 0; diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp index 8d784ef6883..9df87eb568c 100644 --- a/src/amd/compiler/tests/test_sdwa.cpp +++ b/src/amd/compiler/tests/test_sdwa.cpp @@ -496,17 +496,15 @@ BEGIN_TEST(optimize.sdwa.insert) bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)); writeout(10, val); - //~gfx8! v1: %tmp11 = v_sub_i16 %a, %b - //~gfx8! v1: %res11 = p_insert %tmp11, 0, 16 - //~gfx(9|10)! v1: %res11 = v_sub_i16 %a, %b - //~gfx(8|9|10)! p_unit_test 11, %res11 + //~gfx[^7]! v1: %tmp11 = v_sub_i16 %a, %b + //~gfx[^7]! v1: %res11 = p_insert %tmp11, 0, 16 + //~gfx[^7]! p_unit_test 11, %res11 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]); writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u))); - //~gfx8! v1: %tmp12 = v_sub_i16 %a, %b - //~gfx8! v1: %res12 = p_insert %tmp12, 1, 16 - //~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi - //~gfx(8|9|10)! p_unit_test 12, %res12 + //~gfx[^7]! v1: %tmp12 = v_sub_i16 %a, %b + //~gfx[^7]! v1: %res12 = p_insert %tmp12, 1, 16 + //~gfx[^7]! p_unit_test 12, %res12 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]); writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));