aco: Don't use opsel for p_insert.

This doesn't make sense: opsel preserves the unselected half of the register,
whereas p_insert zeros it.

No Foz-DB changes.

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>

Fixes: 54292e99c7 ("aco: optimize 32-bit extracts and inserts using SDWA")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19253>
(cherry picked from commit 616d3908dc)
This commit is contained in:
Georg Lehmann
2022-10-22 16:59:36 +02:00
committed by Dylan Baker
parent 2572a61af1
commit 80f296268e
3 changed files with 13 additions and 23 deletions

View File

@@ -1687,7 +1687,7 @@
"description": "aco: Don't use opsel for p_insert.",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": "54292e99c7844500314bfd623469c65adef954c5"
},

View File

@@ -3245,21 +3245,13 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
SubdwordSel sel = parse_insert(def_info.instr);
assert(sel);
if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
can_use_opsel(ctx.program->gfx_level, instr->opcode, -1)) {
if (instr->vop3().opsel & (1 << 3))
return false;
if (sel.offset())
instr->vop3().opsel |= 1 << 3;
} else {
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
return false;
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
return false;
to_SDWA(ctx, instr);
if (instr->sdwa().dst_sel.size() != 4)
return false;
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
}
to_SDWA(ctx, instr);
if (instr->sdwa().dst_sel.size() != 4)
return false;
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
ctx.info[instr->definitions[0].tempId()].label = 0;

View File

@@ -496,17 +496,15 @@ BEGIN_TEST(optimize.sdwa.insert)
bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u));
writeout(10, val);
//~gfx8! v1: %tmp11 = v_sub_i16 %a, %b
//~gfx8! v1: %res11 = p_insert %tmp11, 0, 16
//~gfx(9|10)! v1: %res11 = v_sub_i16 %a, %b
//~gfx(8|9|10)! p_unit_test 11, %res11
//~gfx[^7]! v1: %tmp11 = v_sub_i16 %a, %b
//~gfx[^7]! v1: %res11 = p_insert %tmp11, 0, 16
//~gfx[^7]! p_unit_test 11, %res11
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
//~gfx8! v1: %tmp12 = v_sub_i16 %a, %b
//~gfx8! v1: %res12 = p_insert %tmp12, 1, 16
//~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi
//~gfx(8|9|10)! p_unit_test 12, %res12
//~gfx[^7]! v1: %tmp12 = v_sub_i16 %a, %b
//~gfx[^7]! v1: %res12 = p_insert %tmp12, 1, 16
//~gfx[^7]! p_unit_test 12, %res12
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));