aco: Don't use opsel for p_insert.

Using opsel here doesn't make sense: opsel preserves the unselected half of the
destination register, whereas p_insert zeros it.

No Foz-DB changes.

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>

Fixes: 54292e99c7 ("aco: optimize 32-bit extracts and inserts using SDWA")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19253>
This commit is contained in:
Georg Lehmann
2022-10-22 16:59:36 +02:00
committed by Marge Bot
parent 79a8a7662b
commit 616d3908dc
2 changed files with 12 additions and 22 deletions

View File

@@ -3201,21 +3201,13 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
SubdwordSel sel = parse_insert(def_info.instr);
assert(sel);
if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
can_use_opsel(ctx.program->gfx_level, instr->opcode, -1)) {
if (instr->vop3().opsel & (1 << 3))
return false;
if (sel.offset())
instr->vop3().opsel |= 1 << 3;
} else {
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
return false;
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
return false;
to_SDWA(ctx, instr);
if (instr->sdwa().dst_sel.size() != 4)
return false;
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
}
to_SDWA(ctx, instr);
if (instr->sdwa().dst_sel.size() != 4)
return false;
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
ctx.info[instr->definitions[0].tempId()].label = 0;

View File

@@ -496,17 +496,15 @@ BEGIN_TEST(optimize.sdwa.insert)
bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u));
writeout(10, val);
//~gfx8! v1: %tmp11 = v_sub_i16 %a, %b
//~gfx8! v1: %res11 = p_insert %tmp11, 0, 16
//~gfx(9|10)! v1: %res11 = v_sub_i16 %a, %b
//~gfx(8|9|10)! p_unit_test 11, %res11
//~gfx[^7]! v1: %tmp11 = v_sub_i16 %a, %b
//~gfx[^7]! v1: %res11 = p_insert %tmp11, 0, 16
//~gfx[^7]! p_unit_test 11, %res11
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
//~gfx8! v1: %tmp12 = v_sub_i16 %a, %b
//~gfx8! v1: %res12 = p_insert %tmp12, 1, 16
//~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi
//~gfx(8|9|10)! p_unit_test 12, %res12
//~gfx[^7]! v1: %tmp12 = v_sub_i16 %a, %b
//~gfx[^7]! v1: %res12 = p_insert %tmp12, 1, 16
//~gfx[^7]! p_unit_test 12, %res12
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));