aco: Don't use opsel for p_insert.
This doesn't make sense: opsel preserves the unselected half of the register,
whereas p_insert zeros it.
No Foz-DB changes.
Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: 54292e99c7 ("aco: optimize 32-bit extracts and inserts using SDWA")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19253>
This commit is contained in:
@@ -3201,21 +3201,13 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
SubdwordSel sel = parse_insert(def_info.instr);
|
||||
assert(sel);
|
||||
|
||||
if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
|
||||
can_use_opsel(ctx.program->gfx_level, instr->opcode, -1)) {
|
||||
if (instr->vop3().opsel & (1 << 3))
|
||||
return false;
|
||||
if (sel.offset())
|
||||
instr->vop3().opsel |= 1 << 3;
|
||||
} else {
|
||||
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
|
||||
return false;
|
||||
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
|
||||
return false;
|
||||
|
||||
to_SDWA(ctx, instr);
|
||||
if (instr->sdwa().dst_sel.size() != 4)
|
||||
return false;
|
||||
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
|
||||
}
|
||||
to_SDWA(ctx, instr);
|
||||
if (instr->sdwa().dst_sel.size() != 4)
|
||||
return false;
|
||||
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
|
||||
|
||||
instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
|
||||
ctx.info[instr->definitions[0].tempId()].label = 0;
|
||||
|
@@ -496,17 +496,15 @@ BEGIN_TEST(optimize.sdwa.insert)
|
||||
bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u));
|
||||
writeout(10, val);
|
||||
|
||||
//~gfx8! v1: %tmp11 = v_sub_i16 %a, %b
|
||||
//~gfx8! v1: %res11 = p_insert %tmp11, 0, 16
|
||||
//~gfx(9|10)! v1: %res11 = v_sub_i16 %a, %b
|
||||
//~gfx(8|9|10)! p_unit_test 11, %res11
|
||||
//~gfx[^7]! v1: %tmp11 = v_sub_i16 %a, %b
|
||||
//~gfx[^7]! v1: %res11 = p_insert %tmp11, 0, 16
|
||||
//~gfx[^7]! p_unit_test 11, %res11
|
||||
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
|
||||
writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u)));
|
||||
|
||||
//~gfx8! v1: %tmp12 = v_sub_i16 %a, %b
|
||||
//~gfx8! v1: %res12 = p_insert %tmp12, 1, 16
|
||||
//~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi
|
||||
//~gfx(8|9|10)! p_unit_test 12, %res12
|
||||
//~gfx[^7]! v1: %tmp12 = v_sub_i16 %a, %b
|
||||
//~gfx[^7]! v1: %res12 = p_insert %tmp12, 1, 16
|
||||
//~gfx[^7]! p_unit_test 12, %res12
|
||||
val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]);
|
||||
writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)));
|
||||
|
||||
|
Reference in New Issue
Block a user