aco: remove Format::{VOP3A,VOP3B}
These are really the same as Format::VOP3.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8595>
This commit is contained in:
@@ -563,8 +563,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
||||
unreachable("Pseudo instructions should be lowered before assembly.");
|
||||
break;
|
||||
default:
|
||||
if ((uint16_t) instr->format & (uint16_t) Format::VOP3A) {
|
||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr);
|
||||
if ((uint16_t) instr->format & (uint16_t) Format::VOP3) {
|
||||
VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(instr);
|
||||
|
||||
if ((uint16_t) instr->format & (uint16_t) Format::VOP2) {
|
||||
opcode = opcode + 0x100;
|
||||
|
@@ -484,7 +484,7 @@ public:
|
||||
int num_defs = carry_out ? 2 : 1;
|
||||
aco_ptr<Instruction> sub;
|
||||
if (vop3)
|
||||
sub.reset(create_instruction<VOP3A_instruction>(op, Format::VOP3B, num_ops, num_defs));
|
||||
sub.reset(create_instruction<VOP3_instruction>(op, Format::VOP3, num_ops, num_defs));
|
||||
else
|
||||
sub.reset(create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs));
|
||||
sub->operands[0] = a.op;
|
||||
@@ -534,15 +534,15 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
|
||||
("vop2", [Format.VOP2], 'VOP2_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vopc", [Format.VOPC], 'VOPC_instruction', itertools.product([1, 2], [2])),
|
||||
("vop3", [Format.VOP3A], 'VOP3A_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
|
||||
("vop3", [Format.VOP3], 'VOP3_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
|
||||
("vop3p", [Format.VOP3P], 'VOP3P_instruction', [(1, 2), (1, 3)]),
|
||||
("vintrp", [Format.VINTRP], 'Interp_instruction', [(1, 2), (1, 3)]),
|
||||
("vop1_dpp", [Format.VOP1, Format.DPP], 'DPP_instruction', [(1, 1)]),
|
||||
("vop2_dpp", [Format.VOP2, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vopc_dpp", [Format.VOPC, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2])),
|
||||
("vop1_e64", [Format.VOP1, Format.VOP3A], 'VOP3A_instruction', itertools.product([1], [1])),
|
||||
("vop2_e64", [Format.VOP2, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vopc_e64", [Format.VOPC, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2])),
|
||||
("vop1_e64", [Format.VOP1, Format.VOP3], 'VOP3_instruction', itertools.product([1], [1])),
|
||||
("vop2_e64", [Format.VOP2, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2, 3])),
|
||||
("vopc_e64", [Format.VOPC, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2])),
|
||||
("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
|
||||
("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)])]
|
||||
formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
|
||||
|
@@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
|
||||
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
|
||||
|
||||
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
|
||||
static_cast<VOP3A_instruction*>(add)->neg[1] = true;
|
||||
static_cast<VOP3_instruction*>(add)->neg[1] = true;
|
||||
|
||||
return add->definitions[0].getTemp();
|
||||
}
|
||||
@@ -1692,10 +1692,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
||||
std::swap(src0, src1);
|
||||
add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
|
||||
}
|
||||
static_cast<VOP3A_instruction*>(add_instr)->clamp = 1;
|
||||
static_cast<VOP3_instruction*>(add_instr)->clamp = 1;
|
||||
} else if (dst.regClass() == v1) {
|
||||
if (ctx->options->chip_class >= GFX9) {
|
||||
aco_ptr<VOP3A_instruction> add{create_instruction<VOP3A_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
|
||||
aco_ptr<VOP3_instruction> add{create_instruction<VOP3_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
|
||||
add->operands[0] = Operand(src0);
|
||||
add->operands[1] = Operand(src1);
|
||||
add->definitions[0] = Definition(dst);
|
||||
@@ -1965,7 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
||||
} else if (dst.regClass() == v2) {
|
||||
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst),
|
||||
as_vgpr(ctx, src0), as_vgpr(ctx, src1));
|
||||
VOP3A_instruction* sub = static_cast<VOP3A_instruction*>(add);
|
||||
VOP3_instruction* sub = static_cast<VOP3_instruction*>(add);
|
||||
sub->neg[1] = true;
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
@@ -2115,7 +2115,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
||||
// TODO: confirm that this holds under any circumstances
|
||||
} else if (dst.regClass() == v2) {
|
||||
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u));
|
||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(add);
|
||||
VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(add);
|
||||
vop3->clamp = true;
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
@@ -2255,12 +2255,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
||||
Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi));
|
||||
Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
|
||||
Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
|
||||
static_cast<VOP3A_instruction*>(sub)->neg[1] = true;
|
||||
static_cast<VOP3_instruction*>(sub)->neg[1] = true;
|
||||
tmp = sub->definitions[0].getTemp();
|
||||
|
||||
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu));
|
||||
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v);
|
||||
static_cast<VOP3A_instruction*>(vop3)->abs[0] = true;
|
||||
static_cast<VOP3_instruction*>(vop3)->abs[0] = true;
|
||||
Temp cond = vop3->definitions[0].getTemp();
|
||||
|
||||
Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
|
||||
@@ -2926,7 +2926,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
||||
f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
|
||||
Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u));
|
||||
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest);
|
||||
static_cast<VOP3A_instruction*>(vop3)->abs[0] = true;
|
||||
static_cast<VOP3_instruction*>(vop3)->abs[0] = true;
|
||||
cmp_res = vop3->definitions[0].getTemp();
|
||||
}
|
||||
|
||||
@@ -8847,7 +8847,7 @@ void prepare_cube_coords(isel_context *ctx, std::vector<Temp>& coords, Temp* ddx
|
||||
|
||||
ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
|
||||
|
||||
aco_ptr<VOP3A_instruction> vop3a{create_instruction<VOP3A_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
|
||||
aco_ptr<VOP3_instruction> vop3a{create_instruction<VOP3_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
|
||||
vop3a->operands[0] = Operand(ma);
|
||||
vop3a->abs[0] = true;
|
||||
Temp invma = bld.tmp(v1);
|
||||
|
@@ -170,7 +170,7 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
|
||||
return true;
|
||||
|
||||
if (instr->isVOP3()) {
|
||||
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get());
|
||||
VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get());
|
||||
if (instr->format == Format::VOP3)
|
||||
return false;
|
||||
if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
|
||||
@@ -235,7 +235,7 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
|
||||
SDWA_instruction *sdwa = static_cast<SDWA_instruction*>(instr.get());
|
||||
|
||||
if (tmp->isVOP3()) {
|
||||
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(tmp.get());
|
||||
VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(tmp.get());
|
||||
memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg));
|
||||
memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs));
|
||||
sdwa->omod = vop3->omod;
|
||||
|
@@ -60,7 +60,7 @@ enum {
|
||||
/**
|
||||
* Representation of the instruction's microcode encoding format
|
||||
* Note: Some Vector ALU Formats can be combined, such that:
|
||||
* - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
|
||||
* - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding
|
||||
* - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
|
||||
* - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
|
||||
*
|
||||
@@ -101,8 +101,6 @@ enum class Format : std::uint16_t {
|
||||
VOP2 = 1 << 9,
|
||||
VOPC = 1 << 10,
|
||||
VOP3 = 1 << 11,
|
||||
VOP3A = 1 << 11,
|
||||
VOP3B = 1 << 11,
|
||||
/* Vector Parameter Interpolation Format */
|
||||
VINTRP = 1 << 12,
|
||||
DPP = 1 << 13,
|
||||
@@ -1001,8 +999,7 @@ struct Instruction {
|
||||
return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
|
||||
|| ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
|
||||
|| ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
|
||||
|| ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
|
||||
|| ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
|
||||
|| ((uint16_t) format & (uint16_t) Format::VOP3) == (uint16_t) Format::VOP3
|
||||
|| format == Format::VOP3P;
|
||||
}
|
||||
|
||||
@@ -1029,8 +1026,7 @@ struct Instruction {
|
||||
|
||||
constexpr bool isVOP3() const noexcept
|
||||
{
|
||||
return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
|
||||
((uint16_t) format & (uint16_t) Format::VOP3B);
|
||||
return (uint16_t) format & (uint16_t) Format::VOP3;
|
||||
}
|
||||
|
||||
constexpr bool isSDWA() const noexcept
|
||||
@@ -1114,7 +1110,7 @@ struct VOPC_instruction : public Instruction {
|
||||
};
|
||||
static_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
|
||||
|
||||
struct VOP3A_instruction : public Instruction {
|
||||
struct VOP3_instruction : public Instruction {
|
||||
bool abs[3];
|
||||
bool neg[3];
|
||||
uint8_t opsel : 4;
|
||||
@@ -1123,7 +1119,7 @@ struct VOP3A_instruction : public Instruction {
|
||||
uint8_t padding0 : 1;
|
||||
uint8_t padding1;
|
||||
};
|
||||
static_assert(sizeof(VOP3A_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||
|
||||
struct VOP3P_instruction : public Instruction {
|
||||
bool neg_lo[3];
|
||||
@@ -1450,7 +1446,7 @@ constexpr bool Instruction::usesModifiers() const noexcept
|
||||
}
|
||||
return vop3p->opsel_lo || vop3p->clamp;
|
||||
} else if (isVOP3()) {
|
||||
const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
|
||||
const VOP3_instruction *vop3 = static_cast<const VOP3_instruction*>(this);
|
||||
for (unsigned i = 0; i < operands.size(); i++) {
|
||||
if (vop3->abs[i] || vop3->neg[i])
|
||||
return true;
|
||||
|
@@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
|
||||
Definition(PhysReg{vtmp+i}, v1),
|
||||
Operand(PhysReg{tmp+i}, v1),
|
||||
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
|
||||
static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
|
||||
static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
|
||||
}
|
||||
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
|
||||
|
||||
@@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
|
||||
Definition(PhysReg{vtmp+i}, v1),
|
||||
Operand(PhysReg{tmp+i}, v1),
|
||||
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
|
||||
static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
|
||||
static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
|
||||
}
|
||||
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
|
||||
|
||||
@@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op)
|
||||
if (dst.physReg().byte() == 2) {
|
||||
Operand def_lo(dst.physReg().advance(-2), v2b);
|
||||
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
|
||||
static_cast<VOP3A_instruction*>(instr)->opsel = 0;
|
||||
static_cast<VOP3_instruction*>(instr)->opsel = 0;
|
||||
} else {
|
||||
assert(dst.physReg().byte() == 0);
|
||||
Operand def_hi(dst.physReg().advance(2), v2b);
|
||||
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
|
||||
static_cast<VOP3A_instruction*>(instr)->opsel = 2;
|
||||
static_cast<VOP3_instruction*>(instr)->opsel = 2;
|
||||
}
|
||||
} else {
|
||||
uint32_t offset = dst.physReg().byte() * 8u;
|
||||
@@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo,
|
||||
if (can_use_pack) {
|
||||
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
|
||||
/* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */
|
||||
static_cast<VOP3A_instruction*>(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
|
||||
static_cast<VOP3_instruction*>(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@@ -53,8 +53,7 @@ class Format(Enum):
|
||||
VOP1 = 1 << 8
|
||||
VOP2 = 1 << 9
|
||||
VOPC = 1 << 10
|
||||
VOP3A = 1 << 11
|
||||
VOP3B = 1 << 11
|
||||
VOP3 = 1 << 11
|
||||
VINTRP = 1 << 12
|
||||
DPP = 1 << 13
|
||||
SDWA = 1 << 14
|
||||
@@ -1082,7 +1081,7 @@ VOP3 = {
|
||||
( -1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True), #GFX10.3+
|
||||
}
|
||||
for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP3:
|
||||
opcode(name, gfx7, gfx9, gfx10, Format.VOP3A, in_mod, out_mod)
|
||||
opcode(name, gfx7, gfx9, gfx10, Format.VOP3, in_mod, out_mod)
|
||||
|
||||
|
||||
# DS instructions: 3 inputs (1 addr, 2 data), 1 output
|
||||
|
@@ -81,7 +81,7 @@ struct InstrHash {
|
||||
std::size_t operator()(Instruction* instr) const
|
||||
{
|
||||
if (instr->isVOP3())
|
||||
return hash_murmur_32<VOP3A_instruction>(instr);
|
||||
return hash_murmur_32<VOP3_instruction>(instr);
|
||||
|
||||
if (instr->isDPP())
|
||||
return hash_murmur_32<DPP_instruction>(instr);
|
||||
@@ -178,8 +178,8 @@ struct InstrPred {
|
||||
return false;
|
||||
|
||||
if (a->isVOP3()) {
|
||||
VOP3A_instruction* a3 = static_cast<VOP3A_instruction*>(a);
|
||||
VOP3A_instruction* b3 = static_cast<VOP3A_instruction*>(b);
|
||||
VOP3_instruction* a3 = static_cast<VOP3_instruction*>(a);
|
||||
VOP3_instruction* b3 = static_cast<VOP3_instruction*>(b);
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (a3->abs[i] != b3->abs[i] ||
|
||||
a3->neg[i] != b3->neg[i])
|
||||
|
@@ -706,7 +706,7 @@ void to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
|
||||
aco_ptr<Instruction> tmp = std::move(instr);
|
||||
Format format = asVOP3(tmp->format);
|
||||
instr.reset(create_instruction<VOP3A_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
|
||||
instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
|
||||
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
|
||||
for (unsigned i = 0; i < instr->definitions.size(); i++) {
|
||||
instr->definitions[i] = tmp->definitions[i];
|
||||
@@ -953,7 +953,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
||||
else if (instr->isSDWA())
|
||||
static_cast<SDWA_instruction*>(instr.get())->abs[i] = true;
|
||||
else
|
||||
static_cast<VOP3A_instruction*>(instr.get())->abs[i] = true;
|
||||
static_cast<VOP3_instruction*>(instr.get())->abs[i] = true;
|
||||
}
|
||||
if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) {
|
||||
instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
|
||||
@@ -972,7 +972,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
||||
else if (instr->isSDWA())
|
||||
static_cast<SDWA_instruction*>(instr.get())->neg[i] = true;
|
||||
else
|
||||
static_cast<VOP3A_instruction*>(instr.get())->neg[i] = true;
|
||||
static_cast<VOP3_instruction*>(instr.get())->neg[i] = true;
|
||||
continue;
|
||||
}
|
||||
unsigned bits = get_operand_size(instr, i);
|
||||
@@ -1365,7 +1365,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
||||
}
|
||||
case aco_opcode::v_med3_f16:
|
||||
case aco_opcode::v_med3_f32: { /* clamp */
|
||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
|
||||
VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(instr.get());
|
||||
if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
|
||||
vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
|
||||
vop3->omod != 0 || vop3->opsel != 0)
|
||||
@@ -1682,7 +1682,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||
return false;
|
||||
|
||||
if (op_instr[i]->isVOP3()) {
|
||||
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(op_instr[i]);
|
||||
VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(op_instr[i]);
|
||||
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
|
||||
return false;
|
||||
neg[i] = vop3->neg[0];
|
||||
@@ -1726,7 +1726,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||
}
|
||||
Instruction *new_instr;
|
||||
if (neg[0] || neg[1] || abs[0] || abs[1] || opsel || num_sgprs > 1) {
|
||||
VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3_instruction *vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
vop3->neg[i] = neg[i];
|
||||
vop3->abs[i] = abs[i];
|
||||
@@ -1797,8 +1797,8 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||
aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
|
||||
Instruction *new_instr;
|
||||
if (cmp->isVOP3()) {
|
||||
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
||||
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
|
||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3->clamp;
|
||||
@@ -1885,7 +1885,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
|
||||
return false;
|
||||
|
||||
if (nan_test->isVOP3()) {
|
||||
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(nan_test);
|
||||
VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(nan_test);
|
||||
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
|
||||
return false;
|
||||
}
|
||||
@@ -1916,8 +1916,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
|
||||
aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
|
||||
Instruction *new_instr;
|
||||
if (cmp->isVOP3()) {
|
||||
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
||||
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
|
||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3->clamp;
|
||||
@@ -1965,8 +1965,8 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||
* comparison so that the comparison is done with the correct exec mask. */
|
||||
Instruction *new_instr;
|
||||
if (cmp->isVOP3()) {
|
||||
VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
|
||||
VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
|
||||
VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
|
||||
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
|
||||
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
|
||||
new_vop3->clamp = cmp_vop3->clamp;
|
||||
@@ -2019,8 +2019,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
|
||||
if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1]))
|
||||
return false;
|
||||
|
||||
VOP3A_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast<VOP3A_instruction *>(op1_instr) : NULL;
|
||||
VOP3A_instruction *op2_vop3 = op2_instr->isVOP3() ? static_cast<VOP3A_instruction *>(op2_instr) : NULL;
|
||||
VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast<VOP3_instruction *>(op1_instr) : NULL;
|
||||
VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? static_cast<VOP3_instruction *>(op2_instr) : NULL;
|
||||
|
||||
if (op1_instr->isSDWA() || op2_instr->isSDWA())
|
||||
return false;
|
||||
@@ -2081,7 +2081,7 @@ void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>&
|
||||
Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel,
|
||||
bool clamp, unsigned omod)
|
||||
{
|
||||
VOP3A_instruction *new_instr = create_instruction<VOP3A_instruction>(opcode, Format::VOP3A, 3, 1);
|
||||
VOP3_instruction *new_instr = create_instruction<VOP3_instruction>(opcode, Format::VOP3, 3, 1);
|
||||
memcpy(new_instr->abs, abs, sizeof(bool[3]));
|
||||
memcpy(new_instr->neg, neg, sizeof(bool[3]));
|
||||
new_instr->clamp = clamp;
|
||||
@@ -2306,7 +2306,7 @@ bool combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode n
|
||||
new_instr.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 2));
|
||||
} else if (ctx.program->chip_class >= GFX10 ||
|
||||
(instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
|
||||
new_instr.reset(create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
|
||||
new_instr.reset(create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
@@ -2347,7 +2347,7 @@ bool combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
op_instr->operands[0].isTemp() &&
|
||||
op_instr->operands[0].getTemp().type() == RegType::vgpr &&
|
||||
op_instr->operands[1].constantEquals(0)) {
|
||||
aco_ptr<Instruction> new_instr{create_instruction<VOP3A_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
|
||||
aco_ptr<Instruction> new_instr{create_instruction<VOP3_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
|
||||
ctx.uses[instr->operands[i].tempId()]--;
|
||||
new_instr->operands[0] = op_instr->operands[0];
|
||||
new_instr->operands[1] = instr->operands[!i];
|
||||
@@ -2645,7 +2645,7 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
|
||||
return false;
|
||||
} else {
|
||||
to_VOP3(ctx, instr);
|
||||
if (!apply_omod_clamp_helper(ctx, static_cast<VOP3A_instruction *>(instr.get()), def_info))
|
||||
if (!apply_omod_clamp_helper(ctx, static_cast<VOP3_instruction *>(instr.get()), def_info))
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2675,7 +2675,7 @@ bool combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
new_instr.reset(create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
|
||||
} else if (ctx.program->chip_class >= GFX10 ||
|
||||
(instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
|
||||
new_instr.reset(create_instruction<VOP3A_instruction>(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1));
|
||||
new_instr.reset(create_instruction<VOP3_instruction>(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
@@ -2729,7 +2729,7 @@ bool combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
|
||||
ctx.uses[instr->operands[i].tempId()]--;
|
||||
|
||||
aco_ptr<VOP3A_instruction> new_instr{create_instruction<VOP3A_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3A, 3, 1)};
|
||||
aco_ptr<VOP3_instruction> new_instr{create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3, 3, 1)};
|
||||
new_instr->operands[0] = op_instr->operands[!shift_op_idx];
|
||||
new_instr->operands[1] = Operand(multiplier);
|
||||
new_instr->operands[2] = instr->operands[!i];
|
||||
@@ -2944,7 +2944,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
||||
|
||||
if (mul_instr->operands[0].isLiteral())
|
||||
return;
|
||||
if (mul_instr->isVOP3() && static_cast<VOP3A_instruction*>(mul_instr)->clamp)
|
||||
if (mul_instr->isVOP3() && static_cast<VOP3_instruction*>(mul_instr)->clamp)
|
||||
return;
|
||||
if (mul_instr->isSDWA())
|
||||
return;
|
||||
@@ -2954,13 +2954,13 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
||||
Definition def = instr->definitions[0];
|
||||
/* neg(abs(mul(a, b))) -> mul(neg(abs(a)), abs(b)) */
|
||||
bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
|
||||
instr.reset(create_instruction<VOP3A_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
|
||||
instr.reset(create_instruction<VOP3_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
|
||||
instr->operands[0] = mul_instr->operands[0];
|
||||
instr->operands[1] = mul_instr->operands[1];
|
||||
instr->definitions[0] = def;
|
||||
VOP3A_instruction* new_mul = static_cast<VOP3A_instruction*>(instr.get());
|
||||
VOP3_instruction* new_mul = static_cast<VOP3_instruction*>(instr.get());
|
||||
if (mul_instr->isVOP3()) {
|
||||
VOP3A_instruction* mul = static_cast<VOP3A_instruction*>(mul_instr);
|
||||
VOP3_instruction* mul = static_cast<VOP3_instruction*>(mul_instr);
|
||||
new_mul->neg[0] = mul->neg[0] && !is_abs;
|
||||
new_mul->neg[1] = mul->neg[1] && !is_abs;
|
||||
new_mul->abs[0] = mul->abs[0] || is_abs;
|
||||
@@ -3003,8 +3003,8 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
||||
|
||||
/* no clamp/omod allowed between mul and add */
|
||||
if (info.instr->isVOP3() &&
|
||||
(static_cast<VOP3A_instruction*>(info.instr)->clamp ||
|
||||
static_cast<VOP3A_instruction*>(info.instr)->omod))
|
||||
(static_cast<VOP3_instruction*>(info.instr)->clamp ||
|
||||
static_cast<VOP3_instruction*>(info.instr)->omod))
|
||||
continue;
|
||||
|
||||
Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]};
|
||||
@@ -3035,7 +3035,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
||||
bool clamp = false;
|
||||
|
||||
if (mul_instr->isVOP3()) {
|
||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (mul_instr);
|
||||
VOP3_instruction* vop3 = static_cast<VOP3_instruction*> (mul_instr);
|
||||
neg[0] = vop3->neg[0];
|
||||
neg[1] = vop3->neg[1];
|
||||
abs[0] = vop3->abs[0];
|
||||
@@ -3043,7 +3043,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
||||
}
|
||||
|
||||
if (instr->isVOP3()) {
|
||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (instr.get());
|
||||
VOP3_instruction* vop3 = static_cast<VOP3_instruction*> (instr.get());
|
||||
neg[2] = vop3->neg[add_op_idx];
|
||||
abs[2] = vop3->abs[add_op_idx];
|
||||
omod = vop3->omod;
|
||||
@@ -3068,7 +3068,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
|
||||
mad_op = need_fma ? (ctx.program->chip_class == GFX8 ? aco_opcode::v_fma_legacy_f16 : aco_opcode::v_fma_f16) :
|
||||
(ctx.program->chip_class == GFX8 ? aco_opcode::v_mad_legacy_f16 : aco_opcode::v_mad_f16);
|
||||
|
||||
aco_ptr<VOP3A_instruction> mad{create_instruction<VOP3A_instruction>(mad_op, Format::VOP3A, 3, 1)};
|
||||
aco_ptr<VOP3_instruction> mad{create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1)};
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
mad->operands[i] = op[i];
|
||||
mad->neg[i] = neg[i];
|
||||
|
@@ -588,7 +588,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
|
||||
}
|
||||
}
|
||||
if (instr->isVOP3()) {
|
||||
const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr);
|
||||
const VOP3_instruction* vop3 = static_cast<const VOP3_instruction*>(instr);
|
||||
switch (vop3->omod) {
|
||||
case 1:
|
||||
fprintf(output, " *2");
|
||||
@@ -693,8 +693,8 @@ void aco_print_instr(const Instruction *instr, FILE *output)
|
||||
bool *const neg = (bool *)alloca(instr->operands.size() * sizeof(bool));
|
||||
bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool));
|
||||
uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t));
|
||||
if ((int)instr->format & (int)Format::VOP3A) {
|
||||
const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr);
|
||||
if ((int)instr->format & (int)Format::VOP3) {
|
||||
const VOP3_instruction* vop3 = static_cast<const VOP3_instruction*>(instr);
|
||||
for (unsigned i = 0; i < instr->operands.size(); ++i) {
|
||||
abs[i] = vop3->abs[i];
|
||||
neg[i] = vop3->neg[i];
|
||||
|
@@ -503,7 +503,7 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx
|
||||
update_phi_map(ctx, tmp.get(), instr.get());
|
||||
return;
|
||||
} else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) {
|
||||
VOP3A_instruction* vop3 = static_cast<VOP3A_instruction *>(instr.get());
|
||||
VOP3_instruction* vop3 = static_cast<VOP3_instruction *>(instr.get());
|
||||
vop3->opsel |= (byte / 2) << idx;
|
||||
return;
|
||||
} else if (instr->format == Format::VOP3P && byte == 2) {
|
||||
@@ -614,7 +614,7 @@ void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsi
|
||||
convert_to_SDWA(chip, instr);
|
||||
return;
|
||||
} else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) {
|
||||
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction *>(instr.get());
|
||||
VOP3_instruction *vop3 = static_cast<VOP3_instruction *>(instr.get());
|
||||
if (reg.byte() == 2)
|
||||
vop3->opsel |= (1 << 3); /* dst in high half */
|
||||
return;
|
||||
@@ -2478,7 +2478,7 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc
|
||||
/* change the instruction to VOP3 to enable an arbitrary register pair as dst */
|
||||
aco_ptr<Instruction> tmp = std::move(instr);
|
||||
Format format = asVOP3(tmp->format);
|
||||
instr.reset(create_instruction<VOP3A_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
|
||||
instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
|
||||
std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin());
|
||||
std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin());
|
||||
update_phi_map(ctx, tmp.get(), instr.get());
|
||||
|
@@ -136,7 +136,7 @@ bool validate_ir(Program* program)
|
||||
base_format == Format::VOP1 ||
|
||||
base_format == Format::VOPC ||
|
||||
base_format == Format::VINTRP,
|
||||
"Format cannot have VOP3A/VOP3B applied", instr.get());
|
||||
"Format cannot have VOP3/VOP3B applied", instr.get());
|
||||
}
|
||||
|
||||
/* check SDWA */
|
||||
@@ -188,7 +188,7 @@ bool validate_ir(Program* program)
|
||||
|
||||
/* check opsel */
|
||||
if (instr->isVOP3()) {
|
||||
VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get());
|
||||
VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get());
|
||||
check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
|
@@ -235,7 +235,7 @@ BEGIN_TEST(assembler.v_add3)
|
||||
|
||||
//~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
|
||||
//~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
|
||||
aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)};
|
||||
aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
|
||||
add3->operands[0] = Operand(0u);
|
||||
add3->operands[1] = Operand(0u);
|
||||
add3->operands[2] = Operand(0u);
|
||||
@@ -253,7 +253,7 @@ BEGIN_TEST(assembler.v_add3_clamp)
|
||||
|
||||
//~gfx9>> integer addition + clamp ; d1ff8000 02010080
|
||||
//~gfx10>> integer addition + clamp ; d76d8000 02010080
|
||||
aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)};
|
||||
aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
|
||||
add3->operands[0] = Operand(0u);
|
||||
add3->operands[1] = Operand(0u);
|
||||
add3->operands[2] = Operand(0u);
|
||||
|
@@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3)
|
||||
//! v1: %res1 = v_add_u32 %a, %tmp1
|
||||
//! p_unit_test 1, %res1
|
||||
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
|
||||
static_cast<VOP3_instruction *>(tmp.instr)->clamp = true;
|
||||
writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
|
||||
|
||||
//! v1: %tmp2 = v_add_u32 %b, %c
|
||||
@@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3)
|
||||
//! p_unit_test 2, %res2
|
||||
tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
|
||||
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
|
||||
static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
|
||||
static_cast<VOP3_instruction *>(tmp.instr)->clamp = true;
|
||||
writeout(2, tmp);
|
||||
|
||||
finish_opt_test();
|
||||
|
Reference in New Issue
Block a user