aco: improve support for v_fma_mix

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14769>
This commit is contained in:
Rhys Perry
2022-01-27 14:00:38 +00:00
committed by Marge Bot
parent 79c8740c6e
commit e12bee3cb7
4 changed files with 30 additions and 3 deletions

View File

@@ -150,6 +150,11 @@ init_program(Program* program, Stage stage, const struct radv_shader_info* info,
program->dev.has_fast_fma32 = true;
program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10;
program->dev.fused_mad_mix = program->chip_class >= GFX10;
if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 ||
program->family == CHIP_ARCTURUS || program->family == CHIP_ALDEBARAN)
program->dev.fused_mad_mix = true;
program->wgp_mode = wgp_mode;
program->progress = CompilationProgress::after_isel;

View File

@@ -1405,7 +1405,7 @@ static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected p
struct VOP3P_instruction : public Instruction {
bool neg_lo[3];
bool neg_hi[3];
bool neg_hi[3]; /* abs modifier, for v_mad_mix/v_fma_mix */
uint8_t opsel_lo : 3;
uint8_t opsel_hi : 3;
bool clamp : 1;
@@ -2047,6 +2047,7 @@ struct DeviceInfo {
unsigned simd_per_cu;
bool has_fast_fma32 = false;
bool has_mac_legacy32 = false;
bool fused_mad_mix = false;
bool xnack_enabled = false;
bool sram_ecc_enabled = false;
};

View File

@@ -664,11 +664,16 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
bool* const abs = (bool*)alloca(num_operands * sizeof(bool));
bool* const neg = (bool*)alloca(num_operands * sizeof(bool));
bool* const opsel = (bool*)alloca(num_operands * sizeof(bool));
bool* const f2f32 = (bool*)alloca(num_operands * sizeof(bool));
for (unsigned i = 0; i < num_operands; ++i) {
abs[i] = false;
neg[i] = false;
opsel[i] = false;
f2f32[i] = false;
}
bool is_mad_mix = instr->opcode == aco_opcode::v_fma_mix_f32 ||
instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16;
if (instr->isVOP3()) {
const VOP3_instruction& vop3 = instr->vop3();
for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) {
@@ -690,6 +695,14 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
neg[i] = sdwa.neg[i];
opsel[i] = false;
}
} else if (instr->isVOP3P() && is_mad_mix) {
const VOP3P_instruction& vop3p = instr->vop3p();
for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) {
abs[i] = vop3p.neg_hi[i];
neg[i] = vop3p.neg_lo[i];
f2f32[i] = vop3p.opsel_hi & (1 << i);
opsel[i] = f2f32[i] && (vop3p.opsel_lo & (1 << i));
}
}
for (unsigned i = 0; i < num_operands; ++i) {
if (i)
@@ -703,13 +716,15 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
fprintf(output, "|");
if (opsel[i])
fprintf(output, "hi(");
else if (f2f32[i])
fprintf(output, "lo(");
aco_print_operand(&instr->operands[i], output, flags);
if (opsel[i])
if (f2f32[i] || opsel[i])
fprintf(output, ")");
if (abs[i])
fprintf(output, "|");
if (instr->isVOP3P()) {
if (instr->isVOP3P() && !is_mad_mix) {
const VOP3P_instruction& vop3 = instr->vop3p();
if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {
fprintf(output, ".%c%c", vop3.opsel_lo & (1 << i) ? 'y' : 'x',

View File

@@ -236,6 +236,12 @@ validate_ir(Program* program)
if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
instr.get());
} else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
instr->opcode == aco_opcode::v_fma_mix_f32) {
check(instr->definitions[0].regClass() ==
(instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
"v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
} else if (instr->isVOP3P()) {
VOP3P_instruction& vop3p = instr->vop3p();
for (unsigned i = 0; i < instr->operands.size(); i++) {