From e12bee3cb7f757408a3f739e788561a56d09041f Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 27 Jan 2022 14:00:38 +0000 Subject: [PATCH] aco: improve support for v_fma_mix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_ir.cpp | 5 +++++ src/amd/compiler/aco_ir.h | 3 ++- src/amd/compiler/aco_print_ir.cpp | 19 +++++++++++++++++-- src/amd/compiler/aco_validate.cpp | 6 ++++++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index cd705bc1a18..23aa602ebac 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -150,6 +150,11 @@ init_program(Program* program, Stage stage, const struct radv_shader_info* info, program->dev.has_fast_fma32 = true; program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10; + program->dev.fused_mad_mix = program->chip_class >= GFX10; + if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 || + program->family == CHIP_ARCTURUS || program->family == CHIP_ALDEBARAN) + program->dev.fused_mad_mix = true; + program->wgp_mode = wgp_mode; program->progress = CompilationProgress::after_isel; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index fa4f885c873..8ec61795db3 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1405,7 +1405,7 @@ static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected p struct VOP3P_instruction : public Instruction { bool neg_lo[3]; - bool neg_hi[3]; + bool neg_hi[3]; /* abs modifier, for v_mad_mix/v_fma_mix */ uint8_t opsel_lo : 3; uint8_t opsel_hi : 3; bool clamp : 1; @@ -2047,6 +2047,7 @@ struct DeviceInfo { unsigned simd_per_cu; bool has_fast_fma32 = false; bool has_mac_legacy32 = false; + bool fused_mad_mix = false; bool xnack_enabled = false; bool sram_ecc_enabled = false; }; diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index f650de37791..2b92eff8101 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -664,11 +664,16 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags) bool* const abs = (bool*)alloca(num_operands * sizeof(bool)); bool* const neg = (bool*)alloca(num_operands * sizeof(bool)); bool* const opsel = (bool*)alloca(num_operands * sizeof(bool)); + bool* const f2f32 = (bool*)alloca(num_operands * sizeof(bool)); for (unsigned i = 0; i < num_operands; ++i) { abs[i] = false; neg[i] = false; opsel[i] = false; + f2f32[i] = false; } + bool is_mad_mix = instr->opcode == aco_opcode::v_fma_mix_f32 || + instr->opcode == aco_opcode::v_fma_mixlo_f16 || + instr->opcode == aco_opcode::v_fma_mixhi_f16; if (instr->isVOP3()) { const VOP3_instruction& vop3 = instr->vop3(); for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) { @@ -690,6 +695,14 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags) neg[i] = sdwa.neg[i]; opsel[i] = false; } + } else if (instr->isVOP3P() && is_mad_mix) { + const VOP3P_instruction& vop3p = instr->vop3p(); + for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) { + abs[i] = vop3p.neg_hi[i]; + neg[i] = vop3p.neg_lo[i]; + f2f32[i] = vop3p.opsel_hi & (1 << i); + opsel[i] = f2f32[i] && (vop3p.opsel_lo & (1 << i)); + } } for (unsigned i = 0; i < num_operands; ++i) { if (i) @@ -703,13 +716,15 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags) fprintf(output, "|"); if (opsel[i]) fprintf(output, "hi("); + else if (f2f32[i]) + fprintf(output, "lo("); aco_print_operand(&instr->operands[i], output, flags); - if (opsel[i]) + if (f2f32[i] || opsel[i]) fprintf(output, ")"); if (abs[i]) fprintf(output, "|"); - if (instr->isVOP3P()) { + if (instr->isVOP3P() && !is_mad_mix) { const VOP3P_instruction& vop3 = instr->vop3p(); if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) { fprintf(output, ".%c%c", vop3.opsel_lo & (1 << i) ? 'y' : 'x', diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 13e1b55f602..198aa072c0b 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -236,6 +236,12 @@ validate_ir(Program* program) if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed()) check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get()); + } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 || + instr->opcode == aco_opcode::v_fma_mixhi_f16 || + instr->opcode == aco_opcode::v_fma_mix_f32) { + check(instr->definitions[0].regClass() == + (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b), + "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get()); } else if (instr->isVOP3P()) { VOP3P_instruction& vop3p = instr->vop3p(); for (unsigned i = 0; i < instr->operands.size(); i++) {