radv,aco: implement nir_op_ffma

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9805>
This commit is contained in:
Rhys Perry
2021-03-24 17:17:38 +00:00
committed by Marge Bot
parent c5f02a1cd3
commit 165ca5088b
4 changed files with 32 additions and 3 deletions

View File

@@ -2092,6 +2092,35 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
} }
break; break;
} }
case nir_op_ffma: {
if (dst.regClass() == v2b) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f16, dst, false, 3);
} else if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
assert(instr->dest.dest.ssa.num_components == 2);
Temp src0 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[0]));
Temp src1 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[1]));
Temp src2 = as_vgpr(ctx, get_alu_src_vop3p(ctx, instr->src[2]));
/* swizzle to opsel: all swizzles are either 0 (x) or 1 (y) */
unsigned opsel_lo = 0, opsel_hi = 0;
for (unsigned i = 0; i < 3; i++) {
opsel_lo |= (instr->src[i].swizzle[0] & 1) << i;
opsel_hi |= (instr->src[i].swizzle[1] & 1) << i;
}
bld.vop3p(aco_opcode::v_pk_fma_f16, Definition(dst), src0, src1, src2, opsel_lo, opsel_hi);
emit_split_vector(ctx, dst, 2);
} else if (dst.regClass() == v1) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f32, dst,
ctx->block->fp_mode.must_flush_denorms32, 3);
} else if (dst.regClass() == v2) {
emit_vop3a_instruction(ctx, instr, aco_opcode::v_fma_f64, dst, false, 3);
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
break;
}
case nir_op_fmax: { case nir_op_fmax: {
if (dst.regClass() == v2b) { if (dst.regClass() == v2b) {
// TODO: check fp_mode.must_flush_denorms16_64 // TODO: check fp_mode.must_flush_denorms16_64

View File

@@ -467,6 +467,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_fmul: case nir_op_fmul:
case nir_op_fadd: case nir_op_fadd:
case nir_op_fsub: case nir_op_fsub:
case nir_op_ffma:
case nir_op_fmax: case nir_op_fmax:
case nir_op_fmin: case nir_op_fmin:
case nir_op_fneg: case nir_op_fneg:

View File

@@ -3112,9 +3112,7 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
/* check for fneg modifiers */ /* check for fneg modifiers */
if (instr_info.can_use_input_modifiers[(int)instr->opcode]) { if (instr_info.can_use_input_modifiers[(int)instr->opcode]) {
/* at this point, we only have 2-operand instructions */ for (unsigned i = 0; i < instr->operands.size(); i++) {
assert(instr->operands.size() == 2);
for (unsigned i = 0; i < 2; i++) {
Operand& op = instr->operands[i]; Operand& op = instr->operands[i];
if (!op.isTemp()) if (!op.isTemp())
continue; continue;

View File

@@ -3396,6 +3396,7 @@ opt_vectorize_callback(const nir_instr *instr, void *_)
case nir_op_fadd: case nir_op_fadd:
case nir_op_fsub: case nir_op_fsub:
case nir_op_fmul: case nir_op_fmul:
case nir_op_ffma:
case nir_op_fneg: case nir_op_fneg:
case nir_op_fsat: case nir_op_fsat:
case nir_op_fmin: case nir_op_fmin: