radv: vectorize nir_op_fabs

Totals from 4 (0.00% of 134913) affected shaders: (GFX10.3)
CodeSize: 37868 -> 36576 (-3.41%)
Instrs: 5332 -> 5169 (-3.06%)
Latency: 24452 -> 24174 (-1.14%)
InvThroughput: 9784 -> 9462 (-3.29%)
VClause: 54 -> 50 (-7.41%)
Copies: 520 -> 519 (-0.19%)
PreVGPRs: 266 -> 264 (-0.75%)

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15176>
This commit is contained in:
Daniel Schürmann
2021-10-07 20:20:23 +02:00
committed by Marge Bot
parent b45a39c44b
commit 2e895f8b04
2 changed files with 11 additions and 0 deletions

View File

@@ -2522,6 +2522,16 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
break;
}
case nir_op_fabs: {
// Packed path: taken only when the destination register class is v1 and the NIR
// result is 16 bits wide (presumably two f16 components packed in one 32-bit
// VGPR — confirm against the vectorize callback).
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
// Emit v_pk_max_f16 with the same temporary as both operands. Combined with the
// negate flags set on operand 1 below, each half computes max(x, -x), i.e. |x|.
// The two trailing arguments come from the low bit of swizzle[0] and swizzle[1]:
// 3 when the bit is set, 0 otherwise. NOTE(review): these look like the
// opsel_lo/opsel_hi masks, with 3 selecting the high half for both operands —
// confirm against the bld.vop3p signature.
Instruction* vop3p =
bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
instr->src[0].swizzle[0] & 1 ? 3 : 0, instr->src[0].swizzle[1] & 1 ? 3 : 0)
.instr;
// Negate the second operand for both the low and the high result.
vop3p->vop3p().neg_lo[1] = true;
vop3p->vop3p().neg_hi[1] = true;
// Leave the case here so the non-packed fabs handling below is not also emitted.
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
if (dst.regClass() == v2b) {
Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst),

View File

@@ -4072,6 +4072,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
case nir_op_ffma:
case nir_op_fdiv:
case nir_op_flrp:
case nir_op_fabs:
case nir_op_fneg:
case nir_op_fsat:
case nir_op_fmin: