radv: vectorize nir_op_fabs
Totals from 4 (0.00% of 134913) affected shaders: (GFX10.3)
CodeSize: 37868 -> 36576 (-3.41%)
Instrs: 5332 -> 5169 (-3.06%)
Latency: 24452 -> 24174 (-1.14%)
InvThroughput: 9784 -> 9462 (-3.29%)
VClause: 54 -> 50 (-7.41%)
Copies: 520 -> 519 (-0.19%)
PreVGPRs: 266 -> 264 (-0.75%)

Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15176>
This commit is contained in:

committed by
Marge Bot

parent
b45a39c44b
commit
2e895f8b04
@@ -2522,6 +2522,16 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
break;
|
||||
}
|
||||
case nir_op_fabs: {
|
||||
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
|
||||
Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
|
||||
Instruction* vop3p =
|
||||
bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
|
||||
instr->src[0].swizzle[0] & 1 ? 3 : 0, instr->src[0].swizzle[1] & 1 ? 3 : 0)
|
||||
.instr;
|
||||
vop3p->vop3p().neg_lo[1] = true;
|
||||
vop3p->vop3p().neg_hi[1] = true;
|
||||
break;
|
||||
}
|
||||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
if (dst.regClass() == v2b) {
|
||||
Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst),
|
||||
|
@@ -4072,6 +4072,7 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
|
||||
case nir_op_ffma:
|
||||
case nir_op_fdiv:
|
||||
case nir_op_flrp:
|
||||
case nir_op_fabs:
|
||||
case nir_op_fneg:
|
||||
case nir_op_fsat:
|
||||
case nir_op_fmin:
|
||||
|
Reference in New Issue
Block a user