aco/gfx11.5+: use vinterp for fddx/fddy

Since GFX11.5 VINTERP can be dual issued, DPP cannot.

Foz-DB GFX11.5:
Totals from 8401 (10.58% of 79395) affected shaders:
MaxWaves: 247880 -> 247848 (-0.01%)
Instrs: 6802675 -> 6815061 (+0.18%); split: -0.08%, +0.26%
CodeSize: 36539444 -> 36500948 (-0.11%); split: -0.22%, +0.11%
VGPRs: 444324 -> 445932 (+0.36%); split: -0.01%, +0.37%
SpillSGPRs: 1350 -> 1346 (-0.30%)
Latency: 63628380 -> 63523687 (-0.16%); split: -0.20%, +0.04%
InvThroughput: 10566750 -> 10486009 (-0.76%); split: -0.77%, +0.01%
VClause: 100171 -> 100248 (+0.08%); split: -0.08%, +0.16%
SClause: 175467 -> 176208 (+0.42%); split: -0.05%, +0.47%
Copies: 356817 -> 356935 (+0.03%); split: -0.17%, +0.20%
PreVGPRs: 283403 -> 283898 (+0.17%); split: -0.02%, +0.20%
VALU: 4217969 -> 4229831 (+0.28%); split: -0.03%, +0.31%
SALU: 479367 -> 479428 (+0.01%); split: -0.00%, +0.01%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30350>
This commit is contained in:
Georg Lehmann
2024-07-24 17:32:22 +02:00
committed by Marge Bot
parent 8c6e299141
commit bee487df48

View File

@@ -4219,8 +4219,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
aco_opcode subrev =
instr->def.bit_size == 16 ? aco_opcode::v_subrev_f16 : aco_opcode::v_subrev_f32;
bool use_interp = dpp_ctrl1 == dpp_quad_perm(0, 0, 0, 0) && instr->def.bit_size == 32 &&
ctx->program->gfx_level >= GFX11_5;
if (!nir_src_is_divergent(instr->src[0].src)) {
bld.vop2(subrev, Definition(dst), src, src);
} else if (use_interp && dpp_ctrl2 == dpp_quad_perm(1, 1, 1, 1)) {
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, Definition(dst), src,
Operand::c32(0x3f800000), src)
->valu()
.neg[2] = true;
} else if (use_interp && dpp_ctrl2 == dpp_quad_perm(2, 2, 2, 2)) {
Builder::Result tmp = bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, bld.def(v1),
Operand::c32(0), Operand::c32(0), src);
tmp->valu().neg = 0x6;
bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), src,
Operand::c32(0x3f800000), tmp);
} else if (ctx->program->gfx_level >= GFX8) {
Temp tmp = bld.vop2_dpp(subrev, bld.def(v1), src, src, dpp_ctrl1);
bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), tmp, dpp_ctrl2);