aco/gfx11.5+: use vinterp for fddx/fddy
Starting with GFX11.5, VINTERP instructions can be dual-issued, whereas DPP instructions cannot.

Foz-DB GFX11.5:
Totals from 8401 (10.58% of 79395) affected shaders:
MaxWaves: 247880 -> 247848 (-0.01%)
Instrs: 6802675 -> 6815061 (+0.18%); split: -0.08%, +0.26%
CodeSize: 36539444 -> 36500948 (-0.11%); split: -0.22%, +0.11%
VGPRs: 444324 -> 445932 (+0.36%); split: -0.01%, +0.37%
SpillSGPRs: 1350 -> 1346 (-0.30%)
Latency: 63628380 -> 63523687 (-0.16%); split: -0.20%, +0.04%
InvThroughput: 10566750 -> 10486009 (-0.76%); split: -0.77%, +0.01%
VClause: 100171 -> 100248 (+0.08%); split: -0.08%, +0.16%
SClause: 175467 -> 176208 (+0.42%); split: -0.05%, +0.47%
Copies: 356817 -> 356935 (+0.03%); split: -0.17%, +0.20%
PreVGPRs: 283403 -> 283898 (+0.17%); split: -0.02%, +0.20%
VALU: 4217969 -> 4229831 (+0.28%); split: -0.03%, +0.31%
SALU: 479367 -> 479428 (+0.01%); split: -0.00%, +0.01%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30350>
This commit is contained in:
@@ -4219,8 +4219,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||
|
||||
aco_opcode subrev =
|
||||
instr->def.bit_size == 16 ? aco_opcode::v_subrev_f16 : aco_opcode::v_subrev_f32;
|
||||
bool use_interp = dpp_ctrl1 == dpp_quad_perm(0, 0, 0, 0) && instr->def.bit_size == 32 &&
|
||||
ctx->program->gfx_level >= GFX11_5;
|
||||
if (!nir_src_is_divergent(instr->src[0].src)) {
|
||||
bld.vop2(subrev, Definition(dst), src, src);
|
||||
} else if (use_interp && dpp_ctrl2 == dpp_quad_perm(1, 1, 1, 1)) {
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, Definition(dst), src,
|
||||
Operand::c32(0x3f800000), src)
|
||||
->valu()
|
||||
.neg[2] = true;
|
||||
} else if (use_interp && dpp_ctrl2 == dpp_quad_perm(2, 2, 2, 2)) {
|
||||
Builder::Result tmp = bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, bld.def(v1),
|
||||
Operand::c32(0), Operand::c32(0), src);
|
||||
tmp->valu().neg = 0x6;
|
||||
bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), src,
|
||||
Operand::c32(0x3f800000), tmp);
|
||||
} else if (ctx->program->gfx_level >= GFX8) {
|
||||
Temp tmp = bld.vop2_dpp(subrev, bld.def(v1), src, src, dpp_ctrl1);
|
||||
bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), tmp, dpp_ctrl2);
|
||||
|
Reference in New Issue
Block a user