From bee487df48b19fc3bf792874aa6aff25104e281a Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 24 Jul 2024 17:32:22 +0200 Subject: [PATCH] aco/gfx11.5+: use vinterp for fddx/fddy Since GFX11.5, VINTERP can be dual-issued, whereas DPP cannot. Foz-DB GFX11.5: Totals from 8401 (10.58% of 79395) affected shaders: MaxWaves: 247880 -> 247848 (-0.01%) Instrs: 6802675 -> 6815061 (+0.18%); split: -0.08%, +0.26% CodeSize: 36539444 -> 36500948 (-0.11%); split: -0.22%, +0.11% VGPRs: 444324 -> 445932 (+0.36%); split: -0.01%, +0.37% SpillSGPRs: 1350 -> 1346 (-0.30%) Latency: 63628380 -> 63523687 (-0.16%); split: -0.20%, +0.04% InvThroughput: 10566750 -> 10486009 (-0.76%); split: -0.77%, +0.01% VClause: 100171 -> 100248 (+0.08%); split: -0.08%, +0.16% SClause: 175467 -> 176208 (+0.42%); split: -0.05%, +0.47% Copies: 356817 -> 356935 (+0.03%); split: -0.17%, +0.20% PreVGPRs: 283403 -> 283898 (+0.17%); split: -0.02%, +0.20% VALU: 4217969 -> 4229831 (+0.28%); split: -0.03%, +0.31% SALU: 479367 -> 479428 (+0.01%); split: -0.00%, +0.01% Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 92768a637ab..503cb662ea3 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -4219,8 +4219,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) aco_opcode subrev = instr->def.bit_size == 16 ? 
aco_opcode::v_subrev_f16 : aco_opcode::v_subrev_f32; + bool use_interp = dpp_ctrl1 == dpp_quad_perm(0, 0, 0, 0) && instr->def.bit_size == 32 && + ctx->program->gfx_level >= GFX11_5; if (!nir_src_is_divergent(instr->src[0].src)) { bld.vop2(subrev, Definition(dst), src, src); + } else if (use_interp && dpp_ctrl2 == dpp_quad_perm(1, 1, 1, 1)) { + bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, Definition(dst), src, + Operand::c32(0x3f800000), src) + ->valu() + .neg[2] = true; + } else if (use_interp && dpp_ctrl2 == dpp_quad_perm(2, 2, 2, 2)) { + Builder::Result tmp = bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, bld.def(v1), + Operand::c32(0), Operand::c32(0), src); + tmp->valu().neg = 0x6; + bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), src, + Operand::c32(0x3f800000), tmp); } else if (ctx->program->gfx_level >= GFX8) { Temp tmp = bld.vop2_dpp(subrev, bld.def(v1), src, src, dpp_ctrl1); bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), tmp, dpp_ctrl2);