From d76fc005b62bc9599c5ed278942fec8c23c587ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Fri, 19 Apr 2024 11:55:28 +0200
Subject: [PATCH] aco/ra: re-use registers from killed operands

Totals from 77283 (97.34% of 79395) affected shaders: (GFX11)

MaxWaves: 2348498 -> 2348250 (-0.01%); split: +0.01%, -0.02%
Instrs: 45304558 -> 45097367 (-0.46%); split: -0.57%, +0.11%
CodeSize: 235719656 -> 234957768 (-0.32%); split: -0.43%, +0.11%
VGPRs: 3065984 -> 3073244 (+0.24%); split: -0.41%, +0.65%
Latency: 308010576 -> 307008565 (-0.33%); split: -0.85%, +0.52%
InvThroughput: 49560307 -> 49464214 (-0.19%); split: -0.54%, +0.34%
VClause: 881895 -> 879739 (-0.24%); split: -0.78%, +0.53%
SClause: 1388139 -> 1374634 (-0.97%); split: -1.12%, +0.14%
Copies: 2918583 -> 2910434 (-0.28%); split: -1.92%, +1.64%
Branches: 893947 -> 893712 (-0.03%); split: -0.06%, +0.03%
VALU: 25260728 -> 25256766 (-0.02%); split: -0.20%, +0.19%
SALU: 4377750 -> 4373595 (-0.09%); split: -0.17%, +0.07%
VOPD: 8603 -> 9163 (+6.51%); split: +8.54%, -2.03%

Part-of:
---
 src/amd/compiler/aco_register_allocation.cpp | 10 ++++++++++
 src/amd/compiler/tests/test_d3d11_derivs.cpp |  6 ++----
 src/amd/compiler/tests/test_isel.cpp         |  4 ++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 59dcd5273cf..59cc6c11749 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1804,6 +1804,16 @@ get_reg(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp,
          return *res;
    }
 
+   if (temp.size() == 1 && operand_index == -1) {
+      for (const Operand& op : instr->operands) {
+         if (op.isTemp() && op.isFirstKillBeforeDef() && op.regClass() == temp.regClass()) {
+            assert(op.isFixed());
+            if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, op.physReg()))
+               return op.physReg();
+         }
+      }
+   }
+
    DefInfo info(ctx, instr, temp.regClass(), operand_index);
 
    if (!ctx.policy.skip_optimistic_path) {
diff --git a/src/amd/compiler/tests/test_d3d11_derivs.cpp b/src/amd/compiler/tests/test_d3d11_derivs.cpp
index 9130a6918c0..d12cd5fe55b 100644
--- a/src/amd/compiler/tests/test_d3d11_derivs.cpp
+++ b/src/amd/compiler/tests/test_d3d11_derivs.cpp
@@ -429,8 +429,7 @@ BEGIN_TEST(d3d11_derivs.cube)
    //>> v_mov_b32_e32 v#rf, v#rf_tmp ; $_
    //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
    //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
-   //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
-   //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
+   //>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
    //; success = rx+1 == ry and rx+2 == rf
    //>> image_sample v[#_:#_], v[#rx:#rf], s[#_:#_], s[#_:#_] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; $_ $_
    pbld.print_ir(VK_SHADER_STAGE_FRAGMENT_BIT, "Assembly");
@@ -477,9 +476,8 @@ BEGIN_TEST(d3d11_derivs.cube_array)
 
    //>> v_fmaak_f32 v#rx_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
    //>> v_fmaak_f32 v#ry_tmp, v#_, v#_, 0x3fc00000 ; $_ $_
-   //>> v_mov_b32_e32 v#ry, v#ry_tmp ; $_
    //>> v_fmamk_f32 v#rlf_tmp, v#rl, 0x41000000, v#rf ; $_ $_
-   //>> v_mov_b32_e32 v#rx, v#rx_tmp ; $_
+   //>> v_lshrrev_b64 v[#rx:#ry], 0, v[#rx_tmp:#ry_tmp] ; $_ $_
    //>> v_mov_b32_e32 v#rlf, v#rlf_tmp ; $_
 
    //>> BB1:
diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp
index 2674b4f322a..0b34dc65a68 100644
--- a/src/amd/compiler/tests/test_isel.cpp
+++ b/src/amd/compiler/tests/test_isel.cpp
@@ -171,7 +171,7 @@ BEGIN_TEST(isel.discard_early_exit.mrtz)
    );
 
    /* On GFX11, the discard early exit must use mrtz if the shader exports only depth. */
-   //>> exp mrtz v0, off, off, off done ; $_ $_
+   //>> exp mrtz v#_, off, off, off done ; $_ $_
    //! s_nop 0 ; $_
    //! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_
    //! s_endpgm ; $_
@@ -199,7 +199,7 @@ BEGIN_TEST(isel.discard_early_exit.mrt0)
    );
 
    /* On GFX11, the discard early exit must use mrt0 if the shader exports color. */
-   //>> exp mrt0 v0, v0, v0, v0 done ; $_ $_
+   //>> exp mrt0 v#x, v#x, v#x, v#x done ; $_ $_
    //! s_nop 0 ; $_
    //! s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; $_
    //! s_endpgm ; $_