From 2b56a97374ce9c2b453b69ca001db4b0886d5ceb Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Mon, 27 May 2024 21:51:23 +0200 Subject: [PATCH] aco/lower_to_hw: optimize split 64bit constant copies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi21: Totals from 3209 (4.04% of 79395) affected shaders: Instrs: 6502065 -> 6496612 (-0.08%) CodeSize: 35578300 -> 35556596 (-0.06%) Latency: 66092924 -> 66092668 (-0.00%); split: -0.00%, +0.00% InvThroughput: 16968953 -> 16968900 (-0.00%); split: -0.00%, +0.00% SClause: 198651 -> 198647 (-0.00%) Copies: 597323 -> 591872 (-0.91%) SALU: 930918 -> 925467 (-0.59%) Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 069f4b3b95b..966846aedd8 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1776,6 +1776,23 @@ handle_operands(std::map& copy_map, lower_context* ctx, } } + /* optimize constant copies to aligned sgpr pair that's otherwise unused. */ + if (it->first <= exec && (it->first % 2) == 0 && it->second.bytes == 4 && + it->second.op.isConstant() && !it->second.is_used) { + PhysReg reg_hi = it->first.advance(4); + std::map::iterator other = copy_map.find(reg_hi); + if (other != copy_map.end() && other->second.bytes == 4 && other->second.op.isConstant() && + !other->second.is_used) { + uint64_t constant = + it->second.op.constantValue64() | (other->second.op.constantValue64() << 32); + copy_constant_sgpr(bld, Definition(it->first, s2), constant); + copy_map.erase(it); + copy_map.erase(other); + it = copy_map.begin(); + continue; + } + } + /* find portions where the target reg is not used as operand for any other copy */ if (it->second.is_used) { if (it->second.op.isConstant() || skip_partial_copies) {