From 0e6d32777f6a37b64ee9ab4df1d85c0c148c6a35 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 6 Sep 2024 14:29:36 +0200 Subject: [PATCH] nir/opt_remove_phis: rematerialize equal alu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi31: Totals from 943 (1.19% of 79395) affected shaders: MaxWaves: 24672 -> 24722 (+0.20%) Instrs: 1541665 -> 1544956 (+0.21%); split: -0.23%, +0.44% CodeSize: 8085180 -> 8109212 (+0.30%); split: -0.16%, +0.46% VGPRs: 57768 -> 57624 (-0.25%) Latency: 18043743 -> 17948245 (-0.53%); split: -1.28%, +0.75% InvThroughput: 2692605 -> 2677049 (-0.58%); split: -2.07%, +1.49% VClause: 25321 -> 25343 (+0.09%); split: -0.48%, +0.57% SClause: 38473 -> 38614 (+0.37%); split: -0.00%, +0.37% Copies: 86089 -> 86236 (+0.17%); split: -0.46%, +0.63% Branches: 36719 -> 36777 (+0.16%); split: -0.60%, +0.76% PreSGPRs: 44138 -> 44303 (+0.37%); split: -0.05%, +0.42% PreVGPRs: 43319 -> 43009 (-0.72%) VALU: 893684 -> 894272 (+0.07%); split: -0.42%, +0.48% SALU: 189561 -> 191358 (+0.95%); split: -0.05%, +1.00% VMEM: 42294 -> 42313 (+0.04%); split: -0.44%, +0.49% SMEM: 72916 -> 73144 (+0.31%) Instruction count regressions are largely caused by additional loop unrolling. 
Reviewed-by: Daniel Schürmann Part-of: --- src/compiler/nir/nir_opt_remove_phis.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c index 22c0f57a69f..d6a1357dd29 100644 --- a/src/compiler/nir/nir_opt_remove_phis.c +++ b/src/compiler/nir/nir_opt_remove_phis.c @@ -56,16 +56,17 @@ phi_srcs_equal(nir_def *a, nir_def *b) } static bool -can_rematerialize_phi_src(nir_block *block, nir_def *def) +src_dominates_block(nir_src *src, void *state) +{ + nir_block *block = state; + return nir_block_dominates(src->ssa->parent_instr->block, block); +} + +static bool +can_rematerialize_phi_src(nir_block *imm_dom, nir_def *def) { if (def->parent_instr->type == nir_instr_type_alu) { - /* Restrict alu to movs. */ - nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr); - if (alu->op != nir_op_mov) - return false; - if (!nir_block_dominates(alu->src[0].src.ssa->parent_instr->block, block->imm_dom)) - return false; - return true; + return nir_foreach_src(def->parent_instr, src_dominates_block, imm_dom); } else if (def->parent_instr->type == nir_instr_type_load_const) { return true; } @@ -119,7 +120,7 @@ remove_phis_block(nir_block *block, nir_builder *b) if (def == NULL) { def = src->src.ssa; if (!nir_block_dominates(def->parent_instr->block, block->imm_dom)) { - if (!can_rematerialize_phi_src(block, def)) { + if (!can_rematerialize_phi_src(block->imm_dom, def)) { srcs_same = false; break; } @@ -139,7 +140,7 @@ remove_phis_block(nir_block *block, nir_builder *b) b->cursor = nir_after_phis(block); def = nir_undef(b, phi->def.num_components, phi->def.bit_size); } else if (needs_remat) { - b->cursor = nir_after_phis(block); + b->cursor = nir_after_block_before_jump(block->imm_dom); nir_instr *remat = nir_instr_clone(b->shader, def->parent_instr); nir_builder_instr_insert(b, remat); def = nir_instr_def(remat);