From 3d9ac270e2c4d3207f95dc0ce89cf65b360ac304 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 22 Apr 2025 19:34:12 +0200
Subject: [PATCH] aco/insert_exec: reset temporary when recreating wqm mask from exact mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The old, now incorrect temporary was still used for invert blocks and loop masks.

Foz-DB Navi31:
Totals from 379 (0.48% of 79789) affected shaders:
Instrs: 399471 -> 399897 (+0.11%); split: -0.00%, +0.11%
CodeSize: 2197292 -> 2198908 (+0.07%); split: -0.00%, +0.08%
Latency: 2500636 -> 2500895 (+0.01%); split: -0.00%, +0.01%
SClause: 7912 -> 7918 (+0.08%); split: -0.04%, +0.11%
Copies: 25687 -> 26068 (+1.48%); split: -0.04%, +1.53%
PreSGPRs: 15648 -> 15562 (-0.55%)
SALU: 35125 -> 35517 (+1.12%)

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12901
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13019
Fixes: b872ff6ef28 ("aco/insert_exec_mask: if applicable, use s_wqm to restore exec after divergent CF")
Reviewed-by: Daniel Schürmann
Part-of:
(cherry picked from commit dd3e1190a2bdbc6b996152510407adb9a8cb5618)
---
 .pick_status.json                         | 2 +-
 src/amd/compiler/aco_insert_exec_mask.cpp | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index d186826780e..1d9bb9dc3e7 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -4,7 +4,7 @@
         "description": "aco/insert_exec: reset temporary when recreating wqm mask from exact mask",
         "nominated": true,
         "nomination_type": 2,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": "b872ff6ef28bc44ac0f7aa5f963a273e40c79a61",
         "notes": null
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index a0628f72c5d..10a4c9a9548 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -378,6 +378,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, 
std::vector<aco_ptr<Instruction>>
           */
          bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc),
                   ctx.info[idx].exec[0].op);
+         ctx.info[idx].exec[1].op = Operand(exec, bld.lm);
          restore_exec = false;
          ctx.had_demote_in_cf = false;
       }