aco: don't optimize DPP across more than one block
Register write tracking doesn't work for inactive lanes, so optimizing DPP across more than one block was unsafe.
Foz-DB Navi31:
Totals from 8 (0.01% of 78196) affected shaders:
Instrs: 11513 -> 11515 (+0.02%)
CodeSize: 61056 -> 61064 (+0.01%)
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10197
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26373>
(cherry picked from commit 576afa8540)
This commit is contained in:

committed by
Eric Engestrom

parent
658c34f795
commit
ae0873c0f8
@@ -1844,7 +1844,7 @@
|
||||
"description": "aco: don't optimize DPP across more than one block",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
@@ -492,6 +492,13 @@ try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
if (!op_instr_idx.found())
|
||||
continue;
|
||||
|
||||
/* is_overwritten_since only considers active lanes when the register could possibly
|
||||
* have been overwritten from inactive lanes. Restrict this optimization to at most
|
||||
* one block so that there is no possibility for clobbered inactive lanes.
|
||||
*/
|
||||
if (ctx.current_block->index - op_instr_idx.block > 1)
|
||||
continue;
|
||||
|
||||
const Instruction* mov = ctx.get(op_instr_idx);
|
||||
if (mov->opcode != aco_opcode::v_mov_b32 || !mov->isDPP())
|
||||
continue;
|
||||
|
@@ -571,6 +571,12 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
|
||||
//! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
|
||||
bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
|
||||
|
||||
//! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 10, %res10:v[12]
|
||||
Temp result =
|
||||
bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
|
||||
writeout(10, Operand(result, reg_v12));
|
||||
|
||||
//! p_logical_end
|
||||
//! s2: %0:vcc = p_branch BB3
|
||||
|
||||
@@ -605,12 +611,6 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
|
||||
//! /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
|
||||
//! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
|
||||
|
||||
//! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 10, %res10:v[12]
|
||||
Temp result =
|
||||
bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
|
||||
writeout(10, Operand(result, reg_v12));
|
||||
|
||||
finish_optimizer_postRA_test();
|
||||
END_TEST
|
||||
|
||||
|
Reference in New Issue
Block a user