aco/insert_exec_mask: replace phi for loop restore mask with explicit copies
Totals from 1785 (2.25% of 79242) affected shaders: (GFX11) Instrs: 6787574 -> 6787041 (-0.01%); split: -0.01%, +0.00% CodeSize: 34906500 -> 34904704 (-0.01%); split: -0.01%, +0.01% SpillSGPRs: 5848 -> 5816 (-0.55%) Latency: 88616877 -> 88617209 (+0.00%); split: -0.00%, +0.00% InvThroughput: 16644948 -> 16644717 (-0.00%); split: -0.00%, +0.00% VClause: 141122 -> 141121 (-0.00%) SClause: 178929 -> 178906 (-0.01%); split: -0.03%, +0.02% Copies: 569444 -> 569081 (-0.06%); split: -0.09%, +0.03% Branches: 186980 -> 186961 (-0.01%); split: -0.01%, +0.00% PreSGPRs: 133648 -> 133369 (-0.21%) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26937>
This commit is contained in:

committed by
Marge Bot

parent
d375d297cf
commit
e83d8e1366
@@ -235,8 +235,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
||||
assert(preds[0] == idx - 1);
|
||||
ctx.info[idx].exec = ctx.info[idx - 1].exec;
|
||||
loop_info& info = ctx.loop.back();
|
||||
while (ctx.info[idx].exec.size() > info.num_exec_masks)
|
||||
ctx.info[idx].exec.pop_back();
|
||||
assert(ctx.info[idx].exec.size() == info.num_exec_masks);
|
||||
|
||||
/* create ssa names for outer exec masks */
|
||||
if (info.has_discard) {
|
||||
@@ -250,17 +249,6 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
||||
}
|
||||
}
|
||||
|
||||
/* create ssa name for restore mask */
|
||||
if (info.has_divergent_break) {
|
||||
// TODO: this phi is unnecessary if we end WQM immediately after the loop
|
||||
/* this phi might be trivial but ensures a parallelcopy on the loop header */
|
||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||
phi->definitions[0] = bld.def(bld.lm);
|
||||
phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec[info.num_exec_masks - 1].first);
|
||||
ctx.info[idx].exec.back().first = bld.insert(std::move(phi));
|
||||
}
|
||||
|
||||
/* create ssa name for loop active mask */
|
||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||
@@ -269,16 +257,8 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
||||
else
|
||||
phi->definitions[0] = Definition(exec, bld.lm);
|
||||
phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first);
|
||||
Temp loop_active = bld.insert(std::move(phi));
|
||||
|
||||
if (info.has_divergent_break) {
|
||||
uint8_t mask_type =
|
||||
(ctx.info[idx].exec.back().second & (mask_type_wqm | mask_type_exact)) | mask_type_loop;
|
||||
ctx.info[idx].exec.emplace_back(loop_active, mask_type);
|
||||
} else {
|
||||
ctx.info[idx].exec.back().first = Operand(loop_active);
|
||||
ctx.info[idx].exec.back().second |= mask_type_loop;
|
||||
}
|
||||
ctx.info[idx].exec.back().first = bld.insert(std::move(phi));
|
||||
ctx.info[idx].exec.back().second |= mask_type_loop;
|
||||
|
||||
/* create a parallelcopy to move the active mask to exec */
|
||||
if (info.has_divergent_continue) {
|
||||
@@ -318,13 +298,9 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
||||
|
||||
if (info.has_divergent_break) {
|
||||
restore_exec = true;
|
||||
aco_ptr<Instruction>& phi = header->instructions[instr_idx];
|
||||
assert(phi->opcode == aco_opcode::p_linear_phi);
|
||||
for (unsigned i = 1; i < phi->operands.size(); i++)
|
||||
phi->operands[i] =
|
||||
get_exec_op(ctx.info[header_preds[i]].exec[info.num_exec_masks].first);
|
||||
/* Drop the loop active mask. */
|
||||
info.num_exec_masks--;
|
||||
}
|
||||
|
||||
assert(!(block->kind & block_kind_top_level) || info.num_exec_masks <= 2);
|
||||
|
||||
/* create the loop exit phis if not trivial */
|
||||
@@ -345,10 +321,6 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(
|
||||
aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||
phi->definitions[0] = bld.def(bld.lm);
|
||||
if (exec_idx == info.num_exec_masks - 1u) {
|
||||
phi->definitions[0] = Definition(exec, bld.lm);
|
||||
restore_exec = false;
|
||||
}
|
||||
for (unsigned i = 0; i < phi->operands.size(); i++)
|
||||
phi->operands[i] = get_exec_op(ctx.info[preds[i]].exec[exec_idx].first);
|
||||
ctx.info[idx].exec.emplace_back(bld.insert(std::move(phi)), type);
|
||||
@@ -659,9 +631,20 @@ add_branch_code(exec_ctx& ctx, Block* block)
|
||||
has_divergent_continue = true;
|
||||
}
|
||||
|
||||
if (has_divergent_break) {
|
||||
/* save restore exec mask */
|
||||
uint8_t mask = ctx.info[idx].exec.back().second;
|
||||
if (ctx.info[idx].exec.back().first.constantEquals(-1u)) {
|
||||
ctx.info[idx].exec.emplace_back(Operand(exec, bld.lm), mask);
|
||||
} else {
|
||||
bld.reset(bld.instructions, std::prev(bld.instructions->end()));
|
||||
Operand restore = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
|
||||
ctx.info[idx].exec.emplace(std::prev(ctx.info[idx].exec.end()), restore, mask);
|
||||
bld.reset(bld.instructions);
|
||||
}
|
||||
ctx.info[idx].exec.back().second &= (mask_type_wqm | mask_type_exact);
|
||||
}
|
||||
unsigned num_exec_masks = ctx.info[idx].exec.size();
|
||||
if (block->kind & block_kind_top_level)
|
||||
num_exec_masks = std::min(num_exec_masks, 2u);
|
||||
|
||||
ctx.loop.emplace_back(&ctx.program->blocks[block->linear_succs[0]], num_exec_masks,
|
||||
has_divergent_break, has_divergent_continue, has_discard);
|
||||
|
Reference in New Issue
Block a user