aco: always terminate quads if they have been demoted entirely
Previously, quads were only terminated in top-level control flow. This patch makes the behavior consistent.

Totals from 7811 (9.86% of 79242) affected shaders: (GFX11)
Instrs: 7859667 -> 7850757 (-0.11%); split: -0.18%, +0.07%
CodeSize: 41642280 -> 41611836 (-0.07%); split: -0.13%, +0.06%
Latency: 73692815 -> 73707588 (+0.02%); split: -0.02%, +0.04%
InvThroughput: 10672160 -> 10672323 (+0.00%); split: -0.01%, +0.01%
VClause: 137478 -> 137469 (-0.01%); split: -0.02%, +0.02%
SClause: 314905 -> 314924 (+0.01%); split: -0.19%, +0.20%
Copies: 587014 -> 576039 (-1.87%); split: -2.10%, +0.23%
Branches: 213101 -> 213123 (+0.01%); split: -0.01%, +0.02%
PreSGPRs: 313588 -> 313355 (-0.07%); split: -0.09%, +0.01%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27112>
This commit is contained in:

committed by
Marge Bot

parent
a42b83e3fb
commit
e89977ff71
@@ -516,57 +516,60 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
|
||||
instr->definitions[1] = bld.def(s1, scc);
|
||||
}
|
||||
} else if (instr->opcode == aco_opcode::p_demote_to_helper) {
|
||||
/* turn demote into discard_if with only exact masks */
|
||||
assert((info.exec[0].second & mask_type_exact) &&
|
||||
(info.exec[0].second & mask_type_global));
|
||||
|
||||
int num;
|
||||
Operand src;
|
||||
Temp exit_cond;
|
||||
if (instr->operands[0].isConstant() && !(block->kind & block_kind_top_level)) {
|
||||
assert(instr->operands[0].constantValue() == -1u);
|
||||
/* transition to exact and set exec to zero */
|
||||
exit_cond = bld.tmp(s1);
|
||||
src = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)),
|
||||
Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
|
||||
|
||||
num = info.exec.size() - 2;
|
||||
if (!(info.exec.back().second & mask_type_exact)) {
|
||||
info.exec.back().first = src;
|
||||
info.exec.emplace_back(Operand(bld.lm), mask_type_exact);
|
||||
}
|
||||
} else {
|
||||
/* demote_if: transition to exact */
|
||||
if (block->kind & block_kind_top_level && info.exec.size() == 2 &&
|
||||
info.exec.back().second & mask_type_global) {
|
||||
/* We don't need to actually copy anything into exec, since the s_andn2
|
||||
* instructions later will do that.
|
||||
*/
|
||||
info.exec.pop_back();
|
||||
} else {
|
||||
transition_to_Exact(ctx, bld, block->index);
|
||||
}
|
||||
src = instr->operands[0];
|
||||
num = info.exec.size() - 1;
|
||||
const bool nested_cf = !(info.exec.back().second & mask_type_global);
|
||||
if (ctx.handle_wqm && state == Exact && nested_cf) {
|
||||
/* Transition back to WQM without extra instruction. */
|
||||
info.exec.pop_back();
|
||||
state = WQM;
|
||||
} else if (block->instructions[idx + 1]->opcode == aco_opcode::p_end_wqm) {
|
||||
/* Transition to Exact without extra instruction. */
|
||||
info.exec.resize(1);
|
||||
state = Exact;
|
||||
} else if (nested_cf) {
|
||||
/* Save current exec temporarily. */
|
||||
info.exec.back().first = bld.copy(bld.def(bld.lm), Operand(exec, bld.lm));
|
||||
}
|
||||
|
||||
for (int i = num; i >= 0; i--) {
|
||||
if (info.exec[i].second & mask_type_exact) {
|
||||
Instruction* andn2 =
|
||||
bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
|
||||
get_exec_op(info.exec[i].first), src);
|
||||
if (i == (int)info.exec.size() - 1)
|
||||
andn2->definitions[0] = Definition(exec, bld.lm);
|
||||
/* Remove invocations from global exact mask. */
|
||||
Definition def = state == Exact ? Definition(exec, bld.lm) : bld.def(bld.lm);
|
||||
Operand src = instr->operands[0].isConstant() ? Operand(exec, bld.lm) : instr->operands[0];
|
||||
|
||||
info.exec[i].first = Operand(andn2->definitions[0].getTemp());
|
||||
exit_cond = andn2->definitions[1].getTemp();
|
||||
} else {
|
||||
assert(i != 0);
|
||||
}
|
||||
Definition exit_cond =
|
||||
bld.sop2(Builder::s_andn2, def, bld.def(s1, scc), get_exec_op(info.exec[0].first), src)
|
||||
.def(1);
|
||||
info.exec[0].first = Operand(def.getTemp());
|
||||
|
||||
/* Update global WQM mask and store in exec. */
|
||||
if (state == WQM) {
|
||||
assert(info.exec.size() > 1);
|
||||
exit_cond =
|
||||
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc), def.getTemp())
|
||||
.def(1);
|
||||
}
|
||||
|
||||
/* End shader if global mask is zero. */
|
||||
instr->opcode = aco_opcode::p_exit_early_if;
|
||||
instr->operands[0] = bld.scc(exit_cond);
|
||||
state = Exact;
|
||||
instr->operands[0] = bld.scc(exit_cond.getTemp());
|
||||
bld.insert(std::move(instr));
|
||||
|
||||
/* Update all other exec masks. */
|
||||
if (nested_cf) {
|
||||
const unsigned global_idx = state == WQM ? 1 : 0;
|
||||
for (unsigned i = global_idx + 1; i < info.exec.size() - 1; i++) {
|
||||
info.exec[i].first =
|
||||
bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc),
|
||||
get_exec_op(info.exec[i].first), Operand(exec, bld.lm));
|
||||
}
|
||||
/* Update current exec and save WQM mask. */
|
||||
info.exec[global_idx].first =
|
||||
bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
|
||||
Definition(exec, bld.lm), info.exec.back().first, Operand(exec, bld.lm));
|
||||
info.exec.back().first = Operand(bld.lm);
|
||||
}
|
||||
continue;
|
||||
|
||||
} else if (instr->opcode == aco_opcode::p_elect) {
|
||||
bool all_lanes_enabled = info.exec.back().first.constantEquals(-1u);
|
||||
|
Reference in New Issue
Block a user