aco/optimizer_postRA: Fix logical control flow handling.
Change reset_block() so it only considers the logical
predecessors for VGPRs. Relevant for some optimizations
across loops.
This commit fixes an assertion failure which was triggered
by Zink in a piglit test.
Fossil DB stats unaffected on Navi 21.
Fixes: 2e56e23420
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18488>
This commit is contained in:
@@ -34,6 +34,9 @@ namespace aco {
|
||||
namespace {
|
||||
|
||||
constexpr const size_t max_reg_cnt = 512;
|
||||
constexpr const size_t max_sgpr_cnt = 128;
|
||||
constexpr const size_t min_vgpr = 256;
|
||||
constexpr const size_t max_vgpr_cnt = 256;
|
||||
|
||||
struct Idx {
|
||||
bool operator==(const Idx& other) const { return block == other.block && instr == other.instr; }
|
||||
@@ -66,18 +69,47 @@ struct pr_opt_ctx {
|
||||
std::fill(instr_idx_by_regs[block->index].begin(), instr_idx_by_regs[block->index].end(),
|
||||
not_written_in_block);
|
||||
} else {
|
||||
unsigned first_pred = block->linear_preds[0];
|
||||
for (unsigned i = 0; i < max_reg_cnt; i++) {
|
||||
bool all_same = std::all_of(
|
||||
std::next(block->linear_preds.begin()), block->linear_preds.end(),
|
||||
[&](unsigned pred)
|
||||
{ return instr_idx_by_regs[pred][i] == instr_idx_by_regs[first_pred][i]; });
|
||||
const uint32_t first_linear_pred = block->linear_preds[0];
|
||||
const std::vector<uint32_t>& linear_preds = block->linear_preds;
|
||||
|
||||
for (unsigned i = 0; i < max_sgpr_cnt; i++) {
|
||||
const bool all_same = std::all_of(
|
||||
std::next(linear_preds.begin()), linear_preds.end(),
|
||||
[=](unsigned pred)
|
||||
{ return instr_idx_by_regs[pred][i] == instr_idx_by_regs[first_linear_pred][i]; });
|
||||
|
||||
if (all_same)
|
||||
instr_idx_by_regs[block->index][i] = instr_idx_by_regs[first_pred][i];
|
||||
instr_idx_by_regs[block->index][i] = instr_idx_by_regs[first_linear_pred][i];
|
||||
else
|
||||
instr_idx_by_regs[block->index][i] = written_by_multiple_instrs;
|
||||
}
|
||||
|
||||
if (!block->logical_preds.empty()) {
|
||||
/* We assume that VGPRs are only read by blocks which have a logical predecessor,
|
||||
* ie. any block that reads any VGPR has at least 1 logical predecessor.
|
||||
*/
|
||||
const unsigned first_logical_pred = block->logical_preds[0];
|
||||
const std::vector<uint32_t>& logical_preds = block->logical_preds;
|
||||
|
||||
for (unsigned i = min_vgpr; i < (min_vgpr + max_vgpr_cnt); i++) {
|
||||
const bool all_same = std::all_of(
|
||||
std::next(logical_preds.begin()), logical_preds.end(),
|
||||
[=](unsigned pred) {
|
||||
return instr_idx_by_regs[pred][i] == instr_idx_by_regs[first_logical_pred][i];
|
||||
});
|
||||
|
||||
if (all_same)
|
||||
instr_idx_by_regs[block->index][i] = instr_idx_by_regs[first_logical_pred][i];
|
||||
else
|
||||
instr_idx_by_regs[block->index][i] = written_by_multiple_instrs;
|
||||
}
|
||||
} else {
|
||||
/* If a block has no logical predecessors, it is not part of the
|
||||
* logical CFG and therefore it also won't have any logical successors.
|
||||
* Such a block does not write any VGPRs ever.
|
||||
*/
|
||||
assert(block->logical_succs.empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -425,14 +425,16 @@ BEGIN_TEST(optimizer_postRA.dpp)
|
||||
|
||||
/* control flow */
|
||||
//! BB1
|
||||
//! /* logical preds: / linear preds: BB0, / kind: uniform, */
|
||||
//! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */
|
||||
//! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
|
||||
//! p_unit_test 10, %res10:v[2]
|
||||
Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
|
||||
|
||||
bld.reset(program->create_and_insert_block());
|
||||
program->blocks[0].linear_succs.push_back(1);
|
||||
program->blocks[0].logical_succs.push_back(1);
|
||||
program->blocks[1].linear_preds.push_back(0);
|
||||
program->blocks[1].logical_preds.push_back(0);
|
||||
|
||||
Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b);
|
||||
writeout(10, Operand(res10, reg_v2));
|
||||
|
Reference in New Issue
Block a user