ir3: Calculate physical edges correctly
A block can have more than one extra physical successor, a fact that I missed initially. Now that we've fixed up RA to handle it, we can finally handle this correctly. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22072>
This commit is contained in:
@@ -515,10 +515,11 @@ ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred)
|
||||
}
|
||||
|
||||
void
|
||||
ir3_block_add_physical_predecessor(struct ir3_block *block,
|
||||
struct ir3_block *pred)
|
||||
ir3_block_link_physical(struct ir3_block *pred,
|
||||
struct ir3_block *succ)
|
||||
{
|
||||
array_insert(block, block->physical_predecessors, pred);
|
||||
array_insert(pred, pred->physical_successors, succ);
|
||||
array_insert(succ, succ->physical_predecessors, pred);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -537,22 +538,6 @@ ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ir3_block_remove_physical_predecessor(struct ir3_block *block, struct ir3_block *pred)
|
||||
{
|
||||
for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
|
||||
if (block->physical_predecessors[i] == pred) {
|
||||
if (i < block->physical_predecessors_count - 1) {
|
||||
block->physical_predecessors[i] =
|
||||
block->physical_predecessors[block->physical_predecessors_count - 1];
|
||||
}
|
||||
|
||||
block->physical_predecessors_count--;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred)
|
||||
{
|
||||
|
@@ -653,10 +653,10 @@ struct ir3_block {
|
||||
*/
|
||||
struct ir3_instruction *condition;
|
||||
struct ir3_block *successors[2];
|
||||
struct ir3_block *physical_successors[2];
|
||||
|
||||
DECLARE_ARRAY(struct ir3_block *, predecessors);
|
||||
DECLARE_ARRAY(struct ir3_block *, physical_predecessors);
|
||||
DECLARE_ARRAY(struct ir3_block *, physical_successors);
|
||||
|
||||
uint16_t start_ip, end_ip;
|
||||
|
||||
@@ -724,12 +724,9 @@ ir3_after_preamble(struct ir3 *ir)
|
||||
}
|
||||
|
||||
void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred);
|
||||
void ir3_block_add_physical_predecessor(struct ir3_block *block,
|
||||
struct ir3_block *pred);
|
||||
void ir3_block_link_physical(struct ir3_block *pred, struct ir3_block *succ);
|
||||
void ir3_block_remove_predecessor(struct ir3_block *block,
|
||||
struct ir3_block *pred);
|
||||
void ir3_block_remove_physical_predecessor(struct ir3_block *block,
|
||||
struct ir3_block *pred);
|
||||
unsigned ir3_block_get_pred_index(struct ir3_block *block,
|
||||
struct ir3_block *pred);
|
||||
|
||||
|
@@ -3647,7 +3647,6 @@ emit_block(struct ir3_context *ctx, nir_block *nblock)
|
||||
if (nblock->successors[i]) {
|
||||
ctx->block->successors[i] =
|
||||
get_block_or_continue(ctx, nblock->successors[i]);
|
||||
ctx->block->physical_successors[i] = ctx->block->successors[i];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3687,20 +3686,6 @@ emit_if(struct ir3_context *ctx, nir_if *nif)
|
||||
|
||||
emit_cf_list(ctx, &nif->then_list);
|
||||
emit_cf_list(ctx, &nif->else_list);
|
||||
|
||||
struct ir3_block *last_then = get_block(ctx, nir_if_last_then_block(nif));
|
||||
struct ir3_block *first_else = get_block(ctx, nir_if_first_else_block(nif));
|
||||
assert(last_then->physical_successors[0] &&
|
||||
!last_then->physical_successors[1]);
|
||||
last_then->physical_successors[1] = first_else;
|
||||
|
||||
struct ir3_block *last_else = get_block(ctx, nir_if_last_else_block(nif));
|
||||
struct ir3_block *after_if =
|
||||
get_block(ctx, nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)));
|
||||
assert(last_else->physical_successors[0] &&
|
||||
!last_else->physical_successors[1]);
|
||||
if (after_if != last_else->physical_successors[0])
|
||||
last_else->physical_successors[1] = after_if;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -3728,7 +3713,6 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop)
|
||||
if (continue_blk) {
|
||||
struct ir3_block *start = get_block(ctx, nstart);
|
||||
continue_blk->successors[0] = start;
|
||||
continue_blk->physical_successors[0] = start;
|
||||
continue_blk->loop_id = ctx->loop_id;
|
||||
continue_blk->loop_depth = ctx->loop_depth;
|
||||
list_addtail(&continue_blk->node, &ctx->ir->block_list);
|
||||
@@ -3814,13 +3798,8 @@ emit_stream_out(struct ir3_context *ctx)
|
||||
orig_end_block->successors[0] = stream_out_block;
|
||||
orig_end_block->successors[1] = new_end_block;
|
||||
|
||||
orig_end_block->physical_successors[0] = stream_out_block;
|
||||
orig_end_block->physical_successors[1] = new_end_block;
|
||||
|
||||
stream_out_block->successors[0] = new_end_block;
|
||||
|
||||
stream_out_block->physical_successors[0] = new_end_block;
|
||||
|
||||
/* setup 'if (vtxcnt < maxvtxcnt)' condition: */
|
||||
cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0);
|
||||
cond->dsts[0]->num = regid(REG_P0, 0);
|
||||
@@ -3886,9 +3865,6 @@ setup_predecessors(struct ir3 *ir)
|
||||
for (int i = 0; i < ARRAY_SIZE(block->successors); i++) {
|
||||
if (block->successors[i])
|
||||
ir3_block_add_predecessor(block->successors[i], block);
|
||||
if (block->physical_successors[i])
|
||||
ir3_block_add_physical_predecessor(block->physical_successors[i],
|
||||
block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -580,7 +580,7 @@ opt_jump(struct ir3 *ir)
|
||||
/* This pass destroys the physical CFG so don't keep it around to avoid
|
||||
* validation errors.
|
||||
*/
|
||||
block->physical_successors[0] = block->physical_successors[1] = NULL;
|
||||
block->physical_successors_count = 0;
|
||||
block->physical_predecessors_count = 0;
|
||||
|
||||
foreach_instr (instr, &block->instr_list) {
|
||||
|
@@ -22,6 +22,7 @@
|
||||
*/
|
||||
|
||||
#include "ir3.h"
|
||||
#include "util/ralloc.h"
|
||||
|
||||
/* Lower several macro-instructions needed for shader subgroup support that
|
||||
* must be turned into if statements. We do this after RA and post-RA
|
||||
@@ -178,18 +179,21 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
|
||||
replace_pred(after_block->successors[i], before_block, after_block);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors);
|
||||
i++) {
|
||||
after_block->physical_successors[i] =
|
||||
before_block->physical_successors[i];
|
||||
if (after_block->physical_successors[i]) {
|
||||
replace_physical_pred(after_block->physical_successors[i],
|
||||
before_block, after_block);
|
||||
}
|
||||
for (unsigned i = 0; i < before_block->physical_successors_count; i++) {
|
||||
replace_physical_pred(before_block->physical_successors[i],
|
||||
before_block, after_block);
|
||||
}
|
||||
|
||||
ralloc_steal(after_block, before_block->physical_successors);
|
||||
after_block->physical_successors = before_block->physical_successors;
|
||||
after_block->physical_successors_sz = before_block->physical_successors_sz;
|
||||
after_block->physical_successors_count =
|
||||
before_block->physical_successors_count;
|
||||
|
||||
before_block->successors[0] = before_block->successors[1] = NULL;
|
||||
before_block->physical_successors[0] = before_block->physical_successors[1] = NULL;
|
||||
before_block->physical_successors = NULL;
|
||||
before_block->physical_successors_count = 0;
|
||||
before_block->physical_successors_sz = 0;
|
||||
|
||||
foreach_instr_from_safe (rem_instr, &instr->node,
|
||||
&before_block->instr_list) {
|
||||
@@ -204,20 +208,12 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
|
||||
return after_block;
|
||||
}
|
||||
|
||||
static void
|
||||
link_blocks_physical(struct ir3_block *pred, struct ir3_block *succ,
|
||||
unsigned index)
|
||||
{
|
||||
pred->physical_successors[index] = succ;
|
||||
ir3_block_add_physical_predecessor(succ, pred);
|
||||
}
|
||||
|
||||
static void
|
||||
link_blocks(struct ir3_block *pred, struct ir3_block *succ, unsigned index)
|
||||
{
|
||||
pred->successors[index] = succ;
|
||||
ir3_block_add_predecessor(succ, pred);
|
||||
link_blocks_physical(pred, succ, index);
|
||||
ir3_block_link_physical(pred, succ);
|
||||
}
|
||||
|
||||
static struct ir3_block *
|
||||
@@ -292,7 +288,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in
|
||||
header->brtype = IR3_BRANCH_GETONE;
|
||||
|
||||
link_blocks(exit, after_block, 0);
|
||||
link_blocks_physical(exit, footer, 1);
|
||||
ir3_block_link_physical(exit, footer);
|
||||
|
||||
link_blocks(footer, header, 0);
|
||||
|
||||
|
@@ -560,13 +560,14 @@ print_block(struct ir3_block *block, int lvl)
|
||||
mesa_log_stream_printf(stream, "/* succs: block%u; */\n",
|
||||
block_id(block->successors[0]));
|
||||
}
|
||||
if (block->physical_successors[0]) {
|
||||
if (block->physical_successors_count > 0) {
|
||||
tab(stream, lvl + 1);
|
||||
mesa_log_stream_printf(stream, "/* physical succs: block%u",
|
||||
block_id(block->physical_successors[0]));
|
||||
if (block->physical_successors[1]) {
|
||||
mesa_log_stream_printf(stream, ", block%u",
|
||||
block_id(block->physical_successors[1]));
|
||||
mesa_log_stream_printf(stream, "/* physical succs: ");
|
||||
for (unsigned i = 0; i < block->physical_successors_count; i++) {
|
||||
mesa_log_stream_printf(stream, "block%u",
|
||||
block_id(block->physical_successors[i]));
|
||||
if (i < block->physical_successors_count - 1)
|
||||
mesa_log_stream_printf(stream, ", ");
|
||||
}
|
||||
mesa_log_stream_printf(stream, " */\n");
|
||||
}
|
||||
|
@@ -477,10 +477,8 @@ propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
|
||||
progress |=
|
||||
merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
|
||||
}
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
for (unsigned i = 0; i < block->physical_successors_count; i++) {
|
||||
struct ir3_block *succ = block->physical_successors[i];
|
||||
if (!succ)
|
||||
continue;
|
||||
progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
|
||||
&ctx->reaching);
|
||||
}
|
||||
|
@@ -139,6 +139,8 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so)
|
||||
blocks[block->index].first_processed_divergent_pred = UINT_MAX;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
|
||||
if (block->successors[i]) {
|
||||
ir3_block_link_physical(block, block->successors[i]);
|
||||
|
||||
if (block->successors[i]->index > block->index + 1) {
|
||||
edges[edge] = (struct logical_edge) {
|
||||
.node = {
|
||||
@@ -196,6 +198,7 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so)
|
||||
|
||||
/* Iterate over all edges stepping over the block. */
|
||||
struct uinterval interval = { block->index, block->index };
|
||||
struct logical_edge *prev = NULL;
|
||||
uinterval_tree_foreach (struct logical_edge, edge, interval, &forward_edges,
|
||||
node) {
|
||||
/* If "block" definitely isn't outstanding when the branch
|
||||
@@ -249,6 +252,19 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!prev || prev->start_block != edge->start_block) {
|
||||
/* We should only process this edge + block combination once, and
|
||||
* we use the fact that edges are sorted by start point to avoid
|
||||
* adding redundant physical edges in case multiple edges have the
|
||||
* same start point by comparing with the previous edge. Therefore
|
||||
* we should only add the physical edge once.
|
||||
*/
|
||||
for (unsigned i = 0; i < block->physical_predecessors_count; i++)
|
||||
assert(block->physical_predecessors[i] != edge->start_block);
|
||||
ir3_block_link_physical(edge->start_block, block);
|
||||
}
|
||||
prev = edge;
|
||||
}
|
||||
|
||||
blocks[block->index].first_processed_divergent_pred =
|
||||
|
@@ -81,29 +81,6 @@ delete_block(struct ir3 *ir, struct ir3_block *block)
|
||||
}
|
||||
succ->predecessors_count--;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
struct ir3_block *succ = block->physical_successors[i];
|
||||
if (!succ)
|
||||
continue;
|
||||
|
||||
ir3_block_remove_physical_predecessor(succ, block);
|
||||
}
|
||||
|
||||
if (block->physical_predecessors_count != 0) {
|
||||
/* There should be only one physical predecessor, for the fallthrough
|
||||
* edge.
|
||||
*/
|
||||
assert(block->physical_predecessors_count == 1);
|
||||
struct ir3_block *pred = block->physical_predecessors[0];
|
||||
assert(block->node.next != &ir->block_list);
|
||||
struct ir3_block *next = list_entry(block->node.next, struct ir3_block, node);
|
||||
if (pred->physical_successors[1] == block)
|
||||
pred->physical_successors[1] = next;
|
||||
else
|
||||
pred->physical_successors[0] = next;
|
||||
ir3_block_add_physical_predecessor(next, pred);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
|
@@ -372,7 +372,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
|
||||
static bool
|
||||
is_physical_successor(struct ir3_block *block, struct ir3_block *succ)
|
||||
{
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(block->physical_successors); i++)
|
||||
for (unsigned i = 0; i < block->physical_successors_count; i++)
|
||||
if (block->physical_successors[i] == succ)
|
||||
return true;
|
||||
return false;
|
||||
@@ -426,13 +426,12 @@ ir3_validate(struct ir3 *ir)
|
||||
ctx->current_instr = NULL;
|
||||
|
||||
/* Each logical successor should also be a physical successor: */
|
||||
if (block->physical_successors[0])
|
||||
if (block->physical_successors_count > 0)
|
||||
validate_assert(ctx, is_physical_successor(block, block->successors[i]));
|
||||
}
|
||||
}
|
||||
|
||||
validate_assert(ctx, block->successors[0] || !block->successors[1]);
|
||||
validate_assert(ctx, block->physical_successors[0] || !block->physical_successors[1]);
|
||||
}
|
||||
|
||||
ralloc_free(ctx);
|
||||
|
Reference in New Issue
Block a user