From c40bc48252454079de809f95266fe6ed8af13a36 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Thu, 9 Feb 2023 16:21:39 +0100 Subject: [PATCH] ir3: Calculate physical edges correctly A block can have more than one extra physical successor, a fact that I missed initially. Now that we've fixed up RA to handle it, we can finally handle this correctly. Part-of: --- src/freedreno/ir3/ir3.c | 23 +++------------ src/freedreno/ir3/ir3.h | 7 ++--- src/freedreno/ir3/ir3_compiler_nir.c | 24 --------------- src/freedreno/ir3/ir3_legalize.c | 2 +- src/freedreno/ir3/ir3_lower_subgroups.c | 34 ++++++++++------------ src/freedreno/ir3/ir3_print.c | 13 +++++---- src/freedreno/ir3/ir3_ra_validate.c | 4 +-- src/freedreno/ir3/ir3_reconvergence.c | 16 ++++++++++ src/freedreno/ir3/ir3_remove_unreachable.c | 23 --------------- src/freedreno/ir3/ir3_validate.c | 5 ++-- 10 files changed, 48 insertions(+), 103 deletions(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index b469594c40c..89796721893 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -515,10 +515,11 @@ ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred) } void -ir3_block_add_physical_predecessor(struct ir3_block *block, - struct ir3_block *pred) +ir3_block_link_physical(struct ir3_block *pred, + struct ir3_block *succ) { - array_insert(block, block->physical_predecessors, pred); + array_insert(pred, pred->physical_successors, succ); + array_insert(succ, succ->physical_predecessors, pred); } void @@ -537,22 +538,6 @@ ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred) } } -void -ir3_block_remove_physical_predecessor(struct ir3_block *block, struct ir3_block *pred) -{ - for (unsigned i = 0; i < block->physical_predecessors_count; i++) { - if (block->physical_predecessors[i] == pred) { - if (i < block->physical_predecessors_count - 1) { - block->physical_predecessors[i] = - block->physical_predecessors[block->physical_predecessors_count - 1]; - } - - block->physical_predecessors_count--; - return; - } - } -} - unsigned ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred) { diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index c52f60f9a93..8f3e9c05ba8 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -653,10 +653,10 @@ struct ir3_block { */ struct ir3_instruction *condition; struct ir3_block *successors[2]; - struct ir3_block *physical_successors[2]; DECLARE_ARRAY(struct ir3_block *, predecessors); DECLARE_ARRAY(struct ir3_block *, physical_predecessors); + DECLARE_ARRAY(struct ir3_block *, physical_successors); uint16_t start_ip, end_ip; @@ -724,12 +724,9 @@ ir3_after_preamble(struct ir3 *ir) } void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred); -void ir3_block_add_physical_predecessor(struct ir3_block *block, - struct ir3_block *pred); +void ir3_block_link_physical(struct ir3_block *pred, struct ir3_block *succ); void ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred); -void ir3_block_remove_physical_predecessor(struct ir3_block *block, - struct ir3_block *pred); unsigned ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred); diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index dc8e5bb2a9f..a0dffbe9db3 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -3647,7 +3647,6 @@ emit_block(struct ir3_context *ctx, nir_block *nblock) if (nblock->successors[i]) { ctx->block->successors[i] = get_block_or_continue(ctx, nblock->successors[i]); - ctx->block->physical_successors[i] = ctx->block->successors[i]; } } @@ -3687,20 +3686,6 @@ emit_if(struct ir3_context *ctx, nir_if *nif) emit_cf_list(ctx, &nif->then_list); emit_cf_list(ctx, &nif->else_list); - - struct ir3_block *last_then = get_block(ctx, nir_if_last_then_block(nif)); - struct ir3_block *first_else = get_block(ctx, nir_if_first_else_block(nif)); - assert(last_then->physical_successors[0] && - !last_then->physical_successors[1]); - last_then->physical_successors[1] = first_else; - - struct ir3_block *last_else = get_block(ctx, nir_if_last_else_block(nif)); - struct ir3_block *after_if = - get_block(ctx, nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node))); - assert(last_else->physical_successors[0] && - !last_else->physical_successors[1]); - if (after_if != last_else->physical_successors[0]) - last_else->physical_successors[1] = after_if; } static void @@ -3728,7 +3713,6 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop) if (continue_blk) { struct ir3_block *start = get_block(ctx, nstart); continue_blk->successors[0] = start; - continue_blk->physical_successors[0] = start; continue_blk->loop_id = ctx->loop_id; continue_blk->loop_depth = ctx->loop_depth; list_addtail(&continue_blk->node, &ctx->ir->block_list); @@ -3814,13 +3798,8 @@ emit_stream_out(struct ir3_context *ctx) orig_end_block->successors[0] = stream_out_block; orig_end_block->successors[1] = new_end_block; - orig_end_block->physical_successors[0] = stream_out_block; - orig_end_block->physical_successors[1] = new_end_block; - stream_out_block->successors[0] = new_end_block; - stream_out_block->physical_successors[0] = new_end_block; - /* setup 'if (vtxcnt < maxvtxcnt)' condition: */ cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0); cond->dsts[0]->num = regid(REG_P0, 0); @@ -3886,9 +3865,6 @@ setup_predecessors(struct ir3 *ir) for (int i = 0; i < ARRAY_SIZE(block->successors); i++) { if (block->successors[i]) ir3_block_add_predecessor(block->successors[i], block); - if (block->physical_successors[i]) - ir3_block_add_physical_predecessor(block->physical_successors[i], - block); } } } diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 81c1536a6ea..795d3c7d7bb 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -580,7 +580,7 @@ opt_jump(struct ir3 *ir) /* This pass destroys the physical CFG so don't keep it around to avoid * validation errors. */ - block->physical_successors[0] = block->physical_successors[1] = NULL; + block->physical_successors_count = 0; block->physical_predecessors_count = 0; foreach_instr (instr, &block->instr_list) { diff --git a/src/freedreno/ir3/ir3_lower_subgroups.c b/src/freedreno/ir3/ir3_lower_subgroups.c index 6292b673fbd..032a781c474 100644 --- a/src/freedreno/ir3/ir3_lower_subgroups.c +++ b/src/freedreno/ir3/ir3_lower_subgroups.c @@ -22,6 +22,7 @@ */ #include "ir3.h" +#include "util/ralloc.h" /* Lower several macro-instructions needed for shader subgroup support that * must be turned into if statements. We do this after RA and post-RA @@ -178,18 +179,21 @@ split_block(struct ir3 *ir, struct ir3_block *before_block, replace_pred(after_block->successors[i], before_block, after_block); } - for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors); - i++) { - after_block->physical_successors[i] = - before_block->physical_successors[i]; - if (after_block->physical_successors[i]) { - replace_physical_pred(after_block->physical_successors[i], - before_block, after_block); - } + for (unsigned i = 0; i < before_block->physical_successors_count; i++) { + replace_physical_pred(before_block->physical_successors[i], + before_block, after_block); } + ralloc_steal(after_block, before_block->physical_successors); + after_block->physical_successors = before_block->physical_successors; + after_block->physical_successors_sz = before_block->physical_successors_sz; + after_block->physical_successors_count = + before_block->physical_successors_count; + before_block->successors[0] = before_block->successors[1] = NULL; - before_block->physical_successors[0] = before_block->physical_successors[1] = NULL; + before_block->physical_successors = NULL; + before_block->physical_successors_count = 0; + before_block->physical_successors_sz = 0; foreach_instr_from_safe (rem_instr, &instr->node, &before_block->instr_list) { @@ -204,20 +208,12 @@ split_block(struct ir3 *ir, struct ir3_block *before_block, return after_block; } -static void -link_blocks_physical(struct ir3_block *pred, struct ir3_block *succ, - unsigned index) -{ - pred->physical_successors[index] = succ; - ir3_block_add_physical_predecessor(succ, pred); -} - static void link_blocks(struct ir3_block *pred, struct ir3_block *succ, unsigned index) { pred->successors[index] = succ; ir3_block_add_predecessor(succ, pred); - link_blocks_physical(pred, succ, index); + ir3_block_link_physical(pred, succ); } static struct ir3_block * @@ -292,7 +288,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in header->brtype = IR3_BRANCH_GETONE; link_blocks(exit, after_block, 0); - link_blocks_physical(exit, footer, 1); + ir3_block_link_physical(exit, footer); link_blocks(footer, header, 0); diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index 8023439b7e5..f3db9dcde0d 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -560,13 +560,14 @@ print_block(struct ir3_block *block, int lvl) mesa_log_stream_printf(stream, "/* succs: block%u; */\n", block_id(block->successors[0])); } - if (block->physical_successors[0]) { + if (block->physical_successors_count > 0) { tab(stream, lvl + 1); - mesa_log_stream_printf(stream, "/* physical succs: block%u", - block_id(block->physical_successors[0])); - if (block->physical_successors[1]) { - mesa_log_stream_printf(stream, ", block%u", - block_id(block->physical_successors[1])); + mesa_log_stream_printf(stream, "/* physical succs: "); + for (unsigned i = 0; i < block->physical_successors_count; i++) { + mesa_log_stream_printf(stream, "block%u", + block_id(block->physical_successors[i])); + if (i < block->physical_successors_count - 1) + mesa_log_stream_printf(stream, ", "); } mesa_log_stream_printf(stream, " */\n"); } diff --git a/src/freedreno/ir3/ir3_ra_validate.c b/src/freedreno/ir3/ir3_ra_validate.c index 3d19e2b7431..d4e4902438a 100644 --- a/src/freedreno/ir3/ir3_ra_validate.c +++ b/src/freedreno/ir3/ir3_ra_validate.c @@ -477,10 +477,8 @@ propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block) progress |= merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching); } - for (unsigned i = 0; i < 2; i++) { + for (unsigned i = 0; i < block->physical_successors_count; i++) { struct ir3_block *succ = block->physical_successors[i]; - if (!succ) - continue; progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index], &ctx->reaching); } diff --git a/src/freedreno/ir3/ir3_reconvergence.c b/src/freedreno/ir3/ir3_reconvergence.c index 995dea8137c..f47590a0108 100644 --- a/src/freedreno/ir3/ir3_reconvergence.c +++ b/src/freedreno/ir3/ir3_reconvergence.c @@ -139,6 +139,8 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so) blocks[block->index].first_processed_divergent_pred = UINT_MAX; for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { if (block->successors[i]) { + ir3_block_link_physical(block, block->successors[i]); + if (block->successors[i]->index > block->index + 1) { edges[edge] = (struct logical_edge) { .node = { @@ -196,6 +198,7 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so) /* Iterate over all edges stepping over the block. */ struct uinterval interval = { block->index, block->index }; + struct logical_edge *prev = NULL; uinterval_tree_foreach (struct logical_edge, edge, interval, &forward_edges, node) { /* If "block" definitely isn't outstanding when the branch @@ -249,6 +252,19 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so) } } } + + if (!prev || prev->start_block != edge->start_block) { + /* We should only process this edge + block combination once, and + * we use the fact that edges are sorted by start point to avoid + * adding redundant physical edges in case multiple edges have the + * same start point by comparing with the previous edge. Therefore + * we should only add the physical edge once. + */ + for (unsigned i = 0; i < block->physical_predecessors_count; i++) + assert(block->physical_predecessors[i] != edge->start_block); + ir3_block_link_physical(edge->start_block, block); + } + prev = edge; } blocks[block->index].first_processed_divergent_pred = diff --git a/src/freedreno/ir3/ir3_remove_unreachable.c b/src/freedreno/ir3/ir3_remove_unreachable.c index 71a8848f227..bb587b11bf3 100644 --- a/src/freedreno/ir3/ir3_remove_unreachable.c +++ b/src/freedreno/ir3/ir3_remove_unreachable.c @@ -81,29 +81,6 @@ delete_block(struct ir3 *ir, struct ir3_block *block) } succ->predecessors_count--; } - - for (unsigned i = 0; i < 2; i++) { - struct ir3_block *succ = block->physical_successors[i]; - if (!succ) - continue; - - ir3_block_remove_physical_predecessor(succ, block); - } - - if (block->physical_predecessors_count != 0) { - /* There should be only one physical predecessor, for the fallthrough - * edge. - */ - assert(block->physical_predecessors_count == 1); - struct ir3_block *pred = block->physical_predecessors[0]; - assert(block->node.next != &ir->block_list); - struct ir3_block *next = list_entry(block->node.next, struct ir3_block, node); - if (pred->physical_successors[1] == block) - pred->physical_successors[1] = next; - else - pred->physical_successors[0] = next; - ir3_block_add_physical_predecessor(next, pred); - } } bool diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 3c58e8b1fc0..3bd89566a4c 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -372,7 +372,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr) static bool is_physical_successor(struct ir3_block *block, struct ir3_block *succ) { - for (unsigned i = 0; i < ARRAY_SIZE(block->physical_successors); i++) + for (unsigned i = 0; i < block->physical_successors_count; i++) if (block->physical_successors[i] == succ) return true; return false; @@ -426,13 +426,12 @@ ir3_validate(struct ir3 *ir) ctx->current_instr = NULL; /* Each logical successor should also be a physical successor: */ - if (block->physical_successors[0]) + if (block->physical_successors_count > 0) validate_assert(ctx, is_physical_successor(block, block->successors[i])); } } validate_assert(ctx, block->successors[0] || !block->successors[1]); - validate_assert(ctx, block->physical_successors[0] || !block->physical_successors[1]); } ralloc_free(ctx);