ir3: Calculate physical edges correctly
A block can have more than one extra physical successor, a fact that I missed initially. Now that RA has been fixed up to handle this, we can finally calculate the physical edges correctly.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22072>
This commit is contained in:
@@ -515,10 +515,11 @@ ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
ir3_block_add_physical_predecessor(struct ir3_block *block,
|
ir3_block_link_physical(struct ir3_block *pred,
|
||||||
struct ir3_block *pred)
|
struct ir3_block *succ)
|
||||||
{
|
{
|
||||||
array_insert(block, block->physical_predecessors, pred);
|
array_insert(pred, pred->physical_successors, succ);
|
||||||
|
array_insert(succ, succ->physical_predecessors, pred);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -537,22 +538,6 @@ ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
ir3_block_remove_physical_predecessor(struct ir3_block *block, struct ir3_block *pred)
|
|
||||||
{
|
|
||||||
for (unsigned i = 0; i < block->physical_predecessors_count; i++) {
|
|
||||||
if (block->physical_predecessors[i] == pred) {
|
|
||||||
if (i < block->physical_predecessors_count - 1) {
|
|
||||||
block->physical_predecessors[i] =
|
|
||||||
block->physical_predecessors[block->physical_predecessors_count - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
block->physical_predecessors_count--;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned
|
unsigned
|
||||||
ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred)
|
ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred)
|
||||||
{
|
{
|
||||||
|
@@ -653,10 +653,10 @@ struct ir3_block {
|
|||||||
*/
|
*/
|
||||||
struct ir3_instruction *condition;
|
struct ir3_instruction *condition;
|
||||||
struct ir3_block *successors[2];
|
struct ir3_block *successors[2];
|
||||||
struct ir3_block *physical_successors[2];
|
|
||||||
|
|
||||||
DECLARE_ARRAY(struct ir3_block *, predecessors);
|
DECLARE_ARRAY(struct ir3_block *, predecessors);
|
||||||
DECLARE_ARRAY(struct ir3_block *, physical_predecessors);
|
DECLARE_ARRAY(struct ir3_block *, physical_predecessors);
|
||||||
|
DECLARE_ARRAY(struct ir3_block *, physical_successors);
|
||||||
|
|
||||||
uint16_t start_ip, end_ip;
|
uint16_t start_ip, end_ip;
|
||||||
|
|
||||||
@@ -724,12 +724,9 @@ ir3_after_preamble(struct ir3 *ir)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred);
|
void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred);
|
||||||
void ir3_block_add_physical_predecessor(struct ir3_block *block,
|
void ir3_block_link_physical(struct ir3_block *pred, struct ir3_block *succ);
|
||||||
struct ir3_block *pred);
|
|
||||||
void ir3_block_remove_predecessor(struct ir3_block *block,
|
void ir3_block_remove_predecessor(struct ir3_block *block,
|
||||||
struct ir3_block *pred);
|
struct ir3_block *pred);
|
||||||
void ir3_block_remove_physical_predecessor(struct ir3_block *block,
|
|
||||||
struct ir3_block *pred);
|
|
||||||
unsigned ir3_block_get_pred_index(struct ir3_block *block,
|
unsigned ir3_block_get_pred_index(struct ir3_block *block,
|
||||||
struct ir3_block *pred);
|
struct ir3_block *pred);
|
||||||
|
|
||||||
|
@@ -3647,7 +3647,6 @@ emit_block(struct ir3_context *ctx, nir_block *nblock)
|
|||||||
if (nblock->successors[i]) {
|
if (nblock->successors[i]) {
|
||||||
ctx->block->successors[i] =
|
ctx->block->successors[i] =
|
||||||
get_block_or_continue(ctx, nblock->successors[i]);
|
get_block_or_continue(ctx, nblock->successors[i]);
|
||||||
ctx->block->physical_successors[i] = ctx->block->successors[i];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3687,20 +3686,6 @@ emit_if(struct ir3_context *ctx, nir_if *nif)
|
|||||||
|
|
||||||
emit_cf_list(ctx, &nif->then_list);
|
emit_cf_list(ctx, &nif->then_list);
|
||||||
emit_cf_list(ctx, &nif->else_list);
|
emit_cf_list(ctx, &nif->else_list);
|
||||||
|
|
||||||
struct ir3_block *last_then = get_block(ctx, nir_if_last_then_block(nif));
|
|
||||||
struct ir3_block *first_else = get_block(ctx, nir_if_first_else_block(nif));
|
|
||||||
assert(last_then->physical_successors[0] &&
|
|
||||||
!last_then->physical_successors[1]);
|
|
||||||
last_then->physical_successors[1] = first_else;
|
|
||||||
|
|
||||||
struct ir3_block *last_else = get_block(ctx, nir_if_last_else_block(nif));
|
|
||||||
struct ir3_block *after_if =
|
|
||||||
get_block(ctx, nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)));
|
|
||||||
assert(last_else->physical_successors[0] &&
|
|
||||||
!last_else->physical_successors[1]);
|
|
||||||
if (after_if != last_else->physical_successors[0])
|
|
||||||
last_else->physical_successors[1] = after_if;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -3728,7 +3713,6 @@ emit_loop(struct ir3_context *ctx, nir_loop *nloop)
|
|||||||
if (continue_blk) {
|
if (continue_blk) {
|
||||||
struct ir3_block *start = get_block(ctx, nstart);
|
struct ir3_block *start = get_block(ctx, nstart);
|
||||||
continue_blk->successors[0] = start;
|
continue_blk->successors[0] = start;
|
||||||
continue_blk->physical_successors[0] = start;
|
|
||||||
continue_blk->loop_id = ctx->loop_id;
|
continue_blk->loop_id = ctx->loop_id;
|
||||||
continue_blk->loop_depth = ctx->loop_depth;
|
continue_blk->loop_depth = ctx->loop_depth;
|
||||||
list_addtail(&continue_blk->node, &ctx->ir->block_list);
|
list_addtail(&continue_blk->node, &ctx->ir->block_list);
|
||||||
@@ -3814,13 +3798,8 @@ emit_stream_out(struct ir3_context *ctx)
|
|||||||
orig_end_block->successors[0] = stream_out_block;
|
orig_end_block->successors[0] = stream_out_block;
|
||||||
orig_end_block->successors[1] = new_end_block;
|
orig_end_block->successors[1] = new_end_block;
|
||||||
|
|
||||||
orig_end_block->physical_successors[0] = stream_out_block;
|
|
||||||
orig_end_block->physical_successors[1] = new_end_block;
|
|
||||||
|
|
||||||
stream_out_block->successors[0] = new_end_block;
|
stream_out_block->successors[0] = new_end_block;
|
||||||
|
|
||||||
stream_out_block->physical_successors[0] = new_end_block;
|
|
||||||
|
|
||||||
/* setup 'if (vtxcnt < maxvtxcnt)' condition: */
|
/* setup 'if (vtxcnt < maxvtxcnt)' condition: */
|
||||||
cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0);
|
cond = ir3_CMPS_S(ctx->block, vtxcnt, 0, maxvtxcnt, 0);
|
||||||
cond->dsts[0]->num = regid(REG_P0, 0);
|
cond->dsts[0]->num = regid(REG_P0, 0);
|
||||||
@@ -3886,9 +3865,6 @@ setup_predecessors(struct ir3 *ir)
|
|||||||
for (int i = 0; i < ARRAY_SIZE(block->successors); i++) {
|
for (int i = 0; i < ARRAY_SIZE(block->successors); i++) {
|
||||||
if (block->successors[i])
|
if (block->successors[i])
|
||||||
ir3_block_add_predecessor(block->successors[i], block);
|
ir3_block_add_predecessor(block->successors[i], block);
|
||||||
if (block->physical_successors[i])
|
|
||||||
ir3_block_add_physical_predecessor(block->physical_successors[i],
|
|
||||||
block);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -580,7 +580,7 @@ opt_jump(struct ir3 *ir)
|
|||||||
/* This pass destroys the physical CFG so don't keep it around to avoid
|
/* This pass destroys the physical CFG so don't keep it around to avoid
|
||||||
* validation errors.
|
* validation errors.
|
||||||
*/
|
*/
|
||||||
block->physical_successors[0] = block->physical_successors[1] = NULL;
|
block->physical_successors_count = 0;
|
||||||
block->physical_predecessors_count = 0;
|
block->physical_predecessors_count = 0;
|
||||||
|
|
||||||
foreach_instr (instr, &block->instr_list) {
|
foreach_instr (instr, &block->instr_list) {
|
||||||
|
@@ -22,6 +22,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "ir3.h"
|
#include "ir3.h"
|
||||||
|
#include "util/ralloc.h"
|
||||||
|
|
||||||
/* Lower several macro-instructions needed for shader subgroup support that
|
/* Lower several macro-instructions needed for shader subgroup support that
|
||||||
* must be turned into if statements. We do this after RA and post-RA
|
* must be turned into if statements. We do this after RA and post-RA
|
||||||
@@ -178,18 +179,21 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
|
|||||||
replace_pred(after_block->successors[i], before_block, after_block);
|
replace_pred(after_block->successors[i], before_block, after_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(before_block->physical_successors);
|
for (unsigned i = 0; i < before_block->physical_successors_count; i++) {
|
||||||
i++) {
|
replace_physical_pred(before_block->physical_successors[i],
|
||||||
after_block->physical_successors[i] =
|
before_block, after_block);
|
||||||
before_block->physical_successors[i];
|
|
||||||
if (after_block->physical_successors[i]) {
|
|
||||||
replace_physical_pred(after_block->physical_successors[i],
|
|
||||||
before_block, after_block);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ralloc_steal(after_block, before_block->physical_successors);
|
||||||
|
after_block->physical_successors = before_block->physical_successors;
|
||||||
|
after_block->physical_successors_sz = before_block->physical_successors_sz;
|
||||||
|
after_block->physical_successors_count =
|
||||||
|
before_block->physical_successors_count;
|
||||||
|
|
||||||
before_block->successors[0] = before_block->successors[1] = NULL;
|
before_block->successors[0] = before_block->successors[1] = NULL;
|
||||||
before_block->physical_successors[0] = before_block->physical_successors[1] = NULL;
|
before_block->physical_successors = NULL;
|
||||||
|
before_block->physical_successors_count = 0;
|
||||||
|
before_block->physical_successors_sz = 0;
|
||||||
|
|
||||||
foreach_instr_from_safe (rem_instr, &instr->node,
|
foreach_instr_from_safe (rem_instr, &instr->node,
|
||||||
&before_block->instr_list) {
|
&before_block->instr_list) {
|
||||||
@@ -204,20 +208,12 @@ split_block(struct ir3 *ir, struct ir3_block *before_block,
|
|||||||
return after_block;
|
return after_block;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
link_blocks_physical(struct ir3_block *pred, struct ir3_block *succ,
|
|
||||||
unsigned index)
|
|
||||||
{
|
|
||||||
pred->physical_successors[index] = succ;
|
|
||||||
ir3_block_add_physical_predecessor(succ, pred);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
link_blocks(struct ir3_block *pred, struct ir3_block *succ, unsigned index)
|
link_blocks(struct ir3_block *pred, struct ir3_block *succ, unsigned index)
|
||||||
{
|
{
|
||||||
pred->successors[index] = succ;
|
pred->successors[index] = succ;
|
||||||
ir3_block_add_predecessor(succ, pred);
|
ir3_block_add_predecessor(succ, pred);
|
||||||
link_blocks_physical(pred, succ, index);
|
ir3_block_link_physical(pred, succ);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ir3_block *
|
static struct ir3_block *
|
||||||
@@ -292,7 +288,7 @@ lower_instr(struct ir3 *ir, struct ir3_block **block, struct ir3_instruction *in
|
|||||||
header->brtype = IR3_BRANCH_GETONE;
|
header->brtype = IR3_BRANCH_GETONE;
|
||||||
|
|
||||||
link_blocks(exit, after_block, 0);
|
link_blocks(exit, after_block, 0);
|
||||||
link_blocks_physical(exit, footer, 1);
|
ir3_block_link_physical(exit, footer);
|
||||||
|
|
||||||
link_blocks(footer, header, 0);
|
link_blocks(footer, header, 0);
|
||||||
|
|
||||||
|
@@ -560,13 +560,14 @@ print_block(struct ir3_block *block, int lvl)
|
|||||||
mesa_log_stream_printf(stream, "/* succs: block%u; */\n",
|
mesa_log_stream_printf(stream, "/* succs: block%u; */\n",
|
||||||
block_id(block->successors[0]));
|
block_id(block->successors[0]));
|
||||||
}
|
}
|
||||||
if (block->physical_successors[0]) {
|
if (block->physical_successors_count > 0) {
|
||||||
tab(stream, lvl + 1);
|
tab(stream, lvl + 1);
|
||||||
mesa_log_stream_printf(stream, "/* physical succs: block%u",
|
mesa_log_stream_printf(stream, "/* physical succs: ");
|
||||||
block_id(block->physical_successors[0]));
|
for (unsigned i = 0; i < block->physical_successors_count; i++) {
|
||||||
if (block->physical_successors[1]) {
|
mesa_log_stream_printf(stream, "block%u",
|
||||||
mesa_log_stream_printf(stream, ", block%u",
|
block_id(block->physical_successors[i]));
|
||||||
block_id(block->physical_successors[1]));
|
if (i < block->physical_successors_count - 1)
|
||||||
|
mesa_log_stream_printf(stream, ", ");
|
||||||
}
|
}
|
||||||
mesa_log_stream_printf(stream, " */\n");
|
mesa_log_stream_printf(stream, " */\n");
|
||||||
}
|
}
|
||||||
|
@@ -477,10 +477,8 @@ propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
|
|||||||
progress |=
|
progress |=
|
||||||
merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
|
merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
|
||||||
}
|
}
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
for (unsigned i = 0; i < block->physical_successors_count; i++) {
|
||||||
struct ir3_block *succ = block->physical_successors[i];
|
struct ir3_block *succ = block->physical_successors[i];
|
||||||
if (!succ)
|
|
||||||
continue;
|
|
||||||
progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
|
progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
|
||||||
&ctx->reaching);
|
&ctx->reaching);
|
||||||
}
|
}
|
||||||
|
@@ -139,6 +139,8 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so)
|
|||||||
blocks[block->index].first_processed_divergent_pred = UINT_MAX;
|
blocks[block->index].first_processed_divergent_pred = UINT_MAX;
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
|
for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
|
||||||
if (block->successors[i]) {
|
if (block->successors[i]) {
|
||||||
|
ir3_block_link_physical(block, block->successors[i]);
|
||||||
|
|
||||||
if (block->successors[i]->index > block->index + 1) {
|
if (block->successors[i]->index > block->index + 1) {
|
||||||
edges[edge] = (struct logical_edge) {
|
edges[edge] = (struct logical_edge) {
|
||||||
.node = {
|
.node = {
|
||||||
@@ -196,6 +198,7 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so)
|
|||||||
|
|
||||||
/* Iterate over all edges stepping over the block. */
|
/* Iterate over all edges stepping over the block. */
|
||||||
struct uinterval interval = { block->index, block->index };
|
struct uinterval interval = { block->index, block->index };
|
||||||
|
struct logical_edge *prev = NULL;
|
||||||
uinterval_tree_foreach (struct logical_edge, edge, interval, &forward_edges,
|
uinterval_tree_foreach (struct logical_edge, edge, interval, &forward_edges,
|
||||||
node) {
|
node) {
|
||||||
/* If "block" definitely isn't outstanding when the branch
|
/* If "block" definitely isn't outstanding when the branch
|
||||||
@@ -249,6 +252,19 @@ ir3_calc_reconvergence(struct ir3_shader_variant *so)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!prev || prev->start_block != edge->start_block) {
|
||||||
|
/* We should only process this edge + block combination once, and
|
||||||
|
* we use the fact that edges are sorted by start point to avoid
|
||||||
|
* adding redundant physical edges in case multiple edges have the
|
||||||
|
* same start point by comparing with the previous edge. Therefore
|
||||||
|
* we should only add the physical edge once.
|
||||||
|
*/
|
||||||
|
for (unsigned i = 0; i < block->physical_predecessors_count; i++)
|
||||||
|
assert(block->physical_predecessors[i] != edge->start_block);
|
||||||
|
ir3_block_link_physical(edge->start_block, block);
|
||||||
|
}
|
||||||
|
prev = edge;
|
||||||
}
|
}
|
||||||
|
|
||||||
blocks[block->index].first_processed_divergent_pred =
|
blocks[block->index].first_processed_divergent_pred =
|
||||||
|
@@ -81,29 +81,6 @@ delete_block(struct ir3 *ir, struct ir3_block *block)
|
|||||||
}
|
}
|
||||||
succ->predecessors_count--;
|
succ->predecessors_count--;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < 2; i++) {
|
|
||||||
struct ir3_block *succ = block->physical_successors[i];
|
|
||||||
if (!succ)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ir3_block_remove_physical_predecessor(succ, block);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (block->physical_predecessors_count != 0) {
|
|
||||||
/* There should be only one physical predecessor, for the fallthrough
|
|
||||||
* edge.
|
|
||||||
*/
|
|
||||||
assert(block->physical_predecessors_count == 1);
|
|
||||||
struct ir3_block *pred = block->physical_predecessors[0];
|
|
||||||
assert(block->node.next != &ir->block_list);
|
|
||||||
struct ir3_block *next = list_entry(block->node.next, struct ir3_block, node);
|
|
||||||
if (pred->physical_successors[1] == block)
|
|
||||||
pred->physical_successors[1] = next;
|
|
||||||
else
|
|
||||||
pred->physical_successors[0] = next;
|
|
||||||
ir3_block_add_physical_predecessor(next, pred);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
@@ -372,7 +372,7 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
|
|||||||
static bool
|
static bool
|
||||||
is_physical_successor(struct ir3_block *block, struct ir3_block *succ)
|
is_physical_successor(struct ir3_block *block, struct ir3_block *succ)
|
||||||
{
|
{
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(block->physical_successors); i++)
|
for (unsigned i = 0; i < block->physical_successors_count; i++)
|
||||||
if (block->physical_successors[i] == succ)
|
if (block->physical_successors[i] == succ)
|
||||||
return true;
|
return true;
|
||||||
return false;
|
return false;
|
||||||
@@ -426,13 +426,12 @@ ir3_validate(struct ir3 *ir)
|
|||||||
ctx->current_instr = NULL;
|
ctx->current_instr = NULL;
|
||||||
|
|
||||||
/* Each logical successor should also be a physical successor: */
|
/* Each logical successor should also be a physical successor: */
|
||||||
if (block->physical_successors[0])
|
if (block->physical_successors_count > 0)
|
||||||
validate_assert(ctx, is_physical_successor(block, block->successors[i]));
|
validate_assert(ctx, is_physical_successor(block, block->successors[i]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
validate_assert(ctx, block->successors[0] || !block->successors[1]);
|
validate_assert(ctx, block->successors[0] || !block->successors[1]);
|
||||||
validate_assert(ctx, block->physical_successors[0] || !block->physical_successors[1]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ralloc_free(ctx);
|
ralloc_free(ctx);
|
||||||
|
Reference in New Issue
Block a user