broadcom/compiler: Emit uniform loops using uniform control flow
Similarly to if statements, uniform loops are now emitted without predication, using simple branches for breaks and continues. The uniformity of the loop is determined by running the nir_divergence_analysis pass. Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7726>
This commit is contained in:

committed by
Marge Bot

parent
6643bdbd53
commit
79bde75131
@@ -2774,8 +2774,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
* XXX perf: Could we be using flpush/flpop somehow for our execution channel
|
||||
* enabling?
|
||||
*
|
||||
* XXX perf: For uniform control flow, we should be able to skip c->execute
|
||||
* handling entirely.
|
||||
*/
|
||||
static void
|
||||
ntq_activate_execute_for_block(struct v3d_compile *c)
|
||||
@@ -2823,9 +2821,13 @@ ntq_emit_uniform_if(struct v3d_compile *c, nir_if *if_stmt)
|
||||
ntq_emit_cf_list(c, &if_stmt->then_list);
|
||||
|
||||
if (!empty_else_block) {
|
||||
/* At the end of the THEN block, jump to ENDIF */
|
||||
/* At the end of the THEN block, jump to ENDIF, unless
|
||||
* the block ended in a break or continue.
|
||||
*/
|
||||
if (!c->cur_block->branch_emitted) {
|
||||
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
|
||||
vir_link_blocks(c->cur_block, after_block);
|
||||
}
|
||||
|
||||
/* Emit the else block. */
|
||||
vir_set_emit_block(c, else_block);
|
||||
@@ -2932,7 +2934,7 @@ ntq_emit_if(struct v3d_compile *c, nir_if *nif)
|
||||
bool was_in_control_flow = c->in_control_flow;
|
||||
c->in_control_flow = true;
|
||||
if (!vir_in_nonuniform_control_flow(c) &&
|
||||
nir_src_is_dynamically_uniform(nif->condition)) {
|
||||
!nir_src_is_divergent(nif->condition)) {
|
||||
ntq_emit_uniform_if(c, nif);
|
||||
} else {
|
||||
ntq_emit_nonuniform_if(c, nif);
|
||||
@@ -2959,7 +2961,34 @@ ntq_emit_jump(struct v3d_compile *c, nir_jump_instr *jump)
|
||||
break;
|
||||
|
||||
case nir_jump_return:
|
||||
unreachable("All returns shouold be lowered\n");
|
||||
unreachable("All returns should be lowered\n");
|
||||
break;
|
||||
|
||||
case nir_jump_halt:
|
||||
case nir_jump_goto:
|
||||
case nir_jump_goto_if:
|
||||
unreachable("not supported\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ntq_emit_uniform_jump(struct v3d_compile *c, nir_jump_instr *jump)
|
||||
{
|
||||
switch (jump->type) {
|
||||
case nir_jump_break:
|
||||
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
|
||||
vir_link_blocks(c->cur_block, c->loop_break_block);
|
||||
c->cur_block->branch_emitted = true;
|
||||
break;
|
||||
case nir_jump_continue:
|
||||
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
|
||||
vir_link_blocks(c->cur_block, c->loop_cont_block);
|
||||
c->cur_block->branch_emitted = true;
|
||||
break;
|
||||
|
||||
case nir_jump_return:
|
||||
unreachable("All returns should be lowered\n");
|
||||
break;
|
||||
|
||||
case nir_jump_halt:
|
||||
@@ -2995,7 +3024,10 @@ ntq_emit_instr(struct v3d_compile *c, nir_instr *instr)
|
||||
break;
|
||||
|
||||
case nir_instr_type_jump:
|
||||
if (vir_in_nonuniform_control_flow(c))
|
||||
ntq_emit_jump(c, nir_instr_as_jump(instr));
|
||||
else
|
||||
ntq_emit_uniform_jump(c, nir_instr_as_jump(instr));
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -3017,20 +3049,14 @@ ntq_emit_block(struct v3d_compile *c, nir_block *block)
|
||||
static void ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
|
||||
|
||||
static void
|
||||
ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
|
||||
ntq_emit_nonuniform_loop(struct v3d_compile *c, nir_loop *loop)
|
||||
{
|
||||
bool was_in_control_flow = c->in_control_flow;
|
||||
c->in_control_flow = true;
|
||||
|
||||
bool was_uniform_control_flow = false;
|
||||
if (!vir_in_nonuniform_control_flow(c)) {
|
||||
c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
|
||||
was_uniform_control_flow = true;
|
||||
}
|
||||
|
||||
struct qblock *save_loop_cont_block = c->loop_cont_block;
|
||||
struct qblock *save_loop_break_block = c->loop_break_block;
|
||||
|
||||
c->loop_cont_block = vir_new_block(c);
|
||||
c->loop_break_block = vir_new_block(c);
|
||||
|
||||
@@ -3067,6 +3093,42 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
|
||||
c->execute = c->undef;
|
||||
else
|
||||
ntq_activate_execute_for_block(c);
|
||||
}
|
||||
|
||||
static void
|
||||
ntq_emit_uniform_loop(struct v3d_compile *c, nir_loop *loop)
|
||||
{
|
||||
|
||||
c->loop_cont_block = vir_new_block(c);
|
||||
c->loop_break_block = vir_new_block(c);
|
||||
|
||||
vir_link_blocks(c->cur_block, c->loop_cont_block);
|
||||
vir_set_emit_block(c, c->loop_cont_block);
|
||||
|
||||
ntq_emit_cf_list(c, &loop->body);
|
||||
|
||||
if (!c->cur_block->branch_emitted) {
|
||||
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
|
||||
vir_link_blocks(c->cur_block, c->loop_cont_block);
|
||||
}
|
||||
|
||||
vir_set_emit_block(c, c->loop_break_block);
|
||||
}
|
||||
|
||||
static void
|
||||
ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
|
||||
{
|
||||
bool was_in_control_flow = c->in_control_flow;
|
||||
c->in_control_flow = true;
|
||||
|
||||
struct qblock *save_loop_cont_block = c->loop_cont_block;
|
||||
struct qblock *save_loop_break_block = c->loop_break_block;
|
||||
|
||||
if (vir_in_nonuniform_control_flow(c) || loop->divergent) {
|
||||
ntq_emit_nonuniform_loop(c, loop);
|
||||
} else {
|
||||
ntq_emit_uniform_loop(c, loop);
|
||||
}
|
||||
|
||||
c->loop_break_block = save_loop_break_block;
|
||||
c->loop_cont_block = save_loop_cont_block;
|
||||
|
@@ -456,6 +456,12 @@ struct qblock {
|
||||
/** Offset within the uniform stream of the branch instruction */
|
||||
uint32_t branch_uniform;
|
||||
|
||||
/**
|
||||
* Has the terminating branch of this block already been emitted
|
||||
* by a break or continue?
|
||||
*/
|
||||
bool branch_emitted;
|
||||
|
||||
/** @{ used by v3d_vir_live_variables.c */
|
||||
BITSET_WORD *def;
|
||||
BITSET_WORD *defin;
|
||||
|
@@ -1156,6 +1156,8 @@ v3d_attempt_compile(struct v3d_compile *c)
|
||||
}
|
||||
|
||||
NIR_PASS_V(c->s, nir_lower_bool_to_int32);
|
||||
nir_convert_to_lcssa(c->s, true, true);
|
||||
NIR_PASS_V(c->s, nir_divergence_analysis);
|
||||
NIR_PASS_V(c->s, nir_convert_from_ssa, true);
|
||||
|
||||
struct nir_schedule_options schedule_options = {
|
||||
|
Reference in New Issue
Block a user