broadcom/compiler: Emit uniform loops using uniform control flow

Similarly to if statements, uniform loops are now emitted without
predication, using simple branches for breaks and continues. The
uniformity of the loop is determined by running the
nir_divergence_analysis pass.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7726>
This commit is contained in:
Arcady Goldmints-Orlov
2020-12-24 10:24:56 -06:00
committed by Marge Bot
parent 6643bdbd53
commit 79bde75131
3 changed files with 85 additions and 15 deletions

View File

@@ -2774,8 +2774,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
* XXX perf: Could we be using flpush/flpop somehow for our execution channel
* enabling?
*
* XXX perf: For uniform control flow, we should be able to skip c->execute
* handling entirely.
*/
static void
ntq_activate_execute_for_block(struct v3d_compile *c)
@@ -2823,9 +2821,13 @@ ntq_emit_uniform_if(struct v3d_compile *c, nir_if *if_stmt)
ntq_emit_cf_list(c, &if_stmt->then_list);
if (!empty_else_block) {
/* At the end of the THEN block, jump to ENDIF */
/* At the end of the THEN block, jump to ENDIF, unless
* the block ended in a break or continue.
*/
if (!c->cur_block->branch_emitted) {
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
vir_link_blocks(c->cur_block, after_block);
}
/* Emit the else block. */
vir_set_emit_block(c, else_block);
@@ -2932,7 +2934,7 @@ ntq_emit_if(struct v3d_compile *c, nir_if *nif)
bool was_in_control_flow = c->in_control_flow;
c->in_control_flow = true;
if (!vir_in_nonuniform_control_flow(c) &&
nir_src_is_dynamically_uniform(nif->condition)) {
!nir_src_is_divergent(nif->condition)) {
ntq_emit_uniform_if(c, nif);
} else {
ntq_emit_nonuniform_if(c, nif);
@@ -2959,7 +2961,34 @@ ntq_emit_jump(struct v3d_compile *c, nir_jump_instr *jump)
break;
case nir_jump_return:
unreachable("All returns shouold be lowered\n");
unreachable("All returns should be lowered\n");
break;
case nir_jump_halt:
case nir_jump_goto:
case nir_jump_goto_if:
unreachable("not supported\n");
break;
}
}
static void
ntq_emit_uniform_jump(struct v3d_compile *c, nir_jump_instr *jump)
{
switch (jump->type) {
case nir_jump_break:
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
vir_link_blocks(c->cur_block, c->loop_break_block);
c->cur_block->branch_emitted = true;
break;
case nir_jump_continue:
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
vir_link_blocks(c->cur_block, c->loop_cont_block);
c->cur_block->branch_emitted = true;
break;
case nir_jump_return:
unreachable("All returns should be lowered\n");
break;
case nir_jump_halt:
@@ -2995,7 +3024,10 @@ ntq_emit_instr(struct v3d_compile *c, nir_instr *instr)
break;
case nir_instr_type_jump:
if (vir_in_nonuniform_control_flow(c))
ntq_emit_jump(c, nir_instr_as_jump(instr));
else
ntq_emit_uniform_jump(c, nir_instr_as_jump(instr));
break;
default:
@@ -3017,20 +3049,14 @@ ntq_emit_block(struct v3d_compile *c, nir_block *block)
static void ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
static void
ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
ntq_emit_nonuniform_loop(struct v3d_compile *c, nir_loop *loop)
{
bool was_in_control_flow = c->in_control_flow;
c->in_control_flow = true;
bool was_uniform_control_flow = false;
if (!vir_in_nonuniform_control_flow(c)) {
c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
was_uniform_control_flow = true;
}
struct qblock *save_loop_cont_block = c->loop_cont_block;
struct qblock *save_loop_break_block = c->loop_break_block;
c->loop_cont_block = vir_new_block(c);
c->loop_break_block = vir_new_block(c);
@@ -3067,6 +3093,42 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
c->execute = c->undef;
else
ntq_activate_execute_for_block(c);
}
static void
/* Emit a loop whose control flow is uniform across all channels.
 * Breaks and continues inside the body are lowered to plain
 * unconditional branches (see ntq_emit_uniform_jump), so no
 * per-channel execute-mask predication is needed.
 */
ntq_emit_uniform_loop(struct v3d_compile *c, nir_loop *loop)
{
/* Fresh blocks for the loop header (continue target) and the
 * code following the loop (break target).  The caller
 * (ntq_emit_loop) is responsible for saving/restoring the
 * previous values of these fields.
 */
c->loop_cont_block = vir_new_block(c);
c->loop_break_block = vir_new_block(c);
/* Fall through from the current block into the loop header. */
vir_link_blocks(c->cur_block, c->loop_cont_block);
vir_set_emit_block(c, c->loop_cont_block);
ntq_emit_cf_list(c, &loop->body);
/* Close the back edge to the loop header, unless the body
 * already ended in a break/continue branch (branch_emitted is
 * set by ntq_emit_uniform_jump in that case).
 */
if (!c->cur_block->branch_emitted) {
vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALWAYS);
vir_link_blocks(c->cur_block, c->loop_cont_block);
}
/* Subsequent instructions go after the loop. */
vir_set_emit_block(c, c->loop_break_block);
}
static void
ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
{
bool was_in_control_flow = c->in_control_flow;
c->in_control_flow = true;
struct qblock *save_loop_cont_block = c->loop_cont_block;
struct qblock *save_loop_break_block = c->loop_break_block;
if (vir_in_nonuniform_control_flow(c) || loop->divergent) {
ntq_emit_nonuniform_loop(c, loop);
} else {
ntq_emit_uniform_loop(c, loop);
}
c->loop_break_block = save_loop_break_block;
c->loop_cont_block = save_loop_cont_block;

View File

@@ -456,6 +456,12 @@ struct qblock {
/** Offset within the uniform stream of the branch instruction */
uint32_t branch_uniform;
/**
* Has the terminating branch of this block already been emitted
* by a break or continue?
*/
bool branch_emitted;
/** @{ used by v3d_vir_live_variables.c */
BITSET_WORD *def;
BITSET_WORD *defin;

View File

@@ -1156,6 +1156,8 @@ v3d_attempt_compile(struct v3d_compile *c)
}
NIR_PASS_V(c->s, nir_lower_bool_to_int32);
nir_convert_to_lcssa(c->s, true, true);
NIR_PASS_V(c->s, nir_divergence_analysis);
NIR_PASS_V(c->s, nir_convert_from_ssa, true);
struct nir_schedule_options schedule_options = {