nir: Make "divergent" a property of an SSA value

v2: fix usage in ACO (by Daniel Schürmann)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4062>
This commit is contained in:
Jason Ekstrand
2019-10-15 14:48:10 -05:00
committed by Marge Bot
parent db94a2d03d
commit ca2d53f451
5 changed files with 122 additions and 95 deletions

View File

@@ -711,9 +711,8 @@ void emit_comparison(isel_context *ctx, nir_alu_instr *instr, Temp dst,
{ {
aco_opcode s_op = instr->src[0].src.ssa->bit_size == 64 ? s64_op : instr->src[0].src.ssa->bit_size == 32 ? s32_op : aco_opcode::num_opcodes; aco_opcode s_op = instr->src[0].src.ssa->bit_size == 64 ? s64_op : instr->src[0].src.ssa->bit_size == 32 ? s32_op : aco_opcode::num_opcodes;
aco_opcode v_op = instr->src[0].src.ssa->bit_size == 64 ? v64_op : instr->src[0].src.ssa->bit_size == 32 ? v32_op : v16_op; aco_opcode v_op = instr->src[0].src.ssa->bit_size == 64 ? v64_op : instr->src[0].src.ssa->bit_size == 32 ? v32_op : v16_op;
bool divergent_vals = ctx->divergent_vals[instr->dest.dest.ssa.index];
bool use_valu = s_op == aco_opcode::num_opcodes || bool use_valu = s_op == aco_opcode::num_opcodes ||
divergent_vals || nir_dest_is_divergent(instr->dest.dest) ||
ctx->allocated[instr->src[0].src.ssa->index].type() == RegType::vgpr || ctx->allocated[instr->src[0].src.ssa->index].type() == RegType::vgpr ||
ctx->allocated[instr->src[1].src.ssa->index].type() == RegType::vgpr; ctx->allocated[instr->src[1].src.ssa->index].type() == RegType::vgpr;
aco_opcode op = use_valu ? v_op : s_op; aco_opcode op = use_valu ? v_op : s_op;
@@ -779,7 +778,7 @@ void emit_bcsel(isel_context *ctx, nir_alu_instr *instr, Temp dst)
assert(els.regClass() == bld.lm); assert(els.regClass() == bld.lm);
} }
if (!ctx->divergent_vals[instr->src[0].src.ssa->index]) { /* uniform condition and values in sgpr */ if (!nir_src_is_divergent(instr->src[0].src)) { /* uniform condition and values in sgpr */
if (dst.regClass() == s1 || dst.regClass() == s2) { if (dst.regClass() == s1 || dst.regClass() == s2) {
assert((then.regClass() == s1 || then.regClass() == s2) && els.regClass() == then.regClass()); assert((then.regClass() == s1 || then.regClass() == s2) && els.regClass() == then.regClass());
assert(dst.size() == then.size()); assert(dst.size() == then.size());
@@ -5010,7 +5009,7 @@ void visit_load_resource(isel_context *ctx, nir_intrinsic_instr *instr)
{ {
Builder bld(ctx->program, ctx->block); Builder bld(ctx->program, ctx->block);
Temp index = get_ssa_temp(ctx, instr->src[0].ssa); Temp index = get_ssa_temp(ctx, instr->src[0].ssa);
if (!ctx->divergent_vals[instr->dest.ssa.index]) if (!nir_dest_is_divergent(instr->dest))
index = bld.as_uniform(index); index = bld.as_uniform(index);
unsigned desc_set = nir_intrinsic_desc_set(instr); unsigned desc_set = nir_intrinsic_desc_set(instr);
unsigned binding = nir_intrinsic_binding(instr); unsigned binding = nir_intrinsic_binding(instr);
@@ -6086,7 +6085,7 @@ void visit_store_ssbo(isel_context *ctx, nir_intrinsic_instr *instr)
Temp rsrc = convert_pointer_to_64_bit(ctx, get_ssa_temp(ctx, instr->src[1].ssa)); Temp rsrc = convert_pointer_to_64_bit(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
rsrc = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), rsrc, Operand(0u)); rsrc = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), rsrc, Operand(0u));
bool smem = !ctx->divergent_vals[instr->src[2].ssa->index] && bool smem = !nir_src_is_divergent(instr->src[2]) &&
ctx->options->chip_class >= GFX8 && ctx->options->chip_class >= GFX8 &&
elem_size_bytes >= 4; elem_size_bytes >= 4;
if (smem) if (smem)
@@ -7477,11 +7476,11 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
case nir_intrinsic_shuffle: case nir_intrinsic_shuffle:
case nir_intrinsic_read_invocation: { case nir_intrinsic_read_invocation: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa); Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
if (!ctx->divergent_vals[instr->src[0].ssa->index]) { if (!nir_src_is_divergent(instr->src[0])) {
emit_uniform_subgroup(ctx, instr, src); emit_uniform_subgroup(ctx, instr, src);
} else { } else {
Temp tid = get_ssa_temp(ctx, instr->src[1].ssa); Temp tid = get_ssa_temp(ctx, instr->src[1].ssa);
if (instr->intrinsic == nir_intrinsic_read_invocation || !ctx->divergent_vals[instr->src[1].ssa->index]) if (instr->intrinsic == nir_intrinsic_read_invocation || !nir_src_is_divergent(instr->src[1]))
tid = bld.as_uniform(tid); tid = bld.as_uniform(tid);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
if (src.regClass() == v1) { if (src.regClass() == v1) {
@@ -7587,7 +7586,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
nir_intrinsic_cluster_size(instr) : 0; nir_intrinsic_cluster_size(instr) : 0;
cluster_size = util_next_power_of_two(MIN2(cluster_size ? cluster_size : ctx->program->wave_size, ctx->program->wave_size)); cluster_size = util_next_power_of_two(MIN2(cluster_size ? cluster_size : ctx->program->wave_size, ctx->program->wave_size));
if (!ctx->divergent_vals[instr->src[0].ssa->index] && (op == nir_op_ior || op == nir_op_iand)) { if (!nir_src_is_divergent(instr->src[0]) && (op == nir_op_ior || op == nir_op_iand)) {
emit_uniform_subgroup(ctx, instr, src); emit_uniform_subgroup(ctx, instr, src);
} else if (instr->dest.ssa.bit_size == 1) { } else if (instr->dest.ssa.bit_size == 1) {
if (op == nir_op_imul || op == nir_op_umin || op == nir_op_imin) if (op == nir_op_imul || op == nir_op_umin || op == nir_op_imin)
@@ -7670,7 +7669,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
} }
case nir_intrinsic_quad_broadcast: { case nir_intrinsic_quad_broadcast: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa); Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
if (!ctx->divergent_vals[instr->dest.ssa.index]) { if (!nir_dest_is_divergent(instr->dest)) {
emit_uniform_subgroup(ctx, instr, src); emit_uniform_subgroup(ctx, instr, src);
} else { } else {
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
@@ -7717,7 +7716,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
case nir_intrinsic_quad_swap_diagonal: case nir_intrinsic_quad_swap_diagonal:
case nir_intrinsic_quad_swizzle_amd: { case nir_intrinsic_quad_swizzle_amd: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa); Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
if (!ctx->divergent_vals[instr->dest.ssa.index]) { if (!nir_dest_is_divergent(instr->dest)) {
emit_uniform_subgroup(ctx, instr, src); emit_uniform_subgroup(ctx, instr, src);
break; break;
} }
@@ -7779,7 +7778,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
} }
case nir_intrinsic_masked_swizzle_amd: { case nir_intrinsic_masked_swizzle_amd: {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa); Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
if (!ctx->divergent_vals[instr->dest.ssa.index]) { if (!nir_dest_is_divergent(instr->dest)) {
emit_uniform_subgroup(ctx, instr, src); emit_uniform_subgroup(ctx, instr, src);
break; break;
} }
@@ -8774,7 +8773,7 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
assert(instr->dest.ssa.bit_size != 1 || dst.regClass() == ctx->program->lane_mask); assert(instr->dest.ssa.bit_size != 1 || dst.regClass() == ctx->program->lane_mask);
bool logical = !dst.is_linear() || ctx->divergent_vals[instr->dest.ssa.index]; bool logical = !dst.is_linear() || nir_dest_is_divergent(instr->dest);
logical |= ctx->block->kind & block_kind_merge; logical |= ctx->block->kind & block_kind_merge;
aco_opcode opcode = logical ? aco_opcode::p_phi : aco_opcode::p_linear_phi; aco_opcode opcode = logical ? aco_opcode::p_phi : aco_opcode::p_linear_phi;
@@ -9468,7 +9467,7 @@ static bool visit_if(isel_context *ctx, nir_if *if_stmt)
aco_ptr<Pseudo_branch_instruction> branch; aco_ptr<Pseudo_branch_instruction> branch;
if_context ic; if_context ic;
if (!ctx->divergent_vals[if_stmt->condition.ssa->index]) { /* uniform condition */ if (!nir_src_is_divergent(if_stmt->condition)) { /* uniform condition */
/** /**
* Uniform conditionals are represented in the following way*) : * Uniform conditionals are represented in the following way*) :
* *
@@ -10683,8 +10682,6 @@ void select_program(Program *program,
if (ngg_no_gs && !ngg_early_prim_export(&ctx)) if (ngg_no_gs && !ngg_early_prim_export(&ctx))
ngg_emit_nogs_output(&ctx); ngg_emit_nogs_output(&ctx);
ralloc_free(ctx.divergent_vals);
if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) { if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) {
/* Outputs of the previous stage are inputs to the next stage */ /* Outputs of the previous stage are inputs to the next stage */
ctx.inputs = ctx.outputs; ctx.inputs = ctx.outputs;

View File

@@ -57,7 +57,6 @@ struct isel_context {
nir_shader *shader; nir_shader *shader;
uint32_t constant_data_offset; uint32_t constant_data_offset;
Block *block; Block *block;
bool *divergent_vals;
std::unique_ptr<Temp[]> allocated; std::unique_ptr<Temp[]> allocated;
std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec; std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec;
Stage stage; /* Stage */ Stage stage; /* Stage */
@@ -152,7 +151,7 @@ unsigned get_interp_input(nir_intrinsic_op intrin, enum glsl_interp_mode interp)
* block instead. This is so that we can use any SGPR live-out of the side * block instead. This is so that we can use any SGPR live-out of the side
* without the branch without creating a linear phi in the invert or merge block. */ * without the branch without creating a linear phi in the invert or merge block. */
bool bool
sanitize_if(nir_function_impl *impl, bool *divergent, nir_if *nif) sanitize_if(nir_function_impl *impl, nir_if *nif)
{ {
//TODO: skip this if the condition is uniform and there are no divergent breaks/continues? //TODO: skip this if the condition is uniform and there are no divergent breaks/continues?
@@ -197,7 +196,7 @@ sanitize_if(nir_function_impl *impl, bool *divergent, nir_if *nif)
} }
bool bool
sanitize_cf_list(nir_function_impl *impl, bool *divergent, struct exec_list *cf_list) sanitize_cf_list(nir_function_impl *impl, struct exec_list *cf_list)
{ {
bool progress = false; bool progress = false;
foreach_list_typed(nir_cf_node, cf_node, node, cf_list) { foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
@@ -206,14 +205,14 @@ sanitize_cf_list(nir_function_impl *impl, bool *divergent, struct exec_list *cf_
break; break;
case nir_cf_node_if: { case nir_cf_node_if: {
nir_if *nif = nir_cf_node_as_if(cf_node); nir_if *nif = nir_cf_node_as_if(cf_node);
progress |= sanitize_cf_list(impl, divergent, &nif->then_list); progress |= sanitize_cf_list(impl, &nif->then_list);
progress |= sanitize_cf_list(impl, divergent, &nif->else_list); progress |= sanitize_cf_list(impl, &nif->else_list);
progress |= sanitize_if(impl, divergent, nif); progress |= sanitize_if(impl, nif);
break; break;
} }
case nir_cf_node_loop: { case nir_cf_node_loop: {
nir_loop *loop = nir_cf_node_as_loop(cf_node); nir_loop *loop = nir_cf_node_as_loop(cf_node);
progress |= sanitize_cf_list(impl, divergent, &loop->body); progress |= sanitize_cf_list(impl, &loop->body);
break; break;
} }
case nir_cf_node_function: case nir_cf_node_function:
@@ -238,11 +237,11 @@ void init_context(isel_context *ctx, nir_shader *shader)
unsigned lane_mask_size = ctx->program->lane_mask.size(); unsigned lane_mask_size = ctx->program->lane_mask.size();
ctx->shader = shader; ctx->shader = shader;
ctx->divergent_vals = nir_divergence_analysis(shader, nir_divergence_view_index_uniform); nir_divergence_analysis(shader, nir_divergence_view_index_uniform);
/* sanitize control flow */ /* sanitize control flow */
nir_metadata_require(impl, nir_metadata_dominance); nir_metadata_require(impl, nir_metadata_dominance);
sanitize_cf_list(impl, ctx->divergent_vals, &impl->body); sanitize_cf_list(impl, &impl->body);
nir_metadata_preserve(impl, (nir_metadata)~nir_metadata_block_index); nir_metadata_preserve(impl, (nir_metadata)~nir_metadata_block_index);
/* we'll need this for isel */ /* we'll need this for isel */
@@ -332,10 +331,10 @@ void init_context(isel_context *ctx, nir_shader *shader)
case nir_op_b2f16: case nir_op_b2f16:
case nir_op_b2f32: case nir_op_b2f32:
case nir_op_mov: case nir_op_mov:
type = ctx->divergent_vals[alu_instr->dest.dest.ssa.index] ? RegType::vgpr : RegType::sgpr; type = nir_dest_is_divergent(alu_instr->dest.dest) ? RegType::vgpr : RegType::sgpr;
break; break;
case nir_op_bcsel: case nir_op_bcsel:
type = ctx->divergent_vals[alu_instr->dest.dest.ssa.index] ? RegType::vgpr : RegType::sgpr; type = nir_dest_is_divergent(alu_instr->dest.dest) ? RegType::vgpr : RegType::sgpr;
/* fallthrough */ /* fallthrough */
default: default:
for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) { for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) {
@@ -465,7 +464,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
case nir_intrinsic_load_global: case nir_intrinsic_load_global:
case nir_intrinsic_vulkan_resource_index: case nir_intrinsic_vulkan_resource_index:
case nir_intrinsic_load_shared: case nir_intrinsic_load_shared:
type = ctx->divergent_vals[intrinsic->dest.ssa.index] ? RegType::vgpr : RegType::sgpr; type = nir_dest_is_divergent(intrinsic->dest) ? RegType::vgpr : RegType::sgpr;
break; break;
case nir_intrinsic_load_view_index: case nir_intrinsic_load_view_index:
type = ctx->stage == fragment_fs ? RegType::vgpr : RegType::sgpr; type = ctx->stage == fragment_fs ? RegType::vgpr : RegType::sgpr;
@@ -524,9 +523,10 @@ void init_context(isel_context *ctx, nir_shader *shader)
if (tex->dest.ssa.bit_size == 64) if (tex->dest.ssa.bit_size == 64)
size *= 2; size *= 2;
if (tex->op == nir_texop_texture_samples) if (tex->op == nir_texop_texture_samples) {
assert(!ctx->divergent_vals[tex->dest.ssa.index]); assert(!tex->dest.ssa.divergent);
if (ctx->divergent_vals[tex->dest.ssa.index]) }
if (nir_dest_is_divergent(tex->dest))
allocated[tex->dest.ssa.index] = Temp(0, RegClass(RegType::vgpr, size)); allocated[tex->dest.ssa.index] = Temp(0, RegClass(RegType::vgpr, size));
else else
allocated[tex->dest.ssa.index] = Temp(0, RegClass(RegType::sgpr, size)); allocated[tex->dest.ssa.index] = Temp(0, RegClass(RegType::sgpr, size));
@@ -558,7 +558,7 @@ void init_context(isel_context *ctx, nir_shader *shader)
break; break;
} }
if (ctx->divergent_vals[phi->dest.ssa.index]) { if (nir_dest_is_divergent(phi->dest)) {
type = RegType::vgpr; type = RegType::vgpr;
} else { } else {
type = RegType::sgpr; type = RegType::sgpr;

View File

@@ -1451,6 +1451,7 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
list_inithead(&def->if_uses); list_inithead(&def->if_uses);
def->num_components = num_components; def->num_components = num_components;
def->bit_size = bit_size; def->bit_size = bit_size;
def->divergent = true; /* This is the safer default */
if (instr->block) { if (instr->block) {
nir_function_impl *impl = nir_function_impl *impl =

View File

@@ -740,6 +740,12 @@ typedef struct nir_ssa_def {
/* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
uint8_t bit_size; uint8_t bit_size;
/**
* True if this SSA value may have different values in different SIMD
* invocations of the shader. This is set by nir_divergence_analysis.
*/
bool divergent;
} nir_ssa_def; } nir_ssa_def;
struct nir_src; struct nir_src;
@@ -880,6 +886,13 @@ nir_src_is_const(nir_src src)
src.ssa->parent_instr->type == nir_instr_type_load_const; src.ssa->parent_instr->type == nir_instr_type_load_const;
} }
/**
 * Reports whether the SSA value read through \p src is flagged as divergent.
 *
 * Only SSA sources are supported; this is asserted.  The flag itself lives on
 * the referenced SSA def and is simply forwarded here.
 */
static inline bool
nir_src_is_divergent(nir_src src)
{
   assert(src.is_ssa);

   const bool divergent = src.ssa->divergent;
   return divergent;
}
static inline unsigned static inline unsigned
nir_dest_bit_size(nir_dest dest) nir_dest_bit_size(nir_dest dest)
{ {
@@ -892,6 +905,13 @@ nir_dest_num_components(nir_dest dest)
return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components; return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
} }
/**
 * Reports whether the SSA value defined by \p dest is flagged as divergent.
 *
 * Only SSA destinations are supported; this is asserted.  The flag is read
 * straight from the embedded SSA def.
 */
static inline bool
nir_dest_is_divergent(nir_dest dest)
{
   assert(dest.is_ssa);

   const bool divergent = dest.ssa.divergent;
   return divergent;
}
/* Are all components the same, ie. .xxxx */ /* Are all components the same, ie. .xxxx */
static inline bool static inline bool
nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp) nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
@@ -4321,7 +4341,7 @@ bool nir_repair_ssa(nir_shader *shader);
void nir_convert_loop_to_lcssa(nir_loop *loop); void nir_convert_loop_to_lcssa(nir_loop *loop);
bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants); bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
bool* nir_divergence_analysis(nir_shader *shader, nir_divergence_options options); void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options);
/* If phi_webs_only is true, only convert SSA values involved in phi nodes to /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
* registers. If false, convert all values (even those not involved in a phi * registers. If false, convert all values (even those not involved in a phi

View File

@@ -37,20 +37,20 @@
*/ */
static bool static bool
visit_cf_list(bool *divergent, struct exec_list *list, visit_cf_list(struct exec_list *list,
nir_divergence_options options, gl_shader_stage stage); nir_divergence_options options, gl_shader_stage stage);
static bool static bool
visit_alu(bool *divergent, nir_alu_instr *instr) visit_alu(nir_alu_instr *instr)
{ {
if (divergent[instr->dest.dest.ssa.index]) if (instr->dest.dest.ssa.divergent)
return false; return false;
unsigned num_src = nir_op_infos[instr->op].num_inputs; unsigned num_src = nir_op_infos[instr->op].num_inputs;
for (unsigned i = 0; i < num_src; i++) { for (unsigned i = 0; i < num_src; i++) {
if (divergent[instr->src[i].src.ssa->index]) { if (instr->src[i].src.ssa->divergent) {
divergent[instr->dest.dest.ssa.index] = true; instr->dest.dest.ssa.divergent = true;
return true; return true;
} }
} }
@@ -59,13 +59,13 @@ visit_alu(bool *divergent, nir_alu_instr *instr)
} }
static bool static bool
visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr, visit_intrinsic(nir_intrinsic_instr *instr,
nir_divergence_options options, gl_shader_stage stage) nir_divergence_options options, gl_shader_stage stage)
{ {
if (!nir_intrinsic_infos[instr->intrinsic].has_dest) if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
return false; return false;
if (divergent[instr->dest.ssa.index]) if (instr->dest.ssa.divergent)
return false; return false;
bool is_divergent = false; bool is_divergent = false;
@@ -117,7 +117,7 @@ visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
/* Intrinsics with divergence depending on shader stage and hardware */ /* Intrinsics with divergence depending on shader stage and hardware */
case nir_intrinsic_load_input: case nir_intrinsic_load_input:
is_divergent = divergent[instr->src[0].ssa->index]; is_divergent = instr->src[0].ssa->divergent;
if (stage == MESA_SHADER_FRAGMENT) if (stage == MESA_SHADER_FRAGMENT)
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
else if (stage == MESA_SHADER_TESS_EVAL) else if (stage == MESA_SHADER_TESS_EVAL)
@@ -126,13 +126,13 @@ visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
is_divergent = true; is_divergent = true;
break; break;
case nir_intrinsic_load_input_vertex: case nir_intrinsic_load_input_vertex:
is_divergent = divergent[instr->src[1].ssa->index]; is_divergent = instr->src[1].ssa->divergent;
assert(stage == MESA_SHADER_FRAGMENT); assert(stage == MESA_SHADER_FRAGMENT);
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
break; break;
case nir_intrinsic_load_output: case nir_intrinsic_load_output:
assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT); assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT);
is_divergent = divergent[instr->src[0].ssa->index]; is_divergent = instr->src[0].ssa->divergent;
if (stage == MESA_SHADER_TESS_CTRL) if (stage == MESA_SHADER_TESS_CTRL)
is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup); is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
else else
@@ -152,7 +152,7 @@ visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
break; break;
case nir_intrinsic_load_fs_input_interp_deltas: case nir_intrinsic_load_fs_input_interp_deltas:
assert(stage == MESA_SHADER_FRAGMENT); assert(stage == MESA_SHADER_FRAGMENT);
is_divergent = divergent[instr->src[0].ssa->index]; is_divergent = instr->src[0].ssa->divergent;
is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
break; break;
case nir_intrinsic_load_primitive_id: case nir_intrinsic_load_primitive_id:
@@ -194,7 +194,7 @@ visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
/* fallthrough */ /* fallthrough */
case nir_intrinsic_inclusive_scan: { case nir_intrinsic_inclusive_scan: {
nir_op op = nir_intrinsic_reduction_op(instr); nir_op op = nir_intrinsic_reduction_op(instr);
is_divergent = divergent[instr->src[0].ssa->index]; is_divergent = instr->src[0].ssa->divergent;
if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin && if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax && op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
op != nir_op_iand && op != nir_op_ior) op != nir_op_iand && op != nir_op_ior)
@@ -245,7 +245,7 @@ visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
case nir_intrinsic_masked_swizzle_amd: { case nir_intrinsic_masked_swizzle_amd: {
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
for (unsigned i = 0; i < num_srcs; i++) { for (unsigned i = 0; i < num_srcs; i++) {
if (divergent[instr->src[i].ssa->index]) { if (instr->src[i].ssa->divergent) {
is_divergent = true; is_divergent = true;
break; break;
} }
@@ -254,8 +254,8 @@ visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
} }
case nir_intrinsic_shuffle: case nir_intrinsic_shuffle:
is_divergent = divergent[instr->src[0].ssa->index] && is_divergent = instr->src[0].ssa->divergent &&
divergent[instr->src[1].ssa->index]; instr->src[1].ssa->divergent;
break; break;
/* Intrinsics which are always divergent */ /* Intrinsics which are always divergent */
@@ -423,14 +423,14 @@ visit_intrinsic(bool *divergent, nir_intrinsic_instr *instr,
#endif #endif
} }
divergent[instr->dest.ssa.index] = is_divergent; instr->dest.ssa.divergent = is_divergent;
return is_divergent; return is_divergent;
} }
static bool static bool
visit_tex(bool *divergent, nir_tex_instr *instr) visit_tex(nir_tex_instr *instr)
{ {
if (divergent[instr->dest.ssa.index]) if (instr->dest.ssa.divergent)
return false; return false;
bool is_divergent = false; bool is_divergent = false;
@@ -440,27 +440,27 @@ visit_tex(bool *divergent, nir_tex_instr *instr)
case nir_tex_src_sampler_deref: case nir_tex_src_sampler_deref:
case nir_tex_src_sampler_handle: case nir_tex_src_sampler_handle:
case nir_tex_src_sampler_offset: case nir_tex_src_sampler_offset:
is_divergent |= divergent[instr->src[i].src.ssa->index] && is_divergent |= instr->src[i].src.ssa->divergent &&
instr->sampler_non_uniform; instr->sampler_non_uniform;
break; break;
case nir_tex_src_texture_deref: case nir_tex_src_texture_deref:
case nir_tex_src_texture_handle: case nir_tex_src_texture_handle:
case nir_tex_src_texture_offset: case nir_tex_src_texture_offset:
is_divergent |= divergent[instr->src[i].src.ssa->index] && is_divergent |= instr->src[i].src.ssa->divergent &&
instr->texture_non_uniform; instr->texture_non_uniform;
break; break;
default: default:
is_divergent |= divergent[instr->src[i].src.ssa->index]; is_divergent |= instr->src[i].src.ssa->divergent;
break; break;
} }
} }
divergent[instr->dest.ssa.index] = is_divergent; instr->dest.ssa.divergent = is_divergent;
return is_divergent; return is_divergent;
} }
static bool static bool
visit_phi(bool *divergent, nir_phi_instr *instr) visit_phi(nir_phi_instr *instr)
{ {
/* There are 3 types of phi instructions: /* There are 3 types of phi instructions:
* (1) gamma: represent the joining point of different paths * (1) gamma: represent the joining point of different paths
@@ -481,13 +481,13 @@ visit_phi(bool *divergent, nir_phi_instr *instr)
* (note: there should be no phi for loop-invariant variables.) * (note: there should be no phi for loop-invariant variables.)
*/ */
if (divergent[instr->dest.ssa.index]) if (instr->dest.ssa.divergent)
return false; return false;
nir_foreach_phi_src(src, instr) { nir_foreach_phi_src(src, instr) {
/* if any source value is divergent, the resulting value is divergent */ /* if any source value is divergent, the resulting value is divergent */
if (divergent[src->src.ssa->index]) { if (src->src.ssa->divergent) {
divergent[instr->dest.ssa.index] = true; instr->dest.ssa.divergent = true;
return true; return true;
} }
} }
@@ -537,8 +537,8 @@ visit_phi(bool *divergent, nir_phi_instr *instr)
while (current->type != nir_cf_node_loop) { while (current->type != nir_cf_node_loop) {
assert (current->type == nir_cf_node_if); assert (current->type == nir_cf_node_if);
nir_if *if_node = nir_cf_node_as_if(current); nir_if *if_node = nir_cf_node_as_if(current);
if (divergent[if_node->condition.ssa->index]) { if (if_node->condition.ssa->divergent) {
divergent[instr->dest.ssa.index] = true; instr->dest.ssa.divergent = true;
return true; return true;
} }
current = current->parent; current = current->parent;
@@ -558,8 +558,8 @@ visit_phi(bool *divergent, nir_phi_instr *instr)
/* gamma: check if the condition is divergent */ /* gamma: check if the condition is divergent */
nir_if *if_node = nir_cf_node_as_if(prev); nir_if *if_node = nir_cf_node_as_if(prev);
if (divergent[if_node->condition.ssa->index]) { if (if_node->condition.ssa->divergent) {
divergent[instr->dest.ssa.index] = true; instr->dest.ssa.divergent = true;
return true; return true;
} }
@@ -578,8 +578,8 @@ visit_phi(bool *divergent, nir_phi_instr *instr)
while (current->type != nir_cf_node_loop) { while (current->type != nir_cf_node_loop) {
assert(current->type == nir_cf_node_if); assert(current->type == nir_cf_node_if);
nir_if *if_node = nir_cf_node_as_if(current); nir_if *if_node = nir_cf_node_as_if(current);
if (divergent[if_node->condition.ssa->index]) { if (if_node->condition.ssa->divergent) {
divergent[instr->dest.ssa.index] = true; instr->dest.ssa.divergent = true;
return true; return true;
} }
current = current->parent; current = current->parent;
@@ -607,12 +607,12 @@ visit_phi(bool *divergent, nir_phi_instr *instr)
} }
assert(current->type == nir_cf_node_if); assert(current->type == nir_cf_node_if);
nir_if *if_node = nir_cf_node_as_if(current); nir_if *if_node = nir_cf_node_as_if(current);
is_divergent |= divergent[if_node->condition.ssa->index]; is_divergent |= if_node->condition.ssa->divergent;
current = current->parent; current = current->parent;
} }
if (is_divergent) { if (is_divergent) {
divergent[instr->dest.ssa.index] = true; instr->dest.ssa.divergent = true;
return true; return true;
} }
} }
@@ -623,13 +623,13 @@ visit_phi(bool *divergent, nir_phi_instr *instr)
} }
static bool static bool
visit_load_const(bool *divergent, nir_load_const_instr *instr) visit_load_const(nir_load_const_instr *instr)
{ {
return false; return false;
} }
static bool static bool
visit_ssa_undef(bool *divergent, nir_ssa_undef_instr *instr) visit_ssa_undef(nir_ssa_undef_instr *instr)
{ {
return false; return false;
} }
@@ -675,10 +675,10 @@ nir_variable_is_uniform(nir_variable *var, nir_divergence_options options,
} }
static bool static bool
visit_deref(bool *divergent, nir_deref_instr *deref, visit_deref(nir_deref_instr *deref,
nir_divergence_options options, gl_shader_stage stage) nir_divergence_options options, gl_shader_stage stage)
{ {
if (divergent[deref->dest.ssa.index]) if (deref->dest.ssa.divergent)
return false; return false;
bool is_divergent = false; bool is_divergent = false;
@@ -688,24 +688,24 @@ visit_deref(bool *divergent, nir_deref_instr *deref,
break; break;
case nir_deref_type_array: case nir_deref_type_array:
case nir_deref_type_ptr_as_array: case nir_deref_type_ptr_as_array:
is_divergent = divergent[deref->arr.index.ssa->index]; is_divergent = deref->arr.index.ssa->divergent;
/* fallthrough */ /* fallthrough */
case nir_deref_type_struct: case nir_deref_type_struct:
case nir_deref_type_array_wildcard: case nir_deref_type_array_wildcard:
is_divergent |= divergent[deref->parent.ssa->index]; is_divergent |= deref->parent.ssa->divergent;
break; break;
case nir_deref_type_cast: case nir_deref_type_cast:
is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) || is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
divergent[deref->parent.ssa->index]; deref->parent.ssa->divergent;
break; break;
} }
divergent[deref->dest.ssa.index] = is_divergent; deref->dest.ssa.divergent = is_divergent;
return is_divergent; return is_divergent;
} }
static bool static bool
visit_block(bool *divergent, nir_block *block, nir_divergence_options options, visit_block(nir_block *block, nir_divergence_options options,
gl_shader_stage stage) gl_shader_stage stage)
{ {
bool has_changed = false; bool has_changed = false;
@@ -713,26 +713,26 @@ visit_block(bool *divergent, nir_block *block, nir_divergence_options options,
nir_foreach_instr(instr, block) { nir_foreach_instr(instr, block) {
switch (instr->type) { switch (instr->type) {
case nir_instr_type_alu: case nir_instr_type_alu:
has_changed |= visit_alu(divergent, nir_instr_as_alu(instr)); has_changed |= visit_alu(nir_instr_as_alu(instr));
break; break;
case nir_instr_type_intrinsic: case nir_instr_type_intrinsic:
has_changed |= visit_intrinsic(divergent, nir_instr_as_intrinsic(instr), has_changed |= visit_intrinsic(nir_instr_as_intrinsic(instr),
options, stage); options, stage);
break; break;
case nir_instr_type_tex: case nir_instr_type_tex:
has_changed |= visit_tex(divergent, nir_instr_as_tex(instr)); has_changed |= visit_tex(nir_instr_as_tex(instr));
break; break;
case nir_instr_type_phi: case nir_instr_type_phi:
has_changed |= visit_phi(divergent, nir_instr_as_phi(instr)); has_changed |= visit_phi(nir_instr_as_phi(instr));
break; break;
case nir_instr_type_load_const: case nir_instr_type_load_const:
has_changed |= visit_load_const(divergent, nir_instr_as_load_const(instr)); has_changed |= visit_load_const(nir_instr_as_load_const(instr));
break; break;
case nir_instr_type_ssa_undef: case nir_instr_type_ssa_undef:
has_changed |= visit_ssa_undef(divergent, nir_instr_as_ssa_undef(instr)); has_changed |= visit_ssa_undef(nir_instr_as_ssa_undef(instr));
break; break;
case nir_instr_type_deref: case nir_instr_type_deref:
has_changed |= visit_deref(divergent, nir_instr_as_deref(instr), has_changed |= visit_deref(nir_instr_as_deref(instr),
options, stage); options, stage);
break; break;
case nir_instr_type_jump: case nir_instr_type_jump:
@@ -747,21 +747,21 @@ visit_block(bool *divergent, nir_block *block, nir_divergence_options options,
} }
static bool static bool
visit_if(bool *divergent, nir_if *if_stmt, nir_divergence_options options, gl_shader_stage stage) visit_if(nir_if *if_stmt, nir_divergence_options options, gl_shader_stage stage)
{ {
return visit_cf_list(divergent, &if_stmt->then_list, options, stage) | return visit_cf_list(&if_stmt->then_list, options, stage) |
visit_cf_list(divergent, &if_stmt->else_list, options, stage); visit_cf_list(&if_stmt->else_list, options, stage);
} }
static bool static bool
visit_loop(bool *divergent, nir_loop *loop, nir_divergence_options options, gl_shader_stage stage) visit_loop(nir_loop *loop, nir_divergence_options options, gl_shader_stage stage)
{ {
bool has_changed = false; bool has_changed = false;
bool repeat = true; bool repeat = true;
/* TODO: restructure this and the phi handling more efficiently */ /* TODO: restructure this and the phi handling more efficiently */
while (repeat) { while (repeat) {
repeat = visit_cf_list(divergent, &loop->body, options, stage); repeat = visit_cf_list(&loop->body, options, stage);
has_changed |= repeat; has_changed |= repeat;
} }
@@ -769,7 +769,7 @@ visit_loop(bool *divergent, nir_loop *loop, nir_divergence_options options, gl_s
} }
static bool static bool
visit_cf_list(bool *divergent, struct exec_list *list, visit_cf_list(struct exec_list *list,
nir_divergence_options options, gl_shader_stage stage) nir_divergence_options options, gl_shader_stage stage)
{ {
bool has_changed = false; bool has_changed = false;
@@ -777,15 +777,15 @@ visit_cf_list(bool *divergent, struct exec_list *list,
foreach_list_typed(nir_cf_node, node, node, list) { foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) { switch (node->type) {
case nir_cf_node_block: case nir_cf_node_block:
has_changed |= visit_block(divergent, nir_cf_node_as_block(node), has_changed |= visit_block(nir_cf_node_as_block(node),
options, stage); options, stage);
break; break;
case nir_cf_node_if: case nir_cf_node_if:
has_changed |= visit_if(divergent, nir_cf_node_as_if(node), has_changed |= visit_if(nir_cf_node_as_if(node),
options, stage); options, stage);
break; break;
case nir_cf_node_loop: case nir_cf_node_loop:
has_changed |= visit_loop(divergent, nir_cf_node_as_loop(node), has_changed |= visit_loop(nir_cf_node_as_loop(node),
options, stage); options, stage);
break; break;
case nir_cf_node_function: case nir_cf_node_function:
@@ -796,14 +796,23 @@ visit_cf_list(bool *divergent, struct exec_list *list,
return has_changed; return has_changed;
} }
/*
 * Per-def callback: clears the divergent flag on the given SSA def.
 * The state pointer is unused.  Always returns true.
 * NOTE(review): presumably the true return keeps the nir_foreach_ssa_def
 * walk going -- confirm against that helper's contract.
 */
static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   const bool result = true;

   def->divergent = false;
   return result;
}
bool* void
nir_divergence_analysis(nir_shader *shader, nir_divergence_options options) nir_divergence_analysis(nir_shader *shader, nir_divergence_options options)
{ {
nir_function_impl *impl = nir_shader_get_entrypoint(shader); nir_function_impl *impl = nir_shader_get_entrypoint(shader);
bool *t = rzalloc_array(shader, bool, impl->ssa_alloc);
visit_cf_list(t, &impl->body, options, shader->info.stage); /* Set all SSA defs to non-divergent to start off */
nir_foreach_block(block, impl) {
return t; nir_foreach_instr(instr, block)
nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
}
visit_cf_list(&impl->body, options, shader->info.stage);
} }