nir/lower_locals_to_regs: Add bool bitsize knob
GLSL booleans (and hence bool derefs) may be translated either as 1-bit or 32-bit NIR registers, depending on whether the backend uses nir_lower_bool_to_int32 or not. Add a knob for this and choose the right type for different backends. Fixes nir_validate failure on dEQP-VK.subgroups.ballot_broadcast.graphics.subgroupbroadcast_bvec3 run under lavapipe. That test indexes into a bvec3 array, and gallivm first lowers bools and then lowers derefs to registers, resulting in random 1-bit booleans mixed in with 32-bit bools. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23804>
This commit is contained in:
@@ -4855,7 +4855,7 @@ bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
|
||||
bool nir_lower_indirect_var_derefs(nir_shader *shader,
|
||||
const struct set *vars);
|
||||
|
||||
bool nir_lower_locals_to_regs(nir_shader *shader);
|
||||
bool nir_lower_locals_to_regs(nir_shader *shader, uint8_t bool_bitsize);
|
||||
|
||||
void nir_lower_io_to_temporaries(nir_shader *shader,
|
||||
nir_function_impl *entrypoint,
|
||||
|
@@ -30,6 +30,9 @@ struct locals_to_regs_state {
|
||||
/* A hash table mapping derefs to registers */
|
||||
struct hash_table *regs_table;
|
||||
|
||||
/* Bit size to use for boolean registers */
|
||||
uint8_t bool_bitsize;
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
@@ -118,6 +121,9 @@ get_reg_for_deref(nir_deref_instr *deref, struct locals_to_regs_state *state)
|
||||
reg->num_array_elems = array_size > 1 ? array_size : 0;
|
||||
reg->bit_size = glsl_get_bit_size(deref->type);
|
||||
|
||||
if (reg->bit_size == 1)
|
||||
reg->bit_size = state->bool_bitsize;
|
||||
|
||||
_mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
|
||||
|
||||
return reg;
|
||||
@@ -288,13 +294,14 @@ lower_locals_to_regs_block(nir_block *block,
|
||||
}
|
||||
|
||||
static bool
|
||||
nir_lower_locals_to_regs_impl(nir_function_impl *impl)
|
||||
nir_lower_locals_to_regs_impl(nir_function_impl *impl, uint8_t bool_bitsize)
|
||||
{
|
||||
struct locals_to_regs_state state;
|
||||
|
||||
nir_builder_init(&state.builder, impl);
|
||||
state.progress = false;
|
||||
state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal);
|
||||
state.bool_bitsize = bool_bitsize;
|
||||
|
||||
nir_metadata_require(impl, nir_metadata_dominance);
|
||||
|
||||
@@ -311,13 +318,16 @@ nir_lower_locals_to_regs_impl(nir_function_impl *impl)
|
||||
}
|
||||
|
||||
bool
|
||||
nir_lower_locals_to_regs(nir_shader *shader)
|
||||
nir_lower_locals_to_regs(nir_shader *shader, uint8_t bool_bitsize)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
nir_foreach_function(function, shader) {
|
||||
if (function->impl)
|
||||
progress = nir_lower_locals_to_regs_impl(function->impl) || progress;
|
||||
if (function->impl) {
|
||||
progress =
|
||||
nir_lower_locals_to_regs_impl(function->impl, bool_bitsize) ||
|
||||
progress;
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
|
@@ -87,7 +87,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader,
|
||||
*/
|
||||
bool progress = false;
|
||||
bool needs_late_alg = false;
|
||||
NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);
|
||||
NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs, 1);
|
||||
|
||||
/* we could need cleanup after lower_locals_to_regs */
|
||||
while (progress) {
|
||||
|
@@ -2795,7 +2795,7 @@ bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
|
||||
struct nir_function *func;
|
||||
|
||||
nir_convert_from_ssa(nir, true);
|
||||
nir_lower_locals_to_regs(nir);
|
||||
nir_lower_locals_to_regs(nir, 32);
|
||||
nir_remove_dead_derefs(nir);
|
||||
nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
|
||||
|
||||
|
@@ -3871,7 +3871,7 @@ const void *nir_to_tgsi_options(struct nir_shader *s,
|
||||
NIR_PASS_V(s, nir_lower_vec_to_movs, ntt_vec_to_mov_writemask_cb, NULL);
|
||||
|
||||
/* locals_to_regs will leave dead derefs that are good to clean up. */
|
||||
NIR_PASS_V(s, nir_lower_locals_to_regs);
|
||||
NIR_PASS_V(s, nir_lower_locals_to_regs, 32);
|
||||
NIR_PASS_V(s, nir_opt_dce);
|
||||
|
||||
if (NIR_DEBUG(TGSI)) {
|
||||
|
@@ -939,7 +939,7 @@ r600_shader_from_nir(struct r600_context *rctx,
|
||||
|
||||
NIR_PASS_V(sh, nir_lower_bool_to_int32);
|
||||
|
||||
NIR_PASS_V(sh, nir_lower_locals_to_regs);
|
||||
NIR_PASS_V(sh, nir_lower_locals_to_regs, 32);
|
||||
NIR_PASS_V(sh, nir_convert_from_ssa, true);
|
||||
NIR_PASS_V(sh, nir_opt_dce);
|
||||
|
||||
|
@@ -1701,7 +1701,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
|
||||
OPT(nir_copy_prop);
|
||||
OPT(nir_opt_dce);
|
||||
|
||||
OPT(nir_lower_locals_to_regs);
|
||||
OPT(nir_lower_locals_to_regs, 32);
|
||||
|
||||
if (unlikely(debug_enabled)) {
|
||||
/* Re-index SSA defs so we print more sensible numbers. */
|
||||
|
Reference in New Issue
Block a user