diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index e92e6b3cc9b..a90f7c4e060 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -487,7 +487,7 @@ setup_nir(isel_context *ctx, nir_shader *nir) setup_variables(ctx, nir); nir_convert_to_lcssa(nir, true, false); - nir_lower_phis_to_scalar(nir); + nir_lower_phis_to_scalar(nir, false); nir_function_impl *func = nir_shader_get_entrypoint(nir); nir_index_ssa_defs(func); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 385cc6d4486..9e2cebc19ff 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -146,7 +146,7 @@ radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader, nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL); NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS_V(shader, nir_lower_phis_to_scalar); + NIR_PASS_V(shader, nir_lower_phis_to_scalar, false); NIR_PASS(progress, shader, nir_copy_prop); NIR_PASS(progress, shader, nir_opt_remove_phis); diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 95fe17b9292..155673dcdef 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1794,7 +1794,7 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s) NIR_PASS_V(s, nir_lower_vars_to_ssa); NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS(progress, s, nir_lower_phis_to_scalar); + NIR_PASS(progress, s, nir_lower_phis_to_scalar, false); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); NIR_PASS(progress, s, nir_opt_dce); diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index b23e49d7f41..a11e134a752 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -279,7 +279,7 @@ nir_optimize(nir_shader *nir, OPT(nir_lower_alu_to_scalar, NULL, NULL); OPT(nir_copy_prop); - OPT(nir_lower_phis_to_scalar); + OPT(nir_lower_phis_to_scalar, false); OPT(nir_copy_prop); OPT(nir_opt_dce); @@ -1578,7 +1578,7 @@ st_nir_opts(nir_shader *nir) if (nir->options->lower_to_scalar) { NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS_V(nir, nir_lower_phis_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); } NIR_PASS_V(nir, nir_lower_alu); diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 5eda51c30f6..8ab2d598701 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -4626,7 +4626,7 @@ bool nir_lower_alu_conversion_to_intrinsic(nir_shader *shader); bool nir_lower_int_to_float(nir_shader *shader); bool nir_lower_load_const_to_scalar(nir_shader *shader); bool nir_lower_read_invocation_to_scalar(nir_shader *shader); -bool nir_lower_phis_to_scalar(nir_shader *shader); +bool nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all); void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer); void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader, bool outputs_only); diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c b/src/compiler/nir/nir_lower_phis_to_scalar.c index c964ad04467..694fdb34fc4 100644 --- a/src/compiler/nir/nir_lower_phis_to_scalar.c +++ b/src/compiler/nir/nir_lower_phis_to_scalar.c @@ -36,6 +36,8 @@ struct lower_phis_to_scalar_state { void *mem_ctx; void *dead_ctx; + bool lower_all; + /* Hash table marking which phi nodes are scalarizable. The key is * pointers to phi instructions and the entry is either NULL for not * scalarizable or non-null for scalarizable. @@ -121,7 +123,7 @@ is_phi_src_scalarizable(nir_phi_src *src, /** * Determines if the given phi node should be lowered. The only phi nodes * we will scalarize at the moment are those where all of the sources are - * scalarizable. + * scalarizable, unless lower_all is set. * * The reason for this comes down to coalescing. Since phi sources can't * swizzle, swizzles on phis have to be resolved by inserting a mov right @@ -146,6 +148,9 @@ should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state) if (phi->dest.ssa.num_components == 1) return false; + if (state->lower_all) + return true; + struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi); if (entry) return entry->data != NULL; @@ -277,7 +282,7 @@ lower_phis_to_scalar_block(nir_block *block, } static bool -lower_phis_to_scalar_impl(nir_function_impl *impl) +lower_phis_to_scalar_impl(nir_function_impl *impl, bool lower_all) { struct lower_phis_to_scalar_state state; bool progress = false; @@ -285,6 +290,7 @@ lower_phis_to_scalar_impl(nir_function_impl *impl) state.mem_ctx = ralloc_parent(impl); state.dead_ctx = ralloc_context(NULL); state.phi_table = _mesa_pointer_hash_table_create(state.dead_ctx); + state.lower_all = lower_all; nir_foreach_block(block, impl) { progress = lower_phis_to_scalar_block(block, &state) || progress; @@ -305,13 +311,13 @@ lower_phis_to_scalar_impl(nir_function_impl *impl) * don't bother lowering because that would generate hard-to-coalesce movs. */ bool -nir_lower_phis_to_scalar(nir_shader *shader) +nir_lower_phis_to_scalar(nir_shader *shader, bool lower_all) { bool progress = false; nir_foreach_function(function, shader) { if (function->impl) - progress = lower_phis_to_scalar_impl(function->impl) || progress; + progress = lower_phis_to_scalar_impl(function->impl, lower_all) || progress; } return progress; diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 48134258226..874d9302571 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -205,7 +205,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s) progress |= OPT(s, nir_opt_copy_prop_vars); progress |= OPT(s, nir_opt_dead_write_vars); progress |= OPT(s, nir_lower_alu_to_scalar, NULL, NULL); - progress |= OPT(s, nir_lower_phis_to_scalar); + progress |= OPT(s, nir_lower_phis_to_scalar, false); progress |= OPT(s, nir_copy_prop); progress |= OPT(s, nir_opt_dce); diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index c915eebb3e5..18100e73f6e 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -2436,7 +2436,7 @@ ttn_optimize_nir(nir_shader *nir) if (nir->options->lower_to_scalar) { NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS_V(nir, nir_lower_phis_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); } NIR_PASS_V(nir, nir_lower_alu); diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index 19ad375c7ec..40af5c029be 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -116,7 +116,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s) NIR_PASS_V(s, nir_lower_vars_to_ssa); NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS(progress, s, nir_lower_phis_to_scalar); + NIR_PASS(progress, s, nir_lower_phis_to_scalar, false); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); NIR_PASS(progress, s, nir_opt_dce); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 54d3cd794a8..92df3a3f13a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3134,7 +3134,7 @@ Converter::run() NIR_PASS_V(nir, nir_lower_load_const_to_scalar); NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS_V(nir, nir_lower_phis_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); /*TODO: improve this lowering/optimisation loop so that we can use * nir_opt_idiv_const effectively before this. diff --git a/src/gallium/drivers/r600/sfn/sfn_nir.cpp b/src/gallium/drivers/r600/sfn/sfn_nir.cpp index 6314db50e1f..f1c663819c9 100644 --- a/src/gallium/drivers/r600/sfn/sfn_nir.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_nir.cpp @@ -870,7 +870,7 @@ int r600_shader_from_nir(struct r600_context *rctx, }; NIR_PASS_V(sel->nir, nir_lower_idiv, &idiv_options); NIR_PASS_V(sel->nir, r600_lower_alu); - NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); + NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false); if (lower_64bit) NIR_PASS_V(sel->nir, nir_lower_int64); @@ -932,11 +932,11 @@ int r600_shader_from_nir(struct r600_context *rctx, NIR_PASS_V(sel->nir, nir_io_add_const_offset_to_base, io_modes); NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL); - NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); + NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false); if (lower_64bit) NIR_PASS_V(sel->nir, r600::r600_nir_split_64bit_io); NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL); - NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar); + NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar, false); NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, r600_lower_to_scalar_instr_filter, NULL); NIR_PASS_V(sel->nir, nir_copy_prop); NIR_PASS_V(sel->nir, nir_opt_dce); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 06df038598a..8296c15a124 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -534,7 +534,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first) NIR_PASS_V(nir, nir_lower_vars_to_ssa); NIR_PASS_V(nir, nir_lower_alu_to_scalar, si_alu_to_scalar_filter, sscreen); - NIR_PASS_V(nir, nir_lower_phis_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); do { progress = false; @@ -560,7 +560,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first) if (lower_alu_to_scalar) NIR_PASS_V(nir, nir_lower_alu_to_scalar, si_alu_to_scalar_filter, sscreen); if (lower_phis_to_scalar) - NIR_PASS_V(nir, nir_lower_phis_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); progress |= lower_alu_to_scalar | lower_phis_to_scalar; NIR_PASS(progress, nir, nir_opt_cse); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index b5ceccc4daa..3ba8e6e5a94 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1521,7 +1521,7 @@ vc4_optimize_nir(struct nir_shader *s) NIR_PASS_V(s, nir_lower_vars_to_ssa); NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); - NIR_PASS(progress, s, nir_lower_phis_to_scalar); + NIR_PASS(progress, s, nir_lower_phis_to_scalar, false); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); NIR_PASS(progress, s, nir_opt_dce); diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 8f115b020ec..ee5f3cf40bd 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -600,7 +600,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_copy_prop); if (is_scalar) { - OPT(nir_lower_phis_to_scalar); + OPT(nir_lower_phis_to_scalar, false); } OPT(nir_copy_prop); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 94ff2389243..bf77064efb4 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -274,7 +274,7 @@ st_nir_opts(nir_shader *nir) if (nir->options->lower_to_scalar) { NIR_PASS_V(nir, nir_lower_alu_to_scalar, nir->options->lower_to_scalar_filter, NULL); - NIR_PASS_V(nir, nir_lower_phis_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); } NIR_PASS_V(nir, nir_lower_alu); @@ -540,7 +540,7 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog, * vectorize them afterwards again */ if (!nir->options->lower_to_scalar) { NIR_PASS_V(nir, nir_lower_alu_to_scalar, filter_64_bit_instr, nullptr); - NIR_PASS_V(nir, nir_lower_phis_to_scalar); + NIR_PASS_V(nir, nir_lower_phis_to_scalar, false); } if (nir->options->lower_doubles_options) {