diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index c9f402870d8..82dade3dee4 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -181,7 +181,7 @@ radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader, NIR_PASS(progress, shader, nir_opt_shrink_vectors, !device->instance->disable_shrink_image_store); if (shader->options->max_unroll_iterations) { - NIR_PASS(progress, shader, nir_opt_loop_unroll, 0); + NIR_PASS(progress, shader, nir_opt_loop_unroll); } } while (progress && !optimize_conservatively); diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index e0458ba4c1c..1066ed4e490 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -1235,10 +1235,7 @@ agx_optimize_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_opt_undef); NIR_PASS(progress, nir, nir_lower_undef_to_zero); - NIR_PASS(progress, nir, nir_opt_loop_unroll, - nir_var_shader_in | - nir_var_shader_out | - nir_var_function_temp); + NIR_PASS(progress, nir, nir_opt_loop_unroll); } while (progress); NIR_PASS_V(nir, nir_opt_algebraic_late); diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h index 6e8b14ad795..b874ee1b6a1 100644 --- a/src/asahi/compiler/agx_compile.h +++ b/src/asahi/compiler/agx_compile.h @@ -213,6 +213,8 @@ static const nir_shader_compiler_options agx_nir_options = { .lower_doubles_options = nir_lower_dmod, .lower_int64_options = ~(nir_lower_iadd64 | nir_lower_imul_2x32_64), + .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), + .has_fsub = true, .has_isub = true, .has_cs_global_id = true, diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 190d7c3a2e9..b0cc4977c0b 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1877,9 +1877,7 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s) if (c && !c->disable_loop_unrolling && s->options->max_unroll_iterations > 0) { bool local_progress = false; - NIR_PASS(local_progress, s, nir_opt_loop_unroll, - nir_var_shader_in | - nir_var_function_temp); + NIR_PASS(local_progress, s, nir_opt_loop_unroll); c->unrolled_any_loops |= local_progress; progress |= local_progress; } diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c index 4447e4d0d84..4e692bbf389 100644 --- a/src/broadcom/vulkan/v3dv_pipeline.c +++ b/src/broadcom/vulkan/v3dv_pipeline.c @@ -248,6 +248,7 @@ const nir_shader_compiler_options v3dv_nir_options = { * needs to be supported */ .lower_interpolate_at = true, .max_unroll_iterations = 16, + .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp), .divergence_analysis_options = nir_divergence_multiple_workgroup_per_compute_subgroup }; diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 63bdc6fcf38..76ca1f94a9d 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3690,6 +3690,12 @@ typedef struct nir_shader_compiler_options { * vectorized IO can pack more varyings when linking. */ bool linker_ignore_precision; + /** + * Specifies which type of indirectly accessed variables should force + * loop unrolling. + */ + nir_variable_mode force_indirect_unrolling; + nir_lower_int64_options lower_int64_options; nir_lower_doubles_options lower_doubles_options; nir_divergence_options divergence_analysis_options; @@ -5436,7 +5442,7 @@ bool nir_opt_large_constants(nir_shader *shader, glsl_type_size_align_func size_align, unsigned threshold); -bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); +bool nir_opt_loop_unroll(nir_shader *shader); typedef enum { nir_move_const_undef = (1 << 0), diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c index c4c76742d79..3dec5e15e56 100644 --- a/src/compiler/nir/nir_opt_loop_unroll.c +++ b/src/compiler/nir/nir_opt_loop_unroll.c @@ -1058,10 +1058,11 @@ nir_opt_loop_unroll_impl(nir_function_impl *impl, * should force loop unrolling. */ bool -nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask) +nir_opt_loop_unroll(nir_shader *shader) { bool progress = false; + nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling; nir_foreach_function(function, shader) { if (function->impl) { progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask); diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 2b651676a6b..c0c9a7b90b8 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -132,6 +132,7 @@ static const nir_shader_compiler_options options_a6xx = { .has_fsub = true, .has_isub = true, .max_unroll_iterations = 32, + .force_indirect_unrolling = nir_var_all, .lower_wpos_pntc = true, .lower_cs_local_index_from_id = true, @@ -269,7 +270,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s) OPT(s, nir_opt_dce); } progress |= OPT(s, nir_opt_if, false); - progress |= OPT(s, nir_opt_loop_unroll, nir_var_all); + progress |= OPT(s, nir_opt_loop_unroll); progress |= OPT(s, nir_opt_remove_phis); progress |= OPT(s, nir_opt_undef); } while (progress); diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c index c8b0b9ca982..135793dbde7 100644 --- a/src/gallium/auxiliary/nir/nir_to_tgsi.c +++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c @@ -2458,7 +2458,6 @@ static void ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen) { bool progress; - nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen); unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage); unsigned control_flow_depth = screen->get_shader_param(screen, pipe_stage, @@ -2492,7 +2491,7 @@ ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen) NIR_PASS(progress, s, nir_opt_trivial_continues); NIR_PASS(progress, s, nir_opt_vectorize, ntt_should_vectorize_instr, NULL); NIR_PASS(progress, s, nir_opt_undef); - NIR_PASS(progress, s, nir_opt_loop_unroll, no_indirects_mask); + NIR_PASS(progress, s, nir_opt_loop_unroll); } while (progress); } @@ -2802,6 +2801,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s) !screen->get_shader_param(screen, pipe_shader_type_from_mesa(s->info.stage), PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); + nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen); + if (!options->lower_extract_byte || !options->lower_extract_word || !options->lower_insert_byte || @@ -2812,7 +2813,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s) !options->lower_rotate || !options->lower_uniforms_to_ubo || !options->lower_vector_cmp || - options->lower_fsqrt != lower_fsqrt) { + options->lower_fsqrt != lower_fsqrt || + options->force_indirect_unrolling != no_indirects_mask) { nir_shader_compiler_options *new_options = ralloc(s, nir_shader_compiler_options); *new_options = *s->options; @@ -2827,6 +2829,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s) new_options->lower_uniforms_to_ubo = true, new_options->lower_vector_cmp = true; new_options->lower_fsqrt = lower_fsqrt; + new_options->force_indirect_unrolling = no_indirects_mask; s->options = new_options; } diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index cf9a49bf329..29f110e9f5e 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -2444,7 +2444,7 @@ ttn_optimize_nir(nir_shader *nir) NIR_PASS(progress, nir, nir_opt_conditional_discard); if (nir->options->max_unroll_iterations) { - NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0); + NIR_PASS(progress, nir, nir_opt_loop_unroll); } } while (progress); diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c index 6d28fee391e..b1ca2e0ddb4 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c +++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c @@ -164,7 +164,7 @@ etna_optimize_loop(nir_shader *s) OPT(s, nir_copy_prop); OPT(s, nir_opt_dce); } - progress |= OPT(s, nir_opt_loop_unroll, nir_var_all); + progress |= OPT(s, nir_opt_loop_unroll); progress |= OPT(s, nir_opt_if, false); progress |= OPT(s, nir_opt_remove_phis); progress |= OPT(s, nir_opt_undef); diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c index 82a0e152bfc..45fede5ca00 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c @@ -1076,6 +1076,7 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu, .lower_fsqrt = !screen->specs.has_sin_cos_sqrt, .lower_sincos = !screen->specs.has_sin_cos_sqrt, .lower_uniforms_to_ubo = screen->specs.halti >= 2, + .force_indirect_unrolling = nir_var_all, }; /* apply debug options that disable individual features */ diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c index 46c7ad7ff0f..74331275300 100644 --- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c +++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c @@ -49,6 +49,7 @@ static const nir_shader_compiler_options options = { .has_isub = true, .lower_insert_byte = true, .lower_insert_word = true, + .force_indirect_unrolling = nir_var_all, }; const nir_shader_compiler_options * @@ -92,7 +93,7 @@ ir2_optimize_loop(nir_shader *s) OPT(s, nir_copy_prop); OPT(s, nir_opt_dce); } - progress |= OPT(s, nir_opt_loop_unroll, nir_var_all); + progress |= OPT(s, nir_opt_loop_unroll); progress |= OPT(s, nir_opt_if, false); progress |= OPT(s, nir_opt_remove_phis); progress |= OPT(s, nir_opt_undef); diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index 562586b851e..e43fa9e629e 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -61,6 +61,7 @@ static const nir_shader_compiler_options vs_nir_options = { .lower_fceil = true, .lower_insert_byte = true, .lower_insert_word = true, + .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), }; static const nir_shader_compiler_options fs_nir_options = { @@ -80,6 +81,7 @@ static const nir_shader_compiler_options fs_nir_options = { .lower_insert_word = true, .lower_bitops = true, .lower_vector_cmp = true, + .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), }; const void * @@ -131,10 +133,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s) NIR_PASS(progress, s, lima_nir_lower_ftrunc); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); - NIR_PASS(progress, s, nir_opt_loop_unroll, - nir_var_shader_in | - nir_var_shader_out | - nir_var_function_temp); + NIR_PASS(progress, s, nir_opt_loop_unroll); } while (progress); NIR_PASS_V(s, nir_lower_int_to_float); @@ -236,10 +235,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s, NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); - NIR_PASS(progress, s, nir_opt_loop_unroll, - nir_var_shader_in | - nir_var_shader_out | - nir_var_function_temp); + NIR_PASS(progress, s, nir_opt_loop_unroll); NIR_PASS(progress, s, lima_nir_split_load_input); } while (progress); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 5573cc7065c..044c1ffeb99 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -596,7 +596,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first) NIR_PASS(progress, nir, nir_opt_undef); NIR_PASS(progress, nir, nir_opt_conditional_discard); if (nir->options->max_unroll_iterations) { - NIR_PASS(progress, nir, nir_opt_loop_unroll, 0); + NIR_PASS(progress, nir, nir_opt_loop_unroll); } if (nir->info.stage == MESA_SHADER_FRAGMENT) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 0c4ff754d4d..782127f5c33 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1548,10 +1548,7 @@ vc4_optimize_nir(struct nir_shader *s) } NIR_PASS(progress, s, nir_opt_undef); - NIR_PASS(progress, s, nir_opt_loop_unroll, - nir_var_shader_in | - nir_var_shader_out | - nir_var_function_temp); + NIR_PASS(progress, s, nir_opt_loop_unroll); } while (progress); } @@ -2195,6 +2192,7 @@ static const nir_shader_compiler_options nir_options = { .has_fsub = true, .has_isub = true, .max_unroll_iterations = 32, + .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), }; const void * diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index c1c4f869f9c..4336ff73a08 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -68,7 +68,8 @@ .lower_usub_sat64 = true, \ .lower_hadd64 = true, \ .lower_bfe_with_two_constants = true, \ - .max_unroll_iterations = 32 + .max_unroll_iterations = 32, \ + .force_indirect_unrolling = nir_var_function_temp static const struct nir_shader_compiler_options scalar_nir_options = { COMMON_OPTIONS, @@ -196,6 +197,9 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo) nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT; + nir_options->force_indirect_unrolling |= + brw_nir_no_indirect_mask(compiler, i); + compiler->glsl_compiler_options[i].NirOptions = nir_options; compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true; diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index dc09dcbd24c..a50a76e73aa 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -520,63 +520,10 @@ brw_nir_lower_fs_outputs(nir_shader *nir) this_progress; \ }) -static nir_variable_mode -brw_nir_no_indirect_mask(const struct brw_compiler *compiler, - gl_shader_stage stage) -{ - const struct intel_device_info *devinfo = compiler->devinfo; - const bool is_scalar = compiler->scalar_stage[stage]; - nir_variable_mode indirect_mask = 0; - - switch (stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_FRAGMENT: - indirect_mask |= nir_var_shader_in; - break; - - case MESA_SHADER_GEOMETRY: - if (!is_scalar) - indirect_mask |= nir_var_shader_in; - break; - - default: - /* Everything else can handle indirect inputs */ - break; - } - - if (is_scalar && stage != MESA_SHADER_TESS_CTRL) - indirect_mask |= nir_var_shader_out; - - /* On HSW+, we allow indirects in scalar shaders. They get implemented - * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in - * brw_postprocess_nir. - * - * We haven't plumbed through the indirect scratch messages on gfx6 or - * earlier so doing indirects via scratch doesn't work there. On gfx7 and - * earlier the scratch space size is limited to 12kB. If we allowed - * indirects as scratch all the time, we may easily exceed this limit - * without having any fallback. - */ - if (is_scalar && devinfo->verx10 <= 70) - indirect_mask |= nir_var_function_temp; - - return indirect_mask; -} - void brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, bool is_scalar, bool allow_copies) { - nir_variable_mode loop_indirect_mask = - brw_nir_no_indirect_mask(compiler, nir->info.stage); - - /* We can handle indirects via scratch messages. However, they are - * expensive so we'd rather not if we can avoid it. Have loop unrolling - * try to get rid of them. - */ - if (is_scalar) - loop_indirect_mask |= nir_var_function_temp; - bool progress; unsigned lower_flrp = (nir->options->lower_flrp16 ? 16 : 0) | @@ -671,7 +618,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_opt_if, false); OPT(nir_opt_conditional_discard); if (nir->options->max_unroll_iterations != 0) { - OPT(nir_opt_loop_unroll, loop_indirect_mask); + OPT(nir_opt_loop_unroll); } OPT(nir_opt_remove_phis); OPT(nir_opt_gcm, false); diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h index 20e9281b7d0..8d0c9c6b164 100644 --- a/src/intel/compiler/brw_shader.h +++ b/src/intel/compiler/brw_shader.h @@ -127,6 +127,50 @@ brw_get_scratch_size(int size) return MAX2(1024, util_next_power_of_two(size)); } + +static inline nir_variable_mode +brw_nir_no_indirect_mask(const struct brw_compiler *compiler, + gl_shader_stage stage) +{ + const struct intel_device_info *devinfo = compiler->devinfo; + const bool is_scalar = compiler->scalar_stage[stage]; + nir_variable_mode indirect_mask = (nir_variable_mode) 0; + + switch (stage) { + case MESA_SHADER_VERTEX: + case MESA_SHADER_FRAGMENT: + indirect_mask |= nir_var_shader_in; + break; + + case MESA_SHADER_GEOMETRY: + if (!is_scalar) + indirect_mask |= nir_var_shader_in; + break; + + default: + /* Everything else can handle indirect inputs */ + break; + } + + if (is_scalar && stage != MESA_SHADER_TESS_CTRL) + indirect_mask |= nir_var_shader_out; + + /* On HSW+, we allow indirects in scalar shaders. They get implemented + * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in + * brw_postprocess_nir. + * + * We haven't plumbed through the indirect scratch messages on gfx6 or + * earlier so doing indirects via scratch doesn't work there. On gfx7 and + * earlier the scratch space size is limited to 12kB. If we allowed + * indirects as scratch all the time, we may easily exceed this limit + * without having any fallback. + */ + if (is_scalar && devinfo->verx10 <= 70) + indirect_mask |= nir_var_function_temp; + + return indirect_mask; +} + bool brw_texture_offset(const nir_tex_instr *tex, unsigned src, uint32_t *offset_bits); diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp index 182644bda31..caeb195518a 100644 --- a/src/mesa/state_tracker/st_glsl_to_nir.cpp +++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp @@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir) NIR_PASS(progress, nir, nir_opt_undef); NIR_PASS(progress, nir, nir_opt_conditional_discard); if (nir->options->max_unroll_iterations) { - NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0); + NIR_PASS(progress, nir, nir_opt_loop_unroll); } } while (progress); } diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index afacef28831..a8df91075dd 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -3306,10 +3306,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend) NIR_PASS(progress, nir, nir_opt_undef); NIR_PASS(progress, nir, nir_lower_undef_to_zero); - NIR_PASS(progress, nir, nir_opt_loop_unroll, - nir_var_shader_in | - nir_var_shader_out | - nir_var_function_temp); + NIR_PASS(progress, nir, nir_opt_loop_unroll); } while (progress); /* TODO: Why is 64-bit getting rematerialized? diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h index 45bfd522131..9b320d74e26 100644 --- a/src/panfrost/bifrost/bifrost_compile.h +++ b/src/panfrost/bifrost/bifrost_compile.h @@ -93,6 +93,7 @@ static const nir_shader_compiler_options bifrost_nir_options = { .vertex_id_zero_based = true, .lower_cs_local_index_from_id = true, .max_unroll_iterations = 32, + .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), }; #endif diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 183e2991497..609db5086c3 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -369,10 +369,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend) NIR_PASS(progress, nir, nir_opt_undef); NIR_PASS(progress, nir, nir_lower_undef_to_zero); - NIR_PASS(progress, nir, nir_opt_loop_unroll, - nir_var_shader_in | - nir_var_shader_out | - nir_var_function_temp); + NIR_PASS(progress, nir, nir_opt_loop_unroll); NIR_PASS(progress, nir, nir_opt_vectorize, midgard_vectorize_filter, NULL); diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index d3bce99c8ce..65c9becd28e 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -97,6 +97,7 @@ static const nir_shader_compiler_options midgard_nir_options = { .has_cs_global_id = true, .lower_cs_local_index_from_id = true, .max_unroll_iterations = 32, + .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), }; #endif