nir: add indirect loop unrolling to compiler options

This is where it belongs, rather than having to pass it into the
optimisation pass on every call.

It also allows us to call the loop analysis pass, which we will do
later in this series, without having to duplicate these options.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12064>
Timothy Arceri authored on 2021-07-29 19:34:26 +10:00; committed by Marge Bot
parent a654e39f15
commit a9ed4538ab
24 changed files with 90 additions and 94 deletions
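
The shape of the change, as a minimal sketch (the options struct and the
call sites below are illustrative, not copied from any single driver in
this diff):

   /* Before: every caller passed the indirect-unroll mask explicitly. */
   NIR_PASS(progress, s, nir_opt_loop_unroll,
            nir_var_shader_in | nir_var_function_temp);

   /* After: the driver declares the mask once in its compiler options... */
   static const nir_shader_compiler_options example_options = {
      .max_unroll_iterations = 32,
      .force_indirect_unrolling = nir_var_shader_in | nir_var_function_temp,
   };

   /* ...and the pass reads it from shader->options internally. */
   NIR_PASS(progress, s, nir_opt_loop_unroll);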

@@ -181,7 +181,7 @@ radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
NIR_PASS(progress, shader, nir_opt_shrink_vectors,
!device->instance->disable_shrink_image_store);
if (shader->options->max_unroll_iterations) {
-NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+NIR_PASS(progress, shader, nir_opt_loop_unroll);
}
} while (progress && !optimize_conservatively);

@@ -1235,10 +1235,7 @@ agx_optimize_nir(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
-NIR_PASS(progress, nir, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
} while (progress);
NIR_PASS_V(nir, nir_opt_algebraic_late);

@@ -213,6 +213,8 @@ static const nir_shader_compiler_options agx_nir_options = {
.lower_doubles_options = nir_lower_dmod,
.lower_int64_options = ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.has_fsub = true,
.has_isub = true,
.has_cs_global_id = true,

@@ -1877,9 +1877,7 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
if (c && !c->disable_loop_unrolling &&
s->options->max_unroll_iterations > 0) {
bool local_progress = false;
-NIR_PASS(local_progress, s, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_function_temp);
+NIR_PASS(local_progress, s, nir_opt_loop_unroll);
c->unrolled_any_loops |= local_progress;
progress |= local_progress;
}

@@ -248,6 +248,7 @@ const nir_shader_compiler_options v3dv_nir_options = {
* needs to be supported */
.lower_interpolate_at = true,
.max_unroll_iterations = 16,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
.divergence_analysis_options =
nir_divergence_multiple_workgroup_per_compute_subgroup
};

@@ -3690,6 +3690,12 @@ typedef struct nir_shader_compiler_options {
* vectorized IO can pack more varyings when linking. */
bool linker_ignore_precision;
+/**
+ * Specifies which type of indirectly accessed variables should force
+ * loop unrolling.
+ */
+nir_variable_mode force_indirect_unrolling;
nir_lower_int64_options lower_int64_options;
nir_lower_doubles_options lower_doubles_options;
nir_divergence_options divergence_analysis_options;
@@ -5436,7 +5442,7 @@ bool nir_opt_large_constants(nir_shader *shader,
glsl_type_size_align_func size_align,
unsigned threshold);
-bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
+bool nir_opt_loop_unroll(nir_shader *shader);
typedef enum {
nir_move_const_undef = (1 << 0),

@@ -1058,10 +1058,11 @@ nir_opt_loop_unroll_impl(nir_function_impl *impl,
* should force loop unrolling.
*/
bool
-nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
+nir_opt_loop_unroll(nir_shader *shader)
{
bool progress = false;
+nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
nir_foreach_function(function, shader) {
if (function->impl) {
progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
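
Since the mask now lives on shader->options, any pass can consult the same
setting. A hedged sketch of the consuming side (condensed; the helper name
is illustrative, not the exact upstream code):

   static bool
   must_unroll_for_indirect(const nir_shader *shader,
                            nir_variable_mode indirect_mode)
   {
      /* Force unrolling when a loop indirectly indexes a variable whose
       * mode the driver listed in force_indirect_unrolling. */
      return (shader->options->force_indirect_unrolling & indirect_mode) != 0;
   }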

@@ -132,6 +132,7 @@ static const nir_shader_compiler_options options_a6xx = {
.has_fsub = true,
.has_isub = true,
.max_unroll_iterations = 32,
+.force_indirect_unrolling = nir_var_all,
.lower_wpos_pntc = true,
.lower_cs_local_index_from_id = true,
@@ -269,7 +270,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
OPT(s, nir_opt_dce);
}
progress |= OPT(s, nir_opt_if, false);
-progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+progress |= OPT(s, nir_opt_loop_unroll);
progress |= OPT(s, nir_opt_remove_phis);
progress |= OPT(s, nir_opt_undef);
} while (progress);

@@ -2458,7 +2458,6 @@ static void
ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
{
bool progress;
-nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage);
unsigned control_flow_depth =
screen->get_shader_param(screen, pipe_stage,
@@ -2492,7 +2491,7 @@ ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
NIR_PASS(progress, s, nir_opt_trivial_continues);
NIR_PASS(progress, s, nir_opt_vectorize, ntt_should_vectorize_instr, NULL);
NIR_PASS(progress, s, nir_opt_undef);
-NIR_PASS(progress, s, nir_opt_loop_unroll, no_indirects_mask);
+NIR_PASS(progress, s, nir_opt_loop_unroll);
} while (progress);
}
@@ -2802,6 +2801,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
!screen->get_shader_param(screen, pipe_shader_type_from_mesa(s->info.stage),
PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
+nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
if (!options->lower_extract_byte ||
!options->lower_extract_word ||
!options->lower_insert_byte ||
@@ -2812,7 +2813,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
!options->lower_rotate ||
!options->lower_uniforms_to_ubo ||
!options->lower_vector_cmp ||
-options->lower_fsqrt != lower_fsqrt) {
+options->lower_fsqrt != lower_fsqrt ||
+options->force_indirect_unrolling != no_indirects_mask) {
nir_shader_compiler_options *new_options = ralloc(s, nir_shader_compiler_options);
*new_options = *s->options;
@@ -2827,6 +2829,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
new_options->lower_uniforms_to_ubo = true,
new_options->lower_vector_cmp = true;
new_options->lower_fsqrt = lower_fsqrt;
+new_options->force_indirect_unrolling = no_indirects_mask;
s->options = new_options;
}

@@ -2444,7 +2444,7 @@ ttn_optimize_nir(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_conditional_discard);
if (nir->options->max_unroll_iterations) {
-NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
}
} while (progress);

@@ -164,7 +164,7 @@ etna_optimize_loop(nir_shader *s)
OPT(s, nir_copy_prop);
OPT(s, nir_opt_dce);
}
-progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+progress |= OPT(s, nir_opt_loop_unroll);
progress |= OPT(s, nir_opt_if, false);
progress |= OPT(s, nir_opt_remove_phis);
progress |= OPT(s, nir_opt_undef);

@@ -1076,6 +1076,7 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
.lower_fsqrt = !screen->specs.has_sin_cos_sqrt,
.lower_sincos = !screen->specs.has_sin_cos_sqrt,
.lower_uniforms_to_ubo = screen->specs.halti >= 2,
+.force_indirect_unrolling = nir_var_all,
};
/* apply debug options that disable individual features */

@@ -49,6 +49,7 @@ static const nir_shader_compiler_options options = {
.has_isub = true,
.lower_insert_byte = true,
.lower_insert_word = true,
+.force_indirect_unrolling = nir_var_all,
};
const nir_shader_compiler_options *
@@ -92,7 +93,7 @@ ir2_optimize_loop(nir_shader *s)
OPT(s, nir_copy_prop);
OPT(s, nir_opt_dce);
}
-progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+progress |= OPT(s, nir_opt_loop_unroll);
progress |= OPT(s, nir_opt_if, false);
progress |= OPT(s, nir_opt_remove_phis);
progress |= OPT(s, nir_opt_undef);

@@ -61,6 +61,7 @@ static const nir_shader_compiler_options vs_nir_options = {
.lower_fceil = true,
.lower_insert_byte = true,
.lower_insert_word = true,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
static const nir_shader_compiler_options fs_nir_options = {
@@ -80,6 +81,7 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_insert_word = true,
.lower_bitops = true,
.lower_vector_cmp = true,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
const void *
@@ -131,10 +133,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
NIR_PASS(progress, s, lima_nir_lower_ftrunc);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
-NIR_PASS(progress, s, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, s, nir_opt_loop_unroll);
} while (progress);
NIR_PASS_V(s, nir_lower_int_to_float);
@@ -236,10 +235,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
NIR_PASS(progress, s, nir_opt_algebraic);
NIR_PASS(progress, s, nir_opt_constant_folding);
NIR_PASS(progress, s, nir_opt_undef);
-NIR_PASS(progress, s, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, s, nir_opt_loop_unroll);
NIR_PASS(progress, s, lima_nir_split_load_input);
} while (progress);

@@ -596,7 +596,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_opt_conditional_discard);
if (nir->options->max_unroll_iterations) {
-NIR_PASS(progress, nir, nir_opt_loop_unroll, 0);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
}
if (nir->info.stage == MESA_SHADER_FRAGMENT)

@@ -1548,10 +1548,7 @@ vc4_optimize_nir(struct nir_shader *s)
}
NIR_PASS(progress, s, nir_opt_undef);
-NIR_PASS(progress, s, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, s, nir_opt_loop_unroll);
} while (progress);
}
@@ -2195,6 +2192,7 @@ static const nir_shader_compiler_options nir_options = {
.has_fsub = true,
.has_isub = true,
.max_unroll_iterations = 32,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
const void *

@@ -68,7 +68,8 @@
.lower_usub_sat64 = true, \
.lower_hadd64 = true, \
.lower_bfe_with_two_constants = true, \
-.max_unroll_iterations = 32
+.max_unroll_iterations = 32, \
+.force_indirect_unrolling = nir_var_function_temp
static const struct nir_shader_compiler_options scalar_nir_options = {
COMMON_OPTIONS,
@@ -196,6 +197,9 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;
+nir_options->force_indirect_unrolling |=
+   brw_nir_no_indirect_mask(compiler, i);
compiler->glsl_compiler_options[i].NirOptions = nir_options;
compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;

@@ -520,63 +520,10 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
this_progress; \
})
-static nir_variable_mode
-brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
-gl_shader_stage stage)
-{
-const struct intel_device_info *devinfo = compiler->devinfo;
-const bool is_scalar = compiler->scalar_stage[stage];
-nir_variable_mode indirect_mask = 0;
-switch (stage) {
-case MESA_SHADER_VERTEX:
-case MESA_SHADER_FRAGMENT:
-indirect_mask |= nir_var_shader_in;
-break;
-case MESA_SHADER_GEOMETRY:
-if (!is_scalar)
-indirect_mask |= nir_var_shader_in;
-break;
-default:
-/* Everything else can handle indirect inputs */
-break;
-}
-if (is_scalar && stage != MESA_SHADER_TESS_CTRL)
-indirect_mask |= nir_var_shader_out;
-/* On HSW+, we allow indirects in scalar shaders. They get implemented
- * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
- * brw_postprocess_nir.
- *
- * We haven't plumbed through the indirect scratch messages on gfx6 or
- * earlier so doing indirects via scratch doesn't work there. On gfx7 and
- * earlier the scratch space size is limited to 12kB. If we allowed
- * indirects as scratch all the time, we may easily exceed this limit
- * without having any fallback.
- */
-if (is_scalar && devinfo->verx10 <= 70)
-indirect_mask |= nir_var_function_temp;
-return indirect_mask;
-}
void
brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
bool is_scalar, bool allow_copies)
{
-nir_variable_mode loop_indirect_mask =
-brw_nir_no_indirect_mask(compiler, nir->info.stage);
-/* We can handle indirects via scratch messages. However, they are
- * expensive so we'd rather not if we can avoid it. Have loop unrolling
- * try to get rid of them.
- */
-if (is_scalar)
-loop_indirect_mask |= nir_var_function_temp;
bool progress;
unsigned lower_flrp =
(nir->options->lower_flrp16 ? 16 : 0) |
@@ -671,7 +618,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
OPT(nir_opt_if, false);
OPT(nir_opt_conditional_discard);
if (nir->options->max_unroll_iterations != 0) {
-OPT(nir_opt_loop_unroll, loop_indirect_mask);
+OPT(nir_opt_loop_unroll);
}
OPT(nir_opt_remove_phis);
OPT(nir_opt_gcm, false);

@@ -127,6 +127,50 @@ brw_get_scratch_size(int size)
return MAX2(1024, util_next_power_of_two(size));
}
+static inline nir_variable_mode
+brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
+gl_shader_stage stage)
+{
+const struct intel_device_info *devinfo = compiler->devinfo;
+const bool is_scalar = compiler->scalar_stage[stage];
+nir_variable_mode indirect_mask = (nir_variable_mode) 0;
+switch (stage) {
+case MESA_SHADER_VERTEX:
+case MESA_SHADER_FRAGMENT:
+indirect_mask |= nir_var_shader_in;
+break;
+case MESA_SHADER_GEOMETRY:
+if (!is_scalar)
+indirect_mask |= nir_var_shader_in;
+break;
+default:
+/* Everything else can handle indirect inputs */
+break;
+}
+if (is_scalar && stage != MESA_SHADER_TESS_CTRL)
+indirect_mask |= nir_var_shader_out;
+/* On HSW+, we allow indirects in scalar shaders. They get implemented
+ * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
+ * brw_postprocess_nir.
+ *
+ * We haven't plumbed through the indirect scratch messages on gfx6 or
+ * earlier so doing indirects via scratch doesn't work there. On gfx7 and
+ * earlier the scratch space size is limited to 12kB. If we allowed
+ * indirects as scratch all the time, we may easily exceed this limit
+ * without having any fallback.
+ */
+if (is_scalar && devinfo->verx10 <= 70)
+indirect_mask |= nir_var_function_temp;
+return indirect_mask;
+}
bool brw_texture_offset(const nir_tex_instr *tex, unsigned src,
uint32_t *offset_bits);

@@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_opt_conditional_discard);
if (nir->options->max_unroll_iterations) {
-NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
}
} while (progress);
}

@@ -3306,10 +3306,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
-NIR_PASS(progress, nir, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
} while (progress);
/* TODO: Why is 64-bit getting rematerialized?

@@ -93,6 +93,7 @@ static const nir_shader_compiler_options bifrost_nir_options = {
.vertex_id_zero_based = true,
.lower_cs_local_index_from_id = true,
.max_unroll_iterations = 32,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
#endif

@@ -369,10 +369,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
NIR_PASS(progress, nir, nir_opt_undef);
NIR_PASS(progress, nir, nir_lower_undef_to_zero);
-NIR_PASS(progress, nir, nir_opt_loop_unroll,
-nir_var_shader_in |
-nir_var_shader_out |
-nir_var_function_temp);
+NIR_PASS(progress, nir, nir_opt_loop_unroll);
NIR_PASS(progress, nir, nir_opt_vectorize,
midgard_vectorize_filter, NULL);

@@ -97,6 +97,7 @@ static const nir_shader_compiler_options midgard_nir_options = {
.has_cs_global_id = true,
.lower_cs_local_index_from_id = true,
.max_unroll_iterations = 32,
+.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
};
#endif