nir: add nir_opt_varyings, new pass optimizing and compacting varyings

Highlights: - all shader stages and all input/output types are handled, including inputs and outputs with multiple vertices - the optimizations performed are: unused input/output removal, constant and uniform propagation, output deduplication, inter-shader code motion, and compaction - constant and uniform propagation and output deduplication work even if a shader contains multiple stores of the same output, e.g. in GS - the same optimizations are also performed between output stores and output loads (for TCS) - FS inputs are packed aggressively. Only flat, interp FP32, and interp FP16 can't be in the same vec4. Also, if an output value is non-divergent within a primitive, the corresponding FS input is opportunistically promoted to flat. The big comment at the beginning of nir_opt_varyings.c has a detailed explanation, which is the same as: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8841 dEQP and GLCTS have incorrect tests that fail with this, see: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10361 Acked-by: Timothy Arceri <tarceri@itsqueeze.com> Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26819>
2023-04-06 04:21:00 -04:00
parent ba54099dce
commit c66967b5cb
3 changed files with 4154 additions and 0 deletions
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -273,6 +273,7 @@ files_libnir = files(
  'nir_opt_undef.c',
  'nir_opt_uniform_atomics.c',
  'nir_opt_uniform_subgroup.c',
+  'nir_opt_varyings.c',
  'nir_opt_vectorize.c',
  'nir_passthrough_gs.c',
  'nir_passthrough_tcs.c',
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3577,6 +3577,16 @@ typedef enum {
    */
   nir_io_has_flexible_input_interpolation_except_flat = BITFIELD_BIT(0),

+   /**
+    * nir_opt_varyings compacts (relocates) components of varyings by
+    * rewriting their locations completely, effectively moving components of
+    * varyings between slots. This option forces nir_opt_varyings to make
+    * VARYING_SLOT_POS unused by moving its contents to VARn if the consumer
+    * is not FS. If this option is not set and POS is unused, it moves
+    * components of VARn to POS until it's fully used.
+    */
+   nir_io_dont_use_pos_for_non_fs_varyings = BITFIELD_BIT(1),
+
   /* Options affecting the GLSL compiler are below. */

   /**
@@ -3584,6 +3594,17 @@ typedef enum {
    * This is only affects GLSL compilation.
    */
   nir_io_glsl_lower_derefs = BITFIELD_BIT(16),
+
+   /**
+    * Run nir_opt_varyings in the GLSL linker. If false, optimize varyings
+    * the old way and lower IO later.
+    *
+    * nir_io_lower_to_intrinsics must be set for this to take effect.
+    *
+    * TODO: remove this and default to enabled once we are sure that this
+    * codepath is solid.
+    */
+   nir_io_glsl_opt_varyings = BITFIELD_BIT(17),
 } nir_io_options;

 /** An instruction filtering callback
@@ -4083,6 +4104,31 @@ typedef struct nir_shader_compiler_options {
    *  Used by nir_lower_io_passes.
    */
   void (*lower_mediump_io)(struct nir_shader *nir);
+
+   /**
+    * Return the maximum cost of an expression that's written to a shader
+    * output that can be moved into the next shader to remove that output.
+    *
+    * Currently only uniform expressions are moved. A uniform expression is
+    * any ALU expression sourcing only constants, uniforms, and UBO loads.
+    *
+    * Set to NULL or return 0 if you only want to propagate constants from
+    * outputs to inputs.
+    *
+    * Drivers can set the maximum cost based on the types of consecutive
+    * shaders or shader SHA1s.
+    *
+    * Drivers should also set "varying_estimate_instr_cost".
+    */
+   unsigned (*varying_expression_max_cost)(struct nir_shader *consumer,
+                                           struct nir_shader *producer);
+
+   /**
+    * Return the cost of an instruction that could be moved into the next
+    * shader. If the cost of all instructions in an expression is <=
+    * varying_expression_max_cost(), the expression is moved.
+    */
+   unsigned (*varying_estimate_instr_cost)(struct nir_instr *instr);
 } nir_shader_compiler_options;

 typedef struct nir_shader {
@@ -5204,6 +5250,22 @@ nir_deref_instr *nir_clone_deref_instr(struct nir_builder *b,
                                       nir_variable *var,
                                       nir_deref_instr *deref);

+
+/* Return status from nir_opt_varyings. */
+typedef enum {
+   /* Whether the IR changed such that NIR optimizations should be run, such
+    * as due to removal of loads and stores. IO semantic changes such as
+    * compaction don't count as IR changes because they don't affect NIR
+    * optimizations.
+    */
+   nir_progress_producer = BITFIELD_BIT(0),
+   nir_progress_consumer = BITFIELD_BIT(1),
+} nir_opt_varyings_progress;
+
+nir_opt_varyings_progress
+nir_opt_varyings(nir_shader *producer, nir_shader *consumer, bool spirv,
+                 unsigned max_uniform_components, unsigned max_ubos_per_stage);
+
 bool nir_slot_is_sysval_output(gl_varying_slot slot,
                               gl_shader_stage next_shader);
 bool nir_slot_is_varying(gl_varying_slot slot);
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c