From 8e39e8ed4dd4256a70e6435c0bfaf46a576033c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Wed, 20 Nov 2024 02:42:18 -0500
Subject: [PATCH] nir/opt_varyings: make top-level compaction code for TES,
 TCS, GS separate

Add a separate "if" block for each and use a helper for repeated code.
More code will be added here, and keeping the TES, TCS, and GS compaction
code unified would be a mess.

Part-of:
---
 src/compiler/nir/nir_opt_varyings.c | 114 +++++++++++++++++-----------
 1 file changed, 68 insertions(+), 46 deletions(-)

diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c
index 370a80176c0..9c44e90959f 100644
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c
@@ -4553,6 +4553,26 @@ vs_tcs_tes_gs_assign_slots(struct linkage_info *linkage,
    }
 }
 
+static void
+vs_tcs_tes_gs_assign_slots_2sets(struct linkage_info *linkage,
+                                 BITSET_WORD *input32_mask,
+                                 BITSET_WORD *input16_mask,
+                                 unsigned *slot_index,
+                                 unsigned *patch_slot_index,
+                                 nir_opt_varyings_progress *progress)
+{
+   /* Compact 32-bit inputs, followed by 16-bit inputs allowing them to
+    * share vec4 slots with 32-bit inputs.
+    */
+   vs_tcs_tes_gs_assign_slots(linkage, input32_mask, slot_index,
+                              patch_slot_index, 2, progress);
+   vs_tcs_tes_gs_assign_slots(linkage, input16_mask, slot_index,
+                              patch_slot_index, 1, progress);
+
+   assert(*slot_index <= VARYING_SLOT_MAX * 8);
+   assert(!patch_slot_index || *patch_slot_index <= VARYING_SLOT_TESS_MAX * 8);
+}
+
 /**
  * Compaction means scalarizing and then packing scalar components into full
  * vec4s, so that we minimize the number of unused components in vec4 slots.
@@ -4734,58 +4754,60 @@ compact_varyings(struct linkage_info *linkage,
                                  NUM_SCALAR_SLOTS, false, true,
                                  color_channel_rotate, progress);
       }
-   } else {
-      /* The consumer is a TCS, TES, or GS.
-       *
-       * "use_pos" says whether the driver prefers that compaction with non-FS
-       * consumers puts varyings into POS first before using any VARn.
+      return;
+   }
+
+   /* If we get here, the consumer can only be TCS, TES, or GS.
+    *
+    * "use_pos" says whether the driver prefers that compaction with non-FS
+    * consumers puts varyings into POS first before using any VARn.
+    */
+   bool use_pos = !(linkage->producer_builder.shader->options->io_options &
+                    nir_io_dont_use_pos_for_non_fs_varyings);
+   unsigned slot_index = (use_pos ? VARYING_SLOT_POS
+                                  : VARYING_SLOT_VAR0) * 8;
+
+   if (linkage->consumer_stage == MESA_SHADER_TESS_CTRL) {
+      /* Make tcs_cross_invoc*_mask bits disjoint with flat*_mask bits
+       * because tcs_cross_invoc*_mask is initially a subset of flat*_mask,
+       * but we must assign each scalar slot only once.
        */
-      bool use_pos = !(linkage->producer_builder.shader->options->io_options &
-                       nir_io_dont_use_pos_for_non_fs_varyings);
-      unsigned slot_index = (use_pos ? VARYING_SLOT_POS
-                                     : VARYING_SLOT_VAR0) * 8;
+      BITSET_ANDNOT(linkage->flat32_mask, linkage->flat32_mask,
+                    linkage->tcs_cross_invoc32_mask);
+      BITSET_ANDNOT(linkage->flat16_mask, linkage->flat16_mask,
+                    linkage->tcs_cross_invoc16_mask);
+
+      /* Put cross-invocation-accessed TCS inputs first. */
+      vs_tcs_tes_gs_assign_slots_2sets(linkage, linkage->tcs_cross_invoc32_mask,
+                                       linkage->tcs_cross_invoc16_mask,
+                                       &slot_index, NULL, progress);
+      /* Remaining TCS inputs. */
+      vs_tcs_tes_gs_assign_slots_2sets(linkage, linkage->flat32_mask,
+                                       linkage->flat16_mask, &slot_index,
+                                       NULL, progress);
+      return;
+   }
+
+   if (linkage->consumer_stage == MESA_SHADER_TESS_EVAL) {
       unsigned patch_slot_index = VARYING_SLOT_PATCH0 * 8;
 
-      if (linkage->consumer_stage == MESA_SHADER_TESS_CTRL) {
-         /* Make tcs_cross_invoc*_mask bits disjoint with flat*_mask bits
-          * because tcs_cross_invoc*_mask is initially a subset of flat*_mask,
-          * but we must assign each scalar slot only once.
-          */
-         BITSET_ANDNOT(linkage->flat32_mask, linkage->flat32_mask,
-                       linkage->tcs_cross_invoc32_mask);
-         BITSET_ANDNOT(linkage->flat16_mask, linkage->flat16_mask,
-                       linkage->tcs_cross_invoc16_mask);
+      vs_tcs_tes_gs_assign_slots_2sets(linkage, linkage->flat32_mask,
+                                       linkage->flat16_mask, &slot_index,
+                                       &patch_slot_index, progress);
 
-         /* Compact 32-bit inputs and 16-bit inputs separately. */
-         vs_tcs_tes_gs_assign_slots(linkage, linkage->tcs_cross_invoc32_mask,
-                                    &slot_index, &patch_slot_index, 2, progress);
-         vs_tcs_tes_gs_assign_slots(linkage, linkage->tcs_cross_invoc16_mask,
-                                    &slot_index, &patch_slot_index, 1, progress);
-      }
-
-      /* Compact 32-bit inputs. */
-      vs_tcs_tes_gs_assign_slots(linkage, linkage->flat32_mask, &slot_index,
-                                 &patch_slot_index, 2, progress);
-
-      /* Compact 16-bit inputs, allowing them to share vec4 slots with 32-bit
-       * inputs.
+      /* Put no-varying slots last. These are TCS outputs read by TCS but
+       * not TES.
        */
-      vs_tcs_tes_gs_assign_slots(linkage, linkage->flat16_mask, &slot_index,
-                                 &patch_slot_index, 1, progress);
-
-      if (linkage->producer_stage == MESA_SHADER_TESS_CTRL) {
-         /* Put no-varying slots last. These are TCS outputs read by TCS but
-          * not TES.
-          */
-         vs_tcs_tes_gs_assign_slots(linkage, linkage->no_varying32_mask,
-                                    &slot_index, &patch_slot_index, 2, progress);
-         vs_tcs_tes_gs_assign_slots(linkage, linkage->no_varying16_mask,
-                                    &slot_index, &patch_slot_index, 1, progress);
-      }
-
-      assert(slot_index <= VARYING_SLOT_MAX * 8);
-      assert(patch_slot_index <= VARYING_SLOT_TESS_MAX * 8);
+      vs_tcs_tes_gs_assign_slots_2sets(linkage, linkage->no_varying32_mask,
+                                       linkage->no_varying16_mask, &slot_index,
+                                       &patch_slot_index, progress);
+      return;
    }
+
+   assert(linkage->consumer_stage == MESA_SHADER_GEOMETRY);
+   vs_tcs_tes_gs_assign_slots_2sets(linkage, linkage->flat32_mask,
+                                    linkage->flat16_mask, &slot_index,
+                                    NULL, progress);
 }
 
 /******************************************************************