intel/brw: Reduce scope of some TCS specific functions

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30169>
2024-07-12 16:13:14 -07:00
parent 47b9dc9070
commit f9ddf51b70
3 changed files with 87 additions and 90 deletions
--- a/src/intel/compiler/brw_compile_tcs.cpp
+++ b/src/intel/compiler/brw_compile_tcs.cpp
@@ -42,6 +42,90 @@ get_patch_count_threshold(int input_control_points)
   return 1;
 }

+static void
+brw_set_tcs_invocation_id(fs_visitor &s)
+{
+   const struct intel_device_info *devinfo = s.devinfo;
+   struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(s.prog_data);
+   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
+   const fs_builder bld = fs_builder(&s).at_end();
+
+   const unsigned instance_id_mask =
+      (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) :
+      (devinfo->ver >= 11)     ? INTEL_MASK(22, 16) :
+                                 INTEL_MASK(23, 17);
+   const unsigned instance_id_shift =
+      (devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 16 : 17;
+
+   /* s.invocation_id (gl_InvocationID) is built from the instance number
+    * read out of g0.2.
+    *
+    * Get instance number from g0.2 bits:
+    *  * 7:0 on DG2+
+    *  * 22:16 on gfx11+
+    *  * 23:17 otherwise
+    *
+    * The AND only masks the field; t is still unshifted, and the paths
+    * below that use t apply their own right shift.
+    */
+   brw_reg t =
+      bld.AND(brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)),
+              brw_imm_ud(instance_id_mask));
+
+   if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) {
+      /* gl_InvocationID is just the thread number */
+      s.invocation_id = bld.SHR(t, brw_imm_ud(instance_id_shift));
+      return;
+   }
+
+   assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH);
+
+   brw_reg channels_uw = bld.vgrf(BRW_TYPE_UW);
+   brw_reg channels_ud = bld.vgrf(BRW_TYPE_UD);
+   bld.MOV(channels_uw, brw_reg(brw_imm_uv(0x76543210)));
+   bld.MOV(channels_ud, channels_uw);
+
+   if (tcs_prog_data->instances == 1) {
+      s.invocation_id = channels_ud;
+   } else {
+      /* instance_id = 8 * t + <76543210>
+       *
+       * Shifting right by (instance_id_shift - 3) rather than by
+       * instance_id_shift leaves the instance number multiplied by 8,
+       * which is then added to the per-channel values 0..7.
+       *
+       * NOTE(review): instance_id_shift is 0 when verx10 >= 125, so this
+       * unsigned subtraction would wrap there.  Presumably single-patch
+       * dispatch with more than one instance is never used on those
+       * platforms -- confirm against the code selecting dispatch_mode.
+       */
+      s.invocation_id =
+         bld.ADD(bld.SHR(t, brw_imm_ud(instance_id_shift - 3)), channels_ud);
+   }
+}
+
+static void
+brw_emit_tcs_thread_end(fs_visitor &s)
+{
+   /* Make sure the program in s finishes with an EOT-tagged URB write.
+    *
+    * Try and tag the last URB write with EOT instead of emitting a whole
+    * separate write just to finish the thread.  There isn't guaranteed to
+    * be one, so this may not succeed.
+    */
+   if (s.mark_last_urb_write_with_eot())
+      return;
+
+   const fs_builder bld = fs_builder(&s).at_end();
+
+   /* Emit a URB write to end the thread.  On Broadwell, we use this to write
+    * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy
+    * algorithm to set it optimally).  On other platforms, we simply write
+    * zero to a reserved/MBZ patch header DWord which has no consequence.
+    *
+    * The write goes to the patch URB output handle from the TCS payload,
+    * carries a single zero component, and is the instruction that gets the
+    * EOT flag on this fallback path.
+    */
+   brw_reg srcs[URB_LOGICAL_NUM_SRCS];
+   srcs[URB_LOGICAL_SRC_HANDLE] = s.tcs_payload().patch_urb_output;
+   srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
+   srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
+   srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
+   fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
+                            reg_undef, srcs, ARRAY_SIZE(srcs));
+   inst->eot = true;
+}
+
+static void
+brw_assign_tcs_urb_setup(fs_visitor &s)
+{
+   assert(s.stage == MESA_SHADER_TESS_CTRL);
+
+   /* Rewrite all ATTR file references to HW_REGs.
+    *
+    * Every instruction of every block in s.cfg is handed to
+    * s.convert_attr_sources_to_hw_regs(), which performs the per-instruction
+    * conversion.  Only meant for the tessellation control stage, as the
+    * assert at the top of this function checks.
+    */
+   foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
+      s.convert_attr_sources_to_hw_regs(inst);
+   }
+}
+
 static bool
 run_tcs(fs_visitor &s)
 {
@@ -56,7 +140,7 @@ run_tcs(fs_visitor &s)
   s.payload_ = new tcs_thread_payload(s);

   /* Initialize gl_InvocationID */
-   s.set_tcs_invocation_id();
+   brw_set_tcs_invocation_id(s);

   const bool fix_dispatch_mask =
      vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH &&
@@ -75,7 +159,7 @@ run_tcs(fs_visitor &s)
      bld.emit(BRW_OPCODE_ENDIF);
   }

-   s.emit_tcs_thread_end();
+   brw_emit_tcs_thread_end(s);

   if (s.failed)
      return false;
@@ -85,7 +169,7 @@ run_tcs(fs_visitor &s)
   brw_fs_optimize(s);

   s.assign_curb_setup();
-   s.assign_tcs_urb_setup();
+   brw_assign_tcs_urb_setup(s);

   brw_fs_lower_3src_null_dest(s);
   brw_fs_workaround_memory_fence_before_eot(s);
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1357,17 +1357,6 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
   }
 }

-void
-fs_visitor::assign_tcs_urb_setup()
-{
-   assert(stage == MESA_SHADER_TESS_CTRL);
-
-   /* Rewrite all ATTR file references to HW_REGs. */
-   foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      convert_attr_sources_to_hw_regs(inst);
-   }
-}
-
 void
 fs_visitor::assign_tes_urb_setup()
 {
@@ -2394,78 +2383,6 @@ fs_visitor::allocate_registers(bool allow_spilling)
   brw_fs_lower_scoreboard(*this);
 }

-void
-fs_visitor::set_tcs_invocation_id()
-{
-   struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
-   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
-   const fs_builder bld = fs_builder(this).at_end();
-
-   const unsigned instance_id_mask =
-      (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) :
-      (devinfo->ver >= 11)     ? INTEL_MASK(22, 16) :
-                                 INTEL_MASK(23, 17);
-   const unsigned instance_id_shift =
-      (devinfo->verx10 >= 125) ? 0 : (devinfo->ver >= 11) ? 16 : 17;
-
-   /* Get instance number from g0.2 bits:
-    *  * 7:0 on DG2+
-    *  * 22:16 on gfx11+
-    *  * 23:17 otherwise
-    */
-   brw_reg t =
-      bld.AND(brw_reg(retype(brw_vec1_grf(0, 2), BRW_TYPE_UD)),
-              brw_imm_ud(instance_id_mask));
-
-   if (vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH) {
-      /* gl_InvocationID is just the thread number */
-      invocation_id = bld.SHR(t, brw_imm_ud(instance_id_shift));
-      return;
-   }
-
-   assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH);
-
-   brw_reg channels_uw = bld.vgrf(BRW_TYPE_UW);
-   brw_reg channels_ud = bld.vgrf(BRW_TYPE_UD);
-   bld.MOV(channels_uw, brw_reg(brw_imm_uv(0x76543210)));
-   bld.MOV(channels_ud, channels_uw);
-
-   if (tcs_prog_data->instances == 1) {
-      invocation_id = channels_ud;
-   } else {
-      /* instance_id = 8 * t + <76543210> */
-      invocation_id =
-         bld.ADD(bld.SHR(t, brw_imm_ud(instance_id_shift - 3)), channels_ud);
-   }
-}
-
-void
-fs_visitor::emit_tcs_thread_end()
-{
-   /* Try and tag the last URB write with EOT instead of emitting a whole
-    * separate write just to finish the thread.  There isn't guaranteed to
-    * be one, so this may not succeed.
-    */
-   if (mark_last_urb_write_with_eot())
-      return;
-
-   const fs_builder bld = fs_builder(this).at_end();
-
-   /* Emit a URB write to end the thread.  On Broadwell, we use this to write
-    * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy
-    * algorithm to set it optimally).  On other platforms, we simply write
-    * zero to a reserved/MBZ patch header DWord which has no consequence.
-    */
-   brw_reg srcs[URB_LOGICAL_NUM_SRCS];
-   srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output;
-   srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
-   srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
-   srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
-   fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
-                            reg_undef, srcs, ARRAY_SIZE(srcs));
-   inst->eot = true;
-}
-
 /**
 * Move load_interpolated_input with simple (payload-based) barycentric modes
 * to the top of the program so we don't emit multiple PLNs for the same input.
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -305,7 +305,6 @@ public:
   uint32_t compute_max_register_pressure();
   void assign_curb_setup();
   void convert_attr_sources_to_hw_regs(fs_inst *inst);
-   void assign_tcs_urb_setup();
   void assign_tes_urb_setup();
   bool assign_regs(bool allow_spilling, bool spill_all);
   void assign_regs_trivial();
@@ -325,14 +324,11 @@ public:
   void fail(const char *msg, ...);
   void limit_dispatch_width(unsigned n, const char *msg);

-   void set_tcs_invocation_id();
-
   void emit_urb_writes(const brw_reg &gs_vertex_count = brw_reg());
   void emit_gs_control_data_bits(const brw_reg &vertex_count);
   brw_reg gs_urb_channel_mask(const brw_reg &dword_index);
   brw_reg gs_urb_per_slot_dword_index(const brw_reg &vertex_count);
   bool mark_last_urb_write_with_eot();
-   void emit_tcs_thread_end();
   void emit_urb_fence();
   void emit_cs_terminate();