intel/compiler: Don't use fs_visitor::bld in remaining places

The remaining users can simply create a new builder at_end() if needed. In many places a new builder object is already being constructed, so just give more specific instructions.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26323>
2023-11-21 09:47:18 -08:00
parent c73c1aa496
commit 5b8ec015f2
5 changed files with 29 additions and 23 deletions
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1231,7 +1231,7 @@ fs_visitor::emit_gs_thread_end()
      emit_gs_control_data_bits(this->final_gs_vertex_count);
   }

-   const fs_builder abld = bld.annotate("thread end");
+   const fs_builder abld = fs_builder(this, dispatch_width).at_end().annotate("thread end");
   fs_inst *inst;

   if (gs_prog_data->static_vertex_count != -1) {
@@ -1285,7 +1285,7 @@ fs_visitor::assign_curb_setup()
      assert(uniform_push_length <= reg_unit(devinfo));
   } else if (is_compute && devinfo->verx10 >= 125) {
      assert(devinfo->has_lsc);
-      fs_builder ubld = bld.exec_all().group(1, 0).at(
+      fs_builder ubld = fs_builder(this, 1).exec_all().at(
         cfg->first_block(), cfg->first_block()->start());

      /* The base offset for our push data is passed in as R0.0[31:6]. We have
@@ -1382,7 +1382,7 @@ fs_visitor::assign_curb_setup()

   uint64_t want_zero = used & stage_prog_data->zero_push_reg;
   if (want_zero) {
-      fs_builder ubld = bld.exec_all().group(8, 0).at(
+      fs_builder ubld = fs_builder(this, 8).exec_all().at(
         cfg->first_block(), cfg->first_block()->start());

      /* push_reg_mask_param is in 32-bit units */
@@ -3307,6 +3307,7 @@ fs_visitor::emit_repclear_shader()
              BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4,
              BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);

+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
   bld.exec_all().group(4, 0).MOV(color_output, color_input);

   if (key->nr_color_regions > 1) {
@@ -5310,6 +5311,8 @@ fs_visitor::lower_simd_width()
          * we're sure that both cases can be handled.
          */
         const unsigned max_width = MAX2(inst->exec_size, lower_width);
+
+         const fs_builder bld = fs_builder(this, dispatch_width).at_end();
         const fs_builder ibld = bld.at(block, inst)
                                    .exec_all(inst->force_writemask_all)
                                    .group(max_width, inst->group / max_width);
@@ -5584,7 +5587,7 @@ fs_visitor::lower_find_live_channel()
      if (!inst->is_partial_write())
         ibld.emit_undef_for_dst(inst);

-      const fs_builder ubld = bld.at(block, inst).exec_all().group(1, 0);
+      const fs_builder ubld = fs_builder(this, block, inst).exec_all().group(1, 0);

      /* ce0 doesn't consider the thread dispatch mask (DMask or VMask),
       * so combine the execution and dispatch masks to obtain the true mask.
@@ -5946,19 +5949,6 @@ fs_visitor::optimize()
   /* Start by validating the shader we currently have. */
   validate();

-   /* bld is the common builder object pointing at the end of the program we
-    * used to translate it into i965 IR.  For the optimization and lowering
-    * passes coming next, any code added after the end of the program without
-    * having explicitly called fs_builder::at() clearly points at a mistake.
-    * Ideally optimization passes wouldn't be part of the visitor so they
-    * wouldn't have access to bld at all, but they do, so just in case some
-    * pass forgets to ask for a location explicitly set it to NULL here to
-    * make it trip.  The dispatch width is initialized to a bogus value to
-    * make sure that optimizations set the execution controls explicitly to
-    * match the code they are manipulating instead of relying on the defaults.
-    */
-   bld = fs_builder(this, 64);
-
   bool progress = false;
   int iteration = 0;
   int pass_num = 0;
@@ -6138,7 +6128,7 @@ fs_visitor::fixup_sends_duplicate_payload()
         /* Sadly, we've lost all notion of channels and bit sizes at this
          * point.  Just WE_all it.
          */
-         const fs_builder ibld = bld.at(block, inst).exec_all().group(16, 0);
+         const fs_builder ibld = fs_builder(this, block, inst).exec_all().group(16, 0);
         fs_reg copy_src = retype(inst->src[3], BRW_REGISTER_TYPE_UD);
         fs_reg copy_dst = tmp;
         for (unsigned i = 0; i < inst->ex_mlen; i += 2) {
@@ -6242,8 +6232,8 @@ fs_visitor::emit_dummy_mov_instruction()

   /* Insert dummy mov as first instruction. */
   const fs_builder ubld =
-      bld.at(cfg->first_block(), first_inst).exec_all().group(8, 0);
-   ubld.MOV(bld.null_reg_ud(), brw_imm_ud(0u));
+      fs_builder(this, cfg->first_block(), (fs_inst *)first_inst).exec_all().group(8, 0);
+   ubld.MOV(ubld.null_reg_ud(), brw_imm_ud(0u));

   invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
 }
@@ -6700,6 +6690,7 @@ fs_visitor::set_tcs_invocation_id()
 {
   struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
   struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();

   const unsigned instance_id_mask =
      (devinfo->verx10 >= 125) ? INTEL_MASK(7, 0) :
@@ -6751,6 +6742,8 @@ fs_visitor::emit_tcs_thread_end()
   if (devinfo->ver != 8 && mark_last_urb_write_with_eot())
      return;

+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
+
   /* Emit a URB write to end the thread.  On Broadwell, we use this to write
    * zero to the "TR DS Cache Disable" bit (we haven't implemented a fancy
    * algorithm to set it optimally).  On other platforms, we simply write
@@ -6772,6 +6765,7 @@ fs_visitor::run_tcs()
   assert(stage == MESA_SHADER_TESS_CTRL);

   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();

   assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_SINGLE_PATCH ||
          vue_prog_data->dispatch_mode == DISPATCH_MODE_TCS_MULTI_PATCH);
@@ -6871,6 +6865,7 @@ fs_visitor::run_gs()
       * Otherwise, we need to initialize it to 0 here.
       */
      if (gs_compile->control_data_header_size_bits <= 32) {
+         const fs_builder bld = fs_builder(this, dispatch_width).at_end();
         const fs_builder abld = bld.annotate("initialize control data bits");
         abld.MOV(this->control_data_bits, brw_imm_ud(0u));
      }
@@ -6933,6 +6928,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
 {
   struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
   brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();

   assert(stage == MESA_SHADER_FRAGMENT);

@@ -7008,6 +7004,7 @@ fs_visitor::run_cs(bool allow_spilling)
 {
   assert(gl_shader_stage_is_compute(stage));
   assert(devinfo->ver >= 7);
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();

   payload_ = new cs_thread_payload(*this);

--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -1660,7 +1660,7 @@ fs_visitor::opt_combine_constants()
       * both HF slots within a DWord with the constant.
       */
      const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
-      const fs_builder ibld = bld.at(insert_block, n).exec_all().group(width, 0);
+      const fs_builder ibld = fs_builder(this, width).at(insert_block, n).exec_all();

      fs_reg reg(VGRF, imm->nr);
      reg.offset = imm->subreg_offset;
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2218,6 +2218,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)

   struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);

+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
   const fs_builder abld = bld.annotate("emit control data bits");
   const fs_builder fwa_bld = bld.exec_all();

--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -94,7 +94,7 @@ fs_visitor::emit_interpolation_setup_gfx4()
 {
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

-   fs_builder abld = bld.annotate("compute pixel centers");
+   fs_builder abld = fs_builder(this, dispatch_width).at_end().annotate("compute pixel centers");
   this->pixel_x = vgrf(glsl_type::uint_type);
   this->pixel_y = vgrf(glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
@@ -106,6 +106,7 @@ fs_visitor::emit_interpolation_setup_gfx4()
            fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
            fs_reg(brw_imm_v(0x11001100)));

+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
   abld = bld.annotate("compute pixel deltas from v0");

   this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL] =
@@ -151,6 +152,7 @@ fs_visitor::emit_interpolation_setup_gfx4()
 void
 fs_visitor::emit_interpolation_setup_gfx6()
 {
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
   fs_builder abld = bld.annotate("compute pixel centers");

   this->pixel_x = vgrf(glsl_type::float_type);
@@ -603,6 +605,7 @@ fs_visitor::emit_alpha_test()
 {
   assert(stage == MESA_SHADER_FRAGMENT);
   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
   const fs_builder abld = bld.annotate("Alpha test");

   fs_inst *cmp;
@@ -676,6 +679,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
 void
 fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha)
 {
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
   fs_inst *inst = NULL;

   for (int target = 0; target < nr_color_regions; target++) {
@@ -810,6 +814,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
      unreachable("invalid stage");
   }

+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
+
   fs_reg per_slot_offsets;

   if (stage == MESA_SHADER_GEOMETRY) {
@@ -1084,6 +1090,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
 void
 fs_visitor::emit_urb_fence()
 {
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();
   fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
   fs_inst *fence = bld.emit(SHADER_OPCODE_MEMORY_FENCE, dst,
                             brw_vec8_grf(0, 0),
@@ -1103,6 +1110,7 @@ void
 fs_visitor::emit_cs_terminate()
 {
   assert(devinfo->ver >= 7);
+   const fs_builder bld = fs_builder(this, dispatch_width).at_end();

   /* We can't directly send from g0, since sends with EOT have to use
    * g112-127. So, copy it to a virtual register, The register allocator will
--- a/src/intel/compiler/brw_lower_logical_sends.cpp
+++ b/src/intel/compiler/brw_lower_logical_sends.cpp
@@ -3304,7 +3304,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
         invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
      } else if (devinfo->ver >= 7) {
         const fs_builder ubld = fs_builder(this, block, inst).exec_all();
-         fs_reg header = bld.exec_all().group(8, 0).vgrf(BRW_REGISTER_TYPE_UD);
+         fs_reg header = fs_builder(this, 8).exec_all().vgrf(BRW_REGISTER_TYPE_UD);

         ubld.group(8, 0).MOV(header,
                              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));