intel/fs/xe2+: Add ALU-based implementation of barycentric interpolation at a per-channel sample.

This implements a replacement for the previous implementation of nir_intrinsic_load_barycentric_at_sample that relied on the Pixel Interpolator shared function, since that shared function is going to be removed from the hardware from Xe2 onwards. This implementation simply looks up the X/Y offsets of each sample index in the table provided in the PS thread payload by using indirect addressing, then does the actual interpolation by calling emit_pixel_interpolater_alu_at_offset(), introduced in the previous commit. Note that even though this is only immediately useful on Xe2+ platforms, there's no reason why it shouldn't work on earlier platforms, as long as we have the sample X/Y offsets available in the thread payload.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29847>
2024-06-20 18:51:06 -07:00
parent 95eec5a0dd
commit 79fa3eba11
1 changed files with 92 additions and 27 deletions
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -2161,6 +2161,63 @@ emit_pixel_interpolater_alu_at_offset(const fs_builder &bld,
   }
 }

+/**
+ * Interpolate per-polygon barycentrics into "dst" at the sample index
+ * given per channel by "idx", optionally perspective-correct according
+ * to "interpolation", by looking up the sample's X/Y offsets and then
+ * deferring to emit_pixel_interpolater_alu_at_offset().  Replaces the
+ * PI shared function of pre-Xe2 platforms, but is expected to work as
+ * far back as ICL since the thread payload has the needed setup info.
+ */
+static void
+emit_pixel_interpolater_alu_at_sample(const fs_builder &bld,
+                                      const fs_reg &dst,
+                                      const fs_reg &idx,
+                                      glsl_interp_mode interpolation)
+{
+   const fs_thread_payload &payload = bld.shader->fs_payload();
+   const struct brw_wm_prog_data *wm_prog_data =
+      brw_wm_prog_data(bld.shader->prog_data);
+   const fs_builder ubld = bld.exec_all().group(16, 0);
+   const fs_reg sample_offs_xy = ubld.vgrf(BRW_TYPE_UD);
+   assert(wm_prog_data->uses_sample_offsets);
+
+   /* Interleave the X/Y coordinates of each sample so that a single
+    * indirect look-up fetches both: one MOV widens the 16 X bytes into
+    * UW subscript 0 of each UD, the other the 16 Y bytes into UW 1.
+    */
+   for (unsigned i = 0; i < 2; i++) {
+      const fs_reg reg = retype(brw_vec16_grf(payload.sample_offsets_reg, 4 * i),
+                                BRW_TYPE_UB);
+      ubld.MOV(subscript(sample_offs_xy, BRW_TYPE_UW, i), reg);
+   }
+
+   /* Scale each channel's sample index by the UD size to get a byte
+    * offset into the 16-entry table, then fetch its X/Y indirectly.
+    */
+   const fs_reg idx_b = bld.vgrf(BRW_TYPE_UD);
+   bld.MUL(idx_b, idx, brw_imm_ud(brw_type_size_bytes(BRW_TYPE_UD)));
+
+   const fs_reg off_xy = bld.vgrf(BRW_TYPE_UD);
+   bld.emit(SHADER_OPCODE_MOV_INDIRECT, off_xy, component(sample_offs_xy, 0),
+            idx_b, brw_imm_ud(16 * brw_type_size_bytes(BRW_TYPE_UD)));
+
+   /* Convert the selected fixed-point offsets to floating point:
+    * scale by 1/16 (0.0625), then subtract 0.5 from each coordinate.
+    */
+   const fs_reg offs = bld.vgrf(BRW_TYPE_F, 2);
+
+   for (unsigned i = 0; i < 2; i++) {
+      const fs_reg tmp = bld.vgrf(BRW_TYPE_F);
+      bld.MOV(tmp, subscript(off_xy, BRW_TYPE_UW, i));
+      bld.MUL(tmp, tmp, brw_imm_f(0.0625));
+      bld.ADD(offset(offs, bld, i), tmp, brw_imm_f(-0.5));
+   }
+
+   /* Interpolate at the resulting per-channel X/Y offsets. */
+   emit_pixel_interpolater_alu_at_offset(bld, dst, offs, interpolation);
+}
+
 /**
 * Computes 1 << x, given a D/UD register containing some value x.
 */
@@ -4233,35 +4290,43 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
      const glsl_interp_mode interpolation =
         (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);

-      fs_reg msg_data;
-      if (nir_src_is_const(instr->src[0])) {
-         msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4);
+      if (devinfo->ver >= 20) {
+         emit_pixel_interpolater_alu_at_sample(
+            bld, dest, retype(get_nir_src(ntb, instr->src[0]),
+                              BRW_TYPE_UD),
+            interpolation);
+
      } else {
-         const fs_reg sample_src = retype(get_nir_src(ntb, instr->src[0]),
-                                          BRW_TYPE_UD);
-         const fs_reg sample_id = bld.emit_uniformize(sample_src);
-         msg_data = component(bld.group(8, 0).vgrf(BRW_TYPE_UD), 0);
-         bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u));
+         fs_reg msg_data;
+         if (nir_src_is_const(instr->src[0])) {
+            msg_data = brw_imm_ud(nir_src_as_uint(instr->src[0]) << 4);
+         } else {
+            const fs_reg sample_src = retype(get_nir_src(ntb, instr->src[0]),
+                                             BRW_TYPE_UD);
+            const fs_reg sample_id = bld.emit_uniformize(sample_src);
+            msg_data = component(bld.group(8, 0).vgrf(BRW_TYPE_UD), 0);
+            bld.exec_all().group(1, 0).SHL(msg_data, sample_id, brw_imm_ud(4u));
+         }
+
+         fs_reg flag_reg;
+         struct brw_wm_prog_key *wm_prog_key = (struct brw_wm_prog_key *) s.key;
+         if (wm_prog_key->multisample_fbo == BRW_SOMETIMES) {
+            struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s.prog_data);
+
+            check_dynamic_msaa_flag(bld.exec_all().group(8, 0),
+                                    wm_prog_data,
+                                    INTEL_MSAA_FLAG_MULTISAMPLE_FBO);
+            flag_reg = brw_flag_reg(0, 0);
+         }
+
+         emit_pixel_interpolater_send(bld,
+                                      FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+                                      dest,
+                                      fs_reg(), /* src */
+                                      msg_data,
+                                      flag_reg,
+                                      interpolation);
      }
-
-      fs_reg flag_reg;
-      struct brw_wm_prog_key *wm_prog_key = (struct brw_wm_prog_key *) s.key;
-      if (wm_prog_key->multisample_fbo == BRW_SOMETIMES) {
-         struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(s.prog_data);
-
-         check_dynamic_msaa_flag(bld.exec_all().group(8, 0),
-                                 wm_prog_data,
-                                 INTEL_MSAA_FLAG_MULTISAMPLE_FBO);
-         flag_reg = brw_flag_reg(0, 0);
-      }
-
-      emit_pixel_interpolater_send(bld,
-                                   FS_OPCODE_INTERPOLATE_AT_SAMPLE,
-                                   dest,
-                                   fs_reg(), /* src */
-                                   msg_data,
-                                   flag_reg,
-                                   interpolation);
      break;
   }