agx: Implement nir_texop_txf_ms

Multisampled texture fetch (txf_ms) is encoded like regular txf. However, we now
need to pack the multisample index in the right place, which we do by extending
our existing NIR texture source lowering pass. 2D MS arrays use a new value of
dim, which requires tweaking the encoding slightly. Otherwise, everything is
bog standard.
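
For reference, the lowering below combines the 16-bit sample index and the
clamped 16-bit layer into one 32-bit coordinate component via
nir_pack_32_2x16_split, with the sample index in the low half. A minimal C
sketch of the resulting layout (the helper name is hypothetical, not part of
this commit):

   #include <stdint.h>

   /* Illustration only: mirrors nir_pack_32_2x16_split(ms_idx, layer) as
    * used by the lowering pass: sample index low, layer high. */
   static inline uint32_t
   pack_sample_and_layer(uint16_t ms_idx, uint16_t layer)
   {
      return ((uint32_t)layer << 16) | ms_idx;
   }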

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19871>
Alyssa Rosenzweig, 2022-11-18 22:40:39 -05:00, committed by Marge Bot
parent 53d013a605
commit db0461a8d0
7 changed files with 71 additions and 27 deletions


@@ -1094,6 +1094,7 @@ agx_lod_mode_for_nir(nir_texop op)
    case nir_texop_txd: return AGX_LOD_MODE_LOD_GRAD;
    case nir_texop_txl: return AGX_LOD_MODE_LOD_MIN;
    case nir_texop_txf: return AGX_LOD_MODE_LOD_MIN;
+   case nir_texop_txf_ms: return AGX_LOD_MODE_AUTO_LOD; /* no mipmapping */
    default: unreachable("Unhandled texture op");
    }
 }
@@ -1108,13 +1109,12 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
             compare = agx_null(),
             packed_offset = agx_null();

-   bool txf = instr->op == nir_texop_txf;
+   bool txf = (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms);

    for (unsigned i = 0; i < instr->num_srcs; ++i) {
       agx_index index = agx_src_index(&instr->src[i].src);

       switch (instr->src[i].src_type) {
-      case nir_tex_src_coord:
       case nir_tex_src_backend1:
          coords = index;
          break;
@@ -1175,7 +1175,6 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
          /* handled above */
          break;

-      case nir_tex_src_ms_index:
       case nir_tex_src_texture_offset:
       case nir_tex_src_sampler_offset:
       default:
@@ -1939,12 +1938,18 @@ agx_preprocess_nir(nir_shader *nir)
    nir_tex_src_type_constraints tex_constraints = {
       [nir_tex_src_lod] = { true, 16 },
       [nir_tex_src_bias] = { true, 16 },
+      [nir_tex_src_ms_index] = { true, 16 },
    };

    NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
+   NIR_PASS_V(nir, nir_legalize_16bit_sampler_srcs, tex_constraints);
+
+   /* Lower texture sources after legalizing types (as the lowering depends on
+    * 16-bit multisample indices) but before lowering queries (as the lowering
+    * generates txs for array textures).
+    */
    NIR_PASS_V(nir, agx_nir_lower_array_texture);
    NIR_PASS_V(nir, agx_lower_resinfo);
-   NIR_PASS_V(nir, nir_legalize_16bit_sampler_srcs, tex_constraints);

    nir->info.io_lowered = true;
 }


@@ -316,7 +316,7 @@ typedef struct {
    bool invert_cond : 1;

    /* TODO: Handle tex ops more efficient */
-   enum agx_dim dim : 3;
+   enum agx_dim dim : 4;
    bool offset : 1;
    bool shadow : 1;

@@ -79,7 +79,8 @@ agx_txs(nir_builder *b, nir_tex_instr *tex)
    /* Add LOD offset to first level to get the interesting LOD */
    int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
    if (lod_idx >= 0)
-      lod = nir_iadd(b, lod, nir_ssa_for_src(b, tex->src[lod_idx].src, 1));
+      lod = nir_iadd(b, lod, nir_u2u32(b, nir_ssa_for_src(b,
+                                             tex->src[lod_idx].src, 1)));

    /* Add 1 to width-1, height-1 to get base dimensions */
    nir_ssa_def *width = nir_iadd_imm(b, width_m1, 1);

@@ -28,6 +28,19 @@
 #include "compiler/nir/nir_builder.h"
 #include "compiler/nir/nir_builtin_builder.h"

+static nir_ssa_def *
+steal_tex_src(nir_tex_instr *tex, nir_tex_src_type type_)
+{
+   int idx = nir_tex_instr_src_index(tex, type_);
+
+   if (idx < 0)
+      return NULL;
+
+   nir_ssa_def *ssa = tex->src[idx].src.ssa;
+   nir_tex_instr_remove_src(tex, idx);
+   return ssa;
+}
+
 /*
  * NIR indexes into array textures with unclamped floats (integer for txf). AGX
  * requires the index to be a clamped integer. Lower tex_src_coord into
@@ -42,31 +55,54 @@ lower_array_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
    nir_tex_instr *tex = nir_instr_as_tex(instr);
    b->cursor = nir_before_instr(instr);

-   if (!tex->is_array || nir_tex_instr_is_query(tex))
+   if (nir_tex_instr_is_query(tex))
       return false;

-   /* Get the coordinates */
-   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
-   nir_ssa_def *coord = tex->src[coord_idx].src.ssa;
-   unsigned nr = nir_src_num_components(tex->src[coord_idx].src);
+   nir_ssa_def *coord = steal_tex_src(tex, nir_tex_src_coord);
+   nir_ssa_def *ms_idx = steal_tex_src(tex, nir_tex_src_ms_index);

-   /* The layer is always the last component of the NIR coordinate */
-   unsigned lidx = nr - 1;
-   nir_ssa_def *layer = nir_channel(b, coord, lidx);
+   /* The layer is always the last component of the NIR coordinate, split it off
+    * because we'll need to swizzle.
+    */
+   nir_ssa_def *layer = NULL;

-   /* Round layer to nearest even */
-   if (tex->op != nir_texop_txf)
-      layer = nir_f2u32(b, nir_fround_even(b, layer));
+   if (tex->is_array) {
+      unsigned lidx = coord->num_components - 1;
+      nir_ssa_def *unclamped_layer = nir_channel(b, coord, lidx);
+      coord = nir_trim_vector(b, coord, lidx);

-   /* Clamp to max layer = (# of layers - 1) for out-of-bounds handling */
-   nir_ssa_def *txs = nir_get_texture_size(b, tex);
-   nir_ssa_def *nr_layers = nir_channel(b, txs, lidx);
-   layer = nir_umin(b, layer, nir_iadd_imm(b, nr_layers, -1));
+      /* Round layer to nearest even */
+      if (tex->op != nir_texop_txf && tex->op != nir_texop_txf_ms)
+         unclamped_layer = nir_f2u32(b, nir_fround_even(b, unclamped_layer));

-   nir_tex_instr_remove_src(tex, coord_idx);
-   nir_tex_instr_add_src(tex, nir_tex_src_backend1,
-                         nir_src_for_ssa(nir_vector_insert_imm(b, coord, layer,
-                                                               lidx)));
+      /* Clamp to max layer = (# of layers - 1) for out-of-bounds handling.
+       * Layer must be 16-bits for the hardware, drop top bits after clamping.
+       */
+      nir_ssa_def *txs = nir_get_texture_size(b, tex);
+      nir_ssa_def *nr_layers = nir_channel(b, txs, lidx);
+      nir_ssa_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
+      layer = nir_u2u16(b, nir_umin(b, unclamped_layer, max_layer));
+   }
+
+   /* Combine layer and multisample index into 32-bit so we don't need a vec5 or
+    * vec6 16-bit coordinate tuple, which would be inconvenient in NIR for
+    * little benefit (a minor optimization, I guess).
+    */
+   nir_ssa_def *sample_array =
+      (ms_idx && layer) ? nir_pack_32_2x16_split(b, ms_idx, layer) :
+      ms_idx            ? nir_u2u32(b, ms_idx) :
+      layer             ? nir_u2u32(b, layer) :
+                          NULL;
+
+   /* Combine into the final 32-bit tuple */
+   if (sample_array != NULL) {
+      unsigned end = coord->num_components;
+      coord = nir_pad_vector(b, coord, end + 1);
+      coord = nir_vector_insert_imm(b, coord, sample_array, end);
+   }
+
+   nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(coord));

    return true;
 }
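
Worked example (hypothetical instruction, not from the diff): after this pass,
a txf_ms from a 2D multisampled array at integer texel (x, y), layer l, sample
s has its coord and ms_index sources stolen and replaced by a single backend1
source:

   vec3(x, y, pack_32_2x16_split(s, u2u16(umin(l, layers - 1))))

The earlier nir_legalize_16bit_sampler_srcs call is what guarantees s is
already 16-bit here, so it packs cleanly into the low half of the combined
component.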


@@ -103,7 +103,8 @@ DIM = enum("dim", {
    4: '2d_ms',
    5: '3d',
    6: 'cube',
-   7: 'cube_array'
+   7: 'cube_array',
+   8: '2d_ms_array',
 })
OFFSET = immediate("offset", "bool")


@@ -618,6 +618,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
       uint32_t extend =
          ((U & BITFIELD_MASK(5)) << 0) |
          (kill << 5) |
+         ((I->dim >> 3) << 7) |
          ((R >> 6) << 8) |
          ((C >> 6) << 10) |
          ((D >> 6) << 12) |
@@ -644,7 +645,7 @@
          (q2 << 30) |
          (((uint64_t) (T & BITFIELD_MASK(6))) << 32) |
          (((uint64_t) Tt) << 38) |
-         (((uint64_t) I->dim) << 40) |
+         (((uint64_t) (I->dim & BITFIELD_MASK(3))) << 40) |
          (((uint64_t) q3) << 43) |
          (((uint64_t) I->mask) << 48) |
          (((uint64_t) I->lod_mode) << 52) |
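
The new 2d_ms_array dim (value 8) is the first that needs a fourth bit, which
is why the field widens to 4 bits and the packing splits it: the low three
bits stay in the legacy field at bit 40 of the main word, while the high bit
lands at bit 7 of the extension word. A self-contained sketch of the split
(illustrative helper, not code from this commit):

   #include <stdint.h>

   /* Split a 4-bit dim across the two texture instruction words, matching
    * the shifts in the diff above. */
   static void
   split_dim(unsigned dim, uint64_t *word, uint32_t *extend)
   {
      *word   |= ((uint64_t)(dim & 0x7)) << 40; /* low 3 bits, old location */
      *extend |= ((uint32_t)(dim >> 3)) << 7;   /* high bit, extension word */
   }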


@@ -23,7 +23,7 @@ libasahi_agx_files = files(
   'agx_compile.c',
   'agx_dce.c',
   'agx_liveness.c',
-  'agx_nir_lower_array_texture.c',
+  'agx_nir_lower_texture.c',
   'agx_nir_lower_load_mask.c',
   'agx_nir_opt_preamble.c',
   'agx_lower_64bit.c',