agx: Convert and clamp array indices in NIR

Rather than at backend IR translation time. This is considerably
simpler because we can use the txs lowering instead of special casing
array sizes. Unfortunately it generates worse code, but that gap should
close once nir_opt_preamble is wired in.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18652>
Author: Alyssa Rosenzweig
Date: 2022-09-18 11:38:12 -04:00
Committed by: Marge Bot
Parent: 1304f4578d
Commit: a1faab0b90

6 changed files with 83 additions and 71 deletions
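For reference, the rule being moved into NIR is the one from section 8.9 ("Texture Functions") of the GLSL ES 3.20 specification, quoted in the removed comment below: layer = max(0, min(d - 1, floor(z + 0.5))). Because a float-to-unsigned conversion on the GPU saturates negative inputs to zero, the max(0, ...) term falls out and only the min against d - 1 survives. A standalone scalar sketch of that arithmetic (illustrative only; the function name and the explicit fmaxf, which stands in for the hardware's saturating f32-to-u32 conversion, are not part of this change):

#include <math.h>
#include <stdint.h>

/* GLSL ES 3.20 §8.9 array-layer selection:
 *   layer = max(0, min(d - 1, floor(z + 0.5)))
 * The GPU's f32->u32 conversion saturates negatives to zero, so only the
 * unsigned min against (d - 1) is needed after conversion; plain C keeps
 * the fmaxf because casting a negative float to unsigned is undefined.
 */
static uint32_t
clamp_array_layer(float z, uint32_t layers)
{
   uint32_t max_layer = layers - 1;
   float rounded = fmaxf(floorf(z + 0.5f), 0.0f);

   if (rounded >= (float)max_layer)
      return max_layer;

   return (uint32_t)rounded;
}

The NIR pass added below uses nir_fround_even followed by nir_f2u32 for the conversion and nir_umin for the clamp; for txf the coordinate is already an integer, so only the clamp applies.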


@@ -1044,55 +1044,8 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_backend1:
         coords = index;

         /* Array textures are indexed by a floating-point in NIR, but by an
          * integer in AGX. Convert the array index from float-to-int for array
          * textures. The array index is the last source in NIR. The conversion
          * is according to the rule from 8.9 ("Texture Functions") of the GLSL
          * ES 3.20 specification:
          *
          *    max(0, min(d - 1, floor(layer + 0.5))) =
          *    max(0, min(d - 1, f32_to_u32(layer + 0.5))) =
          *    min(d - 1, f32_to_u32(layer + 0.5))
          *
          * For txf, the coordinates are already integers, so we only need to
          * clamp (not convert).
          */
         if (instr->is_array) {
            unsigned nr = nir_src_num_components(instr->src[i].src);
            agx_index channels[4] = {};

            for (unsigned i = 0; i < nr; ++i)
               channels[i] = agx_emit_extract(b, index, i);

            agx_index d1 = agx_indexed_sysval(b->shader,
                                              AGX_PUSH_ARRAY_SIZE_MINUS_1, AGX_SIZE_16,
                                              instr->texture_index, 1);

            agx_index layer = channels[nr - 1];

            if (!txf) {
               layer = agx_fadd(b, channels[nr - 1], agx_immediate_f(0.5f));
               layer = agx_convert(b, agx_immediate(AGX_CONVERT_F_TO_U32), layer,
                                   AGX_ROUND_RTZ);
            }

            agx_index layer16 = agx_temp(b->shader, AGX_SIZE_16);
            agx_mov_to(b, layer16, layer);

            layer = agx_icmpsel(b, layer16, d1, layer16, d1, AGX_ICOND_ULT);

            agx_index layer32 = agx_temp(b->shader, AGX_SIZE_32);
            agx_mov_to(b, layer32, layer);

            channels[nr - 1] = layer32;
            coords = agx_vec4(b, channels[0], channels[1], channels[2], channels[3]);
         } else {
            coords = index;
         }

         break;
      case nir_tex_src_lod:
@@ -1834,6 +1787,7 @@ agx_compile_shader_nir(nir_shader *nir,
   };

   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, agx_nir_lower_array_texture);
   NIR_PASS_V(nir, agx_lower_resinfo);
   NIR_PASS_V(nir, nir_legalize_16bit_sampler_srcs, tex_constraints);


@@ -49,11 +49,6 @@ enum agx_push_type {
   /* RGBA blend constant (FP32) */
   AGX_PUSH_BLEND_CONST,

   /* Array of 16-bit (array_size - 1) for indexed array textures, used to
    * lower access to indexed array textures
    */
   AGX_PUSH_ARRAY_SIZE_MINUS_1,

   AGX_PUSH_TEXTURE_BASE,

   /* Keep last */


@@ -741,6 +741,7 @@ void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);
bool agx_lower_resinfo(nir_shader *s);
bool agx_nir_lower_array_texture(nir_shader *s);
#ifdef __cplusplus
} /* extern C */


@@ -0,0 +1,79 @@
/*
* Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
* Copyright (C) 2020 Collabora Ltd.
* Copyright © 2016 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "agx_compiler.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_builtin_builder.h"
/*
* NIR indexes into array textures with unclamped floats (integer for txf). AGX
* requires the index to be a clamped integer. Lower tex_src_coord into
* tex_src_backend1 for array textures by type-converting and clamping.
*/
static bool
lower_array_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
if (instr->type != nir_instr_type_tex)
return false;
nir_tex_instr *tex = nir_instr_as_tex(instr);
b->cursor = nir_before_instr(instr);
if (!tex->is_array || nir_tex_instr_is_query(tex))
return false;
/* Get the coordinates */
int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
nir_ssa_def *coord = tex->src[coord_idx].src.ssa;
unsigned nr = nir_src_num_components(tex->src[coord_idx].src);
/* The layer is always the last component of the NIR coordinate */
unsigned lidx = nr - 1;
nir_ssa_def *layer = nir_channel(b, coord, lidx);
/* Round layer to nearest even */
if (tex->op != nir_texop_txf)
layer = nir_f2u32(b, nir_fround_even(b, layer));
/* Clamp to max layer = (# of layers - 1) for out-of-bounds handling */
nir_ssa_def *txs = nir_get_texture_size(b, tex);
nir_ssa_def *nr_layers = nir_channel(b, txs, lidx);
layer = nir_umin(b, layer, nir_iadd_imm(b, nr_layers, -1));
nir_tex_instr_remove_src(tex, coord_idx);
nir_tex_instr_add_src(tex, nir_tex_src_backend1,
nir_src_for_ssa(nir_vector_insert_imm(b, coord, layer,
lidx)));
return true;
}
bool
agx_nir_lower_array_texture(nir_shader *s)
{
return nir_shader_instructions_pass(s, lower_array_texture,
nir_metadata_block_index |
nir_metadata_dominance, NULL);
}
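A note on the design choice behind this file: the layer count is obtained with nir_get_texture_size, which emits a texture-size query, and agx_compile_shader_nir runs agx_lower_resinfo right after this pass (see the hunk above). That query therefore goes through the existing txs lowering the commit message refers to, rather than the dedicated AGX_PUSH_ARRAY_SIZE_MINUS_1 sysval removed in the other hunks.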


@@ -23,6 +23,7 @@ libasahi_agx_files = files(
  'agx_compile.c',
  'agx_dce.c',
  'agx_liveness.c',
  'agx_nir_lower_array_texture.c',
  'agx_lower_resinfo.c',
  'agx_lower_parallel_copy.c',
  'agx_lower_pseudo.c',


@@ -88,24 +88,6 @@ agx_push_location_direct(struct agx_context *ctx, struct agx_push push,
                                     sizeof(ctx->blend_color), 8);
   }

   case AGX_PUSH_ARRAY_SIZE_MINUS_1: {
      struct agx_stage *st = &ctx->stage[stage];
      unsigned count = st->texture_count;
      struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, count * sizeof(uint16_t), 8);
      uint16_t *d1 = ptr.cpu;

      for (unsigned i = 0; i < count; ++i) {
         unsigned array_size = 1;

         if (st->textures[i])
            array_size = st->textures[i]->base.texture->array_size;

         d1[i] = array_size - 1;
      }

      return ptr.gpu;
   }

   case AGX_PUSH_TEXTURE_BASE: {
      struct agx_ptr ptr = agx_pool_alloc_aligned(&batch->pool, sizeof(uint64_t), 8);
      uint64_t *address = ptr.cpu;