agx: Add uniform_store instruction

Moves data from GPRs to uniform registers. Encoded like load/store instructions, so reuse the device_load packing code. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18813>
2022-09-25 20:08:20 -04:00
parent ca4fe9d3f5
commit 05009d1dad
3 changed files with 38 additions and 13 deletions
--- a/src/asahi/compiler/agx_opcodes.py
+++ b/src/asahi/compiler/agx_opcodes.py
@@ -224,6 +224,12 @@ op("device_load",
      encoding_32 = (0x05, 0x7F, 6, 8),
      srcs = 2, imms = [FORMAT, MASK, SCOREBOARD])

+# sources are value, index
+# TODO: Consider permitting the short form
+op("uniform_store",
+      encoding_32 = ((0b111 << 27) | 0b1000101 | (1 << 47), 0, 8, _),
+      dests = 0, srcs = 2, can_eliminate = False)
+
 op("wait", (0x38, 0xFF, 2, _), dests = 0,
      can_eliminate = False, imms = [SCOREBOARD])

--- a/src/asahi/compiler/agx_optimizer.c
+++ b/src/asahi/compiler/agx_optimizer.c
@@ -197,7 +197,7 @@ agx_optimizer_forward(agx_context *ctx)
      /* Inline immediates if we can. TODO: systematic */
      if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
          I->op != AGX_OPCODE_COLLECT && I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
-          I->op != AGX_OPCODE_TEXTURE_LOAD)
+          I->op != AGX_OPCODE_TEXTURE_LOAD && I->op != AGX_OPCODE_UNIFORM_STORE)
         agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
   }

--- a/src/asahi/compiler/agx_pack.c
+++ b/src/asahi/compiler/agx_pack.c
@@ -528,28 +528,47 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
   }

   case AGX_OPCODE_DEVICE_LOAD:
+   case AGX_OPCODE_UNIFORM_STORE:
   {
-      assert(I->mask != 0);
-      assert(I->format <= 0x10);
+      bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
+      bool is_store = is_uniform_store;
+      bool has_base = !is_uniform_store;

-      bool Rt, At, Ot;
-      unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
-      unsigned A = agx_pack_memory_base(I->src[0], &At);
-      unsigned O = agx_pack_memory_index(I->src[1], &Ot);
-      unsigned u1 = 1; // XXX
+      /* Uniform stores internally packed as 16-bit. Fix up the format, mask,
+       * and size so we can use scalar 32-bit values in the IR and avoid
+       * special casing earlier in the compiler.
+       */
+      enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
+      agx_index reg = is_store ? I->src[0] : I->dest[0];
+      unsigned mask = I->mask;
+
+      if (is_uniform_store) {
+         mask = BITFIELD_MASK(agx_size_align_16(reg.size));
+         reg.size = AGX_SIZE_16;
+      }
+
+      bool Rt, At = false, Ot;
+      unsigned R = agx_pack_memory_reg(reg, &Rt);
+      unsigned A = has_base ? agx_pack_memory_base(I->src[0], &At) : 0;
+      unsigned O = agx_pack_memory_index(I->src[(has_base ? 1 : 0) + (is_store ? 1 : 0)], &Ot);
+      unsigned u1 = is_uniform_store ? 0 : 1; // XXX
      unsigned u3 = 0;
-      unsigned u4 = 4; // XXX
+      unsigned u4 = is_uniform_store ? 0 : 4; // XXX
      unsigned u5 = 0;
      bool L = true; /* TODO: when would you want short? */

+      assert(mask != 0);
+      assert(format <= 0x10);
+
      uint64_t raw =
-            0x05 |
-            ((I->format & BITFIELD_MASK(3)) << 7) |
+            agx_opcodes_info[I->op].encoding.exact |
+            ((format & BITFIELD_MASK(3)) << 7) |
            ((R & BITFIELD_MASK(6)) << 10) |
            ((A & BITFIELD_MASK(4)) << 16) |
            ((O & BITFIELD_MASK(4)) << 20) |
            (Ot ? (1 << 24) : 0) |
            (I->src[1].abs ? (1 << 25) : 0) |
+            (is_uniform_store ? (2 << 25) : 0) |
            (u1 << 26) |
            (At << 27) |
            (u3 << 28) |
@@ -560,10 +579,10 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
            (((uint64_t) I->shift) << 42) |
            (((uint64_t) u4) << 44) |
            (L ? (1ull << 47) : 0) |
-            (((uint64_t) (I->format >> 3)) << 48) |
+            (((uint64_t) (format >> 3)) << 48) |
            (((uint64_t) Rt) << 49) |
            (((uint64_t) u5) << 50) |
-            (((uint64_t) I->mask) << 52) |
+            (((uint64_t) mask) << 52) |
            (((uint64_t) (O >> 8)) << 56);

      unsigned size = L ? 8 : 6;