diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py
index e8cf206dd27..7900d745a94 100644
--- a/src/asahi/compiler/agx_opcodes.py
+++ b/src/asahi/compiler/agx_opcodes.py
@@ -224,6 +224,12 @@
 op("device_load",
       encoding_32 = (0x05, 0x7F, 6, 8),
       srcs = 2, imms = [FORMAT, MASK, SCOREBOARD])
 
+# sources are value, index
+# TODO: Consider permitting the short form
+op("uniform_store",
+      encoding_32 = ((0b111 << 27) | 0b1000101 | (1 << 47), 0, 8, _),
+      dests = 0, srcs = 2, can_eliminate = False)
+
 op("wait", (0x38, 0xFF, 2, _), dests = 0,
       can_eliminate = False, imms = [SCOREBOARD])
diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c
index 94e9fcbb192..86a107a29bc 100644
--- a/src/asahi/compiler/agx_optimizer.c
+++ b/src/asahi/compiler/agx_optimizer.c
@@ -197,7 +197,7 @@ agx_optimizer_forward(agx_context *ctx)
       /* Inline immediates if we can. TODO: systematic */
       if (I->op != AGX_OPCODE_ST_VARY && I->op != AGX_OPCODE_ST_TILE &&
           I->op != AGX_OPCODE_COLLECT && I->op != AGX_OPCODE_TEXTURE_SAMPLE &&
-          I->op != AGX_OPCODE_TEXTURE_LOAD)
+          I->op != AGX_OPCODE_TEXTURE_LOAD && I->op != AGX_OPCODE_UNIFORM_STORE)
          agx_optimizer_inline_imm(defs, I, info.nr_srcs, info.is_float);
    }
 
diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c
index a4a3c75d82c..b12047b6580 100644
--- a/src/asahi/compiler/agx_pack.c
+++ b/src/asahi/compiler/agx_pack.c
@@ -528,28 +528,47 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
    }
 
    case AGX_OPCODE_DEVICE_LOAD:
+   case AGX_OPCODE_UNIFORM_STORE:
    {
-      assert(I->mask != 0);
-      assert(I->format <= 0x10);
+      bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
+      bool is_store = is_uniform_store;
+      bool has_base = !is_uniform_store;
 
-      bool Rt, At, Ot;
-      unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);
-      unsigned A = agx_pack_memory_base(I->src[0], &At);
-      unsigned O = agx_pack_memory_index(I->src[1], &Ot);
-      unsigned u1 = 1; // XXX
+      /* Uniform stores internally packed as 16-bit. Fix up the format, mask,
+       * and size so we can use scalar 32-bit values in the IR and avoid
+       * special casing earlier in the compiler.
+       */
+      enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
+      agx_index reg = is_store ? I->src[0] : I->dest[0];
+      unsigned mask = I->mask;
+
+      if (is_uniform_store) {
+         mask = BITFIELD_MASK(agx_size_align_16(reg.size));
+         reg.size = AGX_SIZE_16;
+      }
+
+      bool Rt, At = false, Ot;
+      unsigned R = agx_pack_memory_reg(reg, &Rt);
+      unsigned A = has_base ? agx_pack_memory_base(I->src[0], &At) : 0;
+      unsigned O = agx_pack_memory_index(I->src[(has_base ? 1 : 0) + (is_store ? 1 : 0)], &Ot);
+      unsigned u1 = is_uniform_store ? 0 : 1; // XXX
       unsigned u3 = 0;
-      unsigned u4 = 4; // XXX
+      unsigned u4 = is_uniform_store ? 0 : 4; // XXX
       unsigned u5 = 0;
       bool L = true; /* TODO: when would you want short? */
 
+      assert(mask != 0);
+      assert(format <= 0x10);
+
       uint64_t raw =
-            0x05 |
-            ((I->format & BITFIELD_MASK(3)) << 7) |
+            agx_opcodes_info[I->op].encoding.exact |
+            ((format & BITFIELD_MASK(3)) << 7) |
             ((R & BITFIELD_MASK(6)) << 10) |
             ((A & BITFIELD_MASK(4)) << 16) |
             ((O & BITFIELD_MASK(4)) << 20) |
             (Ot ? (1 << 24) : 0) |
             (I->src[1].abs ? (1 << 25) : 0) |
+            (is_uniform_store ? (2 << 25) : 0) |
             (u1 << 26) |
             (At << 27) |
             (u3 << 28) |
@@ -560,10 +579,10 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx
             (((uint64_t) I->shift) << 42) |
             (((uint64_t) u4) << 44) |
             (L ? (1ull << 47) : 0) |
-            (((uint64_t) (I->format >> 3)) << 48) |
+            (((uint64_t) (format >> 3)) << 48) |
             (((uint64_t) Rt) << 49) |
             (((uint64_t) u5) << 50) |
-            (((uint64_t) I->mask) << 52) |
+            (((uint64_t) mask) << 52) |
             (((uint64_t) (O >> 8)) << 56);
 
       unsigned size = L ? 8 : 6;
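
Side note on the 16-bit fix-up in the agx_pack.c hunk above: the sketch below is a standalone illustration, not part of the patch, of how the write mask ends up covering the 16-bit halves of a wider register. It assumes the usual AGX size encoding (1 << size halves, the quantity agx_size_align_16() is used for) and BITFIELD_MASK() semantics; the enum values and helper here are local stand-ins rather than the real Mesa headers.

#include <assert.h>
#include <stdio.h>

/* Local stand-ins for the Mesa definitions the patch relies on (assumed
 * semantics): AGX register sizes and the BITFIELD_MASK() macro. */
enum agx_size { AGX_SIZE_16 = 0, AGX_SIZE_32 = 1, AGX_SIZE_64 = 2 };
#define BITFIELD_MASK(n) (((n) >= 32) ? ~0u : ((1u << (n)) - 1u))

/* Number of 16-bit halves a value of the given size occupies,
 * mirroring what agx_size_align_16() is used for in the patch. */
static unsigned size_align_16(enum agx_size size)
{
   return 1u << size; /* 16-bit -> 1, 32-bit -> 2, 64-bit -> 4 */
}

int main(void)
{
   /* A scalar 32-bit IR value written by uniform_store is re-described as
    * two 16-bit components, so the packed mask becomes 0b11; a 64-bit
    * value becomes four components with mask 0b1111. */
   assert(BITFIELD_MASK(size_align_16(AGX_SIZE_32)) == 0x3);
   assert(BITFIELD_MASK(size_align_16(AGX_SIZE_64)) == 0xf);

   printf("32-bit store mask: 0x%x\n", BITFIELD_MASK(size_align_16(AGX_SIZE_32)));
   printf("64-bit store mask: 0x%x\n", BITFIELD_MASK(size_align_16(AGX_SIZE_64)));
   return 0;
}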