agx: Plumb in store instruction

This will be used for compute kernels (and transform feedback) in the (near)
future. For now, let's get the opcode plumbed in the backend to reduce some of
the rebase pain.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20446>
This commit is contained in:
Alyssa Rosenzweig
2022-12-02 21:41:22 -05:00
parent 6b645f12ad
commit ddbec45b6f
3 changed files with 21 additions and 7 deletions

View File

@@ -239,6 +239,12 @@ op("device_load",
encoding_32 = (0x05, 0x7F, 6, 8),
srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False)
# Store to device memory; counterpart of the "device_load" op.
# sources are value, base, index (three sources, no destinations).
# Takes the immediates FORMAT, MASK, SHIFT and SCOREBOARD, the same list that
# "device_load" declares.
# The first element of encoding_32 is 0x45 with bit 47 additionally set; the
# meaning of the remaining tuple fields is defined by op() and not visible
# here.
# NOTE(review): can_eliminate = False is presumably set because the op has no
# destination and is only useful for its memory write -- confirm against the
# op() definition.
# TODO: Consider permitting the short form
op("device_store",
encoding_32 = (0x45 | (1 << 47), 0, 8, _),
dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False)
# sources are value, index
# TODO: Consider permitting the short form
op("uniform_store",

View File

@@ -132,6 +132,8 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, unsigned srcs,
continue;
if (I->op == AGX_OPCODE_ZS_EMIT && s != 0)
continue;
if (I->op == AGX_OPCODE_DEVICE_STORE && s != 2)
continue;
if (float_src) {
bool fp16 = (def->dest[0].size == AGX_SIZE_16);
@@ -194,16 +196,18 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
I->op == AGX_OPCODE_TEXTURE_SAMPLE ||
(I->op == AGX_OPCODE_DEVICE_LOAD &&
(s != 0 || def->src[0].value >= 256)) ||
(I->op == AGX_OPCODE_DEVICE_STORE &&
(s != 1 || def->src[0].value >= 256)) ||
I->op == AGX_OPCODE_PHI || I->op == AGX_OPCODE_ZS_EMIT ||
I->op == AGX_OPCODE_ST_TILE || I->op == AGX_OPCODE_LD_TILE ||
I->op == AGX_OPCODE_BLOCK_IMAGE_STORE ||
/*I->op == AGX_OPCODE_DEVICE_STORE ||*/
I->op == AGX_OPCODE_UNIFORM_STORE || I->op == AGX_OPCODE_ST_VARY))
continue;
/* ALU instructions cannot take 64-bit */
if (def->src[0].size == AGX_SIZE_64 &&
!(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0))
!(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0) &&
!(I->op == AGX_OPCODE_DEVICE_STORE && s == 1))
continue;
agx_replace_src(I, s, def->src[0]);

View File

@@ -505,9 +505,11 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
}
case AGX_OPCODE_DEVICE_LOAD:
case AGX_OPCODE_DEVICE_STORE:
case AGX_OPCODE_UNIFORM_STORE: {
bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE;
bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
bool is_store = is_uniform_store;
bool is_store = is_device_store || is_uniform_store;
bool has_base = !is_uniform_store;
/* Uniform stores internally packed as 16-bit. Fix up the format, mask,
@@ -523,11 +525,13 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
reg.size = AGX_SIZE_16;
}
unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0);
bool Rt, At = false, Ot;
unsigned R = agx_pack_memory_reg(reg, &Rt);
unsigned A = has_base ? agx_pack_memory_base(I->src[0], &At) : 0;
unsigned O = agx_pack_memory_index(
I->src[(has_base ? 1 : 0) + (is_store ? 1 : 0)], &Ot);
unsigned A =
has_base ? agx_pack_memory_base(I->src[is_store ? 1 : 0], &At) : 0;
unsigned O = agx_pack_memory_index(I->src[offset_src], &Ot);
unsigned u1 = is_uniform_store ? 0 : 1; // XXX
unsigned u3 = 0;
unsigned u4 = is_uniform_store ? 0 : 4; // XXX
@@ -541,7 +545,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
agx_opcodes_info[I->op].encoding.exact |
((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) |
((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
(Ot ? (1 << 24) : 0) | (I->src[1].abs ? (1 << 25) : 0) |
(Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) |
(is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) |
(u3 << 28) | (I->scoreboard << 30) |
(((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |