agx: Plumb in store instruction
This will be used for compute kernels (and transform feedback) in the (near) future. For now, let's get the opcode plumbed in the backend to reduce some of the rebase pain.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20446>
This commit is contained in:
@@ -239,6 +239,12 @@ op("device_load",
|
||||
encoding_32 = (0x05, 0x7F, 6, 8),
|
||||
srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False)
|
||||
|
||||
# sources are value, base, index
|
||||
# TODO: Consider permitting the short form
|
||||
op("device_store",
|
||||
encoding_32 = (0x45 | (1 << 47), 0, 8, _),
|
||||
dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False)
|
||||
|
||||
# sources are value, index
|
||||
# TODO: Consider permitting the short form
|
||||
op("uniform_store",
|
||||
|
@@ -132,6 +132,8 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, unsigned srcs,
|
||||
continue;
|
||||
if (I->op == AGX_OPCODE_ZS_EMIT && s != 0)
|
||||
continue;
|
||||
if (I->op == AGX_OPCODE_DEVICE_STORE && s != 2)
|
||||
continue;
|
||||
|
||||
if (float_src) {
|
||||
bool fp16 = (def->dest[0].size == AGX_SIZE_16);
|
||||
@@ -194,16 +196,18 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I)
|
||||
I->op == AGX_OPCODE_TEXTURE_SAMPLE ||
|
||||
(I->op == AGX_OPCODE_DEVICE_LOAD &&
|
||||
(s != 0 || def->src[0].value >= 256)) ||
|
||||
(I->op == AGX_OPCODE_DEVICE_STORE &&
|
||||
(s != 1 || def->src[0].value >= 256)) ||
|
||||
I->op == AGX_OPCODE_PHI || I->op == AGX_OPCODE_ZS_EMIT ||
|
||||
I->op == AGX_OPCODE_ST_TILE || I->op == AGX_OPCODE_LD_TILE ||
|
||||
I->op == AGX_OPCODE_BLOCK_IMAGE_STORE ||
|
||||
/*I->op == AGX_OPCODE_DEVICE_STORE ||*/
|
||||
I->op == AGX_OPCODE_UNIFORM_STORE || I->op == AGX_OPCODE_ST_VARY))
|
||||
continue;
|
||||
|
||||
/* ALU instructions cannot take 64-bit */
|
||||
if (def->src[0].size == AGX_SIZE_64 &&
|
||||
!(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0))
|
||||
!(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0) &&
|
||||
!(I->op == AGX_OPCODE_DEVICE_STORE && s == 1))
|
||||
continue;
|
||||
|
||||
agx_replace_src(I, s, def->src[0]);
|
||||
|
@@ -505,9 +505,11 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
|
||||
}
|
||||
|
||||
case AGX_OPCODE_DEVICE_LOAD:
|
||||
case AGX_OPCODE_DEVICE_STORE:
|
||||
case AGX_OPCODE_UNIFORM_STORE: {
|
||||
bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE;
|
||||
bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE;
|
||||
bool is_store = is_uniform_store;
|
||||
bool is_store = is_device_store || is_uniform_store;
|
||||
bool has_base = !is_uniform_store;
|
||||
|
||||
/* Uniform stores internally packed as 16-bit. Fix up the format, mask,
|
||||
@@ -523,11 +525,13 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
|
||||
reg.size = AGX_SIZE_16;
|
||||
}
|
||||
|
||||
unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0);
|
||||
|
||||
bool Rt, At = false, Ot;
|
||||
unsigned R = agx_pack_memory_reg(reg, &Rt);
|
||||
unsigned A = has_base ? agx_pack_memory_base(I->src[0], &At) : 0;
|
||||
unsigned O = agx_pack_memory_index(
|
||||
I->src[(has_base ? 1 : 0) + (is_store ? 1 : 0)], &Ot);
|
||||
unsigned A =
|
||||
has_base ? agx_pack_memory_base(I->src[is_store ? 1 : 0], &At) : 0;
|
||||
unsigned O = agx_pack_memory_index(I->src[offset_src], &Ot);
|
||||
unsigned u1 = is_uniform_store ? 0 : 1; // XXX
|
||||
unsigned u3 = 0;
|
||||
unsigned u4 = is_uniform_store ? 0 : 4; // XXX
|
||||
@@ -541,7 +545,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
|
||||
agx_opcodes_info[I->op].encoding.exact |
|
||||
((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) |
|
||||
((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) |
|
||||
(Ot ? (1 << 24) : 0) | (I->src[1].abs ? (1 << 25) : 0) |
|
||||
(Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) |
|
||||
(is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) |
|
||||
(u3 << 28) | (I->scoreboard << 30) |
|
||||
(((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |
|
||||
|
Reference in New Issue
Block a user