agx: Add image write instruction
Model and pack what's in the hardware for this. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24258>
This commit is contained in:

committed by
Marge Bot

parent
2978f4eef2
commit
4505cb962c
@@ -34,6 +34,8 @@ should_lower(enum agx_opcode op, agx_index uniform, unsigned src_index)
|
||||
return src_index != 0;
|
||||
case AGX_OPCODE_LOCAL_STORE:
|
||||
return src_index != 1;
|
||||
case AGX_OPCODE_IMAGE_WRITE:
|
||||
return src_index != 3;
|
||||
case AGX_OPCODE_ZS_EMIT:
|
||||
case AGX_OPCODE_ST_TILE:
|
||||
case AGX_OPCODE_LD_TILE:
|
||||
|
@@ -351,6 +351,11 @@ op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
|
||||
op("wait_pix", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False)
|
||||
op("signal_pix", (0x58, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False)
|
||||
|
||||
# Sources are the data vector, the coordinate vector, the LOD, the bindless
|
||||
# table if present (zero for texture state registers), and texture index.
|
||||
op("image_write", (0xF1 | (1 << 23) | (9 << 43), 0xFF, 6, 8), dests = 0, srcs = 5, imms
|
||||
= [DIM], can_eliminate = False)
|
||||
|
||||
# Sources are the image and the offset within shared memory
|
||||
# TODO: Do we need the short encoding?
|
||||
op("block_image_store", (0xB1, 0xFF, 10, _), dests = 0, srcs = 2,
|
||||
|
@@ -104,6 +104,30 @@ agx_optimizer_fmov(agx_instr **defs, agx_instr *ins)
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
image_write_source_can_be_immediate(agx_instr *I, unsigned s)
|
||||
{
|
||||
assert(I->op == AGX_OPCODE_IMAGE_WRITE);
|
||||
|
||||
/* LOD can always be immediate. Actually, it's just zero so far, we don't
|
||||
* support nonzero LOD for images yet.
|
||||
*/
|
||||
if (s == 2)
|
||||
return true;
|
||||
|
||||
/* If the "bindless" source (source 3) is an immediate, it means we don't
|
||||
* have a bindless image, instead we have a texture state index. We're
|
||||
* allowed to have immediate texture state registers (source 4). However,
|
||||
* we're not allowed to have immediate bindless offsets (also source 4).
|
||||
*/
|
||||
bool is_texture_state = (I->src[3].type == AGX_INDEX_IMMEDIATE);
|
||||
if (s == 4 && is_texture_state)
|
||||
return true;
|
||||
|
||||
/* Otherwise, must be from a register */
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, unsigned srcs,
|
||||
bool is_float)
|
||||
@@ -140,6 +164,10 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, unsigned srcs,
|
||||
s != 1)
|
||||
continue;
|
||||
|
||||
if (I->op == AGX_OPCODE_IMAGE_WRITE &&
|
||||
!image_write_source_can_be_immediate(I, s))
|
||||
continue;
|
||||
|
||||
if (float_src) {
|
||||
bool fp16 = (def->dest[0].size == AGX_SIZE_16);
|
||||
assert(fp16 || (def->dest[0].size == AGX_SIZE_32));
|
||||
|
@@ -115,6 +115,31 @@ agx_pack_lod(agx_index index, unsigned *lod_mode)
|
||||
return index.value;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
agx_pack_pbe_source(agx_index index, bool *flag)
|
||||
{
|
||||
assert(index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);
|
||||
assert_register_is_aligned(index);
|
||||
|
||||
*flag = (index.size == AGX_SIZE_32);
|
||||
return index.value;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
agx_pack_pbe_lod(agx_index index, bool *flag)
|
||||
{
|
||||
assert(index.size == AGX_SIZE_16);
|
||||
|
||||
if (index.type == AGX_INDEX_IMMEDIATE)
|
||||
*flag = true;
|
||||
else if (index.type == AGX_INDEX_REGISTER)
|
||||
*flag = false;
|
||||
else
|
||||
unreachable("Invalid PBE LOD type");
|
||||
|
||||
return index.value;
|
||||
}
|
||||
|
||||
/* Load/stores have their own operands */
|
||||
|
||||
static unsigned
|
||||
@@ -786,6 +811,47 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
|
||||
break;
|
||||
}
|
||||
|
||||
case AGX_OPCODE_IMAGE_WRITE: {
|
||||
bool Ct, Dt, Rt, Cs;
|
||||
unsigned Tt;
|
||||
unsigned U;
|
||||
|
||||
unsigned R = agx_pack_pbe_source(I->src[0], &Rt);
|
||||
unsigned C = agx_pack_sample_coords(I->src[1], &Ct, &Cs);
|
||||
unsigned D = agx_pack_pbe_lod(I->src[2], &Dt);
|
||||
unsigned T = agx_pack_texture(I->src[3], I->src[4], &U, &Tt);
|
||||
bool rtz = false;
|
||||
|
||||
assert(U < (1 << 5));
|
||||
assert(D < (1 << 8));
|
||||
assert(R < (1 << 8));
|
||||
assert(C < (1 << 8));
|
||||
assert(T < (1 << 8));
|
||||
assert(Tt < (1 << 2));
|
||||
|
||||
uint64_t raw = agx_opcodes_info[I->op].encoding.exact |
|
||||
(Rt ? (1 << 8) : 0) | ((R & BITFIELD_MASK(6)) << 9) |
|
||||
((C & BITFIELD_MASK(6)) << 16) | (Ct ? (1 << 22) : 0) |
|
||||
((D & BITFIELD_MASK(6)) << 24) | (Dt ? (1u << 31) : 0) |
|
||||
(((uint64_t)(T & BITFIELD_MASK(6))) << 32) |
|
||||
(((uint64_t)Tt) << 38) |
|
||||
(((uint64_t)I->dim & BITFIELD_MASK(3)) << 40) |
|
||||
(Cs ? (1ull << 47) : 0) | (((uint64_t)U) << 48) |
|
||||
(rtz ? (1ull << 53) : 0) |
|
||||
((I->dim & BITFIELD_BIT(4)) ? (1ull << 55) : 0) |
|
||||
(((uint64_t)R >> 6) << 56) | (((uint64_t)C >> 6) << 58) |
|
||||
(((uint64_t)D >> 6) << 60) | (((uint64_t)T >> 6) << 62);
|
||||
|
||||
if (raw >> 48) {
|
||||
raw |= BITFIELD_BIT(15);
|
||||
memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
|
||||
} else {
|
||||
memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case AGX_OPCODE_BLOCK_IMAGE_STORE: {
|
||||
enum agx_format F = I->format;
|
||||
assert(F < 0x10);
|
||||
|
@@ -230,6 +230,14 @@ agx_read_registers(const agx_instr *I, unsigned s)
|
||||
return 1;
|
||||
}
|
||||
|
||||
case AGX_OPCODE_IMAGE_WRITE:
|
||||
if (s == 0)
|
||||
return 4 * size /* data */;
|
||||
else if (s == 1)
|
||||
return agx_coordinate_registers(I);
|
||||
else
|
||||
return size;
|
||||
|
||||
case AGX_OPCODE_TEXTURE_LOAD:
|
||||
case AGX_OPCODE_TEXTURE_SAMPLE:
|
||||
if (s == 0) {
|
||||
|
Reference in New Issue
Block a user