/*
 * Copyright 2021 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_builder.h"
#include "agx_compiler.h"

/* SSA-based register allocator */
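
/*
 * Roughly: liveness is computed up front, then each block is visited in
 * dominance order, freeing killed sources and assigning every SSA destination
 * a contiguous range of registers. Collect/split pseudo-instructions get
 * affinity-based placement so they can later be lowered to (often no-op)
 * parallel copies, and phis become parallel copies on the incoming edges.
 */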

struct ra_ctx {
   agx_context *shader;
   agx_block *block;
   uint8_t *ssa_to_reg;
   uint8_t *ncomps;
   BITSET_WORD *visited;
   BITSET_WORD *used_regs;

   /* For affinities */
   agx_instr **src_to_collect;

   /* Maximum number of registers that RA is allowed to use */
   unsigned bound;
};

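/* Register counts below are in 16-bit units, as returned by
 * agx_size_align_16 (so a 32-bit value takes 2). For example, a
 * texture_sample with a 32-bit destination clobbers 4 * 2 = 8 units even
 * when the write mask selects fewer components.
 */
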
/** Returns number of registers written by an instruction */
unsigned
agx_write_registers(const agx_instr *I, unsigned d)
{
   unsigned size = agx_size_align_16(I->dest[d].size);

   switch (I->op) {
   case AGX_OPCODE_ITER:
   case AGX_OPCODE_ITERPROJ:
      assert(1 <= I->channels && I->channels <= 4);
      return I->channels * size;

   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      /* Even when masked out, these clobber 4 registers */
      return 4 * size;

   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_LOCAL_LOAD:
   case AGX_OPCODE_LD_TILE:
      return util_bitcount(I->mask) * size;

   case AGX_OPCODE_LDCF:
      return 6;
   case AGX_OPCODE_COLLECT:
      return I->nr_srcs * agx_size_align_16(I->src[0].size);
   default:
      return size;
   }
}

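/* All non-null destinations of a split share a single size, which this
 * returns. Used for counting split sources below and for lowering splits to
 * copies at the end of RA.
 */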
static inline enum agx_size
agx_split_width(const agx_instr *I)
{
   enum agx_size width = ~0;

   agx_foreach_dest(I, d) {
      if (I->dest[d].type == AGX_INDEX_NULL)
         continue;
      else if (width != ~0)
         assert(width == I->dest[d].size);
      else
         width = I->dest[d].size;
   }

   assert(width != ~0 && "should have been DCE'd");
   return width;
}

/*
 * Return number of registers required for coordinates for a
 * texture/image instruction. We handle layer + sample index as 32-bit even when
 * only the lower 16 bits are present.
 */
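/* For example, a 2D array needs x, y and the layer index: 3 components of
 * 2 registers each, hence 2 * 3.
 */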
static unsigned
agx_coordinate_registers(const agx_instr *I)
{
   switch (I->dim) {
   case AGX_DIM_1D:
      return 2 * 1;
   case AGX_DIM_1D_ARRAY:
      return 2 * 2;
   case AGX_DIM_2D:
      return 2 * 2;
   case AGX_DIM_2D_ARRAY:
      return 2 * 3;
   case AGX_DIM_2D_MS:
      return 2 * 3;
   case AGX_DIM_3D:
      return 2 * 3;
   case AGX_DIM_CUBE:
      return 2 * 3;
   case AGX_DIM_CUBE_ARRAY:
      return 2 * 4;
   case AGX_DIM_2D_MS_ARRAY:
      return 2 * 3;
   }

   unreachable("Invalid texture dimension");
}

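/* Worked example for the gradient case below: a 2D texture_sample with
 * explicit derivatives reads 2 coordinate components * 2 derivatives
 * (d/dx, d/dy) * 2 registers per 32-bit value = 8 registers for its LOD
 * source.
 */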
unsigned
agx_read_registers(const agx_instr *I, unsigned s)
{
   unsigned size = agx_size_align_16(I->src[s].size);

   switch (I->op) {
   case AGX_OPCODE_SPLIT:
      return I->nr_dests * agx_size_align_16(agx_split_width(I));

   case AGX_OPCODE_DEVICE_STORE:
   case AGX_OPCODE_LOCAL_STORE:
   case AGX_OPCODE_ST_TILE:
      if (s == 0)
         return util_bitcount(I->mask) * size;
      else
         return size;

   case AGX_OPCODE_ZS_EMIT:
      if (s == 1) {
         /* Depth (bit 0) is fp32, stencil (bit 1) is u16 in the hw but we pad
          * up to u32 for simplicity
          */
         bool z = !!(I->zs & 1);
         bool s = !!(I->zs & 2);
         assert(z || s);

         return (z && s) ? 4 : z ? 2 : 1;
      } else {
         return 1;
      }

   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      if (s == 0) {
         return agx_coordinate_registers(I);
      } else if (s == 1) {
         /* LOD */
         if (I->lod_mode == AGX_LOD_MODE_LOD_GRAD) {
            switch (I->dim) {
            case AGX_DIM_1D:
            case AGX_DIM_1D_ARRAY:
               return 2 * 2 * 1;
            case AGX_DIM_2D:
            case AGX_DIM_2D_ARRAY:
            case AGX_DIM_2D_MS_ARRAY:
            case AGX_DIM_2D_MS:
               return 2 * 2 * 2;
            case AGX_DIM_CUBE:
            case AGX_DIM_CUBE_ARRAY:
            case AGX_DIM_3D:
               return 2 * 2 * 3;
            }

            unreachable("Invalid texture dimension");
         } else {
            return 1;
         }
      } else if (s == 4) {
         /* Compare/offset */
         return 2 * ((!!I->shadow) + (!!I->offset));
      } else {
         return size;
      }

   case AGX_OPCODE_ATOMIC:
   case AGX_OPCODE_LOCAL_ATOMIC:
      if (s == 0 && I->atomic_opc == AGX_ATOMIC_OPC_CMPXCHG)
         return size * 2;
      else
         return size;

   default:
      return size;
   }
}

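/* Find the lowest free contiguous run of `count` registers, scanning at
 * multiples of `align`. Failure is fatal: the register file is dumped and we
 * abort, as there is no spilling fallback at this point.
 */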
static unsigned
find_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max)
{
   assert(count >= 1);

   for (unsigned reg = 0; reg + count <= max; reg += align) {
      if (!BITSET_TEST_RANGE(used_regs, reg, reg + count - 1))
         return reg;
   }

   /* Couldn't find a free register, dump the state of the register file */
   fprintf(stderr, "Failed to find register of size %u aligned %u max %u.\n",
           count, align, max);

   fprintf(stderr, "Register file:\n");
   for (unsigned i = 0; i < BITSET_WORDS(max); ++i)
      fprintf(stderr, " %08X\n", used_regs[i]);

   unreachable("Could not find a free register");
}

/*
 * Loop over live-in values at the start of the block and mark their registers
 * as in-use. We process blocks in dominance order, so this handles everything
 * but loop headers.
 *
 * For loop headers, this handles the forward edges but not the back edge.
 * However, that's okay: we don't want to reserve the registers that are
 * defined within the loop, because then we'd get a contradiction. Instead we
 * leave them available and then they become fixed points of a sort.
 */
static void
reserve_live_in(struct ra_ctx *rctx)
{
   int i;
   BITSET_FOREACH_SET(i, rctx->block->live_in, rctx->shader->alloc) {
      /* Skip values defined in loops when processing the loop header */
      if (!BITSET_TEST(rctx->visited, i))
         continue;

      for (unsigned j = 0; j < rctx->ncomps[i]; ++j)
         BITSET_SET(rctx->used_regs, rctx->ssa_to_reg[i] + j);
   }
}

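/* Bind SSA value v to the contiguous range starting at reg, marking all of
 * its ncomps registers as used. Each SSA value is assigned exactly once.
 */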
static void
assign_regs(struct ra_ctx *rctx, agx_index v, unsigned reg)
{
   assert(reg < rctx->bound && "must not overflow register file");
   assert(v.type == AGX_INDEX_NORMAL && "only SSA gets registers allocated");
   rctx->ssa_to_reg[v.value] = reg;

   assert(!BITSET_TEST(rctx->visited, v.value) && "SSA violated");
   BITSET_SET(rctx->visited, v.value);

   assert(rctx->ncomps[v.value] >= 1);
   unsigned end = reg + rctx->ncomps[v.value] - 1;
   assert(!BITSET_TEST_RANGE(rctx->used_regs, reg, end) && "no interference");
   BITSET_SET_RANGE(rctx->used_regs, reg, end);
}

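/* For a collect source that already has a register, compute the base the
 * collect's destination would need for that source to already be in place
 * (its register minus its offset within the collect), or ~0 on underflow.
 */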
static unsigned
affinity_base_of_collect(struct ra_ctx *rctx, agx_instr *collect, unsigned src)
{
   unsigned src_reg = rctx->ssa_to_reg[collect->src[src].value];
   unsigned src_offset = src * agx_size_align_16(collect->src[src].size);

   if (src_reg >= src_offset)
      return src_reg - src_offset;
   else
      return ~0;
}
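
/* Choose registers for destination d of instruction I. Preference order:
 * place a collect where its already-assigned sources line up, place a value
 * feeding a collect so the whole collect can later be contiguous, and
 * finally fall back to the first free, suitably aligned range.
 */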
static unsigned
pick_regs(struct ra_ctx *rctx, agx_instr *I, unsigned d)
{
   agx_index idx = I->dest[d];
   assert(idx.type == AGX_INDEX_NORMAL);

   unsigned count = agx_write_registers(I, d);
   unsigned align = agx_size_align_16(idx.size);
   assert(count >= 1);

   /* Try to allocate collects compatibly with their sources */
   if (I->op == AGX_OPCODE_COLLECT) {
      agx_foreach_ssa_src(I, s) {
         assert(BITSET_TEST(rctx->visited, I->src[s].value) &&
                "registers assigned in an order compatible with dominance "
                "and this is not a phi node, so we have assigned a register");

         unsigned base = affinity_base_of_collect(rctx, I, s);
         if (base >= rctx->bound || (base + count) > rctx->bound)
            continue;

         /* Unaligned destinations can happen when dest size > src size */
         if (base % align)
            continue;

         if (!BITSET_TEST_RANGE(rctx->used_regs, base, base + count - 1))
            return base;
      }
   }

   /* Try to allocate sources of collects contiguously */
   if (rctx->src_to_collect[idx.value] != NULL) {
      agx_instr *collect = rctx->src_to_collect[idx.value];

      assert(count == align && "collect sources are scalar");

      /* Find our offset in the collect. If our source is repeated in the
       * collect, this may not be unique. We arbitrarily choose the first.
       */
      unsigned our_source = ~0;
      agx_foreach_ssa_src(collect, s) {
         if (agx_is_equiv(collect->src[s], idx)) {
            our_source = s;
            break;
         }
      }

      assert(our_source < collect->nr_srcs && "source must be in the collect");

      /* See if we can allocate compatibly with any source of the collect */
      agx_foreach_ssa_src(collect, s) {
         if (!BITSET_TEST(rctx->visited, collect->src[s].value))
            continue;

         /* Determine where the collect should start relative to the source */
         unsigned base = affinity_base_of_collect(rctx, collect, s);
         if (base >= rctx->bound)
            continue;

         unsigned our_reg = base + (our_source * align);

         /* Don't allocate past the end of the register file */
         if ((our_reg + align) > rctx->bound)
            continue;

         /* If those registers are free, then choose them */
         if (!BITSET_TEST_RANGE(rctx->used_regs, our_reg, our_reg + align - 1))
            return our_reg;
      }

      unsigned collect_align = agx_size_align_16(collect->dest[0].size);
      unsigned offset = our_source * align;

      /* Prefer ranges of the register file that leave room for all sources of
       * the collect contiguously.
       */
      for (unsigned base = 0; base + (collect->nr_srcs * align) <= rctx->bound;
           base += collect_align) {
         if (!BITSET_TEST_RANGE(rctx->used_regs, base,
                                base + (collect->nr_srcs * align) - 1))
            return base + offset;
      }

      /* Try to respect the alignment requirement of the collect destination,
       * which may be greater than the sources (e.g. pack_64_2x32_split). Look
       * for a register for the source such that the collect base is aligned.
       */
      if (collect_align > align) {
         for (unsigned reg = offset; reg + collect_align <= rctx->bound;
              reg += collect_align) {
            if (!BITSET_TEST_RANGE(rctx->used_regs, reg, reg + count - 1))
               return reg;
         }
      }
   }

   /* Default to any contiguous sequence of registers */
   return find_regs(rctx->used_regs, count, align, rctx->bound);
}

/** Assign registers to SSA values in a block. */

static void
agx_ra_assign_local(struct ra_ctx *rctx)
{
   BITSET_DECLARE(used_regs, AGX_NUM_REGS) = {0};

   agx_block *block = rctx->block;
   uint8_t *ssa_to_reg = rctx->ssa_to_reg;
   uint8_t *ncomps = rctx->ncomps;
   rctx->used_regs = used_regs;

   reserve_live_in(rctx);

   /* Force the nesting counter r0l live throughout shaders using control flow.
    * This could be optimized (sync with agx_calc_register_demand).
    */
   if (rctx->shader->any_cf)
      BITSET_SET(used_regs, 0);

   agx_foreach_instr_in_block(block, I) {
      /* Optimization: if a split contains the last use of a vector, the split
       * can be removed by assigning the destinations overlapping the source.
       */
      if (I->op == AGX_OPCODE_SPLIT && I->src[0].kill) {
         unsigned reg = ssa_to_reg[I->src[0].value];
         unsigned width = agx_size_align_16(agx_split_width(I));

         agx_foreach_dest(I, d) {
            /* Free up the source */
            unsigned offset_reg = reg + (d * width);
            BITSET_CLEAR_RANGE(used_regs, offset_reg, offset_reg + width - 1);

            /* Assign the destination where the source was */
            if (!agx_is_null(I->dest[d]))
               assign_regs(rctx, I->dest[d], offset_reg);
         }

         continue;
      } else if (I->op == AGX_OPCODE_PRELOAD) {
         /* We must coalesce all preload moves */
         assert(I->dest[0].size == I->src[0].size);
         assert(I->src[0].type == AGX_INDEX_REGISTER);

         assign_regs(rctx, I->dest[0], I->src[0].value);
         continue;
      }

      /* First, free killed sources */
      agx_foreach_ssa_src(I, s) {
         if (I->src[s].kill) {
            unsigned reg = ssa_to_reg[I->src[s].value];
            unsigned count = ncomps[I->src[s].value];

            assert(count >= 1);
            BITSET_CLEAR_RANGE(used_regs, reg, reg + count - 1);
         }
      }

      /* Next, assign destinations one at a time. This is always legal
       * because of the SSA form.
       */
      agx_foreach_ssa_dest(I, d) {
         assign_regs(rctx, I->dest[d], pick_regs(rctx, I, d));
      }
   }

   STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));
   memcpy(block->regs_out, used_regs, sizeof(used_regs));
}

/*
 * Lower phis to parallel copies at the logical end of a given block. If a block
 * needs parallel copies inserted, a successor of the block has a phi node. To
 * have a (nontrivial) phi node, a block must have multiple predecessors. So the
 * edge from the block to the successor (with phi) is not the only edge entering
 * the successor. Because the control flow graph has no critical edges, this
 * edge must therefore be the only edge leaving the block, so the block must
 * have only a single successor.
 */
static void
agx_insert_parallel_copies(agx_context *ctx, agx_block *block)
{
   bool any_succ = false;
   unsigned nr_phi = 0;

   /* Phi nodes logically happen on the control flow edge, so parallel copies
    * are added at the end of the predecessor */
   agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));

   agx_foreach_successor(block, succ) {
      assert(nr_phi == 0 && "control flow graph has a critical edge");

      agx_foreach_phi_in_block(succ, phi) {
         assert(!any_succ && "control flow graph has a critical edge");
         nr_phi++;
      }

      any_succ = true;

      /* Nothing to do if there are no phi nodes */
      if (nr_phi == 0)
         continue;

      unsigned pred_index = agx_predecessor_index(succ, block);

      /* Create a parallel copy lowering all the phi nodes */
      struct agx_copy *copies = calloc(sizeof(*copies), nr_phi);

      unsigned i = 0;

      agx_foreach_phi_in_block(succ, phi) {
         agx_index dest = phi->dest[0];
         agx_index src = phi->src[pred_index];

         assert(dest.type == AGX_INDEX_REGISTER);
         assert(dest.size == src.size);

         copies[i++] = (struct agx_copy){
            .dest = dest.value,
            .src = src,
         };
      }

      agx_emit_parallel_copies(&b, copies, nr_phi);

      free(copies);
   }
}

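/* Register allocation entry point: compute liveness, size every SSA value,
 * assign registers one block at a time, rewrite operands to registers, then
 * lower the RA pseudo-instructions (collect, split, phi, preload, nest).
 */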
void
agx_ra(agx_context *ctx)
{
   unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned));

   agx_compute_liveness(ctx);
   uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
   uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t));
   agx_instr **src_to_collect = calloc(ctx->alloc, sizeof(agx_instr *));
   BITSET_WORD *visited = calloc(BITSET_WORDS(ctx->alloc), sizeof(BITSET_WORD));

   agx_foreach_instr_global(ctx, I) {
      /* Record collects so we can coalesce when assigning */
      if (I->op == AGX_OPCODE_COLLECT) {
         agx_foreach_ssa_src(I, s) {
            src_to_collect[I->src[s].value] = I;
         }
      }

      agx_foreach_ssa_dest(I, d) {
         unsigned v = I->dest[d].value;
         assert(ncomps[v] == 0 && "broken SSA");
         ncomps[v] = agx_write_registers(I, d);
      }
   }

   /* Assign registers in dominance-order. This coincides with source-order due
    * to a NIR invariant, so we do not need special handling for this.
    */
   agx_foreach_block(ctx, block) {
      agx_ra_assign_local(&(struct ra_ctx){
         .shader = ctx,
         .block = block,
         .ssa_to_reg = ssa_to_reg,
         .src_to_collect = src_to_collect,
         .ncomps = ncomps,
         .visited = visited,
         .bound = AGX_NUM_REGS,
      });
   }

   for (unsigned i = 0; i < ctx->alloc; ++i) {
      if (ncomps[i])
         ctx->max_reg = MAX2(ctx->max_reg, ssa_to_reg[i] + ncomps[i] - 1);
   }

   /* Vertex shaders preload the vertex/instance IDs (r5, r6) even if the
    * shader doesn't use them. Account for that so the preload doesn't clobber
    * GPRs.
    */
   if (ctx->nir->info.stage == MESA_SHADER_VERTEX)
      ctx->max_reg = MAX2(ctx->max_reg, 6 * 2);

   agx_foreach_instr_global(ctx, ins) {
      agx_foreach_ssa_src(ins, s) {
         unsigned v = ssa_to_reg[ins->src[s].value];
         agx_replace_src(ins, s, agx_register(v, ins->src[s].size));
      }

      agx_foreach_ssa_dest(ins, d) {
         unsigned v = ssa_to_reg[ins->dest[d].value];
         ins->dest[d] =
            agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
      }
   }

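   /* With registers assigned, lower the RA pseudo-instructions. A collect
    * gathers scalars into consecutive registers and a split is the inverse;
    * both now reduce to parallel copies between registers.
    */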
   agx_foreach_instr_global_safe(ctx, ins) {
      /* Lower away RA pseudo-instructions */
      agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));

      if (ins->op == AGX_OPCODE_COLLECT) {
         assert(ins->dest[0].type == AGX_INDEX_REGISTER);
         unsigned base = ins->dest[0].value;
         unsigned width = agx_size_align_16(ins->src[0].size);

         struct agx_copy *copies = alloca(sizeof(copies[0]) * ins->nr_srcs);
         unsigned n = 0;

         /* Move the sources */
         agx_foreach_src(ins, i) {
            if (agx_is_null(ins->src[i]) || ins->src[i].type == AGX_INDEX_UNDEF)
               continue;
            assert(ins->src[i].size == ins->src[0].size);

            copies[n++] = (struct agx_copy){
               .dest = base + (i * width),
               .src = ins->src[i],
            };
         }

         agx_emit_parallel_copies(&b, copies, n);
         agx_remove_instruction(ins);
         continue;
      } else if (ins->op == AGX_OPCODE_SPLIT) {
         assert(ins->src[0].type == AGX_INDEX_REGISTER ||
                ins->src[0].type == AGX_INDEX_UNIFORM);

         struct agx_copy copies[4];
         assert(ins->nr_dests <= ARRAY_SIZE(copies));

         unsigned n = 0;
         unsigned width = agx_size_align_16(agx_split_width(ins));

         /* Move the sources */
         agx_foreach_dest(ins, i) {
            if (ins->dest[i].type != AGX_INDEX_REGISTER)
               continue;

            agx_index src = ins->src[0];
            src.size = ins->dest[i].size;
            src.value += (i * width);

            copies[n++] = (struct agx_copy){
               .dest = ins->dest[i].value,
               .src = src,
            };
         }

         /* Lower away */
         agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
         agx_emit_parallel_copies(&b, copies, n);
         agx_remove_instruction(ins);
         continue;
      }
   }

   /* Insert parallel copies lowering phi nodes */
   agx_foreach_block(ctx, block) {
      agx_insert_parallel_copies(ctx, block);
   }

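   /* Final cleanup: drop the phi and preload pseudo-instructions, drop moves
    * that RA coalesced into no-ops, and lower nesting-counter writes to moves
    * into r0l.
    */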
   agx_foreach_instr_global_safe(ctx, I) {
      switch (I->op) {
      /* Pseudoinstructions for RA must be removed now */
      case AGX_OPCODE_PHI:
      case AGX_OPCODE_PRELOAD:
         agx_remove_instruction(I);
         break;

      /* Coalesced moves can be removed */
      case AGX_OPCODE_MOV:
         if (I->src[0].type == AGX_INDEX_REGISTER &&
             I->dest[0].size == I->src[0].size &&
             I->src[0].value == I->dest[0].value) {

            assert(I->dest[0].type == AGX_INDEX_REGISTER);
            agx_remove_instruction(I);
         }
         break;

      /* Writes to the nesting counter lowered to the real register */
      case AGX_OPCODE_NEST: {
         agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
         agx_mov_to(&b, agx_register(0, AGX_SIZE_16), I->src[0]);
         agx_remove_instruction(I);
         break;
      }

      default:
         break;
      }
   }

   free(src_to_collect);
   free(ssa_to_reg);
   free(ncomps);
   free(visited);
   free(alloc);
}