/*
 * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "agx_compiler.h"
#include "agx_builder.h"

/* SSA-based register allocator */

struct ra_ctx {
   agx_context *shader;
   agx_block *block;
   uint8_t *ssa_to_reg;
   uint8_t *ncomps;
   BITSET_WORD *visited;
   BITSET_WORD *used_regs;

   /* For affinities */
   agx_instr **src_to_collect;

   /* Maximum number of registers that RA is allowed to use */
   unsigned bound;
};
/** Returns number of registers written by an instruction */
unsigned
agx_write_registers(agx_instr *I, unsigned d)
{
   unsigned size = agx_size_align_16(I->dest[d].size);

   switch (I->op) {
   case AGX_OPCODE_ITER:
      assert(1 <= I->channels && I->channels <= 4);
      return I->channels * size;

   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
   case AGX_OPCODE_LD_TILE:
      return util_bitcount(I->mask) * size;

   case AGX_OPCODE_LDCF:
      return 6;
   case AGX_OPCODE_COLLECT:
      return I->nr_srcs * agx_size_align_16(I->src[0].size);
   default:
      return size;
   }
}
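/*
 * Return the common size of the destinations of a split. A split writes a
 * vector's components to consecutive scalar destinations, which must all share
 * a size, so any non-null destination determines the width.
 */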
static inline enum agx_size
agx_split_width(const agx_instr *I)
{
   enum agx_size width = ~0;

   agx_foreach_dest(I, d) {
      if (I->dest[d].type == AGX_INDEX_NULL)
         continue;
      else if (width != ~0)
         assert(width == I->dest[d].size);
      else
         width = I->dest[d].size;
   }

   assert(width != ~0 && "should have been DCE'd");
   return width;
}
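/*
 * Scan the register file for the first free contiguous range of `count`
 * registers aligned to `align`, below `max`. This must succeed: the caller is
 * responsible for bounding register demand before allocating.
 */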
static unsigned
find_regs(BITSET_WORD *used_regs, unsigned count, unsigned align, unsigned max)
{
   assert(count >= 1);

   for (unsigned reg = 0; reg < max; reg += align) {
      if (!BITSET_TEST_RANGE(used_regs, reg, reg + count - 1))
         return reg;
   }

   /* Couldn't find a free register, dump the state of the register file */
   fprintf(stderr, "Failed to find register of size %u aligned %u max %u.\n",
           count, align, max);

   fprintf(stderr, "Register file:\n");
   for (unsigned i = 0; i < BITSET_WORDS(max); ++i)
      fprintf(stderr, "  %08X\n", used_regs[i]);

   unreachable("Could not find a free register");
}
/*
 * Loop over live-in values at the start of the block and mark their registers
 * as in-use. We process blocks in dominance order, so this handles everything
 * but loop headers.
 *
 * For loop headers, this handles the forward edges but not the back edge.
 * However, that's okay: we don't want to reserve the registers that are
 * defined within the loop, because then we'd get a contradiction. Instead we
 * leave them available and then they become fixed points of a sort.
 */
static void
reserve_live_in(struct ra_ctx *rctx)
{
   int i;
   BITSET_FOREACH_SET(i, rctx->block->live_in, rctx->shader->alloc) {
      /* Skip values defined in loops when processing the loop header */
      if (!BITSET_TEST(rctx->visited, i))
         continue;

      for (unsigned j = 0; j < rctx->ncomps[i]; ++j)
         BITSET_SET(rctx->used_regs, rctx->ssa_to_reg[i] + j);
   }
}
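/*
 * Record a register assignment for an SSA value: update the value-to-register
 * map, mark the value as visited, and mark all of its registers as in-use.
 */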
static void
assign_regs(struct ra_ctx *rctx, agx_index v, unsigned reg)
{
   assert(v.type == AGX_INDEX_NORMAL && "only SSA gets registers allocated");
   rctx->ssa_to_reg[v.value] = reg;

   assert(!BITSET_TEST(rctx->visited, v.value) && "SSA violated");
   BITSET_SET(rctx->visited, v.value);

   assert(rctx->ncomps[v.value] >= 1);
   unsigned end = reg + rctx->ncomps[v.value] - 1;
   assert(!BITSET_TEST_RANGE(rctx->used_regs, reg, end) && "no interference");
   BITSET_SET_RANGE(rctx->used_regs, reg, end);
}
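/*
 * Compute the register where a collect would need to start for its result to
 * line up with a given source's existing assignment. For example, if source 2
 * of a 16-bit collect lives in register index 5, the collect would need to
 * start at index 5 - 2 = 3. Returns ~0 if the source sits too low in the
 * register file for any such base to exist.
 */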
static unsigned
affinity_base_of_collect(struct ra_ctx *rctx, agx_instr *collect, unsigned src)
{
   unsigned src_reg = rctx->ssa_to_reg[collect->src[src].value];
   unsigned src_offset = src * agx_size_align_16(collect->src[src].size);

   if (src_reg >= src_offset)
      return src_reg - src_offset;
   else
      return ~0;
}
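/*
 * Choose registers for an instruction's destination. Collects want their
 * sources contiguous in the register file, so both collects and values feeding
 * collects get affinity hints before falling back to a plain linear scan.
 */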
static unsigned
pick_regs(struct ra_ctx *rctx, agx_instr *I, unsigned d)
{
   agx_index idx = I->dest[d];
   assert(idx.type == AGX_INDEX_NORMAL);

   unsigned count = agx_write_registers(I, d);
   unsigned align = agx_size_align_16(idx.size);
   assert(count >= 1);

   /* Try to allocate collects compatibly with their sources */
   if (I->op == AGX_OPCODE_COLLECT) {
      agx_foreach_ssa_src(I, s) {
         assert(BITSET_TEST(rctx->visited, I->src[s].value) &&
                "registers assigned in an order compatible with dominance "
                "and this is not a phi node, so we have assigned a register");

         unsigned base = affinity_base_of_collect(rctx, I, s);
         if (base >= rctx->bound || (base + count) > rctx->bound)
            continue;

         /* Unaligned destinations can happen when dest size > src size */
         if (base % align)
            continue;

         if (!BITSET_TEST_RANGE(rctx->used_regs, base, base + count - 1))
            return base;
      }
   }

   /* Try to allocate sources of collects contiguously */
   if (rctx->src_to_collect[idx.value] != NULL) {
      agx_instr *collect = rctx->src_to_collect[idx.value];

      assert(count == align && "collect sources are scalar");

      /* Find our offset in the collect. If our source is repeated in the
       * collect, this may not be unique. We arbitrarily choose the first.
       */
      unsigned our_source = ~0;
      agx_foreach_ssa_src(collect, s) {
         if (agx_is_equiv(collect->src[s], idx)) {
            our_source = s;
            break;
         }
      }

      assert(our_source < collect->nr_srcs && "source must be in the collect");

      /* See if we can allocate compatibly with any source of the collect */
      agx_foreach_ssa_src(collect, s) {
         if (!BITSET_TEST(rctx->visited, collect->src[s].value))
            continue;

         /* Determine where the collect should start relative to the source */
         unsigned base = affinity_base_of_collect(rctx, collect, s);
         if (base >= rctx->bound)
            continue;

         unsigned our_reg = base + (our_source * align);

         /* Don't allocate past the end of the register file */
         if ((our_reg + align) > rctx->bound)
            continue;

         /* If those registers are free, then choose them */
         if (!BITSET_TEST_RANGE(rctx->used_regs, our_reg, our_reg + align - 1))
            return our_reg;
      }
   }

   /* Default to any contiguous sequence of registers */
   return find_regs(rctx->used_regs, count, align, rctx->bound);
}
/** Assign registers to SSA values in a block. */
static void
agx_ra_assign_local(struct ra_ctx *rctx)
{
   BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };

   agx_block *block = rctx->block;
   uint8_t *ssa_to_reg = rctx->ssa_to_reg;
   uint8_t *ncomps = rctx->ncomps;
   rctx->used_regs = used_regs;

   reserve_live_in(rctx);

   /* Force the nesting counter r0l live throughout shaders using control flow.
    * This could be optimized (sync with agx_calc_register_demand).
    */
   if (rctx->shader->any_cf)
      BITSET_SET(used_regs, 0);

   agx_foreach_instr_in_block(block, I) {
      /* Optimization: if a split contains the last use of a vector, the split
       * can be removed by assigning the destinations overlapping the source.
       */
      if (I->op == AGX_OPCODE_SPLIT && I->src[0].kill) {
         unsigned reg = ssa_to_reg[I->src[0].value];
         unsigned width = agx_size_align_16(agx_split_width(I));

         agx_foreach_dest(I, d) {
            /* Free up the source */
            unsigned offset_reg = reg + (d * width);
            BITSET_CLEAR_RANGE(used_regs, offset_reg, offset_reg + width - 1);

            /* Assign the destination where the source was */
            if (!agx_is_null(I->dest[d]))
               assign_regs(rctx, I->dest[d], offset_reg);
         }

         continue;
      } else if (I->op == AGX_OPCODE_PRELOAD) {
         /* We must coalesce all preload moves */
         assert(I->dest[0].size == I->src[0].size);
         assert(I->src[0].type == AGX_INDEX_REGISTER);

         assign_regs(rctx, I->dest[0], I->src[0].value);
         continue;
      }

      /* First, free killed sources */
      agx_foreach_ssa_src(I, s) {
         if (I->src[s].kill) {
            unsigned reg = ssa_to_reg[I->src[s].value];
            unsigned count = ncomps[I->src[s].value];

            assert(count >= 1);
            BITSET_CLEAR_RANGE(used_regs, reg, reg + count - 1);
         }
      }

      /* Next, assign destinations one at a time. This is always legal
       * because of the SSA form.
       */
      agx_foreach_ssa_dest(I, d) {
         assign_regs(rctx, I->dest[d], pick_regs(rctx, I, d));
      }
   }

   STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));
   memcpy(block->regs_out, used_regs, sizeof(used_regs));
}
/*
 * Lower phis to parallel copies at the logical end of a given block. If a block
 * needs parallel copies inserted, a successor of the block has a phi node. To
 * have a (nontrivial) phi node, a block must have multiple predecessors. So the
 * edge from the block to the successor (with phi) is not the only edge entering
 * the successor. Because the control flow graph has no critical edges, this
 * edge must therefore be the only edge leaving the block, so the block must
 * have only a single successor.
 */
static void
agx_insert_parallel_copies(agx_context *ctx, agx_block *block)
{
   bool any_succ = false;
   unsigned nr_phi = 0;

   /* Phi nodes logically happen on the control flow edge, so parallel copies
    * are added at the end of the predecessor */
   agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));

   agx_foreach_successor(block, succ) {
      assert(nr_phi == 0 && "control flow graph has a critical edge");

      agx_foreach_phi_in_block(succ, phi) {
         assert(!any_succ && "control flow graph has a critical edge");
         nr_phi++;
      }

      any_succ = true;

      /* Nothing to do if there are no phi nodes */
      if (nr_phi == 0)
         continue;

      unsigned pred_index = agx_predecessor_index(succ, block);

      /* Create a parallel copy lowering all the phi nodes */
      struct agx_copy *copies = calloc(nr_phi, sizeof(*copies));

      unsigned i = 0;

      agx_foreach_phi_in_block(succ, phi) {
         agx_index dest = phi->dest[0];
         agx_index src = phi->src[pred_index];

         assert(dest.type == AGX_INDEX_REGISTER);
         assert(src.type == AGX_INDEX_REGISTER);
         assert(dest.size == src.size);

         copies[i++] = (struct agx_copy) {
            .dest = dest.value,
            .src = src,
         };
      }

      agx_emit_parallel_copies(&b, copies, nr_phi);

      free(copies);
   }
}
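/*
 * Top-level register allocation: assign registers block-by-block in dominance
 * order, rewrite SSA indices to their registers, then lower the RA
 * pseudo-instructions (collect, split, phi, preload, nest) to moves.
 */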
void
agx_ra(agx_context *ctx)
{
   unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned));

   agx_compute_liveness(ctx);
   uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
   uint8_t *ncomps = calloc(ctx->alloc, sizeof(uint8_t));
   agx_instr **src_to_collect = calloc(ctx->alloc, sizeof(agx_instr *));
   BITSET_WORD *visited = calloc(BITSET_WORDS(ctx->alloc), sizeof(BITSET_WORD));

   agx_foreach_instr_global(ctx, I) {
      /* Record collects so we can coalesce when assigning */
      if (I->op == AGX_OPCODE_COLLECT) {
         agx_foreach_ssa_src(I, s) {
            src_to_collect[I->src[s].value] = I;
         }
      }

      agx_foreach_ssa_dest(I, d) {
         unsigned v = I->dest[d].value;
         assert(ncomps[v] == 0 && "broken SSA");
         ncomps[v] = agx_write_registers(I, d);
      }
   }

   /* Assign registers in dominance-order. This coincides with source-order due
    * to a NIR invariant, so we do not need special handling for this.
    */
   agx_foreach_block(ctx, block) {
      agx_ra_assign_local(&(struct ra_ctx) {
         .shader = ctx,
         .block = block,
         .ssa_to_reg = ssa_to_reg,
         .src_to_collect = src_to_collect,
         .ncomps = ncomps,
         .visited = visited,
         .bound = AGX_NUM_REGS
      });
   }
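   /* Record the maximum register index used, tracking register demand */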
   for (unsigned i = 0; i < ctx->alloc; ++i) {
      if (ncomps[i])
         ctx->max_reg = MAX2(ctx->max_reg, ssa_to_reg[i] + ncomps[i] - 1);
   }
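   /* Every SSA value now has a register, so rewrite the sources and
    * destinations of all instructions in place.
    */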
   agx_foreach_instr_global(ctx, ins) {
      agx_foreach_ssa_src(ins, s) {
         unsigned v = ssa_to_reg[ins->src[s].value];
         agx_replace_src(ins, s, agx_register(v, ins->src[s].size));
      }

      agx_foreach_ssa_dest(ins, d) {
         unsigned v = ssa_to_reg[ins->dest[d].value];
         ins->dest[d] =
            agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
      }
   }

   agx_foreach_instr_global_safe(ctx, ins) {
      /* Lower away RA pseudo-instructions */
      agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));

      if (ins->op == AGX_OPCODE_COLLECT) {
         assert(ins->dest[0].type == AGX_INDEX_REGISTER);
         unsigned base = ins->dest[0].value;
         unsigned width = agx_size_align_16(ins->src[0].size);

         struct agx_copy *copies = alloca(sizeof(copies[0]) * ins->nr_srcs);
         unsigned n = 0;

         /* Move the sources */
         agx_foreach_src(ins, i) {
            if (agx_is_null(ins->src[i]))
               continue;
            assert(ins->src[i].size == ins->src[0].size);

            copies[n++] = (struct agx_copy) {
               .dest = base + (i * width),
               .src = ins->src[i]
            };
         }

         agx_emit_parallel_copies(&b, copies, n);
         agx_remove_instruction(ins);
         continue;
      } else if (ins->op == AGX_OPCODE_SPLIT) {
         assert(ins->src[0].type == AGX_INDEX_REGISTER);
         unsigned base = ins->src[0].value;
         unsigned width = agx_size_align_16(agx_split_width(ins));

         struct agx_copy copies[4];
         assert(ins->nr_dests <= ARRAY_SIZE(copies));

         unsigned n = 0;

         /* Copy each component into its destination */
         agx_foreach_dest(ins, i) {
            if (ins->dest[i].type != AGX_INDEX_REGISTER)
               continue;

            copies[n++] = (struct agx_copy) {
               .dest = ins->dest[i].value,
               .src = agx_register(base + (i * width), ins->dest[i].size)
            };
         }

         /* Lower away */
         agx_emit_parallel_copies(&b, copies, n);
         agx_remove_instruction(ins);
         continue;
      }
   }

   /* Insert parallel copies lowering phi nodes */
   agx_foreach_block(ctx, block) {
      agx_insert_parallel_copies(ctx, block);
   }
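   /* Finally, remove the remaining RA pseudo-instructions and any moves that
    * were coalesced into nothing.
    */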
   agx_foreach_instr_global_safe(ctx, I) {
      switch (I->op) {
      /* Pseudoinstructions for RA must be removed now */
      case AGX_OPCODE_PHI:
      case AGX_OPCODE_LOGICAL_END:
      case AGX_OPCODE_PRELOAD:
         agx_remove_instruction(I);
         break;

      /* Coalesced moves can be removed */
      case AGX_OPCODE_MOV:
         if (I->src[0].type == AGX_INDEX_REGISTER &&
             I->dest[0].size == I->src[0].size &&
             I->src[0].value == I->dest[0].value) {

            assert(I->dest[0].type == AGX_INDEX_REGISTER);
            agx_remove_instruction(I);
         }
         break;

      /* Writes to the nesting counter are lowered to the real register */
      case AGX_OPCODE_NEST: {
         agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
         agx_mov_to(&b, agx_register(0, AGX_SIZE_16), I->src[0]);
         agx_remove_instruction(I);
         break;
      }

      default:
         break;
      }
   }

   free(src_to_collect);
   free(ssa_to_reg);
   free(ncomps);
   free(visited);
   free(alloc);
}