agx: Assign registers locally

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11751>
This commit is contained in:
Alyssa Rosenzweig
2021-06-19 14:34:44 -04:00
committed by Marge Bot
parent 15b49a6795
commit 85e18deb18
2 changed files with 130 additions and 27 deletions

View File

@@ -43,6 +43,9 @@ enum agx_dbg {
extern int agx_debug;
/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
#define AGX_NUM_REGS (256)
enum agx_index_type {
AGX_INDEX_NULL = 0,
AGX_INDEX_NORMAL = 1,
@@ -325,6 +328,9 @@ typedef struct agx_block {
BITSET_WORD *live_in;
BITSET_WORD *live_out;
/* Register allocation */
BITSET_DECLARE(regs_out, AGX_NUM_REGS);
/* Offset of the block in the emitted binary */
off_t offset;

View File

@@ -37,8 +37,6 @@ agx_read_registers(agx_instr *I, unsigned s)
unsigned size = I->src[s].size == AGX_SIZE_32 ? 2 : 1;
switch (I->op) {
case AGX_OPCODE_DEVICE_LOAD:
return 8;
default:
return size;
}
@@ -58,43 +56,134 @@ agx_write_registers(agx_instr *I, unsigned d)
return 8;
case AGX_OPCODE_LD_VARY_FLAT:
return 6;
case AGX_OPCODE_P_COMBINE:
{
unsigned components = 0;
for (unsigned i = 0; i < 4; ++i) {
if (!agx_is_null(I->src[i]))
components = i + 1;
}
return components * size;
}
default:
return size;
}
}
/*
 * Find, claim, and return the lowest-numbered span of `count` consecutive
 * free 16-bit registers whose base is a multiple of `align`. Marks the span
 * as used in `used_regs` before returning.
 */
static unsigned
agx_assign_regs(BITSET_WORD *used_regs, unsigned count, unsigned align)
{
   /* Bound the search so the whole span fits: the original loop ran reg up
    * to AGX_NUM_REGS - 1 and then probed bits reg + j for j < count, which
    * reads/writes past the end of the AGX_NUM_REGS-bit set whenever
    * count > align near the top of the file (e.g. reg = 254, count = 8
    * touches bits 254..261 of a 256-bit bitset). */
   for (unsigned reg = 0; reg + count <= AGX_NUM_REGS; reg += align) {
      bool conflict = false;

      for (unsigned j = 0; j < count; ++j)
         conflict |= BITSET_TEST(used_regs, reg + j);

      if (!conflict) {
         for (unsigned j = 0; j < count; ++j)
            BITSET_SET(used_regs, reg + j);

         return reg;
      }
   }

   unreachable("Could not find a free register");
}
/** Greedy local register assignment for one block's SSA values. Seeds the
 * occupancy set from the predecessors' exit allocations, frees registers as
 * their last (killing) use is seen, and records this block's exit allocation
 * in block->regs_out for its successors. */
static void
agx_ra_assign_local(agx_block *block, uint8_t *ssa_to_reg)
{
   BITSET_DECLARE(used_regs, AGX_NUM_REGS) = { 0 };

   /* Union the allocations live out of every predecessor */
   agx_foreach_predecessor(block, pred) {
      for (unsigned w = 0; w < BITSET_WORDS(AGX_NUM_REGS); ++w)
         used_regs[w] |= pred->regs_out[w];
   }

   /* Reserve registers with fixed meanings */
   BITSET_SET(used_regs, 0); /* control flow writes r0l */
   BITSET_SET(used_regs, 5 * 2); /* TODO: precolouring, don't overwrite vertex ID */
   BITSET_SET(used_regs, 5 * 2 + 1);

   agx_foreach_instr_in_block(block, I) {
      /* Killed sources release their registers before the destination
       * is placed, so a destination may reuse a dying source's slot */
      agx_foreach_src(I, s) {
         if (I->src[s].type != AGX_INDEX_NORMAL || !I->src[s].kill)
            continue;

         unsigned base = ssa_to_reg[I->src[s].value];
         unsigned nr = agx_read_registers(I, s);

         for (unsigned r = 0; r < nr; ++r)
            BITSET_CLEAR(used_regs, base + r);
      }

      /* Under SSA each destination is a fresh value, so assigning it a
       * register here is always legal */
      agx_foreach_dest(I, d) {
         if (I->dest[d].type != AGX_INDEX_NORMAL)
            continue;

         unsigned nr = agx_write_registers(I, d);
         unsigned align = (I->dest[d].size == AGX_SIZE_16) ? 1 : 2;

         ssa_to_reg[I->dest[d].value] =
            agx_assign_regs(used_regs, nr, align);
      }
   }

   /* Publish this block's exit allocation for its successors */
   STATIC_ASSERT(sizeof(block->regs_out) == sizeof(used_regs));
   memcpy(block->regs_out, used_regs, sizeof(used_regs));
}
void
agx_ra(agx_context *ctx)
{
unsigned *alloc = calloc(ctx->alloc, sizeof(unsigned));
unsigned usage = 6*2;
agx_compute_liveness(ctx);
uint8_t *ssa_to_reg = calloc(ctx->alloc, sizeof(uint8_t));
agx_foreach_block(ctx, block)
agx_ra_assign_local(block, ssa_to_reg);
/* TODO: Coalesce combines */
agx_foreach_instr_global_safe(ctx, ins) {
/* Lower away RA pseudo-instructions */
if (ins->op == AGX_OPCODE_P_COMBINE) {
/* TODO: Optimize out the moves! */
unsigned components = 0;
for (unsigned i = 0; i < 4; ++i) {
if (!agx_is_null(ins->src[i]))
components = i + 1;
}
unsigned size = ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;
if (size == 2 && usage & 1) usage++;
unsigned base = usage;
assert(ins->dest[0].type == AGX_INDEX_NORMAL);
alloc[ins->dest[0].value] = base;
usage += (components * size);
enum agx_size common_size = ins->dest[0].size;
unsigned base = ssa_to_reg[ins->dest[0].value];
unsigned size = common_size == AGX_SIZE_32 ? 2 : 1;
/* Move the sources */
agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
/* TODO: Eliminate the intermediate copy by handling parallel copies */
for (unsigned i = 0; i < 4; ++i) {
if (agx_is_null(ins->src[i])) continue;
assert(ins->src[0].type == AGX_INDEX_NORMAL);
agx_mov_to(&b, agx_register(base + (i * size), ins->dest[0].size),
agx_register(alloc[ins->src[i].value], ins->src[0].size));
unsigned base = ins->src[i].value;
if (ins->src[i].type == AGX_INDEX_NORMAL)
base = ssa_to_reg[base];
else
assert(ins->src[i].type == AGX_INDEX_REGISTER);
assert(ins->src[i].size == common_size);
agx_mov_to(&b, agx_register(124*2 + (i * size), common_size),
agx_register(base, common_size));
}
for (unsigned i = 0; i < 4; ++i) {
if (agx_is_null(ins->src[i])) continue;
agx_index src = ins->src[i];
if (src.type == AGX_INDEX_NORMAL)
src = agx_register(alloc[src.value], src.size);
agx_mov_to(&b, agx_register(base + (i * size), common_size),
agx_register(124*2 + (i * size), common_size));
}
/* We've lowered away, delete the old */
@@ -102,34 +191,42 @@ agx_ra(agx_context *ctx)
continue;
} else if (ins->op == AGX_OPCODE_P_EXTRACT) {
assert(ins->dest[0].type == AGX_INDEX_NORMAL);
assert(ins->src[0].type == AGX_INDEX_NORMAL);
assert(ins->dest[0].size == ins->src[0].size);
unsigned base = ins->src[0].value;
if (ins->src[0].type != AGX_INDEX_REGISTER) {
assert(ins->src[0].type == AGX_INDEX_NORMAL);
base = alloc[base];
}
unsigned size = ins->dest[0].size == AGX_SIZE_32 ? 2 : 1;
alloc[ins->dest[0].value] = alloc[ins->src[0].value] + (size * ins->imm);
unsigned left = ssa_to_reg[ins->dest[0].value];
unsigned right = ssa_to_reg[ins->src[0].value] + (size * ins->imm);
if (left != right) {
agx_builder b = agx_init_builder(ctx, agx_after_instr(ins));
agx_mov_to(&b, agx_register(left, ins->dest[0].size),
agx_register(right, ins->src[0].size));
}
agx_remove_instruction(ins);
continue;
}
agx_foreach_src(ins, s) {
if (ins->src[s].type == AGX_INDEX_NORMAL) {
unsigned v = alloc[ins->src[s].value];
unsigned v = ssa_to_reg[ins->src[s].value];
ins->src[s] = agx_replace_index(ins->src[s], agx_register(v, ins->src[s].size));
}
}
agx_foreach_dest(ins, d) {
if (ins->dest[d].type == AGX_INDEX_NORMAL) {
unsigned size = ins->dest[d].size == AGX_SIZE_32 ? 2 : 1;
if (size == 2 && usage & 1) usage++;
unsigned v = usage;
usage += agx_write_registers(ins, d);
alloc[ins->dest[d].value] = v;
unsigned v = ssa_to_reg[ins->dest[d].value];
ins->dest[d] = agx_replace_index(ins->dest[d], agx_register(v, ins->dest[d].size));
}
}
}
assert(usage < 256 && "dummy RA");
free(alloc);
}