agx: Optimize waits locally
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20446>
This commit is contained in:
@@ -44,6 +44,7 @@ static const struct debug_named_value agx_debug_options[] = {
|
|||||||
{"internal", AGX_DBG_INTERNAL, "Dump even internal shaders"},
|
{"internal", AGX_DBG_INTERNAL, "Dump even internal shaders"},
|
||||||
{"novalidate",AGX_DBG_NOVALIDATE,"Skip IR validation in debug builds"},
|
{"novalidate",AGX_DBG_NOVALIDATE,"Skip IR validation in debug builds"},
|
||||||
{"noopt", AGX_DBG_NOOPT, "Disable backend optimizations"},
|
{"noopt", AGX_DBG_NOOPT, "Disable backend optimizations"},
|
||||||
|
{"wait", AGX_DBG_WAIT, "Wait after all async instructions"},
|
||||||
DEBUG_NAMED_VALUE_END
|
DEBUG_NAMED_VALUE_END
|
||||||
};
|
};
|
||||||
/* clang-format on */
|
/* clang-format on */
|
||||||
|
@@ -47,6 +47,7 @@ enum agx_dbg {
|
|||||||
AGX_DBG_INTERNAL = BITFIELD_BIT(4),
|
AGX_DBG_INTERNAL = BITFIELD_BIT(4),
|
||||||
AGX_DBG_NOVALIDATE = BITFIELD_BIT(5),
|
AGX_DBG_NOVALIDATE = BITFIELD_BIT(5),
|
||||||
AGX_DBG_NOOPT = BITFIELD_BIT(6),
|
AGX_DBG_NOOPT = BITFIELD_BIT(6),
|
||||||
|
AGX_DBG_WAIT = BITFIELD_BIT(7),
|
||||||
};
|
};
|
||||||
/* clang-format on */
|
/* clang-format on */
|
||||||
|
|
||||||
|
@@ -6,6 +6,8 @@
|
|||||||
#include "agx_builder.h"
|
#include "agx_builder.h"
|
||||||
#include "agx_compiler.h"
|
#include "agx_compiler.h"
|
||||||
|
|
||||||
|
#define AGX_MAX_PENDING (8)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns whether an instruction is asynchronous and needs a scoreboard slot
|
* Returns whether an instruction is asynchronous and needs a scoreboard slot
|
||||||
*/
|
*/
|
||||||
@@ -15,6 +17,16 @@ instr_is_async(agx_instr *I)
|
|||||||
return agx_opcodes_info[I->op].immediates & AGX_IMMEDIATE_SCOREBOARD;
|
return agx_opcodes_info[I->op].immediates & AGX_IMMEDIATE_SCOREBOARD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct slot {
|
||||||
|
/* Set of registers this slot is currently writing */
|
||||||
|
BITSET_DECLARE(writes, AGX_NUM_REGS);
|
||||||
|
|
||||||
|
/* Number of pending messages on this slot. Must not exceed
|
||||||
|
* AGX_MAX_PENDING for correct results.
|
||||||
|
*/
|
||||||
|
uint8_t nr_pending;
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Insert waits within a block to stall after every async instruction. Useful
|
* Insert waits within a block to stall after every async instruction. Useful
|
||||||
* for debugging.
|
* for debugging.
|
||||||
@@ -30,6 +42,92 @@ agx_insert_waits_trivial(agx_context *ctx, agx_block *block)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Insert waits within a block, assuming scoreboard slots have already been
|
||||||
|
* assigned. This waits for everything at the end of the block, rather than
|
||||||
|
* doing something more intelligent/global. This should be optimized.
|
||||||
|
*
|
||||||
|
* XXX: Do any instructions read their sources asynchronously?
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
agx_insert_waits_local(agx_context *ctx, agx_block *block)
|
||||||
|
{
|
||||||
|
struct slot slots[2] = {0};
|
||||||
|
|
||||||
|
agx_foreach_instr_in_block_safe(block, I) {
|
||||||
|
uint8_t wait_mask = 0;
|
||||||
|
|
||||||
|
/* Check for read-after-write */
|
||||||
|
agx_foreach_src(I, s) {
|
||||||
|
if (I->src[s].type != AGX_INDEX_REGISTER)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned nr_read = agx_read_registers(I, s);
|
||||||
|
for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
|
||||||
|
if (BITSET_TEST_RANGE(slots[slot].writes, I->src[s].value,
|
||||||
|
I->src[s].value + nr_read - 1))
|
||||||
|
wait_mask |= BITSET_BIT(slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for write-after-write */
|
||||||
|
agx_foreach_dest(I, d) {
|
||||||
|
if (I->dest[d].type != AGX_INDEX_REGISTER)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned nr_writes = agx_write_registers(I, d);
|
||||||
|
for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
|
||||||
|
if (BITSET_TEST_RANGE(slots[slot].writes, I->dest[d].value,
|
||||||
|
I->dest[d].value + nr_writes - 1))
|
||||||
|
wait_mask |= BITSET_BIT(slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to assign a free slot */
|
||||||
|
if (instr_is_async(I)) {
|
||||||
|
for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
|
||||||
|
if (slots[slot].nr_pending == 0) {
|
||||||
|
I->scoreboard = slot;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check for slot overflow */
|
||||||
|
if (instr_is_async(I) &&
|
||||||
|
slots[I->scoreboard].nr_pending >= AGX_MAX_PENDING)
|
||||||
|
wait_mask |= BITSET_BIT(I->scoreboard);
|
||||||
|
|
||||||
|
/* Insert the appropriate waits, clearing the slots */
|
||||||
|
u_foreach_bit(slot, wait_mask) {
|
||||||
|
agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
|
||||||
|
agx_wait(&b, slot);
|
||||||
|
|
||||||
|
BITSET_ZERO(slots[slot].writes);
|
||||||
|
slots[slot].nr_pending = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Record access */
|
||||||
|
if (instr_is_async(I)) {
|
||||||
|
agx_foreach_dest(I, d) {
|
||||||
|
assert(I->dest[d].type == AGX_INDEX_REGISTER);
|
||||||
|
BITSET_SET_RANGE(slots[I->scoreboard].writes, I->dest[d].value,
|
||||||
|
I->dest[d].value + agx_write_registers(I, d) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
slots[I->scoreboard].nr_pending++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If there are outstanding messages, wait for them */
|
||||||
|
for (unsigned slot = 0; slot < ARRAY_SIZE(slots); ++slot) {
|
||||||
|
if (slots[slot].nr_pending) {
|
||||||
|
agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
|
||||||
|
agx_wait(&b, slot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Assign scoreboard slots to asynchronous instructions and insert waits for the
|
* Assign scoreboard slots to asynchronous instructions and insert waits for the
|
||||||
* appropriate hazard tracking.
|
* appropriate hazard tracking.
|
||||||
@@ -38,6 +136,9 @@ void
|
|||||||
agx_insert_waits(agx_context *ctx)
{
   agx_foreach_block(ctx, block) {
      /* Normal path: per-block hazard tracking */
      if (!(agx_debug & AGX_DBG_WAIT)) {
         agx_insert_waits_local(ctx, block);
         continue;
      }

      /* The "wait" debug option stalls after every async instruction */
      agx_insert_waits_trivial(ctx, block);
   }
}
|
||||||
|
Reference in New Issue
Block a user