Files
third_party_mesa3d/src/asahi/compiler/agx_insert_waits.c
Alyssa Rosenzweig f4fd4d4d50 agx: Fix atomics with no destination
We need to:

* properly null out the dest in DCE.
* not assert out when packing with null dest

Fixes potential reg pressure blow up with atomics that don't use their
destinations, though I don't see shader-db changes.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24635>
2023-08-11 20:31:27 +00:00

164 lines
4.6 KiB
C

/*
* Copyright 2022 Alyssa Rosenzweig
* SPDX-License-Identifier: MIT
*/
#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_debug.h"
#define AGX_MAX_PENDING (8)
/*
 * Tell whether an instruction is asynchronous, i.e. whether its opcode carries
 * a scoreboard immediate and it therefore needs a scoreboard slot.
 */
static bool
instr_is_async(agx_instr *I)
{
   if (agx_opcodes_info[I->op].immediates & AGX_IMMEDIATE_SCOREBOARD)
      return true;

   return false;
}
/*
* Tracking state kept for one scoreboard slot while scanning a block.
*/
struct slot {
/* Set of registers this slot is currently writing. Bits are added when an
* asynchronous instruction with register destinations is recorded on the
* slot, and the whole set is cleared when a wait on the slot is inserted.
*/
BITSET_DECLARE(writes, AGX_NUM_REGS);
/* Number of pending messages on this slot. Must not exceed
* AGX_MAX_PENDING for correct results. Incremented for every asynchronous
* instruction recorded on the slot and reset to zero when a wait on the
* slot is inserted.
*/
uint8_t nr_pending;
};
/*
 * Debugging variant of wait insertion: every asynchronous instruction in the
 * block is directly followed by a wait on that instruction's scoreboard slot,
 * so the block stalls after each of them.
 */
static void
agx_insert_waits_trivial(agx_context *ctx, agx_block *block)
{
   agx_foreach_instr_in_block_safe(block, I) {
      /* Synchronous instructions need no wait */
      if (!instr_is_async(I))
         continue;

      /* Stall right behind the instruction itself */
      agx_builder b = agx_init_builder(ctx, agx_after_instr(I));
      agx_wait(&b, I->scoreboard);
   }
}
/*
 * Insert waits within a single block, tracking hazards against two scoreboard
 * slots. An asynchronous instruction is moved to the first idle slot when one
 * exists; otherwise it keeps the slot it already carries. Anything still
 * pending when the block ends is waited on there, rather than doing something
 * more intelligent/global. This should be optimized.
 *
 * XXX: Do any instructions read their sources asynchronously?
 */
static void
agx_insert_waits_local(agx_context *ctx, agx_block *block)
{
   struct slot slots[2] = {0};

   agx_foreach_instr_in_block_safe(block, I) {
      const bool is_async = instr_is_async(I);
      const bool is_barrier = I->op == AGX_OPCODE_THREADGROUP_BARRIER ||
                              I->op == AGX_OPCODE_MEMORY_BARRIER;

      /* One bit per slot that must be drained before I may execute */
      uint8_t to_wait = 0;

      /* Read-after-write: a register source overlaps a slot's pending writes */
      agx_foreach_src(I, s) {
         if (I->src[s].type == AGX_INDEX_REGISTER) {
            unsigned count = agx_read_registers(I, s);

            for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) {
               if (BITSET_TEST_RANGE(slots[i].writes, I->src[s].value,
                                     I->src[s].value + count - 1))
                  to_wait |= BITSET_BIT(i);
            }
         }
      }

      /* Write-after-write: a register destination overlaps a slot's pending
       * writes
       */
      agx_foreach_dest(I, d) {
         if (I->dest[d].type == AGX_INDEX_REGISTER) {
            unsigned count = agx_write_registers(I, d);

            for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) {
               if (BITSET_TEST_RANGE(slots[i].writes, I->dest[d].value,
                                     I->dest[d].value + count - 1))
                  to_wait |= BITSET_BIT(i);
            }
         }
      }

      /* Barriers drain every slot that has something in flight */
      if (is_barrier) {
         for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) {
            if (slots[i].nr_pending != 0)
               to_wait |= BITSET_BIT(i);
         }
      }

      if (is_async) {
         /* Take the first idle slot, if any. With no idle slot the
          * instruction's existing scoreboard value is left untouched.
          */
         for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) {
            if (slots[i].nr_pending == 0) {
               I->scoreboard = i;
               break;
            }
         }

         /* A slot already holding the maximum number of messages has to be
          * drained before another one is queued on it.
          */
         if (slots[I->scoreboard].nr_pending >= AGX_MAX_PENDING)
            to_wait |= BITSET_BIT(I->scoreboard);
      }

      /* Emit the required waits ahead of I and forget what those slots held */
      u_foreach_bit(sb, to_wait) {
         agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
         agx_wait(&b, sb);

         BITSET_ZERO(slots[sb].writes);
         slots[sb].nr_pending = 0;
      }

      /* Only asynchronous instructions occupy a slot */
      if (!is_async)
         continue;

      /* Remember which registers this message will write and count it */
      agx_foreach_dest(I, d) {
         if (agx_is_null(I->dest[d]))
            continue;

         assert(I->dest[d].type == AGX_INDEX_REGISTER);
         BITSET_SET_RANGE(slots[I->scoreboard].writes, I->dest[d].value,
                          I->dest[d].value + agx_write_registers(I, d) - 1);
      }

      slots[I->scoreboard].nr_pending++;
   }

   /* Nothing else executes in the shader after the exit block, so waiting for
    * its outstanding messages would be pointless.
    */
   if (block == agx_exit_block(ctx))
      return;

   /* Everywhere else, drain whatever is still outstanding at the block end */
   agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));

   for (unsigned i = 0; i < ARRAY_SIZE(slots); ++i) {
      if (slots[i].nr_pending != 0)
         agx_wait(&b, i);
   }
}
/*
 * Entry point of the pass: walk every block of the shader and insert the
 * waits needed for hazard tracking of asynchronous instructions. With the
 * AGX_DBG_WAIT debug flag set, the trivial wait-after-every-async-instruction
 * strategy is used; otherwise the local strategy, which also assigns
 * scoreboard slots, is used.
 */
void
agx_insert_waits(agx_context *ctx)
{
   agx_foreach_block(ctx, block) {
      const bool stall_always = (agx_compiler_debug & AGX_DBG_WAIT) != 0;

      if (!stall_always) {
         agx_insert_waits_local(ctx, block);
      } else {
         agx_insert_waits_trivial(ctx, block);
      }
   }
}