agx: improve scratch size accounting
- prep for preamble scratch
- only include scratch actually used
- prep for spilling scratch

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27616>
This commit is contained in:

committed by
Marge Bot

parent
a2328820f7
commit
9445005c87
@@ -818,6 +818,7 @@ agx_emit_load_scratch(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr)
|
||||
|
||||
agx_stack_load_to(b, dst, offset, format, mask);
|
||||
agx_emit_cached_split(b, dst, nr);
|
||||
b->shader->any_scratch = true;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -829,6 +830,7 @@ agx_emit_store_scratch(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
unsigned mask = BITFIELD_MASK(nir_src_num_components(instr->src[0]));
|
||||
|
||||
agx_stack_store(b, value, offset, format, mask);
|
||||
b->shader->any_scratch = true;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2776,15 +2778,12 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
|
||||
emit_cf_list(ctx, &impl->body);
|
||||
agx_emit_phis_deferred(ctx);
|
||||
|
||||
if (impl->function->is_entrypoint && nir->scratch_size > 0) {
|
||||
/* Apple always allocates 40 more bytes in the entrypoint and aligns to 4. */
|
||||
uint64_t stack_size = ALIGN(DIV_ROUND_UP(nir->scratch_size, 4) + 10, 4);
|
||||
|
||||
assert(stack_size < INT16_MAX);
|
||||
|
||||
agx_block *start_block = agx_start_block(ctx);
|
||||
agx_builder _b = agx_init_builder(ctx, agx_before_block(start_block));
|
||||
agx_stack_adjust(&_b, stack_size);
|
||||
/* Only allocate scratch if it's statically used, regardless of whether the NIR
|
||||
* info claims otherwise.
|
||||
*/
|
||||
if (ctx->any_scratch) {
|
||||
assert(!ctx->is_preamble && "preambles don't use scratch");
|
||||
ctx->scratch_size = ALIGN(nir->scratch_size, 16);
|
||||
}
|
||||
|
||||
/* Stop the main shader or preamble shader after the exit block. For real
|
||||
@@ -2838,6 +2837,22 @@ agx_compile_function_nir(nir_shader *nir, nir_function_impl *impl,
|
||||
agx_validate(ctx, "RA");
|
||||
agx_lower_64bit_postra(ctx);
|
||||
|
||||
if (ctx->scratch_size > 0) {
|
||||
/* Apple always allocates 40 more bytes in the entrypoint and aligns to 4. */
|
||||
uint64_t stack_size = ALIGN(DIV_ROUND_UP(ctx->scratch_size, 4) + 10, 4);
|
||||
|
||||
assert(stack_size < INT16_MAX);
|
||||
|
||||
agx_block *start_block = agx_start_block(ctx);
|
||||
agx_builder _b = agx_init_builder(ctx, agx_before_block(start_block));
|
||||
agx_stack_adjust(&_b, stack_size);
|
||||
|
||||
if (ctx->is_preamble)
|
||||
out->preamble_scratch_size = stack_size;
|
||||
else
|
||||
out->scratch_size = stack_size;
|
||||
}
|
||||
|
||||
if (ctx->stage == MESA_SHADER_VERTEX && !impl->function->is_preamble)
|
||||
agx_set_st_vary_final(ctx);
|
||||
|
||||
@@ -3145,8 +3160,6 @@ agx_compile_shader_nir(nir_shader *nir, struct agx_shader_key *key,
|
||||
nir_print_shader(nir, stdout);
|
||||
|
||||
out->local_size = nir->info.shared_size;
|
||||
if (nir->scratch_size > 0)
|
||||
out->scratch_size = ALIGN(DIV_ROUND_UP(nir->scratch_size, 4) + 10, 4);
|
||||
|
||||
nir_foreach_function_with_impl(func, impl, nir) {
|
||||
unsigned offset =
|
||||
|
@@ -114,8 +114,8 @@ struct agx_shader_info {
|
||||
/* Local memory allocation in bytes */
|
||||
unsigned local_size;
|
||||
|
||||
/* Scratch memory allocation in bytes */
|
||||
unsigned scratch_size;
|
||||
/* Scratch memory allocation in bytes for main/preamble respectively */
|
||||
unsigned scratch_size, preamble_scratch_size;
|
||||
|
||||
/* Does the shader have a preamble? If so, it is at offset preamble_offset.
|
||||
* The main shader is at offset main_offset. The preamble is executed first.
|
||||
|
@@ -416,6 +416,7 @@ typedef struct {
|
||||
nir_shader *nir;
|
||||
gl_shader_stage stage;
|
||||
bool is_preamble;
|
||||
unsigned scratch_size;
|
||||
|
||||
struct list_head blocks; /* list of agx_block */
|
||||
struct agx_shader_info *out;
|
||||
@@ -427,6 +428,9 @@ typedef struct {
|
||||
/* For creating temporaries */
|
||||
unsigned alloc;
|
||||
|
||||
/* Does the shader statically use scratch memory? */
|
||||
bool any_scratch;
|
||||
|
||||
/* I don't really understand how writeout ops work yet */
|
||||
bool did_writeout;
|
||||
|
||||
|
Reference in New Issue
Block a user