anv: setup scratch space correctly for RT shaders

Things are a bit confusing because we use the term "scratch" for two
different things:
  * the buffer for register allocation spilling
  * the buffer for storing live values between split shaders around shader calls

Here we're fixing the missing register allocation spilling buffer.

v2: update comments (Caio)
    fix scratch bo size computation with pipeline libraries (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16970>
This commit is contained in:
Lionel Landwerlin
2021-08-17 14:51:12 +03:00
committed by Marge Bot
parent f3ddfd81b4
commit 5ad803840d
3 changed files with 50 additions and 0 deletions

View File

@@ -2474,6 +2474,9 @@ compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
anv_pipeline_add_executables(&pipeline->base, stage, bin);
util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
pipeline->scratch_size =
MAX2(pipeline->scratch_size, bin->prog_data->total_scratch);
*shader_out = bin;
return VK_SUCCESS;
@@ -3131,6 +3134,14 @@ anv_ray_tracing_pipeline_create(
return result;
}
/* Compute the size of the scratch BO (for register spilling) by taking the
* max of all the shaders in the pipeline.
*/
util_dynarray_foreach(&pipeline->shaders, struct anv_shader_bin *, shader) {
pipeline->scratch_size =
MAX2(pipeline->scratch_size, (*shader)->prog_data->total_scratch);
}
if (pCreateInfo->pLibraryInfo) {
uint32_t g = pCreateInfo->groupCount;
for (uint32_t l = 0; l < pCreateInfo->pLibraryInfo->libraryCount; l++) {
@@ -3140,6 +3151,12 @@ anv_ray_tracing_pipeline_create(
anv_pipeline_to_ray_tracing(library);
for (uint32_t lg = 0; lg < rt_library->group_count; lg++)
pipeline->groups[g++] = rt_library->groups[lg];
/* Also account for all the pipeline libraries for the size of the
* scratch BO.
*/
pipeline->scratch_size =
MAX2(pipeline->scratch_size, rt_library->scratch_size);
}
}

View File

@@ -3016,6 +3016,9 @@ struct anv_ray_tracing_pipeline {
* client has requested a dynamic stack size.
*/
uint32_t stack_size;
/* Maximum scratch size for all shaders in this pipeline. */
uint32_t scratch_size;
};
#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \

View File

@@ -5478,6 +5478,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
uint32_t launch_depth,
uint64_t launch_size_addr)
{
struct anv_device *device = cmd_buffer->device;
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
struct anv_ray_tracing_pipeline *pipeline = rt->pipeline;
@@ -5613,6 +5614,35 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
}
}
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BTD), btd) {
/* TODO: This is the timeout after which the bucketed thread dispatcher
* will kick off a wave of threads. We go with the lowest value
* for now. It could be tweaked on a per application basis
* (drirc).
*/
btd.DispatchTimeoutCounter = _64clocks;
/* BSpec 43851: "This field must be programmed to 6h i.e. memory backed
* buffer must be 128KB."
*/
btd.PerDSSMemoryBackedBufferSize = 6;
btd.MemoryBackedBufferBasePointer = (struct anv_address) { .bo = device->btd_fifo_bo };
if (pipeline->scratch_size > 0) {
struct anv_bo *scratch_bo =
anv_scratch_pool_alloc(device,
&device->scratch_pool,
MESA_SHADER_COMPUTE,
pipeline->scratch_size);
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc,
scratch_bo);
uint32_t scratch_surf =
anv_scratch_pool_get_surf(cmd_buffer->device,
&device->scratch_pool,
pipeline->scratch_size);
btd.ScratchSpaceBuffer = scratch_surf >> 4;
}
}
anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) {
cw.IndirectParameterEnable = is_indirect;
cw.PredicateEnable = false;