diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index df23f1a3eaa..16f001c3786 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -2474,6 +2474,9 @@ compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline, anv_pipeline_add_executables(&pipeline->base, stage, bin); util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin); + pipeline->scratch_size = + MAX2(pipeline->scratch_size, bin->prog_data->total_scratch); + *shader_out = bin; return VK_SUCCESS; @@ -3131,6 +3134,14 @@ anv_ray_tracing_pipeline_create( return result; } + /* Scratch BO size (for register spilling): max total_scratch of all shaders. + * NOTE(review): compile_upload_rt_shader also does this; redundant? Confirm. + */ + util_dynarray_foreach(&pipeline->shaders, struct anv_shader_bin *, shader) { + pipeline->scratch_size = + MAX2(pipeline->scratch_size, (*shader)->prog_data->total_scratch); + } + if (pCreateInfo->pLibraryInfo) { uint32_t g = pCreateInfo->groupCount; for (uint32_t l = 0; l < pCreateInfo->pLibraryInfo->libraryCount; l++) { @@ -3140,6 +3151,12 @@ anv_ray_tracing_pipeline_create( anv_pipeline_to_ray_tracing(library); for (uint32_t lg = 0; lg < rt_library->group_count; lg++) pipeline->groups[g++] = rt_library->groups[lg]; + + /* Also account for all the pipeline libraries for the size of the + * scratch BO. + */ + pipeline->scratch_size = + MAX2(pipeline->scratch_size, rt_library->scratch_size); } } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 6c7407742d5..1d07fd3b1c8 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -3016,6 +3016,9 @@ struct anv_ray_tracing_pipeline { * client has requested a dynamic stack size. */ uint32_t stack_size; + + /* Max scratch size of all shaders in this pipeline and its libraries. 
*/ + uint32_t scratch_size; }; #define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum) \ diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 565a80e0d3e..a0f0e9cf121 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -5478,6 +5478,7 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, uint32_t launch_depth, uint64_t launch_size_addr) { + struct anv_device *device = cmd_buffer->device; struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt; struct anv_ray_tracing_pipeline *pipeline = rt->pipeline; @@ -5613,6 +5614,35 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer, } } + anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BTD), btd) { + /* TODO: This is the timeout after which the bucketed thread dispatcher + * will kick off a wave of threads. We go with the lowest value + * for now. It could be tweaked on a per application basis + * (drirc). + */ + btd.DispatchTimeoutCounter = _64clocks; + /* BSpec 43851: "This field must be programmed to 6h i.e. memory backed + * buffer must be 128KB." + */ + btd.PerDSSMemoryBackedBufferSize = 6; + btd.MemoryBackedBufferBasePointer = (struct anv_address) { .bo = device->btd_fifo_bo }; + if (pipeline->scratch_size > 0) { + struct anv_bo *scratch_bo = + anv_scratch_pool_alloc(device, + &device->scratch_pool, + MESA_SHADER_COMPUTE, + pipeline->scratch_size); /* NOTE(review): used unchecked; confirm non-NULL */ + anv_reloc_list_add_bo(cmd_buffer->batch.relocs, + cmd_buffer->batch.alloc, + scratch_bo); + uint32_t scratch_surf = + anv_scratch_pool_get_surf(cmd_buffer->device, + &device->scratch_pool, + pipeline->scratch_size); + btd.ScratchSpaceBuffer = scratch_surf >> 4; + } + } + anv_batch_emit(&cmd_buffer->batch, GENX(COMPUTE_WALKER), cw) { cw.IndirectParameterEnable = is_indirect; cw.PredicateEnable = false;