anv: cache raytracing trampoline shader

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8637>
This commit is contained in:
Lionel Landwerlin
2021-04-02 17:03:13 +03:00
committed by Marge Bot
parent ab77aeb488
commit 045f4600b1
4 changed files with 94 additions and 41 deletions

View File

@@ -3354,6 +3354,10 @@ VkResult anv_CreateDevice(
anv_pipeline_cache_init(&device->default_pipeline_cache, device,
true /* cache_enabled */, false /* external_sync */);
result = anv_device_init_rt_trampoline(device);
if (result != VK_SUCCESS)
goto fail_rt_trampoline;
anv_device_init_blorp(device);
anv_device_init_border_colors(device);
@@ -3364,6 +3368,8 @@ VkResult anv_CreateDevice(
return VK_SUCCESS;
fail_rt_trampoline:
anv_pipeline_cache_finish(&device->default_pipeline_cache);
fail_trivial_batch_bo_and_scratch_pool:
anv_scratch_pool_finish(device, &device->scratch_pool);
anv_device_release_bo(device, device->trivial_batch_bo);
@@ -3427,6 +3433,8 @@ void anv_DestroyDevice(
anv_device_finish_blorp(device);
anv_device_finish_rt_shaders(device);
anv_pipeline_cache_finish(&device->default_pipeline_cache);
#ifdef HAVE_VALGRIND

View File

@@ -359,9 +359,6 @@ void anv_DestroyPipeline(
struct anv_shader_bin *, shader) {
anv_shader_bin_unref(device, *shader);
}
anv_state_pool_free(&device->instruction_state_pool,
rt_pipeline->trampoline);
break;
}
@@ -2772,6 +2769,82 @@ anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
return VK_SUCCESS;
}
/**
 * Set up the device-wide ray-tracing trampoline compute shader.
 *
 * When VK_KHR_ray_tracing_pipeline is not enabled on the device this is a
 * no-op that returns VK_SUCCESS.  Otherwise the trampoline is first looked
 * up in the device's default pipeline cache; on a miss it is built with
 * brw_nir_create_raygen_trampoline(), compiled with brw_compile_cs() and
 * uploaded through anv_device_upload_kernel().  The resulting shader is
 * stored in device->rt_trampoline.
 *
 * \param device  Device whose rt_trampoline field is initialized.
 *
 * \return VK_SUCCESS on success (or when ray tracing is not enabled),
 *         VK_ERROR_OUT_OF_HOST_MEMORY if the shader could not be compiled
 *         or uploaded.
 */
VkResult
anv_device_init_rt_trampoline(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return VK_SUCCESS;

   /* Only needed as the out-parameter of the cache lookup. */
   bool cache_hit;

   /* Cache key: a fixed name plus the compute program key used to compile
    * the trampoline.
    */
   struct brw_rt_trampoline {
      char name[16];
      struct brw_cs_prog_key key;
   } trampoline_key = {
      .name = "rt-trampoline",
      .key = {
         /* TODO: Other subgroup sizes? */
         .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
      },
   };

   device->rt_trampoline =
      anv_device_search_for_kernel(device, &device->default_pipeline_cache,
                                   &trampoline_key, sizeof(trampoline_key),
                                   &cache_hit);
   if (device->rt_trampoline != NULL)
      return VK_SUCCESS;

   /* Cache miss: compile the trampoline.  Everything allocated for the
    * compile hangs off tmp_ctx and is released with it.
    */
   void *tmp_ctx = ralloc_context(NULL);
   nir_shader *trampoline_nir =
      brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);

   /* The trampoline uses no surfaces and no samplers. */
   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   uint32_t dummy_params[4] = { 0, };
   struct brw_cs_prog_data trampoline_prog_data = {
      .base.nr_params = 4,
      .base.param = dummy_params,
      .uses_inline_data = true,
      .uses_btd_stack_ids = true,
   };
   struct brw_compile_cs_params params = {
      .nir = trampoline_nir,
      .key = &trampoline_key.key,
      .prog_data = &trampoline_prog_data,
      .log_data = device,
   };

   const unsigned *tramp_data =
      brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
   if (tramp_data == NULL) {
      /* A NULL program means the compile failed; bail out before handing a
       * NULL pointer to the upload path.
       */
      ralloc_free(tmp_ctx);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   device->rt_trampoline =
      anv_device_upload_kernel(device, &device->default_pipeline_cache,
                               MESA_SHADER_COMPUTE,
                               &trampoline_key, sizeof(trampoline_key),
                               tramp_data,
                               trampoline_prog_data.base.program_size,
                               &trampoline_prog_data.base,
                               sizeof(trampoline_prog_data),
                               NULL, 0, NULL, &bind_map);

   /* tramp_data lives in tmp_ctx, so only free it after the upload. */
   ralloc_free(tmp_ctx);

   if (device->rt_trampoline == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   return VK_SUCCESS;
}
/**
 * Release the device-wide ray-tracing shader state.
 *
 * Drops the reference held in device->rt_trampoline.  Nothing is done when
 * VK_KHR_ray_tracing_pipeline is not enabled on the device.
 *
 * \param device  Device whose rt_trampoline reference is released.
 */
void
anv_device_finish_rt_shaders(struct anv_device *device)
{
   if (device->vk.enabled_extensions.KHR_ray_tracing_pipeline) {
      anv_shader_bin_unref(device, device->rt_trampoline);
   }
}
VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
struct anv_device *device,
@@ -2788,38 +2861,6 @@ anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
/* TODO: We should probably create this once per device */
{
void *tmp_ctx = ralloc_context(NULL);
nir_shader *trampoline_nir =
brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
struct brw_cs_prog_key key = {
/* TODO: Other subgroup sizes? */
.base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
};
struct brw_cs_prog_data prog_data = {
.base.nr_params = 4,
.uses_inline_data = true,
.uses_btd_stack_ids = true,
};
struct brw_compile_cs_params params = {
.nir = trampoline_nir,
.key = &key,
.prog_data = &prog_data,
.log_data = pipeline->base.device,
};
const unsigned *tramp_data =
brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
pipeline->trampoline =
anv_state_pool_alloc(&device->instruction_state_pool,
prog_data.base.program_size, 64);
memcpy(pipeline->trampoline.map, tramp_data, prog_data.base.program_size);
ralloc_free(tmp_ctx);
}
result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
if (result != VK_SUCCESS)
goto fail;
@@ -2829,8 +2870,6 @@ anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
return VK_SUCCESS;
fail:
anv_state_pool_free(&device->instruction_state_pool,
pipeline->trampoline);
util_dynarray_foreach(&pipeline->shaders,
struct anv_shader_bin *, shader) {
anv_shader_bin_unref(device, *shader);

View File

@@ -1241,6 +1241,8 @@ struct anv_device {
struct anv_scratch_pool scratch_pool;
struct anv_bo *rt_scratch_bos[16];
struct anv_shader_bin *rt_trampoline;
pthread_mutex_t mutex;
pthread_cond_t queue_submit;
int _lost;
@@ -3610,9 +3612,6 @@ struct anv_ray_tracing_pipeline {
/* All shaders in the pipeline */
struct util_dynarray shaders;
/* Trampoline shader */
struct anv_state trampoline;
/* Dummy stack return shader */
struct anv_shader_bin * trivial_return_shader;
@@ -3681,6 +3680,12 @@ anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline
return &get_vs_prog_data(pipeline)->base;
}
VkResult
anv_device_init_rt_trampoline(struct anv_device *device);
void
anv_device_finish_rt_shaders(struct anv_device *device);
VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
struct anv_device *device,

View File

@@ -5123,10 +5123,11 @@ cmd_buffer_trace_rays(struct anv_cmd_buffer *cmd_buffer,
cw.EmitInlineParameter = true;
const gl_shader_stage s = MESA_SHADER_RAYGEN;
struct anv_device *device = cmd_buffer->device;
struct anv_state *surfaces = &cmd_buffer->state.binding_tables[s];
struct anv_state *samplers = &cmd_buffer->state.samplers[s];
cw.InterfaceDescriptor = (struct GENX(INTERFACE_DESCRIPTOR_DATA)) {
.KernelStartPointer = pipeline->trampoline.offset,
.KernelStartPointer = device->rt_trampoline->kernel.offset,
.SamplerStatePointer = samplers->offset,
/* i965: DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4), */
.SamplerCount = 0,