radv: upload shader binaries of a pipeline contiguously in memory

RGP expects shaders to be contiguous in memory, otherwise we have to
generate huge captures with lots of holes.

This reduces capture sizes of Cyberpunk 2077 from ~3.5GiB to ~180MiB.

This should also help for future pipeline libraries.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13690>
Author: Samuel Pitoiset
Date: 2021-11-05 13:58:12 +01:00
Committed by: Marge Bot
Parent: a7f0463612
Commit: 3fa2220838
7 changed files with 169 additions and 72 deletions
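The idea in a nutshell, before the diff itself: sum the sizes of all shader binaries in the pipeline (each rounded up to the allocation alignment), allocate a single slab of that size, then copy every binary at its aligned offset so they sit back to back. The standalone sketch below only illustrates that packing scheme; the names (shader_blob, pack_shaders, ALLOC_ALIGNMENT) are hypothetical and are not the RADV API, which does this through radv_pipeline_slab_create() and radv_upload_shaders() as shown in the diff.

/* Hypothetical, simplified sketch of contiguous packing; not RADV code. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ALLOC_ALIGNMENT 256u /* assumed power-of-two alignment, for illustration */

struct shader_blob {
   const void *code;
   uint32_t code_size;
   uint32_t offset; /* filled in by pack_shaders(): offset inside the slab */
};

static uint32_t align_u32(uint32_t value, uint32_t alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

/* Allocate one buffer large enough for all binaries and copy them back to back. */
static void *pack_shaders(struct shader_blob *blobs, unsigned count, uint32_t *total_size)
{
   uint32_t size = 0;
   for (unsigned i = 0; i < count; i++)
      size += align_u32(blobs[i].code_size, ALLOC_ALIGNMENT);

   char *slab = malloc(size);
   if (!slab)
      return NULL;

   uint32_t offset = 0;
   for (unsigned i = 0; i < count; i++) {
      blobs[i].offset = offset;
      memcpy(slab + offset, blobs[i].code, blobs[i].code_size);
      offset += align_u32(blobs[i].code_size, ALLOC_ALIGNMENT);
   }

   *total_size = size;
   return slab;
}

int main(void)
{
   static const char vs[] = "vertex shader bytes";
   static const char fs[] = "fragment shader bytes";
   struct shader_blob blobs[] = {
      { vs, sizeof(vs), 0 },
      { fs, sizeof(fs), 0 },
   };

   uint32_t total = 0;
   void *slab = pack_shaders(blobs, 2, &total);
   if (!slab)
      return 1;

   /* Every shader now lives in the single range [slab, slab + total). */
   printf("slab of %u bytes, second shader at offset %u\n",
          (unsigned)total, (unsigned)blobs[1].offset);
   free(slab);
   return 0;
}

In the actual change, the destination is GPU-visible memory obtained from the shader arena, and each shader's va is set to the slab VA plus its offset.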


@@ -171,6 +171,37 @@ radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline)
   return !!pipeline->gs_copy_shader;
}

static struct radv_pipeline_slab *
radv_pipeline_slab_create(struct radv_device *device, struct radv_pipeline *pipeline,
                          uint32_t code_size)
{
   struct radv_pipeline_slab *slab;

   slab = calloc(1, sizeof(*slab));
   if (!slab)
      return NULL;

   slab->ref_count = 1;

   slab->alloc = radv_alloc_shader_memory(device, code_size, pipeline);
   if (!slab->alloc) {
      free(slab);
      return NULL;
   }

   return slab;
}

void
radv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab)
{
   if (!p_atomic_dec_zero(&slab->ref_count))
      return;

   radv_free_shader_memory(device, slab->alloc);
   free(slab);
}

void
radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
                      const VkAllocationCallbacks *allocator)
@@ -183,6 +214,9 @@ radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline
      free(pipeline->library.stages);
   }

   if (pipeline->slab)
      radv_pipeline_slab_destroy(device, pipeline->slab);

   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
      if (pipeline->shaders[i])
         radv_shader_destroy(device, pipeline->shaders[i]);
@@ -3354,6 +3388,61 @@ non_uniform_access_callback(const nir_src *src, void *_)
   return nir_chase_binding(*src).success ? 0x2 : 0x3;
}

VkResult
radv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline,
                    struct radv_shader_binary **binaries, struct radv_shader_binary *gs_copy_binary)
{
   uint32_t code_size = 0;

   /* Compute the total code size. */
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      struct radv_shader *shader = pipeline->shaders[i];
      if (!shader)
         continue;

      code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   if (pipeline->gs_copy_shader) {
      code_size += align(pipeline->gs_copy_shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   /* Allocate memory for all shader binaries. */
   pipeline->slab = radv_pipeline_slab_create(device, pipeline, code_size);
   if (!pipeline->slab)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* Upload shader binaries. */
   uint64_t slab_va = radv_buffer_get_va(pipeline->slab->alloc->arena->bo);
   uint32_t slab_offset = pipeline->slab->alloc->offset;
   char *slab_ptr = pipeline->slab->alloc->arena->ptr;

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      struct radv_shader *shader = pipeline->shaders[i];
      if (!shader)
         continue;

      shader->va = slab_va + slab_offset;

      void *dest_ptr = slab_ptr + slab_offset;
      if (!radv_shader_binary_upload(device, binaries[i], shader, dest_ptr))
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      slab_offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT);
   }

   if (pipeline->gs_copy_shader) {
      pipeline->gs_copy_shader->va = slab_va + slab_offset;

      void *dest_ptr = slab_ptr + slab_offset;
      if (!radv_shader_binary_upload(device, gs_copy_binary, pipeline->gs_copy_shader, dest_ptr))
         return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   return VK_SUCCESS;
}

VkResult
radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout,
                    struct radv_device *device, struct radv_pipeline_cache *cache,
@@ -3411,11 +3500,6 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
       radv_create_shaders_from_pipeline_cache(device, cache, hash, pipeline,
                                               stack_sizes, num_stack_sizes,
                                               &found_in_application_cache)) {
      if (modules[MESA_SHADER_GEOMETRY] && !pipeline->shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) {
         /* We use the CS slot because graphics pipelines might use all the other ones. */
         pipeline->gs_copy_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
         pipeline->shaders[MESA_SHADER_COMPUTE] = NULL;
      }

      radv_stop_feedback(pipeline_feedback, found_in_application_cache);
      return VK_SUCCESS;
   }
@@ -3692,19 +3776,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
   }

   /* Upload shader binaries. */
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      struct radv_shader *shader = pipeline->shaders[i];
      if (!shader)
         continue;

      if (!radv_shader_binary_upload(device, binaries[i], shader))
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) {
         if (!radv_shader_binary_upload(device, gs_copy_binary, pipeline->gs_copy_shader))
            return VK_ERROR_OUT_OF_DEVICE_MEMORY;
      }
   }
   radv_upload_shaders(device, pipeline, binaries, gs_copy_binary);

   if (!keep_executable_info) {
      if (pipeline->gs_copy_shader) {