radv: Add suballocation for shaders.
This reduces the number of BOs that have to be added to the BO list for each submission. Free space is currently found with a fairly simple linear search. That could eventually be replaced by a binary tree which, with some per-node information, would make checking whether a slab has space O(1) and finding that space O(log n) in the number of buffers in the slab.

Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
@@ -546,7 +546,7 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct ac_vs_output_info *outinfo)
|
||||
{
|
||||
struct radeon_winsys *ws = cmd_buffer->device->ws;
|
||||
uint64_t va = ws->buffer_get_va(shader->bo);
|
||||
uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
|
||||
unsigned export_count;
|
||||
|
||||
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
|
||||
@@ -596,7 +596,7 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct ac_es_output_info *outinfo)
|
||||
{
|
||||
struct radeon_winsys *ws = cmd_buffer->device->ws;
|
||||
uint64_t va = ws->buffer_get_va(shader->bo);
|
||||
uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
|
||||
|
||||
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
|
||||
radv_emit_prefetch(cmd_buffer, va, shader->code_size);
|
||||
@@ -615,7 +615,7 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_shader_variant *shader)
|
||||
{
|
||||
struct radeon_winsys *ws = cmd_buffer->device->ws;
|
||||
uint64_t va = ws->buffer_get_va(shader->bo);
|
||||
uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
|
||||
uint32_t rsrc2 = shader->rsrc2;
|
||||
|
||||
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
|
||||
@@ -640,7 +640,7 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_shader_variant *shader)
|
||||
{
|
||||
struct radeon_winsys *ws = cmd_buffer->device->ws;
|
||||
uint64_t va = ws->buffer_get_va(shader->bo);
|
||||
uint64_t va = ws->buffer_get_va(shader->bo) + shader->bo_offset;
|
||||
|
||||
ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8);
|
||||
radv_emit_prefetch(cmd_buffer, va, shader->code_size);
|
||||
@@ -775,7 +775,7 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
|
||||
S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
|
||||
S_028B90_ENABLE(gs_num_invocations > 0));
|
||||
|
||||
va = ws->buffer_get_va(gs->bo);
|
||||
va = ws->buffer_get_va(gs->bo) + gs->bo_offset;
|
||||
ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8);
|
||||
radv_emit_prefetch(cmd_buffer, va, gs->code_size);
|
||||
|
||||
@@ -816,8 +816,7 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
|
||||
assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
|
||||
|
||||
ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
|
||||
|
||||
va = ws->buffer_get_va(ps->bo);
|
||||
va = ws->buffer_get_va(ps->bo) + ps->bo_offset;
|
||||
ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8);
|
||||
radv_emit_prefetch(cmd_buffer, va, ps->code_size);
|
||||
|
||||
@@ -2256,7 +2255,7 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
||||
cmd_buffer->state.emitted_compute_pipeline = pipeline;
|
||||
|
||||
compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
|
||||
va = ws->buffer_get_va(compute_shader->bo);
|
||||
va = ws->buffer_get_va(compute_shader->bo) + compute_shader->bo_offset;
|
||||
|
||||
ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8);
|
||||
radv_emit_prefetch(cmd_buffer, va, compute_shader->code_size);
|
||||
|
@@ -1080,6 +1080,9 @@ VkResult radv_CreateDevice(
|
||||
else
|
||||
device->alloc = physical_device->instance->alloc;
|
||||
|
||||
mtx_init(&device->shader_slab_mutex, mtx_plain);
|
||||
list_inithead(&device->shader_slabs);
|
||||
|
||||
for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
|
||||
const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
|
||||
uint32_t qfi = queue_create->queueFamilyIndex;
|
||||
@@ -1270,6 +1273,8 @@ void radv_DestroyDevice(
|
||||
VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
|
||||
radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
|
||||
|
||||
radv_destroy_shader_slabs(device);
|
||||
|
||||
vk_free(&device->alloc, device);
|
||||
}
|
||||
|
||||
|
@@ -381,7 +381,10 @@ void radv_shader_variant_destroy(struct radv_device *device,
|
||||
if (!p_atomic_dec_zero(&variant->ref_count))
|
||||
return;
|
||||
|
||||
device->ws->buffer_destroy(variant->bo);
|
||||
mtx_lock(&device->shader_slab_mutex);
|
||||
list_del(&variant->slab_list);
|
||||
mtx_unlock(&device->shader_slab_mutex);
|
||||
|
||||
free(variant);
|
||||
}
|
||||
|
||||
@@ -431,14 +434,8 @@ static void radv_fill_shader_variant(struct radv_device *device,
|
||||
S_00B848_DX10_CLAMP(1) |
|
||||
S_00B848_FLOAT_MODE(variant->config.float_mode);
|
||||
|
||||
variant->bo = device->ws->buffer_create(device->ws, binary->code_size, 256,
|
||||
RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
|
||||
|
||||
void *ptr = device->ws->buffer_map(variant->bo);
|
||||
void *ptr = radv_alloc_shader_memory(device, variant);
|
||||
memcpy(ptr, binary->code, binary->code_size);
|
||||
device->ws->buffer_unmap(variant->bo);
|
||||
|
||||
|
||||
}
|
||||
|
||||
static struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
|
||||
@@ -2426,3 +2423,56 @@ VkResult radv_CreateComputePipelines(
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void *radv_alloc_shader_memory(struct radv_device *device,
|
||||
struct radv_shader_variant *shader)
|
||||
{
|
||||
mtx_lock(&device->shader_slab_mutex);
|
||||
list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
|
||||
uint64_t offset = 0;
|
||||
list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
|
||||
if (s->bo_offset - offset >= shader->code_size) {
|
||||
shader->bo = slab->bo;
|
||||
shader->bo_offset = offset;
|
||||
list_addtail(&shader->slab_list, &s->slab_list);
|
||||
mtx_unlock(&device->shader_slab_mutex);
|
||||
return slab->ptr + offset;
|
||||
}
|
||||
offset = align_u64(s->bo_offset + s->code_size, 256);
|
||||
}
|
||||
if (slab->size - offset >= shader->code_size) {
|
||||
shader->bo = slab->bo;
|
||||
shader->bo_offset = offset;
|
||||
list_addtail(&shader->slab_list, &slab->shaders);
|
||||
mtx_unlock(&device->shader_slab_mutex);
|
||||
return slab->ptr + offset;
|
||||
}
|
||||
}
|
||||
|
||||
mtx_unlock(&device->shader_slab_mutex);
|
||||
struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
|
||||
|
||||
slab->size = 256 * 1024;
|
||||
slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
|
||||
RADEON_DOMAIN_VRAM, 0);
|
||||
slab->ptr = (char*)device->ws->buffer_map(slab->bo);
|
||||
list_inithead(&slab->shaders);
|
||||
|
||||
mtx_lock(&device->shader_slab_mutex);
|
||||
list_add(&slab->slabs, &device->shader_slabs);
|
||||
|
||||
shader->bo = slab->bo;
|
||||
shader->bo_offset = 0;
|
||||
list_add(&shader->slab_list, &slab->shaders);
|
||||
mtx_unlock(&device->shader_slab_mutex);
|
||||
return slab->ptr;
|
||||
}
|
||||
|
||||
void radv_destroy_shader_slabs(struct radv_device *device)
|
||||
{
|
||||
list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
|
||||
device->ws->buffer_destroy(slab->bo);
|
||||
free(slab);
|
||||
}
|
||||
mtx_destroy(&device->shader_slab_mutex);
|
||||
}
|
||||
|
@@ -168,6 +168,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
|
||||
if (!variant)
|
||||
return NULL;
|
||||
|
||||
variant->code_size = entry->code_size;
|
||||
variant->config = entry->config;
|
||||
variant->info = entry->variant_info;
|
||||
variant->rsrc1 = entry->rsrc1;
|
||||
@@ -175,12 +176,8 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
|
||||
variant->code_size = entry->code_size;
|
||||
variant->ref_count = 1;
|
||||
|
||||
variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
|
||||
RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS);
|
||||
|
||||
void *ptr = device->ws->buffer_map(variant->bo);
|
||||
void *ptr = radv_alloc_shader_memory(device, variant);
|
||||
memcpy(ptr, entry->code, entry->code_size);
|
||||
device->ws->buffer_unmap(variant->bo);
|
||||
|
||||
entry->variant = variant;
|
||||
}
|
||||
|
@@ -549,6 +549,9 @@ struct radv_device {
|
||||
struct radv_pipeline_cache * mem_cache;
|
||||
|
||||
uint32_t image_mrt_offset_counter;
|
||||
|
||||
struct list_head shader_slabs;
|
||||
mtx_t shader_slab_mutex;
|
||||
};
|
||||
|
||||
struct radv_device_memory {
|
||||
@@ -981,17 +984,35 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
|
||||
stage = __builtin_ffs(__tmp) - 1, __tmp; \
|
||||
__tmp &= ~(1 << (stage)))
|
||||
|
||||
|
||||
struct radv_shader_slab {
|
||||
struct list_head slabs;
|
||||
struct list_head shaders;
|
||||
struct radeon_winsys_bo *bo;
|
||||
uint64_t size;
|
||||
char *ptr;
|
||||
};
|
||||
|
||||
struct radv_shader_variant {
|
||||
uint32_t ref_count;
|
||||
|
||||
struct radeon_winsys_bo *bo;
|
||||
uint64_t bo_offset;
|
||||
struct ac_shader_config config;
|
||||
struct ac_shader_variant_info info;
|
||||
unsigned rsrc1;
|
||||
unsigned rsrc2;
|
||||
uint32_t code_size;
|
||||
|
||||
struct list_head slab_list;
|
||||
};
|
||||
|
||||
|
||||
void *radv_alloc_shader_memory(struct radv_device *device,
|
||||
struct radv_shader_variant *shader);
|
||||
|
||||
void radv_destroy_shader_slabs(struct radv_device *device);
|
||||
|
||||
struct radv_depth_stencil_state {
|
||||
uint32_t db_depth_control;
|
||||
uint32_t db_stencil_control;
|
||||
|
Reference in New Issue
Block a user