vk: Stream surface state instead of using the surface pool

Since the binding table pointer is only 16 bits, we can only have 64 KB
of binding table state allocated at any given time. With a block size of
1 KB, that amounts to just 64 command buffers, which is not enough.
This commit is contained in:
Kristian Høgsberg
2015-05-19 14:14:24 -07:00
parent 01504057f5
commit a1bd426393
4 changed files with 112 additions and 33 deletions

View File

@@ -261,8 +261,8 @@ anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer)
} }
relocate_bo(&batch->bo, &batch->cmd_relocs, aub_bos); relocate_bo(&batch->bo, &batch->cmd_relocs, aub_bos);
relocate_bo(&device->surface_state_block_pool.bo, relocate_bo(&cmd_buffer->surface_bo,
&batch->surf_relocs, aub_bos); &cmd_buffer->surface_relocs, aub_bos);
for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) { for (uint32_t i = 0; i < cmd_buffer->bo_count; i++) {
bo = cmd_buffer->exec2_bos[i]; bo = cmd_buffer->exec2_bos[i];

View File

@@ -481,15 +481,20 @@ anv_batch_init(struct anv_batch *batch, struct anv_device *device)
batch->bo.map = batch->bo.map =
anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE); anv_gem_mmap(device, batch->bo.gem_handle, 0, BATCH_SIZE);
if (batch->bo.map == NULL) { if (batch->bo.map == NULL) {
anv_gem_close(device, batch->bo.gem_handle); result = vk_error(VK_ERROR_MEMORY_MAP_FAILED);
return vk_error(VK_ERROR_MEMORY_MAP_FAILED); goto fail_bo;
} }
batch->cmd_relocs.num_relocs = 0; batch->cmd_relocs.num_relocs = 0;
batch->surf_relocs.num_relocs = 0;
batch->next = batch->bo.map; batch->next = batch->bo.map;
return VK_SUCCESS; return VK_SUCCESS;
fail_bo:
anv_gem_close(device, batch->bo.gem_handle);
return result;
} }
void void
@@ -504,7 +509,6 @@ anv_batch_reset(struct anv_batch *batch)
{ {
batch->next = batch->bo.map; batch->next = batch->bo.map;
batch->cmd_relocs.num_relocs = 0; batch->cmd_relocs.num_relocs = 0;
batch->surf_relocs.num_relocs = 0;
} }
void * void *
@@ -568,7 +572,6 @@ anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
offset = batch->next - batch->bo.map; offset = batch->next - batch->bo.map;
anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset); anv_reloc_list_append(&batch->cmd_relocs, &other->cmd_relocs, offset);
anv_reloc_list_append(&batch->surf_relocs, &other->surf_relocs, offset);
batch->next += size; batch->next += size;
} }
@@ -926,6 +929,8 @@ anv_cmd_buffer_destructor(struct anv_device * device,
{ {
struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object; struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE);
anv_gem_close(device, cmd_buffer->surface_bo.gem_handle);
anv_state_stream_finish(&cmd_buffer->surface_state_stream); anv_state_stream_finish(&cmd_buffer->surface_state_stream);
anv_state_stream_finish(&cmd_buffer->dynamic_state_stream); anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
anv_state_stream_finish(&cmd_buffer->binding_table_state_stream); anv_state_stream_finish(&cmd_buffer->binding_table_state_stream);
@@ -2073,12 +2078,27 @@ VkResult anv_CreateCommandBuffer(
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail; goto fail;
result = anv_bo_init_new(&cmd_buffer->surface_bo, device, BATCH_SIZE);
if (result != VK_SUCCESS)
goto fail_batch;
cmd_buffer->surface_bo.map =
anv_gem_mmap(device, cmd_buffer->surface_bo.gem_handle, 0, BATCH_SIZE);
if (cmd_buffer->surface_bo.map == NULL) {
result = vk_error(VK_ERROR_MEMORY_MAP_FAILED);
goto fail_surface_bo;
}
/* Start surface_next at 1 so surface offset 0 is invalid. */
cmd_buffer->surface_next = 1;
cmd_buffer->surface_relocs.num_relocs = 0;
cmd_buffer->exec2_objects = cmd_buffer->exec2_objects =
anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8, anv_device_alloc(device, 8192 * sizeof(cmd_buffer->exec2_objects[0]), 8,
VK_SYSTEM_ALLOC_TYPE_API_OBJECT); VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
if (cmd_buffer->exec2_objects == NULL) { if (cmd_buffer->exec2_objects == NULL) {
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_batch; goto fail_surface_map;
} }
cmd_buffer->exec2_bos = cmd_buffer->exec2_bos =
@@ -2105,6 +2125,10 @@ VkResult anv_CreateCommandBuffer(
fail_exec2_objects: fail_exec2_objects:
anv_device_free(device, cmd_buffer->exec2_objects); anv_device_free(device, cmd_buffer->exec2_objects);
fail_surface_map:
anv_gem_munmap(cmd_buffer->surface_bo.map, BATCH_SIZE);
fail_surface_bo:
anv_gem_close(device, cmd_buffer->surface_bo.gem_handle);
fail_batch: fail_batch:
anv_batch_finish(&cmd_buffer->batch, device); anv_batch_finish(&cmd_buffer->batch, device);
fail: fail:
@@ -2130,7 +2154,7 @@ VkResult anv_BeginCommandBuffer(
.GeneralStateBufferSize = 0xfffff, .GeneralStateBufferSize = 0xfffff,
.GeneralStateBufferSizeModifyEnable = true, .GeneralStateBufferSizeModifyEnable = true,
.SurfaceStateBaseAddress = { &device->surface_state_block_pool.bo, 0 }, .SurfaceStateBaseAddress = { &cmd_buffer->surface_bo, 0 },
.SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */ .SurfaceStateMemoryObjectControlState = 0, /* FIXME: MOCS */
.SurfaceStateBaseAddressModifyEnable = true, .SurfaceStateBaseAddressModifyEnable = true,
@@ -2277,13 +2301,13 @@ VkResult anv_EndCommandBuffer(
pthread_mutex_lock(&device->mutex); pthread_mutex_lock(&device->mutex);
/* Add block pool bos first so we can add them with their relocs. */ /* Add block pool bos first so we can add them with their relocs. */
anv_cmd_buffer_add_bo(cmd_buffer, &device->surface_state_block_pool.bo, anv_cmd_buffer_add_bo(cmd_buffer, &cmd_buffer->surface_bo,
&batch->surf_relocs); &cmd_buffer->surface_relocs);
anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->surf_relocs); anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs); anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->cmd_relocs);
anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs); anv_cmd_buffer_add_bo(cmd_buffer, &batch->bo, &batch->cmd_relocs);
anv_cmd_buffer_process_relocs(cmd_buffer, &batch->surf_relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs); anv_cmd_buffer_process_relocs(cmd_buffer, &batch->cmd_relocs);
cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects; cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
@@ -2313,6 +2337,8 @@ VkResult anv_ResetCommandBuffer(
struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
anv_batch_reset(&cmd_buffer->batch); anv_batch_reset(&cmd_buffer->batch);
cmd_buffer->surface_next = 0;
cmd_buffer->surface_relocs.num_relocs = 0;
return VK_SUCCESS; return VK_SUCCESS;
} }
@@ -2363,6 +2389,22 @@ void anv_CmdBindDynamicStateObject(
}; };
} }
static struct anv_state
anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
uint32_t size, uint32_t alignment)
{
struct anv_state state;
state.offset = ALIGN_U32(cmd_buffer->surface_next, alignment);
state.map = cmd_buffer->surface_bo.map + state.offset;
state.alloc_size = size;
cmd_buffer->surface_next = state.offset + size;
assert(state.offset + size < cmd_buffer->surface_bo.size);
return state;
}
void anv_CmdBindDescriptorSets( void anv_CmdBindDescriptorSets(
VkCmdBuffer cmdBuffer, VkCmdBuffer cmdBuffer,
VkPipelineBindPoint pipelineBindPoint, VkPipelineBindPoint pipelineBindPoint,
@@ -2392,8 +2434,11 @@ void anv_CmdBindDescriptorSets(
for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) { for (uint32_t b = 0; b < set_layout->stage[s].surface_count; b++) {
struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view; struct anv_surface_view *view = set->descriptors[surface_to_desc[b]].view;
bindings->descriptors[s].surfaces[start + b] = struct anv_state state =
view->surface_state.offset; anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
memcpy(state.map, view->surface_state.map, 64);
bindings->descriptors[s].surfaces[start + b] = state.offset;
bindings->descriptors[s].relocs[start + b].bo = view->bo; bindings->descriptors[s].relocs[start + b].bo = view->bo;
bindings->descriptors[s].relocs[start + b].offset = view->offset; bindings->descriptors[s].relocs[start + b].offset = view->offset;
} }
@@ -2480,24 +2525,33 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
if (layers + surface_count > 0) { if (layers + surface_count > 0) {
struct anv_state state; struct anv_state state;
uint32_t offset;
uint32_t *address;
uint32_t size; uint32_t size;
size = (bias + surface_count) * sizeof(uint32_t); size = (bias + surface_count) * sizeof(uint32_t);
state = anv_state_stream_alloc(&cmd_buffer->binding_table_state_stream, state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
size, 32);
memcpy(state.map, bindings->descriptors[s].surfaces, size); memcpy(state.map, bindings->descriptors[s].surfaces, size);
for (uint32_t i = 0; i < layers; i++) for (uint32_t i = 0; i < layers; i++) {
anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, offset = bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t);
bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t), address = cmd_buffer->surface_bo.map + offset;
*address =
anv_reloc_list_add(&cmd_buffer->surface_relocs, offset,
bindings->descriptors[s].relocs[i].bo, bindings->descriptors[s].relocs[i].bo,
bindings->descriptors[s].relocs[i].offset); bindings->descriptors[s].relocs[i].offset);
}
for (uint32_t i = 0; i < surface_count; i++) for (uint32_t i = 0; i < surface_count; i++) {
anv_reloc_list_add(&cmd_buffer->batch.surf_relocs, offset = bindings->descriptors[s].surfaces[i] + 8 * sizeof(int32_t);
bindings->descriptors[s].surfaces[bias + i] + 8 * sizeof(int32_t), address = cmd_buffer->surface_bo.map + offset;
*address =
anv_reloc_list_add(&cmd_buffer->surface_relocs, offset,
bindings->descriptors[s].relocs[bias + i].bo, bindings->descriptors[s].relocs[bias + i].bo,
bindings->descriptors[s].relocs[bias + i].offset); bindings->descriptors[s].relocs[bias + i].offset);
}
static const uint32_t binding_table_opcodes[] = { static const uint32_t binding_table_opcodes[] = {
[VK_SHADER_STAGE_VERTEX] = 38, [VK_SHADER_STAGE_VERTEX] = 38,
@@ -2519,7 +2573,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
size_t size; size_t size;
size = layout->stage[s].sampler_count * 16; size = layout->stage[s].sampler_count * 16;
state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
memcpy(state.map, bindings->descriptors[s].samplers, size); memcpy(state.map, bindings->descriptors[s].samplers, size);
static const uint32_t sampler_state_opcodes[] = { static const uint32_t sampler_state_opcodes[] = {
@@ -3086,7 +3140,11 @@ anv_cmd_buffer_fill_render_targets(struct anv_cmd_buffer *cmd_buffer)
for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) { for (uint32_t i = 0; i < framebuffer->color_attachment_count; i++) {
struct anv_surface_view *view = framebuffer->color_attachments[i]; struct anv_surface_view *view = framebuffer->color_attachments[i];
bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = view->surface_state.offset; struct anv_state state =
anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
memcpy(state.map, view->surface_state.map, 64);
bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] = state.offset;
bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = view->bo; bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo = view->bo;
bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = view->offset; bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset = view->offset;
} }

View File

@@ -174,6 +174,27 @@ anv_cmd_buffer_restore(struct anv_cmd_buffer *cmd_buffer,
ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY; ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY;
} }
static void
anv_cmd_buffer_copy_render_targets(struct anv_cmd_buffer *cmd_buffer,
struct anv_saved_state *state)
{
struct anv_framebuffer *fb = cmd_buffer->framebuffer;
struct anv_bindings *old_bindings = state->old_bindings;
struct anv_bindings *bindings = cmd_buffer->bindings;
for (uint32_t i = 0; i < fb->color_attachment_count; i++) {
bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i] =
old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].surfaces[i];
bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo =
old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].bo;
bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset =
old_bindings->descriptors[VK_SHADER_STAGE_FRAGMENT].relocs[i].offset;
}
cmd_buffer->dirty |= ANV_CMD_BUFFER_DESCRIPTOR_SET_DIRTY;
}
struct vue_header { struct vue_header {
uint32_t Reserved; uint32_t Reserved;
uint32_t RTAIndex; uint32_t RTAIndex;
@@ -241,9 +262,7 @@ anv_cmd_buffer_clear(struct anv_cmd_buffer *cmd_buffer,
}; };
anv_cmd_buffer_save(cmd_buffer, &saved_state); anv_cmd_buffer_save(cmd_buffer, &saved_state);
anv_cmd_buffer_copy_render_targets(cmd_buffer, &saved_state);
/* Initialize render targets for the meta bindings. */
anv_cmd_buffer_fill_render_targets(cmd_buffer);
anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2, anv_CmdBindVertexBuffers((VkCmdBuffer) cmd_buffer, 0, 2,
(VkBuffer[]) { (VkBuffer[]) {

View File

@@ -368,7 +368,6 @@ struct anv_batch {
struct anv_bo bo; struct anv_bo bo;
void * next; void * next;
struct anv_reloc_list cmd_relocs; struct anv_reloc_list cmd_relocs;
struct anv_reloc_list surf_relocs;
}; };
VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device); VkResult anv_batch_init(struct anv_batch *batch, struct anv_device *device);
@@ -549,6 +548,9 @@ struct anv_cmd_buffer {
uint32_t bo_count; uint32_t bo_count;
struct anv_batch batch; struct anv_batch batch;
struct anv_bo surface_bo;
uint32_t surface_next;
struct anv_reloc_list surface_relocs;
struct anv_state_stream binding_table_state_stream; struct anv_state_stream binding_table_state_stream;
struct anv_state_stream surface_state_stream; struct anv_state_stream surface_state_stream;
struct anv_state_stream dynamic_state_stream; struct anv_state_stream dynamic_state_stream;