anv: Allocate batch and fence buffers from the cache

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Author: Jason Ekstrand
Date:   2019-10-28 15:42:20 -05:00
Parent: e4f01eca3b
Commit: ee77938733
5 changed files with 124 additions and 199 deletions
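In short, this commit moves anv_bo_pool (the pool behind batch and fence buffers) off its hand-rolled pointer free list and onto anv_bo objects owned by the device's BO cache, recycled through util_sparse_array_free_list. The caller-visible change, sketched below from the signatures in this diff (a hypothetical caller, not code from the commit), is that the pool now hands back a pointer to the cached anv_bo instead of filling in a caller-owned struct by value:

/* Hypothetical caller, before this change: the pool copied a
 * struct anv_bo into caller-owned storage. */
struct anv_bo bo;
VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
if (result == VK_SUCCESS) {
   /* ... write commands into bo.map ... */
   anv_bo_pool_free(&device->batch_bo_pool, &bo);
}

/* Same caller after this change: the pool returns a pointer to the
 * anv_bo that already lives in the device's BO cache. */
struct anv_bo *bo;
VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, size, &bo);
if (result == VK_SUCCESS) {
   /* ... write commands into bo->map ... */
   anv_bo_pool_free(&device->batch_bo_pool, bo);
}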


@@ -357,57 +357,6 @@ anv_free_list_pop(union anv_free_list *list,
    return NULL;
 }
 
-/* All pointers in the ptr_free_list are assumed to be page-aligned. This
- * means that the bottom 12 bits should all be zero.
- */
-#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff)
-#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~(uintptr_t)0xfff))
-#define PFL_PACK(ptr, count) ({ \
-   (void *)(((uintptr_t)(ptr) & ~(uintptr_t)0xfff) | ((count) & 0xfff)); \
-})
-
-static bool
-anv_ptr_free_list_pop(void **list, void **elem)
-{
-   void *current = *list;
-   while (PFL_PTR(current) != NULL) {
-      void **next_ptr = PFL_PTR(current);
-      void *new_ptr = VG_NOACCESS_READ(next_ptr);
-      unsigned new_count = PFL_COUNT(current) + 1;
-      void *new = PFL_PACK(new_ptr, new_count);
-      void *old = __sync_val_compare_and_swap(list, current, new);
-      if (old == current) {
-         *elem = PFL_PTR(current);
-         return true;
-      }
-      current = old;
-   }
-   return false;
-}
-
-static void
-anv_ptr_free_list_push(void **list, void *elem)
-{
-   void *old, *current;
-   void **next_ptr = elem;
-
-   /* The pointer-based free list requires that the pointer be
-    * page-aligned. This is because we use the bottom 12 bits of the
-    * pointer to store a counter to solve the ABA concurrency problem.
-    */
-   assert(((uintptr_t)elem & 0xfff) == 0);
-
-   old = *list;
-   do {
-      current = old;
-      VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current));
-      unsigned new_count = PFL_COUNT(current) + 1;
-      void *new = PFL_PACK(elem, new_count);
-      old = __sync_val_compare_and_swap(list, current, new);
-   } while (old != current);
-}
-
 static VkResult
 anv_block_pool_expand_range(struct anv_block_pool *pool,
                             uint32_t center_bo_offset, uint32_t size);
@@ -1311,18 +1260,17 @@ anv_state_stream_alloc(struct anv_state_stream *stream,
    return state;
 }
 
-struct bo_pool_bo_link {
-   struct bo_pool_bo_link *next;
-   struct anv_bo bo;
-};
-
 void
 anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
                  uint64_t bo_flags)
 {
    pool->device = device;
    pool->bo_flags = bo_flags;
-   memset(pool->free_list, 0, sizeof(pool->free_list));
+   for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
+      util_sparse_array_free_list_init(&pool->free_list[i],
+                                       &device->bo_cache.bo_map, 0,
+                                       offsetof(struct anv_bo, free_index));
+   }
 
    VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));
 }
@@ -1331,14 +1279,15 @@ void
 anv_bo_pool_finish(struct anv_bo_pool *pool)
 {
    for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
-      struct bo_pool_bo_link *link = PFL_PTR(pool->free_list[i]);
-      while (link != NULL) {
-         struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link);
-
-         anv_gem_munmap(link_copy.bo.map, link_copy.bo.size);
-         anv_vma_free(pool->device, &link_copy.bo);
-         anv_gem_close(pool->device, link_copy.bo.gem_handle);
-         link = link_copy.next;
+      while (1) {
+         struct anv_bo *bo =
+            util_sparse_array_free_list_pop_elem(&pool->free_list[i]);
+         if (bo == NULL)
+            break;
+
+         /* anv_device_release_bo is going to "free" it */
+         VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1));
+         anv_device_release_bo(pool->device, bo);
       }
    }
@@ -1346,80 +1295,53 @@ anv_bo_pool_finish(struct anv_bo_pool *pool)
 }
 
 VkResult
-anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size)
+anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
+                  struct anv_bo **bo_out)
 {
-   VkResult result;
-
    const unsigned size_log2 = size < 4096 ? 12 : ilog2_round_up(size);
    const unsigned pow2_size = 1 << size_log2;
    const unsigned bucket = size_log2 - 12;
    assert(bucket < ARRAY_SIZE(pool->free_list));
 
-   void *next_free_void;
-   if (anv_ptr_free_list_pop(&pool->free_list[bucket], &next_free_void)) {
-      struct bo_pool_bo_link *next_free = next_free_void;
-      *bo = VG_NOACCESS_READ(&next_free->bo);
-      assert(bo->gem_handle);
-      assert(bo->map == next_free);
-      assert(size <= bo->size);
-
+   struct anv_bo *bo =
+      util_sparse_array_free_list_pop_elem(&pool->free_list[bucket]);
+   if (bo != NULL) {
       VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size));
+      *bo_out = bo;
       return VK_SUCCESS;
    }
 
-   struct anv_bo new_bo;
-
-   result = anv_bo_init_new(&new_bo, pool->device, pow2_size);
+   VkResult result = anv_device_alloc_bo(pool->device,
+                                         pow2_size,
+                                         ANV_BO_ALLOC_MAPPED |
+                                         ANV_BO_ALLOC_SNOOPED,
+                                         &bo);
    if (result != VK_SUCCESS)
       return result;
 
-   new_bo.flags = pool->bo_flags;
-
-   if (!anv_vma_alloc(pool->device, &new_bo))
-      return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
-
-   assert(new_bo.size == pow2_size);
-
-   new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0);
-   if (new_bo.map == MAP_FAILED) {
-      anv_gem_close(pool->device, new_bo.gem_handle);
-      anv_vma_free(pool->device, &new_bo);
-      return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
-   }
-
-   /* We are removing the state flushes, so lets make sure that these buffers
-    * are cached/snooped.
-    */
-   if (!pool->device->info.has_llc) {
-      anv_gem_set_caching(pool->device, new_bo.gem_handle,
-                          I915_CACHING_CACHED);
-   }
-
-   *bo = new_bo;
+   /* We want it to look like it came from this pool */
+   VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
 
    VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size));
+   *bo_out = bo;
 
    return VK_SUCCESS;
 }
 
 void
-anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo_in)
+anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo)
 {
-   /* Make a copy in case the anv_bo happens to be storred in the BO */
-   struct anv_bo bo = *bo_in;
-
-   VG(VALGRIND_MEMPOOL_FREE(pool, bo.map));
-
-   struct bo_pool_bo_link *link = bo.map;
-   VG_NOACCESS_WRITE(&link->bo, bo);
-
-   assert(util_is_power_of_two_or_zero(bo.size));
-   const unsigned size_log2 = ilog2_round_up(bo.size);
+   VG(VALGRIND_MEMPOOL_FREE(pool, bo->map));
+
+   assert(util_is_power_of_two_or_zero(bo->size));
+   const unsigned size_log2 = ilog2_round_up(bo->size);
    const unsigned bucket = size_log2 - 12;
    assert(bucket < ARRAY_SIZE(pool->free_list));
 
-   anv_ptr_free_list_push(&pool->free_list[bucket], link);
+   assert(util_sparse_array_get(&pool->device->bo_cache.bo_map,
+                                bo->gem_handle) == bo);
+   util_sparse_array_free_list_push(&pool->free_list[bucket],
+                                    &bo->gem_handle, 1);
 }
 
 // Scratch pool
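As an aside (not part of the commit): the removed anv_ptr_free_list kept its ABA counter in the low 12 bits of a page-aligned pointer, which is why each pooled BO had to host a bo_pool_bo_link header at its mapped address. The replacement, built on the util_sparse_array_free_list calls visible above, chains elements by a 32-bit index stored inside the element itself (anv_bo::free_index, with the BO cache's bo_map indexed by GEM handle) and keeps the ABA counter in the list head, so the BO's CPU mapping no longer carries free-list bookkeeping. A rough standalone sketch, with hypothetical names (demo_bo, demo_pool_*) standing in for the anv types:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include "util/sparse_array.h"   /* Mesa's src/util */

/* Stand-in for struct anv_bo: lives inside the sparse array, indexed by
 * its GEM handle, and carries its own free-list link as an index. */
struct demo_bo {
   uint32_t gem_handle;   /* index of this element in the sparse array */
   uint32_t free_index;   /* "next" link used by the free list */
};

static struct util_sparse_array bo_map;              /* like bo_cache.bo_map */
static struct util_sparse_array_free_list free_list; /* one pool bucket */

static void
demo_pool_init(void)
{
   util_sparse_array_init(&bo_map, sizeof(struct demo_bo), 1024);
   /* Sentinel 0 marks the empty list, so valid indices start at 1,
    * which conveniently matches GEM handles. */
   util_sparse_array_free_list_init(&free_list, &bo_map, 0,
                                    offsetof(struct demo_bo, free_index));
}

static struct demo_bo *
demo_pool_alloc(uint32_t fresh_handle)
{
   /* Fast path: recycle a previously freed element. */
   struct demo_bo *bo = util_sparse_array_free_list_pop_elem(&free_list);
   if (bo != NULL)
      return bo;

   /* Slow path: materialize the element at its handle's slot
    * (anv_bo_pool_alloc calls anv_device_alloc_bo here instead). */
   bo = util_sparse_array_get(&bo_map, fresh_handle);
   bo->gem_handle = fresh_handle;
   return bo;
}

static void
demo_pool_free(struct demo_bo *bo)
{
   /* Push by index; the element must be the one stored at that slot. */
   assert(util_sparse_array_get(&bo_map, bo->gem_handle) == bo);
   util_sparse_array_free_list_push(&free_list, &bo->gem_handle, 1);
}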


@@ -308,8 +308,8 @@ anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
    if (bbo == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
-                              ANV_CMD_BUFFER_BATCH_SIZE);
+   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
+                              ANV_CMD_BUFFER_BATCH_SIZE, &bbo->bo);
    if (result != VK_SUCCESS)
       goto fail_alloc;
@@ -322,7 +322,7 @@ anv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
    return VK_SUCCESS;
 
 fail_bo_alloc:
-   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
+   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo);
 fail_alloc:
    vk_free(&cmd_buffer->pool->alloc, bbo);
@@ -341,8 +341,8 @@ anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
    if (bbo == NULL)
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
 
-   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
-                              other_bbo->bo.size);
+   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
+                              other_bbo->bo->size, &bbo->bo);
    if (result != VK_SUCCESS)
       goto fail_alloc;
@@ -352,14 +352,13 @@ anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
       goto fail_bo_alloc;
 
    bbo->length = other_bbo->length;
-   memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);
+   memcpy(bbo->bo->map, other_bbo->bo->map, other_bbo->length);
 
    *bbo_out = bbo;
 
    return VK_SUCCESS;
 
 fail_bo_alloc:
-   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
+   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo);
 fail_alloc:
    vk_free(&cmd_buffer->pool->alloc, bbo);
@@ -370,8 +369,8 @@ static void
 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
                    size_t batch_padding)
 {
-   batch->next = batch->start = bbo->bo.map;
-   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
+   batch->next = batch->start = bbo->bo->map;
+   batch->end = bbo->bo->map + bbo->bo->size - batch_padding;
    batch->relocs = &bbo->relocs;
    bbo->relocs.num_relocs = 0;
    _mesa_set_clear(bbo->relocs.deps, NULL);
@@ -381,16 +380,16 @@ static void
 anv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
                       size_t batch_padding)
 {
-   batch->start = bbo->bo.map;
-   batch->next = bbo->bo.map + bbo->length;
-   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
+   batch->start = bbo->bo->map;
+   batch->next = bbo->bo->map + bbo->length;
+   batch->end = bbo->bo->map + bbo->bo->size - batch_padding;
    batch->relocs = &bbo->relocs;
 }
 
 static void
 anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
 {
-   assert(batch->start == bbo->bo.map);
+   assert(batch->start == bbo->bo->map);
    bbo->length = batch->next - batch->start;
    VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
 }
@@ -400,25 +399,25 @@ anv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo,
                   struct anv_batch *batch, size_t aditional,
                   size_t batch_padding)
 {
-   assert(batch->start == bbo->bo.map);
+   assert(batch->start == bbo->bo->map);
    bbo->length = batch->next - batch->start;
 
-   size_t new_size = bbo->bo.size;
+   size_t new_size = bbo->bo->size;
    while (new_size <= bbo->length + aditional + batch_padding)
       new_size *= 2;
 
-   if (new_size == bbo->bo.size)
+   if (new_size == bbo->bo->size)
       return VK_SUCCESS;
 
-   struct anv_bo new_bo;
+   struct anv_bo *new_bo;
    VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
-                                       &new_bo, new_size);
+                                       new_size, &new_bo);
    if (result != VK_SUCCESS)
      return result;
 
-   memcpy(new_bo.map, bbo->bo.map, bbo->length);
+   memcpy(new_bo->map, bbo->bo->map, bbo->length);
 
-   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
+   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo);
 
    bbo->bo = new_bo;
    anv_batch_bo_continue(bbo, batch, batch_padding);
@@ -434,24 +433,24 @@ anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer,
 {
    const uint32_t bb_start_offset =
       prev_bbo->length - GEN8_MI_BATCH_BUFFER_START_length * 4;
-   ASSERTED const uint32_t *bb_start = prev_bbo->bo.map + bb_start_offset;
+   ASSERTED const uint32_t *bb_start = prev_bbo->bo->map + bb_start_offset;
 
    /* Make sure we're looking at a MI_BATCH_BUFFER_START */
    assert(((*bb_start >> 29) & 0x07) == 0);
    assert(((*bb_start >> 23) & 0x3f) == 49);
 
    if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
-      assert(prev_bbo->bo.flags & EXEC_OBJECT_PINNED);
-      assert(next_bbo->bo.flags & EXEC_OBJECT_PINNED);
+      assert(prev_bbo->bo->flags & EXEC_OBJECT_PINNED);
+      assert(next_bbo->bo->flags & EXEC_OBJECT_PINNED);
 
       write_reloc(cmd_buffer->device,
-                  prev_bbo->bo.map + bb_start_offset + 4,
-                  next_bbo->bo.offset + next_bbo_offset, true);
+                  prev_bbo->bo->map + bb_start_offset + 4,
+                  next_bbo->bo->offset + next_bbo_offset, true);
    } else {
       uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1;
       assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4);
 
-      prev_bbo->relocs.reloc_bos[reloc_idx] = &next_bbo->bo;
+      prev_bbo->relocs.reloc_bos[reloc_idx] = next_bbo->bo;
       prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset;
 
      /* Use a bogus presumed offset to force a relocation */
@@ -464,7 +463,7 @@ anv_batch_bo_destroy(struct anv_batch_bo *bbo,
                      struct anv_cmd_buffer *cmd_buffer)
 {
    anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
-   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
+   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo);
    vk_free(&cmd_buffer->pool->alloc, bbo);
 }
@@ -561,9 +560,9 @@ cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
     * chaining command, let's set it back where it should go.
     */
    batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
-   assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);
+   assert(batch->end == current_bbo->bo->map + current_bbo->bo->size);
 
-   emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);
+   emit_batch_buffer_start(cmd_buffer, bbo->bo, 0);
 
    anv_batch_bo_finish(current_bbo, batch);
 }
@@ -870,7 +869,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
       * with our BATCH_BUFFER_END in another BO.
       */
      cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
-     assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
+     assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size);
 
      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END, bbe);
@@ -911,11 +910,11 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
       * chaining command, let's set it back where it should go.
       */
      cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
-     assert(cmd_buffer->batch.start == batch_bo->bo.map);
-     assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
+     assert(cmd_buffer->batch.start == batch_bo->bo->map);
+     assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size);
 
-     emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0);
+     emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0);
 
-     assert(cmd_buffer->batch.start == batch_bo->bo.map);
+     assert(cmd_buffer->batch.start == batch_bo->bo->map);
   } else {
      cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
   }
@@ -961,10 +960,10 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
      struct anv_batch_bo *last_bbo =
         list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
 
-     emit_batch_buffer_start(primary, &first_bbo->bo, 0);
+     emit_batch_buffer_start(primary, first_bbo->bo, 0);
 
      struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
-     assert(primary->batch.start == this_bbo->bo.map);
+     assert(primary->batch.start == this_bbo->bo->map);
      uint32_t offset = primary->batch.next - primary->batch.start;
 
      /* Make the tail of the secondary point back to right after the
@@ -1383,7 +1382,7 @@ relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
    struct anv_batch_bo **bbo;
    u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
       anv_reloc_list_apply(cmd_buffer->device,
-                           &(*bbo)->relocs, &(*bbo)->bo, false);
+                           &(*bbo)->relocs, (*bbo)->bo, false);
    }
 
    for (uint32_t i = 0; i < exec->bo_count; i++)
@@ -1465,10 +1464,10 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
    */
   struct anv_batch_bo **bbo;
   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
-     adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
+     adjust_relocations_to_state_pool(ss_pool, (*bbo)->bo, &(*bbo)->relocs,
                                       cmd_buffer->last_ss_pool_center);
 
-     result = anv_execbuf_add_bo(execbuf, &(*bbo)->bo, &(*bbo)->relocs, 0,
+     result = anv_execbuf_add_bo(execbuf, (*bbo)->bo, &(*bbo)->relocs, 0,
                                  &cmd_buffer->device->alloc);
      if (result != VK_SUCCESS)
         return result;
@@ -1488,20 +1487,20 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
    * corresponding to the first batch_bo in the chain with the last
    * element in the list.
    */
-  if (first_batch_bo->bo.index != execbuf->bo_count - 1) {
-     uint32_t idx = first_batch_bo->bo.index;
+  if (first_batch_bo->bo->index != execbuf->bo_count - 1) {
+     uint32_t idx = first_batch_bo->bo->index;
      uint32_t last_idx = execbuf->bo_count - 1;
 
      struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
-     assert(execbuf->bos[idx] == &first_batch_bo->bo);
+     assert(execbuf->bos[idx] == first_batch_bo->bo);
 
      execbuf->objects[idx] = execbuf->objects[last_idx];
      execbuf->bos[idx] = execbuf->bos[last_idx];
      execbuf->bos[idx]->index = idx;
 
      execbuf->objects[last_idx] = tmp_obj;
-     execbuf->bos[last_idx] = &first_batch_bo->bo;
-     first_batch_bo->bo.index = last_idx;
+     execbuf->bos[last_idx] = first_batch_bo->bo;
+     first_batch_bo->bo->index = last_idx;
   }
 
   /* If we are pinning our BOs, we shouldn't have to relocate anything */
@@ -1523,7 +1522,7 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
      __builtin_ia32_mfence();
      u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
         for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
-           __builtin_ia32_clflush((*bbo)->bo.map + i);
+           __builtin_ia32_clflush((*bbo)->bo->map + i);
      }
   }
@@ -1732,7 +1731,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
      switch (impl->type) {
      case ANV_FENCE_TYPE_BO:
        assert(!pdevice->has_syncobj_wait);
-       result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL,
+       result = anv_execbuf_add_bo(&execbuf, impl->bo.bo, NULL,
                                    EXEC_OBJECT_WRITE, &device->alloc);
        if (result != VK_SUCCESS)
          return result;
@@ -1756,8 +1755,8 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
      struct anv_batch_bo **bo = u_vector_tail(&cmd_buffer->seen_bbos);
 
      device->cmd_buffer_being_decoded = cmd_buffer;
-     gen_print_batch(&device->decoder_ctx, (*bo)->bo.map,
-                     (*bo)->bo.size, (*bo)->bo.offset, false);
+     gen_print_batch(&device->decoder_ctx, (*bo)->bo->map,
+                     (*bo)->bo->size, (*bo)->bo->offset, false);
 
      device->cmd_buffer_being_decoded = NULL;
   }


@@ -2373,13 +2373,13 @@ decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
    u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
       /* The decoder zeroes out the top 16 bits, so we need to as well */
-      uint64_t bo_address = (*bo)->bo.offset & (~0ull >> 16);
+      uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
 
-      if (address >= bo_address && address < bo_address + (*bo)->bo.size) {
+      if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
         return (struct gen_batch_decode_bo) {
            .addr = bo_address,
-           .size = (*bo)->bo.size,
-           .map = (*bo)->bo.map,
+           .size = (*bo)->bo->size,
+           .map = (*bo)->bo->map,
         };
      }
   }
@@ -2612,16 +2612,16 @@ VkResult anv_CreateDevice(
      (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0) |
      (physical_device->use_softpin ? EXEC_OBJECT_PINNED : 0);
 
-   anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags);
-
    result = anv_bo_cache_init(&device->bo_cache);
    if (result != VK_SUCCESS)
-      goto fail_batch_bo_pool;
+      goto fail_queue_cond;
+
+   anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags);
 
    result = anv_state_pool_init(&device->dynamic_state_pool, device,
                                 DYNAMIC_STATE_POOL_MIN_ADDRESS, 16384);
    if (result != VK_SUCCESS)
-      goto fail_bo_cache;
+      goto fail_batch_bo_pool;
 
    result = anv_state_pool_init(&device->instruction_state_pool, device,
                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 16384);
@@ -2727,10 +2727,10 @@ VkResult anv_CreateDevice(
    anv_state_pool_finish(&device->instruction_state_pool);
 fail_dynamic_state_pool:
    anv_state_pool_finish(&device->dynamic_state_pool);
-fail_bo_cache:
-   anv_bo_cache_finish(&device->bo_cache);
 fail_batch_bo_pool:
    anv_bo_pool_finish(&device->batch_bo_pool);
+   anv_bo_cache_finish(&device->bo_cache);
+fail_queue_cond:
    pthread_cond_destroy(&device->queue_submit);
 fail_mutex:
    pthread_mutex_destroy(&device->mutex);
@@ -2797,10 +2797,10 @@ void anv_DestroyDevice(
    anv_state_pool_finish(&device->instruction_state_pool);
    anv_state_pool_finish(&device->dynamic_state_pool);
 
-   anv_bo_cache_finish(&device->bo_cache);
-
    anv_bo_pool_finish(&device->batch_bo_pool);
+   anv_bo_cache_finish(&device->bo_cache);
 
    if (physical_device->use_softpin) {
       util_vma_heap_finish(&device->vma_hi);
       util_vma_heap_finish(&device->vma_lo);
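Worth noting from the two anv_CreateDevice/anv_DestroyDevice hunks above: because pool BOs now live in the BO cache, the cache must be initialized before the pool and torn down after it, and the error-unwind labels are reordered to match. Roughly (a sketch of the ordering only, not the literal driver code):

/* Setup: the cache first, then the pool that allocates from it. */
anv_bo_cache_init(&device->bo_cache);
anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags);

/* Teardown and error unwind: reverse order, so pooled BOs are released
 * back to the cache before the cache itself is finished. */
anv_bo_pool_finish(&device->batch_bo_pool);
anv_bo_cache_finish(&device->bo_cache);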


@@ -608,6 +608,9 @@ struct anv_bo {
    */
   uint32_t index;
 
+  /* Index for use with util_sparse_array_free_list */
+  uint32_t free_index;
+
   /* Last known offset. This value is provided by the kernel when we
    * execbuf and is used as the presumed offset for the next bunch of
    * relocations.
@@ -898,15 +901,15 @@ struct anv_bo_pool {
   uint64_t bo_flags;
 
-  void *free_list[16];
+  struct util_sparse_array_free_list free_list[16];
 };
 
 void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
                       uint64_t bo_flags);
 void anv_bo_pool_finish(struct anv_bo_pool *pool);
-VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo,
-                           uint32_t size);
-void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo);
+VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
+                           struct anv_bo **bo_out);
+void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);
 
 struct anv_scratch_bo {
   bool exists;
@@ -1400,7 +1403,7 @@ struct anv_batch_bo {
   /* Link in the anv_cmd_buffer.owned_batch_bos list */
   struct list_head link;
 
-  struct anv_bo bo;
+  struct anv_bo *bo;
 
   /* Bytes actually consumed in this batch BO */
   uint32_t length;
@@ -2739,7 +2742,7 @@ struct anv_fence_impl {
    * will say it's idle in this case.
    */
   struct {
-     struct anv_bo bo;
+     struct anv_bo *bo;
      enum anv_bo_fence_state state;
   } bo;


@@ -61,27 +61,26 @@ anv_device_submit_simple_batch(struct anv_device *device,
 {
   struct drm_i915_gem_execbuffer2 execbuf;
   struct drm_i915_gem_exec_object2 exec2_objects[1];
-  struct anv_bo bo, *exec_bos[1];
+  struct anv_bo *bo;
   VkResult result = VK_SUCCESS;
   uint32_t size;
 
   /* Kernel driver requires 8 byte aligned batch length */
   size = align_u32(batch->next - batch->start, 8);
-  result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
+  result = anv_bo_pool_alloc(&device->batch_bo_pool, size, &bo);
   if (result != VK_SUCCESS)
      return result;
 
-  memcpy(bo.map, batch->start, size);
+  memcpy(bo->map, batch->start, size);
   if (!device->info.has_llc)
-     gen_flush_range(bo.map, size);
+     gen_flush_range(bo->map, size);
 
-  exec_bos[0] = &bo;
-  exec2_objects[0].handle = bo.gem_handle;
+  exec2_objects[0].handle = bo->gem_handle;
   exec2_objects[0].relocation_count = 0;
   exec2_objects[0].relocs_ptr = 0;
   exec2_objects[0].alignment = 0;
-  exec2_objects[0].offset = bo.offset;
-  exec2_objects[0].flags = bo.flags;
+  exec2_objects[0].offset = bo->offset;
+  exec2_objects[0].flags = bo->flags;
   exec2_objects[0].rsvd1 = 0;
   exec2_objects[0].rsvd2 = 0;
@@ -99,17 +98,19 @@ anv_device_submit_simple_batch(struct anv_device *device,
   execbuf.rsvd1 = device->context_id;
   execbuf.rsvd2 = 0;
 
-  if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
-     gen_print_batch(&device->decoder_ctx, bo.map, bo.size, bo.offset, false);
+  if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
+     gen_print_batch(&device->decoder_ctx, bo->map,
+                     bo->size, bo->offset, false);
+  }
 
-  result = anv_device_execbuf(device, &execbuf, exec_bos);
+  result = anv_device_execbuf(device, &execbuf, &bo);
   if (result != VK_SUCCESS)
      goto fail;
 
-  result = anv_device_wait(device, &bo, INT64_MAX);
+  result = anv_device_wait(device, bo, INT64_MAX);
 
 fail:
-  anv_bo_pool_free(&device->batch_bo_pool, &bo);
+  anv_bo_pool_free(&device->batch_bo_pool, bo);
 
   return result;
 }
@@ -288,8 +289,8 @@ VkResult anv_CreateFence(
   } else {
      fence->permanent.type = ANV_FENCE_TYPE_BO;
 
-     VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool,
-                                         &fence->permanent.bo.bo, 4096);
+     VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, 4096,
+                                         &fence->permanent.bo.bo);
      if (result != VK_SUCCESS)
        return result;
@@ -315,7 +316,7 @@ anv_fence_impl_cleanup(struct anv_device *device,
      break;
 
   case ANV_FENCE_TYPE_BO:
-     anv_bo_pool_free(&device->batch_bo_pool, &impl->bo.bo);
+     anv_bo_pool_free(&device->batch_bo_pool, impl->bo.bo);
      break;
 
   case ANV_FENCE_TYPE_SYNCOBJ:
@@ -417,7 +418,7 @@ VkResult anv_GetFenceStatus(
      return VK_SUCCESS;
 
   case ANV_BO_FENCE_STATE_SUBMITTED: {
-     VkResult result = anv_device_bo_busy(device, &impl->bo.bo);
+     VkResult result = anv_device_bo_busy(device, impl->bo.bo);
      if (result == VK_SUCCESS) {
        impl->bo.state = ANV_BO_FENCE_STATE_SIGNALED;
        return VK_SUCCESS;
@@ -591,7 +592,7 @@ anv_wait_for_bo_fences(struct anv_device *device,
        /* These are the fences we really care about. Go ahead and wait
         * on it until we hit a timeout.
         */
-       result = anv_device_wait(device, &impl->bo.bo,
+       result = anv_device_wait(device, impl->bo.bo,
                                 anv_get_relative_timeout(abs_timeout_ns));
        switch (result) {
        case VK_SUCCESS: