anv: decouple util function from anv_cmd_buffer

The issue we're addressing here is that we have 2 batches and they both
grow at different rates. We want to keep doubling the main batch size
as the application writes more and more commands, to limit the number
of GEM BOs. But we don't want the generation batch size to be tied to
the main batch.

v2: remove gfx7 code

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15642>
Author:    Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date:      2022-02-26 14:00:07 +02:00
Committer: Marge Bot
Parent:    bd83e5ddaf
Commit:    61b730f1f4

3 changed files with 126 additions and 25 deletions
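
For context, both chaining callbacks now size new chunks with the same
capped-doubling rule, each driven by its own batch's total_batch_size.
Below is a minimal sketch of that rule; next_chunk_size() is a
hypothetical illustration (not a function from this patch), and
MIN2/MAX2 stand in for Mesa's utility macros:

#define MAX2(a, b) ((a) > (b) ? (a) : (b))
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Hypothetical helper: allocate a chunk as large as everything the batch
 * has allocated so far (doubling its total), always covering the requested
 * emit size and never exceeding the per-BO maximum. */
static uint32_t
next_chunk_size(uint32_t total_batch_size, uint32_t requested_size,
                uint32_t max_batch_size)
{
   return MIN2(MAX2(total_batch_size, requested_size), max_batch_size);
}

Since the generation batch starts with total_batch_size = 0 and feeds this
rule with its own counter, its growth is no longer coupled to how large
the main batch has become.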

src/intel/vulkan/anv_batch_chain.c

@@ -410,6 +410,12 @@ anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
    return list_entry(cmd_buffer->batch_bos.prev, struct anv_batch_bo, link);
 }
 
+static struct anv_batch_bo *
+anv_cmd_buffer_current_generation_batch_bo(struct anv_cmd_buffer *cmd_buffer)
+{
+   return list_entry(cmd_buffer->generation_batch_bos.prev, struct anv_batch_bo, link);
+}
+
 struct anv_address
 anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
 {
@@ -422,34 +428,34 @@ anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
 }
 
 static void
-emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
+emit_batch_buffer_start(struct anv_batch *batch,
                         struct anv_bo *bo, uint32_t offset)
 {
-   /* In gfx8+ the address field grew to two dwords to accommodate 48 bit
-    * offsets. The high 16 bits are in the last dword, so we can use the gfx8
-    * version in either case, as long as we set the instruction length in the
-    * header accordingly. This means that we always emit three dwords here
-    * and all the padding and adjustment we do in this file works for all
-    * gens.
-    */
-   const uint32_t gfx8_length =
-      GFX8_MI_BATCH_BUFFER_START_length - GFX8_MI_BATCH_BUFFER_START_length_bias;
-
-   anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_START, bbs) {
-      bbs.DWordLength               = gfx8_length;
+   anv_batch_emit(batch, GFX8_MI_BATCH_BUFFER_START, bbs) {
+      bbs.DWordLength               = GFX8_MI_BATCH_BUFFER_START_length -
+                                      GFX8_MI_BATCH_BUFFER_START_length_bias;
       bbs.SecondLevelBatchBuffer    = Firstlevelbatch;
       bbs.AddressSpaceIndicator     = ASI_PPGTT;
       bbs.BatchBufferStartAddress   = (struct anv_address) { bo, offset };
    }
 }
 
+enum anv_cmd_buffer_batch {
+   ANV_CMD_BUFFER_BATCH_MAIN,
+   ANV_CMD_BUFFER_BATCH_GENERATION,
+};
+
 static void
 cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
-                             struct anv_batch_bo *bbo)
+                             struct anv_batch_bo *bbo,
+                             enum anv_cmd_buffer_batch batch_type)
 {
-   struct anv_batch *batch = &cmd_buffer->batch;
+   struct anv_batch *batch =
+      batch_type == ANV_CMD_BUFFER_BATCH_GENERATION ?
+      &cmd_buffer->generation_batch : &cmd_buffer->batch;
    struct anv_batch_bo *current_bbo =
+      batch_type == ANV_CMD_BUFFER_BATCH_GENERATION ?
+      anv_cmd_buffer_current_generation_batch_bo(cmd_buffer) :
       anv_cmd_buffer_current_batch_bo(cmd_buffer);
 
    /* We set the end of the batch a little short so we would be sure we
@@ -459,7 +465,7 @@ cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
    batch->end += GFX8_MI_BATCH_BUFFER_START_length * 4;
    assert(batch->end == current_bbo->bo->map + current_bbo->bo->size);
 
-   emit_batch_buffer_start(cmd_buffer, bbo->bo, 0);
+   emit_batch_buffer_start(batch, bbo->bo, 0);
 
    anv_batch_bo_finish(current_bbo, batch);
 }
@@ -537,7 +543,7 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, uint32_t size, void *_data)
    }
    *seen_bbo = new_bbo;
 
-   cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);
+   cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo, ANV_CMD_BUFFER_BATCH_MAIN);
 
    list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);
@@ -546,6 +552,46 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, uint32_t size, void *_data)
    return VK_SUCCESS;
 }
 
+static VkResult
+anv_cmd_buffer_chain_generation_batch(struct anv_batch *batch, uint32_t size, void *_data)
+{
+   /* The caller should not need that much space. Otherwise it should split
+    * its commands.
+    */
+   assert(size <= ANV_MAX_CMD_BUFFER_BATCH_SIZE);
+
+   struct anv_cmd_buffer *cmd_buffer = _data;
+   struct anv_batch_bo *new_bbo = NULL;
+
+   /* Cap reallocation to chunk. */
+   uint32_t alloc_size = MIN2(
+      MAX2(batch->total_batch_size, size),
+      ANV_MAX_CMD_BUFFER_BATCH_SIZE);
+
+   VkResult result = anv_batch_bo_create(cmd_buffer, alloc_size, &new_bbo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   batch->total_batch_size += alloc_size;
+
+   struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos);
+   if (seen_bbo == NULL) {
+      anv_batch_bo_destroy(new_bbo, cmd_buffer);
+      return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   *seen_bbo = new_bbo;
+
+   if (!list_is_empty(&cmd_buffer->generation_batch_bos)) {
+      cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo,
+                                   ANV_CMD_BUFFER_BATCH_GENERATION);
+   }
+
+   list_addtail(&new_bbo->link, &cmd_buffer->generation_batch_bos);
+
+   anv_batch_bo_start(new_bbo, batch, GFX8_MI_BATCH_BUFFER_START_length * 4);
+
+   return VK_SUCCESS;
+}
+
 /** Allocate a binding table
  *
  * This function allocates a binding table. This is a bit more complicated
@@ -762,6 +808,16 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
    anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
                       GFX8_MI_BATCH_BUFFER_START_length * 4);
 
+   /* Generation batch is initialized empty since it's possible it won't be
+    * used.
+    */
+   list_inithead(&cmd_buffer->generation_batch_bos);
+   cmd_buffer->generation_batch.alloc = &cmd_buffer->vk.pool->alloc;
+   cmd_buffer->generation_batch.user_data = cmd_buffer;
+   cmd_buffer->generation_batch.total_batch_size = 0;
+   cmd_buffer->generation_batch.extend_cb = anv_cmd_buffer_chain_generation_batch;
+
    int success = u_vector_init_pow2(&cmd_buffer->seen_bbos, 8,
                                     sizeof(struct anv_bo *));
    if (!success)
@@ -813,6 +869,12 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
       list_del(&bbo->link);
       anv_batch_bo_destroy(bbo, cmd_buffer);
    }
+
+   /* Also destroy all generation batch buffers */
+   list_for_each_entry_safe(struct anv_batch_bo, bbo,
+                            &cmd_buffer->generation_batch_bos, link) {
+      list_del(&bbo->link);
+      anv_batch_bo_destroy(bbo, cmd_buffer);
+   }
 }
 
 void
@@ -849,14 +911,27 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
    *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = first_bbo;
 
    assert(first_bbo->bo->size == ANV_MIN_CMD_BUFFER_BATCH_SIZE);
    cmd_buffer->batch.total_batch_size = first_bbo->bo->size;
+
+   /* Delete all generation batch bos */
+   list_for_each_entry_safe(struct anv_batch_bo, bbo,
+                            &cmd_buffer->generation_batch_bos, link) {
+      list_del(&bbo->link);
+      anv_batch_bo_destroy(bbo, cmd_buffer);
+   }
+
+   /* And reset generation batch */
+   cmd_buffer->generation_batch.total_batch_size = 0;
+   cmd_buffer->generation_batch.start = NULL;
+   cmd_buffer->generation_batch.end = NULL;
+   cmd_buffer->generation_batch.next = NULL;
 }
 
 void
 anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
 {
+   const struct intel_device_info *devinfo = cmd_buffer->device->info;
    struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
 
    if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
@@ -878,7 +953,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
     */
    batch_bo->chained = anv_cmd_buffer_is_chainable(cmd_buffer);
    if (batch_bo->chained)
-      emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0);
+      emit_batch_buffer_start(&cmd_buffer->batch, batch_bo->bo, 0);
    else
       anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_END, bbe);
@@ -903,7 +978,6 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
        * prefetch.
        */
       if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
-         const struct intel_device_info *devinfo = cmd_buffer->device->info;
          const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class;
 
          /* Careful to have everything in signed integer. */
          int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class];
@@ -951,7 +1025,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
       assert(cmd_buffer->batch.start == batch_bo->bo->map);
       assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size);
 
-      emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0);
+      emit_batch_buffer_start(&cmd_buffer->batch, batch_bo->bo, 0);
 
       assert(cmd_buffer->batch.start == batch_bo->bo->map);
    } else {
       cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
@@ -991,7 +1065,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
          struct anv_batch_bo *last_bbo =
            list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
 
-         emit_batch_buffer_start(primary, first_bbo->bo, 0);
+         emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0);
 
         struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
         assert(primary->batch.start == this_bbo->bo->map);
@@ -1020,7 +1094,8 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
       struct anv_batch_bo *last_bbo =
          list_last_entry(&copy_list, struct anv_batch_bo, link);
 
-      cmd_buffer_chain_to_batch_bo(primary, first_bbo);
+      cmd_buffer_chain_to_batch_bo(primary, first_bbo,
+                                   ANV_CMD_BUFFER_BATCH_MAIN);
 
       list_splicetail(&copy_list, &primary->batch_bos);
@@ -1039,7 +1114,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                        .Address = secondary->return_addr)
          + (GFX8_MI_STORE_DATA_IMM_ImmediateData_start / 8);
 
-      emit_batch_buffer_start(primary, first_bbo->bo, 0);
+      emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0);
 
      *write_return_addr =
         anv_address_physical(anv_batch_address(&primary->batch,
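
The init_batch_bo_chain hunk above installs
anv_cmd_buffer_chain_generation_batch as generation_batch.extend_cb. As a
rough sketch of how such an extend callback is driven, assuming the usual
grow-on-overflow pattern (batch_emit_dwords_sketch() is illustrative, not
the driver's actual emission function):

/* Sketch: when an emit would overrun the current BO, call the registered
 * extend_cb; for the generation batch that is
 * anv_cmd_buffer_chain_generation_batch, which allocates a fresh
 * anv_batch_bo and repoints batch->start/next/end at its map. */
static void *
batch_emit_dwords_sketch(struct anv_batch *batch, uint32_t num_dwords)
{
   uint32_t size = num_dwords * 4;
   if (batch->next + size > batch->end) {
      if (batch->extend_cb(batch, size, batch->user_data) != VK_SUCCESS)
         return NULL;
   }
   void *p = batch->next;
   batch->next += size;
   return p;
}

This is also why the callback asserts size <= ANV_MAX_CMD_BUFFER_BATCH_SIZE:
a single emit can never need more than one maximum-size chunk.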

src/intel/vulkan/anv_cmd_buffer.c

@@ -103,6 +103,7 @@ anv_create_cmd_buffer(struct vk_command_pool *pool,
       &cmd_buffer->state.gfx.sample_locations;
 
    cmd_buffer->batch.status = VK_SUCCESS;
+   cmd_buffer->generation_batch.status = VK_SUCCESS;
 
    cmd_buffer->device = device;
@@ -128,6 +129,8 @@ anv_create_cmd_buffer(struct vk_command_pool *pool,
 
    cmd_buffer->self_mod_locations = NULL;
 
+   cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS;
+
    anv_cmd_state_init(cmd_buffer);
 
    anv_measure_init(cmd_buffer);

src/intel/vulkan/anv_private.h

@@ -2681,6 +2681,7 @@ struct anv_cmd_buffer {
    struct anv_device *                          device;
    struct anv_queue_family *                    queue_family;
 
+   /** Batch where the main commands live */
    struct anv_batch                             batch;
 
    /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
@@ -2753,6 +2754,28 @@ struct anv_cmd_buffer {
     */
    uint32_t                                     total_batch_size;
 
+   /** Batch generating part of the anv_cmd_buffer::batch */
+   struct anv_batch                             generation_batch;
+
+   /**
+    * Location in anv_cmd_buffer::batch at which we left some space to insert
+    * a MI_BATCH_BUFFER_START into the generation_batch if needed.
+    */
+   struct anv_address                           generation_jump_addr;
+
+   /**
+    * Location in anv_cmd_buffer::batch at which the generation batch should
+    * jump back to.
+    */
+   struct anv_address                           generation_return_addr;
+
+   /** List of anv_batch_bo used for generation
+    *
+    * We have to keep this separate from anv_cmd_buffer::batch_bos, which is
+    * used for a chaining optimization.
+    */
+   struct list_head                             generation_batch_bos;
+
    /**
     * A vector of anv_bo pointers for chunks of memory used by the command
     * buffer that are too large to be allocated through dynamic_state_stream.
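
The two new anv_address fields describe a jump-and-return protocol between
the batches. A hedged sketch of the intended flow follows; every helper
here (reserve_bbs_slot, batch_next_address, generation_batch_start_address,
patch_bbs, emit_bbs) is a hypothetical stand-in, and only the struct fields
come from the patch:

/* Illustrative only: the main batch reserves a hole for an
 * MI_BATCH_BUFFER_START, which is patched to point at the generation batch
 * only if generation commands were actually recorded; the generation batch
 * then jumps back to the main batch. */
static void
record_with_generation_sketch(struct anv_cmd_buffer *cmd_buffer)
{
   /* Leave room in the main batch for a jump we may never need... */
   cmd_buffer->generation_jump_addr = reserve_bbs_slot(&cmd_buffer->batch);
   /* ...and remember where execution should resume afterwards. */
   cmd_buffer->generation_return_addr = batch_next_address(&cmd_buffer->batch);

   /* Main commands keep going to cmd_buffer->batch; generated ones, if
    * any, go to cmd_buffer->generation_batch. */

   if (!list_is_empty(&cmd_buffer->generation_batch_bos)) {
      patch_bbs(cmd_buffer->generation_jump_addr,
                generation_batch_start_address(cmd_buffer));
      emit_bbs(&cmd_buffer->generation_batch,
               cmd_buffer->generation_return_addr);
   }
}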