diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 299be1cd2a5..fd64598eb47 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -410,6 +410,12 @@ anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) return list_entry(cmd_buffer->batch_bos.prev, struct anv_batch_bo, link); } +static struct anv_batch_bo * +anv_cmd_buffer_current_generation_batch_bo(struct anv_cmd_buffer *cmd_buffer) +{ + return list_entry(cmd_buffer->generation_batch_bos.prev, struct anv_batch_bo, link); +} + struct anv_address anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) { @@ -422,34 +428,34 @@ anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) } static void -emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, +emit_batch_buffer_start(struct anv_batch *batch, struct anv_bo *bo, uint32_t offset) { - /* In gfx8+ the address field grew to two dwords to accommodate 48 bit - * offsets. The high 16 bits are in the last dword, so we can use the gfx8 - * version in either case, as long as we set the instruction length in the - * header accordingly. This means that we always emit three dwords here - * and all the padding and adjustment we do in this file works for all - * gens. 
- */ - - const uint32_t gfx8_length = - GFX8_MI_BATCH_BUFFER_START_length - GFX8_MI_BATCH_BUFFER_START_length_bias; - - anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_START, bbs) { - bbs.DWordLength = gfx8_length; + anv_batch_emit(batch, GFX8_MI_BATCH_BUFFER_START, bbs) { + bbs.DWordLength = GFX8_MI_BATCH_BUFFER_START_length - + GFX8_MI_BATCH_BUFFER_START_length_bias; bbs.SecondLevelBatchBuffer = Firstlevelbatch; bbs.AddressSpaceIndicator = ASI_PPGTT; bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset }; } } +enum anv_cmd_buffer_batch { + ANV_CMD_BUFFER_BATCH_MAIN, + ANV_CMD_BUFFER_BATCH_GENERATION, +}; + static void cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, - struct anv_batch_bo *bbo) + struct anv_batch_bo *bbo, + enum anv_cmd_buffer_batch batch_type) { - struct anv_batch *batch = &cmd_buffer->batch; + struct anv_batch *batch = + batch_type == ANV_CMD_BUFFER_BATCH_GENERATION ? + &cmd_buffer->generation_batch : &cmd_buffer->batch; struct anv_batch_bo *current_bbo = + batch_type == ANV_CMD_BUFFER_BATCH_GENERATION ? 
+ anv_cmd_buffer_current_generation_batch_bo(cmd_buffer) : anv_cmd_buffer_current_batch_bo(cmd_buffer); /* We set the end of the batch a little short so we would be sure we @@ -459,7 +465,7 @@ cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, batch->end += GFX8_MI_BATCH_BUFFER_START_length * 4; assert(batch->end == current_bbo->bo->map + current_bbo->bo->size); - emit_batch_buffer_start(cmd_buffer, bbo->bo, 0); + emit_batch_buffer_start(batch, bbo->bo, 0); anv_batch_bo_finish(current_bbo, batch); } @@ -537,7 +543,7 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, uint32_t size, void *_data) } *seen_bbo = new_bbo; - cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo, ANV_CMD_BUFFER_BATCH_MAIN); list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); @@ -546,6 +552,46 @@ anv_cmd_buffer_chain_batch(struct anv_batch *batch, uint32_t size, void *_data) return VK_SUCCESS; } +static VkResult +anv_cmd_buffer_chain_generation_batch(struct anv_batch *batch, uint32_t size, void *_data) +{ + /* The caller should not need that much space. Otherwise it should split + * its commands. + */ + assert(size <= ANV_MAX_CMD_BUFFER_BATCH_SIZE); + + struct anv_cmd_buffer *cmd_buffer = _data; + struct anv_batch_bo *new_bbo = NULL; + /* Cap reallocation to chunk. 
*/ + uint32_t alloc_size = MIN2( + MAX2(batch->total_batch_size, size), + ANV_MAX_CMD_BUFFER_BATCH_SIZE); + + VkResult result = anv_batch_bo_create(cmd_buffer, alloc_size, &new_bbo); + if (result != VK_SUCCESS) + return result; + + batch->total_batch_size += alloc_size; + + struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos); + if (seen_bbo == NULL) { + anv_batch_bo_destroy(new_bbo, cmd_buffer); + return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); + } + *seen_bbo = new_bbo; + + if (!list_is_empty(&cmd_buffer->generation_batch_bos)) { + cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo, + ANV_CMD_BUFFER_BATCH_GENERATION); + } + + list_addtail(&new_bbo->link, &cmd_buffer->generation_batch_bos); + + anv_batch_bo_start(new_bbo, batch, GFX8_MI_BATCH_BUFFER_START_length * 4); + + return VK_SUCCESS; +} + /** Allocate a binding table * * This function allocates a binding table. This is a bit more complicated @@ -762,6 +808,16 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) anv_batch_bo_start(batch_bo, &cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_START_length * 4); + /* Generation batch is initialized empty since it's possible it won't be + * used. 
+ */ + list_inithead(&cmd_buffer->generation_batch_bos); + + cmd_buffer->generation_batch.alloc = &cmd_buffer->vk.pool->alloc; + cmd_buffer->generation_batch.user_data = cmd_buffer; + cmd_buffer->generation_batch.total_batch_size = 0; + cmd_buffer->generation_batch.extend_cb = anv_cmd_buffer_chain_generation_batch; + int success = u_vector_init_pow2(&cmd_buffer->seen_bbos, 8, sizeof(struct anv_bo *)); if (!success) @@ -813,6 +869,12 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) list_del(&bbo->link); anv_batch_bo_destroy(bbo, cmd_buffer); } + /* Also destroy all generation batch buffers */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->generation_batch_bos, link) { + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, cmd_buffer); + } } void @@ -849,14 +911,27 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = first_bbo; - assert(first_bbo->bo->size == ANV_MIN_CMD_BUFFER_BATCH_SIZE); cmd_buffer->batch.total_batch_size = first_bbo->bo->size; + + /* Delete all generation batch bos */ + list_for_each_entry_safe(struct anv_batch_bo, bbo, + &cmd_buffer->generation_batch_bos, link) { + list_del(&bbo->link); + anv_batch_bo_destroy(bbo, cmd_buffer); + } + + /* And reset generation batch */ + cmd_buffer->generation_batch.total_batch_size = 0; + cmd_buffer->generation_batch.start = NULL; + cmd_buffer->generation_batch.end = NULL; + cmd_buffer->generation_batch.next = NULL; } void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) { + const struct intel_device_info *devinfo = cmd_buffer->device->info; struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { @@ -878,7 +953,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) */ batch_bo->chained = anv_cmd_buffer_is_chainable(cmd_buffer); if (batch_bo->chained) - 
emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0); + emit_batch_buffer_start(&cmd_buffer->batch, batch_bo->bo, 0); else anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_END, bbe); @@ -903,7 +978,6 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) * prefetch. */ if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) { - const struct intel_device_info *devinfo = cmd_buffer->device->info; const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class; /* Careful to have everything in signed integer. */ int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class]; @@ -951,7 +1025,7 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) assert(cmd_buffer->batch.start == batch_bo->bo->map); assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size); - emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0); + emit_batch_buffer_start(&cmd_buffer->batch, batch_bo->bo, 0); assert(cmd_buffer->batch.start == batch_bo->bo->map); } else { cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; @@ -991,7 +1065,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_batch_bo *last_bbo = list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); - emit_batch_buffer_start(primary, first_bbo->bo, 0); + emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0); struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); assert(primary->batch.start == this_bbo->bo->map); @@ -1020,7 +1094,8 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, struct anv_batch_bo *last_bbo = list_last_entry(©_list, struct anv_batch_bo, link); - cmd_buffer_chain_to_batch_bo(primary, first_bbo); + cmd_buffer_chain_to_batch_bo(primary, first_bbo, + ANV_CMD_BUFFER_BATCH_MAIN); list_splicetail(©_list, &primary->batch_bos); @@ -1039,7 +1114,7 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, .Address = secondary->return_addr) + 
(GFX8_MI_STORE_DATA_IMM_ImmediateData_start / 8); - emit_batch_buffer_start(primary, first_bbo->bo, 0); + emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0); *write_return_addr = anv_address_physical(anv_batch_address(&primary->batch, diff --git a/src/intel/vulkan/anv_cmd_buffer.c b/src/intel/vulkan/anv_cmd_buffer.c index 7b3dc75d900..77df27c2f03 100644 --- a/src/intel/vulkan/anv_cmd_buffer.c +++ b/src/intel/vulkan/anv_cmd_buffer.c @@ -103,6 +103,7 @@ anv_create_cmd_buffer(struct vk_command_pool *pool, &cmd_buffer->state.gfx.sample_locations; cmd_buffer->batch.status = VK_SUCCESS; + cmd_buffer->generation_batch.status = VK_SUCCESS; cmd_buffer->device = device; @@ -128,6 +129,8 @@ anv_create_cmd_buffer(struct vk_command_pool *pool, cmd_buffer->self_mod_locations = NULL; + cmd_buffer->generation_return_addr = ANV_NULL_ADDRESS; + anv_cmd_state_init(cmd_buffer); anv_measure_init(cmd_buffer); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 889393c0d80..99afceed172 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -2681,6 +2681,7 @@ struct anv_cmd_buffer { struct anv_device * device; struct anv_queue_family * queue_family; + /** Batch where the main commands live */ struct anv_batch batch; /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was @@ -2753,6 +2754,28 @@ struct anv_cmd_buffer { */ uint32_t total_batch_size; + /** Batch generating part of the anv_cmd_buffer::batch */ + struct anv_batch generation_batch; + + /** + * Location in anv_cmd_buffer::batch at which we left some space to insert + * a MI_BATCH_BUFFER_START into the generation_batch if needed. + */ + struct anv_address generation_jump_addr; + + /** + * Location in anv_cmd_buffer::batch at which the generation batch should + * jump back to. 
*/ + struct anv_address generation_return_addr; + + /** List of anv_batch_bo used for generation + * + * We have to keep this separate from the anv_cmd_buffer::batch_bos that is + * used for a chaining optimization. */ + struct list_head generation_batch_bos; + /** * A vector of anv_bo pointers for chunks of memory used by the command * buffer that are too large to be allocated through dynamic_state_stream.