From e2dc32d755b15697765719acec1d23f12f9c938c Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Sun, 20 Nov 2022 01:10:15 +0200 Subject: [PATCH] anv: move functions around to plan for generated draws Signed-off-by: Lionel Landwerlin Reviewed-by: Ian Romanick Part-of: --- src/intel/vulkan/genX_cmd_buffer.c | 963 ++++++++++++++--------------- 1 file changed, 471 insertions(+), 492 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 6cb34b792c9..5e5c9989b86 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1442,332 +1442,6 @@ anv_cmd_buffer_reset_rendering(struct anv_cmd_buffer *cmd_buffer) gfx->null_surface_state = ANV_STATE_NULL; } -VkResult -genX(BeginCommandBuffer)( - VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo* pBeginInfo) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - VkResult result; - - /* If this is the first vkBeginCommandBuffer, we must *initialize* the - * command buffer's state. Otherwise, we must *reset* its state. In both - * cases we reset it. - * - * From the Vulkan 1.0 spec: - * - * If a command buffer is in the executable state and the command buffer - * was allocated from a command pool with the - * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then - * vkBeginCommandBuffer implicitly resets the command buffer, behaving - * as if vkResetCommandBuffer had been called with - * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts - * the command buffer in the recording state. - */ - anv_cmd_buffer_reset(&cmd_buffer->vk, 0); - anv_cmd_buffer_reset_rendering(cmd_buffer); - - cmd_buffer->usage_flags = pBeginInfo->flags; - - /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for - * primary level command buffers. - * - * From the Vulkan 1.0 spec: - * - * VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a - * secondary command buffer is considered to be entirely inside a render - * pass. If this is a primary command buffer, then this bit is ignored. - */ - if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) - cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; - - trace_intel_begin_cmd_buffer(&cmd_buffer->trace); - - genX(cmd_buffer_emit_state_base_address)(cmd_buffer); - - /* We sometimes store vertex data in the dynamic state buffer for blorp - * operations and our dynamic state stream may re-use data from previous - * command buffers. In order to prevent stale cache data, we flush the VF - * cache. We could do this on every blorp call but that's not really - * needed as all of the data will get written by the CPU prior to the GPU - * executing anything. The chances are fairly high that they will use - * blorp at least once per primary command buffer so it shouldn't be - * wasted. - * - * There is also a workaround on gfx8 which requires us to invalidate the - * VF cache occasionally. It's easier if we can assume we start with a - * fresh cache (See also genX(cmd_buffer_set_binding_for_gfx8_vb_flush).) - */ - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_VF_CACHE_INVALIDATE_BIT, - "new cmd buffer"); - - /* Re-emit the aux table register in every command buffer. This way we're - * ensured that we have the table even if this command buffer doesn't - * initialize any images. 
- */ - if (cmd_buffer->device->info->has_aux_map) { - anv_add_pending_pipe_bits(cmd_buffer, - ANV_PIPE_AUX_TABLE_INVALIDATE_BIT, - "new cmd buffer with aux-tt"); - } - - /* We send an "Indirect State Pointers Disable" packet at - * EndCommandBuffer, so all push constant packets are ignored during a - * context restore. Documentation says after that command, we need to - * emit push constants again before any rendering operation. So we - * flag them dirty here to make sure they get emitted. - */ - cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; - - if (cmd_buffer->usage_flags & - VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { - struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; - - char gcbiar_data[VK_GCBIARR_DATA_SIZE(MAX_RTS)]; - const VkRenderingInfo *resume_info = - vk_get_command_buffer_inheritance_as_rendering_resume(cmd_buffer->vk.level, - pBeginInfo, - gcbiar_data); - if (resume_info != NULL) { - genX(CmdBeginRendering)(commandBuffer, resume_info); - } else { - const VkCommandBufferInheritanceRenderingInfo *inheritance_info = - vk_get_command_buffer_inheritance_rendering_info(cmd_buffer->vk.level, - pBeginInfo); - assert(inheritance_info); - - gfx->rendering_flags = inheritance_info->flags; - gfx->render_area = (VkRect2D) { }; - gfx->layer_count = 0; - gfx->samples = inheritance_info->rasterizationSamples; - gfx->view_mask = inheritance_info->viewMask; - - uint32_t color_att_count = inheritance_info->colorAttachmentCount; - result = anv_cmd_buffer_init_attachments(cmd_buffer, color_att_count); - if (result != VK_SUCCESS) - return result; - - for (uint32_t i = 0; i < color_att_count; i++) { - gfx->color_att[i].vk_format = - inheritance_info->pColorAttachmentFormats[i]; - } - gfx->depth_att.vk_format = - inheritance_info->depthAttachmentFormat; - gfx->stencil_att.vk_format = - inheritance_info->stencilAttachmentFormat; - - cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; - } - } - - /* Emit the sample pattern at the beginning of the batch because the - * default locations emitted at the device initialization might have been - * changed by a previous command buffer. - * - * Do not change that when we're continuing a previous renderpass. - */ - if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations && - !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) - genX(emit_sample_pattern)(&cmd_buffer->batch, NULL); - - if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { - const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info = - vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext, COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT); - - /* If secondary buffer supports conditional rendering - * we should emit commands as if conditional rendering is enabled. - */ - cmd_buffer->state.conditional_render_enabled = - conditional_rendering_info && conditional_rendering_info->conditionalRenderingEnable; - } - - return VK_SUCCESS; -} - -/* From the PRM, Volume 2a: - * - * "Indirect State Pointers Disable - * - * At the completion of the post-sync operation associated with this pipe - * control packet, the indirect state pointers in the hardware are - * considered invalid; the indirect pointers are not saved in the context. - * If any new indirect state commands are executed in the command stream - * while the pipe control is pending, the new indirect state commands are - * preserved. 
- * - * [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context - * restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant - * commands are only considered as Indirect State Pointers. Once ISP is - * issued in a context, SW must initialize by programming push constant - * commands for all the shaders (at least to zero length) before attempting - * any rendering operation for the same context." - * - * 3DSTATE_CONSTANT_* packets are restored during a context restore, - * even though they point to a BO that has been already unreferenced at - * the end of the previous batch buffer. This has been fine so far since - * we are protected by these scratch page (every address not covered by - * a BO should be pointing to the scratch page). But on CNL, it is - * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_* - * instruction. - * - * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the - * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a - * context restore, so the mentioned hang doesn't happen. However, - * software must program push constant commands for all stages prior to - * rendering anything. So we flag them dirty in BeginCommandBuffer. - * - * Finally, we also make sure to stall at pixel scoreboard to make sure the - * constants have been loaded into the EUs prior to disable the push constants - * so that it doesn't hang a previous 3DPRIMITIVE. - */ -static void -emit_isp_disable(struct anv_cmd_buffer *cmd_buffer) -{ - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.StallAtPixelScoreboard = true; - pc.CommandStreamerStallEnable = true; - anv_debug_dump_pc(pc); - } - anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { - pc.IndirectStatePointersDisable = true; - pc.CommandStreamerStallEnable = true; - anv_debug_dump_pc(pc); - } -} - -VkResult -genX(EndCommandBuffer)( - VkCommandBuffer commandBuffer) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - if (anv_batch_has_error(&cmd_buffer->batch)) - return cmd_buffer->batch.status; - - anv_measure_endcommandbuffer(cmd_buffer); - - /* We want every command buffer to start with the PMA fix in a known state, - * so we disable it at the end of the command buffer. - */ - genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false); - - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - - emit_isp_disable(cmd_buffer); - - trace_intel_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer->vk.level); - - anv_cmd_buffer_end_batch_buffer(cmd_buffer); - - return VK_SUCCESS; -} - -void -genX(CmdExecuteCommands)( - VkCommandBuffer commandBuffer, - uint32_t commandBufferCount, - const VkCommandBuffer* pCmdBuffers) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); - - assert(primary->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); - - if (anv_batch_has_error(&primary->batch)) - return; - - /* The secondary command buffers will assume that the PMA fix is disabled - * when they begin executing. Make sure this is true. - */ - genX(cmd_buffer_enable_pma_fix)(primary, false); - - /* The secondary command buffer doesn't know which textures etc. have been - * flushed prior to their execution. Apply those flushes now. 
- */ - genX(cmd_buffer_apply_pipe_flushes)(primary); - - for (uint32_t i = 0; i < commandBufferCount; i++) { - ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); - - assert(secondary->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); - assert(!anv_batch_has_error(&secondary->batch)); - - if (secondary->state.conditional_render_enabled) { - if (!primary->state.conditional_render_enabled) { - /* Secondary buffer is constructed as if it will be executed - * with conditional rendering, we should satisfy this dependency - * regardless of conditional rendering being enabled in primary. - */ - struct mi_builder b; - mi_builder_init(&b, primary->device->info, &primary->batch); - mi_store(&b, mi_reg64(ANV_PREDICATE_RESULT_REG), - mi_imm(UINT64_MAX)); - } - } - - if (secondary->usage_flags & - VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { - /* If we're continuing a render pass from the primary, we need to - * copy the surface states for the current subpass into the storage - * we allocated for them in BeginCommandBuffer. - */ - struct anv_state src_state = primary->state.gfx.att_states; - struct anv_state dst_state = secondary->state.gfx.att_states; - assert(src_state.alloc_size == dst_state.alloc_size); - - genX(cmd_buffer_so_memcpy)( - primary, - anv_state_pool_state_address(&primary->device->internal_surface_state_pool, - dst_state), - anv_state_pool_state_address(&primary->device->internal_surface_state_pool, - src_state), - src_state.alloc_size); - } - - anv_cmd_buffer_add_secondary(primary, secondary); - - assert(secondary->perf_query_pool == NULL || primary->perf_query_pool == NULL || - secondary->perf_query_pool == primary->perf_query_pool); - if (secondary->perf_query_pool) - primary->perf_query_pool = secondary->perf_query_pool; - -#if GFX_VERx10 == 120 - if (secondary->state.depth_reg_mode != ANV_DEPTH_REG_MODE_UNKNOWN) - primary->state.depth_reg_mode = secondary->state.depth_reg_mode; -#endif - } - - /* The secondary isn't counted in our VF cache tracking so we need to - * invalidate the whole thing. - */ - if (GFX_VER == 9) { - anv_add_pending_pipe_bits(primary, - ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT, - "Secondary cmd buffer not tracked in VF cache"); - } - - /* The secondary may have selected a different pipeline (3D or compute) and - * may have changed the current L3$ configuration. Reset our tracking - * variables to invalid values to ensure that we re-emit these in the case - * where we do any draws or compute dispatches from the primary after the - * secondary has returned. - */ - primary->state.current_pipeline = UINT32_MAX; - primary->state.current_l3_config = NULL; - primary->state.current_hash_scale = 0; - primary->state.gfx.push_constant_stages = 0; - vk_dynamic_graphics_state_dirty_all(&primary->vk.dynamic_graphics_state); - - /* Each of the secondary command buffers will use its own state base - * address. We need to re-emit state base address for the primary after - * all of the secondaries are done. - * - * TODO: Maybe we want to make this a dirty bit to avoid extra state base - * address calls? - */ - genX(cmd_buffer_emit_state_base_address)(primary); -} - /** * Program the hardware to use the specified L3 configuration. 
*/ @@ -2222,98 +1896,6 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) } } -static void -cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, - const VkDependencyInfo *dep_info, - const char *reason) -{ - /* XXX: Right now, we're really dumb and just flush whatever categories - * the app asks for. One of these days we may make this a bit better - * but right now that's all the hardware allows for in most areas. - */ - VkAccessFlags2 src_flags = 0; - VkAccessFlags2 dst_flags = 0; - - for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) { - src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask; - dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask; - } - - for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) { - src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask; - dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask; - } - - for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) { - const VkImageMemoryBarrier2 *img_barrier = - &dep_info->pImageMemoryBarriers[i]; - - src_flags |= img_barrier->srcAccessMask; - dst_flags |= img_barrier->dstAccessMask; - - ANV_FROM_HANDLE(anv_image, image, img_barrier->image); - const VkImageSubresourceRange *range = &img_barrier->subresourceRange; - - uint32_t base_layer, layer_count; - if (image->vk.image_type == VK_IMAGE_TYPE_3D) { - base_layer = 0; - layer_count = anv_minify(image->vk.extent.depth, range->baseMipLevel); - } else { - base_layer = range->baseArrayLayer; - layer_count = vk_image_subresource_layer_count(&image->vk, range); - } - const uint32_t level_count = - vk_image_subresource_level_count(&image->vk, range); - - if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { - transition_depth_buffer(cmd_buffer, image, - base_layer, layer_count, - img_barrier->oldLayout, - img_barrier->newLayout, - false /* will_full_fast_clear */); - } - - if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - transition_stencil_buffer(cmd_buffer, image, - range->baseMipLevel, level_count, - base_layer, layer_count, - img_barrier->oldLayout, - img_barrier->newLayout, - false /* will_full_fast_clear */); - } - - if (range->aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { - VkImageAspectFlags color_aspects = - vk_image_expand_aspect_mask(&image->vk, range->aspectMask); - anv_foreach_image_aspect_bit(aspect_bit, image, color_aspects) { - transition_color_buffer(cmd_buffer, image, 1UL << aspect_bit, - range->baseMipLevel, level_count, - base_layer, layer_count, - img_barrier->oldLayout, - img_barrier->newLayout, - img_barrier->srcQueueFamilyIndex, - img_barrier->dstQueueFamilyIndex, - false /* will_full_fast_clear */); - } - } - } - - enum anv_pipe_bits bits = - anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) | - anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags); - - anv_add_pending_pipe_bits(cmd_buffer, bits, reason); -} - -void genX(CmdPipelineBarrier2)( - VkCommandBuffer commandBuffer, - const VkDependencyInfo* pDependencyInfo) -{ - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - - cmd_buffer_barrier(cmd_buffer, pDependencyInfo, "pipe barrier"); -} - static void cmd_buffer_alloc_push_constants(struct anv_cmd_buffer *cmd_buffer) { @@ -3828,6 +3410,423 @@ genX(cmd_buffer_flush_gfx_state)(struct anv_cmd_buffer *cmd_buffer) genX(cmd_buffer_flush_dynamic_state)(cmd_buffer); } +VkResult +genX(BeginCommandBuffer)( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo* pBeginInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, 
cmd_buffer, commandBuffer); + VkResult result; + + /* If this is the first vkBeginCommandBuffer, we must *initialize* the + * command buffer's state. Otherwise, we must *reset* its state. In both + * cases we reset it. + * + * From the Vulkan 1.0 spec: + * + * If a command buffer is in the executable state and the command buffer + * was allocated from a command pool with the + * VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT flag set, then + * vkBeginCommandBuffer implicitly resets the command buffer, behaving + * as if vkResetCommandBuffer had been called with + * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT not set. It then puts + * the command buffer in the recording state. + */ + anv_cmd_buffer_reset(&cmd_buffer->vk, 0); + anv_cmd_buffer_reset_rendering(cmd_buffer); + + cmd_buffer->usage_flags = pBeginInfo->flags; + + /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for + * primary level command buffers. + * + * From the Vulkan 1.0 spec: + * + * VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a + * secondary command buffer is considered to be entirely inside a render + * pass. If this is a primary command buffer, then this bit is ignored. + */ + if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) + cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT; + + trace_intel_begin_cmd_buffer(&cmd_buffer->trace); + + genX(cmd_buffer_emit_state_base_address)(cmd_buffer); + + /* We sometimes store vertex data in the dynamic state buffer for blorp + * operations and our dynamic state stream may re-use data from previous + * command buffers. In order to prevent stale cache data, we flush the VF + * cache. We could do this on every blorp call but that's not really + * needed as all of the data will get written by the CPU prior to the GPU + * executing anything. The chances are fairly high that they will use + * blorp at least once per primary command buffer so it shouldn't be + * wasted. + * + * There is also a workaround on gfx8 which requires us to invalidate the + * VF cache occasionally. It's easier if we can assume we start with a + * fresh cache (See also genX(cmd_buffer_set_binding_for_gfx8_vb_flush).) + */ + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_VF_CACHE_INVALIDATE_BIT, + "new cmd buffer"); + + /* Re-emit the aux table register in every command buffer. This way we're + * ensured that we have the table even if this command buffer doesn't + * initialize any images. + */ + if (cmd_buffer->device->info->has_aux_map) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_AUX_TABLE_INVALIDATE_BIT, + "new cmd buffer with aux-tt"); + } + + /* We send an "Indirect State Pointers Disable" packet at + * EndCommandBuffer, so all push constant packets are ignored during a + * context restore. Documentation says after that command, we need to + * emit push constants again before any rendering operation. So we + * flag them dirty here to make sure they get emitted. 
+ */ + cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS; + + if (cmd_buffer->usage_flags & + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { + struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx; + + char gcbiar_data[VK_GCBIARR_DATA_SIZE(MAX_RTS)]; + const VkRenderingInfo *resume_info = + vk_get_command_buffer_inheritance_as_rendering_resume(cmd_buffer->vk.level, + pBeginInfo, + gcbiar_data); + if (resume_info != NULL) { + genX(CmdBeginRendering)(commandBuffer, resume_info); + } else { + const VkCommandBufferInheritanceRenderingInfo *inheritance_info = + vk_get_command_buffer_inheritance_rendering_info(cmd_buffer->vk.level, + pBeginInfo); + assert(inheritance_info); + + gfx->rendering_flags = inheritance_info->flags; + gfx->render_area = (VkRect2D) { }; + gfx->layer_count = 0; + gfx->samples = inheritance_info->rasterizationSamples; + gfx->view_mask = inheritance_info->viewMask; + + uint32_t color_att_count = inheritance_info->colorAttachmentCount; + result = anv_cmd_buffer_init_attachments(cmd_buffer, color_att_count); + if (result != VK_SUCCESS) + return result; + + for (uint32_t i = 0; i < color_att_count; i++) { + gfx->color_att[i].vk_format = + inheritance_info->pColorAttachmentFormats[i]; + } + gfx->depth_att.vk_format = + inheritance_info->depthAttachmentFormat; + gfx->stencil_att.vk_format = + inheritance_info->stencilAttachmentFormat; + + cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS; + } + } + + /* Emit the sample pattern at the beginning of the batch because the + * default locations emitted at the device initialization might have been + * changed by a previous command buffer. + * + * Do not change that when we're continuing a previous renderpass. + */ + if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations && + !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) + genX(emit_sample_pattern)(&cmd_buffer->batch, NULL); + + if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { + const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info = + vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext, COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT); + + /* If secondary buffer supports conditional rendering + * we should emit commands as if conditional rendering is enabled. + */ + cmd_buffer->state.conditional_render_enabled = + conditional_rendering_info && conditional_rendering_info->conditionalRenderingEnable; + } + + return VK_SUCCESS; +} + +/* From the PRM, Volume 2a: + * + * "Indirect State Pointers Disable + * + * At the completion of the post-sync operation associated with this pipe + * control packet, the indirect state pointers in the hardware are + * considered invalid; the indirect pointers are not saved in the context. + * If any new indirect state commands are executed in the command stream + * while the pipe control is pending, the new indirect state commands are + * preserved. + * + * [DevIVB+]: Using Invalidate State Pointer (ISP) only inhibits context + * restoring of Push Constant (3DSTATE_CONSTANT_*) commands. Push Constant + * commands are only considered as Indirect State Pointers. Once ISP is + * issued in a context, SW must initialize by programming push constant + * commands for all the shaders (at least to zero length) before attempting + * any rendering operation for the same context." 
+ * + * 3DSTATE_CONSTANT_* packets are restored during a context restore, + * even though they point to a BO that has been already unreferenced at + * the end of the previous batch buffer. This has been fine so far since + * we are protected by these scratch page (every address not covered by + * a BO should be pointing to the scratch page). But on CNL, it is + * causing a GPU hang during context restore at the 3DSTATE_CONSTANT_* + * instruction. + * + * The flag "Indirect State Pointers Disable" in PIPE_CONTROL tells the + * hardware to ignore previous 3DSTATE_CONSTANT_* packets during a + * context restore, so the mentioned hang doesn't happen. However, + * software must program push constant commands for all stages prior to + * rendering anything. So we flag them dirty in BeginCommandBuffer. + * + * Finally, we also make sure to stall at pixel scoreboard to make sure the + * constants have been loaded into the EUs prior to disable the push constants + * so that it doesn't hang a previous 3DPRIMITIVE. + */ +static void +emit_isp_disable(struct anv_cmd_buffer *cmd_buffer) +{ + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.StallAtPixelScoreboard = true; + pc.CommandStreamerStallEnable = true; + anv_debug_dump_pc(pc); + } + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) { + pc.IndirectStatePointersDisable = true; + pc.CommandStreamerStallEnable = true; + anv_debug_dump_pc(pc); + } +} + +VkResult +genX(EndCommandBuffer)( + VkCommandBuffer commandBuffer) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + if (anv_batch_has_error(&cmd_buffer->batch)) + return cmd_buffer->batch.status; + + anv_measure_endcommandbuffer(cmd_buffer); + + /* We want every command buffer to start with the PMA fix in a known state, + * so we disable it at the end of the command buffer. + */ + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false); + + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + emit_isp_disable(cmd_buffer); + + trace_intel_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer->vk.level); + + anv_cmd_buffer_end_batch_buffer(cmd_buffer); + + return VK_SUCCESS; +} + +void +genX(CmdExecuteCommands)( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer* pCmdBuffers) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, primary, commandBuffer); + + assert(primary->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); + + if (anv_batch_has_error(&primary->batch)) + return; + + /* The secondary command buffers will assume that the PMA fix is disabled + * when they begin executing. Make sure this is true. + */ + genX(cmd_buffer_enable_pma_fix)(primary, false); + + /* The secondary command buffer doesn't know which textures etc. have been + * flushed prior to their execution. Apply those flushes now. + */ + genX(cmd_buffer_apply_pipe_flushes)(primary); + + for (uint32_t i = 0; i < commandBufferCount; i++) { + ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]); + + assert(secondary->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); + assert(!anv_batch_has_error(&secondary->batch)); + + if (secondary->state.conditional_render_enabled) { + if (!primary->state.conditional_render_enabled) { + /* Secondary buffer is constructed as if it will be executed + * with conditional rendering, we should satisfy this dependency + * regardless of conditional rendering being enabled in primary. 
+ */ + struct mi_builder b; + mi_builder_init(&b, primary->device->info, &primary->batch); + mi_store(&b, mi_reg64(ANV_PREDICATE_RESULT_REG), + mi_imm(UINT64_MAX)); + } + } + + if (secondary->usage_flags & + VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { + /* If we're continuing a render pass from the primary, we need to + * copy the surface states for the current subpass into the storage + * we allocated for them in BeginCommandBuffer. + */ + struct anv_state src_state = primary->state.gfx.att_states; + struct anv_state dst_state = secondary->state.gfx.att_states; + assert(src_state.alloc_size == dst_state.alloc_size); + + genX(cmd_buffer_so_memcpy)( + primary, + anv_state_pool_state_address(&primary->device->internal_surface_state_pool, + dst_state), + anv_state_pool_state_address(&primary->device->internal_surface_state_pool, + src_state), + src_state.alloc_size); + } + + anv_cmd_buffer_add_secondary(primary, secondary); + + assert(secondary->perf_query_pool == NULL || primary->perf_query_pool == NULL || + secondary->perf_query_pool == primary->perf_query_pool); + if (secondary->perf_query_pool) + primary->perf_query_pool = secondary->perf_query_pool; + +#if GFX_VERx10 == 120 + if (secondary->state.depth_reg_mode != ANV_DEPTH_REG_MODE_UNKNOWN) + primary->state.depth_reg_mode = secondary->state.depth_reg_mode; +#endif + } + + /* The secondary isn't counted in our VF cache tracking so we need to + * invalidate the whole thing. + */ + if (GFX_VER == 9) { + anv_add_pending_pipe_bits(primary, + ANV_PIPE_CS_STALL_BIT | ANV_PIPE_VF_CACHE_INVALIDATE_BIT, + "Secondary cmd buffer not tracked in VF cache"); + } + + /* The secondary may have selected a different pipeline (3D or compute) and + * may have changed the current L3$ configuration. Reset our tracking + * variables to invalid values to ensure that we re-emit these in the case + * where we do any draws or compute dispatches from the primary after the + * secondary has returned. + */ + primary->state.current_pipeline = UINT32_MAX; + primary->state.current_l3_config = NULL; + primary->state.current_hash_scale = 0; + primary->state.gfx.push_constant_stages = 0; + vk_dynamic_graphics_state_dirty_all(&primary->vk.dynamic_graphics_state); + + /* Each of the secondary command buffers will use its own state base + * address. We need to re-emit state base address for the primary after + * all of the secondaries are done. + * + * TODO: Maybe we want to make this a dirty bit to avoid extra state base + * address calls? + */ + genX(cmd_buffer_emit_state_base_address)(primary); +} + +static void +cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, + const VkDependencyInfo *dep_info, + const char *reason) +{ + /* XXX: Right now, we're really dumb and just flush whatever categories + * the app asks for. One of these days we may make this a bit better + * but right now that's all the hardware allows for in most areas. 
+ */ + VkAccessFlags2 src_flags = 0; + VkAccessFlags2 dst_flags = 0; + + for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) { + src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask; + dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) { + src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask; + dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask; + } + + for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) { + const VkImageMemoryBarrier2 *img_barrier = + &dep_info->pImageMemoryBarriers[i]; + + src_flags |= img_barrier->srcAccessMask; + dst_flags |= img_barrier->dstAccessMask; + + ANV_FROM_HANDLE(anv_image, image, img_barrier->image); + const VkImageSubresourceRange *range = &img_barrier->subresourceRange; + + uint32_t base_layer, layer_count; + if (image->vk.image_type == VK_IMAGE_TYPE_3D) { + base_layer = 0; + layer_count = anv_minify(image->vk.extent.depth, range->baseMipLevel); + } else { + base_layer = range->baseArrayLayer; + layer_count = vk_image_subresource_layer_count(&image->vk, range); + } + const uint32_t level_count = + vk_image_subresource_level_count(&image->vk, range); + + if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { + transition_depth_buffer(cmd_buffer, image, + base_layer, layer_count, + img_barrier->oldLayout, + img_barrier->newLayout, + false /* will_full_fast_clear */); + } + + if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { + transition_stencil_buffer(cmd_buffer, image, + range->baseMipLevel, level_count, + base_layer, layer_count, + img_barrier->oldLayout, + img_barrier->newLayout, + false /* will_full_fast_clear */); + } + + if (range->aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) { + VkImageAspectFlags color_aspects = + vk_image_expand_aspect_mask(&image->vk, range->aspectMask); + anv_foreach_image_aspect_bit(aspect_bit, image, color_aspects) { + transition_color_buffer(cmd_buffer, image, 1UL << aspect_bit, + range->baseMipLevel, level_count, + base_layer, layer_count, + img_barrier->oldLayout, + img_barrier->newLayout, + img_barrier->srcQueueFamilyIndex, + img_barrier->dstQueueFamilyIndex, + false /* will_full_fast_clear */); + } + } + } + + enum anv_pipe_bits bits = + anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) | + anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags); + + anv_add_pending_pipe_bits(cmd_buffer, bits, reason); +} + +void genX(CmdPipelineBarrier2)( + VkCommandBuffer commandBuffer, + const VkDependencyInfo* pDependencyInfo) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + + cmd_buffer_barrier(cmd_buffer, pDependencyInfo, "pipe barrier"); +} #if GFX_VER >= 11 #define _3DPRIMITIVE_DIRECT GENX(3DPRIMITIVE_EXTENDED) @@ -4348,51 +4347,53 @@ load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer, #endif } -void genX(CmdDrawIndirect)( - VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) +static void +emit_indirect_draws(struct anv_cmd_buffer *cmd_buffer, + struct anv_address indirect_data_addr, + uint32_t indirect_data_stride, + uint32_t draw_count, + bool indexed) { - ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); - ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); #if GFX_VER < 11 struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); #endif - if (anv_batch_has_error(&cmd_buffer->batch)) - 
return; - - anv_measure_snapshot(cmd_buffer, - INTEL_SNAPSHOT_DRAW, - "draw indirect", - drawCount); - trace_intel_begin_draw_indirect(&cmd_buffer->trace); - genX(cmd_buffer_flush_gfx_state)(cmd_buffer); if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); - for (uint32_t i = 0; i < drawCount; i++) { - struct anv_address draw = anv_address_add(buffer->address, offset); + uint32_t offset = 0; + for (uint32_t i = 0; i < draw_count; i++) { + struct anv_address draw = anv_address_add(indirect_data_addr, offset); #if GFX_VER < 11 + /* TODO: We need to stomp base vertex to 0 somehow */ + + /* With sequential draws, we're dealing with the VkDrawIndirectCommand + * structure data. We want to load VkDrawIndirectCommand::firstVertex at + * offset 8 in the structure. + * + * With indexed draws, we're dealing with VkDrawIndexedIndirectCommand. + * We want the VkDrawIndirectCommand::vertexOffset field at offset 12 in + * the structure. + */ if (vs_prog_data->uses_firstvertex || - vs_prog_data->uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 8)); + vs_prog_data->uses_baseinstance) { + emit_base_vertex_instance_bo(cmd_buffer, + anv_address_add(draw, indexed ? 12 : 8)); + } if (vs_prog_data->uses_drawid) emit_draw_index(cmd_buffer, i); -# endif +#endif /* Emitting draw index or vertex index BOs may result in needing * additional VF cache flushes. */ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - load_indirect_parameters(cmd_buffer, draw, false, i); + load_indirect_parameters(cmd_buffer, draw, indexed, i); anv_batch_emit(&cmd_buffer->batch, #if GFX_VER < 11 @@ -4403,7 +4404,7 @@ void genX(CmdDrawIndirect)( prim) { prim.IndirectParameterEnable = true; prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; - prim.VertexAccessType = SEQUENTIAL; + prim.VertexAccessType = indexed ? 
RANDOM : SEQUENTIAL; #if GFX_VER >= 11 prim.ExtendedParametersPresent = true; #endif @@ -4415,8 +4416,32 @@ void genX(CmdDrawIndirect)( update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, SEQUENTIAL); - offset += stride; + offset += indirect_data_stride; } +} + +void genX(CmdDrawIndirect)( + VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); + + if (anv_batch_has_error(&cmd_buffer->batch)) + return; + + anv_measure_snapshot(cmd_buffer, + INTEL_SNAPSHOT_DRAW, + "draw indirect", + drawCount); + trace_intel_begin_draw_indirect(&cmd_buffer->trace); + + emit_indirect_draws(cmd_buffer, + anv_address_add(buffer->address, offset), + stride, drawCount, false /* indexed */); trace_intel_end_draw_indirect(&cmd_buffer->trace, drawCount); } @@ -4430,8 +4455,6 @@ void genX(CmdDrawIndexedIndirect)( { ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer); ANV_FROM_HANDLE(anv_buffer, buffer, _buffer); - UNUSED struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; - UNUSED const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline); if (anv_batch_has_error(&cmd_buffer->batch)) return; @@ -4442,53 +4465,9 @@ void genX(CmdDrawIndexedIndirect)( drawCount); trace_intel_begin_draw_indexed_indirect(&cmd_buffer->trace); - genX(cmd_buffer_flush_gfx_state)(cmd_buffer); - - if (cmd_buffer->state.conditional_render_enabled) - genX(cmd_emit_conditional_render_predicate)(cmd_buffer); - - for (uint32_t i = 0; i < drawCount; i++) { - struct anv_address draw = anv_address_add(buffer->address, offset); - -#if GFX_VER < 11 - /* TODO: We need to stomp base vertex to 0 somehow */ - if (vs_prog_data->uses_firstvertex || - vs_prog_data->uses_baseinstance) - emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw, 12)); - if (vs_prog_data->uses_drawid) - emit_draw_index(cmd_buffer, i); -#endif - - /* Emitting draw index or vertex index BOs may result in needing - * additional VF cache flushes. - */ - genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); - - load_indirect_parameters(cmd_buffer, draw, true, i); - - anv_batch_emit(&cmd_buffer->batch, -#if GFX_VER < 11 - GENX(3DPRIMITIVE), -#else - GENX(3DPRIMITIVE_EXTENDED), -#endif - prim) { - prim.IndirectParameterEnable = true; - prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled; - prim.VertexAccessType = RANDOM; -#if GFX_VER >= 11 - prim.ExtendedParametersPresent = true; -#endif - } - -#if GFX_VERx10 == 125 - genX(emit_dummy_post_sync_op)(cmd_buffer, 1); -#endif - - update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, RANDOM); - - offset += stride; - } + emit_indirect_draws(cmd_buffer, + anv_address_add(buffer->address, offset), + stride, drawCount, true /* indexed */); trace_intel_end_draw_indexed_indirect(&cmd_buffer->trace, drawCount); }
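
Note for reviewers (not part of the diff): the `indexed ? 12 : 8` offset that the new emit_indirect_draws() helper passes to emit_base_vertex_instance_bo() comes straight from the layouts of the Vulkan indirect command structures, as the added comment says. A minimal reference sketch follows, assuming only the standard Vulkan headers; the _Static_asserts are illustrative and do not exist anywhere in the driver.

#include <stddef.h>
#include <vulkan/vulkan_core.h>

/* Non-indexed draws consume VkDrawIndirectCommand: firstVertex sits at byte
 * offset 8, which is why the sequential path reads anv_address_add(draw, 8).
 */
_Static_assert(offsetof(VkDrawIndirectCommand, firstVertex) == 8,
               "firstVertex expected at offset 8");
_Static_assert(offsetof(VkDrawIndirectCommand, firstInstance) == 12,
               "firstInstance expected at offset 12");

/* Indexed draws consume VkDrawIndexedIndirectCommand: vertexOffset sits at
 * byte offset 12, which is why the indexed path reads
 * anv_address_add(draw, 12).
 */
_Static_assert(offsetof(VkDrawIndexedIndirectCommand, vertexOffset) == 12,
               "vertexOffset expected at offset 12");
_Static_assert(offsetof(VkDrawIndexedIndirectCommand, firstInstance) == 16,
               "firstInstance expected at offset 16");

In both cases the chosen offset points at the base-vertex field with the base-instance field immediately after it, so the single offset parameter is enough to distinguish the two command layouts when the pre-gfx11 path programs base vertex/instance from the indirect buffer.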