From b996fa8efaa42558bda2c52377561d7eb3c4127e Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin
Date: Wed, 11 Nov 2020 21:38:25 +0200
Subject: [PATCH] anv: implement VK_KHR_synchronization2

v2: Use u_foreach_bit64() (Samuel)

v3: Add missing handling of VkMemoryBarrier2KHR in pNext of
    VkSubpassDependency2KHR (Samuel)

v4: Remove unused ANV_PIPELINE_STAGE_PIPELINED_BITS (Ivan)

v5: fix missing anv_measure_submit() (Jason)
    constify anv_pipeline_stage_pipelined_bits (Jason)

v6: Split flushes & invalidation emissions on
    vkCmdSetEvent2KHR()/vkCmdWaitEvents2KHR() (Jason)

v7: Only apply flushes once on events (Jason)

v8: Drop split flushes for this patch

v9: Add comment about ignore some fields of VkMemoryBarrier2 in
    VkSubpassDependency2KHR (Jason)
    Drop spurious PIPE_CONTROL change
    s/,/;/ (Jason)

v10: Fix build issue on Android (Lionel)

Signed-off-by: Lionel Landwerlin
Reviewed-by: Jason Ekstrand
Part-of:
---
 docs/relnotes/new_features.txt     |   1 +
 src/intel/vulkan/anv_android.c     |   3 +-
 src/intel/vulkan/anv_device.c      |   8 ++
 src/intel/vulkan/anv_pass.c        |  22 ++++-
 src/intel/vulkan/anv_private.h     |  54 ++++++------
 src/intel/vulkan/anv_queue.c       |  35 +++-----
 src/intel/vulkan/genX_cmd_buffer.c | 128 +++++++++++++++--------------
 src/intel/vulkan/genX_query.c      |  12 +--
 8 files changed, 139 insertions(+), 124 deletions(-)

diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index d7ed2865292..3f0d07e81af 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -15,3 +15,4 @@ VK_KHR_shader_float16_int8 on lavapipe
 VK_KHR_shader_subgroup_extended_types on lavapipe
 VK_KHR_spirv_1_4 on lavapipe
 Experimental raytracing support on RADV
+VK_KHR_synchronization2 on Intel
diff --git a/src/intel/vulkan/anv_android.c b/src/intel/vulkan/anv_android.c
index b3b102d4202..c87bfb9cefd 100644
--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -34,6 +34,7 @@
 #include

 #include "anv_private.h"
+#include "vk_common_entrypoints.h"
 #include "vk_util.h"

 static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
@@ -875,7 +876,7 @@ anv_QueueSignalReleaseImageANDROID(
    if (waitSemaphoreCount == 0)
       goto done;

-   result = anv_QueueSubmit(queue, 1,
+   result = vk_common_QueueSubmit(queue, 1,
       &(VkSubmitInfo) {
          .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
          .waitSemaphoreCount = 1,
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 8c334302bd1..08e9e05252a 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -230,6 +230,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .KHR_swapchain = true,
       .KHR_swapchain_mutable_format = true,
 #endif
+      .KHR_synchronization2 = true,
       .KHR_timeline_semaphore = true,
       .KHR_uniform_buffer_standard_layout = true,
       .KHR_variable_pointers = true,
@@ -1689,6 +1690,13 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }

+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: {
+         VkPhysicalDeviceSynchronization2FeaturesKHR *features =
+            (VkPhysicalDeviceSynchronization2FeaturesKHR *)ext;
+         features->synchronization2 = true;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
          VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
index 634a3a3e24e..18c689ac864 100644
--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -31,22 +31,36 @@ anv_render_pass_add_subpass_dep(struct anv_device *device,
                                 struct anv_render_pass *pass,
                                 const VkSubpassDependency2KHR *dep)
 {
+   /* From the Vulkan 1.2.195 spec:
+    *
+    *    "If an instance of VkMemoryBarrier2 is included in the pNext chain,
+    *     srcStageMask, dstStageMask, srcAccessMask, and dstAccessMask
+    *     parameters are ignored. The synchronization and access scopes instead
+    *     are defined by the parameters of VkMemoryBarrier2."
+    */
+   const VkMemoryBarrier2KHR *barrier =
+      vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2_KHR);
+   VkAccessFlags2KHR src_access_mask =
+      barrier ? barrier->srcAccessMask : dep->srcAccessMask;
+   VkAccessFlags2KHR dst_access_mask =
+      barrier ? barrier->dstAccessMask : dep->dstAccessMask;
+
    if (dep->dstSubpass == VK_SUBPASS_EXTERNAL) {
       pass->subpass_flushes[pass->subpass_count] |=
-         anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
+         anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
    } else {
       assert(dep->dstSubpass < pass->subpass_count);
       pass->subpass_flushes[dep->dstSubpass] |=
-         anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
+         anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
    }

    if (dep->srcSubpass == VK_SUBPASS_EXTERNAL) {
       pass->subpass_flushes[0] |=
-         anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
+         anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
    } else {
       assert(dep->srcSubpass < pass->subpass_count);
       pass->subpass_flushes[dep->srcSubpass + 1] |=
-         anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
+         anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
    }
 }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 09567490f30..8b8711d7d7b 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -66,6 +66,7 @@
 #include "vk_alloc.h"
 #include "vk_debug_report.h"
 #include "vk_device.h"
+#include "vk_enum_defines.h"
 #include "vk_image.h"
 #include "vk_instance.h"
 #include "vk_physical_device.h"
@@ -2538,34 +2539,35 @@ enum anv_pipe_bits {

 static inline enum anv_pipe_bits
 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
-                                     VkAccessFlags flags)
+                                     VkAccessFlags2KHR flags)
 {
    enum anv_pipe_bits pipe_bits = 0;

-   u_foreach_bit(b, flags) {
-      switch ((VkAccessFlagBits)(1 << b)) {
-      case VK_ACCESS_SHADER_WRITE_BIT:
+   u_foreach_bit64(b, flags) {
+      switch ((VkAccessFlags2KHR)(1 << b)) {
+      case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
+      case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as write
           * destination through the data port. To make its content available
           * to future operations, flush the hdc pipeline.
           */
          pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
          break;
-      case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+      case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as render
           * target. To make its content available to future operations, flush
           * the render target cache.
           */
          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+      case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as depth
           * buffer. To make its content available to future operations, flush
           * the depth cache.
           */
          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_TRANSFER_WRITE_BIT:
+      case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as a
           * transfer write destination. Generic write operations include color
           * & depth operations as well as buffer operations like :
@@ -2582,13 +2584,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_MEMORY_WRITE_BIT:
+      case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
          /* We're transitioning a buffer for generic write operations. Flush
           * all the caches.
           */
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          break;
-      case VK_ACCESS_HOST_WRITE_BIT:
+      case VK_ACCESS_2_HOST_WRITE_BIT_KHR:
          /* We're transitioning a buffer for access by CPU. Invalidate
           * all the caches. Since data and tile caches don't have invalidate,
           * we are forced to flush those as well.
@@ -2596,8 +2598,8 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
          break;
-      case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
-      case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
          /* We're transitioning a buffer written either from VS stage or from
           * the command streamer (see CmdEndTransformFeedbackEXT), we just
           * need to stall the CS.
@@ -2614,13 +2616,13 @@

 static inline enum anv_pipe_bits
 anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
-                                          VkAccessFlags flags)
+                                          VkAccessFlags2KHR flags)
 {
    enum anv_pipe_bits pipe_bits = 0;

-   u_foreach_bit(b, flags) {
-      switch ((VkAccessFlagBits)(1 << b)) {
-      case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+   u_foreach_bit64(b, flags) {
+      switch ((VkAccessFlags2KHR)(1 << b)) {
+      case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
          /* Indirect draw commands take a buffer as input that we're going to
           * read from the command streamer to load some of the HW registers
           * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
@@ -2642,15 +2644,15 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
          */
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         break;
-      case VK_ACCESS_INDEX_READ_BIT:
-      case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+      case VK_ACCESS_2_INDEX_READ_BIT_KHR:
+      case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
          /* We transitioning a buffer to be used for as input for vkCmdDraw*
           * commands, so we invalidate the VF cache to make sure there is no
           * stale data when we start rendering.
           */
          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
          break;
-      case VK_ACCESS_UNIFORM_READ_BIT:
+      case VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
          /* We transitioning a buffer to be used as uniform data. Because
           * uniform is accessed through the data port & sampler, we need to
           * invalidate the texture cache (sampler) & constant cache (data
@@ -2662,28 +2664,28 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
          else
             pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
          break;
-      case VK_ACCESS_SHADER_READ_BIT:
-      case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
-      case VK_ACCESS_TRANSFER_READ_BIT:
+      case VK_ACCESS_2_SHADER_READ_BIT_KHR:
+      case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
+      case VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
          /* Transitioning a buffer to be read through the sampler, so
           * invalidate the texture cache, we don't want any stale data.
           */
          pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
          break;
-      case VK_ACCESS_MEMORY_READ_BIT:
+      case VK_ACCESS_2_MEMORY_READ_BIT_KHR:
          /* Transitioning a buffer for generic read, invalidate all the
           * caches.
           */
          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
          break;
-      case VK_ACCESS_MEMORY_WRITE_BIT:
+      case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
          /* Generic write, make sure all previously written things land in
           * memory.
           */
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          break;
-      case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
-      case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
+      case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
          /* Transitioning a buffer for conditional rendering or transform
           * feedback. We'll load the content of this buffer into HW registers
           * using the command streamer, so we need to stall the command
@@ -2694,7 +2696,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_HOST_READ_BIT:
+      case VK_ACCESS_2_HOST_READ_BIT_KHR:
          /* We're transitioning a buffer that was written by CPU. Flush
           * all the caches.
           */
diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
index ba170e38931..368dab8dd94 100644
--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -1206,10 +1206,10 @@ anv_queue_submit_post_and_alloc_new(struct anv_queue *queue,
    return VK_SUCCESS;
 }

-VkResult anv_QueueSubmit(
+VkResult anv_QueueSubmit2KHR(
     VkQueue                                     _queue,
     uint32_t                                    submitCount,
-    const VkSubmitInfo*                         pSubmits,
+    const VkSubmitInfo2KHR*                     pSubmits,
     VkFence                                     _fence)
 {
    ANV_FROM_HANDLE(anv_queue, queue, _queue);
@@ -1242,23 +1242,14 @@ VkResult anv_QueueSubmit(
          mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
          anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;

-      const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
-         vk_find_struct_const(pSubmits[i].pNext,
-                              TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
       const VkPerformanceQuerySubmitInfoKHR *perf_info =
          vk_find_struct_const(pSubmits[i].pNext,
                               PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
       const int perf_pass = perf_info ? perf_info->counterPassIndex : 0;
-      const uint64_t *wait_values =
-         timeline_info && timeline_info->waitSemaphoreValueCount ?
-         timeline_info->pWaitSemaphoreValues : NULL;
-      const uint64_t *signal_values =
-         timeline_info && timeline_info->signalSemaphoreValueCount ?
-         timeline_info->pSignalSemaphoreValues : NULL;

       if (!anv_queue_submit_can_add_submit(submit,
-                                           pSubmits[i].waitSemaphoreCount,
-                                           pSubmits[i].signalSemaphoreCount,
+                                           pSubmits[i].waitSemaphoreInfoCount,
+                                           pSubmits[i].signalSemaphoreInfoCount,
                                            perf_pass)) {
          result = anv_queue_submit_post_and_alloc_new(queue, &submit);
         if (result != VK_SUCCESS)
            goto out;
@@ -1266,19 +1257,19 @@ VkResult anv_QueueSubmit(
      }

      /* Wait semaphores */
-      for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreInfoCount; j++) {
         result = anv_queue_submit_add_in_semaphore(submit, device,
-                                                   pSubmits[i].pWaitSemaphores[j],
-                                                   wait_values ? wait_values[j] : 0);
+                                                   pSubmits[i].pWaitSemaphoreInfos[j].semaphore,
+                                                   pSubmits[i].pWaitSemaphoreInfos[j].value);
         if (result != VK_SUCCESS)
            goto out;
      }

      /* Command buffers */
-      for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].commandBufferInfoCount; j++) {
         ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
-                        pSubmits[i].pCommandBuffers[j]);
+                        pSubmits[i].pCommandBufferInfos[j].commandBuffer);
         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
         assert(!anv_batch_has_error(&cmd_buffer->batch));
         anv_measure_submit(cmd_buffer);
@@ -1298,11 +1289,11 @@ VkResult anv_QueueSubmit(
      }

      /* Signal semaphores */
-      for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreInfoCount; j++) {
         result = anv_queue_submit_add_out_semaphore(submit, device,
-                                                    pSubmits[i].pSignalSemaphores[j],
-                                                    signal_values ? signal_values[j] : 0);
+                                                    pSubmits[i].pSignalSemaphoreInfos[j].semaphore,
+                                                    pSubmits[i].pSignalSemaphoreInfos[j].value);
         if (result != VK_SUCCESS)
            goto out;
      }
@@ -1350,7 +1341,7 @@ out:
       * anv_device_set_lost() would have been called already by a callee of
       * anv_queue_submit().
       */
-      result = anv_device_set_lost(device, "vkQueueSubmit() failed");
+      result = anv_device_set_lost(device, "vkQueueSubmit2KHR() failed");
    }

    return result;
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index fe450bd5888..93b8fc99d91 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2403,43 +2403,37 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
    cmd_buffer->state.pending_pipe_bits = bits;
 }

-void genX(CmdPipelineBarrier)(
-    VkCommandBuffer                             commandBuffer,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    VkBool32                                    byRegion,
-    uint32_t                                    memoryBarrierCount,
-    const VkMemoryBarrier*                      pMemoryBarriers,
-    uint32_t                                    bufferMemoryBarrierCount,
-    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
-    uint32_t                                    imageMemoryBarrierCount,
-    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
+static void
+cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
+                   const VkDependencyInfoKHR *dep_info,
+                   const char *reason)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-
    /* XXX: Right now, we're really dumb and just flush whatever categories
     * the app asks for. One of these days we may make this a bit better
     * but right now that's all the hardware allows for in most areas.
    */
-   VkAccessFlags src_flags = 0;
-   VkAccessFlags dst_flags = 0;
+   VkAccessFlags2KHR src_flags = 0;
+   VkAccessFlags2KHR dst_flags = 0;

-   for (uint32_t i = 0; i < memoryBarrierCount; i++) {
-      src_flags |= pMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pMemoryBarriers[i].dstAccessMask;
+   for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
+      src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
+      dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask;
    }

-   for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
-      src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
+   for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
+      src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask;
+      dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask;
    }

-   for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
-      src_flags |= pImageMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
-      ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[i].image);
-      const VkImageSubresourceRange *range =
-         &pImageMemoryBarriers[i].subresourceRange;
+   for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
+      const VkImageMemoryBarrier2KHR *img_barrier =
+         &dep_info->pImageMemoryBarriers[i];
+
+      src_flags |= img_barrier->srcAccessMask;
+      dst_flags |= img_barrier->dstAccessMask;
+
+      ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
+      const VkImageSubresourceRange *range = &img_barrier->subresourceRange;

       uint32_t base_layer, layer_count;
       if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
@@ -2455,8 +2449,8 @@ void genX(CmdPipelineBarrier)(
       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
          transition_depth_buffer(cmd_buffer, image,
                                  base_layer, layer_count,
-                                 pImageMemoryBarriers[i].oldLayout,
-                                 pImageMemoryBarriers[i].newLayout,
+                                 img_barrier->oldLayout,
+                                 img_barrier->newLayout,
                                  false /* will_full_fast_clear */);
       }
@@ -2464,8 +2458,8 @@ void genX(CmdPipelineBarrier)(
          transition_stencil_buffer(cmd_buffer, image,
                                    range->baseMipLevel, level_count,
                                    base_layer, layer_count,
-                                   pImageMemoryBarriers[i].oldLayout,
-                                   pImageMemoryBarriers[i].newLayout,
+                                   img_barrier->oldLayout,
+                                   img_barrier->newLayout,
                                    false /* will_full_fast_clear */);
       }
@@ -2476,19 +2470,29 @@ void genX(CmdPipelineBarrier)(
             transition_color_buffer(cmd_buffer, image, 1UL << aspect_bit,
                                     range->baseMipLevel, level_count,
                                     base_layer, layer_count,
-                                    pImageMemoryBarriers[i].oldLayout,
-                                    pImageMemoryBarriers[i].newLayout,
-                                    pImageMemoryBarriers[i].srcQueueFamilyIndex,
-                                    pImageMemoryBarriers[i].dstQueueFamilyIndex,
+                                    img_barrier->oldLayout,
+                                    img_barrier->newLayout,
+                                    img_barrier->srcQueueFamilyIndex,
+                                    img_barrier->dstQueueFamilyIndex,
                                     false /* will_full_fast_clear */);
          }
       }
    }

-   anv_add_pending_pipe_bits(cmd_buffer,
-                             anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
-                             anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags),
-                             "pipe barrier");
+   enum anv_pipe_bits bits =
+      anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
+      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
+
+   anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
+}
+
+void genX(CmdPipelineBarrier2KHR)(
+    VkCommandBuffer                             commandBuffer,
+    const VkDependencyInfoKHR*                  pDependencyInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer_barrier(cmd_buffer, pDependencyInfo, "pipe barrier");
 }

 static void
@@ -6866,24 +6870,33 @@ void genX(CmdEndConditionalRenderingEXT)(
  * by the command streamer for later execution.
  */
 #define ANV_PIPELINE_STAGE_PIPELINED_BITS \
-   ~(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | \
-     VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | \
-     VK_PIPELINE_STAGE_HOST_BIT | \
-     VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT)
+   ~(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_HOST_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)

-void genX(CmdSetEvent)(
+void genX(CmdSetEvent2KHR)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
-    VkPipelineStageFlags                        stageMask)
+    const VkDependencyInfoKHR*                  pDependencyInfo)
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_event, event, _event);

+   VkPipelineStageFlags2KHR src_stages = 0;
+
+   for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
+   for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask;
+   for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask;
+
    cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-      if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
+      if (src_stages & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
          pc.StallAtPixelScoreboard = true;
          pc.CommandStreamerStallEnable = true;
       }
@@ -6899,10 +6912,10 @@ void genX(CmdSetEvent)(
    }
 }

-void genX(CmdResetEvent)(
+void genX(CmdResetEvent2KHR)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
-    VkPipelineStageFlags                        stageMask)
+    VkPipelineStageFlags2KHR                    stageMask)
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_event, event, _event);
@@ -6927,22 +6940,15 @@ void genX(CmdResetEvent)(
    }
 }

-void genX(CmdWaitEvents)(
+void genX(CmdWaitEvents2KHR)(
     VkCommandBuffer                             commandBuffer,
     uint32_t                                    eventCount,
     const VkEvent*                              pEvents,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    uint32_t                                    memoryBarrierCount,
-    const VkMemoryBarrier*                      pMemoryBarriers,
-    uint32_t                                    bufferMemoryBarrierCount,
-    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
-    uint32_t                                    imageMemoryBarrierCount,
-    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
+    const VkDependencyInfoKHR*                  pDependencyInfos)
 {
-#if GFX_VER >= 8
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

+#if GFX_VER >= 8
    for (uint32_t i = 0; i < eventCount; i++) {
       ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
@@ -6960,11 +6966,7 @@ void genX(CmdWaitEvents)(
    anv_finishme("Implement events on gfx7");
 #endif

-   genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
-                            false, /* byRegion */
-                            memoryBarrierCount, pMemoryBarriers,
-                            bufferMemoryBarrierCount, pBufferMemoryBarriers,
-                            imageMemoryBarrierCount, pImageMemoryBarriers);
+   cmd_buffer_barrier(cmd_buffer, pDependencyInfos, "wait event");
 }

 VkResult genX(CmdSetPerformanceOverrideINTEL)(
diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
index 8978f5843a9..c45f4630990 100644
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -1226,9 +1226,9 @@ void genX(CmdEndQueryIndexedEXT)(

 #define TIMESTAMP 0x2358

-void genX(CmdWriteTimestamp)(
+void genX(CmdWriteTimestamp2KHR)(
     VkCommandBuffer                             commandBuffer,
-    VkPipelineStageFlagBits                     pipelineStage,
+    VkPipelineStageFlags2KHR                    stage,
     VkQueryPool                                 queryPool,
     uint32_t                                    query)
 {
@@ -1241,13 +1241,10 @@ void genX(CmdWriteTimestamp)(
    struct mi_builder b;
    mi_builder_init(&b, &cmd_buffer->device->info, &cmd_buffer->batch);

-   switch (pipelineStage) {
-   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+   if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR) {
       mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
                    mi_reg64(TIMESTAMP));
-      break;
-
-   default:
+   } else {
       /* Everything else is bottom-of-pipe */
       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -1260,7 +1257,6 @@ void genX(CmdWriteTimestamp)(
          if (GFX_VER == 9 && cmd_buffer->device->info.gt == 4)
             pc.CommandStreamerStallEnable = true;
       }
-      break;
    }

    emit_query_pc_availability(cmd_buffer, query_addr, true);
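
Not part of the patch: below is a minimal application-side sketch of the vkCmdPipelineBarrier2KHR path the series implements, assuming a device created with VK_KHR_synchronization2 enabled. The command buffer, image, and the chosen stage/access masks and layouts are illustrative placeholders; in the driver, each barrier's srcAccessMask is folded into flush bits and dstAccessMask into invalidate bits (see cmd_buffer_barrier() above), and the layout transition comes from the per-image barrier fields.

/* Illustrative usage sketch only.  The entry point is resolved through
 * vkGetDeviceProcAddr() since synchronization2 is used here as an
 * extension rather than Vulkan 1.3 core.
 */
#include <vulkan/vulkan.h>

static void
example_sync2_barrier(VkCommandBuffer cmd, VkImage image,
                      PFN_vkCmdPipelineBarrier2KHR cmd_pipeline_barrier2)
{
   /* Make a rendered color attachment visible to fragment shader reads. */
   const VkImageMemoryBarrier2KHR img_barrier = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR,
      .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
      .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
      .oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .image = image,
      .subresourceRange = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .levelCount = 1,
         .layerCount = 1,
      },
   };

   /* Stage and access masks now live on each barrier instead of on the
    * vkCmdPipelineBarrier() call itself.
    */
   const VkDependencyInfoKHR dep_info = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
      .imageMemoryBarrierCount = 1,
      .pImageMemoryBarriers = &img_barrier,
   };

   cmd_pipeline_barrier2(cmd, &dep_info);
}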