anv: implement VK_KHR_synchronization2

v2: Use u_foreach_bit64() (Samuel)

v3: Add missing handling of VkMemoryBarrier2KHR in pNext of
    VkSubpassDependency2KHR (Samuel)

v4: Remove unused ANV_PIPELINE_STAGE_PIPELINED_BITS (Ivan)

v5: Fix missing anv_measure_submit() (Jason)
    Constify anv_pipeline_stage_pipelined_bits (Jason)

v6: Split flushes & invalidation emissions on
    vkCmdSetEvent2KHR()/vkCmdWaitEvents2KHR() (Jason)

v7: Only apply flushes once on events (Jason)

v8: Drop split flushes for this patch

v9: Add comment about ignoring some fields of VkMemoryBarrier2 in
    VkSubpassDependency2KHR (Jason)
    Drop spurious PIPE_CONTROL change s/,/;/ (Jason)

v10: Fix build issue on Android (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9045>
Author:    Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date:      2020-11-11 21:38:25 +02:00
Committed: Marge Bot
Commit:    b996fa8efa
Parent:    dff9098059

8 changed files with 139 additions and 124 deletions
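
As a quick orientation before the diffs, here is a minimal application-side
sketch of the new barrier entry point this commit wires up. The handles
(cmd_buf, buf) are placeholders; the structures and the
vkCmdPipelineBarrier2KHR() call are standard VK_KHR_synchronization2 API:

   VkBufferMemoryBarrier2KHR buf_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR,
      .srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT_KHR,
      .dstAccessMask = VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = buf,           /* placeholder VkBuffer */
      .offset = 0,
      .size = VK_WHOLE_SIZE,
   };
   VkDependencyInfoKHR dep_info = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers = &buf_barrier,
   };
   /* Stage masks now live on each barrier instead of on the call itself. */
   vkCmdPipelineBarrier2KHR(cmd_buf, &dep_info);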

--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -15,3 +15,4 @@ VK_KHR_shader_float16_int8 on lavapipe
 VK_KHR_shader_subgroup_extended_types on lavapipe
 VK_KHR_spirv_1_4 on lavapipe
 Experimental raytracing support on RADV
+VK_KHR_synchronization2 on Intel

--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -34,6 +34,7 @@
 #include <sync/sync.h>
 
 #include "anv_private.h"
+#include "vk_common_entrypoints.h"
 #include "vk_util.h"
 
 static int anv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
@@ -875,7 +876,7 @@ anv_QueueSignalReleaseImageANDROID(
    if (waitSemaphoreCount == 0)
       goto done;
 
-   result = anv_QueueSubmit(queue, 1,
+   result = vk_common_QueueSubmit(queue, 1,
       &(VkSubmitInfo) {
          .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .waitSemaphoreCount = 1,

--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -230,6 +230,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .KHR_swapchain = true,
       .KHR_swapchain_mutable_format = true,
 #endif
+      .KHR_synchronization2 = true,
       .KHR_timeline_semaphore = true,
       .KHR_uniform_buffer_standard_layout = true,
       .KHR_variable_pointers = true,
@@ -1689,6 +1690,13 @@ void anv_GetPhysicalDeviceFeatures2(
          break;
       }
 
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: {
+         VkPhysicalDeviceSynchronization2FeaturesKHR *features =
+            (VkPhysicalDeviceSynchronization2FeaturesKHR *)ext;
+         features->synchronization2 = true;
+         break;
+      }
+
       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
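
A hedged sketch of how an application would detect and enable the feature bit
exposed above (physical_device is a placeholder handle; the structures are
standard API):

   VkPhysicalDeviceSynchronization2FeaturesKHR sync2_features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &sync2_features,
   };
   vkGetPhysicalDeviceFeatures2(physical_device, &features2);
   if (sync2_features.synchronization2) {
      /* Chain the same struct into VkDeviceCreateInfo::pNext and list
       * VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME in ppEnabledExtensionNames. */
   }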

--- a/src/intel/vulkan/anv_pass.c
+++ b/src/intel/vulkan/anv_pass.c
@@ -31,22 +31,36 @@ anv_render_pass_add_subpass_dep(struct anv_device *device,
                                 struct anv_render_pass *pass,
                                 const VkSubpassDependency2KHR *dep)
 {
+   /* From the Vulkan 1.2.195 spec:
+    *
+    *    "If an instance of VkMemoryBarrier2 is included in the pNext chain,
+    *    srcStageMask, dstStageMask, srcAccessMask, and dstAccessMask
+    *    parameters are ignored. The synchronization and access scopes
+    *    instead are defined by the parameters of VkMemoryBarrier2."
+    */
+   const VkMemoryBarrier2KHR *barrier =
+      vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2_KHR);
+   VkAccessFlags2KHR src_access_mask =
+      barrier ? barrier->srcAccessMask : dep->srcAccessMask;
+   VkAccessFlags2KHR dst_access_mask =
+      barrier ? barrier->dstAccessMask : dep->dstAccessMask;
+
    if (dep->dstSubpass == VK_SUBPASS_EXTERNAL) {
       pass->subpass_flushes[pass->subpass_count] |=
-         anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
+         anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
    } else {
       assert(dep->dstSubpass < pass->subpass_count);
       pass->subpass_flushes[dep->dstSubpass] |=
-         anv_pipe_invalidate_bits_for_access_flags(device, dep->dstAccessMask);
+         anv_pipe_invalidate_bits_for_access_flags(device, dst_access_mask);
    }
 
    if (dep->srcSubpass == VK_SUBPASS_EXTERNAL) {
       pass->subpass_flushes[0] |=
-         anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
+         anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
    } else {
       assert(dep->srcSubpass < pass->subpass_count);
       pass->subpass_flushes[dep->srcSubpass + 1] |=
-         anv_pipe_flush_bits_for_access_flags(device, dep->srcAccessMask);
+         anv_pipe_flush_bits_for_access_flags(device, src_access_mask);
    }
 }
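
To illustrate the spec rule quoted in the new comment, this is what a render
pass dependency carrying a chained VkMemoryBarrier2KHR looks like on the
application side (a sketch; the masks shown are arbitrary examples):

   VkMemoryBarrier2KHR barrier2 = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
      .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
      .dstAccessMask = VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR,
   };
   VkSubpassDependency2 dep = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
      .pNext = &barrier2,   /* makes the four legacy masks of dep ignored */
      .srcSubpass = 0,
      .dstSubpass = 1,
   };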

--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -66,6 +66,7 @@
 #include "vk_alloc.h"
 #include "vk_debug_report.h"
 #include "vk_device.h"
+#include "vk_enum_defines.h"
 #include "vk_image.h"
 #include "vk_instance.h"
 #include "vk_physical_device.h"
@@ -2538,34 +2539,35 @@ enum anv_pipe_bits {
 static inline enum anv_pipe_bits
 anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
-                                     VkAccessFlags flags)
+                                     VkAccessFlags2KHR flags)
 {
    enum anv_pipe_bits pipe_bits = 0;
 
-   u_foreach_bit(b, flags) {
-      switch ((VkAccessFlagBits)(1 << b)) {
-      case VK_ACCESS_SHADER_WRITE_BIT:
+   u_foreach_bit64(b, flags) {
+      switch ((VkAccessFlags2KHR)(1 << b)) {
+      case VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
+      case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as write
           * destination through the data port. To make its content available
           * to future operations, flush the hdc pipeline.
           */
          pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
          break;
-      case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+      case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as render
           * target. To make its content available to future operations, flush
           * the render target cache.
           */
          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+      case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as depth
           * buffer. To make its content available to future operations, flush
           * the depth cache.
           */
          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_TRANSFER_WRITE_BIT:
+      case VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
          /* We're transitioning a buffer that was previously used as a
           * transfer write destination. Generic write operations include color
           * & depth operations as well as buffer operations like :
@@ -2582,13 +2584,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
          pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_MEMORY_WRITE_BIT:
+      case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
          /* We're transitioning a buffer for generic write operations. Flush
           * all the caches.
           */
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          break;
-      case VK_ACCESS_HOST_WRITE_BIT:
+      case VK_ACCESS_2_HOST_WRITE_BIT_KHR:
          /* We're transitioning a buffer for access by CPU. Invalidate
           * all the caches. Since data and tile caches don't have invalidate,
           * we are forced to flush those as well.
@@ -2596,8 +2598,8 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
          break;
-      case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
-      case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
          /* We're transitioning a buffer written either from VS stage or from
           * the command streamer (see CmdEndTransformFeedbackEXT), we just
           * need to stall the CS.
@@ -2614,13 +2616,13 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
 static inline enum anv_pipe_bits
 anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
-                                          VkAccessFlags flags)
+                                          VkAccessFlags2KHR flags)
 {
    enum anv_pipe_bits pipe_bits = 0;
 
-   u_foreach_bit(b, flags) {
-      switch ((VkAccessFlagBits)(1 << b)) {
-      case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+   u_foreach_bit64(b, flags) {
+      switch ((VkAccessFlags2KHR)(1 << b)) {
+      case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
          /* Indirect draw commands take a buffer as input that we're going to
           * read from the command streamer to load some of the HW registers
           * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
@@ -2642,15 +2644,15 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
           */
          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_INDEX_READ_BIT:
-      case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+      case VK_ACCESS_2_INDEX_READ_BIT_KHR:
+      case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
          /* We transitioning a buffer to be used for as input for vkCmdDraw*
           * commands, so we invalidate the VF cache to make sure there is no
           * stale data when we start rendering.
           */
          pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
          break;
-      case VK_ACCESS_UNIFORM_READ_BIT:
+      case VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
          /* We transitioning a buffer to be used as uniform data. Because
           * uniform is accessed through the data port & sampler, we need to
           * invalidate the texture cache (sampler) & constant cache (data
@@ -2662,28 +2664,28 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
          else
             pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
          break;
-      case VK_ACCESS_SHADER_READ_BIT:
-      case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
-      case VK_ACCESS_TRANSFER_READ_BIT:
+      case VK_ACCESS_2_SHADER_READ_BIT_KHR:
+      case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
+      case VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
          /* Transitioning a buffer to be read through the sampler, so
           * invalidate the texture cache, we don't want any stale data.
           */
          pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
          break;
-      case VK_ACCESS_MEMORY_READ_BIT:
+      case VK_ACCESS_2_MEMORY_READ_BIT_KHR:
          /* Transitioning a buffer for generic read, invalidate all the
           * caches.
           */
          pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
          break;
-      case VK_ACCESS_MEMORY_WRITE_BIT:
+      case VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
          /* Generic write, make sure all previously written things land in
           * memory.
           */
          pipe_bits |= ANV_PIPE_FLUSH_BITS;
          break;
-      case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
-      case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
+      case VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
+      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
          /* Transitioning a buffer for conditional rendering or transform
           * feedback. We'll load the content of this buffer into HW registers
           * using the command streamer, so we need to stall the command
@@ -2694,7 +2696,7 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
          pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
          pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
          break;
-      case VK_ACCESS_HOST_READ_BIT:
+      case VK_ACCESS_2_HOST_READ_BIT_KHR:
          /* We're transitioning a buffer that was written by CPU. Flush
           * all the caches.
           */
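
VkAccessFlags2KHR is a 64-bit type, which is why both helpers switch from
u_foreach_bit() to u_foreach_bit64(). Roughly, the 64-bit walk expands to
something like this standalone sketch (assuming GCC/Clang's __builtin_ctzll;
Mesa's real macro lives in the util headers):

   #include <stdint.h>

   static void walk_flags64(uint64_t flags)
   {
      /* Visit each set bit from lowest to highest, clearing as we go. */
      for (uint64_t tmp = flags; tmp != 0; tmp &= tmp - 1) {
         int b = __builtin_ctzll(tmp);   /* index of the lowest set bit */
         /* switch ((VkAccessFlags2KHR)1 << b) { ... } */
         (void)b;
      }
   }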

--- a/src/intel/vulkan/anv_queue.c
+++ b/src/intel/vulkan/anv_queue.c
@@ -1206,10 +1206,10 @@ anv_queue_submit_post_and_alloc_new(struct anv_queue *queue,
    return VK_SUCCESS;
 }
 
-VkResult anv_QueueSubmit(
+VkResult anv_QueueSubmit2KHR(
     VkQueue                                     _queue,
     uint32_t                                    submitCount,
-    const VkSubmitInfo*                         pSubmits,
+    const VkSubmitInfo2KHR*                     pSubmits,
     VkFence                                     _fence)
 {
    ANV_FROM_HANDLE(anv_queue, queue, _queue);
@@ -1242,23 +1242,14 @@ VkResult anv_QueueSubmit(
          mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
          anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
 
-      const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
-         vk_find_struct_const(pSubmits[i].pNext,
-                              TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
       const VkPerformanceQuerySubmitInfoKHR *perf_info =
         vk_find_struct_const(pSubmits[i].pNext,
                              PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
       const int perf_pass = perf_info ? perf_info->counterPassIndex : 0;
-      const uint64_t *wait_values =
-         timeline_info && timeline_info->waitSemaphoreValueCount ?
-         timeline_info->pWaitSemaphoreValues : NULL;
-      const uint64_t *signal_values =
-         timeline_info && timeline_info->signalSemaphoreValueCount ?
-         timeline_info->pSignalSemaphoreValues : NULL;
 
       if (!anv_queue_submit_can_add_submit(submit,
-                                           pSubmits[i].waitSemaphoreCount,
-                                           pSubmits[i].signalSemaphoreCount,
+                                           pSubmits[i].waitSemaphoreInfoCount,
+                                           pSubmits[i].signalSemaphoreInfoCount,
                                            perf_pass)) {
          result = anv_queue_submit_post_and_alloc_new(queue, &submit);
          if (result != VK_SUCCESS)
@@ -1266,19 +1257,19 @@ VkResult anv_QueueSubmit(
       }
 
       /* Wait semaphores */
-      for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].waitSemaphoreInfoCount; j++) {
          result = anv_queue_submit_add_in_semaphore(submit,
                                                     device,
-                                                    pSubmits[i].pWaitSemaphores[j],
-                                                    wait_values ? wait_values[j] : 0);
+                                                    pSubmits[i].pWaitSemaphoreInfos[j].semaphore,
+                                                    pSubmits[i].pWaitSemaphoreInfos[j].value);
          if (result != VK_SUCCESS)
            goto out;
       }
 
       /* Command buffers */
-      for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].commandBufferInfoCount; j++) {
          ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
-                         pSubmits[i].pCommandBuffers[j]);
+                         pSubmits[i].pCommandBufferInfos[j].commandBuffer);
          assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
          assert(!anv_batch_has_error(&cmd_buffer->batch));
          anv_measure_submit(cmd_buffer);
@@ -1298,11 +1289,11 @@ VkResult anv_QueueSubmit(
       }
 
       /* Signal semaphores */
-      for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreCount; j++) {
+      for (uint32_t j = 0; j < pSubmits[i].signalSemaphoreInfoCount; j++) {
          result = anv_queue_submit_add_out_semaphore(submit,
                                                      device,
-                                                     pSubmits[i].pSignalSemaphores[j],
-                                                     signal_values ? signal_values[j] : 0);
+                                                     pSubmits[i].pSignalSemaphoreInfos[j].semaphore,
+                                                     pSubmits[i].pSignalSemaphoreInfos[j].value);
          if (result != VK_SUCCESS)
            goto out;
       }
@@ -1350,7 +1341,7 @@ out:
       * anv_device_set_lost() would have been called already by a callee of
       * anv_queue_submit().
       */
-      result = anv_device_set_lost(device, "vkQueueSubmit() failed");
+      result = anv_device_set_lost(device, "vkQueueSubmit2KHR() failed");
    }
 
    return result;
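
The dropped VkTimelineSemaphoreSubmitInfoKHR lookup is possible because
VkSubmitInfo2KHR carries the timeline value directly on each semaphore info.
A hedged application-side sketch (queue, timeline_sem and cmd_buf are
placeholder handles):

   VkSemaphoreSubmitInfoKHR wait_info = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR,
      .semaphore = timeline_sem,
      .value = 42,   /* timeline value, no separate pNext struct needed */
      .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR,
   };
   VkCommandBufferSubmitInfoKHR cmd_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR,
      .commandBuffer = cmd_buf,
   };
   VkSubmitInfo2KHR submit = {
      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR,
      .waitSemaphoreInfoCount = 1,
      .pWaitSemaphoreInfos = &wait_info,
      .commandBufferInfoCount = 1,
      .pCommandBufferInfos = &cmd_info,
   };
   vkQueueSubmit2KHR(queue, 1, &submit, VK_NULL_HANDLE);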

--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -2403,43 +2403,37 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
    cmd_buffer->state.pending_pipe_bits = bits;
 }
 
-void genX(CmdPipelineBarrier)(
-    VkCommandBuffer                             commandBuffer,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    VkBool32                                    byRegion,
-    uint32_t                                    memoryBarrierCount,
-    const VkMemoryBarrier*                      pMemoryBarriers,
-    uint32_t                                    bufferMemoryBarrierCount,
-    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
-    uint32_t                                    imageMemoryBarrierCount,
-    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
+static void
+cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
+                   const VkDependencyInfoKHR *dep_info,
+                   const char *reason)
 {
-   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
-
    /* XXX: Right now, we're really dumb and just flush whatever categories
     * the app asks for. One of these days we may make this a bit better
     * but right now that's all the hardware allows for in most areas.
     */
-   VkAccessFlags src_flags = 0;
-   VkAccessFlags dst_flags = 0;
+   VkAccessFlags2KHR src_flags = 0;
+   VkAccessFlags2KHR dst_flags = 0;
 
-   for (uint32_t i = 0; i < memoryBarrierCount; i++) {
-      src_flags |= pMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pMemoryBarriers[i].dstAccessMask;
+   for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
+      src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
+      dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask;
    }
 
-   for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
-      src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
+   for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
+      src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask;
+      dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask;
    }
 
-   for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
-      src_flags |= pImageMemoryBarriers[i].srcAccessMask;
-      dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
-      ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[i].image);
-      const VkImageSubresourceRange *range =
-         &pImageMemoryBarriers[i].subresourceRange;
+   for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
+      const VkImageMemoryBarrier2KHR *img_barrier =
+         &dep_info->pImageMemoryBarriers[i];
+
+      src_flags |= img_barrier->srcAccessMask;
+      dst_flags |= img_barrier->dstAccessMask;
+
+      ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
+      const VkImageSubresourceRange *range = &img_barrier->subresourceRange;
 
       uint32_t base_layer, layer_count;
       if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
@@ -2455,8 +2449,8 @@ void genX(CmdPipelineBarrier)(
       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
          transition_depth_buffer(cmd_buffer, image,
                                  base_layer, layer_count,
-                                 pImageMemoryBarriers[i].oldLayout,
-                                 pImageMemoryBarriers[i].newLayout,
+                                 img_barrier->oldLayout,
+                                 img_barrier->newLayout,
                                  false /* will_full_fast_clear */);
       }
@@ -2464,8 +2458,8 @@ void genX(CmdPipelineBarrier)(
          transition_stencil_buffer(cmd_buffer, image,
                                    range->baseMipLevel, level_count,
                                    base_layer, layer_count,
-                                   pImageMemoryBarriers[i].oldLayout,
-                                   pImageMemoryBarriers[i].newLayout,
+                                   img_barrier->oldLayout,
+                                   img_barrier->newLayout,
                                    false /* will_full_fast_clear */);
       }
@@ -2476,19 +2470,29 @@ void genX(CmdPipelineBarrier)(
             transition_color_buffer(cmd_buffer, image, 1UL << aspect_bit,
                                     range->baseMipLevel, level_count,
                                     base_layer, layer_count,
-                                    pImageMemoryBarriers[i].oldLayout,
-                                    pImageMemoryBarriers[i].newLayout,
-                                    pImageMemoryBarriers[i].srcQueueFamilyIndex,
-                                    pImageMemoryBarriers[i].dstQueueFamilyIndex,
+                                    img_barrier->oldLayout,
+                                    img_barrier->newLayout,
+                                    img_barrier->srcQueueFamilyIndex,
+                                    img_barrier->dstQueueFamilyIndex,
                                     false /* will_full_fast_clear */);
          }
       }
    }
 
-   anv_add_pending_pipe_bits(cmd_buffer,
-                             anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
-                             anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags),
-                             "pipe barrier");
+   enum anv_pipe_bits bits =
+      anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
+      anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
+
+   anv_add_pending_pipe_bits(cmd_buffer, bits, reason);
+}
+
+void genX(CmdPipelineBarrier2KHR)(
+    VkCommandBuffer                             commandBuffer,
+    const VkDependencyInfoKHR*                  pDependencyInfo)
+{
+   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
+
+   cmd_buffer_barrier(cmd_buffer, pDependencyInfo, "pipe barrier");
 }
 
 static void
@@ -6866,24 +6870,33 @@ void genX(CmdEndConditionalRenderingEXT)(
  * by the command streamer for later execution.
  */
 #define ANV_PIPELINE_STAGE_PIPELINED_BITS \
-   ~(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | \
-     VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | \
-     VK_PIPELINE_STAGE_HOST_BIT | \
-     VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT)
+   ~(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_HOST_BIT_KHR | \
+     VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT)
 
-void genX(CmdSetEvent)(
+void genX(CmdSetEvent2KHR)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
-    VkPipelineStageFlags                        stageMask)
+    const VkDependencyInfoKHR*                  pDependencyInfo)
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_event, event, _event);
 
+   VkPipelineStageFlags2KHR src_stages = 0;
+
+   for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
+   for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask;
+   for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++)
+      src_stages |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask;
+
    cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
 
    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
-      if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
+      if (src_stages & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
          pc.StallAtPixelScoreboard = true;
          pc.CommandStreamerStallEnable = true;
       }
@@ -6899,10 +6912,10 @@ void genX(CmdSetEvent)(
    }
 }
 
-void genX(CmdResetEvent)(
+void genX(CmdResetEvent2KHR)(
     VkCommandBuffer                             commandBuffer,
     VkEvent                                     _event,
-    VkPipelineStageFlags                        stageMask)
+    VkPipelineStageFlags2KHR                    stageMask)
 {
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
    ANV_FROM_HANDLE(anv_event, event, _event);
@@ -6927,22 +6940,15 @@ void genX(CmdResetEvent)(
    }
 }
 
-void genX(CmdWaitEvents)(
+void genX(CmdWaitEvents2KHR)(
     VkCommandBuffer                             commandBuffer,
     uint32_t                                    eventCount,
     const VkEvent*                              pEvents,
-    VkPipelineStageFlags                        srcStageMask,
-    VkPipelineStageFlags                        destStageMask,
-    uint32_t                                    memoryBarrierCount,
-    const VkMemoryBarrier*                      pMemoryBarriers,
-    uint32_t                                    bufferMemoryBarrierCount,
-    const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
-    uint32_t                                    imageMemoryBarrierCount,
-    const VkImageMemoryBarrier*                 pImageMemoryBarriers)
+    const VkDependencyInfoKHR*                  pDependencyInfos)
 {
-#if GFX_VER >= 8
    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
 
+#if GFX_VER >= 8
    for (uint32_t i = 0; i < eventCount; i++) {
       ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
@@ -6960,11 +6966,7 @@ void genX(CmdWaitEvents)(
    anv_finishme("Implement events on gfx7");
 #endif
 
-   genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
-                            false, /* byRegion */
-                            memoryBarrierCount, pMemoryBarriers,
-                            bufferMemoryBarrierCount, pBufferMemoryBarriers,
-                            imageMemoryBarrierCount, pImageMemoryBarriers);
+   cmd_buffer_barrier(cmd_buffer, pDependencyInfos, "wait event");
 }
 
 VkResult genX(CmdSetPerformanceOverrideINTEL)(
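
vkCmdSetEvent2KHR() receives a whole VkDependencyInfoKHR rather than a bare
stage mask, which is why the code above folds every barrier's srcStageMask
into src_stages before emitting the PIPE_CONTROL. From the application side
(cmd_buf and event are placeholder handles):

   VkMemoryBarrier2KHR mem_barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR,
      .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_SHADER_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
      .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
   };
   VkDependencyInfoKHR dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &mem_barrier,
   };
   vkCmdSetEvent2KHR(cmd_buf, event, &dep);
   /* ... later, the same dependency info is consumed at the wait: */
   vkCmdWaitEvents2KHR(cmd_buf, 1, &event, &dep);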

--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -1226,9 +1226,9 @@ void genX(CmdEndQueryIndexedEXT)(
 #define TIMESTAMP 0x2358
 
-void genX(CmdWriteTimestamp)(
+void genX(CmdWriteTimestamp2KHR)(
     VkCommandBuffer                             commandBuffer,
-    VkPipelineStageFlagBits                     pipelineStage,
+    VkPipelineStageFlags2KHR                    stage,
     VkQueryPool                                 queryPool,
     uint32_t                                    query)
 {
@@ -1241,13 +1241,10 @@ void genX(CmdWriteTimestamp)(
    struct mi_builder b;
    mi_builder_init(&b, &cmd_buffer->device->info, &cmd_buffer->batch);
 
-   switch (pipelineStage) {
-   case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+   if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR) {
       mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
                    mi_reg64(TIMESTAMP));
-      break;
-
-   default:
+   } else {
       /* Everything else is bottom-of-pipe */
       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
@@ -1260,7 +1257,6 @@ void genX(CmdWriteTimestamp)(
          if (GFX_VER == 9 && cmd_buffer->device->info.gt == 4)
             pc.CommandStreamerStallEnable = true;
       }
-      break;
    }
 
    emit_query_pc_availability(cmd_buffer, query_addr, true);
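
The stage argument is now a 64-bit flags value, and everything except
TOP_OF_PIPE takes the PIPE_CONTROL path above. A minimal usage sketch
(cmd_buf and query_pool are placeholder handles; the pool is assumed to be
of VK_QUERY_TYPE_TIMESTAMP):

   vkCmdWriteTimestamp2KHR(cmd_buf, VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR,
                           query_pool, 0);
   /* VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR instead would take the cheaper
    * MI store of the TIMESTAMP register, with no pipeline flush. */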