anv: Use submit-time implicit sync instead of allocate-time

In 83b943cc2f, we started making all VkDeviceMemory BOs resident all
the time.  One unfortunate side-effect of this is that every
vkQueueSubmit sets EXEC_OBJECT_WRITE on every WSI memory object which
means that X server or Wayland compositor, instead of waiting on the
last vkQueueSubmit to actually write the buffer, now waits on the last
vkQueueSubmit to from that driver instance relative to whenever the
compositor's GL driver instance calls execbuf.  This potentially leads
to a lot of extra synchronization that we didn't intend to have.

Instead, this commit makes it so that we leave WSI memory objects with
EXEC_OBJECT_ASYNC most of the time and only unset EXEC_OBJECT_ASYNC and
set EXEC_OBJECT_WRITE in the dummy execbuf that we do as part of
vkQueuePresent.  This should hopefully result in tighter integration
with the compositor, lower latency, and better performance.

Testing with DOOM 2016, this seems to reduce latency by at least a frame
if not two and makes the game much more responsive.  Testing was,
however, subjective, so we don't have any hard data on that.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
This commit is contained in:
Jason Ekstrand
2019-11-21 06:10:32 -06:00
parent 6ebf677cfd
commit ccb7d606f1
2 changed files with 18 additions and 15 deletions

View File

@@ -702,6 +702,7 @@ anv_queue_submit(struct anv_queue *queue,
const VkSemaphore *out_semaphores,
const uint64_t *out_values,
uint32_t num_out_semaphores,
struct anv_bo *wsi_signal_bo,
VkFence _fence)
{
ANV_FROM_HANDLE(anv_fence, fence, _fence);
@@ -829,6 +830,12 @@ anv_queue_submit(struct anv_queue *queue,
}
}
if (wsi_signal_bo) {
result = anv_queue_submit_add_fence_bo(submit, wsi_signal_bo, true /* signal */);
if (result != VK_SUCCESS)
goto error;
}
if (fence) {
/* Under most circumstances, out fences won't be temporary. However,
* the spec does allow it for opaque_fd. From the Vulkan 1.0.53 spec:
@@ -923,7 +930,8 @@ VkResult anv_QueueSubmit(
* come up with something more efficient but this shouldn't be a
* common case.
*/
result = anv_queue_submit(queue, NULL, NULL, NULL, 0, NULL, NULL, 0, fence);
result = anv_queue_submit(queue, NULL, NULL, NULL, 0, NULL, NULL, 0,
NULL, fence);
goto out;
}
@@ -931,6 +939,13 @@ VkResult anv_QueueSubmit(
/* Fence for this submit. NULL for all but the last one */
VkFence submit_fence = (i == submitCount - 1) ? fence : VK_NULL_HANDLE;
const struct wsi_memory_signal_submit_info *mem_signal_info =
vk_find_struct_const(pSubmits[i].pNext,
WSI_MEMORY_SIGNAL_SUBMIT_INFO_MESA);
struct anv_bo *wsi_signal_bo =
mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
vk_find_struct_const(pSubmits[i].pNext,
TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
@@ -954,6 +969,7 @@ VkResult anv_QueueSubmit(
pSubmits[i].pSignalSemaphores,
signal_values,
pSubmits[i].signalSemaphoreCount,
wsi_signal_bo,
submit_fence);
if (result != VK_SUCCESS)
goto out;
@@ -992,7 +1008,7 @@ VkResult anv_QueueSubmit(
result = anv_queue_submit(queue, cmd_buffer,
in_semaphores, in_values, num_in_semaphores,
out_semaphores, out_values, num_out_semaphores,
execbuf_fence);
wsi_signal_bo, execbuf_fence);
if (result != VK_SUCCESS)
goto out;
}