anv: implement shareable timeline semaphores
This implements timeline semaphores using a new type of dma-fence stored into drm-syncobjs. We use a thread to implement delayed submissions. v2: Drop cloning of temporary semaphores and just transfer their ownership (Jason) Drain queue when dealing with binary semaphore Ensure we don't submit to the thread as long as we don't need to v3: Use __u64 not uintptr_t for kernel pointers Fix commented code for INTEL_DEBUG=bat Set DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES in timeline fence execbuf extension Add new anv_queue_set_lost() Drop multi queue stuff meant for the fake multi queue patch Rework temporary syncobj handling Don't use syncobj when not available (DeviceWaitIdle/CreateDevice) Use ANV_MULTIALLOC And a few more tweaks... v4: Drop drained condition helper (Lionel) Fix missing EXEC_OBJECT_WRITE on BOs we want to wait on (Jason) v5: Add missing device->lost_reported in _anv_device_report_lost (Lionel) Fix missing free on submit->simple_bo (Lionel) Don't drop setting the device in lost state on QueueSubmit error (Jason) Store submit->fence_bos as an array of uintptr_t (Jason) v6: condition device->has_thread_submit to i915 & core DRM support (Jason) v7: Fix submit->in_fence leakage on error (Jason) Keep dummy semaphore with no thread submission (Jason) v8: Move ownership of submit->out_fence to submit (Jason) v9: Don't forget to read the VkFence's syncobj binary payload (Lionel) v10: Take the mutex lock on anv_gem_close() (Jason/Lionel) v11: Fix void* -> u64 cast on 32bit (Lionel) v12: Rebase after BO backed timeline semaphore (Lionel) v13: Fix missing snippets lost after rebase (Lionel) v14: Drop update_binary usage (Lionel) v15: Use ANV_MULTIALLOC (Lionel) v16: Fix some realloc issues (Ivan) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> (v8) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2901>
This commit is contained in:

committed by
Marge Bot

parent
a965ffad21
commit
829699ba63
@@ -95,11 +95,16 @@ anv_queue_submit_free(struct anv_device *device,
|
||||
for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++)
|
||||
anv_semaphore_unref(device, submit->sync_fd_semaphores[i]);
|
||||
/* Execbuf does not consume the in_fence. It's our job to close it. */
|
||||
if (submit->in_fence != -1)
|
||||
if (submit->in_fence != -1) {
|
||||
assert(!device->has_thread_submit);
|
||||
close(submit->in_fence);
|
||||
if (submit->out_fence != -1)
|
||||
}
|
||||
if (submit->out_fence != -1) {
|
||||
assert(!device->has_thread_submit);
|
||||
close(submit->out_fence);
|
||||
}
|
||||
vk_free(alloc, submit->fences);
|
||||
vk_free(alloc, submit->fence_values);
|
||||
vk_free(alloc, submit->temporary_semaphores);
|
||||
vk_free(alloc, submit->wait_timelines);
|
||||
vk_free(alloc, submit->wait_timeline_values);
|
||||
@@ -349,6 +354,98 @@ anv_device_submit_deferred_locked(struct anv_device *device)
|
||||
return anv_queue_submit_deferred_locked(&device->queue, &advance);
|
||||
}
|
||||
|
||||
static void
|
||||
anv_queue_submit_signal_fences(struct anv_device *device,
|
||||
struct anv_queue_submit *submit)
|
||||
{
|
||||
for (uint32_t i = 0; i < submit->fence_count; i++) {
|
||||
if (submit->fences[i].flags & I915_EXEC_FENCE_SIGNAL) {
|
||||
anv_gem_syncobj_timeline_signal(device, &submit->fences[i].handle,
|
||||
&submit->fence_values[i], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void *
|
||||
anv_queue_task(void *_queue)
|
||||
{
|
||||
struct anv_queue *queue = _queue;
|
||||
|
||||
pthread_mutex_lock(&queue->mutex);
|
||||
|
||||
while (!queue->quit) {
|
||||
while (!list_is_empty(&queue->queued_submits)) {
|
||||
struct anv_queue_submit *submit =
|
||||
list_first_entry(&queue->queued_submits, struct anv_queue_submit, link);
|
||||
list_del(&submit->link);
|
||||
|
||||
pthread_mutex_unlock(&queue->mutex);
|
||||
|
||||
VkResult result = VK_ERROR_DEVICE_LOST;
|
||||
|
||||
/* Wait for timeline points to materialize before submitting. We need
|
||||
* to do this because we're using threads to do the submit to i915.
|
||||
* We could end up in a situation where the application submits to 2
|
||||
* queues with the first submit creating the dma-fence for the
|
||||
* second. But because the scheduling of the submission threads might
|
||||
* wakeup the second queue thread first, this would make that execbuf
|
||||
* fail because the dma-fence it depends on hasn't materialized yet.
|
||||
*/
|
||||
if (!queue->lost && submit->wait_timeline_count > 0) {
|
||||
int ret = queue->device->no_hw ? 0 :
|
||||
anv_gem_syncobj_timeline_wait(
|
||||
queue->device, submit->wait_timeline_syncobjs,
|
||||
submit->wait_timeline_values, submit->wait_timeline_count,
|
||||
anv_get_absolute_timeout(UINT64_MAX) /* wait forever */,
|
||||
true /* wait for all */, true /* wait for materialize */);
|
||||
if (ret) {
|
||||
result = anv_queue_set_lost(queue, "timeline timeout: %s",
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
/* Now submit */
|
||||
if (!queue->lost) {
|
||||
pthread_mutex_lock(&queue->device->mutex);
|
||||
result = anv_queue_execbuf_locked(queue, submit);
|
||||
pthread_mutex_unlock(&queue->device->mutex);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < submit->sync_fd_semaphore_count; i++) {
|
||||
struct anv_semaphore *semaphore = submit->sync_fd_semaphores[i];
|
||||
/* Out fences can't have temporary state because that would imply
|
||||
* that we imported a sync file and are trying to signal it.
|
||||
*/
|
||||
assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
|
||||
struct anv_semaphore_impl *impl = &semaphore->permanent;
|
||||
|
||||
assert(impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE);
|
||||
impl->fd = dup(submit->out_fence);
|
||||
}
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
/* vkQueueSubmit or some other entry point will report the
|
||||
* DEVICE_LOST error at some point, but until we have emptied our
|
||||
* list of execbufs we need to wake up all potential the waiters
|
||||
* until one of them spots the error.
|
||||
*/
|
||||
anv_queue_submit_signal_fences(queue->device, submit);
|
||||
}
|
||||
|
||||
anv_queue_submit_free(queue->device, submit);
|
||||
|
||||
pthread_mutex_lock(&queue->mutex);
|
||||
}
|
||||
|
||||
if (!queue->quit)
|
||||
pthread_cond_wait(&queue->cond, &queue->mutex);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&queue->mutex);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
_anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
|
||||
bool flush_queue)
|
||||
@@ -360,42 +457,92 @@ _anv_queue_submit(struct anv_queue *queue, struct anv_queue_submit **_submit,
|
||||
* anv_queue.
|
||||
*/
|
||||
*_submit = NULL;
|
||||
if (queue->device->has_thread_submit) {
|
||||
pthread_mutex_lock(&queue->mutex);
|
||||
pthread_cond_broadcast(&queue->cond);
|
||||
list_addtail(&submit->link, &queue->queued_submits);
|
||||
pthread_mutex_unlock(&queue->mutex);
|
||||
return VK_SUCCESS;
|
||||
} else {
|
||||
pthread_mutex_lock(&queue->device->mutex);
|
||||
list_addtail(&submit->link, &queue->queued_submits);
|
||||
VkResult result = anv_device_submit_deferred_locked(queue->device);
|
||||
if (flush_queue) {
|
||||
while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
|
||||
int ret = pthread_cond_wait(&queue->device->queue_submit,
|
||||
&queue->device->mutex);
|
||||
if (ret != 0) {
|
||||
result = anv_device_set_lost(queue->device, "wait timeout");
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&queue->device->mutex);
|
||||
list_addtail(&submit->link, &queue->queued_submits);
|
||||
VkResult result = anv_device_submit_deferred_locked(queue->device);
|
||||
if (flush_queue) {
|
||||
while (result == VK_SUCCESS && !list_is_empty(&queue->queued_submits)) {
|
||||
int ret = pthread_cond_wait(&queue->device->queue_submit,
|
||||
&queue->device->mutex);
|
||||
if (ret != 0) {
|
||||
result = anv_device_set_lost(queue->device, "wait timeout");
|
||||
break;
|
||||
result = anv_device_submit_deferred_locked(queue->device);
|
||||
}
|
||||
|
||||
result = anv_device_submit_deferred_locked(queue->device);
|
||||
}
|
||||
pthread_mutex_unlock(&queue->device->mutex);
|
||||
return result;
|
||||
}
|
||||
pthread_mutex_unlock(&queue->device->mutex);
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_queue_init(struct anv_device *device, struct anv_queue *queue)
|
||||
{
|
||||
vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
|
||||
VkResult result;
|
||||
|
||||
queue->device = device;
|
||||
queue->flags = 0;
|
||||
queue->lost = false;
|
||||
queue->quit = false;
|
||||
|
||||
list_inithead(&queue->queued_submits);
|
||||
|
||||
/* We only need those additional thread/mutex when using a thread for
|
||||
* submission.
|
||||
*/
|
||||
if (device->has_thread_submit) {
|
||||
if (pthread_mutex_init(&queue->mutex, NULL) != 0)
|
||||
return vk_error(VK_ERROR_INITIALIZATION_FAILED);
|
||||
|
||||
if (pthread_cond_init(&queue->cond, NULL) != 0) {
|
||||
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
|
||||
goto fail_mutex;
|
||||
}
|
||||
if (pthread_create(&queue->thread, NULL, anv_queue_task, queue)) {
|
||||
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
|
||||
goto fail_cond;
|
||||
}
|
||||
}
|
||||
|
||||
vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_cond:
|
||||
pthread_cond_destroy(&queue->cond);
|
||||
fail_mutex:
|
||||
pthread_mutex_destroy(&queue->mutex);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
anv_queue_finish(struct anv_queue *queue)
|
||||
{
|
||||
vk_object_base_finish(&queue->base);
|
||||
|
||||
if (!queue->device->has_thread_submit)
|
||||
return;
|
||||
|
||||
pthread_mutex_lock(&queue->mutex);
|
||||
pthread_cond_broadcast(&queue->cond);
|
||||
queue->quit = true;
|
||||
pthread_mutex_unlock(&queue->mutex);
|
||||
|
||||
void *ret;
|
||||
pthread_join(queue->thread, &ret);
|
||||
|
||||
pthread_cond_destroy(&queue->cond);
|
||||
pthread_mutex_destroy(&queue->mutex);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
@@ -427,10 +574,42 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
|
||||
static VkResult
|
||||
anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
|
||||
struct anv_device *device,
|
||||
uint32_t handle, uint32_t flags)
|
||||
uint32_t handle, uint32_t flags,
|
||||
uint64_t value)
|
||||
{
|
||||
assert(flags != 0);
|
||||
|
||||
if (device->has_thread_submit && (flags & I915_EXEC_FENCE_WAIT)) {
|
||||
if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
|
||||
uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
|
||||
|
||||
uint32_t *new_wait_timeline_syncobjs =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->wait_timeline_syncobjs,
|
||||
new_len * sizeof(*submit->wait_timeline_syncobjs),
|
||||
8, submit->alloc_scope);
|
||||
if (new_wait_timeline_syncobjs == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->wait_timeline_syncobjs = new_wait_timeline_syncobjs;
|
||||
|
||||
uint64_t *new_wait_timeline_values =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
|
||||
8, submit->alloc_scope);
|
||||
if (new_wait_timeline_values == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->wait_timeline_values = new_wait_timeline_values;
|
||||
submit->wait_timeline_array_length = new_len;
|
||||
}
|
||||
|
||||
submit->wait_timeline_syncobjs[submit->wait_timeline_count] = handle;
|
||||
submit->wait_timeline_values[submit->wait_timeline_count] = value;
|
||||
|
||||
submit->wait_timeline_count++;
|
||||
}
|
||||
|
||||
if (submit->fence_count >= submit->fence_array_length) {
|
||||
uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
|
||||
struct drm_i915_gem_exec_fence *new_fences =
|
||||
@@ -441,13 +620,24 @@ anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->fences = new_fences;
|
||||
|
||||
uint64_t *new_fence_values =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->fence_values, new_len * sizeof(*submit->fence_values),
|
||||
8, submit->alloc_scope);
|
||||
if (new_fence_values == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->fence_values = new_fence_values;
|
||||
submit->fence_array_length = new_len;
|
||||
}
|
||||
|
||||
submit->fences[submit->fence_count++] = (struct drm_i915_gem_exec_fence) {
|
||||
submit->fences[submit->fence_count] = (struct drm_i915_gem_exec_fence) {
|
||||
.handle = handle,
|
||||
.flags = flags,
|
||||
};
|
||||
submit->fence_values[submit->fence_count] = value;
|
||||
submit->fence_count++;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
@@ -595,7 +785,7 @@ anv_queue_submit_simple_batch(struct anv_queue *queue,
|
||||
}
|
||||
|
||||
result = anv_queue_submit_add_syncobj(submit, device, syncobj,
|
||||
I915_EXEC_FENCE_SIGNAL);
|
||||
I915_EXEC_FENCE_SIGNAL, 0);
|
||||
} else {
|
||||
result = anv_device_alloc_bo(device, 4096,
|
||||
ANV_BO_ALLOC_EXTERNAL |
|
||||
@@ -742,7 +932,6 @@ anv_queue_submit(struct anv_queue *queue,
|
||||
submit->cmd_buffer = cmd_buffer;
|
||||
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
for (uint32_t i = 0; i < num_in_semaphores; i++) {
|
||||
ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
|
||||
struct anv_semaphore_impl *impl;
|
||||
@@ -796,7 +985,8 @@ anv_queue_submit(struct anv_queue *queue,
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
|
||||
result = anv_queue_submit_add_syncobj(submit, device,
|
||||
impl->syncobj,
|
||||
I915_EXEC_FENCE_WAIT);
|
||||
I915_EXEC_FENCE_WAIT,
|
||||
0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
break;
|
||||
@@ -810,6 +1000,15 @@ anv_queue_submit(struct anv_queue *queue,
|
||||
goto error;
|
||||
break;
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
|
||||
result = anv_queue_submit_add_syncobj(submit, device,
|
||||
impl->syncobj,
|
||||
I915_EXEC_FENCE_WAIT,
|
||||
in_values ? in_values[i] : 0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -850,7 +1049,8 @@ anv_queue_submit(struct anv_queue *queue,
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: {
|
||||
result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
|
||||
I915_EXEC_FENCE_SIGNAL);
|
||||
I915_EXEC_FENCE_SIGNAL,
|
||||
0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
break;
|
||||
@@ -864,6 +1064,14 @@ anv_queue_submit(struct anv_queue *queue,
|
||||
goto error;
|
||||
break;
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
|
||||
result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
|
||||
I915_EXEC_FENCE_SIGNAL,
|
||||
out_values ? out_values[i] : 0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -893,6 +1101,7 @@ anv_queue_submit(struct anv_queue *queue,
|
||||
|
||||
switch (impl->type) {
|
||||
case ANV_FENCE_TYPE_BO:
|
||||
assert(!device->has_thread_submit);
|
||||
result = anv_queue_submit_add_fence_bo(submit, impl->bo.bo, true /* signal */);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
@@ -904,8 +1113,11 @@ anv_queue_submit(struct anv_queue *queue,
|
||||
* also reset the fence's syncobj so that they don't contain a
|
||||
* signaled dma-fence.
|
||||
*/
|
||||
anv_gem_syncobj_reset(device, impl->syncobj);
|
||||
|
||||
result = anv_queue_submit_add_syncobj(submit, device, impl->syncobj,
|
||||
I915_EXEC_FENCE_SIGNAL);
|
||||
I915_EXEC_FENCE_SIGNAL,
|
||||
0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto error;
|
||||
break;
|
||||
@@ -921,6 +1133,7 @@ anv_queue_submit(struct anv_queue *queue,
|
||||
goto error;
|
||||
|
||||
if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
|
||||
assert(!device->has_thread_submit);
|
||||
/* If we have permanent BO fence, the only type of temporary possible
|
||||
* would be BO_WSI (because BO fences are not shareable). The Vulkan spec
|
||||
* also requires that the fence passed to vkQueueSubmit() be :
|
||||
@@ -1291,16 +1504,34 @@ VkResult anv_GetFenceStatus(
|
||||
}
|
||||
|
||||
case ANV_FENCE_TYPE_SYNCOBJ: {
|
||||
int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, true);
|
||||
if (ret == -1) {
|
||||
if (errno == ETIME) {
|
||||
return VK_NOT_READY;
|
||||
if (device->has_thread_submit) {
|
||||
uint64_t binary_value = 0;
|
||||
int ret = anv_gem_syncobj_timeline_wait(device, &impl->syncobj,
|
||||
&binary_value, 1, 0,
|
||||
true /* wait_all */,
|
||||
false /* wait_materialize */);
|
||||
if (ret == -1) {
|
||||
if (errno == ETIME) {
|
||||
return VK_NOT_READY;
|
||||
} else {
|
||||
/* We don't know the real error. */
|
||||
return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
|
||||
}
|
||||
} else {
|
||||
/* We don't know the real error. */
|
||||
return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
} else {
|
||||
return VK_SUCCESS;
|
||||
int ret = anv_gem_syncobj_wait(device, &impl->syncobj, 1, 0, false);
|
||||
if (ret == -1) {
|
||||
if (errno == ETIME) {
|
||||
return VK_NOT_READY;
|
||||
} else {
|
||||
/* We don't know the real error. */
|
||||
return anv_device_set_lost(device, "drm_syncobj_wait failed: %m");
|
||||
}
|
||||
} else {
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1334,11 +1565,11 @@ anv_wait_for_syncobj_fences(struct anv_device *device,
|
||||
syncobjs[i] = impl->syncobj;
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
/* The gem_syncobj_wait ioctl may return early due to an inherent
|
||||
* limitation in the way it computes timeouts. Loop until we've actually
|
||||
* limitation in the way it computes timeouts. Loop until we've actually
|
||||
* passed the timeout.
|
||||
*/
|
||||
int ret;
|
||||
do {
|
||||
ret = anv_gem_syncobj_wait(device, syncobjs, fenceCount,
|
||||
abs_timeout_ns, waitAll);
|
||||
@@ -1496,6 +1727,8 @@ anv_wait_for_fences(struct anv_device *device,
|
||||
|
||||
switch (impl->type) {
|
||||
case ANV_FENCE_TYPE_BO:
|
||||
assert(!device->physical->has_syncobj_wait);
|
||||
/* fall-through */
|
||||
case ANV_FENCE_TYPE_WSI_BO:
|
||||
result = anv_wait_for_bo_fences(device, 1, &pFences[i],
|
||||
true, abs_timeout);
|
||||
@@ -1695,6 +1928,31 @@ VkResult anv_ImportFenceFdKHR(
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* The sideband payload of the DRM syncobj was incremented when the
|
||||
* application called vkQueueSubmit(). Here we wait for a fence with the same
|
||||
* value to materialize so that we can exporting (typically as a SyncFD).
|
||||
*/
|
||||
static VkResult
|
||||
wait_syncobj_materialize(struct anv_device *device,
|
||||
uint32_t syncobj,
|
||||
int *fd)
|
||||
{
|
||||
if (!device->has_thread_submit)
|
||||
return VK_SUCCESS;
|
||||
|
||||
uint64_t binary_value = 0;
|
||||
/* We might need to wait until the fence materializes before we can
|
||||
* export to a sync FD when we use a thread for submission.
|
||||
*/
|
||||
if (anv_gem_syncobj_timeline_wait(device, &syncobj, &binary_value, 1,
|
||||
anv_get_absolute_timeout(5ull * NSEC_PER_SEC),
|
||||
true /* wait_all */,
|
||||
true /* wait_materialize */))
|
||||
return anv_device_set_lost(device, "anv_gem_syncobj_timeline_wait failed: %m");
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult anv_GetFenceFdKHR(
|
||||
VkDevice _device,
|
||||
const VkFenceGetFdInfoKHR* pGetFdInfo,
|
||||
@@ -1721,6 +1979,10 @@ VkResult anv_GetFenceFdKHR(
|
||||
}
|
||||
|
||||
case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT: {
|
||||
VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
int fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
|
||||
if (fd < 0)
|
||||
return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
|
||||
@@ -1794,8 +2056,24 @@ timeline_semaphore_create(struct anv_device *device,
|
||||
struct anv_semaphore_impl *impl,
|
||||
uint64_t initial_value)
|
||||
{
|
||||
impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
|
||||
anv_timeline_init(device, &impl->timeline, initial_value);
|
||||
if (device->has_thread_submit) {
|
||||
impl->type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE;
|
||||
impl->syncobj = anv_gem_syncobj_create(device, 0);
|
||||
if (!impl->syncobj)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
if (initial_value) {
|
||||
if (anv_gem_syncobj_timeline_signal(device,
|
||||
&impl->syncobj,
|
||||
&initial_value, 1)) {
|
||||
anv_gem_syncobj_destroy(device, impl->syncobj);
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
impl->type = ANV_SEMAPHORE_TYPE_TIMELINE;
|
||||
anv_timeline_init(device, &impl->timeline, initial_value);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1824,7 +2102,7 @@ VkResult anv_CreateSemaphore(
|
||||
|
||||
const VkExportSemaphoreCreateInfo *export =
|
||||
vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
|
||||
VkExternalSemaphoreHandleTypeFlags handleTypes =
|
||||
VkExternalSemaphoreHandleTypeFlags handleTypes =
|
||||
export ? export->handleTypes : 0;
|
||||
VkResult result;
|
||||
|
||||
@@ -1839,8 +2117,10 @@ VkResult anv_CreateSemaphore(
|
||||
}
|
||||
} else if (handleTypes & VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
|
||||
assert(handleTypes == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
|
||||
assert(sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR);
|
||||
result = binary_semaphore_create(device, &semaphore->permanent, true);
|
||||
if (sem_type == VK_SEMAPHORE_TYPE_BINARY_KHR)
|
||||
result = binary_semaphore_create(device, &semaphore->permanent, true);
|
||||
else
|
||||
result = timeline_semaphore_create(device, &semaphore->permanent, timeline_value);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free2(&device->vk.alloc, pAllocator, semaphore);
|
||||
return result;
|
||||
@@ -1897,6 +2177,7 @@ anv_semaphore_impl_cleanup(struct anv_device *device,
|
||||
break;
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
|
||||
anv_gem_syncobj_destroy(device, impl->syncobj);
|
||||
break;
|
||||
|
||||
@@ -1964,8 +2245,10 @@ void anv_GetPhysicalDeviceExternalSemaphoreProperties(
|
||||
|
||||
switch (pExternalSemaphoreInfo->handleType) {
|
||||
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
|
||||
/* Timeline semaphores are not exportable. */
|
||||
if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR)
|
||||
/* Timeline semaphores are not exportable, unless we have threaded
|
||||
* submission.
|
||||
*/
|
||||
if (sem_type == VK_SEMAPHORE_TYPE_TIMELINE_KHR && !device->has_thread_submit)
|
||||
break;
|
||||
pExternalSemaphoreProperties->exportFromImportedHandleTypes =
|
||||
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
|
||||
@@ -2014,7 +2297,15 @@ VkResult anv_ImportSemaphoreFdKHR(
|
||||
switch (pImportSemaphoreFdInfo->handleType) {
|
||||
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
|
||||
if (device->physical->has_syncobj) {
|
||||
new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
|
||||
/* When importing non temporarily, reuse the semaphore's existing
|
||||
* type. The Linux/DRM implementation allows to interchangeably use
|
||||
* binary & timeline semaphores and we have no way to differenciate
|
||||
* them.
|
||||
*/
|
||||
if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)
|
||||
new_impl.type = ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ;
|
||||
else
|
||||
new_impl.type = semaphore->permanent.type;
|
||||
|
||||
new_impl.syncobj = anv_gem_syncobj_fd_to_handle(device, fd);
|
||||
if (!new_impl.syncobj)
|
||||
@@ -2168,9 +2459,13 @@ VkResult anv_GetSemaphoreFdKHR(
|
||||
}
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
|
||||
if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)
|
||||
if (pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
|
||||
VkResult result = wait_syncobj_materialize(device, impl->syncobj, pFd);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
fd = anv_gem_syncobj_export_sync_file(device, impl->syncobj);
|
||||
else {
|
||||
} else {
|
||||
assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
|
||||
fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
|
||||
}
|
||||
@@ -2179,6 +2474,14 @@ VkResult anv_GetSemaphoreFdKHR(
|
||||
*pFd = fd;
|
||||
break;
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE:
|
||||
assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT);
|
||||
fd = anv_gem_syncobj_handle_to_fd(device, impl->syncobj);
|
||||
if (fd < 0)
|
||||
return vk_error(VK_ERROR_TOO_MANY_OBJECTS);
|
||||
*pFd = fd;
|
||||
break;
|
||||
|
||||
default:
|
||||
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
|
||||
}
|
||||
@@ -2217,6 +2520,15 @@ VkResult anv_GetSemaphoreCounterValue(
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
|
||||
int ret = anv_gem_syncobj_timeline_query(device, &impl->syncobj, pValue, 1);
|
||||
|
||||
if (ret != 0)
|
||||
return anv_device_set_lost(device, "unable to query timeline syncobj");
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Invalid semaphore type");
|
||||
}
|
||||
@@ -2236,8 +2548,8 @@ anv_timeline_wait_locked(struct anv_device *device,
|
||||
.tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
|
||||
};
|
||||
|
||||
int ret = pthread_cond_timedwait(&device->queue_submit,
|
||||
&device->mutex, &abstime);
|
||||
UNUSED int ret = pthread_cond_timedwait(&device->queue_submit,
|
||||
&device->mutex, &abstime);
|
||||
assert(ret != EINVAL);
|
||||
if (anv_gettime_ns() >= abs_timeout_ns &&
|
||||
timeline->highest_pending < serial)
|
||||
@@ -2336,25 +2648,23 @@ VkResult anv_WaitSemaphores(
|
||||
uint64_t timeout)
|
||||
{
|
||||
ANV_FROM_HANDLE(anv_device, device, _device);
|
||||
uint32_t *handles;
|
||||
struct anv_timeline **timelines;
|
||||
uint64_t *values;
|
||||
|
||||
if (device->no_hw)
|
||||
return VK_SUCCESS;
|
||||
ANV_MULTIALLOC(ma);
|
||||
|
||||
struct anv_timeline **timelines =
|
||||
vk_alloc(&device->vk.alloc,
|
||||
pWaitInfo->semaphoreCount * sizeof(*timelines),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (!timelines)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
uint64_t *values = vk_alloc(&device->vk.alloc,
|
||||
pWaitInfo->semaphoreCount * sizeof(*values),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (!values) {
|
||||
vk_free(&device->vk.alloc, timelines);
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
anv_multialloc_add(&ma, &values, pWaitInfo->semaphoreCount);
|
||||
if (device->has_thread_submit) {
|
||||
anv_multialloc_add(&ma, &handles, pWaitInfo->semaphoreCount);
|
||||
} else {
|
||||
anv_multialloc_add(&ma, &timelines, pWaitInfo->semaphoreCount);
|
||||
}
|
||||
|
||||
if (!anv_multialloc_alloc(&ma, &device->vk.alloc,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
uint32_t handle_count = 0;
|
||||
for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; i++) {
|
||||
ANV_FROM_HANDLE(anv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
|
||||
@@ -2362,24 +2672,40 @@ VkResult anv_WaitSemaphores(
|
||||
semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
|
||||
&semaphore->temporary : &semaphore->permanent;
|
||||
|
||||
assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
|
||||
|
||||
if (pWaitInfo->pValues[i] == 0)
|
||||
continue;
|
||||
|
||||
timelines[handle_count] = &impl->timeline;
|
||||
if (device->has_thread_submit) {
|
||||
assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE);
|
||||
handles[handle_count] = impl->syncobj;
|
||||
} else {
|
||||
assert(impl->type == ANV_SEMAPHORE_TYPE_TIMELINE);
|
||||
timelines[handle_count] = &impl->timeline;
|
||||
}
|
||||
values[handle_count] = pWaitInfo->pValues[i];
|
||||
handle_count++;
|
||||
}
|
||||
|
||||
VkResult result = VK_SUCCESS;
|
||||
if (handle_count > 0) {
|
||||
result = anv_timelines_wait(device, timelines, values, handle_count,
|
||||
!(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
|
||||
anv_get_absolute_timeout(timeout));
|
||||
if (device->has_thread_submit) {
|
||||
int ret =
|
||||
anv_gem_syncobj_timeline_wait(device,
|
||||
handles, values, handle_count,
|
||||
anv_get_absolute_timeout(timeout),
|
||||
!(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
|
||||
false);
|
||||
if (ret != 0)
|
||||
result = errno == ETIME ? VK_TIMEOUT :
|
||||
anv_device_set_lost(device, "unable to wait on timeline syncobj");
|
||||
} else {
|
||||
result =
|
||||
anv_timelines_wait(device, timelines, values, handle_count,
|
||||
!(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR),
|
||||
anv_get_absolute_timeout(timeout));
|
||||
}
|
||||
}
|
||||
|
||||
vk_free(&device->vk.alloc, timelines);
|
||||
vk_free(&device->vk.alloc, values);
|
||||
|
||||
return result;
|
||||
@@ -2414,6 +2740,20 @@ VkResult anv_SignalSemaphore(
|
||||
return result;
|
||||
}
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE: {
|
||||
/* Timeline semaphores are created with a value of 0, so signaling on 0
|
||||
* is a waste of time.
|
||||
*/
|
||||
if (pSignalInfo->value == 0)
|
||||
return VK_SUCCESS;
|
||||
|
||||
int ret = anv_gem_syncobj_timeline_signal(device, &impl->syncobj,
|
||||
&pSignalInfo->value, 1);
|
||||
|
||||
return ret == 0 ? VK_SUCCESS :
|
||||
anv_device_set_lost(device, "unable to signal timeline syncobj");
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Invalid semaphore type");
|
||||
}
|
||||
|
Reference in New Issue
Block a user