anv: Convert to the common sync and submit framework
This is, unfortunately, a large flag-day mega-commit. However, any other approach would likely be fragile and involve a lot more churn as we try to plumb the new vk_fence and vk_semaphore primitives into ANV's submit code before we delete it all. Instead, we do it all in one go and accept the consequences.

While this should be mostly functionally equivalent to the previous code, there is one potential perf-affecting change. The command buffer chaining optimization no longer works across VkSubmitInfo structs. Within a single VkSubmitInfo, we will attempt to chain all the command buffers together, but we no longer try to chain across a VkSubmitInfo boundary. Hopefully, this isn't a significant perf problem. If it ever is, we'll have to teach the core runtime code how to combine two or more VkSubmitInfos into a single vk_queue_submit.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13427>
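For reference, the per-VkSubmitInfo chaining described above is implemented by the new anv_queue_submit_locked() further down in this diff. A simplified sketch of that grouping loop (same helper names as in the patch; the perf-query-pool matching and error paths are omitted here) looks like this:

/* Simplified sketch: group the command buffers of one vk_queue_submit into
 * runs of chainable batches.  Each run becomes one execbuf; waits apply to
 * the first run only and signals to the last run only.
 */
uint32_t start = 0;
const uint32_t end = submit->command_buffer_count;
while (start < end) {
   uint32_t i = start + 1;
   while (i < end && anv_cmd_buffer_is_chainable(cmd_buffers[i]))
      i++;

   anv_queue_exec_locked(queue,
                         start == 0 ? submit->wait_count : 0,
                         start == 0 ? submit->waits : NULL,
                         i - start, &cmd_buffers[start],
                         i == end ? submit->signal_count : 0,
                         i == end ? submit->signals : NULL,
                         perf_query_pool, submit->perf_pass_index);
   start = i;
}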
@@ -845,7 +845,7 @@ anv_AcquireImageANDROID(
.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
.fd = semaphore_fd,
};
result = anv_ImportSemaphoreFdKHR(device_h, &info);
result = vk_common_ImportSemaphoreFdKHR(device_h, &info);
if (result == VK_SUCCESS)
semaphore_fd = -1; /* ANV took ownership */
}
@@ -858,7 +858,7 @@ anv_AcquireImageANDROID(
.handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
.fd = fence_fd,
};
result = anv_ImportFenceFdKHR(device_h, &info);
result = vk_common_ImportFenceFdKHR(device_h, &info);
if (result == VK_SUCCESS)
fence_fd = -1; /* ANV took ownership */
}
@@ -1180,11 +1180,19 @@ struct anv_execbuf {
/* Allocated length of the 'objects' and 'bos' arrays */
uint32_t array_length;

uint32_t syncobj_count;
uint32_t syncobj_array_length;
struct drm_i915_gem_exec_fence * syncobjs;
uint64_t * syncobj_values;

/* List of relocations for surface states, only used with platforms not
* using softpin.
*/
void * surface_states_relocs;

uint32_t cmd_buffer_count;
struct anv_query_pool *perf_query_pool;

/* Indicates whether any of the command buffers have relocations. This
* doesn't not necessarily mean we'll need the kernel to process them. It
* might be that a previous execbuf has already placed things in the VMA
@@ -1559,6 +1567,101 @@ reset_cmd_buffer_surface_offsets(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
}

static VkResult
anv_execbuf_add_syncobj(struct anv_device *device,
struct anv_execbuf *exec,
uint32_t syncobj,
uint32_t flags,
uint64_t timeline_value)
{
if (exec->syncobj_count >= exec->syncobj_array_length) {
uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);

struct drm_i915_gem_exec_fence *new_syncobjs =
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobjs),
8, exec->alloc_scope);
if (!new_syncobjs)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

if (exec->syncobjs)
typed_memcpy(new_syncobjs, exec->syncobjs, exec->syncobj_count);

exec->syncobjs = new_syncobjs;

if (exec->syncobj_values) {
uint64_t *new_syncobj_values =
vk_alloc(exec->alloc, new_len * sizeof(*new_syncobj_values),
8, exec->alloc_scope);
if (!new_syncobj_values)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

typed_memcpy(new_syncobj_values, exec->syncobj_values,
exec->syncobj_count);

exec->syncobj_values = new_syncobj_values;
}

exec->syncobj_array_length = new_len;
}

if (timeline_value && !exec->syncobj_values) {
exec->syncobj_values =
vk_zalloc(exec->alloc, exec->syncobj_array_length *
sizeof(*exec->syncobj_values),
8, exec->alloc_scope);
if (!exec->syncobj_values)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}

exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
.handle = syncobj,
.flags = flags,
};
if (timeline_value)
exec->syncobj_values[exec->syncobj_count] = timeline_value;

exec->syncobj_count++;

return VK_SUCCESS;
}

static VkResult
anv_execbuf_add_sync(struct anv_device *device,
struct anv_execbuf *execbuf,
struct vk_sync *sync,
bool is_signal,
uint64_t value)
{
/* It's illegal to signal a timeline with value 0 because that's never
* higher than the current value. A timeline wait on value 0 is always
* trivial because 0 <= uint64_t always.
*/
if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
return VK_SUCCESS;

if (vk_sync_is_anv_bo_sync(sync)) {
struct anv_bo_sync *bo_sync =
container_of(sync, struct anv_bo_sync, sync);

assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));

return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
is_signal ? EXEC_OBJECT_WRITE : 0);
} else if (vk_sync_type_is_drm_syncobj(sync->type)) {
struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);

if (!(sync->flags & VK_SYNC_IS_TIMELINE))
value = 0;

return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
is_signal ? I915_EXEC_FENCE_SIGNAL :
I915_EXEC_FENCE_WAIT,
value);
}

unreachable("Invalid sync type");
}

static VkResult
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
struct anv_cmd_buffer *cmd_buffer)
@@ -1873,16 +1976,23 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
* pool resize only rarely happen, this will almost never be contended so
* taking a lock isn't really an expensive operation in this case.
*/
VkResult
anv_queue_execbuf_locked(struct anv_queue *queue,
struct anv_queue_submit *submit)
static VkResult
anv_queue_exec_locked(struct anv_queue *queue,
uint32_t wait_count,
const struct vk_sync_wait *waits,
uint32_t cmd_buffer_count,
struct anv_cmd_buffer **cmd_buffers,
uint32_t signal_count,
const struct vk_sync_signal *signals,
struct anv_query_pool *perf_query_pool,
uint32_t perf_query_pass)
{
struct anv_device *device = queue->device;
struct anv_execbuf execbuf;
anv_execbuf_init(&execbuf);
execbuf.alloc = submit->alloc;
execbuf.alloc_scope = submit->alloc_scope;
execbuf.perf_query_pass = submit->perf_query_pass;
execbuf.alloc = &queue->device->vk.alloc;
execbuf.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;
execbuf.perf_query_pass = perf_query_pass;

/* Always add the workaround BO as it includes a driver identifier for the
* error_state.
@@ -1892,34 +2002,28 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
if (result != VK_SUCCESS)
goto error;

for (uint32_t i = 0; i < submit->fence_bo_count; i++) {
int signaled;
struct anv_bo *bo = anv_unpack_ptr(submit->fence_bos[i], 1, &signaled);

result = anv_execbuf_add_bo(device, &execbuf, bo, NULL,
signaled ? EXEC_OBJECT_WRITE : 0);
for (uint32_t i = 0; i < wait_count; i++) {
result = anv_execbuf_add_sync(device, &execbuf,
waits[i].sync,
false /* is_signal */,
waits[i].wait_value);
if (result != VK_SUCCESS)
goto error;
}

if (submit->cmd_buffer_count) {
result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
submit->cmd_buffers,
submit->cmd_buffer_count);
} else if (submit->simple_bo) {
result = anv_execbuf_add_bo(device, &execbuf, submit->simple_bo, NULL, 0);
for (uint32_t i = 0; i < signal_count; i++) {
result = anv_execbuf_add_sync(device, &execbuf,
signals[i].sync,
true /* is_signal */,
signals[i].signal_value);
if (result != VK_SUCCESS)
goto error;
}

execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf.objects,
.buffer_count = execbuf.bo_count,
.batch_start_offset = 0,
.batch_len = submit->simple_bo_size,
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
.rsvd1 = device->context_id,
.rsvd2 = 0,
};
if (cmd_buffer_count) {
result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
cmd_buffers,
cmd_buffer_count);
} else {
result = setup_empty_execbuf(&execbuf, queue);
}
@@ -1928,9 +2032,7 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
goto error;

const bool has_perf_query =
submit->perf_query_pass >= 0 &&
submit->cmd_buffer_count &&
submit->perf_query_pool;
perf_query_pool && perf_query_pass >= 0 && cmd_buffer_count;

if (INTEL_DEBUG(DEBUG_SUBMIT)) {
fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0\n",
@@ -1945,30 +2047,25 @@ anv_queue_execbuf_locked(struct anv_queue *queue,

if (INTEL_DEBUG(DEBUG_BATCH)) {
fprintf(stderr, "Batch on queue %d\n", (int)(queue - device->queues));
if (submit->cmd_buffer_count) {
if (cmd_buffer_count) {
if (has_perf_query) {
struct anv_query_pool *query_pool = submit->perf_query_pool;
struct anv_bo *pass_batch_bo = query_pool->bo;
struct anv_bo *pass_batch_bo = perf_query_pool->bo;
uint64_t pass_batch_offset =
khr_perf_query_preamble_offset(query_pool,
submit->perf_query_pass);
khr_perf_query_preamble_offset(perf_query_pool, perf_query_pass);

intel_print_batch(&device->decoder_ctx,
pass_batch_bo->map + pass_batch_offset, 64,
pass_batch_bo->offset + pass_batch_offset, false);
}

for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) {
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
struct anv_batch_bo **bo =
u_vector_tail(&submit->cmd_buffers[i]->seen_bbos);
device->cmd_buffer_being_decoded = submit->cmd_buffers[i];
u_vector_tail(&cmd_buffers[i]->seen_bbos);
device->cmd_buffer_being_decoded = cmd_buffers[i];
intel_print_batch(&device->decoder_ctx, (*bo)->bo->map,
(*bo)->bo->size, (*bo)->bo->offset, false);
device->cmd_buffer_being_decoded = NULL;
}
} else if (submit->simple_bo) {
intel_print_batch(&device->decoder_ctx, submit->simple_bo->map,
submit->simple_bo->size, submit->simple_bo->offset, false);
} else {
intel_print_batch(&device->decoder_ctx,
device->trivial_batch_bo->map,
@@ -1977,26 +2074,23 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
}
}

if (submit->fence_count > 0) {
if (device->has_thread_submit) {
execbuf.timeline_fences.fence_count = submit->fence_count;
execbuf.timeline_fences.handles_ptr = (uintptr_t)submit->fences;
execbuf.timeline_fences.values_ptr = (uintptr_t)submit->fence_values;
anv_execbuf_add_ext(&execbuf,
DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
&execbuf.timeline_fences.base);
} else {
execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
execbuf.execbuf.num_cliprects = submit->fence_count;
execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences;
}
if (execbuf.syncobj_values) {
execbuf.timeline_fences.fence_count = execbuf.syncobj_count;
execbuf.timeline_fences.handles_ptr = (uintptr_t)execbuf.syncobjs;
execbuf.timeline_fences.values_ptr = (uintptr_t)execbuf.syncobj_values;
anv_execbuf_add_ext(&execbuf,
DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
&execbuf.timeline_fences.base);
} else if (execbuf.syncobjs) {
execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
execbuf.execbuf.num_cliprects = execbuf.syncobj_count;
execbuf.execbuf.cliprects_ptr = (uintptr_t)execbuf.syncobjs;
}

if (has_perf_query) {
struct anv_query_pool *query_pool = submit->perf_query_pool;
assert(submit->perf_query_pass < query_pool->n_passes);
assert(perf_query_pass < perf_query_pool->n_passes);
struct intel_perf_query_info *query_info =
query_pool->pass_query[submit->perf_query_pass];
perf_query_pool->pass_query[perf_query_pass];

/* Some performance queries just the pipeline statistic HW, no need for
* OA in that case, so no need to reconfigure.
@@ -2013,7 +2107,7 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
}
}

struct anv_bo *pass_batch_bo = query_pool->bo;
struct anv_bo *pass_batch_bo = perf_query_pool->bo;

struct drm_i915_gem_exec_object2 query_pass_object = {
.handle = pass_batch_bo->gem_handle,
@@ -2023,8 +2117,8 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
.buffers_ptr = (uintptr_t) &query_pass_object,
.buffer_count = 1,
.batch_start_offset = khr_perf_query_preamble_offset(query_pool,
submit->perf_query_pass),
.batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
perf_query_pass),
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags,
.rsvd1 = device->context_id,
};
@@ -2048,9 +2142,211 @@ anv_queue_execbuf_locked(struct anv_queue *queue,
}

error:
pthread_cond_broadcast(&device->queue_submit);

anv_execbuf_finish(&execbuf);

return result;
}

static VkResult
anv_queue_submit_locked(struct anv_queue *queue,
struct vk_queue_submit *submit)
{
VkResult result;

if (submit->command_buffer_count == 0) {
result = anv_queue_exec_locked(queue, submit->wait_count, submit->waits,
0 /* cmd_buffer_count */,
NULL /* cmd_buffers */,
submit->signal_count, submit->signals,
NULL /* perf_query_pool */,
0 /* perf_query_pass */);
if (result != VK_SUCCESS)
return result;
} else {
struct anv_query_pool *perf_query_pool = NULL;
uint32_t start = 0;

/* Everything's easier if we don't have to bother with container_of() */
STATIC_ASSERT(offsetof(struct anv_cmd_buffer, vk) == 0);
struct vk_command_buffer **vk_cmd_buffers = submit->command_buffers;
struct anv_cmd_buffer **cmd_buffers = (void *)vk_cmd_buffers;

const uint32_t end = submit->command_buffer_count;
while (start < end) {
uint32_t i = start + 1;
for (; i < end; i++) {
/* Can we chain the last buffer into the next one? */
if (!anv_cmd_buffer_is_chainable(cmd_buffers[i]))
break;

if (cmd_buffers[i]->perf_query_pool != NULL) {
if (perf_query_pool != NULL) {
/* They have to have the same query pool */
if (cmd_buffers[i]->perf_query_pool != perf_query_pool)
break;
} else {
perf_query_pool = cmd_buffers[i]->perf_query_pool;
}
}
}

VkResult result =
anv_queue_exec_locked(queue,
start == 0 ? submit->wait_count : 0,
start == 0 ? submit->waits : NULL,
i - start, &cmd_buffers[start],
i == end ? submit->signal_count : 0,
i == end ? submit->signals : NULL,
perf_query_pool,
submit->perf_pass_index);
if (result != VK_SUCCESS)
return result;

perf_query_pool = NULL;
start = i;
}
assert(start == end);
}

for (uint32_t i = 0; i < submit->signal_count; i++) {
if (!vk_sync_is_anv_bo_sync(submit->signals[i].sync))
continue;

struct anv_bo_sync *bo_sync =
container_of(submit->signals[i].sync, struct anv_bo_sync, sync);

/* Once the execbuf has returned, we need to set the fence state to
* SUBMITTED. We can't do this before calling execbuf because
* anv_GetFenceStatus does take the global device lock before checking
* fence->state.
*
* We set the fence state to SUBMITTED regardless of whether or not the
* execbuf succeeds because we need to ensure that vkWaitForFences() and
* vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
* VK_SUCCESS) in a finite amount of time even if execbuf fails.
*/
assert(bo_sync->state == ANV_BO_SYNC_STATE_RESET);
bo_sync->state = ANV_BO_SYNC_STATE_SUBMITTED;
}

pthread_cond_broadcast(&queue->device->queue_submit);

return VK_SUCCESS;
}

VkResult
anv_queue_submit(struct vk_queue *vk_queue,
struct vk_queue_submit *submit)
{
struct anv_queue *queue = container_of(vk_queue, struct anv_queue, vk);
struct anv_device *device = queue->device;
VkResult result;

if (queue->device->info.no_hw) {
for (uint32_t i = 0; i < submit->signal_count; i++) {
result = vk_sync_signal(&device->vk,
submit->signals[i].sync,
submit->signals[i].signal_value);
if (result != VK_SUCCESS)
return vk_queue_set_lost(&queue->vk, "vk_sync_signal failed");
}
return VK_SUCCESS;
}

pthread_mutex_lock(&device->mutex);
result = anv_queue_submit_locked(queue, submit);
pthread_mutex_unlock(&device->mutex);

return result;
}

VkResult
anv_queue_submit_simple_batch(struct anv_queue *queue,
struct anv_batch *batch)
{
struct anv_device *device = queue->device;
VkResult result = VK_SUCCESS;

if (queue->device->info.no_hw)
return VK_SUCCESS;

/* This is only used by device init so we can assume the queue is empty and
* we aren't fighting with a submit thread.
*/
assert(vk_queue_is_empty(&queue->vk));

uint32_t batch_size = align_u32(batch->next - batch->start, 8);

struct anv_bo *batch_bo;
result = anv_bo_pool_alloc(&device->batch_bo_pool, batch_size, &batch_bo);
if (result != VK_SUCCESS)
return result;

memcpy(batch_bo->map, batch->start, batch_size);
if (!device->info.has_llc)
intel_flush_range(batch_bo->map, batch_size);

struct anv_execbuf execbuf;
anv_execbuf_init(&execbuf);
execbuf.alloc = &queue->device->vk.alloc;
execbuf.alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE;

result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0);
if (result != VK_SUCCESS)
goto fail;

execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
.buffers_ptr = (uintptr_t) execbuf.objects,
.buffer_count = execbuf.bo_count,
.batch_start_offset = 0,
.batch_len = batch_size,
.flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC,
.rsvd1 = device->context_id,
.rsvd2 = 0,
};

struct drm_i915_gem_exec_fence fence = {};
if (device->physical->has_syncobj_wait) {
fence.handle = anv_gem_syncobj_create(device, 0);
if (fence.handle == 0) {
result = vk_error(queue, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}

fence.flags = I915_EXEC_FENCE_SIGNAL;

execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
execbuf.execbuf.num_cliprects = 1;
execbuf.execbuf.cliprects_ptr = (uintptr_t)&fence;
}

int err = anv_gem_execbuffer(device, &execbuf.execbuf);
if (err) {
result = vk_device_set_lost(&device->vk, "anv_gem_execbuffer failed: %m");
goto fail;
}

if (fence.handle) {
err = anv_gem_syncobj_wait(device, &fence.handle, 1, INT64_MAX, true);
if (err) {
result = vk_device_set_lost(&device->vk,
"anv_gem_syncobj_wait failed: %m");
goto fail;
}
} else {
result = anv_device_wait(device, batch_bo, INT64_MAX);
if (result != VK_SUCCESS) {
result = vk_device_set_lost(&device->vk,
"anv_device_wait failed: %m");
goto fail;
}
}

fail:
anv_execbuf_finish(&execbuf);
if (fence.handle)
anv_gem_syncobj_destroy(device, fence.handle);
anv_bo_pool_free(&device->batch_bo_pool, batch_bo);

return result;
}
@@ -211,7 +211,6 @@ const struct vk_sync_type anv_bo_sync_type = {
.reset = anv_bo_sync_reset,
.wait_many = anv_bo_sync_wait,
};
VK_DECL_TIMELINE_TYPE(anv_bo_timeline_type, &anv_bo_sync_type);

VkResult
anv_sync_create_for_bo(struct anv_device *device,
@@ -53,6 +53,7 @@
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "vk_drm_syncobj.h"
#include "common/intel_aux_map.h"
#include "common/intel_defines.h"
#include "common/intel_uuid.h"
@@ -906,8 +907,23 @@ anv_physical_device_try_create(struct anv_instance *instance,
if (env_var_as_boolean("ANV_QUEUE_THREAD_DISABLE", false))
device->has_exec_timeline = false;

device->has_thread_submit =
device->has_syncobj_wait_available && device->has_exec_timeline;
unsigned st_idx = 0;
if (device->has_syncobj_wait) {
device->sync_types[st_idx++] = &vk_drm_binary_syncobj_type;
} else {
device->sync_types[st_idx++] = &vk_drm_binary_syncobj_no_wait_type;
device->sync_types[st_idx++] = &anv_bo_sync_type;
}

if (device->has_syncobj_wait_available && device->has_exec_timeline) {
device->sync_types[st_idx++] = &vk_drm_timeline_syncobj_type;
} else {
device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
}
device->sync_types[st_idx++] = NULL;
assert(st_idx <= ARRAY_SIZE(device->sync_types));
device->vk.supported_sync_types = device->sync_types;

device->always_use_bindless =
env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);
@@ -3021,6 +3037,7 @@ VkResult anv_CreateDevice(
}

device->vk.check_status = anv_device_check_status;
vk_device_set_drm_fd(&device->vk, device->fd);

uint32_t num_queues = 0;
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
@@ -3065,8 +3082,6 @@ VkResult anv_CreateDevice(
anv_gem_set_context_param(device->fd, device->context_id,
I915_CONTEXT_PARAM_RECOVERABLE, false);

device->has_thread_submit = physical_device->has_thread_submit;

device->queues =
vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
@@ -3464,30 +3479,6 @@ anv_device_check_status(struct vk_device *vk_device)
return VK_SUCCESS;
}

VkResult
anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo)
{
/* Note: This only returns whether or not the BO is in use by an i915 GPU.
* Other usages of the BO (such as on different hardware) will not be
* flagged as "busy" by this ioctl. Use with care.
*/
int ret = anv_gem_busy(device, bo->gem_handle);
if (ret == 1) {
return VK_NOT_READY;
} else if (ret == -1) {
/* We don't know the real error. */
return vk_device_set_lost(&device->vk, "gem wait failed: %m");
}

/* Query for device status after the busy call. If the BO we're checking
* got caught in a GPU hang we don't want to return VK_SUCCESS to the
* client because it clearly doesn't have valid data. Yes, this most
* likely means an ioctl, but we just did an ioctl to query the busy status
* so it's no great loss.
*/
return vk_device_check_status(&device->vk);
}

VkResult
anv_device_wait(struct anv_device *device, struct anv_bo *bo,
int64_t timeout)
@@ -66,17 +66,17 @@
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
#include "vk_drm_syncobj.h"
#include "vk_enum_defines.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_sync.h"
#include "vk_timeline.h"
#include "vk_sync_timeline.h"
#include "vk_util.h"
#include "vk_command_buffer.h"
#include "vk_queue.h"
#include "vk_sync.h"
#include "vk_log.h"

/* Pre-declarations needed for WSI entrypoints */
@@ -922,7 +922,6 @@ struct anv_physical_device {
bool has_syncobj_wait_available;
int max_context_priority;
bool has_context_isolation;
bool has_thread_submit;
bool has_mmap_offset;
bool has_userptr_probe;
uint64_t gtt_size;
@@ -977,6 +976,9 @@ struct anv_physical_device {
uint8_t driver_uuid[VK_UUID_SIZE];
uint8_t device_uuid[VK_UUID_SIZE];

struct vk_sync_timeline_type sync_timeline_type;
const struct vk_sync_type * sync_types[4];

struct disk_cache * disk_cache;

struct wsi_device wsi_device;
@@ -1017,53 +1019,6 @@ struct anv_instance {
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
void anv_finish_wsi(struct anv_physical_device *physical_device);

struct anv_queue_submit {
struct anv_cmd_buffer ** cmd_buffers;
uint32_t cmd_buffer_count;
uint32_t cmd_buffer_array_length;

uint32_t fence_count;
uint32_t fence_array_length;
struct drm_i915_gem_exec_fence * fences;
uint64_t * fence_values;

uint32_t temporary_semaphore_count;
uint32_t temporary_semaphore_array_length;
struct anv_semaphore_impl * temporary_semaphores;

/* Allocated only with non shareable timelines. */
union {
struct anv_timeline ** wait_timelines;
uint32_t * wait_timeline_syncobjs;
};
uint32_t wait_timeline_count;
uint32_t wait_timeline_array_length;
uint64_t * wait_timeline_values;

struct anv_timeline ** signal_timelines;
uint32_t signal_timeline_count;
uint32_t signal_timeline_array_length;
uint64_t * signal_timeline_values;

uint32_t fence_bo_count;
uint32_t fence_bo_array_length;
/* An array of struct anv_bo pointers with lower bit used as a flag to
* signal we will wait on that BO (see anv_(un)pack_ptr).
*/
uintptr_t * fence_bos;

int perf_query_pass;
struct anv_query_pool * perf_query_pool;

const VkAllocationCallbacks * alloc;
VkSystemAllocationScope alloc_scope;

struct anv_bo * simple_bo;
uint32_t simple_bo_size;

struct list_head link;
};

struct anv_queue {
struct vk_queue vk;

@@ -1072,22 +1027,6 @@ struct anv_queue {
const struct anv_queue_family * family;

uint32_t exec_flags;

/*
* This mutext protects the variables below.
*/
pthread_mutex_t mutex;

pthread_t thread;
pthread_cond_t cond;

/*
* A list of struct anv_queue_submit to be submitted to i915.
*/
struct list_head queued_submits;

/* Set to true to stop the submission thread */
bool quit;
};

struct anv_pipeline_cache {
@@ -1176,7 +1115,6 @@ struct anv_device {
int fd;
bool can_chain_batches;
bool robust_buffer_access;
bool has_thread_submit;

pthread_mutex_t vma_mutex;
struct util_vma_heap vma_lo;
@@ -1398,7 +1336,6 @@ anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
}

VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
int64_t timeout);

@@ -1408,13 +1345,11 @@ VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
uint32_t index_in_family);
void anv_queue_finish(struct anv_queue *queue);

VkResult anv_queue_execbuf_locked(struct anv_queue *queue, struct anv_queue_submit *submit);
VkResult anv_queue_submit(struct vk_queue *queue,
struct vk_queue_submit *submit);
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
struct anv_batch *batch);

uint64_t anv_gettime_ns(void);
uint64_t anv_get_absolute_timeout(uint64_t timeout);

void* anv_gem_mmap(struct anv_device *device,
uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
@@ -3264,7 +3199,6 @@ struct anv_bo_sync {
};

extern const struct vk_sync_type anv_bo_sync_type;
extern const struct vk_timeline_type anv_bo_timeline_type;

static inline bool
vk_sync_is_anv_bo_sync(const struct vk_sync *sync)
@@ -3276,162 +3210,12 @@ VkResult anv_sync_create_for_bo(struct anv_device *device,
struct anv_bo *bo,
struct vk_sync **sync_out);

enum anv_fence_type {
ANV_FENCE_TYPE_NONE = 0,
ANV_FENCE_TYPE_BO,
ANV_FENCE_TYPE_WSI_BO,
ANV_FENCE_TYPE_SYNCOBJ,
ANV_FENCE_TYPE_WSI,
};

enum anv_bo_fence_state {
/** Indicates that this is a new (or newly reset fence) */
ANV_BO_FENCE_STATE_RESET,

/** Indicates that this fence has been submitted to the GPU but is still
* (as far as we know) in use by the GPU.
*/
ANV_BO_FENCE_STATE_SUBMITTED,

ANV_BO_FENCE_STATE_SIGNALED,
};

struct anv_fence_impl {
enum anv_fence_type type;

union {
/** Fence implementation for BO fences
*
* These fences use a BO and a set of CPU-tracked state flags. The BO
* is added to the object list of the last execbuf call in a QueueSubmit
* and is marked EXEC_WRITE. The state flags track when the BO has been
* submitted to the kernel. We need to do this because Vulkan lets you
* wait on a fence that has not yet been submitted and I915_GEM_BUSY
* will say it's idle in this case.
*/
struct {
struct anv_bo *bo;
enum anv_bo_fence_state state;
} bo;

/** DRM syncobj handle for syncobj-based fences */
uint32_t syncobj;

/** WSI fence */
struct vk_sync *sync_wsi;
};
};

struct anv_fence {
struct vk_object_base base;

/* Permanent fence state. Every fence has some form of permanent state
* (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on (for
* cross-process fences) or it could just be a dummy for use internally.
*/
struct anv_fence_impl permanent;

/* Temporary fence state. A fence *may* have temporary state. That state
* is added to the fence by an import operation and is reset back to
* ANV_SEMAPHORE_TYPE_NONE when the fence is reset. A fence with temporary
* state cannot be signaled because the fence must already be signaled
* before the temporary state can be exported from the fence in the other
* process and imported here.
*/
struct anv_fence_impl temporary;
};

void anv_fence_reset_temporary(struct anv_device *device,
struct anv_fence *fence);

struct anv_event {
struct vk_object_base base;
uint64_t semaphore;
struct anv_state state;
};

enum anv_semaphore_type {
ANV_SEMAPHORE_TYPE_NONE = 0,
ANV_SEMAPHORE_TYPE_WSI_BO,
ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
ANV_SEMAPHORE_TYPE_TIMELINE,
ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
};

struct anv_timeline_point {
struct list_head link;

uint64_t serial;

/* Number of waiter on this point, when > 0 the point should not be garbage
* collected.
*/
int waiting;

/* BO used for synchronization. */
struct anv_bo *bo;
};

struct anv_timeline {
pthread_mutex_t mutex;
pthread_cond_t cond;

uint64_t highest_past;
uint64_t highest_pending;

struct list_head points;
struct list_head free_points;
};

struct anv_semaphore_impl {
enum anv_semaphore_type type;

union {
/* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO
* or type == ANV_SEMAPHORE_TYPE_WSI_BO. This BO will be added to the
* object list on any execbuf2 calls for which this semaphore is used as
* a wait or signal fence. When used as a signal fence or when type ==
* ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will be set.
*/
struct anv_bo *bo;

/* Sync object handle when type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ.
* Unlike GEM BOs, DRM sync objects aren't deduplicated by the kernel on
* import so we don't need to bother with a userspace cache.
*/
uint32_t syncobj;

/* Non shareable timeline semaphore
*
* Used when kernel don't have support for timeline semaphores.
*/
struct anv_timeline timeline;
};
};

struct anv_semaphore {
struct vk_object_base base;

/* Permanent semaphore state. Every semaphore has some form of permanent
* state (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on
* (for cross-process semaphores0 or it could just be a dummy for use
* internally.
*/
struct anv_semaphore_impl permanent;

/* Temporary semaphore state. A semaphore *may* have temporary state.
* That state is added to the semaphore by an import operation and is reset
* back to ANV_SEMAPHORE_TYPE_NONE when the semaphore is waited on. A
* semaphore with temporary state cannot be signaled because the semaphore
* must already be signaled before the temporary state can be exported from
* the semaphore in the other process and imported here.
*/
struct anv_semaphore_impl temporary;
};

void anv_semaphore_reset_temporary(struct anv_device *device,
struct anv_semaphore *semaphore);

#define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)

#define anv_foreach_stage(stage, stage_bits) \
@@ -4787,7 +4571,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, base, VkFramebuffer,
VK_OBJECT_TYPE_FRAMEBUFFER)
@@ -4806,8 +4589,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, base, VkRenderPass,
VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, base, VkSemaphore,
VK_OBJECT_TYPE_SEMAPHORE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
VkSamplerYcbcrConversion,
VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
[File diff suppressed because it is too large]
@@ -24,6 +24,9 @@
#include "anv_private.h"
#include "anv_measure.h"
#include "wsi_common.h"
#include "vk_fence.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_util.h"

static PFN_vkVoidFunction
@@ -39,19 +42,19 @@ anv_wsi_signal_semaphore_for_memory(VkDevice _device,
VkDeviceMemory _memory)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_semaphore, semaphore, _semaphore);
VK_FROM_HANDLE(vk_semaphore, semaphore, _semaphore);
ANV_FROM_HANDLE(anv_device_memory, memory, _memory);
ASSERTED VkResult result;

/* Put a BO semaphore with the image BO in the temporary. For BO binary
* semaphores, we always set EXEC_OBJECT_WRITE so this creates a WaR
* hazard with the display engine's read to ensure that no one writes to
* the image before the read is complete.
*/
anv_semaphore_reset_temporary(device, semaphore);
vk_semaphore_reset_temporary(&device->vk, semaphore);

struct anv_semaphore_impl *impl = &semaphore->temporary;
impl->type = ANV_SEMAPHORE_TYPE_WSI_BO;
impl->bo = anv_bo_ref(memory->bo);
result = anv_sync_create_for_bo(device, memory->bo, &semaphore->temporary);
assert(result == VK_SUCCESS);
}

static void
@@ -60,19 +63,18 @@ anv_wsi_signal_fence_for_memory(VkDevice _device,
VkDeviceMemory _memory)
{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_fence, fence, _fence);
VK_FROM_HANDLE(vk_fence, fence, _fence);
ANV_FROM_HANDLE(anv_device_memory, memory, _memory);
ASSERTED VkResult result;

/* Put a BO fence with the image BO in the temporary. For BO fences, we
* always just wait until the BO isn't busy and reads from the BO should
* count as busy.
*/
anv_fence_reset_temporary(device, fence);
vk_fence_reset_temporary(&device->vk, fence);

struct anv_fence_impl *impl = &fence->temporary;
impl->type = ANV_FENCE_TYPE_WSI_BO;
impl->bo.bo = anv_bo_ref(memory->bo);
impl->bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
result = anv_sync_create_for_bo(device, memory->bo, &fence->temporary);
assert(result == VK_SUCCESS);
}

VkResult
@@ -118,6 +120,7 @@ VkResult anv_QueuePresentKHR(
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
struct anv_device *device = queue->device;
VkResult result;

if (device->debug_frame_desc) {
device->debug_frame_desc->frame_id++;
@@ -127,64 +130,24 @@ VkResult anv_QueuePresentKHR(
}
}

if (device->has_thread_submit &&
pPresentInfo->waitSemaphoreCount > 0) {
/* Make sure all of the dependency semaphores have materialized when
* using a threaded submission.
*/
VK_MULTIALLOC(ma);
VK_MULTIALLOC_DECL(&ma, uint64_t, values,
pPresentInfo->waitSemaphoreCount);
VK_MULTIALLOC_DECL(&ma, uint32_t, syncobjs,
pPresentInfo->waitSemaphoreCount);
result = vk_queue_wait_before_present(&queue->vk, pPresentInfo);
if (result != VK_SUCCESS)
return result;

if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND))
return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

uint32_t wait_count = 0;
for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++) {
ANV_FROM_HANDLE(anv_semaphore, semaphore, pPresentInfo->pWaitSemaphores[i]);
struct anv_semaphore_impl *impl =
semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
&semaphore->temporary : &semaphore->permanent;

assert(impl->type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ);
syncobjs[wait_count] = impl->syncobj;
values[wait_count] = 0;
wait_count++;
}

int ret = 0;
if (wait_count > 0) {
ret =
anv_gem_syncobj_timeline_wait(device,
syncobjs, values, wait_count,
anv_get_absolute_timeout(INT64_MAX),
true /* wait_all */,
true /* wait_materialize */);
}

vk_free(&device->vk.alloc, values);

if (ret)
return vk_error(queue, VK_ERROR_DEVICE_LOST);
}

VkResult result = wsi_common_queue_present(&device->physical->wsi_device,
anv_device_to_handle(queue->device),
_queue, 0,
pPresentInfo);
result = wsi_common_queue_present(&device->physical->wsi_device,
anv_device_to_handle(queue->device),
_queue, 0,
pPresentInfo);

for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++) {
ANV_FROM_HANDLE(anv_semaphore, semaphore, pPresentInfo->pWaitSemaphores[i]);
VK_FROM_HANDLE(vk_semaphore, semaphore, pPresentInfo->pWaitSemaphores[i]);
/* From the Vulkan 1.0.53 spec:
*
* "If the import is temporary, the implementation must restore the
* semaphore to its prior permanent state after submitting the next
* semaphore wait operation."
*/
anv_semaphore_reset_temporary(queue->device, semaphore);
vk_semaphore_reset_temporary(&queue->device->vk, semaphore);
}

return result;
@@ -22,6 +22,7 @@

#include "anv_private.h"
#include "wsi_common.h"
#include "vk_fence.h"
#include "vk_util.h"
#include "wsi_common_display.h"

@@ -34,26 +35,27 @@ anv_RegisterDeviceEventEXT(VkDevice _device,
VkFence *_fence)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_fence *fence;
struct vk_fence *fence;
VkResult ret;

fence = vk_object_zalloc(&device->vk, allocator, sizeof (*fence),
VK_OBJECT_TYPE_FENCE);
if (!fence)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

fence->permanent.type = ANV_FENCE_TYPE_WSI;
const VkFenceCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.flags = 0,
};
ret = vk_fence_create(&device->vk, &info, allocator, &fence);
if (ret != VK_SUCCESS)
return ret;

ret = wsi_register_device_event(_device,
&device->physical->wsi_device,
device_event_info,
allocator,
&fence->permanent.sync_wsi,
&fence->temporary,
-1);
if (ret == VK_SUCCESS)
*_fence = anv_fence_to_handle(fence);
*_fence = vk_fence_to_handle(fence);
else
vk_free2(&device->vk.alloc, allocator, fence);
vk_fence_destroy(&device->vk, fence, allocator);
return ret;
}

@@ -65,23 +67,24 @@ anv_RegisterDisplayEventEXT(VkDevice _device,
VkFence *_fence)
{
ANV_FROM_HANDLE(anv_device, device, _device);
struct anv_fence *fence;
struct vk_fence *fence;
VkResult ret;

fence = vk_object_zalloc(&device->vk, allocator, sizeof (*fence),
VK_OBJECT_TYPE_FENCE);
if (!fence)
return VK_ERROR_OUT_OF_HOST_MEMORY;

fence->permanent.type = ANV_FENCE_TYPE_WSI;
const VkFenceCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
.flags = 0,
};
ret = vk_fence_create(&device->vk, &info, allocator, &fence);
if (ret != VK_SUCCESS)
return ret;

ret = wsi_register_display_event(
_device, &device->physical->wsi_device,
display, display_event_info, allocator, &fence->permanent.sync_wsi, -1);
display, display_event_info, allocator, &fence->temporary, -1);

if (ret == VK_SUCCESS)
*_fence = anv_fence_to_handle(fence);
*_fence = vk_fence_to_handle(fence);
else
vk_free2(&device->vk.alloc, allocator, fence);
vk_fence_destroy(&device->vk, fence, allocator);
return ret;
}