anv/xe: Bind queue per anv_queue

The Xe uAPI is designed to use bind queues such that binds without input
dependencies (sync objects) do not block on binds with input
dependencies.

For example:

- Bind A (sparse) is submitted with a list of input dependencies.
- Bind B (immediate) is subsequently submitted without a list of input
  dependencies.

If Bind A and Bind B share a single bind queue, Bind B will not be
scheduled until Bind A completes. Using individual bind queues decouples
Bind A and Bind B, allowing Bind B to make immediate progress.
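
To make the decoupling concrete, here is a minimal sketch (not part of
the patch) of issuing the two binds above on separate bind queues. The
helper name, the fd/ids, and the include path are illustrative
assumptions; the structs and DRM_IOCTL_XE_VM_BIND itself come from the
Xe uAPI (xe_drm.h):

#include <stdint.h>
#include <sys/ioctl.h>
#include "drm-uapi/xe_drm.h"

/* Hypothetical helper: submit one bind op on a given bind queue. */
static int
submit_bind(int fd, uint32_t vm_id, uint32_t bind_queue_id,
            const struct drm_xe_vm_bind_op *op,
            struct drm_xe_sync *syncs, uint32_t num_syncs)
{
   struct drm_xe_vm_bind args = {
      .vm_id = vm_id,
      /* With a bind queue per anv_queue, this submission only orders
       * against earlier binds on the same bind queue.
       */
      .exec_queue_id = bind_queue_id,
      .num_binds = 1,
      .bind = *op,
      .num_syncs = num_syncs,
      .syncs = (uintptr_t)syncs,
   };
   return ioctl(fd, DRM_IOCTL_XE_VM_BIND, &args);
}

Bind A would be submitted through queue A's bind queue with the
application's wait semaphores in syncs; Bind B goes to queue B's bind
queue with num_syncs == 0, so the KMD is free to schedule it right away.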

This change creates a separate bind queue for each ANV queue, enabling
support for sparse bindings that may have input dependencies.

v2:
 - Bail on bind queue creation failure (Lionel)
 - Only create bind queue if VK_QUEUE_SPARSE_BINDING_BIT is set (Jose)
v3:
 - Add comment around submit->queue usage (Jose)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32873>

@@ -1340,6 +1340,8 @@ struct anv_queue {
       uint32_t exec_queue_id; /* Xe */
    };
 
+   uint32_t bind_queue_id; /* Xe */
+
    /** Context/Engine id which executes companion RCS command buffer */
    uint32_t companion_rcs_id;


@@ -210,6 +210,10 @@ xe_vm_bind_op(struct anv_device *device,
    struct drm_xe_vm_bind args = {
       .vm_id = device->vm_id,
       .num_binds = submit->binds_len,
+      /* submit->queue will be set for sparse bindings, which the
+       * application is required to synchronize access to.
+       */
+      .exec_queue_id = submit->queue ? submit->queue->bind_queue_id : 0,
       .bind = {},
       .num_syncs = num_syncs,
       .syncs = (uintptr_t)xe_syncs,
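
When submit->queue is NULL (immediate, non-sparse binds), the fallback
exec_queue_id of 0 selects the VM's default bind engine per the Xe uAPI.
For the sparse path, the input dependencies arrive through the xe_syncs
array above: a drm_xe_sync without DRM_XE_SYNC_FLAG_SIGNAL is waited on
before the bind executes, while one with the flag is signaled on
completion. A hedged sketch of shaping such an array (the helper and its
parameters are hypothetical; the struct and flags are from xe_drm.h):

#include <stdint.h>
#include "drm-uapi/xe_drm.h"

/* Hypothetical helper: build num_waits input deps plus one completion
 * signal. out must have room for num_waits + 1 entries. */
static void
fill_xe_syncs(struct drm_xe_sync *out, const uint32_t *wait_handles,
              uint32_t num_waits, uint32_t signal_handle)
{
   for (uint32_t i = 0; i < num_waits; i++) {
      out[i] = (struct drm_xe_sync) {
         .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
         .handle = wait_handles[i], /* no SIGNAL flag: an input dep */
      };
   }
   out[num_waits] = (struct drm_xe_sync) {
      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
      .flags = DRM_XE_SYNC_FLAG_SIGNAL, /* fires when the bind completes */
      .handle = signal_handle,
   };
}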


@@ -49,6 +49,16 @@ anv_vk_priority_to_drm_sched_priority(VkQueueGlobalPriorityKHR vk_priority)
    }
 }
 
+static void
+destroy_engine(struct anv_device *device, uint32_t exec_queue_id)
+{
+   struct drm_xe_exec_queue_destroy destroy = {
+      .exec_queue_id = exec_queue_id,
+   };
+
+   intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &destroy);
+}
+
 static VkResult
 create_engine(struct anv_device *device,
               struct anv_queue *queue,

@@ -123,6 +133,22 @@ create_engine(struct anv_device *device,
    else
       queue->exec_queue_id = create.exec_queue_id;
 
+   if (!create_companion_rcs_engine &&
+       queue_family->queueFlags & VK_QUEUE_SPARSE_BINDING_BIT) {
+      struct drm_xe_engine_class_instance bind_instance = {
+         .engine_class = DRM_XE_ENGINE_CLASS_VM_BIND,
+      };
+
+      create.num_placements = 1;
+      create.instances = (uintptr_t)&bind_instance;
+      create.extensions = 0;
+      ret = intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
+      if (ret) {
+         destroy_engine(device, queue->exec_queue_id);
+         return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create bind queue");
+      }
+
+      queue->bind_queue_id = create.exec_queue_id;
+   }
+
    return VK_SUCCESS;
 }
@@ -179,25 +205,67 @@ anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id)
 }
 
 static void
-destroy_engine(struct anv_device *device, uint32_t exec_queue_id)
+bind_engine_idle(struct anv_device *device, uint32_t exec_queue_id)
 {
-   struct drm_xe_exec_queue_destroy destroy = {
-      .exec_queue_id = exec_queue_id,
+   struct drm_syncobj_create syncobj_create = {};
+   struct drm_xe_sync xe_sync = {
+      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+   };
+   struct drm_xe_vm_bind args = {
+      .vm_id = device->vm_id,
+      .num_binds = 0,
+      .exec_queue_id = exec_queue_id,
+      .bind = {},
+      .num_syncs = 1,
+      .syncs = (uintptr_t)&xe_sync,
    };
+   struct drm_syncobj_destroy syncobj_destroy = {};
+   int ret = intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create);
 
-   /* The application could submit a workload and destroy the queue before it
-    * is done, causing job timeouts in the Xe KMD, as it doesn't have
-    * permanent exec queues.
+   assert(ret == 0);
+   xe_sync.handle = syncobj_create.handle;
+
+   /* Using the special args.num_binds == 0 handling to get the syncobj
+    * signaled when the last DRM_IOCTL_XE_VM_BIND is completed.
     */
-   anv_xe_wait_exec_queue_idle(device, exec_queue_id);
-   intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &destroy);
+   ret = intel_ioctl(device->fd, DRM_IOCTL_XE_VM_BIND, &args);
+   if (ret) {
+      /* The exec_queue could have been banned, which may be why it is being
+       * destroyed, so no assert() here.
+       */
+      goto error_bind;
+   }
+
+   struct drm_syncobj_wait syncobj_wait = {
+      .count_handles = 1,
+      .timeout_nsec = INT64_MAX,
+      .handles = (uintptr_t)&syncobj_create.handle,
+   };
+   ret = intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &syncobj_wait);
+   assert(ret == 0);
+
+error_bind:
+   syncobj_destroy.handle = syncobj_create.handle;
+   intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy);
 }
 
 void
 anv_xe_destroy_engine(struct anv_device *device, struct anv_queue *queue)
 {
+   /* The application could submit a workload and destroy the queue before it
+    * is done, causing job timeouts in the Xe KMD, as it doesn't have
+    * permanent exec queues.
+    */
+   anv_xe_wait_exec_queue_idle(device, queue->exec_queue_id);
    destroy_engine(device, queue->exec_queue_id);
 
-   if (queue->companion_rcs_id != 0)
+   if (queue->companion_rcs_id != 0) {
+      anv_xe_wait_exec_queue_idle(device, queue->companion_rcs_id);
       destroy_engine(device, queue->companion_rcs_id);
+   }
+
+   if (queue->bind_queue_id != 0) {
+      bind_engine_idle(device, queue->bind_queue_id);
+      destroy_engine(device, queue->bind_queue_id);
+   }
 }
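
For reference, the num_binds == 0 trick used by bind_engine_idle() can
be shown as a self-contained sketch: a DRM_IOCTL_XE_VM_BIND that carries
no bind operations does no work, but its signal syncs still fire once
all previously submitted binds on that queue have completed, which makes
it act as a fence on the bind queue. The function name, error handling,
and include path below are assumptions; the ioctls and structs are the
same uAPI ones used in the patch.

#include <assert.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include "drm-uapi/xe_drm.h" /* also pulls in the drm_syncobj_* uAPI */

static void
wait_bind_queue_idle(int fd, uint32_t vm_id, uint32_t bind_queue_id)
{
   struct drm_syncobj_create create = {};
   int ret = ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
   assert(ret == 0);

   struct drm_xe_sync sync = {
      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
      .handle = create.handle,
   };
   struct drm_xe_vm_bind bind = {
      .vm_id = vm_id,
      .exec_queue_id = bind_queue_id,
      .num_binds = 0, /* no bind ops: just queue the signal */
      .num_syncs = 1,
      .syncs = (uintptr_t)&sync,
   };

   /* No assert: the submission can fail if the queue was banned. */
   if (ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind) == 0) {
      struct drm_syncobj_wait wait = {
         .handles = (uintptr_t)&create.handle,
         .count_handles = 1,
         .timeout_nsec = INT64_MAX,
      };
      ret = ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
      assert(ret == 0);
   }

   struct drm_syncobj_destroy destroy = { .handle = create.handle };
   ret = ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy);
   assert(ret == 0);
}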