iris: Wait for drm_xe_exec_queue to be idle before destroying it

Xe KMD doesn't refcount anything, so resources could be freed while they
are still in use if we don't wait for the exec_queue to be idle.

This issue was found through Xe KMD error capture: the VM had already
been destroyed by the time the KMD attempted to capture the error state.
But it can also happen in applications that did not hang.

This fixes the '*ERROR* GT0: TLB invalidation' errors seen when running
the full piglit test list.

Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27500>
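
The fix leans on Xe's special handling of DRM_IOCTL_XE_EXEC with
num_batch_buffer == 0: no new work is submitted, but the syncobj attached
with DRM_XE_SYNC_FLAG_SIGNAL is signaled once everything previously queued
on the exec queue has completed. Stripped of the iris wrappers, the drain
pattern looks roughly like this (a sketch against plain libdrm; the
xe_drm.h uAPI names match the diff below, while the helper name, include
path, and error handling are illustrative):

#include <stdint.h>
#include <xf86drm.h>
#include "xe_drm.h"   /* kernel uAPI header; install path varies */

/* Block until every exec previously submitted on exec_queue_id has
 * completed. Returns 0 once the queue is idle, non-zero on failure. */
static int
xe_exec_queue_drain(int fd, uint32_t exec_queue_id)
{
   uint32_t syncobj;
   int ret = drmSyncobjCreate(fd, 0, &syncobj);
   if (ret)
      return ret;

   struct drm_xe_sync sync = {
      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
      .handle = syncobj,
   };
   /* num_batch_buffer == 0: submit nothing, just signal `sync` when the
    * queue goes idle. */
   struct drm_xe_exec exec = {
      .exec_queue_id = exec_queue_id,
      .num_syncs = 1,
      .syncs = (uintptr_t)&sync,
      .num_batch_buffer = 0,
   };

   ret = drmIoctl(fd, DRM_IOCTL_XE_EXEC, &exec);
   if (ret == 0)
      ret = drmSyncobjWait(fd, &syncobj, 1, INT64_MAX, 0, NULL);

   drmSyncobjDestroy(fd, syncobj);
   return ret;
}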
Author: José Roberto de Souza
Date: 2024-02-06 10:27:54 -08:00 (committed by Marge Bot)
Parent: 138303fb9d
Commit: 665d30b544

3 changed files with 56 additions and 5 deletions

src/gallium/drivers/iris/iris_batch.c

@@ -862,8 +862,8 @@ iris_batch_name_to_string(enum iris_batch_name name)
    return names[name];
 }
 
-static inline bool
-context_or_exec_queue_was_banned(struct iris_bufmgr *bufmgr, int ret)
+bool
+iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret)
 {
    enum intel_kmd_type kmd_type = iris_bufmgr_get_device_info(bufmgr)->kmd_type;
 
@@ -960,7 +960,7 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
    * has been lost and needs to be re-initialized. If this succeeds,
    * dubiously claim success...
    */
-   if (ret && context_or_exec_queue_was_banned(bufmgr, ret)) {
+   if (ret && iris_batch_is_banned(bufmgr, ret)) {
      enum pipe_reset_status status = iris_batch_check_for_reset(batch);
 
      if (status != PIPE_NO_RESET || ice->context_reset_signaled)
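
The rename above only touches the signature; the function body is
unchanged context outside the hunk. For orientation, a hypothetical
sketch of the shape of this check (not the verbatim Mesa body; it assumes
i915 reports a banned context as EIO and Xe reports a banned exec queue
as ECANCELED, and accepts either sign convention for the error):

bool
iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret)
{
   enum intel_kmd_type kmd_type =
      iris_bufmgr_get_device_info(bufmgr)->kmd_type;
   /* Callers pass either a negative ioctl result or a raw errno value. */
   const int err = ret < 0 ? -ret : ret;

   switch (kmd_type) {
   case INTEL_KMD_TYPE_I915:
      return err == EIO;       /* i915 bans the whole context */
   case INTEL_KMD_TYPE_XE:
      return err == ECANCELED; /* Xe bans the exec queue */
   default:
      return false;
   }
}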

src/gallium/drivers/iris/iris_batch.h

@@ -446,6 +446,9 @@ iris_batch_mark_reset_sync(struct iris_batch *batch)
 const char *
 iris_batch_name_to_string(enum iris_batch_name name);
 
+bool
+iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret);
+
 #define iris_foreach_batch(ice, batch) \
    for (struct iris_batch *batch = &ice->batches[0]; \
         batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo->ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \

src/gallium/drivers/iris/xe/iris_batch.c

@@ -151,7 +151,46 @@ void iris_xe_init_batches(struct iris_context *ice)
    free(engines_info);
 }
 
-void iris_xe_destroy_batch(struct iris_batch *batch)
+/*
+ * Wait for all previous DRM_IOCTL_XE_EXEC calls on the
+ * drm_xe_exec_queue in this iris_batch to complete.
+ */
+static void
+iris_xe_wait_exec_queue_idle(struct iris_batch *batch)
+{
+   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
+   struct iris_syncobj *syncobj = iris_create_syncobj(bufmgr);
+   struct drm_xe_sync xe_sync = {
+      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+   };
+   struct drm_xe_exec exec = {
+      .exec_queue_id = batch->xe.exec_queue_id,
+      .num_syncs = 1,
+      .syncs = (uintptr_t)&xe_sync,
+   };
+   int ret;
+
+   if (!syncobj)
+      return;
+
+   xe_sync.handle = syncobj->handle;
+   /* Using the special exec.num_batch_buffer == 0 handling to get the syncobj
+    * signaled when the last DRM_IOCTL_XE_EXEC completes.
+    */
+   ret = intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_XE_EXEC, &exec);
+   if (ret == 0) {
+      ASSERTED bool signaled = iris_wait_syncobj(bufmgr, syncobj, INT64_MAX);
+      assert(signaled);
+   } else {
+      assert(iris_batch_is_banned(bufmgr, errno));
+   }
+
+   iris_syncobj_destroy(bufmgr, syncobj);
+}
+
+static void
+iris_xe_destroy_exec_queue(struct iris_batch *batch)
 {
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
@@ -165,6 +204,15 @@ void iris_xe_destroy_batch(struct iris_batch *batch)
    assert(ret == 0);
 }
 
+void iris_xe_destroy_batch(struct iris_batch *batch)
+{
+   /* Xe KMD doesn't refcount anything, so resources could be freed while
+    * they are still in use if we don't wait for the exec_queue to be idle.
+    */
+   iris_xe_wait_exec_queue_idle(batch);
+   iris_xe_destroy_exec_queue(batch);
+}
+
 bool iris_xe_replace_batch(struct iris_batch *batch)
 {
    enum intel_engine_class engine_classes[IRIS_BATCH_COUNT];
@@ -184,7 +232,7 @@ bool iris_xe_replace_batch(struct iris_batch *batch)
    ret = iris_xe_init_batch(bufmgr, engines_info, engine_classes[batch->name],
                             ice->priority, &new_exec_queue_id);
    if (ret) {
-      iris_xe_destroy_batch(batch);
+      iris_xe_destroy_exec_queue(batch);
       batch->xe.exec_queue_id = new_exec_queue_id;
       iris_lost_context_state(batch);
    }
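
Note that iris_xe_replace_batch() keeps calling the non-waiting
iris_xe_destroy_exec_queue() instead of the new iris_xe_destroy_batch():
the queue being replaced has presumably been banned after a hang, so the
KMD has already cancelled its outstanding jobs, and the context state is
rebuilt through iris_lost_context_state() anyway.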