iris: Wait for drm_xe_exec_queue to be idle before destroying it
Xe KMD doesn't refcount anything, so resources could be freed while they are still in use if we don't wait for the exec_queue to be idle.

This issue was found with Xe KMD error capture: the VM was already destroyed when it attempted to capture error state, but it can also happen in applications that did not hang.

This fixes the '*ERROR* GT0: TLB invalidation' errors when running the piglit all test list.

Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27500>
commit 665d30b544
parent 138303fb9d
committed by Marge Bot
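
The fix relies on a detail of the Xe uAPI: a DRM_IOCTL_XE_EXEC call with exec.num_batch_buffer == 0 submits no new work and only attaches the given syncs, so a syncobj passed with DRM_XE_SYNC_FLAG_SIGNAL fires once everything previously queued on that exec queue has completed. Below is a minimal standalone sketch of that idle-wait pattern against the raw ioctls, independent of the iris helpers used in the patch; the function name wait_exec_queue_idle is invented here for illustration, and it assumes an already-open Xe DRM fd, an exec queue created earlier with DRM_IOCTL_XE_EXEC_QUEUE_CREATE, and the kernel's drm.h / xe_drm.h uAPI headers.

/* Hypothetical standalone sketch, not iris code.
 * "fd" is an open Xe DRM file descriptor, "exec_queue_id" an existing exec queue.
 */
#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>

#include <drm/drm.h>     /* drm_syncobj_* ioctls */
#include <drm/xe_drm.h>  /* drm_xe_exec, drm_xe_sync */

static int
wait_exec_queue_idle(int fd, uint32_t exec_queue_id)
{
   /* Create a syncobj for the kernel to signal. */
   struct drm_syncobj_create create = {0};
   if (ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &create))
      return -errno;

   struct drm_xe_sync sync = {
      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
      .handle = create.handle,
   };

   /* num_batch_buffer == 0: submit no work, just signal the syncobj once
    * all prior submissions on this exec queue have completed.
    */
   struct drm_xe_exec exec = {
      .exec_queue_id = exec_queue_id,
      .num_syncs = 1,
      .syncs = (uintptr_t)&sync,
      .num_batch_buffer = 0,
   };

   int ret = ioctl(fd, DRM_IOCTL_XE_EXEC, &exec);
   if (ret == 0) {
      /* Block until the syncobj is signaled, i.e. the queue went idle. */
      struct drm_syncobj_wait wait = {
         .handles = (uintptr_t)&create.handle,
         .count_handles = 1,
         .timeout_nsec = INT64_MAX,
      };
      ret = ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
   }
   if (ret)
      ret = -errno;

   struct drm_syncobj_destroy destroy = { .handle = create.handle };
   ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy);

   return ret;
}

The patch itself does the equivalent through iris_create_syncobj(), intel_ioctl() and iris_wait_syncobj(), as shown in the iris_batch_xe.c hunk below.
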
src/gallium/drivers/iris/iris_batch.c

@@ -862,8 +862,8 @@ iris_batch_name_to_string(enum iris_batch_name name)
    return names[name];
 }
 
-static inline bool
-context_or_exec_queue_was_banned(struct iris_bufmgr *bufmgr, int ret)
+bool
+iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret)
 {
    enum intel_kmd_type kmd_type = iris_bufmgr_get_device_info(bufmgr)->kmd_type;
 
@@ -960,7 +960,7 @@ _iris_batch_flush(struct iris_batch *batch, const char *file, int line)
     * has been lost and needs to be re-initialized. If this succeeds,
     * dubiously claim success...
     */
-   if (ret && context_or_exec_queue_was_banned(bufmgr, ret)) {
+   if (ret && iris_batch_is_banned(bufmgr, ret)) {
      enum pipe_reset_status status = iris_batch_check_for_reset(batch);
 
      if (status != PIPE_NO_RESET || ice->context_reset_signaled)
src/gallium/drivers/iris/iris_batch.h

@@ -446,6 +446,9 @@ iris_batch_mark_reset_sync(struct iris_batch *batch)
 const char *
 iris_batch_name_to_string(enum iris_batch_name name);
 
+bool
+iris_batch_is_banned(struct iris_bufmgr *bufmgr, int ret);
+
 #define iris_foreach_batch(ice, batch) \
    for (struct iris_batch *batch = &ice->batches[0]; \
         batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo->ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
src/gallium/drivers/iris/iris_batch_xe.c

@@ -151,7 +151,45 @@ void iris_xe_init_batches(struct iris_context *ice)
    free(engines_info);
 }
 
-void iris_xe_destroy_batch(struct iris_batch *batch)
+/*
+ * Wait for all previous DRM_IOCTL_XE_EXEC calls over the
+ * drm_xe_exec_queue in this iris_batch to complete.
+ **/
+static void
+iris_xe_wait_exec_queue_idle(struct iris_batch *batch)
+{
+   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
+   struct iris_syncobj *syncobj = iris_create_syncobj(bufmgr);
+   struct drm_xe_sync xe_sync = {
+      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+   };
+   struct drm_xe_exec exec = {
+      .exec_queue_id = batch->xe.exec_queue_id,
+      .num_syncs = 1,
+      .syncs = (uintptr_t)&xe_sync,
+   };
+   int ret;
+
+   if (!syncobj)
+      return;
+
+   xe_sync.handle = syncobj->handle;
+   /* Using the special exec.num_batch_buffer == 0 handling to get syncobj
+    * signaled when the last DRM_IOCTL_XE_EXEC is completed.
+    */
+   ret = intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_XE_EXEC, &exec);
+   if (ret == 0) {
+      assert(iris_wait_syncobj(bufmgr, syncobj, INT64_MAX));
+   } else {
+      assert(iris_batch_is_banned(bufmgr, errno) == true);
+   }
+
+   iris_syncobj_destroy(bufmgr, syncobj);
+}
+
+static void
+iris_xe_destroy_exec_queue(struct iris_batch *batch)
 {
    struct iris_screen *screen = batch->screen;
    struct iris_bufmgr *bufmgr = screen->bufmgr;
@@ -165,6 +203,15 @@ void iris_xe_destroy_batch(struct iris_batch *batch)
    assert(ret == 0);
 }
 
+void iris_xe_destroy_batch(struct iris_batch *batch)
+{
+   /* Xe KMD don't refcount anything, so resources could be freed while they
+    * are still in use if we don't wait for exec_queue to be idle.
+    */
+   iris_xe_wait_exec_queue_idle(batch);
+   iris_xe_destroy_exec_queue(batch);
+}
+
 bool iris_xe_replace_batch(struct iris_batch *batch)
 {
    enum intel_engine_class engine_classes[IRIS_BATCH_COUNT];
@@ -184,7 +231,7 @@ bool iris_xe_replace_batch(struct iris_batch *batch)
    ret = iris_xe_init_batch(bufmgr, engines_info, engine_classes[batch->name],
                             ice->priority, &new_exec_queue_id);
    if (ret) {
-      iris_xe_destroy_batch(batch);
+      iris_xe_destroy_exec_queue(batch);
       batch->xe.exec_queue_id = new_exec_queue_id;
       iris_lost_context_state(batch);
    }