From 65ff9f0a556a0dfb0c5f7fa973b751c0afc967d2 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Tue, 25 Jul 2023 11:48:40 -0700 Subject: [PATCH] tu: Fix data race in userspace VMA management. The sequence was two threads A and B on a shared VkDevice: A: move a BO to zombie VMA list A: drop the BO VMA lock B: prepare to allocate a BO B: Lock BO VMA lock B: call tu_free_zombie_vma_locked() B: close the gem handle from the VMA list B: Drop BO VMA lock B: allocate a BO, getting the recently-closed handle back. B: initialize the BO struct for the new handle. A: memset the BO struct to 0. Multithreading in C is the worst. Closes: #9049, #9247 Part-of: --- src/freedreno/ci/freedreno-a618-fails.txt | 2 -- src/freedreno/ci/freedreno-a618-flakes.txt | 12 ------------ src/freedreno/ci/freedreno-a630-fails.txt | 8 -------- src/freedreno/ci/freedreno-a630-flakes.txt | 13 ------------- src/freedreno/vulkan/tu_knl_drm_msm.cc | 6 +++++- 5 files changed, 5 insertions(+), 36 deletions(-) diff --git a/src/freedreno/ci/freedreno-a618-fails.txt b/src/freedreno/ci/freedreno-a618-fails.txt index a5f0943b682..4d1c9603bd5 100644 --- a/src/freedreno/ci/freedreno-a618-fails.txt +++ b/src/freedreno/ci/freedreno-a618-fails.txt @@ -378,5 +378,3 @@ dEQP-VK.api.driver_properties.conformance_version,Fail # After switch from 6.3.1 to 6.3.13 dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail gmem-dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail -dEQP-VK.api.object_management.multithreaded_per_thread_resources.event,Crash -gmem-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set,Crash diff --git a/src/freedreno/ci/freedreno-a618-flakes.txt b/src/freedreno/ci/freedreno-a618-flakes.txt index cc4df5d8d36..0214f612713 100644 --- a/src/freedreno/ci/freedreno-a618-flakes.txt +++ b/src/freedreno/ci/freedreno-a618-flakes.txt @@ -213,15 +213,3 @@ spec@!opengl 3.2@gl-3.2-adj-prims cull-front pv-last spec@!opengl 3.2@gl-3.2-adj-prims line cull-back pv-last spec@!opengl 3.2@gl-3.2-adj-prims line cull-front pv-last spec@!opengl 3.2@gl-3.2-adj-prims pv-last - -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/9049 -dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool -dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set -dEQP-VK.api.object_management.multithreaded_per_thread_resources.device_memory_small -dEQP-VK.api.object_management.multithreaded_per_thread_resources.event -dEQP-VK.api.object_management.multithreaded_per_thread_resources.query_pool -dEQP-VK.api.object_management.multithreaded_shared_resources.descriptor_pool -dEQP-VK.api.object_management.multithreaded_shared_resources.device_memory_small -dEQP-VK.api.object_management.multithreaded_shared_resources.event -dEQP-VK.api.object_management.multithreaded_shared_resources.query_pool -gmem-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt index 17a6a8225e2..4587bd7f76d 100644 --- a/src/freedreno/ci/freedreno-a630-fails.txt +++ b/src/freedreno/ci/freedreno-a630-fails.txt @@ -383,13 +383,5 @@ dynamic-dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_2.d32_s # since Debian 12 (bookworm) uprev dEQP-VK.api.driver_properties.conformance_version,Fail -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/9049 -dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set,Crash -dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool,Crash -dEQP-VK.api.object_management.multithreaded_per_thread_resources.event,Crash -dEQP-VK.api.object_management.multithreaded_shared_resources.descriptor_pool,Crash -dEQP-VK.api.object_management.multithreaded_shared_resources.event,Crash -dEQP-VK.api.object_management.multithreaded_shared_resources.descriptor_pool_free_descriptor_set,Crash -dEQP-VK.api.object_management.multithreaded_shared_resources.query_pool,Crash dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail gmem-dEQP-VK.binding_model.descriptor_buffer.basic.limits,Fail diff --git a/src/freedreno/ci/freedreno-a630-flakes.txt b/src/freedreno/ci/freedreno-a630-flakes.txt index 680162b34a6..9adb73fbd88 100644 --- a/src/freedreno/ci/freedreno-a630-flakes.txt +++ b/src/freedreno/ci/freedreno-a630-flakes.txt @@ -215,16 +215,3 @@ KHR-GL46.buffer_storage.map_persistent_flush # recently started flaking towards to UnexpectedPass spec@ext_external_objects@vk-depth-display@D24S8 - -# Inconsistently crashing. -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/9049 -dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool -dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set -dEQP-VK.api.object_management.multithreaded_per_thread_resources.device_memory_small -dEQP-VK.api.object_management.multithreaded_per_thread_resources.event -dEQP-VK.api.object_management.multithreaded_per_thread_resources.query_pool -dEQP-VK.api.object_management.multithreaded_shared_resources.descriptor_pool -dEQP-VK.api.object_management.multithreaded_shared_resources.device_memory_small -dEQP-VK.api.object_management.multithreaded_shared_resources.event -dEQP-VK.api.object_management.multithreaded_shared_resources.query_pool -gmem-dEQP-VK.api.object_management.multithreaded_per_thread_resources.descriptor_pool_free_descriptor_set diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index d093f57263d..0bc472fbd66 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -733,9 +733,13 @@ msm_bo_finish(struct tu_device *dev, struct tu_bo *bo) vma->iova = bo->iova; vma->size = bo->size; vma->fence = p_atomic_read(&dev->queues[0]->fence); - mtx_unlock(&dev->vma_mutex); + /* Must be cleared under the VMA mutex, or another thread could race to + * reap the VMA, closing the BO and letting a new GEM allocation produce + * this handle again. + */ memset(bo, 0, sizeof(*bo)); + mtx_unlock(&dev->vma_mutex); } else { /* Our BO structs are stored in a sparse array in the physical device, * so we don't want to free the BO pointer, instead we want to reset it