v3dv: Switch to the common submit framework

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15704>
This commit is contained in:
Jason Ekstrand
2022-03-29 17:52:32 -05:00
committed by Marge Bot
parent 321f0b85f2
commit 316728a55b
5 changed files with 391 additions and 1556 deletions

View File

@@ -352,7 +352,6 @@ spec@oes_texture_view@rendering-formats@clear GL_RGB10_A2 as GL_RGBA8I,Fail
# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3510
dEQP-VK.api.external.semaphore.opaque_fd.info_timeline,Fail
dEQP-VK.api.external.semaphore.sync_fd.info_timeline,Fail
dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero,Fail

View File

@@ -45,6 +45,7 @@
#include "drm-uapi/v3d_drm.h"
#include "format/u_format.h"
#include "vk_drm_syncobj.h"
#include "vk_util.h"
#include "git_sha1.h"
@@ -844,6 +845,44 @@ physical_device_init(struct v3dv_physical_device *device,
device->options.merge_jobs = getenv("V3DV_NO_MERGE_JOBS") == NULL;
device->drm_syncobj_type = vk_drm_syncobj_get_type(device->render_fd);
/* We don't support timelines in the uAPI yet and we don't want it getting
* suddenly turned on by vk_drm_syncobj_get_type() without us adding v3dv
* code for it first.
*/
device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
/* Sync file export is incompatible with the current model of execution
* where some jobs may run on the CPU. There are CTS tests which do the
* following:
*
* 1. Create a command buffer with a vkCmdWaitEvents()
* 2. Submit the command buffer
* 3. vkGetSemaphoreFdKHR() to try to get a sync_file
* 4. vkSetEvent()
*
* This deadlocks because we have to wait for the syncobj to get a real
* fence in vkGetSemaphoreFdKHR() which only happens after all the work
* from the command buffer is complete which only happens after
* vkSetEvent(). No amount of CPU threading in userspace will ever fix
* this. Sadly, this is pretty explicitly allowed by the Vulkan spec:
*
* VUID-vkCmdWaitEvents-pEvents-01163
*
* "If pEvents includes one or more events that will be signaled by
* vkSetEvent after commandBuffer has been submitted to a queue, then
* vkCmdWaitEvents must not be called inside a render pass instance"
*
* Disable sync file support for now.
*/
device->drm_syncobj_type.import_sync_file = NULL;
device->drm_syncobj_type.export_sync_file = NULL;
device->sync_types[0] = &device->drm_syncobj_type;
device->sync_types[1] = NULL;
device->vk.supported_sync_types = device->sync_types;
result = v3dv_wsi_init(device);
if (result != VK_SUCCESS) {
vk_error(instance, result);
@@ -1845,6 +1884,17 @@ v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice,
return vk_error(physical_device, VK_ERROR_LAYER_NOT_PRESENT);
}
/* Destroys the per-queue "last job" DRM syncobjs created in queue_init().
 * A syncobj handle of 0 is treated as "never created" and skipped, so this
 * is safe to call on a partially-initialized queue (queue_init error path).
 */
static void
destroy_queue_syncs(struct v3dv_queue *queue)
{
for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
if (queue->last_job_syncs.syncs[i]) {
drmSyncobjDestroy(queue->device->pdevice->render_fd,
queue->last_job_syncs.syncs[i]);
}
}
}
static VkResult
queue_init(struct v3dv_device *device, struct v3dv_queue *queue,
const VkDeviceQueueCreateInfo *create_info,
@@ -1854,23 +1904,43 @@ queue_init(struct v3dv_device *device, struct v3dv_queue *queue,
index_in_family);
if (result != VK_SUCCESS)
return result;
result = vk_queue_enable_submit_thread(&queue->vk);
if (result != VK_SUCCESS)
goto fail_submit_thread;
queue->device = device;
queue->vk.driver_submit = v3dv_queue_driver_submit;
for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
queue->last_job_syncs.first[i] = true;
int ret = drmSyncobjCreate(device->pdevice->render_fd,
DRM_SYNCOBJ_CREATE_SIGNALED,
&queue->last_job_syncs.syncs[i]);
if (ret) {
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
"syncobj create failed: %m");
goto fail_last_job_syncs;
}
}
queue->noop_job = NULL;
list_inithead(&queue->submit_wait_list);
mtx_init(&queue->mutex, mtx_plain);
mtx_init(&queue->noop_mutex, mtx_plain);
return VK_SUCCESS;
fail_last_job_syncs:
destroy_queue_syncs(queue);
fail_submit_thread:
vk_queue_finish(&queue->vk);
return result;
}
static void
queue_finish(struct v3dv_queue *queue)
{
vk_queue_finish(&queue->vk);
assert(list_is_empty(&queue->submit_wait_list));
if (queue->noop_job)
v3dv_job_destroy(queue->noop_job);
mtx_destroy(&queue->mutex);
mtx_destroy(&queue->noop_mutex);
destroy_queue_syncs(queue);
vk_queue_finish(&queue->vk);
}
static void
@@ -1882,16 +1952,6 @@ init_device_meta(struct v3dv_device *device)
v3dv_meta_texel_buffer_copy_init(device);
}
/* Destroys the device-level "last job" syncobjs on the given render fd.
 * Zero handles are skipped, so this is safe on a partially-initialized
 * device (e.g. when drmSyncobjCreate failed midway through the loop in
 * v3dv_CreateDevice).
 */
static void
destroy_device_syncs(struct v3dv_device *device,
int render_fd)
{
for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
if (device->last_job_syncs.syncs[i])
drmSyncobjDestroy(render_fd, device->last_job_syncs.syncs[i]);
}
}
static void
destroy_device_meta(struct v3dv_device *device)
{
@@ -1944,10 +2004,12 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
device->instance = instance;
device->pdevice = physical_device;
mtx_init(&device->mutex, mtx_plain);
mtx_init(&device->query_mutex, mtx_plain);
cnd_init(&device->query_ended);
vk_device_set_drm_fd(&device->vk, physical_device->render_fd);
vk_device_enable_threaded_submit(&device->vk);
result = queue_init(device, &device->queue,
pCreateInfo->pQueueCreateInfos, 0);
if (result != VK_SUCCESS)
@@ -1973,17 +2035,6 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
if (device->features.robustBufferAccess)
perf_debug("Device created with Robust Buffer Access enabled.\n");
for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
device->last_job_syncs.first[i] = true;
int ret = drmSyncobjCreate(physical_device->render_fd,
DRM_SYNCOBJ_CREATE_SIGNALED,
&device->last_job_syncs.syncs[i]);
if (ret) {
result = VK_ERROR_INITIALIZATION_FAILED;
goto fail;
}
}
#ifdef DEBUG
v3dv_X(device, device_check_prepacked_sizes)();
#endif
@@ -1999,10 +2050,8 @@ v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
return VK_SUCCESS;
fail:
destroy_device_syncs(device, physical_device->render_fd);
cnd_destroy(&device->query_ended);
mtx_destroy(&device->query_mutex);
mtx_destroy(&device->mutex);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
@@ -2015,10 +2064,8 @@ v3dv_DestroyDevice(VkDevice _device,
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
v3dv_DeviceWaitIdle(_device);
device->vk.dispatch_table.DeviceWaitIdle(_device);
queue_finish(&device->queue);
mtx_destroy(&device->mutex);
destroy_device_syncs(device, device->pdevice->render_fd);
destroy_device_meta(device);
v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
@@ -2039,17 +2086,6 @@ v3dv_DestroyDevice(VkDevice _device,
vk_free2(&device->vk.alloc, pAllocator, device);
}
/* vkDeviceWaitIdle implementation: the device exposes a single queue
 * (device->queue), so waiting for that queue to idle is equivalent to
 * waiting for the whole device. Returns VK_ERROR_DEVICE_LOST without
 * waiting if the device has already been marked lost.
 */
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_DeviceWaitIdle(VkDevice _device)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
if (vk_device_is_lost(&device->vk))
return VK_ERROR_DEVICE_LOST;
return v3dv_QueueWaitIdle(v3dv_queue_to_handle(&device->queue));
}
static VkResult
device_alloc(struct v3dv_device *device,
struct v3dv_device_memory *mem,

View File

@@ -43,6 +43,7 @@
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_sync.h"
#include "vk_util.h"
#include "vk_command_buffer.h"
@@ -140,6 +141,9 @@ struct v3dv_physical_device {
uint8_t device_uuid[VK_UUID_SIZE];
uint8_t driver_uuid[VK_UUID_SIZE];
struct vk_sync_type drm_syncobj_type;
const struct vk_sync_type *sync_types[2];
struct disk_cache *disk_cache;
mtx_t mutex;
@@ -219,34 +223,30 @@ struct v3dv_instance {
bool default_pipeline_cache_enabled;
};
/* Tracks wait threads spawned from a single vkQueueSubmit call */
struct v3dv_queue_submit_wait_info {
/* struct vk_object_base base; ?*/
struct list_head list_link;
/* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
* tfu), we still need a syncobj to track the last overall job submitted
* (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
* start expecting multisync to be present and drop the legacy implementation
* together with this V3DV_QUEUE_ANY tracker.
*/
/* Identifies which GPU queue a job was submitted to; values index the
 * last_job_sync tracking arrays (sized by V3DV_QUEUE_COUNT).
 * V3DV_QUEUE_ANY tracks the last job submitted overall, for the
 * non-multisync fallback described in the FIXME comment above.
 */
enum v3dv_queue_type {
V3DV_QUEUE_CL = 0,
V3DV_QUEUE_CSD,
V3DV_QUEUE_TFU,
V3DV_QUEUE_ANY,
V3DV_QUEUE_COUNT,
};
struct v3dv_device *device;
/* List of wait threads spawned for any command buffers in a particular
* call to vkQueueSubmit.
*/
uint32_t wait_thread_count;
struct {
pthread_t thread;
bool finished;
} wait_threads[16];
/* The master wait thread for the entire submit. This will wait for all
* other threads in this submit to complete before processing signal
* semaphores and fences.
*/
pthread_t master_wait_thread;
/* List of semaphores (and fence) to signal after all wait threads completed
* and all command buffer jobs in the submission have been sent to the GPU.
*/
uint32_t signal_semaphore_count;
VkSemaphore *signal_semaphores;
VkFence fence;
/* For each GPU queue, we use a syncobj to track the last job submitted. We
* set the flag `first` to determine when we are starting a new cmd buffer
* batch and therefore a job submitted to a given queue will be the first in a
* cmd buf batch.
*/
struct v3dv_last_job_sync {
/* If the job is the first submitted to a GPU queue in a cmd buffer batch */
bool first[V3DV_QUEUE_COUNT];
/* Array of syncobj to track the last job submitted to a GPU queue */
uint32_t syncs[V3DV_QUEUE_COUNT];
};
struct v3dv_queue {
@@ -254,18 +254,14 @@ struct v3dv_queue {
struct v3dv_device *device;
/* A list of active v3dv_queue_submit_wait_info */
struct list_head submit_wait_list;
/* A mutex to prevent concurrent access to the list of wait threads */
mtx_t mutex;
/* A mutex to prevent concurrent noop job submissions */
mtx_t noop_mutex;
struct v3dv_last_job_sync last_job_syncs;
struct v3dv_job *noop_job;
};
VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
struct vk_queue_submit *submit);
#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
sizeof(VkComponentMapping))
@@ -438,32 +434,6 @@ struct v3dv_pipeline_cache {
bool externally_synchronized;
};
/* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
* tfu), we still need a syncobj to track the last overall job submitted
* (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
* start expecting multisync to be present and drop the legacy implementation
* together with this V3DV_QUEUE_ANY tracker.
*/
/* Identifies which GPU queue a job was submitted to; values index the
 * last_job_sync tracking arrays (sized by V3DV_QUEUE_COUNT).
 * V3DV_QUEUE_ANY tracks the last job submitted overall, for the
 * non-multisync fallback described in the FIXME comment above.
 */
enum v3dv_queue_type {
V3DV_QUEUE_CL = 0,
V3DV_QUEUE_CSD,
V3DV_QUEUE_TFU,
V3DV_QUEUE_ANY,
V3DV_QUEUE_COUNT,
};
/* For each GPU queue, we use a syncobj to track the last job submitted. We
* set the flag `first` to determine when we are starting a new cmd buffer
* batch and therefore a job submitted to a given queue will be the first in a
* cmd buf batch.
*/
struct v3dv_last_job_sync {
/* If the job is the first submitted to a GPU queue in a cmd buffer batch */
bool first[V3DV_QUEUE_COUNT];
/* Array of syncobj to track the last job submitted to a GPU queue */
uint32_t syncs[V3DV_QUEUE_COUNT];
};
struct v3dv_device {
struct vk_device vk;
@@ -473,12 +443,6 @@ struct v3dv_device {
struct v3d_device_info devinfo;
struct v3dv_queue queue;
/* Syncobjs to track the last job submitted to any GPU queue */
struct v3dv_last_job_sync last_job_syncs;
/* A mutex to prevent concurrent access to last_job_sync from the queue */
mtx_t mutex;
/* Guards query->maybe_available and value for timestamps */
mtx_t query_mutex;
@@ -1001,17 +965,14 @@ struct v3dv_copy_query_results_cpu_job_info {
VkQueryResultFlags flags;
};
struct v3dv_submit_info_semaphores {
/* List of semaphores to wait before running a job */
uint32_t wait_sem_count;
VkSemaphore *wait_sems;
struct v3dv_submit_sync_info {
/* List of syncs to wait before running a job */
uint32_t wait_count;
struct vk_sync_wait *waits;
/* List of semaphores to signal when all jobs complete */
uint32_t signal_sem_count;
VkSemaphore *signal_sems;
/* A fence to signal when all jobs complete */
VkFence fence;
/* List of syncs to signal when all jobs complete */
uint32_t signal_count;
struct vk_sync_signal *signals;
};
struct v3dv_event_set_cpu_job_info {
@@ -1122,9 +1083,6 @@ struct v3dv_job {
/* Whether we need to serialize this job in our command stream */
bool serialize;
/* Whether this job is in charge of signalling semaphores */
bool do_sem_signal;
/* If this is a CL job, whether we should sync before binning */
bool needs_bcl_sync;
@@ -1156,7 +1114,7 @@ struct v3dv_wait_thread_info {
struct v3dv_job *job;
/* Semaphores info for any postponed jobs after a wait event */
struct v3dv_submit_info_semaphores *sems_info;
struct v3dv_submit_sync_info *sync_info;
};
void v3dv_job_init(struct v3dv_job *job,
@@ -1514,28 +1472,6 @@ void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
uint64_t obj,
v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
struct v3dv_semaphore {
struct vk_object_base base;
/* A syncobject handle associated with this semaphore */
uint32_t sync;
/* A temporary syncobject handle produced from a vkImportSemaphoreFd. */
uint32_t temp_sync;
bool has_temp;
};
struct v3dv_fence {
struct vk_object_base base;
/* A syncobject handle associated with this fence */
uint32_t sync;
/* A temporary syncobject handle produced from a vkImportFenceFd. */
uint32_t temp_sync;
bool has_temp;
};
struct v3dv_event {
struct vk_object_base base;
int state;
@@ -2210,7 +2146,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
VkDescriptorUpdateTemplate,
VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
@@ -2229,8 +2164,6 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, base, VkSemaphore,
VK_OBJECT_TYPE_SEMAPHORE)
static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)

File diff suppressed because it is too large Load Diff

View File

@@ -29,6 +29,9 @@
#include "vk_util.h"
#include "wsi_common.h"
#include "wsi_common_drm.h"
#include "vk_fence.h"
#include "vk_semaphore.h"
#include "vk_sync_dummy.h"
static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
v3dv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
@@ -146,26 +149,39 @@ v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain, uint32_t index)
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_AcquireNextImage2KHR(
VkDevice _device,
const VkAcquireNextImageInfoKHR* pAcquireInfo,
uint32_t* pImageIndex)
v3dv_AcquireNextImage2KHR(VkDevice _device,
const VkAcquireNextImageInfoKHR *pAcquireInfo,
uint32_t *pImageIndex)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_fence, fence, pAcquireInfo->fence);
V3DV_FROM_HANDLE(v3dv_semaphore, semaphore, pAcquireInfo->semaphore);
VK_FROM_HANDLE(vk_fence, fence, pAcquireInfo->fence);
VK_FROM_HANDLE(vk_semaphore, semaphore, pAcquireInfo->semaphore);
struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
struct v3dv_physical_device *pdevice = device->pdevice;
VkResult result;
result = wsi_common_acquire_next_image2(&pdevice->wsi_device, _device,
pAcquireInfo, pImageIndex);
VkResult result = wsi_common_acquire_next_image2(
&pdevice->wsi_device, _device, pAcquireInfo, pImageIndex);
/* signal fence/semaphore - image is available immediately */
if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) {
if (fence)
drmSyncobjSignal(pdevice->render_fd, &fence->sync, 1);
if (semaphore)
drmSyncobjSignal(pdevice->render_fd, &semaphore->sync, 1);
VkResult sync_res;
if (fence) {
vk_fence_reset_temporary(&device->vk, fence);
sync_res = vk_sync_create(&device->vk, &vk_sync_dummy_type,
0 /* flags */, 1 /* initial_value */,
&fence->temporary);
if (sync_res != VK_SUCCESS)
return sync_res;
}
if (semaphore) {
vk_semaphore_reset_temporary(&device->vk, semaphore);
sync_res = vk_sync_create(&device->vk, &vk_sync_dummy_type,
0 /* flags */, 1 /* initial_value */,
&semaphore->temporary);
if (sync_res != VK_SUCCESS)
return sync_res;
}
}
return result;