
/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * This file implements VkQueue, VkFence, and VkSemaphore
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <vulkan/vulkan.h>

#include "pvr_job_compute.h"
#include "pvr_job_context.h"
#include "pvr_job_render.h"
#include "pvr_job_transfer.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_fence.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vk_util.h"

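/* Initializes a single pvr_queue: sets up the common vk_queue state and
 * creates the transfer, compute, query and render hardware contexts that the
 * queue will submit jobs to. On failure, everything created so far is torn
 * down again.
 */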
static VkResult pvr_queue_init(struct pvr_device *device,
                               struct pvr_queue *queue,
                               const VkDeviceQueueCreateInfo *pCreateInfo,
                               uint32_t index_in_family)
{
   struct pvr_transfer_ctx *transfer_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_compute_ctx *query_ctx;
   struct pvr_render_ctx *gfx_ctx;
   VkResult result;

   *queue = (struct pvr_queue){ 0 };

   result =
      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_transfer_ctx_create(device,
                                    PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                    &transfer_ctx);
   if (result != VK_SUCCESS)
      goto err_vk_queue_finish;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &compute_ctx);
   if (result != VK_SUCCESS)
      goto err_transfer_ctx_destroy;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &query_ctx);
   if (result != VK_SUCCESS)
      goto err_compute_ctx_destroy;

   result =
      pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
   if (result != VK_SUCCESS)
      goto err_query_ctx_destroy;

   queue->device = device;
   queue->gfx_ctx = gfx_ctx;
   queue->compute_ctx = compute_ctx;
   queue->query_ctx = query_ctx;
   queue->transfer_ctx = transfer_ctx;

   return VK_SUCCESS;

err_query_ctx_destroy:
   pvr_compute_ctx_destroy(query_ctx);

err_compute_ctx_destroy:
   pvr_compute_ctx_destroy(compute_ctx);

err_transfer_ctx_destroy:
   pvr_transfer_ctx_destroy(transfer_ctx);

err_vk_queue_finish:
   vk_queue_finish(&queue->vk);

   return result;
}

VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo)
{
   VkResult result;

   /* Check requested queue families and queues */
   assert(pCreateInfo->queueCreateInfoCount == 1);
   assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
   assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);

   const VkDeviceQueueCreateInfo *queue_create =
      &pCreateInfo->pQueueCreateInfos[0];

   device->queues = vk_alloc(&device->vk.alloc,
                             queue_create->queueCount * sizeof(*device->queues),
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device->queues)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   device->queue_count = 0;

   for (uint32_t i = 0; i < queue_create->queueCount; i++) {
      result = pvr_queue_init(device, &device->queues[i], queue_create, i);
      if (result != VK_SUCCESS)
         goto err_queues_finish;

      device->queue_count++;
   }

   return VK_SUCCESS;

err_queues_finish:
   pvr_queues_destroy(device);
   return result;
}

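/* Tears down a pvr_queue: destroys any cached barrier and completion
 * syncobjs, then the hardware contexts created in pvr_queue_init(), and
 * finally the common vk_queue state.
 */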
static void pvr_queue_finish(struct pvr_queue *queue)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(queue->job_dependancy); i++) {
      if (queue->job_dependancy[i])
         vk_sync_destroy(&queue->device->vk, queue->job_dependancy[i]);
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
      if (queue->completion[i])
         vk_sync_destroy(&queue->device->vk, queue->completion[i]);
   }

   pvr_render_ctx_destroy(queue->gfx_ctx);
   pvr_compute_ctx_destroy(queue->query_ctx);
   pvr_compute_ctx_destroy(queue->compute_ctx);
   pvr_transfer_ctx_destroy(queue->transfer_ctx);

   vk_queue_finish(&queue->vk);
}

void pvr_queues_destroy(struct pvr_device *device)
{
   for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
      pvr_queue_finish(&device->queues[q_idx]);

   vk_free(&device->vk.alloc, device->queues);
}

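/* vkQueueWaitIdle(): blocks until the queue's cached per-job-type completion
 * syncobjs have all signalled, i.e. until the most recently submitted job of
 * each type has finished.
 */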
VkResult pvr_QueueWaitIdle(VkQueue _queue)
{
   PVR_FROM_HANDLE(pvr_queue, queue, _queue);

   for (uint32_t i = 0U; i < ARRAY_SIZE(queue->completion); i++) {
      VkResult result;

      if (!queue->completion[i])
         continue;

      result = vk_sync_wait(&queue->device->vk,
                            queue->completion[i],
                            0U,
                            VK_SYNC_WAIT_COMPLETE,
                            UINT64_MAX);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

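/* Submits a single render job. When a geometry and/or fragment barrier is
 * provided, a fresh syncobj is created for that phase and, on successful
 * submission, replaces the caller's current completion fence for it.
 */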
static VkResult
pvr_process_graphics_cmd_part(struct pvr_device *const device,
                              struct pvr_render_ctx *const gfx_ctx,
                              struct pvr_render_job *const job,
                              struct vk_sync *const geom_barrier,
                              struct vk_sync *const frag_barrier,
                              struct vk_sync **const geom_completion,
                              struct vk_sync **const frag_completion,
                              struct vk_sync **const waits,
                              const uint32_t wait_count,
                              uint32_t *const stage_flags)
{
   struct vk_sync *geom_sync = NULL;
   struct vk_sync *frag_sync = NULL;
   VkResult result;

   /* For each of geom and frag, a completion sync is optional, but may only
    * be provided when the corresponding barrier is present.
    */
   assert(geom_barrier || !geom_completion);
   assert(frag_barrier || !frag_completion);

   if (geom_barrier) {
      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &geom_sync);
      if (result != VK_SUCCESS)
         goto err_out;
   }

   if (frag_barrier) {
      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &frag_sync);
      if (result != VK_SUCCESS)
         goto err_destroy_sync_geom;
   }

   result = pvr_render_job_submit(gfx_ctx,
                                  job,
                                  geom_barrier,
                                  frag_barrier,
                                  waits,
                                  wait_count,
                                  stage_flags,
                                  geom_sync,
                                  frag_sync);
   if (result != VK_SUCCESS)
      goto err_destroy_sync_frag;

   /* Replace the completion fences. */
   if (geom_sync) {
      if (*geom_completion)
         vk_sync_destroy(&device->vk, *geom_completion);

      *geom_completion = geom_sync;
   }

   if (frag_sync) {
      if (*frag_completion)
         vk_sync_destroy(&device->vk, *frag_completion);

      *frag_completion = frag_sync;
   }

   return VK_SUCCESS;

err_destroy_sync_frag:
   if (frag_sync)
      vk_sync_destroy(&device->vk, frag_sync);

err_destroy_sync_geom:
   if (geom_sync)
      vk_sync_destroy(&device->vk, geom_sync);

err_out:
   return result;
}

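/* Splits a graphics sub command into two render job submissions: the first
 * runs the real control stream with geometry termination and the fragment
 * phase disabled; the second submits the pre-built terminate-only control
 * stream to end the geometry work (and kick the fragment phase if it was
 * originally requested). The job struct is restored afterwards.
 */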
static VkResult
pvr_process_split_graphics_cmd(struct pvr_device *const device,
                               struct pvr_render_ctx *const gfx_ctx,
                               struct pvr_sub_cmd_gfx *sub_cmd,
                               struct vk_sync *const geom_barrier,
                               struct vk_sync *const frag_barrier,
                               struct vk_sync **const geom_completion,
                               struct vk_sync **const frag_completion,
                               struct vk_sync **const waits,
                               const uint32_t wait_count,
                               uint32_t *const stage_flags)
{
   struct pvr_render_job *const job = &sub_cmd->job;
   const pvr_dev_addr_t original_ctrl_stream_addr = job->ctrl_stream_addr;
   const bool original_geometry_terminate = job->geometry_terminate;
   const bool original_run_frag = job->run_frag;
   VkResult result;

   /* First submit must not touch fragment work. */
   job->geometry_terminate = false;
   job->run_frag = false;

   result = pvr_process_graphics_cmd_part(device,
                                          gfx_ctx,
                                          job,
                                          geom_barrier,
                                          NULL,
                                          geom_completion,
                                          NULL,
                                          waits,
                                          wait_count,
                                          stage_flags);

   job->geometry_terminate = original_geometry_terminate;
   job->run_frag = original_run_frag;

   if (result != VK_SUCCESS)
      return result;

   /* Second submit contains only a trivial control stream to terminate the
    * geometry work.
    */
   assert(sub_cmd->terminate_ctrl_stream);
   job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr;

   result = pvr_process_graphics_cmd_part(device,
                                          gfx_ctx,
                                          job,
                                          NULL,
                                          frag_barrier,
                                          NULL,
                                          frag_completion,
                                          waits,
                                          wait_count,
                                          stage_flags);

   job->ctrl_stream_addr = original_ctrl_stream_addr;

   return result;
}

static VkResult
pvr_process_graphics_cmd(struct pvr_device *device,
                         struct pvr_queue *queue,
                         struct pvr_cmd_buffer *cmd_buffer,
                         struct pvr_sub_cmd_gfx *sub_cmd,
                         struct vk_sync *barrier_geom,
                         struct vk_sync *barrier_frag,
                         struct vk_sync **waits,
                         uint32_t wait_count,
                         uint32_t *stage_flags,
                         struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   /* FIXME: DoShadowLoadOrStore() */

   /* Perform two render submits when using multiple framebuffer layers. The
    * first submit contains just geometry, while the second only terminates
    * (and triggers the fragment render if originally specified). This is
    * needed because the render target cache gets cleared on terminating
    * submits, which could result in missing primitives.
    */
   if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
      return pvr_process_split_graphics_cmd(device,
                                            queue->gfx_ctx,
                                            sub_cmd,
                                            barrier_geom,
                                            barrier_frag,
                                            &completions[PVR_JOB_TYPE_GEOM],
                                            &completions[PVR_JOB_TYPE_FRAG],
                                            waits,
                                            wait_count,
                                            stage_flags);
   }

   return pvr_process_graphics_cmd_part(device,
                                        queue->gfx_ctx,
                                        &sub_cmd->job,
                                        barrier_geom,
                                        barrier_frag,
                                        &completions[PVR_JOB_TYPE_GEOM],
                                        &completions[PVR_JOB_TYPE_FRAG],
                                        waits,
                                        wait_count,
                                        stage_flags);

   /* FIXME: DoShadowLoadOrStore() */
}

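/* Submits a compute sub command on the queue's compute context and replaces
 * the caller's compute completion fence with a newly created syncobj that
 * signals when the job finishes.
 */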
static VkResult
pvr_process_compute_cmd(struct pvr_device *device,
                        struct pvr_queue *queue,
                        struct pvr_sub_cmd_compute *sub_cmd,
                        struct vk_sync *barrier,
                        struct vk_sync **waits,
                        uint32_t wait_count,
                        uint32_t *stage_flags,
                        struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_compute_job_submit(queue->compute_ctx,
                                   sub_cmd,
                                   barrier,
                                   waits,
                                   wait_count,
                                   stage_flags,
                                   sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   /* Replace the completion fences. */
   if (completions[PVR_JOB_TYPE_COMPUTE])
      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_COMPUTE]);

   completions[PVR_JOB_TYPE_COMPUTE] = sync;

   return result;
}

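/* Submits a transfer sub command on the queue's transfer context, replacing
 * the caller's transfer completion fence in the same way as the compute path
 * above.
 */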
static VkResult
pvr_process_transfer_cmds(struct pvr_device *device,
                          struct pvr_queue *queue,
                          struct pvr_sub_cmd_transfer *sub_cmd,
                          struct vk_sync *barrier,
                          struct vk_sync **waits,
                          uint32_t wait_count,
                          uint32_t *stage_flags,
                          struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_transfer_job_submit(device,
                                    queue->transfer_ctx,
                                    sub_cmd,
                                    barrier,
                                    waits,
                                    wait_count,
                                    stage_flags,
                                    sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   /* Replace the completion fences. */
   if (completions[PVR_JOB_TYPE_TRANSFER])
      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_TRANSFER]);

   completions[PVR_JOB_TYPE_TRANSFER] = sync;

   return result;
}

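/* Occlusion query sub commands are compute jobs under the hood; they are
 * submitted on the dedicated query context and tracked with their own
 * completion fence slot.
 */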
static VkResult pvr_process_occlusion_query_cmd(
   struct pvr_device *device,
   struct pvr_queue *queue,
   struct pvr_sub_cmd_compute *sub_cmd,
   struct vk_sync *barrier,
   struct vk_sync **waits,
   uint32_t wait_count,
   uint32_t *stage_flags,
   struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   struct vk_sync *sync;
   VkResult result;

   /* TODO: Currently we add barrier event sub commands to handle the sync
    * necessary for the different occlusion query types. Would we get any
    * speed up in processing the queue by doing that sync here without using
    * event sub commands?
    */

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_compute_job_submit(queue->query_ctx,
                                   sub_cmd,
                                   barrier,
                                   waits,
                                   wait_count,
                                   stage_flags,
                                   sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   if (completions[PVR_JOB_TYPE_OCCLUSION_QUERY])
      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_OCCLUSION_QUERY]);

   completions[PVR_JOB_TYPE_OCCLUSION_QUERY] = sync;

   return result;
}

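/* Implements a pipeline barrier between job types. The most recent syncobj
 * of every source stage with outstanding work is gathered, then a null job
 * per destination stage is submitted to wait on all of them. Each resulting
 * syncobj replaces that stage's per-command-buffer completion fence and,
 * merged with any previous barrier, becomes the barrier later jobs of that
 * stage must wait on.
 */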
static VkResult pvr_process_event_cmd_barrier(
   struct pvr_device *device,
   struct pvr_sub_cmd_event *sub_cmd,
   struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
   struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX],
   struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
   struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
   struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
{
   const uint32_t src_mask = sub_cmd->barrier.wait_for_stage_mask;
   const uint32_t dst_mask = sub_cmd->barrier.wait_at_stage_mask;
   const bool in_render_pass = sub_cmd->barrier.in_render_pass;
   struct vk_sync *new_barriers[PVR_JOB_TYPE_MAX] = { 0 };
   struct vk_sync *completions[PVR_JOB_TYPE_MAX] = { 0 };
   struct vk_sync *src_syncobjs[PVR_JOB_TYPE_MAX];
   uint32_t src_syncobj_count = 0;
   VkResult result;

   assert(sub_cmd->type == PVR_EVENT_TYPE_BARRIER);

   assert(!(src_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
   assert(!(dst_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));

   /* TODO: We're likely over synchronizing here, but the kernel doesn't
    * guarantee that jobs submitted on a context will execute and complete in
    * order, even though in practice they will, so we play it safe and don't
    * make any assumptions. If the kernel starts to offer this guarantee then
    * remove the extra dependencies being added here.
    */

   u_foreach_bit (stage, src_mask) {
      struct vk_sync *syncobj;

      syncobj = per_cmd_buffer_syncobjs[stage];

      if (!in_render_pass && !syncobj) {
         if (per_submit_syncobjs[stage])
            syncobj = per_submit_syncobjs[stage];
         else if (queue_syncobjs[stage])
            syncobj = queue_syncobjs[stage];
         else if (previous_queue_syncobjs[stage])
            syncobj = previous_queue_syncobjs[stage];
      }

      if (!syncobj)
         continue;

      src_syncobjs[src_syncobj_count++] = syncobj;
   }

   /* No previous src jobs that need finishing so no need for a barrier. */
   if (src_syncobj_count == 0)
      return VK_SUCCESS;

   u_foreach_bit (stage, dst_mask) {
      struct vk_sync *completion;

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &completion);
      if (result != VK_SUCCESS)
         goto err_destroy_completions;

      result = device->ws->ops->null_job_submit(device->ws,
                                                src_syncobjs,
                                                src_syncobj_count,
                                                completion);
      if (result != VK_SUCCESS) {
         vk_sync_destroy(&device->vk, completion);

         goto err_destroy_completions;
      }

      completions[stage] = completion;
   }

   u_foreach_bit (stage, dst_mask) {
      struct vk_sync *barrier_src_syncobjs[2];
      uint32_t barrier_src_syncobj_count = 0;
      struct vk_sync *barrier;

      assert(completions[stage]);
      barrier_src_syncobjs[barrier_src_syncobj_count++] = completions[stage];

      /* If there is a previous barrier we want to merge it with the new one.
       *
       * E.g.
       * A <compute>, B <compute>,
       * X <barrier src=compute, dst=graphics>,
       * C <transfer>
       * Y <barrier src=transfer, dst=graphics>,
       * D <graphics>
       *
       * X barriers A and B at D. Y barriers C at D. So we want to merge both
       * X and Y graphics vk_sync barriers to pass to D.
       *
       * Note that this is the same as:
       * A <compute>, B <compute>, C <transfer>
       * X <barrier src=compute, dst=graphics>,
       * Y <barrier src=transfer, dst=graphics>,
       * D <graphics>
       */
      if (barriers[stage])
         barrier_src_syncobjs[barrier_src_syncobj_count++] = barriers[stage];

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &barrier);
      if (result != VK_SUCCESS)
         goto err_destroy_new_barriers;

      result = device->ws->ops->null_job_submit(device->ws,
                                                barrier_src_syncobjs,
                                                barrier_src_syncobj_count,
                                                barrier);
      if (result != VK_SUCCESS) {
         vk_sync_destroy(&device->vk, barrier);

         goto err_destroy_new_barriers;
      }

      new_barriers[stage] = barrier;
   }

   u_foreach_bit (stage, dst_mask) {
      if (per_cmd_buffer_syncobjs[stage])
         vk_sync_destroy(&device->vk, per_cmd_buffer_syncobjs[stage]);

      per_cmd_buffer_syncobjs[stage] = completions[stage];

      if (barriers[stage])
         vk_sync_destroy(&device->vk, barriers[stage]);

      barriers[stage] = new_barriers[stage];
   }

   return VK_SUCCESS;

err_destroy_new_barriers:
   u_foreach_bit (stage, dst_mask) {
      if (new_barriers[stage])
         vk_sync_destroy(&device->vk, new_barriers[stage]);
   }

err_destroy_completions:
   u_foreach_bit (stage, dst_mask) {
      if (completions[stage])
         vk_sync_destroy(&device->vk, completions[stage]);
   }

   return result;
}

static VkResult pvr_process_event_cmd(
   struct pvr_device *device,
   struct pvr_sub_cmd_event *sub_cmd,
   struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
   struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX],
   struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
   struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
   struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
{
   switch (sub_cmd->type) {
   case PVR_EVENT_TYPE_SET:
   case PVR_EVENT_TYPE_RESET:
   case PVR_EVENT_TYPE_WAIT:
      pvr_finishme("Add support for event sub command type: %d", sub_cmd->type);
      return VK_SUCCESS;

   case PVR_EVENT_TYPE_BARRIER:
      return pvr_process_event_cmd_barrier(device,
                                           sub_cmd,
                                           barriers,
                                           per_cmd_buffer_syncobjs,
                                           per_submit_syncobjs,
                                           queue_syncobjs,
                                           previous_queue_syncobjs);

   default:
      unreachable("Invalid event sub-command type.");
   }
}

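/* Signals the submit's signal semaphores: a null job merges all per-job-type
 * completion syncobjs into one sync, whose payload is then either moved
 * directly into a single signal semaphore or exported as a sync file and
 * imported into each semaphore when there are several.
 */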
static VkResult
pvr_set_semaphore_payloads(struct pvr_device *device,
                           struct vk_sync *completions[static PVR_JOB_TYPE_MAX],
                           const VkSemaphore *signals,
                           uint32_t signal_count)
{
   struct vk_sync *sync;
   VkResult result;
   int fd = -1;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = device->ws->ops->null_job_submit(device->ws,
                                             completions,
                                             PVR_JOB_TYPE_MAX,
                                             sync);
   if (result != VK_SUCCESS)
      goto end_set_semaphore_payloads;

   /* If we have a single signal semaphore, we can simply move the merged
    * sync's payload to the signal semaphore's payload.
    */
   if (signal_count == 1U) {
      VK_FROM_HANDLE(vk_semaphore, sem, signals[0]);
      struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem);

      result = vk_sync_move(&device->vk, sem_sync, sync);
      goto end_set_semaphore_payloads;
   }

   result = vk_sync_export_sync_file(&device->vk, sync, &fd);
   if (result != VK_SUCCESS)
      goto end_set_semaphore_payloads;

   for (uint32_t i = 0U; i < signal_count; i++) {
      VK_FROM_HANDLE(vk_semaphore, sem, signals[i]);
      struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem);

      result = vk_sync_import_sync_file(&device->vk, sem_sync, fd);
      if (result != VK_SUCCESS)
         goto end_set_semaphore_payloads;
   }

end_set_semaphore_payloads:
   if (fd != -1)
      close(fd);

   vk_sync_destroy(&device->vk, sync);

   return result;
}

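/* Same idea for the submit fence: merge all completion syncobjs with a null
 * job and move the merged payload into the fence's sync.
 */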
static VkResult
pvr_set_fence_payload(struct pvr_device *device,
                      struct vk_sync *completions[static PVR_JOB_TYPE_MAX],
                      VkFence _fence)
{
   VK_FROM_HANDLE(vk_fence, fence, _fence);
   struct vk_sync *fence_sync;
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = device->ws->ops->null_job_submit(device->ws,
                                             completions,
                                             PVR_JOB_TYPE_MAX,
                                             sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   fence_sync = vk_fence_get_active_sync(fence);
   result = vk_sync_move(&device->vk, fence_sync, sync);
   vk_sync_destroy(&device->vk, sync);

   return result;
}

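/* Moves ownership of each non-NULL syncobj in src into dst, destroying
 * whatever dst previously held for that job type.
 */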
static void pvr_update_syncobjs(struct pvr_device *device,
                                struct vk_sync *src[static PVR_JOB_TYPE_MAX],
                                struct vk_sync *dst[static PVR_JOB_TYPE_MAX])
{
   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (src[i]) {
         if (dst[i])
            vk_sync_destroy(&device->vk, dst[i]);

         dst[i] = src[i];
      }
   }
}

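/* Walks the sub commands recorded in a command buffer and submits each one,
 * inserting implicit barriers where a sub command has to be serialized with
 * outstanding fragment or occlusion query work. The completion syncobjs
 * gathered along the way are folded into per_submit_syncobjs at the end.
 */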
static VkResult pvr_process_cmd_buffer(
   struct pvr_device *device,
   struct pvr_queue *queue,
   VkCommandBuffer commandBuffer,
   struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
   struct vk_sync **waits,
   uint32_t wait_count,
   uint32_t *stage_flags,
   struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
   struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
   struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
{
   struct vk_sync *per_cmd_buffer_syncobjs[PVR_JOB_TYPE_MAX] = {};
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   VkResult result;

   assert(cmd_buffer->vk.state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE);

   list_for_each_entry_safe (struct pvr_sub_cmd,
                             sub_cmd,
                             &cmd_buffer->sub_cmds,
                             link) {
      switch (sub_cmd->type) {
      case PVR_SUB_CMD_TYPE_GRAPHICS: {
         if (sub_cmd->gfx.has_occlusion_query) {
            struct pvr_sub_cmd_event query_to_frag_barrier = {
               .type = PVR_EVENT_TYPE_BARRIER,
               .barrier = {
                  .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
                  .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
               },
            };

            /* If the fragment job utilizes occlusion queries, for data
             * integrity it needs to wait for the occlusion query to be
             * processed.
             */

            result = pvr_process_event_cmd_barrier(device,
                                                   &query_to_frag_barrier,
                                                   barriers,
                                                   per_cmd_buffer_syncobjs,
                                                   per_submit_syncobjs,
                                                   queue_syncobjs,
                                                   previous_queue_syncobjs);
            if (result != VK_SUCCESS)
               break;
         }

         result = pvr_process_graphics_cmd(device,
                                           queue,
                                           cmd_buffer,
                                           &sub_cmd->gfx,
                                           barriers[PVR_JOB_TYPE_GEOM],
                                           barriers[PVR_JOB_TYPE_FRAG],
                                           waits,
                                           wait_count,
                                           stage_flags,
                                           per_cmd_buffer_syncobjs);
         break;
      }

      case PVR_SUB_CMD_TYPE_COMPUTE:
         result = pvr_process_compute_cmd(device,
                                          queue,
                                          &sub_cmd->compute,
                                          barriers[PVR_JOB_TYPE_COMPUTE],
                                          waits,
                                          wait_count,
                                          stage_flags,
                                          per_cmd_buffer_syncobjs);
         break;

      case PVR_SUB_CMD_TYPE_TRANSFER: {
         const bool serialize_with_frag = sub_cmd->transfer.serialize_with_frag;

         if (serialize_with_frag) {
            struct pvr_sub_cmd_event frag_to_transfer_barrier = {
               .type = PVR_EVENT_TYPE_BARRIER,
               .barrier = {
                  .wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
                  .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
               },
            };

            result = pvr_process_event_cmd_barrier(device,
                                                   &frag_to_transfer_barrier,
                                                   barriers,
                                                   per_cmd_buffer_syncobjs,
                                                   per_submit_syncobjs,
                                                   queue_syncobjs,
                                                   previous_queue_syncobjs);
            if (result != VK_SUCCESS)
               break;
         }

         result = pvr_process_transfer_cmds(device,
                                            queue,
                                            &sub_cmd->transfer,
                                            barriers[PVR_JOB_TYPE_TRANSFER],
                                            waits,
                                            wait_count,
                                            stage_flags,
                                            per_cmd_buffer_syncobjs);

         if (serialize_with_frag) {
            struct pvr_sub_cmd_event transfer_to_frag_barrier = {
               .type = PVR_EVENT_TYPE_BARRIER,
               .barrier = {
                  .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
                  .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
               },
            };

            if (result != VK_SUCCESS)
               break;

            result = pvr_process_event_cmd_barrier(device,
                                                   &transfer_to_frag_barrier,
                                                   barriers,
                                                   per_cmd_buffer_syncobjs,
                                                   per_submit_syncobjs,
                                                   queue_syncobjs,
                                                   previous_queue_syncobjs);
         }

         break;
      }

      case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
         result = pvr_process_occlusion_query_cmd(
            device,
            queue,
            &sub_cmd->compute,
            barriers[PVR_JOB_TYPE_OCCLUSION_QUERY],
            waits,
            wait_count,
            stage_flags,
            per_cmd_buffer_syncobjs);
         break;

      case PVR_SUB_CMD_TYPE_EVENT:
         result = pvr_process_event_cmd(device,
                                        &sub_cmd->event,
                                        barriers,
                                        per_cmd_buffer_syncobjs,
                                        per_submit_syncobjs,
                                        queue_syncobjs,
                                        previous_queue_syncobjs);
         break;

      default:
         mesa_loge("Unsupported sub-command type %d", sub_cmd->type);
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      if (result != VK_SUCCESS)
         return result;

      p_atomic_inc(&device->global_cmd_buffer_submit_count);
   }

   pvr_update_syncobjs(device, per_cmd_buffer_syncobjs, per_submit_syncobjs);

   return VK_SUCCESS;
}

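/* Handles a VkSubmitInfo with no command buffers: for every job type that
 * has at least one wait semaphore targeting it, a null job is submitted so
 * that the corresponding completion syncobj still signals once those waits
 * are satisfied.
 */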
static VkResult
pvr_submit_null_job(struct pvr_device *device,
                    struct vk_sync **waits,
                    uint32_t wait_count,
                    uint32_t *stage_flags,
                    struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
   VkResult result;

   STATIC_ASSERT(PVR_JOB_TYPE_MAX >= PVR_NUM_SYNC_PIPELINE_STAGES);
   for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) {
      struct vk_sync *per_job_waits[wait_count];
      uint32_t per_job_waits_count = 0;

      /* Get the waits specific to the job type. */
      for (uint32_t j = 0U; j < wait_count; j++) {
         if (stage_flags[j] & (1U << i)) {
            per_job_waits[per_job_waits_count] = waits[j];
            per_job_waits_count++;
         }
      }

      if (per_job_waits_count == 0U)
         continue;

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &completions[i]);
      if (result != VK_SUCCESS)
         goto err_destroy_completion_syncs;

      result = device->ws->ops->null_job_submit(device->ws,
                                                per_job_waits,
                                                per_job_waits_count,
                                                completions[i]);
      if (result != VK_SUCCESS)
         goto err_destroy_completion_syncs;
   }

   return VK_SUCCESS;

err_destroy_completion_syncs:
   for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) {
      if (completions[i]) {
         vk_sync_destroy(&device->vk, completions[i]);
         completions[i] = NULL;
      }
   }

   return result;
}

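/* vkQueueSubmit(): translates each VkSubmitInfo's wait semaphores into
 * per-stage waits, processes its command buffers (or submits a null job when
 * there are none), signals the requested semaphores and fence, and finally
 * caches the completion syncobjs on the queue for vkQueueWaitIdle().
 */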
VkResult pvr_QueueSubmit(VkQueue _queue,
                         uint32_t submitCount,
                         const VkSubmitInfo *pSubmits,
                         VkFence fence)
{
   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
   struct vk_sync *completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
   struct pvr_device *device = queue->device;
   VkResult result;

   for (uint32_t i = 0U; i < submitCount; i++) {
      struct vk_sync *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
      const VkSubmitInfo *desc = &pSubmits[i];
      struct vk_sync *waits[desc->waitSemaphoreCount];
      uint32_t stage_flags[desc->waitSemaphoreCount];
      uint32_t wait_count = 0;

      for (uint32_t j = 0U; j < desc->waitSemaphoreCount; j++) {
         VK_FROM_HANDLE(vk_semaphore, semaphore, desc->pWaitSemaphores[j]);
         struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);

         if (sync->type == &vk_sync_dummy_type)
            continue;

         /* We don't currently support timeline semaphores. */
         assert(!(sync->flags & VK_SYNC_IS_TIMELINE));

         stage_flags[wait_count] =
            pvr_stage_mask_dst(desc->pWaitDstStageMask[j]);
         waits[wait_count] = vk_semaphore_get_active_sync(semaphore);
         wait_count++;
      }

      if (desc->commandBufferCount > 0U) {
         for (uint32_t j = 0U; j < desc->commandBufferCount; j++) {
            result = pvr_process_cmd_buffer(device,
                                            queue,
                                            desc->pCommandBuffers[j],
                                            queue->job_dependancy,
                                            waits,
                                            wait_count,
                                            stage_flags,
                                            per_submit_completion_syncobjs,
                                            completion_syncobjs,
                                            queue->completion);
            if (result != VK_SUCCESS)
               return result;
         }
      } else {
         result = pvr_submit_null_job(device,
                                      waits,
                                      wait_count,
                                      stage_flags,
                                      per_submit_completion_syncobjs);
         if (result != VK_SUCCESS)
            return result;
      }

      if (desc->signalSemaphoreCount) {
         result = pvr_set_semaphore_payloads(device,
                                             per_submit_completion_syncobjs,
                                             desc->pSignalSemaphores,
                                             desc->signalSemaphoreCount);
         if (result != VK_SUCCESS)
            return result;
      }

      pvr_update_syncobjs(device,
                          per_submit_completion_syncobjs,
                          completion_syncobjs);
   }

   if (fence) {
      result = pvr_set_fence_payload(device, completion_syncobjs, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   pvr_update_syncobjs(device, completion_syncobjs, queue->completion);

   return VK_SUCCESS;
}