third_party_mesa3d/src/imagination/vulkan/pvr_queue.c

/*
* Copyright © 2022 Imagination Technologies Ltd.
*
* based in part on radv driver which is:
* Copyright © 2016 Red Hat.
* Copyright © 2016 Bas Nieuwenhuizen
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* This file implements VkQueue, VkFence, and VkSemaphore
*/
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <vulkan/vulkan.h>
#include "pvr_job_compute.h"
#include "pvr_job_context.h"
#include "pvr_job_render.h"
#include "pvr_job_transfer.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_fence.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vk_util.h"
static VkResult pvr_queue_init(struct pvr_device *device,
struct pvr_queue *queue,
const VkDeviceQueueCreateInfo *pCreateInfo,
uint32_t index_in_family)
{
struct pvr_transfer_ctx *transfer_ctx;
struct pvr_compute_ctx *compute_ctx;
struct pvr_compute_ctx *query_ctx;
struct pvr_render_ctx *gfx_ctx;
VkResult result;
*queue = (struct pvr_queue){ 0 };
result =
vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
if (result != VK_SUCCESS)
return result;
result = pvr_transfer_ctx_create(device,
PVR_WINSYS_CTX_PRIORITY_MEDIUM,
&transfer_ctx);
if (result != VK_SUCCESS)
goto err_vk_queue_finish;
result = pvr_compute_ctx_create(device,
PVR_WINSYS_CTX_PRIORITY_MEDIUM,
&compute_ctx);
if (result != VK_SUCCESS)
goto err_transfer_ctx_destroy;
result = pvr_compute_ctx_create(device,
PVR_WINSYS_CTX_PRIORITY_MEDIUM,
&query_ctx);
if (result != VK_SUCCESS)
goto err_compute_ctx_destroy;
result =
pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
if (result != VK_SUCCESS)
goto err_query_ctx_destroy;
queue->device = device;
queue->gfx_ctx = gfx_ctx;
queue->compute_ctx = compute_ctx;
queue->query_ctx = query_ctx;
queue->transfer_ctx = transfer_ctx;
return VK_SUCCESS;
err_query_ctx_destroy:
pvr_compute_ctx_destroy(query_ctx);
err_compute_ctx_destroy:
pvr_compute_ctx_destroy(compute_ctx);
err_transfer_ctx_destroy:
pvr_transfer_ctx_destroy(transfer_ctx);
err_vk_queue_finish:
vk_queue_finish(&queue->vk);
return result;
}
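
/* Allocate and initialize all queues requested in the VkDeviceCreateInfo.
 * Only a single queue create info, for queue family 0 and with at most
 * PVR_MAX_QUEUES queues, is supported; anything else trips the asserts below.
 */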
VkResult pvr_queues_create(struct pvr_device *device,
const VkDeviceCreateInfo *pCreateInfo)
{
VkResult result;
/* Check requested queue families and queues */
assert(pCreateInfo->queueCreateInfoCount == 1);
assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);
const VkDeviceQueueCreateInfo *queue_create =
&pCreateInfo->pQueueCreateInfos[0];
device->queues = vk_alloc(&device->vk.alloc,
queue_create->queueCount * sizeof(*device->queues),
8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device->queues)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
device->queue_count = 0;
for (uint32_t i = 0; i < queue_create->queueCount; i++) {
result = pvr_queue_init(device, &device->queues[i], queue_create, i);
if (result != VK_SUCCESS)
goto err_queues_finish;
device->queue_count++;
}
return VK_SUCCESS;
err_queues_finish:
pvr_queues_destroy(device);
return result;
}
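
/* Destroy any remaining per-job-type barrier and completion syncobjs, then
 * the queue's contexts and the common vk_queue state.
 */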
static void pvr_queue_finish(struct pvr_queue *queue)
{
for (uint32_t i = 0; i < ARRAY_SIZE(queue->job_dependancy); i++) {
if (queue->job_dependancy[i])
vk_sync_destroy(&queue->device->vk, queue->job_dependancy[i]);
}
for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
if (queue->completion[i])
vk_sync_destroy(&queue->device->vk, queue->completion[i]);
}
pvr_render_ctx_destroy(queue->gfx_ctx);
pvr_compute_ctx_destroy(queue->query_ctx);
pvr_compute_ctx_destroy(queue->compute_ctx);
pvr_transfer_ctx_destroy(queue->transfer_ctx);
vk_queue_finish(&queue->vk);
}
void pvr_queues_destroy(struct pvr_device *device)
{
for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
pvr_queue_finish(&device->queues[q_idx]);
vk_free(&device->vk.alloc, device->queues);
}
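
/* Waiting for a queue to go idle amounts to waiting, without a timeout limit,
 * on the last completion syncobj recorded for each job type.
 */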
VkResult pvr_QueueWaitIdle(VkQueue _queue)
{
PVR_FROM_HANDLE(pvr_queue, queue, _queue);
for (uint32_t i = 0U; i < ARRAY_SIZE(queue->completion); i++) {
VkResult result;
if (!queue->completion[i])
continue;
result = vk_sync_wait(&queue->device->vk,
queue->completion[i],
0U,
VK_SYNC_WAIT_COMPLETE,
UINT64_MAX);
if (result != VK_SUCCESS)
return result;
}
return VK_SUCCESS;
}
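
/* Submit a single render job. A new syncobj is created for the geometry
 * and/or fragment phase whenever the corresponding barrier is provided; on
 * success it replaces the caller's previous completion syncobj for that
 * phase.
 */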
static VkResult
pvr_process_graphics_cmd_part(struct pvr_device *const device,
struct pvr_render_ctx *const gfx_ctx,
struct pvr_render_job *const job,
struct vk_sync *const geom_barrier,
struct vk_sync *const frag_barrier,
struct vk_sync **const geom_completion,
struct vk_sync **const frag_completion,
struct vk_sync **const waits,
const uint32_t wait_count,
uint32_t *const stage_flags)
{
struct vk_sync *geom_sync = NULL;
struct vk_sync *frag_sync = NULL;
VkResult result;
/* For each of geom and frag, a completion sync is optional, but may only be
 * requested when the corresponding barrier is present.
 */
assert(geom_barrier || !geom_completion);
assert(frag_barrier || !frag_completion);
if (geom_barrier) {
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&geom_sync);
if (result != VK_SUCCESS)
goto err_out;
}
if (frag_barrier) {
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&frag_sync);
if (result != VK_SUCCESS)
goto err_destroy_sync_geom;
}
result = pvr_render_job_submit(gfx_ctx,
job,
geom_barrier,
frag_barrier,
waits,
wait_count,
stage_flags,
geom_sync,
frag_sync);
if (result != VK_SUCCESS)
goto err_destroy_sync_frag;
/* Replace the completion fences. */
if (geom_sync) {
if (*geom_completion)
vk_sync_destroy(&device->vk, *geom_completion);
*geom_completion = geom_sync;
}
if (frag_sync) {
if (*frag_completion)
vk_sync_destroy(&device->vk, *frag_completion);
*frag_completion = frag_sync;
}
return VK_SUCCESS;
err_destroy_sync_frag:
if (frag_sync)
vk_sync_destroy(&device->vk, frag_sync);
err_destroy_sync_geom:
if (geom_sync)
vk_sync_destroy(&device->vk, geom_sync);
err_out:
return result;
}
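
/* Split a graphics sub command into two submissions: the first runs the
 * geometry work without terminating it, and the second replays the job with
 * a control stream that only terminates the geometry, restoring the original
 * run_frag setting so the fragment phase is kicked if it was requested.
 */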
static VkResult
pvr_process_split_graphics_cmd(struct pvr_device *const device,
struct pvr_render_ctx *const gfx_ctx,
struct pvr_sub_cmd_gfx *sub_cmd,
struct vk_sync *const geom_barrier,
struct vk_sync *const frag_barrier,
struct vk_sync **const geom_completion,
struct vk_sync **const frag_completion,
struct vk_sync **const waits,
const uint32_t wait_count,
uint32_t *const stage_flags)
{
struct pvr_render_job *const job = &sub_cmd->job;
const pvr_dev_addr_t original_ctrl_stream_addr = job->ctrl_stream_addr;
const bool original_geometry_terminate = job->geometry_terminate;
const bool original_run_frag = job->run_frag;
VkResult result;
/* First submit must not touch fragment work. */
job->geometry_terminate = false;
job->run_frag = false;
result = pvr_process_graphics_cmd_part(device,
gfx_ctx,
job,
geom_barrier,
NULL,
geom_completion,
NULL,
waits,
wait_count,
stage_flags);
job->geometry_terminate = original_geometry_terminate;
job->run_frag = original_run_frag;
if (result != VK_SUCCESS)
return result;
/* Second submit contains only a trivial control stream to terminate the
* geometry work.
*/
assert(sub_cmd->terminate_ctrl_stream);
job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr;
result = pvr_process_graphics_cmd_part(device,
gfx_ctx,
job,
NULL,
frag_barrier,
NULL,
frag_completion,
waits,
wait_count,
stage_flags);
job->ctrl_stream_addr = original_ctrl_stream_addr;
return result;
}
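
/* Submit a graphics sub command, either as a single render job or, when a
 * split submit is required, as separate geometry and terminate/fragment
 * submissions.
 */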
static VkResult
pvr_process_graphics_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_cmd_buffer *cmd_buffer,
struct pvr_sub_cmd_gfx *sub_cmd,
struct vk_sync *barrier_geom,
struct vk_sync *barrier_frag,
struct vk_sync **waits,
uint32_t wait_count,
uint32_t *stage_flags,
struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
/* FIXME: DoShadowLoadOrStore() */
/* Perform two render submits when using multiple framebuffer layers. The
* first submit contains just geometry, while the second only terminates
* (and triggers the fragment render if originally specified). This is needed
* because the render target cache gets cleared on terminating submits, which
* could result in missing primitives.
*/
if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
return pvr_process_split_graphics_cmd(device,
queue->gfx_ctx,
sub_cmd,
barrier_geom,
barrier_frag,
&completions[PVR_JOB_TYPE_GEOM],
&completions[PVR_JOB_TYPE_FRAG],
waits,
wait_count,
stage_flags);
}
return pvr_process_graphics_cmd_part(device,
queue->gfx_ctx,
&sub_cmd->job,
barrier_geom,
barrier_frag,
&completions[PVR_JOB_TYPE_GEOM],
&completions[PVR_JOB_TYPE_FRAG],
waits,
wait_count,
stage_flags);
/* FIXME: DoShadowLoadOrStore() */
}
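
/* Submit a compute sub command on the queue's compute context, replacing the
 * previous PVR_JOB_TYPE_COMPUTE completion syncobj with one signaled by this
 * job.
 */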
static VkResult
pvr_process_compute_cmd(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_compute *sub_cmd,
struct vk_sync *barrier,
struct vk_sync **waits,
uint32_t wait_count,
uint32_t *stage_flags,
struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
struct vk_sync *sync;
VkResult result;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&sync);
if (result != VK_SUCCESS)
return result;
result = pvr_compute_job_submit(queue->compute_ctx,
sub_cmd,
barrier,
waits,
wait_count,
stage_flags,
sync);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, sync);
return result;
}
/* Replace the completion fences. */
if (completions[PVR_JOB_TYPE_COMPUTE])
vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_COMPUTE]);
completions[PVR_JOB_TYPE_COMPUTE] = sync;
return result;
}
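
/* Submit a transfer sub command on the queue's transfer context, replacing
 * the previous PVR_JOB_TYPE_TRANSFER completion syncobj with one signaled by
 * this job.
 */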
static VkResult
pvr_process_transfer_cmds(struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_transfer *sub_cmd,
struct vk_sync *barrier,
struct vk_sync **waits,
uint32_t wait_count,
uint32_t *stage_flags,
struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
struct vk_sync *sync;
VkResult result;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&sync);
if (result != VK_SUCCESS)
return result;
result = pvr_transfer_job_submit(device,
queue->transfer_ctx,
sub_cmd,
barrier,
waits,
wait_count,
stage_flags,
sync);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, sync);
return result;
}
/* Replace the completion fences. */
if (completions[PVR_JOB_TYPE_TRANSFER])
vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_TRANSFER]);
completions[PVR_JOB_TYPE_TRANSFER] = sync;
return result;
}
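
/* Submit an occlusion query sub command as a compute job on the dedicated
 * query context, tracking its completion under PVR_JOB_TYPE_OCCLUSION_QUERY.
 */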
static VkResult pvr_process_occlusion_query_cmd(
struct pvr_device *device,
struct pvr_queue *queue,
struct pvr_sub_cmd_compute *sub_cmd,
struct vk_sync *barrier,
struct vk_sync **waits,
uint32_t wait_count,
uint32_t *stage_flags,
struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
struct vk_sync *sync;
VkResult result;
/* TODO: Currently we add barrier event sub commands to handle the sync
 * necessary for the different occlusion query types. Would we get any
 * speed-up in queue processing by doing that sync here, without using event
 * sub commands?
 */
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&sync);
if (result != VK_SUCCESS)
return result;
result = pvr_compute_job_submit(queue->query_ctx,
sub_cmd,
barrier,
waits,
wait_count,
stage_flags,
sync);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, sync);
return result;
}
if (completions[PVR_JOB_TYPE_OCCLUSION_QUERY])
vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_OCCLUSION_QUERY]);
completions[PVR_JOB_TYPE_OCCLUSION_QUERY] = sync;
return result;
}
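
/* Process a barrier event sub command. The completion syncobjs of the source
 * stages are gathered and fed, via null jobs, into a fresh completion syncobj
 * for each destination stage. These replace the destination stages'
 * per-command-buffer syncobjs and are merged with any existing barrier so
 * that later submissions wait on all of them.
 */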
static VkResult pvr_process_event_cmd_barrier(
struct pvr_device *device,
struct pvr_sub_cmd_event *sub_cmd,
struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX],
struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
{
const uint32_t src_mask = sub_cmd->barrier.wait_for_stage_mask;
const uint32_t dst_mask = sub_cmd->barrier.wait_at_stage_mask;
const bool in_render_pass = sub_cmd->barrier.in_render_pass;
struct vk_sync *new_barriers[PVR_JOB_TYPE_MAX] = { 0 };
struct vk_sync *completions[PVR_JOB_TYPE_MAX] = { 0 };
struct vk_sync *src_syncobjs[PVR_JOB_TYPE_MAX];
uint32_t src_syncobj_count = 0;
VkResult result;
assert(sub_cmd->type == PVR_EVENT_TYPE_BARRIER);
assert(!(src_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
assert(!(dst_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
/* TODO: We're likely over-synchronizing here, but the kernel doesn't
 * guarantee that jobs submitted on a context will execute and complete in
 * order, even though in practice they will, so we play it safe and don't
 * make any assumptions. If the kernel starts to offer this guarantee, the
 * extra dependencies added here can be removed.
 */
u_foreach_bit (stage, src_mask) {
struct vk_sync *syncobj;
syncobj = per_cmd_buffer_syncobjs[stage];
if (!in_render_pass && !syncobj) {
if (per_submit_syncobjs[stage])
syncobj = per_submit_syncobjs[stage];
else if (queue_syncobjs[stage])
syncobj = queue_syncobjs[stage];
else if (previous_queue_syncobjs[stage])
syncobj = previous_queue_syncobjs[stage];
}
if (!syncobj)
continue;
src_syncobjs[src_syncobj_count++] = syncobj;
}
/* No previous src jobs need finishing, so there's no need for a barrier. */
if (src_syncobj_count == 0)
return VK_SUCCESS;
u_foreach_bit (stage, dst_mask) {
struct vk_sync *completion;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&completion);
if (result != VK_SUCCESS)
goto err_destroy_completions;
result = device->ws->ops->null_job_submit(device->ws,
src_syncobjs,
src_syncobj_count,
completion);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, completion);
goto err_destroy_completions;
}
completions[stage] = completion;
}
u_foreach_bit (stage, dst_mask) {
struct vk_sync *barrier_src_syncobjs[2];
uint32_t barrier_src_syncobj_count = 0;
struct vk_sync *barrier;
VkResult result;
assert(completions[stage]);
barrier_src_syncobjs[barrier_src_syncobj_count++] = completions[stage];
/* If there is a previous barrier we want to merge it with the new one.
*
* E.g.
* A <compute>, B <compute>,
* X <barrier src=compute, dst=graphics>,
* C <transfer>
* Y <barrier src=transfer, dst=graphics>,
* D <graphics>
*
 * X makes D wait for A and B, and Y makes D wait for C, so we want to merge
 * both the X and Y graphics vk_sync barriers before passing them to D.
*
* Note that this is the same as:
* A <compute>, B <compute>, C <transfer>
* X <barrier src=compute, dst=graphics>,
* Y <barrier src=transfer, dst=graphics>,
* D <graphics>
*
*/
if (barriers[stage])
barrier_src_syncobjs[barrier_src_syncobj_count++] = barriers[stage];
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&barrier);
if (result != VK_SUCCESS)
goto err_destroy_new_barriers;
result = device->ws->ops->null_job_submit(device->ws,
barrier_src_syncobjs,
barrier_src_syncobj_count,
barrier);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, barrier);
goto err_destroy_new_barriers;
}
new_barriers[stage] = barrier;
}
u_foreach_bit (stage, dst_mask) {
if (per_cmd_buffer_syncobjs[stage])
vk_sync_destroy(&device->vk, per_cmd_buffer_syncobjs[stage]);
per_cmd_buffer_syncobjs[stage] = completions[stage];
if (barriers[stage])
vk_sync_destroy(&device->vk, barriers[stage]);
barriers[stage] = new_barriers[stage];
}
return VK_SUCCESS;
err_destroy_new_barriers:
u_foreach_bit (stage, dst_mask) {
if (new_barriers[stage])
vk_sync_destroy(&device->vk, new_barriers[stage]);
}
err_destroy_completions:
u_foreach_bit (stage, dst_mask) {
if (completions[stage])
vk_sync_destroy(&device->vk, completions[stage]);
}
return result;
}
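
/* Dispatch an event sub command. Only barriers are handled so far; set,
 * reset and wait are still pvr_finishme() stubs.
 */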
static VkResult pvr_process_event_cmd(
struct pvr_device *device,
struct pvr_sub_cmd_event *sub_cmd,
struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
struct vk_sync *per_cmd_buffer_syncobjs[static PVR_JOB_TYPE_MAX],
struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
{
switch (sub_cmd->type) {
case PVR_EVENT_TYPE_SET:
case PVR_EVENT_TYPE_RESET:
case PVR_EVENT_TYPE_WAIT:
pvr_finishme("Add support for event sub command type: %d", sub_cmd->type);
return VK_SUCCESS;
case PVR_EVENT_TYPE_BARRIER:
return pvr_process_event_cmd_barrier(device,
sub_cmd,
barriers,
per_cmd_buffer_syncobjs,
per_submit_syncobjs,
queue_syncobjs,
previous_queue_syncobjs);
default:
unreachable("Invalid event sub-command type.");
}
}
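
/* Signal the submit's semaphores. All per-job-type completion syncobjs are
 * merged with a null job; the merged payload is then either moved directly
 * into a single signal semaphore or exported as a sync file and imported
 * into each signal semaphore.
 */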
static VkResult
pvr_set_semaphore_payloads(struct pvr_device *device,
struct vk_sync *completions[static PVR_JOB_TYPE_MAX],
const VkSemaphore *signals,
uint32_t signal_count)
{
struct vk_sync *sync;
VkResult result;
int fd = -1;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&sync);
if (result != VK_SUCCESS)
return result;
result = device->ws->ops->null_job_submit(device->ws,
completions,
PVR_JOB_TYPE_MAX,
sync);
if (result != VK_SUCCESS)
goto end_set_semaphore_payloads;
/* If we have a single signal semaphore, we can simply move the merged sync's
 * payload into the signal semaphore's payload.
 */
if (signal_count == 1U) {
VK_FROM_HANDLE(vk_semaphore, sem, signals[0]);
struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem);
result = vk_sync_move(&device->vk, sem_sync, sync);
goto end_set_semaphore_payloads;
}
result = vk_sync_export_sync_file(&device->vk, sync, &fd);
if (result != VK_SUCCESS)
goto end_set_semaphore_payloads;
for (uint32_t i = 0U; i < signal_count; i++) {
VK_FROM_HANDLE(vk_semaphore, sem, signals[i]);
struct vk_sync *sem_sync = vk_semaphore_get_active_sync(sem);
result = vk_sync_import_sync_file(&device->vk, sem_sync, fd);
if (result != VK_SUCCESS)
goto end_set_semaphore_payloads;
}
end_set_semaphore_payloads:
if (fd != -1)
close(fd);
vk_sync_destroy(&device->vk, sync);
return result;
}
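
/* Signal the submit's fence by merging all per-job-type completion syncobjs
 * with a null job and moving the merged payload into the fence's sync.
 */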
static VkResult
pvr_set_fence_payload(struct pvr_device *device,
struct vk_sync *completions[static PVR_JOB_TYPE_MAX],
VkFence _fence)
{
VK_FROM_HANDLE(vk_fence, fence, _fence);
struct vk_sync *fence_sync;
struct vk_sync *sync;
VkResult result;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&sync);
if (result != VK_SUCCESS)
return result;
result = device->ws->ops->null_job_submit(device->ws,
completions,
PVR_JOB_TYPE_MAX,
sync);
if (result != VK_SUCCESS) {
vk_sync_destroy(&device->vk, sync);
return result;
}
fence_sync = vk_fence_get_active_sync(fence);
result = vk_sync_move(&device->vk, fence_sync, sync);
vk_sync_destroy(&device->vk, sync);
return result;
}
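
/* For each job type, move the syncobj from src[] to dst[], destroying
 * whatever dst[] previously held for that type. Entries with no new syncobj
 * in src[] are left untouched.
 */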
static void pvr_update_syncobjs(struct pvr_device *device,
struct vk_sync *src[static PVR_JOB_TYPE_MAX],
struct vk_sync *dst[static PVR_JOB_TYPE_MAX])
{
for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
if (src[i]) {
if (dst[i])
vk_sync_destroy(&device->vk, dst[i]);
dst[i] = src[i];
}
}
}
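
/* Submit every sub command recorded in a command buffer, inserting the
 * implicit barriers needed around occlusion queries and around transfers
 * that must serialize with fragment work. The per-command-buffer completion
 * syncobjs are folded into the per-submit set once all sub commands have
 * been processed.
 */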
static VkResult pvr_process_cmd_buffer(
struct pvr_device *device,
struct pvr_queue *queue,
VkCommandBuffer commandBuffer,
struct vk_sync *barriers[static PVR_JOB_TYPE_MAX],
struct vk_sync **waits,
uint32_t wait_count,
uint32_t *stage_flags,
struct vk_sync *per_submit_syncobjs[static PVR_JOB_TYPE_MAX],
struct vk_sync *queue_syncobjs[static PVR_JOB_TYPE_MAX],
struct vk_sync *previous_queue_syncobjs[static PVR_JOB_TYPE_MAX])
{
struct vk_sync *per_cmd_buffer_syncobjs[PVR_JOB_TYPE_MAX] = {};
PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
VkResult result;
assert(cmd_buffer->vk.state == MESA_VK_COMMAND_BUFFER_STATE_EXECUTABLE);
list_for_each_entry_safe (struct pvr_sub_cmd,
sub_cmd,
&cmd_buffer->sub_cmds,
link) {
switch (sub_cmd->type) {
case PVR_SUB_CMD_TYPE_GRAPHICS: {
if (sub_cmd->gfx.has_occlusion_query) {
struct pvr_sub_cmd_event query_to_frag_barrier = {
.type = PVR_EVENT_TYPE_BARRIER,
.barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
},
};
/* If the fragment job uses occlusion queries, it needs to wait
 * for the occlusion query job to be processed in order to
 * preserve data integrity.
 */
result = pvr_process_event_cmd_barrier(device,
&frag_to_transfer_barrier,
barriers,
per_cmd_buffer_syncobjs,
per_submit_syncobjs,
queue_syncobjs,
previous_queue_syncobjs);
if (result != VK_SUCCESS)
break;
}
result = pvr_process_graphics_cmd(device,
queue,
cmd_buffer,
&sub_cmd->gfx,
barriers[PVR_JOB_TYPE_GEOM],
barriers[PVR_JOB_TYPE_FRAG],
waits,
wait_count,
stage_flags,
per_cmd_buffer_syncobjs);
break;
}
case PVR_SUB_CMD_TYPE_COMPUTE:
result = pvr_process_compute_cmd(device,
queue,
&sub_cmd->compute,
barriers[PVR_JOB_TYPE_COMPUTE],
waits,
wait_count,
stage_flags,
per_cmd_buffer_syncobjs);
break;
case PVR_SUB_CMD_TYPE_TRANSFER: {
const bool serialize_with_frag = sub_cmd->transfer.serialize_with_frag;
if (serialize_with_frag) {
struct pvr_sub_cmd_event frag_to_transfer_barrier = {
.type = PVR_EVENT_TYPE_BARRIER,
.barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
},
};
result = pvr_process_event_cmd_barrier(device,
&frag_to_transfer_barrier,
barriers,
per_cmd_buffer_syncobjs,
per_submit_syncobjs,
queue_syncobjs,
previous_queue_syncobjs);
if (result != VK_SUCCESS)
break;
}
result = pvr_process_transfer_cmds(device,
queue,
&sub_cmd->transfer,
barriers[PVR_JOB_TYPE_TRANSFER],
waits,
wait_count,
stage_flags,
per_cmd_buffer_syncobjs);
if (serialize_with_frag) {
struct pvr_sub_cmd_event transfer_to_frag_barrier = {
.type = PVR_EVENT_TYPE_BARRIER,
.barrier = {
.wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
.wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
},
};
if (result != VK_SUCCESS)
break;
result = pvr_process_event_cmd_barrier(device,
&transfer_to_frag_barrier,
barriers,
per_cmd_buffer_syncobjs,
per_submit_syncobjs,
queue_syncobjs,
previous_queue_syncobjs);
}
break;
}
case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
result = pvr_process_occlusion_query_cmd(
device,
queue,
&sub_cmd->compute,
barriers[PVR_JOB_TYPE_OCCLUSION_QUERY],
waits,
wait_count,
stage_flags,
per_cmd_buffer_syncobjs);
break;
case PVR_SUB_CMD_TYPE_EVENT:
result = pvr_process_event_cmd(device,
&sub_cmd->event,
barriers,
per_cmd_buffer_syncobjs,
per_submit_syncobjs,
queue_syncobjs,
previous_queue_syncobjs);
break;
default:
mesa_loge("Unsupported sub-command type %d", sub_cmd->type);
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
if (result != VK_SUCCESS)
return result;
p_atomic_inc(&device->global_cmd_buffer_submit_count);
}
pvr_update_syncobjs(device, per_cmd_buffer_syncobjs, per_submit_syncobjs);
return VK_SUCCESS;
}
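
/* Handle a submit with no command buffers: for every job type whose pipeline
 * stage appears in a wait's stage mask, submit a null job that consumes those
 * waits and produces a completion syncobj.
 */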
static VkResult
pvr_submit_null_job(struct pvr_device *device,
struct vk_sync **waits,
uint32_t wait_count,
uint32_t *stage_flags,
struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
{
VkResult result;
STATIC_ASSERT(PVR_JOB_TYPE_MAX >= PVR_NUM_SYNC_PIPELINE_STAGES);
for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) {
struct vk_sync *per_job_waits[wait_count];
uint32_t per_job_waits_count = 0;
/* Get the waits specific to the job type. */
for (uint32_t j = 0U; j < wait_count; j++) {
if (stage_flags[j] & (1U << i)) {
per_job_waits[per_job_waits_count] = waits[j];
per_job_waits_count++;
}
}
if (per_job_waits_count == 0U)
continue;
result = vk_sync_create(&device->vk,
&device->pdevice->ws->syncobj_type,
0U,
0UL,
&completions[i]);
if (result != VK_SUCCESS)
goto err_destroy_completion_syncs;
result = device->ws->ops->null_job_submit(device->ws,
per_job_waits,
per_job_waits_count,
completions[i]);
if (result != VK_SUCCESS)
goto err_destroy_completion_syncs;
}
return VK_SUCCESS;
err_destroy_completion_syncs:
for (uint32_t i = 0U; i < PVR_JOB_TYPE_MAX; i++) {
if (completions[i]) {
vk_sync_destroy(&device->vk, completions[i]);
completions[i] = NULL;
}
}
return result;
}
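
/* VkQueueSubmit entry point. For each VkSubmitInfo the wait semaphores are
 * gathered along with their destination stage masks, the command buffers (or
 * a null job if there are none) are submitted, and the signal semaphores are
 * signaled from the per-submit completion syncobjs. Once all submits are
 * processed, the optional fence and the queue-level completion syncobjs used
 * by vkQueueWaitIdle are updated.
 */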
VkResult pvr_QueueSubmit(VkQueue _queue,
uint32_t submitCount,
const VkSubmitInfo *pSubmits,
VkFence fence)
{
PVR_FROM_HANDLE(pvr_queue, queue, _queue);
struct vk_sync *completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
struct pvr_device *device = queue->device;
VkResult result;
for (uint32_t i = 0U; i < submitCount; i++) {
struct vk_sync *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
const VkSubmitInfo *desc = &pSubmits[i];
struct vk_sync *waits[desc->waitSemaphoreCount];
uint32_t stage_flags[desc->waitSemaphoreCount];
uint32_t wait_count = 0;
for (uint32_t j = 0U; j < desc->waitSemaphoreCount; j++) {
VK_FROM_HANDLE(vk_semaphore, semaphore, desc->pWaitSemaphores[j]);
struct vk_sync *sync = vk_semaphore_get_active_sync(semaphore);
if (sync->type == &vk_sync_dummy_type)
continue;
/* We don't currently support timeline semaphores. */
assert(!(sync->flags & VK_SYNC_IS_TIMELINE));
stage_flags[wait_count] =
pvr_stage_mask_dst(desc->pWaitDstStageMask[j]);
waits[wait_count] = sync;
wait_count++;
}
if (desc->commandBufferCount > 0U) {
for (uint32_t j = 0U; j < desc->commandBufferCount; j++) {
result = pvr_process_cmd_buffer(device,
queue,
desc->pCommandBuffers[j],
queue->job_dependancy,
waits,
wait_count,
stage_flags,
per_submit_completion_syncobjs,
completion_syncobjs,
queue->completion);
if (result != VK_SUCCESS)
return result;
}
} else {
result = pvr_submit_null_job(device,
waits,
wait_count,
stage_flags,
per_submit_completion_syncobjs);
if (result != VK_SUCCESS)
return result;
}
if (desc->signalSemaphoreCount) {
result = pvr_set_semaphore_payloads(device,
per_submit_completion_syncobjs,
desc->pSignalSemaphores,
desc->signalSemaphoreCount);
if (result != VK_SUCCESS)
return result;
}
pvr_update_syncobjs(device,
per_submit_completion_syncobjs,
completion_syncobjs);
}
if (fence) {
result = pvr_set_fence_payload(device, completion_syncobjs, fence);
if (result != VK_SUCCESS)
return result;
}
pvr_update_syncobjs(device, completion_syncobjs, queue->completion);
return VK_SUCCESS;
}