pvr: Split render job submission for multi-layer framebuffers
Signed-off-by: Matt Coster <matt.coster@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20932>
This commit is contained in:
@@ -91,6 +91,7 @@ static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
|
|||||||
case PVR_SUB_CMD_TYPE_GRAPHICS:
|
case PVR_SUB_CMD_TYPE_GRAPHICS:
|
||||||
util_dynarray_fini(&sub_cmd->gfx.sec_query_indices);
|
util_dynarray_fini(&sub_cmd->gfx.sec_query_indices);
|
||||||
pvr_csb_finish(&sub_cmd->gfx.control_stream);
|
pvr_csb_finish(&sub_cmd->gfx.control_stream);
|
||||||
|
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.terminate_ctrl_stream);
|
||||||
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
|
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
|
||||||
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
|
pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
|
||||||
break;
|
break;
|
||||||
@@ -343,22 +344,25 @@ err_free_depth_bias_bo:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer,
|
pvr_cmd_buffer_emit_ppp_state(const struct pvr_cmd_buffer *const cmd_buffer,
|
||||||
struct pvr_sub_cmd_gfx *const sub_cmd)
|
struct pvr_csb *const csb)
|
||||||
{
|
{
|
||||||
struct pvr_framebuffer *framebuffer =
|
const struct pvr_framebuffer *const framebuffer =
|
||||||
cmd_buffer->state.render_pass_info.framebuffer;
|
cmd_buffer->state.render_pass_info.framebuffer;
|
||||||
|
|
||||||
pvr_csb_emit (&sub_cmd->control_stream, VDMCTRL_PPP_STATE0, state0) {
|
assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS ||
|
||||||
|
csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED);
|
||||||
|
|
||||||
|
pvr_csb_emit (csb, VDMCTRL_PPP_STATE0, state0) {
|
||||||
state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr;
|
state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr;
|
||||||
state0.word_count = framebuffer->ppp_state_size;
|
state0.word_count = framebuffer->ppp_state_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
pvr_csb_emit (&sub_cmd->control_stream, VDMCTRL_PPP_STATE1, state1) {
|
pvr_csb_emit (csb, VDMCTRL_PPP_STATE1, state1) {
|
||||||
state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr;
|
state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr;
|
||||||
}
|
}
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return csb->status;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
|
VkResult pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
|
||||||
@@ -559,6 +563,44 @@ err_free_usc_pixel_program:
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static VkResult pvr_sub_cmd_gfx_build_terminate_ctrl_stream(
|
||||||
|
struct pvr_device *const device,
|
||||||
|
const struct pvr_cmd_buffer *const cmd_buffer,
|
||||||
|
struct pvr_sub_cmd_gfx *const gfx_sub_cmd)
|
||||||
|
{
|
||||||
|
struct list_head bo_list;
|
||||||
|
struct pvr_csb csb;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
pvr_csb_init(device, PVR_CMD_STREAM_TYPE_GRAPHICS, &csb);
|
||||||
|
|
||||||
|
result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer, &csb);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_csb_finish;
|
||||||
|
|
||||||
|
result = pvr_csb_emit_terminate(&csb);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_csb_finish;
|
||||||
|
|
||||||
|
result = pvr_csb_bake(&csb, &bo_list);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_csb_finish;
|
||||||
|
|
||||||
|
/* This is a trivial control stream, there's no reason it should ever require
|
||||||
|
* more memory than a single bo can provide.
|
||||||
|
*/
|
||||||
|
assert(list_is_singular(&bo_list));
|
||||||
|
gfx_sub_cmd->terminate_ctrl_stream =
|
||||||
|
list_first_entry(&bo_list, struct pvr_bo, link);
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
|
||||||
|
err_csb_finish:
|
||||||
|
pvr_csb_finish(&csb);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
|
pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
|
||||||
const struct pvr_load_op *load_op,
|
const struct pvr_load_op *load_op,
|
||||||
@@ -1535,7 +1577,18 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer, gfx_sub_cmd);
|
if (pvr_sub_cmd_gfx_requires_split_submit(gfx_sub_cmd)) {
|
||||||
|
result = pvr_sub_cmd_gfx_build_terminate_ctrl_stream(device,
|
||||||
|
cmd_buffer,
|
||||||
|
gfx_sub_cmd);
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
state->status = result;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer,
|
||||||
|
&gfx_sub_cmd->control_stream);
|
||||||
if (result != VK_SUCCESS) {
|
if (result != VK_SUCCESS) {
|
||||||
state->status = result;
|
state->status = result;
|
||||||
return result;
|
return result;
|
||||||
|
@@ -659,6 +659,9 @@ struct pvr_sub_cmd_gfx {
|
|||||||
/* Control stream builder object */
|
/* Control stream builder object */
|
||||||
struct pvr_csb control_stream;
|
struct pvr_csb control_stream;
|
||||||
|
|
||||||
|
/* Required iff pvr_sub_cmd_gfx_requires_split_submit() returns true. */
|
||||||
|
struct pvr_bo *terminate_ctrl_stream;
|
||||||
|
|
||||||
uint32_t hw_render_idx;
|
uint32_t hw_render_idx;
|
||||||
|
|
||||||
uint32_t max_tiles_in_flight;
|
uint32_t max_tiles_in_flight;
|
||||||
@@ -1548,6 +1551,12 @@ pvr_stage_mask_dst(VkPipelineStageFlags2KHR stage_mask)
|
|||||||
return pvr_stage_mask(stage_mask);
|
return pvr_stage_mask(stage_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool pvr_sub_cmd_gfx_requires_split_submit(
|
||||||
|
const struct pvr_sub_cmd_gfx *const sub_cmd)
|
||||||
|
{
|
||||||
|
return sub_cmd->job.run_frag && sub_cmd->framebuffer->layers > 1;
|
||||||
|
}
|
||||||
|
|
||||||
VkResult pvr_pds_fragment_program_create_and_upload(
|
VkResult pvr_pds_fragment_program_create_and_upload(
|
||||||
struct pvr_device *device,
|
struct pvr_device *device,
|
||||||
const VkAllocationCallbacks *allocator,
|
const VkAllocationCallbacks *allocator,
|
||||||
|
@@ -205,6 +205,150 @@ VkResult pvr_QueueWaitIdle(VkQueue _queue)
|
|||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
pvr_process_graphics_cmd_part(struct pvr_device *const device,
|
||||||
|
struct pvr_render_ctx *const gfx_ctx,
|
||||||
|
struct pvr_render_job *const job,
|
||||||
|
struct vk_sync *const geom_barrier,
|
||||||
|
struct vk_sync *const frag_barrier,
|
||||||
|
struct vk_sync **const geom_completion,
|
||||||
|
struct vk_sync **const frag_completion,
|
||||||
|
struct vk_sync **const waits,
|
||||||
|
const uint32_t wait_count,
|
||||||
|
uint32_t *const stage_flags)
|
||||||
|
{
|
||||||
|
struct vk_sync *geom_sync = NULL;
|
||||||
|
struct vk_sync *frag_sync = NULL;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
/* For each of geom and frag, a completion sync is optional but only allowed
|
||||||
|
* iff barrier is present.
|
||||||
|
*/
|
||||||
|
assert(geom_barrier || !geom_completion);
|
||||||
|
assert(frag_barrier || !frag_completion);
|
||||||
|
|
||||||
|
if (geom_barrier) {
|
||||||
|
result = vk_sync_create(&device->vk,
|
||||||
|
&device->pdevice->ws->syncobj_type,
|
||||||
|
0U,
|
||||||
|
0UL,
|
||||||
|
&geom_sync);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (frag_barrier) {
|
||||||
|
result = vk_sync_create(&device->vk,
|
||||||
|
&device->pdevice->ws->syncobj_type,
|
||||||
|
0U,
|
||||||
|
0UL,
|
||||||
|
&frag_sync);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_destroy_sync_geom;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = pvr_render_job_submit(gfx_ctx,
|
||||||
|
job,
|
||||||
|
geom_barrier,
|
||||||
|
frag_barrier,
|
||||||
|
waits,
|
||||||
|
wait_count,
|
||||||
|
stage_flags,
|
||||||
|
geom_sync,
|
||||||
|
frag_sync);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
goto err_destroy_sync_frag;
|
||||||
|
|
||||||
|
/* Replace the completion fences. */
|
||||||
|
if (geom_sync) {
|
||||||
|
if (*geom_completion)
|
||||||
|
vk_sync_destroy(&device->vk, *geom_completion);
|
||||||
|
|
||||||
|
*geom_completion = geom_sync;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (frag_sync) {
|
||||||
|
if (*frag_completion)
|
||||||
|
vk_sync_destroy(&device->vk, *frag_completion);
|
||||||
|
|
||||||
|
*frag_completion = frag_sync;
|
||||||
|
}
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
|
||||||
|
err_destroy_sync_frag:
|
||||||
|
if (frag_sync)
|
||||||
|
vk_sync_destroy(&device->vk, frag_sync);
|
||||||
|
|
||||||
|
err_destroy_sync_geom:
|
||||||
|
if (geom_sync)
|
||||||
|
vk_sync_destroy(&device->vk, geom_sync);
|
||||||
|
|
||||||
|
err_out:
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
pvr_process_split_graphics_cmd(struct pvr_device *const device,
|
||||||
|
struct pvr_render_ctx *const gfx_ctx,
|
||||||
|
struct pvr_sub_cmd_gfx *sub_cmd,
|
||||||
|
struct vk_sync *const geom_barrier,
|
||||||
|
struct vk_sync *const frag_barrier,
|
||||||
|
struct vk_sync **const geom_completion,
|
||||||
|
struct vk_sync **const frag_completion,
|
||||||
|
struct vk_sync **const waits,
|
||||||
|
const uint32_t wait_count,
|
||||||
|
uint32_t *const stage_flags)
|
||||||
|
{
|
||||||
|
struct pvr_render_job *const job = &sub_cmd->job;
|
||||||
|
const pvr_dev_addr_t original_ctrl_stream_addr = job->ctrl_stream_addr;
|
||||||
|
const bool original_geometry_terminate = job->geometry_terminate;
|
||||||
|
const bool original_run_frag = job->run_frag;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
/* First submit must not touch fragment work. */
|
||||||
|
job->geometry_terminate = false;
|
||||||
|
job->run_frag = false;
|
||||||
|
|
||||||
|
result = pvr_process_graphics_cmd_part(device,
|
||||||
|
gfx_ctx,
|
||||||
|
job,
|
||||||
|
geom_barrier,
|
||||||
|
NULL,
|
||||||
|
geom_completion,
|
||||||
|
NULL,
|
||||||
|
waits,
|
||||||
|
wait_count,
|
||||||
|
stage_flags);
|
||||||
|
|
||||||
|
job->geometry_terminate = original_geometry_terminate;
|
||||||
|
job->run_frag = original_run_frag;
|
||||||
|
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
/* Second submit contains only a trivial control stream to terminate the
|
||||||
|
* geometry work.
|
||||||
|
*/
|
||||||
|
assert(sub_cmd->terminate_ctrl_stream);
|
||||||
|
job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr;
|
||||||
|
|
||||||
|
result = pvr_process_graphics_cmd_part(device,
|
||||||
|
gfx_ctx,
|
||||||
|
job,
|
||||||
|
NULL,
|
||||||
|
frag_barrier,
|
||||||
|
NULL,
|
||||||
|
frag_completion,
|
||||||
|
waits,
|
||||||
|
wait_count,
|
||||||
|
stage_flags);
|
||||||
|
|
||||||
|
job->ctrl_stream_addr = original_ctrl_stream_addr;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
pvr_process_graphics_cmd(struct pvr_device *device,
|
pvr_process_graphics_cmd(struct pvr_device *device,
|
||||||
struct pvr_queue *queue,
|
struct pvr_queue *queue,
|
||||||
@@ -217,66 +361,39 @@ pvr_process_graphics_cmd(struct pvr_device *device,
|
|||||||
uint32_t *stage_flags,
|
uint32_t *stage_flags,
|
||||||
struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
|
struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
|
||||||
{
|
{
|
||||||
const struct pvr_framebuffer *framebuffer = sub_cmd->framebuffer;
|
|
||||||
struct vk_sync *sync_geom;
|
|
||||||
struct vk_sync *sync_frag;
|
|
||||||
VkResult result;
|
|
||||||
|
|
||||||
result = vk_sync_create(&device->vk,
|
|
||||||
&device->pdevice->ws->syncobj_type,
|
|
||||||
0U,
|
|
||||||
0UL,
|
|
||||||
&sync_geom);
|
|
||||||
if (result != VK_SUCCESS)
|
|
||||||
return result;
|
|
||||||
|
|
||||||
result = vk_sync_create(&device->vk,
|
|
||||||
&device->pdevice->ws->syncobj_type,
|
|
||||||
0U,
|
|
||||||
0UL,
|
|
||||||
&sync_frag);
|
|
||||||
if (result != VK_SUCCESS) {
|
|
||||||
vk_sync_destroy(&device->vk, sync_geom);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* FIXME: DoShadowLoadOrStore() */
|
/* FIXME: DoShadowLoadOrStore() */
|
||||||
|
|
||||||
/* FIXME: If the framebuffer being rendered to has multiple layers then we
|
/* Perform two render submits when using multiple framebuffer layers. The
|
||||||
* need to split submissions that run a fragment job into two.
|
* first submit contains just geometry, while the second only terminates
|
||||||
|
* (and triggers the fragment render if originally specified). This is needed
|
||||||
|
* because the render target cache gets cleared on terminating submits, which
|
||||||
|
* could result in missing primitives.
|
||||||
*/
|
*/
|
||||||
if (sub_cmd->job.run_frag && framebuffer->layers > 1)
|
if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
|
||||||
pvr_finishme("Split job submission for framebuffers with > 1 layers");
|
return pvr_process_split_graphics_cmd(device,
|
||||||
|
queue->gfx_ctx,
|
||||||
result = pvr_render_job_submit(queue->gfx_ctx,
|
sub_cmd,
|
||||||
&sub_cmd->job,
|
barrier_geom,
|
||||||
barrier_geom,
|
barrier_frag,
|
||||||
barrier_frag,
|
&completions[PVR_JOB_TYPE_GEOM],
|
||||||
waits,
|
&completions[PVR_JOB_TYPE_FRAG],
|
||||||
wait_count,
|
waits,
|
||||||
stage_flags,
|
wait_count,
|
||||||
sync_geom,
|
stage_flags);
|
||||||
sync_frag);
|
|
||||||
if (result != VK_SUCCESS) {
|
|
||||||
vk_sync_destroy(&device->vk, sync_geom);
|
|
||||||
vk_sync_destroy(&device->vk, sync_frag);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Replace the completion fences. */
|
return pvr_process_graphics_cmd_part(device,
|
||||||
if (completions[PVR_JOB_TYPE_GEOM])
|
queue->gfx_ctx,
|
||||||
vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_GEOM]);
|
&sub_cmd->job,
|
||||||
|
barrier_geom,
|
||||||
completions[PVR_JOB_TYPE_GEOM] = sync_geom;
|
barrier_frag,
|
||||||
|
&completions[PVR_JOB_TYPE_GEOM],
|
||||||
if (completions[PVR_JOB_TYPE_FRAG])
|
&completions[PVR_JOB_TYPE_FRAG],
|
||||||
vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_FRAG]);
|
waits,
|
||||||
|
wait_count,
|
||||||
completions[PVR_JOB_TYPE_FRAG] = sync_frag;
|
stage_flags);
|
||||||
|
|
||||||
/* FIXME: DoShadowLoadOrStore() */
|
/* FIXME: DoShadowLoadOrStore() */
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
|
Reference in New Issue
Block a user