turnip: Move gmem clears and loads to the first subpass that uses them.
This will help us share gmem space between attachments that aren't used at the same time. It's also a correctness fix for VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, because the loads and clears are supposed to happen at the first subpass using the attachment, not at the start of the render pass. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20994>
This commit is contained in:
@@ -756,7 +756,7 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd,
|
||||
|
||||
/* Optimization: there is no reason to load gmem if there is no
|
||||
* geometry to process. COND_REG_EXEC predicate is set here,
|
||||
* but the actual skip happens in tu6_emit_tile_load() and tile_store_cs,
|
||||
* but the actual skip happens in tu_load_gmem_attachment() and tile_store_cs,
|
||||
* for each blit separately.
|
||||
*/
|
||||
static void
|
||||
@@ -958,17 +958,6 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
tu6_emit_blit_scissor(cmd, cs, true);
|
||||
|
||||
const bool cond_exec_allowed = cmd->state.tiling->binning &&
|
||||
cmd->state.pass->has_cond_load_store;
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
||||
tu_load_gmem_attachment(cmd, cs, i, cond_exec_allowed, false);
|
||||
}
|
||||
|
||||
static void
|
||||
tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
@@ -1466,23 +1455,12 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
if (cmd->state.pass->has_fdm)
|
||||
tu_cs_set_writeable(cs, true);
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
||||
|
||||
tu6_emit_tile_load(cmd, cs);
|
||||
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
||||
tu_clear_gmem_attachment(cmd, cs, i);
|
||||
|
||||
tu_cond_exec_end(cs);
|
||||
|
||||
if (cmd->state.pass->has_fdm)
|
||||
tu_cs_set_writeable(cs, false);
|
||||
|
||||
/* Emit sysmem loads and clears, which we do all of in one cond block at the
|
||||
* beginning of the render pass.
|
||||
*
|
||||
* gmem loads and clears happen per-subpass, so we can reuse gmem space
|
||||
* between attachments in separate subpasses.
|
||||
*/
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
||||
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
||||
@@ -3600,13 +3578,64 @@ tu_subpass_barrier(struct tu_cmd_buffer *cmd_buffer,
|
||||
tu_flush_for_stage(cache, src_stage, dst_stage);
|
||||
}
|
||||
|
||||
/* emit mrt/zs/msaa/ubwc state for the subpass that is starting (either at
|
||||
* vkCmdBeginRenderPass2() or vkCmdNextSubpass2())
|
||||
static void
|
||||
tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
uint32_t subpass_idx = cmd->state.subpass - cmd->state.pass->subpasses;
|
||||
|
||||
/* If we might choose to bin, then put the loads under a check for geometry
|
||||
* having been binned to this tile. If we don't choose to bin in the end,
|
||||
* then we will have manually set those registers to say geometry is present.
|
||||
*
|
||||
* However, if the draw CS has a write to the condition for some other reason
|
||||
* (perf queries), then we can't do this optimization since the
|
||||
* start-of-the-CS geometry condition will have been overwritten.
|
||||
*/
|
||||
bool cond_load_allowed = cmd->state.tiling->binning &&
|
||||
cmd->state.pass->has_cond_load_store &&
|
||||
!cmd->state.rp.draw_cs_writes_to_cond_pred;
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
||||
|
||||
/* Emit gmem loads that are first used in this subpass. */
|
||||
bool emitted_scissor = false;
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) {
|
||||
struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[i];
|
||||
if ((att->load || att->load_stencil) && att->first_subpass_idx == subpass_idx) {
|
||||
if (!emitted_scissor) {
|
||||
tu6_emit_blit_scissor(cmd, cs, true);
|
||||
emitted_scissor = true;
|
||||
}
|
||||
tu_load_gmem_attachment(cmd, cs, i, cond_load_allowed, false);
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit gmem clears that are first used in this subpass. */
|
||||
emitted_scissor = false;
|
||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) {
|
||||
struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[i];
|
||||
if (att->clear_mask && att->first_subpass_idx == subpass_idx) {
|
||||
if (!emitted_scissor) {
|
||||
tu6_emit_blit_scissor(cmd, cs, false);
|
||||
emitted_scissor = true;
|
||||
}
|
||||
tu_clear_gmem_attachment(cmd, cs, i);
|
||||
}
|
||||
}
|
||||
|
||||
tu_cond_exec_end(cs); /* CP_COND_EXEC_0_RENDER_MODE_GMEM */
|
||||
}
|
||||
|
||||
/* emit gmem loads/clears, and mrt/zs/msaa/ubwc state for the subpass that is
|
||||
* starting (either at vkCmdBeginRenderPass2() or vkCmdNextSubpass2())
|
||||
*/
|
||||
static void
|
||||
tu_emit_subpass_begin(struct tu_cmd_buffer *cmd)
|
||||
{
|
||||
tu_fill_render_pass_state(&cmd->state.vk_rp, cmd->state.pass, cmd->state.subpass);
|
||||
|
||||
tu_emit_subpass_begin_gmem(cmd);
|
||||
tu6_emit_zs(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
tu6_emit_mrt(cmd, cmd->state.subpass, &cmd->draw_cs);
|
||||
tu6_emit_render_cntl(cmd, cmd->state.subpass, &cmd->draw_cs, false);
|
||||
|
@@ -796,11 +796,8 @@ tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const
|
||||
update_samples(subpass, pCreateInfo->pAttachments[a].samples);
|
||||
att->clear_views |= subpass->multiview_mask;
|
||||
|
||||
/* Loads and clears are emitted at vkBeginRenderPass() time. */
|
||||
if (att->clear_mask || att->load || att->load_stencil)
|
||||
att->first_subpass_idx = 0;
|
||||
else
|
||||
att->first_subpass_idx = MIN2(i, att->first_subpass_idx);
|
||||
/* Loads and clears are emitted at the start of the subpass that needs them. */
|
||||
att->first_subpass_idx = MIN2(i, att->first_subpass_idx);
|
||||
|
||||
/* Stores are emitted at vkEndRenderPass() time. */
|
||||
if (att->store || att->store_stencil)
|
||||
|
Reference in New Issue
Block a user