diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 5ecf11ce530..130720a53bc 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -1045,6 +1045,9 @@ r3d_setup(struct tu_cmd_buffer *cmd, tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0)); tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0)); + tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL, + A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2)); + if (cmd->state.predication_active) { tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1); tu_cs_emit(cs, 0); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index bd80aabf7af..d29ec1d3cb6 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -279,6 +279,26 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd, unsigned layers = MAX2(fb->layers, util_logbase2(subpass->multiview_mask) + 1); tu_cs_emit_regs(cs, A6XX_GRAS_MAX_LAYER_INDEX(layers - 1)); + + tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL, + A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2)); + + /* If there is a feedback loop, then the shader can read the previous value + * of a pixel being written out. It can also write some components and then + * read different components without a barrier in between. This is a + * problem in sysmem mode with UBWC, because the main buffer and flags + * buffer can get out-of-sync if only one is flushed. We fix this by + * setting the SINGLE_PRIM_MODE field to the same value that the blob does + * for advanced_blend in sysmem mode if a feedback loop is detected. The write just above programs the value without SINGLE_PRIM_MODE for every other case. 
+ */ + if (subpass->feedback) { + tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM); + tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL, + A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) | + A6XX_GRAS_SC_CNTL_SINGLE_PRIM_MODE( + FLUSH_PER_OVERLAP_AND_OVERWRITE)); + tu_cond_exec_end(cs); + } } void @@ -783,8 +803,6 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL, - A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2)); tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0); tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0); diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c index a76abe9fa5c..c67b3e6f1e1 100644 --- a/src/freedreno/vulkan/tu_pass.c +++ b/src/freedreno/vulkan/tu_pass.c @@ -403,6 +403,36 @@ tu_render_pass_patch_input_gmem(struct tu_render_pass *pass) } } +static void +tu_render_pass_check_feedback_loop(struct tu_render_pass *pass) +{ + for (unsigned i = 0; i < pass->subpass_count; i++) { + struct tu_subpass *subpass = &pass->subpasses[i]; + + for (unsigned j = 0; j < subpass->color_count; j++) { + uint32_t a = subpass->color_attachments[j].attachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; + for (unsigned k = 0; k < subpass->input_count; k++) { + if (subpass->input_attachments[k].attachment == a) { + subpass->feedback = true; + break; + } + } + } + + if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { + for (unsigned k = 0; k < subpass->input_count; k++) { + if (subpass->input_attachments[k].attachment == + subpass->depth_stencil_attachment.attachment) { + subpass->feedback = true; + break; + } + } + } + } +} + static void update_samples(struct tu_subpass *subpass, VkSampleCountFlagBits samples) { @@ -708,6 +738,8 @@ tu_CreateRenderPass2(VkDevice _device, 
tu_render_pass_patch_input_gmem(pass); + tu_render_pass_check_feedback_loop(pass); + /* disable unused attachments */ for (uint32_t i = 0; i < pass->attachment_count; i++) { struct tu_render_pass_attachment *att = &pass->attachments[i]; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index eaadf028da6..d87d64ac2ac 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1566,6 +1566,9 @@ struct tu_subpass uint32_t resolve_count; bool resolve_depth_stencil; + /* True if there is any feedback loop at all, i.e. a color or depth/stencil attachment of this subpass is also one of its input attachments. */ + bool feedback; + /* True if we must invalidate UCHE thanks to a feedback loop. */ bool feedback_invalidate;