turnip: Use LATE_Z when there might be a depth/stencil feedback loop
Otherwise a shader invocation would read a value that should have
been set only AFTER this shader invocation.
Fixes tests:
dEQP-VK.rasterization.rasterization_order_attachment_access.depth.samples_1.multi_draw_barriers
dEQP-VK.rasterization.rasterization_order_attachment_access.stencil.samples_1.multi_draw_barriers
Fixes: 71595a189a ("tu: Fix feedback loops in sysmem mode")
Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15106>
This commit is contained in:

committed by
Marge Bot

parent
d10fd5b7c9
commit
dab34bd5c8
@@ -316,7 +316,7 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
|
|||||||
* setting the SINGLE_PRIM_MODE field to the same value that the blob does
|
* setting the SINGLE_PRIM_MODE field to the same value that the blob does
|
||||||
* for advanced_blend in sysmem mode if a feedback loop is detected.
|
* for advanced_blend in sysmem mode if a feedback loop is detected.
|
||||||
*/
|
*/
|
||||||
if (subpass->feedback) {
|
if (subpass->feedback_loop_color || subpass->feedback_loop_ds) {
|
||||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
||||||
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
|
tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
|
||||||
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) |
|
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) |
|
||||||
@@ -3879,7 +3879,8 @@ tu6_build_depth_plane_z_mode(struct tu_cmd_buffer *cmd)
|
|||||||
bool depth_write = tu6_writes_depth(cmd, depth_test_enable);
|
bool depth_write = tu6_writes_depth(cmd, depth_test_enable);
|
||||||
bool stencil_write = tu6_writes_stencil(cmd);
|
bool stencil_write = tu6_writes_stencil(cmd);
|
||||||
|
|
||||||
if (cmd->state.pipeline->lrz.fs_has_kill &&
|
if ((cmd->state.pipeline->lrz.fs_has_kill ||
|
||||||
|
cmd->state.pipeline->subpass_feedback_loop_ds) &&
|
||||||
(depth_write || stencil_write)) {
|
(depth_write || stencil_write)) {
|
||||||
zmode = cmd->state.lrz.valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
|
zmode = cmd->state.lrz.valid ? A6XX_EARLY_LRZ_LATE_Z : A6XX_LATE_Z;
|
||||||
}
|
}
|
||||||
|
@@ -481,7 +481,7 @@ tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
|
|||||||
continue;
|
continue;
|
||||||
for (unsigned k = 0; k < subpass->input_count; k++) {
|
for (unsigned k = 0; k < subpass->input_count; k++) {
|
||||||
if (subpass->input_attachments[k].attachment == a) {
|
if (subpass->input_attachments[k].attachment == a) {
|
||||||
subpass->feedback = true;
|
subpass->feedback_loop_color = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -491,7 +491,7 @@ tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
|
|||||||
for (unsigned k = 0; k < subpass->input_count; k++) {
|
for (unsigned k = 0; k < subpass->input_count; k++) {
|
||||||
if (subpass->input_attachments[k].attachment ==
|
if (subpass->input_attachments[k].attachment ==
|
||||||
subpass->depth_stencil_attachment.attachment) {
|
subpass->depth_stencil_attachment.attachment) {
|
||||||
subpass->feedback = true;
|
subpass->feedback_loop_ds = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -273,6 +273,8 @@ struct tu_pipeline_builder
|
|||||||
VkFormat depth_attachment_format;
|
VkFormat depth_attachment_format;
|
||||||
uint32_t render_components;
|
uint32_t render_components;
|
||||||
uint32_t multiview_mask;
|
uint32_t multiview_mask;
|
||||||
|
|
||||||
|
bool subpass_feedback_loop_ds;
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
@@ -3174,6 +3176,7 @@ tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
|
|||||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||||
|
|
||||||
(*pipeline)->layout = builder->layout;
|
(*pipeline)->layout = builder->layout;
|
||||||
|
(*pipeline)->subpass_feedback_loop_ds = builder->subpass_feedback_loop_ds;
|
||||||
(*pipeline)->executables_mem_ctx = ralloc_context(NULL);
|
(*pipeline)->executables_mem_ctx = ralloc_context(NULL);
|
||||||
util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
|
util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
|
||||||
|
|
||||||
@@ -3287,6 +3290,8 @@ tu_pipeline_builder_init_graphics(
|
|||||||
const struct tu_subpass *subpass =
|
const struct tu_subpass *subpass =
|
||||||
&pass->subpasses[create_info->subpass];
|
&pass->subpasses[create_info->subpass];
|
||||||
|
|
||||||
|
builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds;
|
||||||
|
|
||||||
builder->multiview_mask = subpass->multiview_mask;
|
builder->multiview_mask = subpass->multiview_mask;
|
||||||
|
|
||||||
builder->rasterizer_discard =
|
builder->rasterizer_discard =
|
||||||
|
@@ -1361,6 +1361,8 @@ struct tu_pipeline
|
|||||||
|
|
||||||
struct tu_lrz_pipeline lrz;
|
struct tu_lrz_pipeline lrz;
|
||||||
|
|
||||||
|
bool subpass_feedback_loop_ds;
|
||||||
|
|
||||||
/* Base drawcall cost for sysmem vs gmem autotuner */
|
/* Base drawcall cost for sysmem vs gmem autotuner */
|
||||||
uint8_t drawcall_base_cost;
|
uint8_t drawcall_base_cost;
|
||||||
|
|
||||||
@@ -1695,8 +1697,8 @@ struct tu_subpass
|
|||||||
uint32_t resolve_count;
|
uint32_t resolve_count;
|
||||||
bool resolve_depth_stencil;
|
bool resolve_depth_stencil;
|
||||||
|
|
||||||
/* True if there is any feedback loop at all. */
|
bool feedback_loop_color;
|
||||||
bool feedback;
|
bool feedback_loop_ds;
|
||||||
|
|
||||||
/* True if we must invalidate UCHE thanks to a feedback loop. */
|
/* True if we must invalidate UCHE thanks to a feedback loop. */
|
||||||
bool feedback_invalidate;
|
bool feedback_invalidate;
|
||||||
|
Reference in New Issue
Block a user