intel: fix null render target setup logic

Or current render target cache setting is to key on the binding table
index, meaning the HW associates a number in the range [0, 7] to a
RENDER_SURFACE_STATE description. If you want change the render target
0 between 2 draw calls, you need to insert a PIPE_CONTROL in between
the 2 draw calls with pb-stall + rt-flush in order to flush an writes
to a previous RENDER_SURFACE_STATE that has now becomed disassociated
with the [0, 7] number.

This PIPE_CONTROL taking care of the flush is dealt with in
cmd_buffer_maybe_flush_rt_writes(). This function diffs the current
BTI setup for render targets (first 0 to 7 BTIs) with what the next
fragment shader wants.

The issue here is we might have a render pass with 0 color attachments
and yet in 98cdb9349a we added one pointing to the render target 0,
but in the emit_binding_table() when we finally program the BTI, we
check the render pass color count and program a null surface state
instead of an actual surface state. And this leads to hangs because
the render target cache will end up with inconsistent state data.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 98cdb9349a ("anv: ensure null-rt bit in compiler isn't used when there is ds attachment")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12955
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34603>
(cherry picked from commit 63f633557ff53726b7bc8e2292f330ace8624be0)
This commit is contained in:
Lionel Landwerlin
2025-04-18 12:06:25 +03:00
committed by Eric Engestrom
parent 234d66a1e2
commit d8cf36fbb1
4 changed files with 24 additions and 26 deletions

View File

@@ -314,7 +314,7 @@
"description": "intel: fix null render target setup logic", "description": "intel: fix null render target setup logic",
"nominated": true, "nominated": true,
"nomination_type": 2, "nomination_type": 2,
"resolution": 0, "resolution": 1,
"main_sha": null, "main_sha": null,
"because_sha": "98cdb9349a7fa181c3895655d217589f909a7beb", "because_sha": "98cdb9349a7fa181c3895655d217589f909a7beb",
"notes": null "notes": null

View File

@@ -154,6 +154,13 @@ brw_nir_fs_needs_null_rt(const struct intel_device_info *devinfo,
if (devinfo->ver < 11) if (devinfo->ver < 11)
return true; return true;
/* Depth/Stencil needs a valid render target even if there is no color
* output.
*/
if (nir->info.outputs_written & (BITFIELD_BIT(FRAG_RESULT_DEPTH) |
BITFIELD_BIT(FRAG_RESULT_STENCIL)))
return true;
uint64_t relevant_outputs = 0; uint64_t relevant_outputs = 0;
if (multisample_fbo) if (multisample_fbo)
relevant_outputs |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); relevant_outputs |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);

View File

@@ -456,15 +456,6 @@ rp_color_mask(const struct vk_render_pass_state *rp)
color_mask |= BITFIELD_BIT(i); color_mask |= BITFIELD_BIT(i);
} }
/* If there is depth/stencil attachment, even if the fragment shader
* doesn't write the depth/stencil output, we need a valid render target so
* that the compiler doesn't use the null-rt which would cull the
* depth/stencil output.
*/
if (rp->depth_attachment_format != VK_FORMAT_UNDEFINED ||
rp->stencil_attachment_format != VK_FORMAT_UNDEFINED)
color_mask |= 1;
return color_mask; return color_mask;
} }
@@ -1481,23 +1472,13 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
const struct vk_render_pass_state *rp) const struct vk_render_pass_state *rp)
{ {
/* Initially the valid outputs value is set to all possible render targets /* Initially the valid outputs value is set to all possible render targets
* valid (see populate_wm_prog_key()), before we look at the shader * valid (see populate_wm_prog_key()), because we're not looking at the
* variables. Here we look at the output variables of the shader an compute * shader code yet. Here we look at the output written to get a correct
* a correct number of render target outputs. * number of render target outputs.
*/ */
stage->key.wm.color_outputs_valid = 0; const uint64_t rt_mask =
nir_foreach_shader_out_variable_safe(var, stage->nir) { stage->nir->info.outputs_written >> FRAG_RESULT_DATA0;
if (var->data.location < FRAG_RESULT_DATA0) stage->key.wm.color_outputs_valid = rt_mask & rp_color_mask(rp);
continue;
const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
const unsigned array_len =
glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
assert(rt + array_len <= MAX_RTS);
stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
}
stage->key.wm.color_outputs_valid &= rp_color_mask(rp);
stage->key.wm.nr_color_regions = stage->key.wm.nr_color_regions =
util_last_bit(stage->key.wm.color_outputs_valid); util_last_bit(stage->key.wm.color_outputs_valid);
@@ -1527,6 +1508,9 @@ anv_pipeline_link_fs(const struct brw_compiler *compiler,
compiler->devinfo, stage->nir, compiler->devinfo, stage->nir,
stage->key.wm.multisample_fbo != INTEL_NEVER, stage->key.wm.multisample_fbo != INTEL_NEVER,
stage->key.wm.alpha_to_coverage != INTEL_NEVER)) { stage->key.wm.alpha_to_coverage != INTEL_NEVER)) {
/* Ensure the shader doesn't discard the writes */
stage->key.wm.color_outputs_valid = 0x1;
stage->key.wm.nr_color_regions = 1;
/* Setup a null render target */ /* Setup a null render target */
rt_bindings[0] = (struct anv_pipeline_binding) { rt_bindings[0] = (struct anv_pipeline_binding) {
.set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,

View File

@@ -270,7 +270,14 @@ struct intel_perf_query_result;
#define ANV_TRTT_L1_NULL_TILE_VAL 0 #define ANV_TRTT_L1_NULL_TILE_VAL 0
#define ANV_TRTT_L1_INVALID_TILE_VAL 1 #define ANV_TRTT_L1_INVALID_TILE_VAL 1
/* The binding table entry id disabled, the shader can write to it and the
* driver should use a null surface state so that writes are discarded.
*/
#define ANV_COLOR_OUTPUT_DISABLED (0xff) #define ANV_COLOR_OUTPUT_DISABLED (0xff)
/* The binding table entry id unused, the shader does not write to it and the
* driver can leave whatever surface state was used before. Transitioning
* to/from this entry does not require render target cache flush.
*/
#define ANV_COLOR_OUTPUT_UNUSED (0xfe) #define ANV_COLOR_OUTPUT_UNUSED (0xfe)
static inline uint32_t static inline uint32_t