anv: Fix MI_ARB_CHECK calls in generated indirect draws optimization

According to PRMs, to use self-modifying code correctly we have to
disable preparser before jumping to the generated commands, and re-enable
it with a first command in that buffer.

Old implementation did it wrong: for both inplace and inring generation
it disabled preparser before running the generation shader, had it
disabled during generation, and re-enabled it just before jumping to
the generated commands.

This usually didn't cause any trouble, because the generation shader and
generated draws are in different BOs, and the jump distance is greater than
the command FIFO depth. But we allocate them from the same pool,
so there are rare cases where the end of the BO with generation commands,
and the beginning of the BO with generated draws are adjacent. In such
cases, the wrong commands might be fetched.

Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10162
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26427>
(cherry picked from commit 5cb20b5edc)
This commit is contained in:
Sviatoslav Peleshko
2023-11-30 00:42:07 +02:00
committed by Eric Engestrom
parent 50cbb6ae23
commit d6711f9e2d
2 changed files with 41 additions and 29 deletions

View File

@@ -654,7 +654,7 @@
"description": "anv: Fix MI_ARB_CHECK calls in generated indirect draws optimization",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View File

@@ -114,13 +114,6 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
static void
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
{
#if GFX_VER >= 12
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = true;
}
#endif
anv_batch_emit_ensure_space(&cmd_buffer->generation.batch, 4);
trace_intel_begin_generate_draws(&cmd_buffer->trace);
@@ -133,6 +126,13 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
cmd_buffer->generation.return_addr = anv_batch_current_address(&cmd_buffer->batch);
#if GFX_VER >= 12
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = false;
}
#endif
trace_intel_end_generate_draws(&cmd_buffer->trace);
struct anv_device *device = cmd_buffer->device;
@@ -354,6 +354,9 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
if (cmd_buffer->generation.ring_bo == NULL) {
const uint32_t bo_size = align(
#if GFX_VER >= 12
GENX(MI_ARB_CHECK_length) * 4 +
#endif
draw_cmd_stride * MAX_RING_BO_ITEMS +
#if GFX_VER == 9
4 * MAX_RING_BO_ITEMS +
@@ -376,6 +379,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
/* The ring bo has the following layout:
*
* --------------------------------------------------
* | MI_ARB_CHECK to resume CS prefetch (Gfx12+) |
* |------------------------------------------------|
* | ring_count * 3DPRIMITIVE |
* |------------------------------------------------|
* | jump instruction (either back to generate more |
@@ -391,6 +396,22 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
GENX(MI_BATCH_BUFFER_START_length) * 4,
};
struct anv_address draw_cmds_addr = (struct anv_address) {
.bo = cmd_buffer->generation.ring_bo,
#if GFX_VER >= 12
.offset = GENX(MI_ARB_CHECK_length) * 4,
#endif
};
#if GFX_VER >= 12
struct GENX(MI_ARB_CHECK) resume_prefetch = {
.PreParserDisableMask = true,
.PreParserDisable = false,
};
GENX(MI_ARB_CHECK_pack)(NULL, cmd_buffer->generation.ring_bo->map,
&resume_prefetch);
#endif
#if GFX_VER == 9
/* Mark the VB-0 as using the entire ring_bo, but only for the draw call
* starting the generation batch. All the following ones will use the same
@@ -437,16 +458,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
*/
struct anv_address gen_addr = anv_batch_current_address(&cmd_buffer->batch);
#if GFX_VER >= 12
/* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
* as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = true;
}
#endif
struct anv_simple_shader simple_state = (struct anv_simple_shader) {
.device = device,
.cmd_buffer = cmd_buffer,
@@ -463,9 +474,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
genX(cmd_buffer_emit_generate_draws)(
cmd_buffer,
&simple_state,
(struct anv_address) {
.bo = cmd_buffer->generation.ring_bo,
},
draw_cmds_addr,
draw_cmd_stride,
indirect_data_addr,
indirect_data_stride,
@@ -486,13 +495,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
ANV_PIPE_CS_STALL_BIT,
"after generation flush");
#if GFX_VER >= 12
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = false;
}
#endif
trace_intel_end_generate_draws(&cmd_buffer->trace);
if (cmd_buffer->state.conditional_render_enabled)
@@ -502,6 +504,16 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
if (max_draw_count > 0) {
#if GFX_VER >= 12
/* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
* as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = true;
}
#endif
/* Jump into the ring buffer. */
anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
bbs.AddressSpaceIndicator = ASI_PPGTT;
@@ -636,7 +648,7 @@ genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer)
#if GFX_VER >= 12
anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = false;
arb.PreParserDisable = true;
}
#else
/* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter