anv: Fix MI_ARB_CHECK calls in generated indirect draws optimization
According to PRMs, to use self-modifying code correctly we have to
disable preparser before jumping to the generated commands, and re-enable
it with a first command in that buffer.
Old implementation did it wrong: for both inplace and inring generation
it disabled preparser before running the generation shader, had it
disabled during generation, and re-enabled it just before jumping to
the generated commands.
This usually didn't cause any trouble, because the generation shader and
generated draws are in different BOs, and the jump distance is greater than
the command FIFO depth. But we allocate them from the same pool,
so there are rare cases where the end of the BO with generation commands,
and the beginning of the BO with generated draws are adjacent. In such
cases, the wrong commands might be fetched.
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10162
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26427>
(cherry picked from commit 5cb20b5edc
)
This commit is contained in:

committed by
Eric Engestrom

parent
50cbb6ae23
commit
d6711f9e2d
@@ -654,7 +654,7 @@
|
||||
"description": "anv: Fix MI_ARB_CHECK calls in generated indirect draws optimization",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
@@ -114,13 +114,6 @@ genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
|
||||
static void
|
||||
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
#if GFX_VER >= 12
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
|
||||
arb.PreParserDisableMask = true;
|
||||
arb.PreParserDisable = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
anv_batch_emit_ensure_space(&cmd_buffer->generation.batch, 4);
|
||||
|
||||
trace_intel_begin_generate_draws(&cmd_buffer->trace);
|
||||
@@ -133,6 +126,13 @@ genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_b
|
||||
|
||||
cmd_buffer->generation.return_addr = anv_batch_current_address(&cmd_buffer->batch);
|
||||
|
||||
#if GFX_VER >= 12
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
|
||||
arb.PreParserDisableMask = true;
|
||||
arb.PreParserDisable = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
trace_intel_end_generate_draws(&cmd_buffer->trace);
|
||||
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
@@ -354,6 +354,9 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
|
||||
|
||||
if (cmd_buffer->generation.ring_bo == NULL) {
|
||||
const uint32_t bo_size = align(
|
||||
#if GFX_VER >= 12
|
||||
GENX(MI_ARB_CHECK_length) * 4 +
|
||||
#endif
|
||||
draw_cmd_stride * MAX_RING_BO_ITEMS +
|
||||
#if GFX_VER == 9
|
||||
4 * MAX_RING_BO_ITEMS +
|
||||
@@ -376,6 +379,8 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
|
||||
/* The ring bo has the following layout:
|
||||
*
|
||||
* --------------------------------------------------
|
||||
* | MI_ARB_CHECK to resume CS prefetch (Gfx12+) |
|
||||
* |------------------------------------------------|
|
||||
* | ring_count * 3DPRIMITIVE |
|
||||
* |------------------------------------------------|
|
||||
* | jump instruction (either back to generate more |
|
||||
@@ -391,6 +396,22 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
|
||||
GENX(MI_BATCH_BUFFER_START_length) * 4,
|
||||
};
|
||||
|
||||
struct anv_address draw_cmds_addr = (struct anv_address) {
|
||||
.bo = cmd_buffer->generation.ring_bo,
|
||||
#if GFX_VER >= 12
|
||||
.offset = GENX(MI_ARB_CHECK_length) * 4,
|
||||
#endif
|
||||
};
|
||||
|
||||
#if GFX_VER >= 12
|
||||
struct GENX(MI_ARB_CHECK) resume_prefetch = {
|
||||
.PreParserDisableMask = true,
|
||||
.PreParserDisable = false,
|
||||
};
|
||||
GENX(MI_ARB_CHECK_pack)(NULL, cmd_buffer->generation.ring_bo->map,
|
||||
&resume_prefetch);
|
||||
#endif
|
||||
|
||||
#if GFX_VER == 9
|
||||
/* Mark the VB-0 as using the entire ring_bo, but only for the draw call
|
||||
* starting the generation batch. All the following ones will use the same
|
||||
@@ -437,16 +458,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
|
||||
*/
|
||||
struct anv_address gen_addr = anv_batch_current_address(&cmd_buffer->batch);
|
||||
|
||||
#if GFX_VER >= 12
|
||||
/* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
|
||||
* as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
|
||||
*/
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
|
||||
arb.PreParserDisableMask = true;
|
||||
arb.PreParserDisable = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct anv_simple_shader simple_state = (struct anv_simple_shader) {
|
||||
.device = device,
|
||||
.cmd_buffer = cmd_buffer,
|
||||
@@ -463,9 +474,7 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
|
||||
genX(cmd_buffer_emit_generate_draws)(
|
||||
cmd_buffer,
|
||||
&simple_state,
|
||||
(struct anv_address) {
|
||||
.bo = cmd_buffer->generation.ring_bo,
|
||||
},
|
||||
draw_cmds_addr,
|
||||
draw_cmd_stride,
|
||||
indirect_data_addr,
|
||||
indirect_data_stride,
|
||||
@@ -486,13 +495,6 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
|
||||
ANV_PIPE_CS_STALL_BIT,
|
||||
"after generation flush");
|
||||
|
||||
#if GFX_VER >= 12
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
|
||||
arb.PreParserDisableMask = true;
|
||||
arb.PreParserDisable = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
trace_intel_end_generate_draws(&cmd_buffer->trace);
|
||||
|
||||
if (cmd_buffer->state.conditional_render_enabled)
|
||||
@@ -502,6 +504,16 @@ genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd
|
||||
genX(cmd_buffer_flush_gfx_state)(cmd_buffer);
|
||||
|
||||
if (max_draw_count > 0) {
|
||||
#if GFX_VER >= 12
|
||||
/* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
|
||||
* as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
|
||||
*/
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
|
||||
arb.PreParserDisableMask = true;
|
||||
arb.PreParserDisable = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Jump into the ring buffer. */
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
|
||||
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
||||
@@ -636,7 +648,7 @@ genX(cmd_buffer_flush_generated_draws)(struct anv_cmd_buffer *cmd_buffer)
|
||||
#if GFX_VER >= 12
|
||||
anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
|
||||
arb.PreParserDisableMask = true;
|
||||
arb.PreParserDisable = false;
|
||||
arb.PreParserDisable = true;
|
||||
}
|
||||
#else
|
||||
/* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
|
||||
|
Reference in New Issue
Block a user