anv: wait for CS write completion before executing secondary

Got a error state on DG2 with a jump to secondary. The secondary is
empty and padded with MI_NOOPs to workaround the CS prefetching.
According to the error state, the return jump address from the
secondary to the primary is 0x0. The ACTHD register value is 0x10, so
it seems that the command streamer indeed jumped to 0x0 and hanged on
a few dwords after that.

The return address should have been set edited by a previous
MI_STORE_DATA_IMM instruction. So it appears it did not complete in
time for the command stream to catch it. On Gfx12+ this can happend if
we do not set ForceWriteCompletionCheck.

This change also takes the opportunity to remove the padding MI_NOOPs
at the end of secondaries on Gfx12+ by using disabling the prefetching
just before jumping into secondaries and reenabling it at the
beginning of each secondary.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26665>
(cherry picked from commit 6a92af158d)
This commit is contained in:
Lionel Landwerlin
2023-12-13 11:45:31 +02:00
committed by Eric Engestrom
parent 44440e5aab
commit 6664fc4162
4 changed files with 72 additions and 32 deletions

View File

@@ -1254,7 +1254,7 @@
"description": "anv: wait for CS write completion before executing secondary",
"nominated": true,
"nomination_type": 0,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": ""

View File

@@ -1016,27 +1016,9 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
if (cmd_buffer->device->physical->use_call_secondary) {
cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN;
/* If the secondary command buffer begins & ends in the same BO and
* its length is less than the length of CS prefetch, add some NOOPs
* instructions so the last MI_BATCH_BUFFER_START is outside the CS
* prefetch.
*/
if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class;
/* Careful to have everything in signed integer. */
int32_t prefetch_len = devinfo->engine_class_prefetch[engine_class];
int32_t batch_len = cmd_buffer->batch.next - cmd_buffer->batch.start;
for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
anv_batch_emit(&cmd_buffer->batch, GFX9_MI_NOOP, noop);
}
void *jump_addr =
anv_batch_emitn(&cmd_buffer->batch,
GFX9_MI_BATCH_BUFFER_START_length,
GFX9_MI_BATCH_BUFFER_START,
.AddressSpaceIndicator = ASI_PPGTT,
.SecondLevelBatchBuffer = Firstlevelbatch) +
anv_genX(devinfo, batch_emit_return)(&cmd_buffer->batch) +
(GFX9_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8);
cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr);
@@ -1156,18 +1138,10 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
struct anv_batch_bo *first_bbo =
list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
uint64_t *write_return_addr =
anv_batch_emitn(&primary->batch,
GFX9_MI_STORE_DATA_IMM_length + 1 /* QWord write */,
GFX9_MI_STORE_DATA_IMM,
.Address = secondary->return_addr)
+ (GFX9_MI_STORE_DATA_IMM_ImmediateData_start / 8);
emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0);
*write_return_addr =
anv_address_physical(anv_batch_address(&primary->batch,
primary->batch.next));
anv_genX(primary->device->info, batch_emit_secondary_call)(
&primary->batch,
(struct anv_address) { .bo = first_bbo->bo },
secondary->return_addr);
anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
break;

View File

@@ -173,6 +173,12 @@ void genX(cmd_buffer_dispatch_kernel)(struct anv_cmd_buffer *cmd_buffer,
void genX(blorp_exec)(struct blorp_batch *batch,
const struct blorp_params *params);
void genX(batch_emit_secondary_call)(struct anv_batch *batch,
struct anv_address secondary_addr,
struct anv_address secondary_return_addr);
void *genX(batch_emit_return)(struct anv_batch *batch);
void genX(cmd_emit_timestamp)(struct anv_batch *batch,
struct anv_device *device,
struct anv_address addr,

View File

@@ -3295,6 +3295,19 @@ genX(BeginCommandBuffer)(
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
cmd_buffer->usage_flags &= ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
#if GFX_VER >= 12
/* Reenable prefetching at the beginning of secondary command buffers. We
* do this so that the return instruction edition is not prefetched before
* completion.
*/
if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = false;
}
}
#endif
trace_intel_begin_cmd_buffer(&cmd_buffer->trace);
if (anv_cmd_buffer_is_video_queue(cmd_buffer) ||
@@ -8130,6 +8143,53 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
}
}
void genX(batch_emit_secondary_call)(struct anv_batch *batch,
struct anv_address secondary_addr,
struct anv_address secondary_return_addr)
{
/* Emit a write to change the return address of the secondary */
uint64_t *write_return_addr =
anv_batch_emitn(batch,
GENX(MI_STORE_DATA_IMM_length) + 1 /* QWord write */,
GENX(MI_STORE_DATA_IMM),
#if GFX_VER >= 12
.ForceWriteCompletionCheck = true,
#endif
.Address = secondary_return_addr) +
GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 8;
#if GFX_VER >= 12
/* Disable prefetcher before jumping into a secondary */
anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
arb.PreParserDisableMask = true;
arb.PreParserDisable = true;
}
#endif
/* Jump into the secondary */
anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
bbs.AddressSpaceIndicator = ASI_PPGTT;
bbs.SecondLevelBatchBuffer = Firstlevelbatch;
bbs.BatchBufferStartAddress = secondary_addr;
}
/* Replace the return address written by the MI_STORE_DATA_IMM above with
* the primary's current batch address (immediately after the jump).
*/
*write_return_addr =
anv_address_physical(anv_batch_current_address(batch));
}
void *
genX(batch_emit_return)(struct anv_batch *batch)
{
return anv_batch_emitn(batch,
GENX(MI_BATCH_BUFFER_START_length),
GENX(MI_BATCH_BUFFER_START),
.AddressSpaceIndicator = ASI_PPGTT,
.SecondLevelBatchBuffer = Firstlevelbatch);
}
void
genX(batch_emit_dummy_post_sync_op)(struct anv_batch *batch,
struct anv_device *device,