radv: make the GDS/GDS OA buffer objects resident

GDS is used for NGG queries/streamout (GFX10+ only) and the BOs were
only added to the graphics queue because compute doesn't need them.
However, the kernel emits a GDS switch when a queue submission doesn't
use GDS. That means that submitting jobs on the compute queue without
GDS can reset the state of the graphics queue and lead to GPU hangs.

The only viable solution for now is to make the GDS BOs resident to
avoid resetting the state between queues. This shouldn't introduce
more syncs between queues because GDS BOs are similar for both.

This fixes a GPU hang with Warhammer Chaosbane during loading time and
possibly some spurious random GPU hangs. Note that this GPU hang was
worked around on the Steam side with RADV_DEBUG=nongg.

Cc: mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19466>
This commit is contained in:
Samuel Pitoiset
2022-11-02 13:53:58 +01:00
committed by Marge Bot
parent cdc1abad7b
commit 26c8fedc1b
2 changed files with 27 additions and 10 deletions

View File

@@ -3033,10 +3033,14 @@ radv_queue_state_finish(struct radv_queue_state *queue, struct radeon_winsys *ws
ws->buffer_destroy(ws, queue->task_rings_bo);
if (queue->attr_ring_bo)
ws->buffer_destroy(ws, queue->attr_ring_bo);
if (queue->gds_bo)
if (queue->gds_bo) {
ws->buffer_make_resident(ws, queue->gds_bo, false);
ws->buffer_destroy(ws, queue->gds_bo);
if (queue->gds_oa_bo)
}
if (queue->gds_oa_bo) {
ws->buffer_make_resident(ws, queue->gds_oa_bo, false);
ws->buffer_destroy(ws, queue->gds_oa_bo);
}
if (queue->compute_scratch_bo)
ws->buffer_destroy(ws, queue->compute_scratch_bo);
}
@@ -4710,6 +4714,13 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
if (result != VK_SUCCESS)
goto fail;
/* Add the GDS BO to our global BO list to prevent the kernel from emitting a GDS switch and
* resetting the state when a compute queue is used.
*/
result = device->ws->buffer_make_resident(ws, gds_bo, true);
if (result != VK_SUCCESS)
goto fail;
}
if (!queue->ring_info.gds_oa && needs->gds_oa) {
@@ -4719,6 +4730,13 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
if (result != VK_SUCCESS)
goto fail;
/* Add the GDS OA BO to our global BO list to prevent the kernel from emitting a GDS switch
* and resetting the state when a compute queue is used.
*/
result = device->ws->buffer_make_resident(ws, gds_oa_bo, true);
if (result != VK_SUCCESS)
goto fail;
}
/* Re-initialize the descriptor BO when any ring BOs changed.
@@ -4847,11 +4865,6 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
break;
}
if (gds_bo)
radv_cs_add_buffer(ws, cs, gds_bo);
if (gds_oa_bo)
radv_cs_add_buffer(ws, cs, gds_oa_bo);
if (i < 2) {
/* The two initial preambles have a cache flush at the beginning. */
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
@@ -4946,10 +4959,14 @@ fail:
ws->buffer_destroy(ws, task_rings_bo);
if (attr_ring_bo && attr_ring_bo != queue->attr_ring_bo)
ws->buffer_destroy(ws, attr_ring_bo);
if (gds_bo && gds_bo != queue->gds_bo)
if (gds_bo && gds_bo != queue->gds_bo) {
ws->buffer_make_resident(ws, queue->gds_bo, false);
ws->buffer_destroy(ws, gds_bo);
if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
}
if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo) {
ws->buffer_make_resident(ws, queue->gds_oa_bo, false);
ws->buffer_destroy(ws, gds_oa_bo);
}
return vk_error(queue, result);
}

View File

@@ -486,7 +486,7 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
if ((initial_domain & RADEON_DOMAIN_VRAM_GTT) && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) &&
((ws->perftest & RADV_PERFTEST_LOCAL_BOS) || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
bo->base.is_local = true;
request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;