radv: make the GDS/GDS OA buffer objects resident
GDS is used for NGG queries/streamout (GFX10+ only) and the BOs were only added to the graphics queue because compute doesn't need them. Though, the kernel emits a GDS switch when a queue submission doesn't use GDS. That means that submitting jobs on the compute queue without GDS can reset the state of the graphics queue and lead to GPU hangs. The only viable solution for now is to make the GDS BOs resident to avoid resetting the state between queues. This shouldn't introduce more syncs between queues because GDS BOs are similar for both. This fixes a GPU hang with Warhammer Chaosbane during loading time and possibly some spurious random GPU hangs. Note that this GPU hang was workarounded on the Steam side with RADV_DEBUG=nongg. Cc: mesa-stable Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19466>
This commit is contained in:

committed by
Marge Bot

parent
cdc1abad7b
commit
26c8fedc1b
@@ -3033,10 +3033,14 @@ radv_queue_state_finish(struct radv_queue_state *queue, struct radeon_winsys *ws
|
||||
ws->buffer_destroy(ws, queue->task_rings_bo);
|
||||
if (queue->attr_ring_bo)
|
||||
ws->buffer_destroy(ws, queue->attr_ring_bo);
|
||||
if (queue->gds_bo)
|
||||
if (queue->gds_bo) {
|
||||
ws->buffer_make_resident(ws, queue->gds_bo, false);
|
||||
ws->buffer_destroy(ws, queue->gds_bo);
|
||||
if (queue->gds_oa_bo)
|
||||
}
|
||||
if (queue->gds_oa_bo) {
|
||||
ws->buffer_make_resident(ws, queue->gds_oa_bo, false);
|
||||
ws->buffer_destroy(ws, queue->gds_oa_bo);
|
||||
}
|
||||
if (queue->compute_scratch_bo)
|
||||
ws->buffer_destroy(ws, queue->compute_scratch_bo);
|
||||
}
|
||||
@@ -4710,6 +4714,13 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
||||
RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
/* Add the GDS BO to our global BO list to prevent the kernel to emit a GDS switch and reset
|
||||
* the state when a compute queue is used.
|
||||
*/
|
||||
result = device->ws->buffer_make_resident(ws, gds_bo, true);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (!queue->ring_info.gds_oa && needs->gds_oa) {
|
||||
@@ -4719,6 +4730,13 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
||||
RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
/* Add the GDS OA BO to our global BO list to prevent the kernel to emit a GDS switch and
|
||||
* reset the state when a compute queue is used.
|
||||
*/
|
||||
result = device->ws->buffer_make_resident(ws, gds_oa_bo, true);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Re-initialize the descriptor BO when any ring BOs changed.
|
||||
@@ -4847,11 +4865,6 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
|
||||
break;
|
||||
}
|
||||
|
||||
if (gds_bo)
|
||||
radv_cs_add_buffer(ws, cs, gds_bo);
|
||||
if (gds_oa_bo)
|
||||
radv_cs_add_buffer(ws, cs, gds_oa_bo);
|
||||
|
||||
if (i < 2) {
|
||||
/* The two initial preambles have a cache flush at the beginning. */
|
||||
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
@@ -4946,10 +4959,14 @@ fail:
|
||||
ws->buffer_destroy(ws, task_rings_bo);
|
||||
if (attr_ring_bo && attr_ring_bo != queue->attr_ring_bo)
|
||||
ws->buffer_destroy(ws, attr_ring_bo);
|
||||
if (gds_bo && gds_bo != queue->gds_bo)
|
||||
if (gds_bo && gds_bo != queue->gds_bo) {
|
||||
ws->buffer_make_resident(ws, queue->gds_bo, false);
|
||||
ws->buffer_destroy(ws, gds_bo);
|
||||
if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
|
||||
}
|
||||
if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo) {
|
||||
ws->buffer_make_resident(ws, queue->gds_oa_bo, false);
|
||||
ws->buffer_destroy(ws, gds_oa_bo);
|
||||
}
|
||||
|
||||
return vk_error(queue, result);
|
||||
}
|
||||
|
@@ -486,7 +486,7 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned
|
||||
request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
||||
if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
|
||||
request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
|
||||
if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
|
||||
if ((initial_domain & RADEON_DOMAIN_VRAM_GTT) && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) &&
|
||||
((ws->perftest & RADV_PERFTEST_LOCAL_BOS) || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
|
||||
bo->base.is_local = true;
|
||||
request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
|
||||
|
Reference in New Issue
Block a user