radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9
A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion counters) must immediately precede every timestamp event to prevent a GPU hang on GFX9.

Cc: 18.1 <mesa-stable@lists.freedesktop.org>
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
@@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||||
|
unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends;
|
||||||
|
unsigned eop_bug_offset;
|
||||||
void *fence_ptr;
|
void *fence_ptr;
|
||||||
|
|
||||||
radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
|
radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
|
||||||
&cmd_buffer->gfx9_fence_offset,
|
&cmd_buffer->gfx9_fence_offset,
|
||||||
&fence_ptr);
|
&fence_ptr);
|
||||||
cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
|
cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
|
||||||
|
|
||||||
|
/* Allocate a buffer for the EOP bug on GFX9. */
|
||||||
|
radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
|
||||||
|
&eop_bug_offset, &fence_ptr);
|
||||||
|
cmd_buffer->gfx9_eop_bug_va =
|
||||||
|
radv_buffer_get_va(cmd_buffer->upload.upload_bo);
|
||||||
|
cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
|
cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
|
||||||
@@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
cmd_buffer->device->physical_device->rad_info.chip_class,
|
cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||||
ptr, va,
|
ptr, va,
|
||||||
radv_cmd_buffer_uses_mec(cmd_buffer),
|
radv_cmd_buffer_uses_mec(cmd_buffer),
|
||||||
flags);
|
flags, cmd_buffer->gfx9_eop_bug_va);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(cmd_buffer->device->trace_bo))
|
if (unlikely(cmd_buffer->device->trace_bo))
|
||||||
@@ -4357,7 +4367,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
cmd_buffer->device->physical_device->rad_info.chip_class,
|
cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||||
radv_cmd_buffer_uses_mec(cmd_buffer),
|
radv_cmd_buffer_uses_mec(cmd_buffer),
|
||||||
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
||||||
EOP_DATA_SEL_VALUE_32BIT, va, 2, value);
|
EOP_DATA_SEL_VALUE_32BIT, va, 2, value,
|
||||||
|
cmd_buffer->gfx9_eop_bug_va);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(cmd_buffer->cs->cdw <= cdw_max);
|
assert(cmd_buffer->cs->cdw <= cdw_max);
|
||||||
|
@@ -2240,7 +2240,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
RADV_CMD_FLAG_INV_SMEM_L1 |
|
RADV_CMD_FLAG_INV_SMEM_L1 |
|
||||||
RADV_CMD_FLAG_INV_VMEM_L1 |
|
RADV_CMD_FLAG_INV_VMEM_L1 |
|
||||||
RADV_CMD_FLAG_INV_GLOBAL_L2 |
|
RADV_CMD_FLAG_INV_GLOBAL_L2 |
|
||||||
RADV_CMD_FLAG_START_PIPELINE_STATS);
|
RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
|
||||||
} else if (i == 1) {
|
} else if (i == 1) {
|
||||||
si_cs_emit_cache_flush(cs,
|
si_cs_emit_cache_flush(cs,
|
||||||
queue->device->physical_device->rad_info.chip_class,
|
queue->device->physical_device->rad_info.chip_class,
|
||||||
@@ -2251,7 +2251,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
|
|||||||
RADV_CMD_FLAG_INV_SMEM_L1 |
|
RADV_CMD_FLAG_INV_SMEM_L1 |
|
||||||
RADV_CMD_FLAG_INV_VMEM_L1 |
|
RADV_CMD_FLAG_INV_VMEM_L1 |
|
||||||
RADV_CMD_FLAG_INV_GLOBAL_L2 |
|
RADV_CMD_FLAG_INV_GLOBAL_L2 |
|
||||||
RADV_CMD_FLAG_START_PIPELINE_STATS);
|
RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!queue->device->ws->cs_finalize(cs))
|
if (!queue->device->ws->cs_finalize(cs))
|
||||||
|
@@ -1044,6 +1044,7 @@ struct radv_cmd_buffer {
|
|||||||
uint32_t gfx9_fence_offset;
|
uint32_t gfx9_fence_offset;
|
||||||
struct radeon_winsys_bo *gfx9_fence_bo;
|
struct radeon_winsys_bo *gfx9_fence_bo;
|
||||||
uint32_t gfx9_fence_idx;
|
uint32_t gfx9_fence_idx;
|
||||||
|
uint64_t gfx9_eop_bug_va;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether a query pool has been resetted and we have to flush caches.
|
* Whether a query pool has been resetted and we have to flush caches.
|
||||||
@@ -1075,7 +1076,8 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
|
|||||||
unsigned data_sel,
|
unsigned data_sel,
|
||||||
uint64_t va,
|
uint64_t va,
|
||||||
uint32_t old_fence,
|
uint32_t old_fence,
|
||||||
uint32_t new_fence);
|
uint32_t new_fence,
|
||||||
|
uint64_t gfx9_eop_bug_va);
|
||||||
|
|
||||||
void si_emit_wait_fence(struct radeon_cmdbuf *cs,
|
void si_emit_wait_fence(struct radeon_cmdbuf *cs,
|
||||||
uint64_t va, uint32_t ref,
|
uint64_t va, uint32_t ref,
|
||||||
@@ -1084,7 +1086,8 @@ void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
|
|||||||
enum chip_class chip_class,
|
enum chip_class chip_class,
|
||||||
uint32_t *fence_ptr, uint64_t va,
|
uint32_t *fence_ptr, uint64_t va,
|
||||||
bool is_mec,
|
bool is_mec,
|
||||||
enum radv_cmd_flush_bits flush_bits);
|
enum radv_cmd_flush_bits flush_bits,
|
||||||
|
uint64_t gfx9_eop_bug_va);
|
||||||
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
|
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
|
||||||
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
|
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
|
||||||
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
|
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
|
||||||
|
@@ -1180,7 +1180,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
|
|||||||
radv_cmd_buffer_uses_mec(cmd_buffer),
|
radv_cmd_buffer_uses_mec(cmd_buffer),
|
||||||
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
||||||
EOP_DATA_SEL_VALUE_32BIT,
|
EOP_DATA_SEL_VALUE_32BIT,
|
||||||
avail_va, 0, 1);
|
avail_va, 0, 1,
|
||||||
|
cmd_buffer->gfx9_eop_bug_va);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
unreachable("ending unhandled query type");
|
unreachable("ending unhandled query type");
|
||||||
@@ -1303,13 +1304,15 @@ void radv_CmdWriteTimestamp(
|
|||||||
mec,
|
mec,
|
||||||
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
||||||
EOP_DATA_SEL_TIMESTAMP,
|
EOP_DATA_SEL_TIMESTAMP,
|
||||||
query_va, 0, 0);
|
query_va, 0, 0,
|
||||||
|
cmd_buffer->gfx9_eop_bug_va);
|
||||||
si_cs_emit_write_event_eop(cs,
|
si_cs_emit_write_event_eop(cs,
|
||||||
cmd_buffer->device->physical_device->rad_info.chip_class,
|
cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||||
mec,
|
mec,
|
||||||
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
||||||
EOP_DATA_SEL_VALUE_32BIT,
|
EOP_DATA_SEL_VALUE_32BIT,
|
||||||
avail_va, 0, 1);
|
avail_va, 0, 1,
|
||||||
|
cmd_buffer->gfx9_eop_bug_va);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
query_va += pool->stride;
|
query_va += pool->stride;
|
||||||
|
@@ -679,7 +679,8 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
|
|||||||
unsigned data_sel,
|
unsigned data_sel,
|
||||||
uint64_t va,
|
uint64_t va,
|
||||||
uint32_t old_fence,
|
uint32_t old_fence,
|
||||||
uint32_t new_fence)
|
uint32_t new_fence,
|
||||||
|
uint64_t gfx9_eop_bug_va)
|
||||||
{
|
{
|
||||||
unsigned op = EVENT_TYPE(event) |
|
unsigned op = EVENT_TYPE(event) |
|
||||||
EVENT_INDEX(5) |
|
EVENT_INDEX(5) |
|
||||||
@@ -693,6 +694,17 @@ void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
|
|||||||
sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
|
sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
|
||||||
|
|
||||||
if (chip_class >= GFX9 || is_gfx8_mec) {
|
if (chip_class >= GFX9 || is_gfx8_mec) {
|
||||||
|
/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
|
||||||
|
* counters) must immediately precede every timestamp event to
|
||||||
|
* prevent a GPU hang on GFX9.
|
||||||
|
*/
|
||||||
|
if (chip_class == GFX9) {
|
||||||
|
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||||
|
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
|
||||||
|
radeon_emit(cs, gfx9_eop_bug_va);
|
||||||
|
radeon_emit(cs, gfx9_eop_bug_va >> 32);
|
||||||
|
}
|
||||||
|
|
||||||
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, false));
|
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, false));
|
||||||
radeon_emit(cs, op);
|
radeon_emit(cs, op);
|
||||||
radeon_emit(cs, sel);
|
radeon_emit(cs, sel);
|
||||||
@@ -772,7 +784,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
|
|||||||
uint32_t *flush_cnt,
|
uint32_t *flush_cnt,
|
||||||
uint64_t flush_va,
|
uint64_t flush_va,
|
||||||
bool is_mec,
|
bool is_mec,
|
||||||
enum radv_cmd_flush_bits flush_bits)
|
enum radv_cmd_flush_bits flush_bits,
|
||||||
|
uint64_t gfx9_eop_bug_va)
|
||||||
{
|
{
|
||||||
unsigned cp_coher_cntl = 0;
|
unsigned cp_coher_cntl = 0;
|
||||||
uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
|
uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
|
||||||
@@ -803,7 +816,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
|
|||||||
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
|
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
|
||||||
0,
|
0,
|
||||||
EOP_DATA_SEL_DISCARD,
|
EOP_DATA_SEL_DISCARD,
|
||||||
0, 0, 0);
|
0, 0, 0,
|
||||||
|
gfx9_eop_bug_va);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
|
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
|
||||||
@@ -873,7 +887,8 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
|
|||||||
|
|
||||||
si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags,
|
si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags,
|
||||||
EOP_DATA_SEL_VALUE_32BIT,
|
EOP_DATA_SEL_VALUE_32BIT,
|
||||||
flush_va, old_fence, *flush_cnt);
|
flush_va, old_fence, *flush_cnt,
|
||||||
|
gfx9_eop_bug_va);
|
||||||
si_emit_wait_fence(cs, flush_va, *flush_cnt, 0xffffffff);
|
si_emit_wait_fence(cs, flush_va, *flush_cnt, 0xffffffff);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -975,7 +990,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
|
|||||||
cmd_buffer->device->physical_device->rad_info.chip_class,
|
cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||||
ptr, va,
|
ptr, va,
|
||||||
radv_cmd_buffer_uses_mec(cmd_buffer),
|
radv_cmd_buffer_uses_mec(cmd_buffer),
|
||||||
cmd_buffer->state.flush_bits);
|
cmd_buffer->state.flush_bits,
|
||||||
|
cmd_buffer->gfx9_eop_bug_va);
|
||||||
|
|
||||||
|
|
||||||
if (unlikely(cmd_buffer->device->trace_bo))
|
if (unlikely(cmd_buffer->device->trace_bo))
|
||||||
|
Reference in New Issue
Block a user