radeonsi/sqtt: update registers for gfx11

Based on registers delta and PAL.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20529>
This commit is contained in:
Pierre-Eric Pelloux-Prayer
2023-01-04 13:25:10 +01:00
committed by Marge Bot
parent a3dc8b870d
commit 215babd3ca

View File

@@ -112,43 +112,77 @@ si_emit_thread_trace_start(struct si_context* sctx,
int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
if (sctx->gfx_level >= GFX10) {
/* Order seems important for the following 2 registers. */
radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
S_008D04_SIZE(shifted_size) |
S_008D04_BASE_HI(shifted_va >> 32));
radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
uint32_t token_mask = V_008D18_REG_INCLUDE_SQDEC |
V_008D18_REG_INCLUDE_SHDEC |
V_008D18_REG_INCLUDE_GFXUDEC |
V_008D18_REG_INCLUDE_CONTEXT |
V_008D18_REG_INCLUDE_COMP |
V_008D18_REG_INCLUDE_CONFIG;
int wgp = first_active_cu / 2;
radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK,
S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
S_008D14_SA_SEL(0) |
S_008D14_WGP_SEL(wgp) |
S_008D14_SIMD_SEL(0));
unsigned shader_mask = 0x7f; /* all shader stages */
radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC |
V_008D18_REG_INCLUDE_SHDEC |
V_008D18_REG_INCLUDE_GFXUDEC |
V_008D18_REG_INCLUDE_CONTEXT |
V_008D18_REG_INCLUDE_COMP |
V_008D18_REG_INCLUDE_CONFIG) |
S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
/* Order seems important for the following 2 registers. */
if (sctx->gfx_level >= GFX11) {
/* Disable unsupported hw shader stages */
shader_mask &= ~(0x02 /* VS */ | 0x08 /* ES */ | 0x20 /* LS */);
radeon_set_uconfig_reg(R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
S_0367A4_SIZE(shifted_size) |
S_0367A4_BASE_HI(shifted_va >> 32));
radeon_set_uconfig_reg(R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
radeon_set_uconfig_reg(R_0367B4_SQ_THREAD_TRACE_MASK,
S_0367B4_WTYPE_INCLUDE(shader_mask) |
S_0367B4_SA_SEL(0) |
S_0367B4_WGP_SEL(wgp) |
S_0367B4_SIMD_SEL(0));
radeon_set_uconfig_reg(R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK,
S_0367B8_REG_INCLUDE(token_mask) |
S_0367B8_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
} else {
radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
S_008D04_SIZE(shifted_size) |
S_008D04_BASE_HI(shifted_va >> 32));
radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK,
S_008D14_WTYPE_INCLUDE(shader_mask) |
S_008D14_SA_SEL(0) |
S_008D14_WGP_SEL(wgp) |
S_008D14_SIMD_SEL(0));
radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
S_008D18_REG_INCLUDE(token_mask) |
S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
}
/* Should be emitted last (it enables thread traces). */
radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
S_008D1C_MODE(1) |
S_008D1C_HIWATER(5) |
S_008D1C_UTIL_TIMER(1) |
S_008D1C_RT_FREQ(2) | /* 4096 clk */
S_008D1C_DRAW_EVENT_EN(1) |
S_008D1C_REG_STALL_EN(1) |
S_008D1C_SPI_STALL_EN(1) |
S_008D1C_SQ_STALL_EN(1) |
S_008D1C_REG_DROP_ON_STALL(0) |
S_008D1C_LOWATER_OFFSET(
sctx->gfx_level >= GFX10_3 ? 4 : 0) |
S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug));
uint32_t ctrl = S_008D1C_MODE(1) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
S_008D1C_RT_FREQ(2) | /* 4096 clk */S_008D1C_DRAW_EVENT_EN(1);
if (sctx->gfx_level == GFX10_3)
ctrl |= S_008D1C_LOWATER_OFFSET(4);
ctrl |= S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug);
switch (sctx->gfx_level) {
case GFX10:
case GFX10_3:
ctrl |= S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
S_008D1C_SQ_STALL_EN(1) |S_008D1C_REG_DROP_ON_STALL(0);
radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, ctrl);
break;
case GFX11:
ctrl |= S_0367B0_SPI_STALL_EN(1) | S_0367B0_SQ_STALL_EN(1) |
S_0367B0_REG_AT_HWM(2);
radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, ctrl);
break;
default:
assert(false);
}
} else {
/* Order seems important for the following 4 registers. */
radeon_set_uconfig_reg(R_030CDC_SQ_THREAD_TRACE_BASE2,
@@ -248,6 +282,14 @@ static const uint32_t gfx10_thread_trace_info_regs[] =
R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
};
/* GFX11 thread trace info registers: write pointer, status and dropped
 * counter. si_copy_thread_trace_info_regs() selects this table when
 * gfx_level is GFX11.
 * NOTE(review): the entries are presumably consumed by position, so the
 * order likely has to stay in sync with the other per-generation tables
 * (the gfx10 table also ends with its DROPPED_CNTR register) — confirm
 * against the copy loop before reordering.
 */
static const uint32_t gfx11_thread_trace_info_regs[] =
{
R_0367BC_SQ_THREAD_TRACE_WPTR,
R_0367D0_SQ_THREAD_TRACE_STATUS,
R_0367E8_SQ_THREAD_TRACE_DROPPED_CNTR,
};
static void
si_copy_thread_trace_info_regs(struct si_context* sctx,
struct radeon_cmdbuf *cs,
@@ -260,6 +302,9 @@ si_copy_thread_trace_info_regs(struct si_context* sctx,
case GFX10:
thread_trace_info_regs = gfx10_thread_trace_info_regs;
break;
case GFX11:
thread_trace_info_regs = gfx11_thread_trace_info_regs;
break;
case GFX9:
thread_trace_info_regs = gfx9_thread_trace_info_regs;
break;
@@ -332,28 +377,32 @@ si_emit_thread_trace_stop(struct si_context *sctx,
S_030800_INSTANCE_BROADCAST_WRITES(1));
if (sctx->gfx_level >= GFX10) {
uint32_t tt_status_reg = sctx->gfx_level >= GFX11 ? R_0367D0_SQ_THREAD_TRACE_STATUS :
R_008D20_SQ_THREAD_TRACE_STATUS;
if (!sctx->screen->info.has_sqtt_rb_harvest_bug) {
/* Make sure to wait for the trace buffer. */
radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is not equal to the reference value */
radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
radeon_emit(tt_status_reg >> 2); /* register */
radeon_emit(0);
radeon_emit(0); /* reference value */
radeon_emit(~C_008D20_FINISH_DONE); /* mask */
radeon_emit(sctx->gfx_level >= GFX11 ? ~C_0367D0_FINISH_DONE : ~C_008D20_FINISH_DONE); /* mask */
radeon_emit(4); /* poll interval */
}
/* Disable the thread trace mode. */
radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
S_008D1C_MODE(0));
if (sctx->gfx_level >= GFX11)
radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
else
radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
/* Wait for thread trace completion. */
radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
radeon_emit(tt_status_reg >> 2); /* register */
radeon_emit(0);
radeon_emit(0); /* reference value */
radeon_emit(~C_008D20_BUSY); /* mask */
radeon_emit(sctx->gfx_level >= GFX11 ? ~C_0367D0_BUSY : ~C_008D20_BUSY); /* mask */
radeon_emit(4); /* poll interval */
} else {
/* Disable the thread trace mode. */
@@ -623,7 +672,7 @@ si_init_thread_trace(struct si_context *sctx)
return false;
}
if (sctx->gfx_level > GFX10_3) {
if (sctx->gfx_level > GFX11) {
fprintf(stderr, "radeonsi: Thread trace is not supported "
"for that GPU!\n");
return false;
@@ -655,8 +704,9 @@ si_init_thread_trace(struct si_context *sctx)
list_inithead(&sctx->thread_trace->rgp_code_object.record);
simple_mtx_init(&sctx->thread_trace->rgp_code_object.lock, mtx_plain);
if (sctx->gfx_level >= GFX10 && debug_get_bool_option("AMD_THREAD_TRACE_SPM", true)) {
/* Limit SPM counters to GFX10+ for now */
if (sctx->gfx_level >= GFX10 &&
debug_get_bool_option("AMD_THREAD_TRACE_SPM", sctx->gfx_level < GFX11)) {
/* Limit SPM counters to GFX10 and GFX10_3 for now */
ASSERTED bool r = si_spm_init(sctx);
assert(r);
}