radeonsi/sqtt: update registers for gfx11
Based on registers delta and PAL. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20529>
This commit is contained in:

committed by
Marge Bot

parent
a3dc8b870d
commit
215babd3ca
@@ -112,43 +112,77 @@ si_emit_thread_trace_start(struct si_context* sctx,
|
||||
int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
|
||||
|
||||
if (sctx->gfx_level >= GFX10) {
|
||||
/* Order seems important for the following 2 registers. */
|
||||
radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
|
||||
S_008D04_SIZE(shifted_size) |
|
||||
S_008D04_BASE_HI(shifted_va >> 32));
|
||||
|
||||
radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
|
||||
|
||||
uint32_t token_mask = V_008D18_REG_INCLUDE_SQDEC |
|
||||
V_008D18_REG_INCLUDE_SHDEC |
|
||||
V_008D18_REG_INCLUDE_GFXUDEC |
|
||||
V_008D18_REG_INCLUDE_CONTEXT |
|
||||
V_008D18_REG_INCLUDE_COMP |
|
||||
V_008D18_REG_INCLUDE_CONFIG;
|
||||
int wgp = first_active_cu / 2;
|
||||
radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK,
|
||||
S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
|
||||
S_008D14_SA_SEL(0) |
|
||||
S_008D14_WGP_SEL(wgp) |
|
||||
S_008D14_SIMD_SEL(0));
|
||||
unsigned shader_mask = 0x7f; /* all shader stages */
|
||||
|
||||
radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
|
||||
S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC |
|
||||
V_008D18_REG_INCLUDE_SHDEC |
|
||||
V_008D18_REG_INCLUDE_GFXUDEC |
|
||||
V_008D18_REG_INCLUDE_CONTEXT |
|
||||
V_008D18_REG_INCLUDE_COMP |
|
||||
V_008D18_REG_INCLUDE_CONFIG) |
|
||||
S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
|
||||
/* Order seems important for the following 2 registers. */
|
||||
if (sctx->gfx_level >= GFX11) {
|
||||
/* Disable unsupported hw shader stages */
|
||||
shader_mask &= ~(0x02 /* VS */ | 0x08 /* ES */ | 0x20 /* LS */);
|
||||
|
||||
radeon_set_uconfig_reg(R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
|
||||
S_0367A4_SIZE(shifted_size) |
|
||||
S_0367A4_BASE_HI(shifted_va >> 32));
|
||||
|
||||
radeon_set_uconfig_reg(R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
|
||||
|
||||
radeon_set_uconfig_reg(R_0367B4_SQ_THREAD_TRACE_MASK,
|
||||
S_0367B4_WTYPE_INCLUDE(shader_mask) |
|
||||
S_0367B4_SA_SEL(0) |
|
||||
S_0367B4_WGP_SEL(wgp) |
|
||||
S_0367B4_SIMD_SEL(0));
|
||||
|
||||
radeon_set_uconfig_reg(R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK,
|
||||
S_0367B8_REG_INCLUDE(token_mask) |
|
||||
S_0367B8_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
|
||||
} else {
|
||||
radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
|
||||
S_008D04_SIZE(shifted_size) |
|
||||
S_008D04_BASE_HI(shifted_va >> 32));
|
||||
|
||||
radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
|
||||
|
||||
radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK,
|
||||
S_008D14_WTYPE_INCLUDE(shader_mask) |
|
||||
S_008D14_SA_SEL(0) |
|
||||
S_008D14_WGP_SEL(wgp) |
|
||||
S_008D14_SIMD_SEL(0));
|
||||
|
||||
radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
|
||||
S_008D18_REG_INCLUDE(token_mask) |
|
||||
S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
|
||||
}
|
||||
|
||||
/* Should be emitted last (it enables thread traces). */
|
||||
radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
|
||||
S_008D1C_MODE(1) |
|
||||
S_008D1C_HIWATER(5) |
|
||||
S_008D1C_UTIL_TIMER(1) |
|
||||
S_008D1C_RT_FREQ(2) | /* 4096 clk */
|
||||
S_008D1C_DRAW_EVENT_EN(1) |
|
||||
S_008D1C_REG_STALL_EN(1) |
|
||||
S_008D1C_SPI_STALL_EN(1) |
|
||||
S_008D1C_SQ_STALL_EN(1) |
|
||||
S_008D1C_REG_DROP_ON_STALL(0) |
|
||||
S_008D1C_LOWATER_OFFSET(
|
||||
sctx->gfx_level >= GFX10_3 ? 4 : 0) |
|
||||
S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug));
|
||||
uint32_t ctrl = S_008D1C_MODE(1) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
|
||||
S_008D1C_RT_FREQ(2) | /* 4096 clk */S_008D1C_DRAW_EVENT_EN(1);
|
||||
|
||||
if (sctx->gfx_level == GFX10_3)
|
||||
ctrl |= S_008D1C_LOWATER_OFFSET(4);
|
||||
|
||||
ctrl |= S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug);
|
||||
|
||||
switch (sctx->gfx_level) {
|
||||
case GFX10:
|
||||
case GFX10_3:
|
||||
ctrl |= S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
|
||||
S_008D1C_SQ_STALL_EN(1) |S_008D1C_REG_DROP_ON_STALL(0);
|
||||
radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, ctrl);
|
||||
break;
|
||||
case GFX11:
|
||||
ctrl |= S_0367B0_SPI_STALL_EN(1) | S_0367B0_SQ_STALL_EN(1) |
|
||||
S_0367B0_REG_AT_HWM(2);
|
||||
radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, ctrl);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
} else {
|
||||
/* Order seems important for the following 4 registers. */
|
||||
radeon_set_uconfig_reg(R_030CDC_SQ_THREAD_TRACE_BASE2,
|
||||
@@ -248,6 +282,14 @@ static const uint32_t gfx10_thread_trace_info_regs[] =
|
||||
R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
|
||||
};
|
||||
|
||||
static const uint32_t gfx11_thread_trace_info_regs[] =
|
||||
{
|
||||
R_0367BC_SQ_THREAD_TRACE_WPTR,
|
||||
R_0367D0_SQ_THREAD_TRACE_STATUS,
|
||||
R_0367E8_SQ_THREAD_TRACE_DROPPED_CNTR,
|
||||
};
|
||||
|
||||
|
||||
static void
|
||||
si_copy_thread_trace_info_regs(struct si_context* sctx,
|
||||
struct radeon_cmdbuf *cs,
|
||||
@@ -260,6 +302,9 @@ si_copy_thread_trace_info_regs(struct si_context* sctx,
|
||||
case GFX10:
|
||||
thread_trace_info_regs = gfx10_thread_trace_info_regs;
|
||||
break;
|
||||
case GFX11:
|
||||
thread_trace_info_regs = gfx11_thread_trace_info_regs;
|
||||
break;
|
||||
case GFX9:
|
||||
thread_trace_info_regs = gfx9_thread_trace_info_regs;
|
||||
break;
|
||||
@@ -332,28 +377,32 @@ si_emit_thread_trace_stop(struct si_context *sctx,
|
||||
S_030800_INSTANCE_BROADCAST_WRITES(1));
|
||||
|
||||
if (sctx->gfx_level >= GFX10) {
|
||||
uint32_t tt_status_reg = sctx->gfx_level >= GFX11 ? R_0367D0_SQ_THREAD_TRACE_STATUS :
|
||||
R_008D20_SQ_THREAD_TRACE_STATUS;
|
||||
if (!sctx->screen->info.has_sqtt_rb_harvest_bug) {
|
||||
/* Make sure to wait for the trace buffer. */
|
||||
radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
|
||||
radeon_emit(WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
|
||||
radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
|
||||
radeon_emit(tt_status_reg >> 2); /* register */
|
||||
radeon_emit(0);
|
||||
radeon_emit(0); /* reference value */
|
||||
radeon_emit(~C_008D20_FINISH_DONE); /* mask */
|
||||
radeon_emit(sctx->gfx_level >= GFX11 ? ~C_0367D0_FINISH_DONE : ~C_008D20_FINISH_DONE); /* mask */
|
||||
radeon_emit(4); /* poll interval */
|
||||
}
|
||||
|
||||
/* Disable the thread trace mode. */
|
||||
radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
|
||||
S_008D1C_MODE(0));
|
||||
if (sctx->gfx_level >= GFX11)
|
||||
radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
|
||||
else
|
||||
radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
|
||||
|
||||
/* Wait for thread trace completion. */
|
||||
radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
|
||||
radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
|
||||
radeon_emit(R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
|
||||
radeon_emit(tt_status_reg >> 2); /* register */
|
||||
radeon_emit(0);
|
||||
radeon_emit(0); /* reference value */
|
||||
radeon_emit(~C_008D20_BUSY); /* mask */
|
||||
radeon_emit(sctx->gfx_level >= GFX11 ? ~C_0367D0_BUSY : ~C_008D20_BUSY); /* mask */
|
||||
radeon_emit(4); /* poll interval */
|
||||
} else {
|
||||
/* Disable the thread trace mode. */
|
||||
@@ -623,7 +672,7 @@ si_init_thread_trace(struct si_context *sctx)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (sctx->gfx_level > GFX10_3) {
|
||||
if (sctx->gfx_level > GFX11) {
|
||||
fprintf(stderr, "radeonsi: Thread trace is not supported "
|
||||
"for that GPU!\n");
|
||||
return false;
|
||||
@@ -655,8 +704,9 @@ si_init_thread_trace(struct si_context *sctx)
|
||||
list_inithead(&sctx->thread_trace->rgp_code_object.record);
|
||||
simple_mtx_init(&sctx->thread_trace->rgp_code_object.lock, mtx_plain);
|
||||
|
||||
if (sctx->gfx_level >= GFX10 && debug_get_bool_option("AMD_THREAD_TRACE_SPM", true)) {
|
||||
/* Limit SPM counters to GFX10+ for now */
|
||||
if (sctx->gfx_level >= GFX10 &&
|
||||
debug_get_bool_option("AMD_THREAD_TRACE_SPM", sctx->gfx_level < GFX11)) {
|
||||
/* Limit SPM counters to GFX10 and GFX10_3 for now */
|
||||
ASSERTED bool r = si_spm_init(sctx);
|
||||
assert(r);
|
||||
}
|
||||
|
Reference in New Issue
Block a user