radeonsi: implement fw based mcbp

Some chips support firmware-based MCBP. If supported, this means
radeonsi needs to allocate 3 buffers and pass them to the firmware.

From there, the firmware will handle MCBP and register shadowing
on its own, so we don't need to insert LOAD packets in the preamble.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986>
This commit is contained in:
Pierre-Eric Pelloux-Prayer
2023-03-17 14:44:42 +01:00
committed by Marge Bot
parent 8fe39e9997
commit 65b40d0b7e
11 changed files with 97 additions and 28 deletions

View File

@@ -4288,6 +4288,8 @@ void ac_create_shadowing_ib_preamble(const struct radeon_info *info,
CC1_SHADOW_GFX_SH_REGS(1) |
CC1_SHADOW_GLOBAL_UCONFIG(1));
for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++)
ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address);
if (!info->has_fw_based_shadowing) {
for (unsigned i = 0; i < SI_NUM_SHADOWED_REG_RANGES; i++)
ac_build_load_reg(info, pm4_cmd_add, pm4_cmdbuf, i, gpu_address);
}
}

View File

@@ -41,22 +41,43 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
if (sctx->has_graphics &&
(sctx->screen->info.mid_command_buffer_preemption_enabled ||
sctx->screen->debug_flags & DBG(SHADOW_REGS))) {
sctx->shadowed_regs =
si_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
SI_SHADOWED_REG_BUFFER_SIZE,
4096);
if (!sctx->shadowed_regs)
fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n");
if (sctx->screen->info.has_fw_based_shadowing) {
sctx->shadowing.registers =
si_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
sctx->screen->info.fw_based_mcbp.shadow_size,
sctx->screen->info.fw_based_mcbp.shadow_alignment);
sctx->shadowing.csa =
si_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
sctx->screen->info.fw_based_mcbp.csa_size,
sctx->screen->info.fw_based_mcbp.csa_alignment);
if (!sctx->shadowing.registers || !sctx->shadowing.csa)
fprintf(stderr, "radeonsi: cannot create register shadowing buffer(s)\n");
else
sctx->ws->cs_set_mcbp_reg_shadowing_va(&sctx->gfx_cs,
sctx->shadowing.registers->gpu_address,
sctx->shadowing.csa->gpu_address);
} else {
sctx->shadowing.registers =
si_aligned_buffer_create(sctx->b.screen,
PIPE_RESOURCE_FLAG_UNMAPPABLE | SI_RESOURCE_FLAG_DRIVER_INTERNAL,
PIPE_USAGE_DEFAULT,
SI_SHADOWED_REG_BUFFER_SIZE,
4096);
if (!sctx->shadowing.registers)
fprintf(stderr, "radeonsi: cannot create a shadowed_regs buffer\n");
}
}
si_init_cs_preamble_state(sctx, sctx->shadowed_regs != NULL);
si_init_cs_preamble_state(sctx, sctx->shadowing.registers != NULL);
if (sctx->shadowed_regs) {
if (sctx->shadowing.registers) {
/* We need to clear the shadowed reg buffer. */
si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowed_regs->b.b,
0, sctx->shadowed_regs->bo_size, 0, SI_OP_SYNC_AFTER,
si_cp_dma_clear_buffer(sctx, &sctx->gfx_cs, &sctx->shadowing.registers->b.b,
0, sctx->shadowing.registers->bo_size, 0, SI_OP_SYNC_AFTER,
SI_COHERENCY_CP, L2_BYPASS);
/* Create the shadowing preamble. */
@@ -72,11 +93,14 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
ac_create_shadowing_ib_preamble(&sctx->screen->info,
(pm4_cmd_add_fn)si_pm4_cmd_add, shadowing_preamble,
sctx->shadowed_regs->gpu_address, sctx->screen->dpbb_allowed);
sctx->shadowing.registers->gpu_address, sctx->screen->dpbb_allowed);
/* Initialize shadowed registers as follows. */
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowed_regs,
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.registers,
RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
if (sctx->shadowing.csa)
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->shadowing.csa,
RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
si_pm4_emit(sctx, shadowing_preamble);
ac_emulate_clear_state(&sctx->screen->info, &sctx->gfx_cs, si_set_context_reg_array);
si_pm4_emit(sctx, sctx->cs_preamble_state);

View File

@@ -2184,7 +2184,7 @@ static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_de
radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
} else if (sctx->gfx_level == GFX9 && sctx->shadowed_regs) {
} else if (sctx->gfx_level == GFX9 && sctx->shadowing.registers) {
/* We can't use the COMMON registers with register shadowing. */
radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);

View File

@@ -417,9 +417,13 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->border_color_buffer,
RADEON_USAGE_READ | RADEON_PRIO_BORDER_COLORS);
}
if (ctx->shadowed_regs) {
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowed_regs,
if (ctx->shadowing.registers) {
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.registers,
RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
if (ctx->shadowing.csa)
radeon_add_to_buffer_list(ctx, &ctx->gfx_cs, ctx->shadowing.csa,
RADEON_USAGE_READWRITE | RADEON_PRIO_DESCRIPTORS);
}
si_add_all_descriptors_to_bo_list(ctx);
@@ -484,7 +488,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
if (ctx->screen->use_ngg_culling)
si_mark_atom_dirty(ctx, &ctx->atoms.s.ngg_cull_state);
if (first_cs || !ctx->shadowed_regs) {
if (first_cs || !ctx->shadowing.registers) {
/* These don't add any buffers, so skip them with shadowing. */
si_mark_atom_dirty(ctx, &ctx->atoms.s.clip_regs);
/* CLEAR_STATE sets zeros. */

View File

@@ -350,7 +350,8 @@ static void si_destroy_context(struct pipe_context *context)
sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
si_resource_reference(&sctx->eop_bug_scratch, NULL);
si_resource_reference(&sctx->eop_bug_scratch_tmz, NULL);
si_resource_reference(&sctx->shadowed_regs, NULL);
si_resource_reference(&sctx->shadowing.registers, NULL);
si_resource_reference(&sctx->shadowing.csa, NULL);
si_destroy_compiler(&sctx->compiler);

View File

@@ -962,7 +962,14 @@ struct si_context {
struct u_log_context *log;
void *query_result_shader;
void *sh_query_result_shader;
struct si_resource *shadowed_regs;
struct {
/* Memory where the shadowed registers will be saved and loaded from. */
struct si_resource *registers;
/* Context Save Area: scratch area to save other required data. Only
 * used if info->has_fw_based_shadowing is true.
 */
struct si_resource *csa;
} shadowing;
void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs);

View File

@@ -1471,7 +1471,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
/* draw packet */
if (index_size) {
/* Register shadowing doesn't shadow INDEX_TYPE. */
if (index_size != sctx->last_index_size || sctx->shadowed_regs ||
if (index_size != sctx->last_index_size || sctx->shadowing.registers ||
(GFX_VERSION == GFX10_3 && disable_instance_packing != sctx->disable_instance_packing)) {
unsigned index_type;
@@ -1598,7 +1598,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
}
} else {
/* Register shadowing requires that we always emit PKT3_NUM_INSTANCES. */
if (sctx->shadowed_regs ||
if (sctx->shadowing.registers ||
sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
sctx->last_instance_count != instance_count) {
radeon_emit(PKT3(PKT3_NUM_INSTANCES, 0, 0));

View File

@@ -3693,7 +3693,7 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx, bool tmz)
&sctx->cs_preamble_has_vgt_flush;
/* We shouldn't get here if registers are shadowed. */
assert(!sctx->shadowed_regs);
assert(!sctx->shadowing.registers);
if (*has_vgt_flush)
return;
@@ -3810,7 +3810,7 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
false, 0, 0, 0);
}
if (sctx->shadowed_regs) {
if (sctx->shadowing.registers) {
/* These registers will be shadowed, so set them only once. */
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
@@ -4080,7 +4080,7 @@ void si_init_tess_factor_ring(struct si_context *sctx)
assert((tf_ring_size_field & C_030938_SIZE) == 0);
if (sctx->shadowed_regs) {
if (sctx->shadowing.registers) {
/* These registers will be shadowed, so set them only once. */
/* TODO: tmz + shadowed_regs support */
struct radeon_cmdbuf *cs = &sctx->gfx_cs;

View File

@@ -751,6 +751,12 @@ struct radeon_winsys {
* Stable pstate
*/
bool (*cs_set_pstate)(struct radeon_cmdbuf *cs, enum radeon_ctx_pstate state);
/**
* Pass the VAs to the buffers where various information is saved by the FW during mcbp.
*/
void (*cs_set_mcbp_reg_shadowing_va)(struct radeon_cmdbuf *cs, uint64_t regs_va,
uint64_t csa_va);
};
static inline bool radeon_emitted(struct radeon_cmdbuf *cs, unsigned num_dw)

View File

@@ -1490,7 +1490,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
if (acs->ip_type == AMD_IP_GFX)
ws->gfx_bo_list_counter += cs->num_real_buffers;
struct drm_amdgpu_cs_chunk chunks[7];
struct drm_amdgpu_cs_chunk chunks[8];
unsigned num_chunks = 0;
/* BO list */
@@ -1565,6 +1565,13 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
num_chunks++;
}
if (ws->info.has_fw_based_shadowing) {
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_CP_GFX_SHADOW;
chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_cs_chunk_cp_gfx_shadow) / 4;
chunks[num_chunks].chunk_data = (uintptr_t)&acs->mcbp_fw_shadow_chunk;
num_chunks++;
}
/* Fence */
if (has_user_fence) {
chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_FENCE;
@@ -1674,6 +1681,9 @@ cleanup:
if (r || noop)
amdgpu_fence_signalled(cs->fence);
if (unlikely(ws->info.has_fw_based_shadowing && acs->mcbp_fw_shadow_chunk.flags && r == 0))
acs->mcbp_fw_shadow_chunk.flags = 0;
cs->error_code = r;
/* Only decrement num_active_ioctls for those buffers where we incremented it. */
@@ -1855,6 +1865,16 @@ static bool amdgpu_bo_is_referenced(struct radeon_cmdbuf *rcs,
return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage);
}
/* Record the register-shadowing and CSA buffer VAs in the CS so the next
 * submission can pass them to the firmware via the CP_GFX_SHADOW chunk.
 * The GDS VA is unused here and left at 0. */
static void amdgpu_cs_set_mcbp_reg_shadowing_va(struct radeon_cmdbuf *rcs, uint64_t regs_va,
                                                uint64_t csa_va)
{
   struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = &amdgpu_cs(rcs)->mcbp_fw_shadow_chunk;

   shadow->shadow_va = regs_va;
   shadow->csa_va = csa_va;
   shadow->gds_va = 0;
   /* Ask the firmware to initialize the shadow memory on first use. */
   shadow->flags = AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
}
void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
{
ws->base.ctx_create = amdgpu_ctx_create;
@@ -1880,4 +1900,7 @@ void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *ws)
ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file;
ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file;
ws->base.export_signalled_sync_file = amdgpu_export_signalled_sync_file;
if (ws->aws->info.has_fw_based_shadowing)
ws->base.cs_set_mcbp_reg_shadowing_va = amdgpu_cs_set_mcbp_reg_shadowing_va;
}

View File

@@ -161,6 +161,8 @@ struct amdgpu_cs {
struct util_queue_fence flush_completed;
struct pipe_fence_handle *next_fence;
struct pb_buffer *preamble_ib_bo;
struct drm_amdgpu_cs_chunk_cp_gfx_shadow mcbp_fw_shadow_chunk;
};
struct amdgpu_fence {