radv: move emitting GE_CNTL for non-NGG pipelines from the pipeline to the cmdbuf

GE_CNTL is the equivalent of IA_MULTI_VGT_PARAM on GFX9 and older.
Calling this function for every draw shouldn't really hurt in practice
because only non-NGG pipelines need this.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18344>
This commit is contained in:
Samuel Pitoiset
2022-08-29 18:35:52 +02:00
committed by Marge Bot
parent 0bf822144f
commit 76960e2d93
3 changed files with 46 additions and 36 deletions

View File

@@ -4010,6 +4010,44 @@ si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_dr
} }
} }
/**
 * Emit GE_CNTL at draw time for the graphics pipeline currently bound to
 * the command buffer.
 *
 * NGG pipelines are skipped. For every other pipeline the register value is
 * derived from the pipeline's shader stages and is only written to the
 * command stream when it differs from the value cached in
 * cmd_buffer->state.last_ge_cntl.
 */
static void
gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_cmd_state *cmd_state = &cmd_buffer->state;
   const struct radv_graphics_pipeline *gfx_pipeline = cmd_state->graphics_pipeline;
   unsigned prims_per_group = 128; /* recommended without a GS and tess */
   bool eoi_break = false;
   unsigned reg_value;

   /* Nothing to emit for NGG pipelines. */
   if (gfx_pipeline->is_ngg)
      return;

   if (radv_pipeline_has_stage(gfx_pipeline, MESA_SHADER_TESS_CTRL)) {
      /* Tessellation: one primitive group per batch of patches. */
      prims_per_group = gfx_pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;

      /* Break the wave at end-of-instance when either tess stage reads the primitive ID. */
      eoi_break = gfx_pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
                  radv_get_shader(&gfx_pipeline->base, MESA_SHADER_TESS_EVAL)->info.uses_prim_id;
   } else if (radv_pipeline_has_stage(gfx_pipeline, MESA_SHADER_GEOMETRY)) {
      /* Legacy GS: reuse the prims-per-subgroup field of VGT_GS_ONCHIP_CNTL. */
      const struct gfx9_gs_info *gs_ring =
         &gfx_pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;

      prims_per_group = G_028A44_GS_PRIMS_PER_SUBGRP(gs_ring->vgt_gs_onchip_cntl);
   }

   reg_value = S_03096C_PRIM_GRP_SIZE_GFX10(prims_per_group) |
               S_03096C_VERT_GRP_SIZE(256) | /* disable vertex grouping */
               S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ |
               S_03096C_BREAK_WAVE_AT_EOI(eoi_break);

   /* Skip the register write when the cached value is already up to date. */
   if (cmd_state->last_ge_cntl == reg_value)
      return;

   radeon_set_uconfig_reg(cmd_buffer->cs, R_03096C_GE_CNTL, reg_value);
   cmd_state->last_ge_cntl = reg_value;
}
static void static void
radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
{ {
@@ -4020,7 +4058,9 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
bool disable_instance_packing = false; bool disable_instance_packing = false;
/* Draw state. */ /* Draw state. */
if (info->gfx_level < GFX10) { if (info->gfx_level >= GFX10) {
gfx10_emit_ge_cntl(cmd_buffer);
} else {
si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect, si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect,
!!draw_info->strmout_buffer, !!draw_info->strmout_buffer,
draw_info->indirect ? 0 : draw_info->count); draw_info->indirect ? 0 : draw_info->count);
@@ -5707,6 +5747,10 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param; primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param;
} }
if (secondary->state.last_ge_cntl) {
primary->state.last_ge_cntl = secondary->state.last_ge_cntl;
}
primary->state.last_first_instance = secondary->state.last_first_instance; primary->state.last_first_instance = secondary->state.last_first_instance;
primary->state.last_num_instances = secondary->state.last_num_instances; primary->state.last_num_instances = secondary->state.last_num_instances;
primary->state.last_drawid = secondary->state.last_drawid; primary->state.last_drawid = secondary->state.last_drawid;

View File

@@ -5624,38 +5624,6 @@ radv_pipeline_emit_cliprect_rule(struct radeon_cmdbuf *ctx_cs,
radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule); radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule);
} }
/**
 * Write GE_CNTL for a graphics pipeline into the given command stream.
 *
 * The primitive group size comes from the tessellation control shader when
 * one is present, otherwise from the geometry shader's on-chip control
 * value, otherwise a fixed default of 128. Vertex grouping is always
 * disabled (256).
 */
static void
gfx10_pipeline_emit_ge_cntl(struct radeon_cmdbuf *ctx_cs,
                            const struct radv_graphics_pipeline *pipeline)
{
   unsigned prims_per_group = 128; /* recommended without a GS and tess */
   bool eoi_break = false;
   unsigned reg_value;

   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) {
      prims_per_group = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;

      /* Break the wave at end-of-instance when either tess stage reads the primitive ID. */
      eoi_break = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
                  radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.uses_prim_id;
   } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      const struct gfx9_gs_info *gs_ring =
         &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;

      prims_per_group = G_028A44_GS_PRIMS_PER_SUBGRP(gs_ring->vgt_gs_onchip_cntl);
   }

   reg_value = S_03096C_PRIM_GRP_SIZE_GFX10(prims_per_group) |
               S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
               S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ |
               S_03096C_BREAK_WAVE_AT_EOI(eoi_break);

   radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, reg_value);
}
static void static void
radv_pipeline_emit_vgt_gs_out(struct radeon_cmdbuf *ctx_cs, radv_pipeline_emit_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
const struct radv_graphics_pipeline *pipeline, const struct radv_graphics_pipeline *pipeline,
@@ -5792,9 +5760,6 @@ radv_pipeline_emit_pm4(struct radv_graphics_pipeline *pipeline,
radv_pipeline_emit_cliprect_rule(ctx_cs, state); radv_pipeline_emit_cliprect_rule(ctx_cs, state);
radv_pipeline_emit_vgt_gs_out(ctx_cs, pipeline, vgt_gs_out_prim_type); radv_pipeline_emit_vgt_gs_out(ctx_cs, pipeline, vgt_gs_out_prim_type);
if (pdevice->rad_info.gfx_level >= GFX10 && !radv_pipeline_has_ngg(pipeline))
gfx10_pipeline_emit_ge_cntl(ctx_cs, pipeline);
if (pdevice->rad_info.gfx_level >= GFX10_3) { if (pdevice->rad_info.gfx_level >= GFX10_3) {
gfx103_pipeline_emit_vgt_draw_payload_cntl(ctx_cs, pipeline, state); gfx103_pipeline_emit_vgt_draw_payload_cntl(ctx_cs, pipeline, state);
gfx103_pipeline_emit_vrs_state(ctx_cs, pipeline, state); gfx103_pipeline_emit_vrs_state(ctx_cs, pipeline, state);

View File

@@ -1487,6 +1487,7 @@ struct radv_cmd_state {
bool prims_gen_query_enabled; bool prims_gen_query_enabled;
uint32_t trace_id; uint32_t trace_id;
uint32_t last_ia_multi_vgt_param; uint32_t last_ia_multi_vgt_param;
uint32_t last_ge_cntl;
uint32_t last_num_instances; uint32_t last_num_instances;
uint32_t last_first_instance; uint32_t last_first_instance;