/*
 * Review notes for this patch. This preamble sits before the first
 * "diff --git" header and adds no line to any hunk.
 *
 * Purpose, as far as the hunks show: handle a vertex shader and a tessellation
 * control shader whose info.merged_shader_compiled_separately flag is set, by
 * combining their register configs at emit time.
 *
 * src/amd/vulkan/radv_shader.c, src/amd/vulkan/radv_shader.h
 *   Adds radv_shader_combine_cfg_vs_tcs(vs, tcs, rsrc1_out, rsrc2_out).
 *   - rsrc1 starts as vs->config.rsrc1; the VGPRS, SGPRS and LS_VGPR_COMP_CNT
 *     fields are each replaced by the TCS field when the TCS field is larger.
 *   - rsrc2 starts as vs->config.rsrc2; the SCRATCH_EN field is replaced by
 *     the TCS field when the TCS field is larger.
 *   - Either output pointer may be NULL; that value is then not computed.
 *
 * src/amd/vulkan/radv_cmd_buffer.c
 *   - radv_emit_tcs_epilog_state: rsrc1 comes from the combine helper when
 *     tcs->info.merged_shader_compiled_separately is set, otherwise from
 *     tcs->config.rsrc1 as before.
 *   - radv_emit_patch_control_points: same selection for hs_rsrc2.
 *   - emit_prolog_regs: rsrc1 and rsrc2 come from the combine helper when
 *     vs_shader->info.merged_shader_compiled_separately is set; on
 *     chip < GFX10 rsrc2 is then also written at rsrc1_reg + 4. rsrc2 is only
 *     assigned and only read under that same flag.
 *   - radv_emit_shaders: passes the bound TCS as next_stage to
 *     radv_emit_vertex_shader when the VS has the flag set, NULL otherwise.
 *     The local device pointer loses its const qualifier.
 *
 * src/amd/vulkan/radv_pipeline_graphics.c, src/amd/vulkan/radv_private.h
 *   - radv_emit_vertex_shader gains a next_stage parameter. When the VS has
 *     the flag set it asserts next_stage is MESA_SHADER_TESS_CTRL, optionally
 *     (no VS prolog) writes the LS program address and the HS RSRC1 register
 *     using the combined rsrc1, writes next_stage->va to the user SGPR
 *     located by AC_UD_NEXT_STAGE_PC, and returns.
 *   - radv_emit_tess_ctrl_shader returns without emitting when the TCS has
 *     the flag set.
 *   - radv_pipeline_emit_pm4 passes NULL as next_stage.
 *
 * NOTE(review): radv_pipeline_emit_pm4 passes NULL while
 *   radv_emit_vertex_shader dereferences next_stage on the flagged path.
 *   Presumably pipeline shaders never have the flag set; confirm.
 * NOTE(review): in emit_prolog_regs the chip >= GFX10 branch still asserts
 *   rsrc1 == vs_shader->config.rsrc1, but on the flagged path rsrc1 is the
 *   combined value. Confirm the assert cannot fire when the TCS fields are
 *   larger than the VS fields.
 * NOTE(review): the helper merges SGPRS unconditionally, whereas the
 *   prolog/epilog SGPRS merges in radv_cmd_buffer.c are guarded by a
 *   "< GFX10" check. Confirm whether the helper needs the same guard.
 * NOTE(review): the reason for dropping const on the device pointer in
 *   radv_emit_shaders is not visible in these hunks.
 */
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 5daaf77817a..aba9c4dbe90 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1891,12 +1891,18 @@ radv_emit_tcs_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shade
 {
    const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
    struct radv_shader *tcs = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL];
+   uint32_t rsrc1;
 
    if (cmd_buffer->state.emitted_tcs_epilog == tcs_epilog)
       return;
 
+   if (tcs->info.merged_shader_compiled_separately) {
+      radv_shader_combine_cfg_vs_tcs(cmd_buffer->state.shaders[MESA_SHADER_VERTEX], tcs, &rsrc1, NULL);
+   } else {
+      rsrc1 = tcs->config.rsrc1;
+   }
+
    assert(tcs->config.num_shared_vgprs == 0);
-   uint32_t rsrc1 = tcs->config.rsrc1;
    if (G_00B848_VGPRS(tcs_epilog->rsrc1) > G_00B848_VGPRS(rsrc1))
       rsrc1 = (rsrc1 & C_00B848_VGPRS) | (tcs_epilog->rsrc1 & ~C_00B848_VGPRS);
    if (gfx_level < GFX10 && G_00B228_SGPRS(tcs_epilog->rsrc1) > G_00B228_SGPRS(rsrc1))
@@ -2628,7 +2634,13 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
    }
 
    if (pdevice->rad_info.gfx_level >= GFX9) {
-      unsigned hs_rsrc2 = tcs->config.rsrc2;
+      unsigned hs_rsrc2;
+
+      if (tcs->info.merged_shader_compiled_separately) {
+         radv_shader_combine_cfg_vs_tcs(cmd_buffer->state.shaders[MESA_SHADER_VERTEX], tcs, NULL, &hs_rsrc2);
+      } else {
+         hs_rsrc2 = tcs->config.rsrc2;
+      }
 
       if (pdevice->rad_info.gfx_level >= GFX10) {
          hs_rsrc2 |= S_00B42C_LDS_SIZE_GFX10(cmd_buffer->state.tess_lds_size);
@@ -3898,6 +3910,8 @@ static void
 emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs_shader,
                  const struct radv_shader_part *prolog)
 {
+   uint32_t rsrc1, rsrc2;
+
    /* no need to re-emit anything in this case */
    if (cmd_buffer->state.emitted_vs_prolog == prolog)
       return;
@@ -3906,8 +3920,15 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
 
    assert(cmd_buffer->state.emitted_graphics_pipeline == cmd_buffer->state.graphics_pipeline);
 
-   uint32_t rsrc1 = vs_shader->config.rsrc1;
-   if (chip < GFX10 && G_00B228_SGPRS(prolog->rsrc1) > G_00B228_SGPRS(vs_shader->config.rsrc1))
+   if (vs_shader->info.merged_shader_compiled_separately) {
+      assert(vs_shader->info.next_stage == MESA_SHADER_TESS_CTRL);
+
+      radv_shader_combine_cfg_vs_tcs(vs_shader, cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL], &rsrc1, &rsrc2);
+   } else {
+      rsrc1 = vs_shader->config.rsrc1;
+   }
+
+   if (chip < GFX10 && G_00B228_SGPRS(prolog->rsrc1) > G_00B228_SGPRS(rsrc1))
       rsrc1 = (rsrc1 & C_00B228_SGPRS) | (prolog->rsrc1 & ~C_00B228_SGPRS);
 
    /* The main shader must not use less VGPRs than the prolog, otherwise shared vgprs might not
@@ -3936,10 +3957,15 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v
 
    radeon_set_sh_reg(cmd_buffer->cs, pgm_lo_reg, prolog->va >> 8);
 
-   if (chip < GFX10)
+   if (chip < GFX10) {
       radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg, rsrc1);
-   else
+
+      if (vs_shader->info.merged_shader_compiled_separately) {
+         radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg + 4, rsrc2);
+      }
+   } else {
       assert(rsrc1 == vs_shader->config.rsrc1);
+   }
 
    radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, prolog->bo);
 }
@@ -9048,7 +9074,7 @@ radv_emit_shaders(struct radv_cmd_buffer *cmd_buffer)
    const gl_shader_stage last_vgt_api_stage = radv_cmdbuf_get_last_vgt_api_stage(cmd_buffer);
    const struct radv_shader *last_vgt_shader = cmd_buffer->state.shaders[last_vgt_api_stage];
    const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
-   const struct radv_device *device = cmd_buffer->device;
+   struct radv_device *device = cmd_buffer->device;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
 
    if (cmd_buffer->state.graphics_pipeline)
@@ -9060,9 +9086,17 @@ radv_emit_shaders(struct radv_cmd_buffer *cmd_buffer)
       struct radv_shader_object *shader_obj = cmd_buffer->state.shader_objs[s];
 
       switch (s) {
-      case MESA_SHADER_VERTEX:
-         radv_emit_vertex_shader(device, cs, cs, cmd_buffer->state.shaders[MESA_SHADER_VERTEX]);
+      case MESA_SHADER_VERTEX: {
+         const struct radv_shader *vs = cmd_buffer->state.shaders[MESA_SHADER_VERTEX];
+         struct radv_shader *next_stage = NULL;
+
+         if (vs->info.merged_shader_compiled_separately) {
+            next_stage = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL];
+         }
+
+         radv_emit_vertex_shader(device, cs, cs, vs, next_stage);
          break;
+      }
       case MESA_SHADER_TESS_CTRL:
          radv_emit_tess_ctrl_shader(device, cs, cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]);
          break;
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c
index 4ad3ed06bfd..be2a6c93be9 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -3113,8 +3113,33 @@ radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, cons
 
 void
 radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
-                        const struct radv_shader *vs)
+                        const struct radv_shader *vs, const struct radv_shader *next_stage)
 {
+   if (vs->info.merged_shader_compiled_separately) {
+      assert(vs->info.next_stage == MESA_SHADER_TESS_CTRL);
+      const struct radv_userdata_info *loc = &vs->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC];
+      const uint32_t base_reg = vs->info.user_data_0;
+
+      assert(loc->sgpr_idx != -1 && loc->num_sgprs == 1);
+
+      if (!vs->info.vs.has_prolog) {
+         uint32_t rsrc1;
+
+         radv_shader_combine_cfg_vs_tcs(vs, next_stage, &rsrc1, NULL);
+
+         if (device->physical_device->rad_info.gfx_level >= GFX10) {
+            radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
+         } else {
+            radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
+         }
+
+         radeon_set_sh_reg(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, rsrc1);
+      }
+
+      radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, next_stage->va, false);
+      return;
+   }
+
    if (vs->info.vs.as_ls)
       radv_emit_hw_ls(cs, vs);
    else if (vs->info.vs.as_es)
@@ -3128,6 +3153,13 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
 void
 radv_emit_tess_ctrl_shader(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *tcs)
 {
+   if (tcs->info.merged_shader_compiled_separately) {
+      /* When VS+TCS are compiled separately on GFX9+, the VS will jump to the TCS and everything is
+       * emitted as part of the VS.
+       */
+      return;
+   }
+
    radv_emit_hw_hs(device, cs, tcs);
 }
 
@@ -3637,7 +3669,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
    radv_emit_vgt_gs_mode(device, ctx_cs, pipeline->base.shaders[pipeline->last_vgt_api_stage]);
 
    if (radv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX)) {
-      radv_emit_vertex_shader(device, ctx_cs, cs, pipeline->base.shaders[MESA_SHADER_VERTEX]);
+      radv_emit_vertex_shader(device, ctx_cs, cs, pipeline->base.shaders[MESA_SHADER_VERTEX], NULL);
    }
 
    if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 52af6b8bd06..122add82d26 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2343,7 +2343,7 @@ bool radv_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsi
                                  nir_intrinsic_instr *low, nir_intrinsic_instr *high, void *data);
 
 void radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
-                             const struct radv_shader *vs);
+                             const struct radv_shader *vs, const struct radv_shader *next_stage);
 
 void radv_emit_tess_ctrl_shader(const struct radv_device *device, struct radeon_cmdbuf *cs,
                                 const struct radv_shader *tcs);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 9026bd2a966..045a1523cae 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1797,6 +1797,33 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
    return true;
 }
 
+void
+radv_shader_combine_cfg_vs_tcs(const struct radv_shader *vs, const struct radv_shader *tcs, uint32_t *rsrc1_out,
+                               uint32_t *rsrc2_out)
+{
+   if (rsrc1_out) {
+      uint32_t rsrc1 = vs->config.rsrc1;
+
+      if (G_00B848_VGPRS(tcs->config.rsrc1) > G_00B848_VGPRS(rsrc1))
+         rsrc1 = (rsrc1 & C_00B848_VGPRS) | (tcs->config.rsrc1 & ~C_00B848_VGPRS);
+      if (G_00B228_SGPRS(tcs->config.rsrc1) > G_00B228_SGPRS(rsrc1))
+         rsrc1 = (rsrc1 & C_00B228_SGPRS) | (tcs->config.rsrc1 & ~C_00B228_SGPRS);
+      if (G_00B428_LS_VGPR_COMP_CNT(tcs->config.rsrc1) > G_00B428_LS_VGPR_COMP_CNT(rsrc1))
+         rsrc1 = (rsrc1 & C_00B428_LS_VGPR_COMP_CNT) | (tcs->config.rsrc1 & ~C_00B428_LS_VGPR_COMP_CNT);
+
+      *rsrc1_out = rsrc1;
+   }
+
+   if (rsrc2_out) {
+      uint32_t rsrc2 = vs->config.rsrc2;
+
+      if (G_00B12C_SCRATCH_EN(tcs->config.rsrc2) > G_00B12C_SCRATCH_EN(rsrc2))
+         rsrc2 = (rsrc2 & C_00B12C_SCRATCH_EN) | (tcs->config.rsrc2 & ~C_00B12C_SCRATCH_EN);
+
+      *rsrc2_out = rsrc2;
+   }
+}
+
 static bool
 radv_shader_binary_upload(struct radv_device *device, const struct radv_shader_binary *binary,
                           struct radv_shader *shader, void *dest_ptr)
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 8816fe83c64..6c705f5375f 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -1056,4 +1056,7 @@ void radv_nir_shader_info_init(gl_shader_stage stage, gl_shader_stage next_stage
 void radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
                                struct radv_shader_stage *stages);
 
+void radv_shader_combine_cfg_vs_tcs(const struct radv_shader *vs, const struct radv_shader *tcs, uint32_t *rsrc1_out,
+                                    uint32_t *rsrc2_out);
+
 #endif