radeonsi: replace llvm based fixed tcs with nir
Create a NIR passthrough shader with explicit inputs/outputs and vertex output count so that the compiler can handle it the same way as a user TCS. The drawback is that we now create more si_shader_selector objects, one for each different input/output and vertex output count combination, a variation that was previously handled by the compiler backend. Since the fixed-function TCS can now be handled like a user TCS, we don't need the dedicated fixed_func_tcs_shader state either. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16705>
This commit is contained in:
@@ -1036,19 +1036,13 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flag
|
||||
|
||||
void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
|
||||
{
|
||||
struct si_shader_ctx_state *tcs_shader;
|
||||
|
||||
if (!log)
|
||||
return;
|
||||
|
||||
tcs_shader = &sctx->shader.tcs;
|
||||
if (sctx->shader.tes.cso && !sctx->shader.tcs.cso)
|
||||
tcs_shader = &sctx->fixed_func_tcs_shader;
|
||||
|
||||
si_dump_framebuffer(sctx, log);
|
||||
|
||||
si_dump_gfx_shader(sctx, &sctx->shader.vs, log);
|
||||
si_dump_gfx_shader(sctx, tcs_shader, log);
|
||||
si_dump_gfx_shader(sctx, &sctx->shader.tcs, log);
|
||||
si_dump_gfx_shader(sctx, &sctx->shader.tes, log);
|
||||
si_dump_gfx_shader(sctx, &sctx->shader.gs, log);
|
||||
si_dump_gfx_shader(sctx, &sctx->shader.ps, log);
|
||||
@@ -1057,7 +1051,7 @@ void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
|
||||
4, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity,
|
||||
log);
|
||||
si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log);
|
||||
si_dump_gfx_descriptors(sctx, tcs_shader, log);
|
||||
si_dump_gfx_descriptors(sctx, &sctx->shader.tcs, log);
|
||||
si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log);
|
||||
si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log);
|
||||
si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log);
|
||||
|
@@ -228,8 +228,13 @@ static void si_destroy_context(struct pipe_context *context)
|
||||
for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
|
||||
si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config));
|
||||
|
||||
if (sctx->fixed_func_tcs_shader.cso)
|
||||
sctx->b.delete_tcs_state(&sctx->b, sctx->fixed_func_tcs_shader.cso);
|
||||
if (sctx->fixed_func_tcs_shader_cache) {
|
||||
hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) {
|
||||
sctx->b.delete_tcs_state(&sctx->b, entry->data);
|
||||
}
|
||||
_mesa_hash_table_destroy(sctx->fixed_func_tcs_shader_cache, NULL);
|
||||
}
|
||||
|
||||
if (sctx->custom_dsa_flush)
|
||||
sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush);
|
||||
if (sctx->custom_blend_resolve)
|
||||
|
@@ -999,7 +999,7 @@ struct si_context {
|
||||
struct si_screen *screen;
|
||||
struct util_debug_callback debug;
|
||||
struct ac_llvm_compiler compiler; /* only non-threaded compilation */
|
||||
struct si_shader_ctx_state fixed_func_tcs_shader;
|
||||
struct hash_table *fixed_func_tcs_shader_cache;
|
||||
struct si_resource *wait_mem_scratch;
|
||||
struct si_resource *wait_mem_scratch_tmz;
|
||||
unsigned wait_mem_number;
|
||||
@@ -1076,6 +1076,8 @@ struct si_context {
|
||||
struct si_shader_ctx_state shaders[SI_NUM_GRAPHICS_SHADERS];
|
||||
};
|
||||
struct si_cs_shader_state cs_shader_state;
|
||||
/* Whether the current TCS was bound by the user (false when it is the internal fixed-function TCS). */
|
||||
bool is_user_tcs;
|
||||
|
||||
/* shader information */
|
||||
uint64_t ps_inputs_read_or_disabled;
|
||||
@@ -1557,7 +1559,6 @@ void *si_create_passthrough_tcs(struct si_context *sctx);
|
||||
/* si_shaderlib_tgsi.c */
|
||||
void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
|
||||
unsigned num_layers);
|
||||
void *si_create_fixed_func_tcs(struct si_context *sctx);
|
||||
void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
|
||||
bool dst_stream_cache_policy, bool is_copy);
|
||||
void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
|
||||
|
@@ -1239,8 +1239,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
||||
si_dump_shader_key_vs(key, &key->ge.part.tcs.ls_prolog, "part.tcs.ls_prolog", f);
|
||||
}
|
||||
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->ge.part.tcs.epilog.prim_mode);
|
||||
fprintf(f, " mono.u.ff_tcs_inputs_to_copy = 0x%" PRIx64 "\n",
|
||||
key->ge.mono.u.ff_tcs_inputs_to_copy);
|
||||
fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono);
|
||||
fprintf(f, " opt.same_patch_vertices = %u\n", key->ge.opt.same_patch_vertices);
|
||||
break;
|
||||
@@ -2018,12 +2016,8 @@ void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *k
|
||||
key->tcs_epilog.wave32 = shader->wave_size == 32;
|
||||
key->tcs_epilog.states = shader->key.ge.part.tcs.epilog;
|
||||
|
||||
/* If output patches are wholly in one wave, we don't need a barrier.
|
||||
* The fixed-func TCS doesn't set tcs_vertices_out, but it won't use a barrier
|
||||
* anyway because tess levels are always defined in all invocations there.
|
||||
*/
|
||||
/* If output patches are wholly in one wave, we don't need a barrier. */
|
||||
key->tcs_epilog.noop_s_barrier =
|
||||
shader->selector->info.base.tess.tcs_vertices_out &&
|
||||
shader->wave_size % shader->selector->info.base.tess.tcs_vertices_out == 0;
|
||||
}
|
||||
|
||||
|
@@ -686,7 +686,6 @@ struct si_shader_key_ge {
|
||||
union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS];
|
||||
|
||||
union {
|
||||
uint64_t ff_tcs_inputs_to_copy; /* fixed-func TCS only */
|
||||
/* When PS needs PrimID and GS is disabled. */
|
||||
unsigned vs_export_prim_id : 1; /* VS and TES only */
|
||||
unsigned gs_tri_strip_adj_fix : 1; /* GS only */
|
||||
|
@@ -1023,12 +1023,9 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
||||
/* If both input and output patches are wholly in one wave, we don't need a barrier.
|
||||
* That's true when both VS and TCS have the same number of patch vertices and
|
||||
* the wave size is a multiple of the number of patch vertices.
|
||||
*
|
||||
* The fixed-func TCS doesn't set tcs_vertices_out.
|
||||
*/
|
||||
if (!shader->key.ge.opt.same_patch_vertices ||
|
||||
(sel->info.base.tess.tcs_vertices_out &&
|
||||
ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0))
|
||||
ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0)
|
||||
ac_build_s_barrier(&ctx->ac, ctx->stage);
|
||||
}
|
||||
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
|
@@ -71,9 +71,6 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *
|
||||
{
|
||||
assert(ctx->stage == MESA_SHADER_TESS_CTRL);
|
||||
|
||||
if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
|
||||
return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4;
|
||||
|
||||
return util_last_bit64(ctx->shader->selector->info.outputs_written) * 4;
|
||||
}
|
||||
|
||||
@@ -86,9 +83,6 @@ static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
|
||||
|
||||
static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
|
||||
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
|
||||
|
||||
const struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;
|
||||
unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
|
||||
@@ -141,7 +135,7 @@ LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx)
|
||||
ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out
|
||||
: 0;
|
||||
|
||||
/* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
|
||||
/* If !tcs_out_vertices, it's the TCS epilog. */
|
||||
if (ctx->stage == MESA_SHADER_TESS_CTRL && tcs_out_vertices)
|
||||
return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
|
||||
|
||||
@@ -550,41 +544,6 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Forward all outputs from the vertex shader to the TES. This is only used
|
||||
* for the fixed function TCS.
|
||||
*/
|
||||
static void si_copy_tcs_inputs(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef invocation_id, buffer, buffer_offset;
|
||||
LLVMValueRef lds_vertex_stride, lds_base;
|
||||
uint64_t inputs;
|
||||
|
||||
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
|
||||
buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
|
||||
buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
|
||||
|
||||
lds_vertex_stride = si_get_tcs_in_vertex_dw_stride(ctx);
|
||||
lds_base = get_tcs_in_current_patch_offset(ctx);
|
||||
lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
|
||||
|
||||
inputs = ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy;
|
||||
while (inputs) {
|
||||
unsigned i = u_bit_scan64(&inputs);
|
||||
|
||||
LLVMValueRef lds_ptr =
|
||||
LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), "");
|
||||
|
||||
LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(
|
||||
ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0));
|
||||
|
||||
LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
|
||||
|
||||
ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset,
|
||||
ac_glc);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key,
|
||||
LLVMValueRef rel_patch_id, LLVMValueRef invocation_id,
|
||||
LLVMValueRef tcs_out_current_patch_data_offset,
|
||||
@@ -751,8 +710,6 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
|
||||
|
||||
si_copy_tcs_inputs(ctx);
|
||||
|
||||
rel_patch_id = get_rel_patch_id(ctx);
|
||||
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
|
||||
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
|
||||
|
@@ -81,33 +81,6 @@ void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
|
||||
return *vs;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is used when TCS is NULL in the VS->TCS->TES chain. In this case,
|
||||
* VS passes its outputs to TES directly, so the fixed-function shader only
|
||||
* has to write TESSOUTER and TESSINNER.
|
||||
*/
|
||||
void *si_create_fixed_func_tcs(struct si_context *sctx)
|
||||
{
|
||||
struct ureg_src outer, inner;
|
||||
struct ureg_dst tessouter, tessinner;
|
||||
struct ureg_program *ureg = ureg_create(PIPE_SHADER_TESS_CTRL);
|
||||
|
||||
if (!ureg)
|
||||
return NULL;
|
||||
|
||||
outer = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL, 0);
|
||||
inner = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL, 0);
|
||||
|
||||
tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
|
||||
tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
|
||||
|
||||
ureg_MOV(ureg, tessouter, outer);
|
||||
ureg_MOV(ureg, tessinner, inner);
|
||||
ureg_END(ureg);
|
||||
|
||||
return ureg_create_shader_and_destroy(ureg, &sctx->b);
|
||||
}
|
||||
|
||||
/* Create a compute shader implementing clear_buffer or copy_buffer. */
|
||||
void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
|
||||
bool dst_stream_cache_policy, bool is_copy)
|
||||
|
@@ -597,6 +597,7 @@ void si_init_tess_factor_ring(struct si_context *sctx);
|
||||
bool si_update_gs_ring_buffers(struct si_context *sctx);
|
||||
bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes);
|
||||
unsigned si_calc_inst_pref_size(struct si_shader *shader);
|
||||
bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx);
|
||||
|
||||
/* si_state_draw.cpp */
|
||||
void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
|
||||
|
@@ -126,28 +126,16 @@ static bool si_update_shaders(struct si_context *sctx)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (sctx->shader.tcs.cso) {
|
||||
r = si_shader_select(ctx, &sctx->shader.tcs);
|
||||
if (r)
|
||||
if (!sctx->is_user_tcs) {
|
||||
if (!si_set_tcs_to_fixed_func_shader(sctx))
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
|
||||
} else {
|
||||
if (!sctx->fixed_func_tcs_shader.cso) {
|
||||
sctx->fixed_func_tcs_shader.cso =
|
||||
(struct si_shader_selector*)si_create_fixed_func_tcs(sctx);
|
||||
if (!sctx->fixed_func_tcs_shader.cso)
|
||||
return false;
|
||||
|
||||
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
|
||||
sctx->fixed_func_tcs_shader.cso->info.tessfactors_are_def_in_all_invocs;
|
||||
}
|
||||
|
||||
r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, hs, sctx->fixed_func_tcs_shader.current);
|
||||
}
|
||||
|
||||
r = si_shader_select(ctx, &sctx->shader.tcs);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
|
||||
|
||||
if (!HAS_GS || GFX_VERSION <= GFX8) {
|
||||
r = si_shader_select(ctx, &sctx->shader.tes);
|
||||
if (r)
|
||||
@@ -164,6 +152,12 @@ static bool si_update_shaders(struct si_context *sctx)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Reset TCS to clear fixed function shader. */
|
||||
if (!sctx->is_user_tcs && sctx->shader.tcs.cso) {
|
||||
sctx->shader.tcs.cso = NULL;
|
||||
sctx->shader.tcs.current = NULL;
|
||||
}
|
||||
|
||||
if (GFX_VERSION <= GFX8) {
|
||||
si_pm4_bind_state(sctx, ls, NULL);
|
||||
sctx->prefetch_L2_mask &= ~SI_PREFETCH_LS;
|
||||
@@ -626,10 +620,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
||||
{
|
||||
struct si_shader *ls_current;
|
||||
struct si_shader_selector *ls;
|
||||
/* The TES pointer will only be used for sctx->last_tcs.
|
||||
* It would be wrong to think that TCS = TES. */
|
||||
struct si_shader_selector *tcs =
|
||||
sctx->shader.tcs.cso ? sctx->shader.tcs.cso : sctx->shader.tes.cso;
|
||||
struct si_shader_selector *tcs = sctx->shader.tcs.cso;
|
||||
unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
|
||||
bool has_primid_instancing_bug = sctx->gfx_level == GFX6 && sctx->screen->info.max_se == 1;
|
||||
unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL];
|
||||
@@ -637,11 +628,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
||||
|
||||
/* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
|
||||
if (sctx->gfx_level >= GFX9) {
|
||||
if (sctx->shader.tcs.cso)
|
||||
ls_current = sctx->shader.tcs.current;
|
||||
else
|
||||
ls_current = sctx->fixed_func_tcs_shader.current;
|
||||
|
||||
ls_current = sctx->shader.tcs.current;
|
||||
ls = ls_current->key.ge.part.tcs.ls;
|
||||
} else {
|
||||
ls_current = sctx->shader.vs.current;
|
||||
@@ -663,19 +650,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
|
||||
|
||||
/* This calculates how shader inputs and outputs among VS, TCS, and TES
|
||||
* are laid out in LDS. */
|
||||
unsigned num_tcs_inputs = util_last_bit64(ls->info.outputs_written);
|
||||
unsigned num_tcs_output_cp, num_tcs_outputs, num_tcs_patch_outputs;
|
||||
|
||||
if (sctx->shader.tcs.cso) {
|
||||
num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
|
||||
num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
|
||||
num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
|
||||
} else {
|
||||
/* No TCS. Route varyings from LS to TES. */
|
||||
num_tcs_outputs = num_tcs_inputs;
|
||||
num_tcs_output_cp = num_tcs_input_cp;
|
||||
num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
|
||||
}
|
||||
unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
|
||||
unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
|
||||
unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
|
||||
|
||||
unsigned input_vertex_size = ls->info.lshs_vertex_stride;
|
||||
unsigned output_vertex_size = num_tcs_outputs * 16;
|
||||
@@ -2223,34 +2200,44 @@ static void si_draw(struct pipe_context *ctx,
|
||||
si_need_gfx_cs_space(sctx, num_draws);
|
||||
|
||||
if (HAS_TESS) {
|
||||
struct si_shader_selector *tcs = sctx->shader.tcs.cso;
|
||||
if (sctx->is_user_tcs) {
|
||||
struct si_shader_selector *tcs = sctx->shader.tcs.cso;
|
||||
|
||||
/* The rarely occurring tcs == NULL case is not optimized. */
|
||||
bool same_patch_vertices =
|
||||
GFX_VERSION >= GFX9 &&
|
||||
tcs && sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
|
||||
bool same_patch_vertices =
|
||||
GFX_VERSION >= GFX9 &&
|
||||
sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
|
||||
|
||||
if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
|
||||
sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
|
||||
if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
|
||||
/* Determine whether the LS VGPR fix should be applied.
|
||||
*
|
||||
* It is only required when num input CPs > num output CPs,
|
||||
* which cannot happen with the fixed function TCS. We should
|
||||
* also update this bit when switching from TCS to fixed
|
||||
* function TCS.
|
||||
*/
|
||||
bool ls_vgpr_fix =
|
||||
tcs && sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
|
||||
|
||||
if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
|
||||
sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
|
||||
sctx->fixed_func_tcs_shader.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
|
||||
if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
|
||||
sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
|
||||
if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
|
||||
/* Determine whether the LS VGPR fix should be applied.
|
||||
*
|
||||
* It is only required when num input CPs > num output CPs,
|
||||
* which cannot happen with the fixed function TCS.
|
||||
*/
|
||||
bool ls_vgpr_fix =
|
||||
sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
|
||||
|
||||
if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
|
||||
sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* These fields are static for fixed function TCS. So no need to set
|
||||
* do_update_shaders between fixed-TCS draws. When switching from fixed-TCS to
|
||||
* user-TCS or vice versa, do_update_shaders should already be set by bind state.
|
||||
*/
|
||||
sctx->shader.tcs.key.ge.opt.same_patch_vertices = GFX_VERSION >= GFX9;
|
||||
sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = false;
|
||||
|
||||
/* The user may change only the patch vertex count, which requires updating the fixed-func TCS. */
|
||||
if (sctx->shader.tcs.cso &&
|
||||
sctx->shader.tcs.cso->info.base.tess.tcs_vertices_out != sctx->patch_vertices)
|
||||
sctx->do_update_shaders = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -3373,7 +3373,6 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
|
||||
sctx->shader.vs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL;
|
||||
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
|
||||
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
|
||||
sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->info.outputs_written : 0;
|
||||
|
||||
if (si_update_ngg(sctx))
|
||||
si_shader_change_notify(sctx);
|
||||
@@ -3486,6 +3485,11 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
|
||||
struct si_shader_selector *sel = (struct si_shader_selector*)state;
|
||||
bool enable_changed = !!sctx->shader.tcs.cso != !!sel;
|
||||
|
||||
/* Note that the user shader sel may be the same as the fixed function shader,
|
||||
* so we must update this field even when sctx->shader.tcs.cso == sel.
|
||||
*/
|
||||
sctx->is_user_tcs = !!sel;
|
||||
|
||||
if (sctx->shader.tcs.cso == sel)
|
||||
return;
|
||||
|
||||
@@ -3518,11 +3522,9 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
|
||||
si_update_tess_uses_prim_id(sctx);
|
||||
|
||||
sctx->shader.tcs.key.ge.part.tcs.epilog.prim_mode =
|
||||
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.prim_mode =
|
||||
sel ? sel->info.base.tess._primitive_mode : 0;
|
||||
|
||||
sctx->shader.tcs.key.ge.part.tcs.epilog.tes_reads_tess_factors =
|
||||
sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.tes_reads_tess_factors =
|
||||
sel ? sel->info.reads_tess_factors : 0;
|
||||
|
||||
si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL);
|
||||
@@ -3976,17 +3978,8 @@ static int si_update_scratch_buffer(struct si_context *sctx, struct si_shader *s
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct si_shader *si_get_tcs_current(struct si_context *sctx)
|
||||
{
|
||||
if (!sctx->shader.tes.cso)
|
||||
return NULL; /* tessellation disabled */
|
||||
|
||||
return sctx->shader.tcs.cso ? sctx->shader.tcs.current : sctx->fixed_func_tcs_shader.current;
|
||||
}
|
||||
|
||||
static bool si_update_scratch_relocs(struct si_context *sctx)
|
||||
{
|
||||
struct si_shader *tcs = si_get_tcs_current(sctx);
|
||||
int r;
|
||||
|
||||
/* Update the shaders, so that they are using the latest scratch.
|
||||
@@ -4006,11 +3999,11 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, gs, sctx->shader.gs.current);
|
||||
|
||||
r = si_update_scratch_buffer(sctx, tcs);
|
||||
r = si_update_scratch_buffer(sctx, sctx->shader.tcs.current);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, hs, tcs);
|
||||
si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
|
||||
|
||||
/* VS can be bound as LS, ES, or VS. */
|
||||
r = si_update_scratch_buffer(sctx, sctx->shader.vs.current);
|
||||
@@ -4251,6 +4244,53 @@ static void si_emit_scratch_state(struct si_context *sctx)
|
||||
}
|
||||
}
|
||||
|
||||
struct si_fixed_func_tcs_shader_key {
|
||||
uint64_t outputs_written;
|
||||
uint8_t vertices_out;
|
||||
};
|
||||
|
||||
static uint32_t si_fixed_func_tcs_shader_key_hash(const void *key)
|
||||
{
|
||||
return _mesa_hash_data(key, sizeof(struct si_fixed_func_tcs_shader_key));
|
||||
}
|
||||
|
||||
static bool si_fixed_func_tcs_shader_key_equals(const void *a, const void *b)
|
||||
{
|
||||
return memcmp(a, b, sizeof(struct si_fixed_func_tcs_shader_key)) == 0;
|
||||
}
|
||||
|
||||
bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx)
|
||||
{
|
||||
if (!sctx->fixed_func_tcs_shader_cache) {
|
||||
sctx->fixed_func_tcs_shader_cache = _mesa_hash_table_create(
|
||||
NULL, si_fixed_func_tcs_shader_key_hash,
|
||||
si_fixed_func_tcs_shader_key_equals);
|
||||
}
|
||||
|
||||
struct si_fixed_func_tcs_shader_key key;
|
||||
key.outputs_written = sctx->shader.vs.cso->info.outputs_written;
|
||||
key.vertices_out = sctx->patch_vertices;
|
||||
|
||||
struct hash_entry *entry = _mesa_hash_table_search(
|
||||
sctx->fixed_func_tcs_shader_cache, &key);
|
||||
|
||||
struct si_shader_selector *tcs;
|
||||
if (entry)
|
||||
tcs = (struct si_shader_selector *)entry->data;
|
||||
else {
|
||||
tcs = (struct si_shader_selector *)si_create_passthrough_tcs(sctx);
|
||||
if (!tcs)
|
||||
return false;
|
||||
_mesa_hash_table_insert(sctx->fixed_func_tcs_shader_cache, &key, (void *)tcs);
|
||||
}
|
||||
|
||||
sctx->shader.tcs.cso = tcs;
|
||||
sctx->shader.tcs.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
|
||||
tcs->info.tessfactors_are_def_in_all_invocs;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void si_init_screen_live_shader_cache(struct si_screen *sscreen)
|
||||
{
|
||||
util_live_shader_cache_init(&sscreen->live_shader_cache, si_create_shader_selector,
|
||||
|
Reference in New Issue
Block a user