diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 85f5667f2a7..d1deb09cca6 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -1036,19 +1036,13 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flag void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) { - struct si_shader_ctx_state *tcs_shader; - if (!log) return; - tcs_shader = &sctx->shader.tcs; - if (sctx->shader.tes.cso && !sctx->shader.tcs.cso) - tcs_shader = &sctx->fixed_func_tcs_shader; - si_dump_framebuffer(sctx, log); si_dump_gfx_shader(sctx, &sctx->shader.vs, log); - si_dump_gfx_shader(sctx, tcs_shader, log); + si_dump_gfx_shader(sctx, &sctx->shader.tcs, log); si_dump_gfx_shader(sctx, &sctx->shader.tes, log); si_dump_gfx_shader(sctx, &sctx->shader.gs, log); si_dump_gfx_shader(sctx, &sctx->shader.ps, log); @@ -1057,7 +1051,7 @@ void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) 4, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity, log); si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log); - si_dump_gfx_descriptors(sctx, tcs_shader, log); + si_dump_gfx_descriptors(sctx, &sctx->shader.tcs, log); si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log); si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log); si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 659223fb888..c8fc1bdcc3d 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -228,8 +228,13 @@ static void si_destroy_context(struct pipe_context *context) for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++) si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config)); - if (sctx->fixed_func_tcs_shader.cso) - sctx->b.delete_tcs_state(&sctx->b, sctx->fixed_func_tcs_shader.cso); + if 
(sctx->fixed_func_tcs_shader_cache) { + hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) { + sctx->b.delete_tcs_state(&sctx->b, entry->data); + } + _mesa_hash_table_destroy(sctx->fixed_func_tcs_shader_cache, NULL); + } + if (sctx->custom_dsa_flush) sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush); if (sctx->custom_blend_resolve) diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 80ec9ae4a79..80e03678366 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -999,7 +999,7 @@ struct si_context { struct si_screen *screen; struct util_debug_callback debug; struct ac_llvm_compiler compiler; /* only non-threaded compilation */ - struct si_shader_ctx_state fixed_func_tcs_shader; + struct hash_table *fixed_func_tcs_shader_cache; struct si_resource *wait_mem_scratch; struct si_resource *wait_mem_scratch_tmz; unsigned wait_mem_number; @@ -1076,6 +1076,8 @@ struct si_context { struct si_shader_ctx_state shaders[SI_NUM_GRAPHICS_SHADERS]; }; struct si_cs_shader_state cs_shader_state; + /* if current tcs set by user */ + bool is_user_tcs; /* shader information */ uint64_t ps_inputs_read_or_disabled; @@ -1557,7 +1559,6 @@ void *si_create_passthrough_tcs(struct si_context *sctx); /* si_shaderlib_tgsi.c */ void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type, unsigned num_layers); -void *si_create_fixed_func_tcs(struct si_context *sctx); void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy); void *si_create_clear_buffer_rmw_cs(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a3f74e519f1..b65cec6b978 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1239,8 +1239,6 @@ static void si_dump_shader_key(const struct 
si_shader *shader, FILE *f) si_dump_shader_key_vs(key, &key->ge.part.tcs.ls_prolog, "part.tcs.ls_prolog", f); } fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->ge.part.tcs.epilog.prim_mode); - fprintf(f, " mono.u.ff_tcs_inputs_to_copy = 0x%" PRIx64 "\n", - key->ge.mono.u.ff_tcs_inputs_to_copy); fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono); fprintf(f, " opt.same_patch_vertices = %u\n", key->ge.opt.same_patch_vertices); break; @@ -2018,12 +2016,8 @@ void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *k key->tcs_epilog.wave32 = shader->wave_size == 32; key->tcs_epilog.states = shader->key.ge.part.tcs.epilog; - /* If output patches are wholly in one wave, we don't need a barrier. - * The fixed-func TCS doesn't set tcs_vertices_out, but it won't use a barrier - * anyway because tess levels are always defined in all invocations there. - */ + /* If output patches are wholly in one wave, we don't need a barrier. */ key->tcs_epilog.noop_s_barrier = - shader->selector->info.base.tess.tcs_vertices_out && shader->wave_size % shader->selector->info.base.tess.tcs_vertices_out == 0; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 8c38792e6de..8bed98dbd82 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -686,7 +686,6 @@ struct si_shader_key_ge { union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS]; union { - uint64_t ff_tcs_inputs_to_copy; /* fixed-func TCS only */ /* When PS needs PrimID and GS is disabled. 
*/ unsigned vs_export_prim_id : 1; /* VS and TES only */ unsigned gs_tri_strip_adj_fix : 1; /* GS only */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 781352191e5..5e9b3d441ab 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1023,12 +1023,9 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad /* If both input and output patches are wholly in one wave, we don't need a barrier. * That's true when both VS and TCS have the same number of patch vertices and * the wave size is a multiple of the number of patch vertices. - * - * The fixed-func TCS doesn't set tcs_vertices_out. */ if (!shader->key.ge.opt.same_patch_vertices || - (sel->info.base.tess.tcs_vertices_out && - ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0)) + ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0) ac_build_s_barrier(&ctx->ac, ctx->stage); } } else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 67e896e1291..9ef5e241d50 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -71,9 +71,6 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context * { assert(ctx->stage == MESA_SHADER_TESS_CTRL); - if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) - return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4; - return util_last_bit64(ctx->shader->selector->info.outputs_written) * 4; } @@ -86,9 +83,6 @@ static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx) static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx) { - if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) - return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 
0, 13); - const struct si_shader_info *info = &ctx->shader->selector->info; unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out; unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx); @@ -141,7 +135,7 @@ LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx) ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out : 0; - /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */ + /* If !tcs_out_vertices, it's the TCS epilog. */ if (ctx->stage == MESA_SHADER_TESS_CTRL && tcs_out_vertices) return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0); @@ -550,41 +544,6 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, } } -/** - * Forward all outputs from the vertex shader to the TES. This is only used - * for the fixed function TCS. - */ -static void si_copy_tcs_inputs(struct si_shader_context *ctx) -{ - LLVMValueRef invocation_id, buffer, buffer_offset; - LLVMValueRef lds_vertex_stride, lds_base; - uint64_t inputs; - - invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5); - buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); - buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset); - - lds_vertex_stride = si_get_tcs_in_vertex_dw_stride(ctx); - lds_base = get_tcs_in_current_patch_offset(ctx); - lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base); - - inputs = ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy; - while (inputs) { - unsigned i = u_bit_scan64(&inputs); - - LLVMValueRef lds_ptr = - LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), ""); - - LLVMValueRef buffer_addr = get_tcs_tes_buffer_address( - ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0)); - - LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr); - - ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset, - ac_glc); - } -} - static void 
si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key, LLVMValueRef rel_patch_id, LLVMValueRef invocation_id, LLVMValueRef tcs_out_current_patch_data_offset, @@ -751,8 +710,6 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx) LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset; - si_copy_tcs_inputs(ctx); - rel_patch_id = get_rel_patch_id(ctx); invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5); tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx); diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c index 045ea934cf4..0cc53483213 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c @@ -81,33 +81,6 @@ void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type, return *vs; } -/** - * This is used when TCS is NULL in the VS->TCS->TES chain. In this case, - * VS passes its outputs to TES directly, so the fixed-function shader only - * has to write TESSOUTER and TESSINNER. - */ -void *si_create_fixed_func_tcs(struct si_context *sctx) -{ - struct ureg_src outer, inner; - struct ureg_dst tessouter, tessinner; - struct ureg_program *ureg = ureg_create(PIPE_SHADER_TESS_CTRL); - - if (!ureg) - return NULL; - - outer = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL, 0); - inner = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL, 0); - - tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0); - tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0); - - ureg_MOV(ureg, tessouter, outer); - ureg_MOV(ureg, tessinner, inner); - ureg_END(ureg); - - return ureg_create_shader_and_destroy(ureg, &sctx->b); -} - /* Create a compute shader implementing clear_buffer or copy_buffer. 
*/ void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index ca830703198..e48e1665054 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -597,6 +597,7 @@ void si_init_tess_factor_ring(struct si_context *sctx); bool si_update_gs_ring_buffers(struct si_context *sctx); bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes); unsigned si_calc_inst_pref_size(struct si_shader *shader); +bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx); /* si_state_draw.cpp */ void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 876d1a93e15..8a0eba58303 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -126,28 +126,16 @@ static bool si_update_shaders(struct si_context *sctx) return false; } - if (sctx->shader.tcs.cso) { - r = si_shader_select(ctx, &sctx->shader.tcs); - if (r) + if (!sctx->is_user_tcs) { + if (!si_set_tcs_to_fixed_func_shader(sctx)) return false; - si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current); - } else { - if (!sctx->fixed_func_tcs_shader.cso) { - sctx->fixed_func_tcs_shader.cso = - (struct si_shader_selector*)si_create_fixed_func_tcs(sctx); - if (!sctx->fixed_func_tcs_shader.cso) - return false; - - sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def = - sctx->fixed_func_tcs_shader.cso->info.tessfactors_are_def_in_all_invocs; - } - - r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader); - if (r) - return false; - si_pm4_bind_state(sctx, hs, sctx->fixed_func_tcs_shader.current); } + r = si_shader_select(ctx, &sctx->shader.tcs); + if (r) + return false; + si_pm4_bind_state(sctx, 
hs, sctx->shader.tcs.current); + if (!HAS_GS || GFX_VERSION <= GFX8) { r = si_shader_select(ctx, &sctx->shader.tes); if (r) @@ -164,6 +152,12 @@ static bool si_update_shaders(struct si_context *sctx) } } } else { + /* Reset TCS to clear fixed function shader. */ + if (!sctx->is_user_tcs && sctx->shader.tcs.cso) { + sctx->shader.tcs.cso = NULL; + sctx->shader.tcs.current = NULL; + } + if (GFX_VERSION <= GFX8) { si_pm4_bind_state(sctx, ls, NULL); sctx->prefetch_L2_mask &= ~SI_PREFETCH_LS; @@ -626,10 +620,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa { struct si_shader *ls_current; struct si_shader_selector *ls; - /* The TES pointer will only be used for sctx->last_tcs. - * It would be wrong to think that TCS = TES. */ - struct si_shader_selector *tcs = - sctx->shader.tcs.cso ? sctx->shader.tcs.cso : sctx->shader.tes.cso; + struct si_shader_selector *tcs = sctx->shader.tcs.cso; unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id; bool has_primid_instancing_bug = sctx->gfx_level == GFX6 && sctx->screen->info.max_se == 1; unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL]; @@ -637,11 +628,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa /* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */ if (sctx->gfx_level >= GFX9) { - if (sctx->shader.tcs.cso) - ls_current = sctx->shader.tcs.current; - else - ls_current = sctx->fixed_func_tcs_shader.current; - + ls_current = sctx->shader.tcs.current; ls = ls_current->key.ge.part.tcs.ls; } else { ls_current = sctx->shader.vs.current; @@ -663,19 +650,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. 
*/ - unsigned num_tcs_inputs = util_last_bit64(ls->info.outputs_written); - unsigned num_tcs_output_cp, num_tcs_outputs, num_tcs_patch_outputs; - - if (sctx->shader.tcs.cso) { - num_tcs_outputs = util_last_bit64(tcs->info.outputs_written); - num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out; - num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written); - } else { - /* No TCS. Route varyings from LS to TES. */ - num_tcs_outputs = num_tcs_inputs; - num_tcs_output_cp = num_tcs_input_cp; - num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */ - } + unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written); + unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out; + unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written); unsigned input_vertex_size = ls->info.lshs_vertex_stride; unsigned output_vertex_size = num_tcs_outputs * 16; @@ -2223,34 +2200,44 @@ static void si_draw(struct pipe_context *ctx, si_need_gfx_cs_space(sctx, num_draws); if (HAS_TESS) { - struct si_shader_selector *tcs = sctx->shader.tcs.cso; + if (sctx->is_user_tcs) { + struct si_shader_selector *tcs = sctx->shader.tcs.cso; - /* The rarely occuring tcs == NULL case is not optimized. */ - bool same_patch_vertices = - GFX_VERSION >= GFX9 && - tcs && sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out; + bool same_patch_vertices = + GFX_VERSION >= GFX9 && + sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out; - if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) { - sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices; - sctx->do_update_shaders = true; - } - - if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) { - /* Determine whether the LS VGPR fix should be applied. - * - * It is only required when num input CPs > num output CPs, - * which cannot happen with the fixed function TCS. 
We should - * also update this bit when switching from TCS to fixed - * function TCS. - */ - bool ls_vgpr_fix = - tcs && sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out; - - if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) { - sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix; - sctx->fixed_func_tcs_shader.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix; + if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) { + sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices; sctx->do_update_shaders = true; } + + if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) { + /* Determine whether the LS VGPR fix should be applied. + * + * It is only required when num input CPs > num output CPs, + * which cannot happen with the fixed function TCS. + */ + bool ls_vgpr_fix = + sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out; + + if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) { + sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix; + sctx->do_update_shaders = true; + } + } + } else { + /* These fields are static for fixed function TCS. So no need to set + * do_update_shaders between fixed-TCS draws. As fixed-TCS to user-TCS + * or opposite, do_update_shaders should already be set by bind state. + */ + sctx->shader.tcs.key.ge.opt.same_patch_vertices = GFX_VERSION >= GFX9; + sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = false; + + /* User may only change patch vertices, needs to update fixed func TCS. 
*/ + if (sctx->shader.tcs.cso && + sctx->shader.tcs.cso->info.base.tess.tcs_vertices_out != sctx->patch_vertices) + sctx->do_update_shaders = true; } } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 015bb078da2..7fd3f9cf934 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -3373,7 +3373,6 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state) sctx->shader.vs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL; sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0; sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false; - sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->info.outputs_written : 0; if (si_update_ngg(sctx)) si_shader_change_notify(sctx); @@ -3486,6 +3485,11 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state) struct si_shader_selector *sel = (struct si_shader_selector*)state; bool enable_changed = !!sctx->shader.tcs.cso != !!sel; + /* Note it could happen that user shader sel is same as fixed function shader, + * so we should update this field even sctx->shader.tcs.cso == sel. + */ + sctx->is_user_tcs = !!sel; + if (sctx->shader.tcs.cso == sel) return; @@ -3518,11 +3522,9 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) si_update_tess_uses_prim_id(sctx); sctx->shader.tcs.key.ge.part.tcs.epilog.prim_mode = - sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.prim_mode = sel ? sel->info.base.tess._primitive_mode : 0; sctx->shader.tcs.key.ge.part.tcs.epilog.tes_reads_tess_factors = - sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.tes_reads_tess_factors = sel ? 
sel->info.reads_tess_factors : 0; si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL); @@ -3976,17 +3978,8 @@ static int si_update_scratch_buffer(struct si_context *sctx, struct si_shader *s return 1; } -static struct si_shader *si_get_tcs_current(struct si_context *sctx) -{ - if (!sctx->shader.tes.cso) - return NULL; /* tessellation disabled */ - - return sctx->shader.tcs.cso ? sctx->shader.tcs.current : sctx->fixed_func_tcs_shader.current; -} - static bool si_update_scratch_relocs(struct si_context *sctx) { - struct si_shader *tcs = si_get_tcs_current(sctx); int r; /* Update the shaders, so that they are using the latest scratch. @@ -4006,11 +3999,11 @@ static bool si_update_scratch_relocs(struct si_context *sctx) if (r == 1) si_pm4_bind_state(sctx, gs, sctx->shader.gs.current); - r = si_update_scratch_buffer(sctx, tcs); + r = si_update_scratch_buffer(sctx, sctx->shader.tcs.current); if (r < 0) return false; if (r == 1) - si_pm4_bind_state(sctx, hs, tcs); + si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current); /* VS can be bound as LS, ES, or VS. 
*/
    r = si_update_scratch_buffer(sctx, sctx->shader.vs.current);
@@ -4251,6 +4244,86 @@ static void si_emit_scratch_state(struct si_context *sctx)
    }
 }
 
+/* Lookup key for the per-context cache of fixed-function TCS selectors. */
+struct si_fixed_func_tcs_shader_key {
+   uint64_t outputs_written; /* info.outputs_written of the bound VS */
+   uint8_t vertices_out;     /* sctx->patch_vertices at lookup time */
+};
+
+/* Hash callback for the cache: hashes every byte of the key struct. */
+static uint32_t si_fixed_func_tcs_shader_key_hash(const void *key)
+{
+   return _mesa_hash_data(key, sizeof(struct si_fixed_func_tcs_shader_key));
+}
+
+/* Equality callback for the cache: byte-wise comparison of two key structs. */
+static bool si_fixed_func_tcs_shader_key_equals(const void *a, const void *b)
+{
+   return memcmp(a, b, sizeof(struct si_fixed_func_tcs_shader_key)) == 0;
+}
+
+/**
+ * Set sctx->shader.tcs.cso to a TCS created by si_create_passthrough_tcs(),
+ * reusing a cached selector when one exists for the bound VS's
+ * outputs_written mask and the current patch vertex count.
+ *
+ * \return false if the cache, the shader or the cache entry can't be
+ *         allocated; true otherwise.
+ */
+bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx)
+{
+   if (!sctx->fixed_func_tcs_shader_cache) {
+      sctx->fixed_func_tcs_shader_cache = _mesa_hash_table_create(
+         NULL, si_fixed_func_tcs_shader_key_hash,
+         si_fixed_func_tcs_shader_key_equals);
+      if (!sctx->fixed_func_tcs_shader_cache)
+         return false;
+   }
+
+   /* The hash and equality callbacks read the key as raw bytes, so clear it
+    * first: any padding in the struct would otherwise be indeterminate.
+    */
+   struct si_fixed_func_tcs_shader_key key;
+   memset(&key, 0, sizeof(key));
+   key.outputs_written = sctx->shader.vs.cso->info.outputs_written;
+   key.vertices_out = sctx->patch_vertices;
+
+   struct hash_entry *entry = _mesa_hash_table_search(
+      sctx->fixed_func_tcs_shader_cache, &key);
+
+   struct si_shader_selector *tcs;
+   if (entry) {
+      tcs = (struct si_shader_selector *)entry->data;
+   } else {
+      tcs = (struct si_shader_selector *)si_create_passthrough_tcs(sctx);
+      if (!tcs)
+         return false;
+
+      /* The table keeps the key pointer rather than copying the key, so it
+       * must not point at this stack frame. Allocate the copy as a ralloc
+       * child of the table so that destroying the table frees it too.
+       * NOTE(review): assumes util/ralloc.h is already visible in this file.
+       */
+      struct si_fixed_func_tcs_shader_key *stored_key =
+         ralloc(sctx->fixed_func_tcs_shader_cache, struct si_fixed_func_tcs_shader_key);
+      if (stored_key)
+         memcpy(stored_key, &key, sizeof(key));
+
+      if (!stored_key ||
+          !_mesa_hash_table_insert(sctx->fixed_func_tcs_shader_cache, stored_key, (void *)tcs)) {
+         ralloc_free(stored_key);
+         sctx->b.delete_tcs_state(&sctx->b, tcs);
+         return false;
+      }
+   }
+
+   sctx->shader.tcs.cso = tcs;
+   sctx->shader.tcs.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
+      tcs->info.tessfactors_are_def_in_all_invocs;
+
+   return true;
+}
+
 void si_init_screen_live_shader_cache(struct si_screen *sscreen)
 {
    util_live_shader_cache_init(&sscreen->live_shader_cache, si_create_shader_selector,