diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index de572aa896d..074774e982f 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3476,17 +3476,10 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr * if (ctx->stage == MESA_SHADER_TESS_CTRL || (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) { - bool vertex_index_is_invoc_id = - vertex_index_src && - vertex_index_src->ssa->parent_instr->type == nir_instr_type_intrinsic && - nir_instr_as_intrinsic(vertex_index_src->ssa->parent_instr)->intrinsic == - nir_intrinsic_load_invocation_id; - LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type, vertex_index, indir_index, base, component, - count, !is_output, - vertex_index_is_invoc_id); + count, !is_output); if (instr->dest.ssa.bit_size == 16) { result = ac_to_integer(&ctx->ac, result); result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, ""); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 2d2697b2e71..4584d52d120 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -67,8 +67,7 @@ struct ac_shader_abi { LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type, LLVMValueRef vertex_index, LLVMValueRef param_index, unsigned driver_location, unsigned component, - unsigned num_components, - bool load_inputs, bool vertex_index_is_invoc_id); + unsigned num_components, bool load_inputs); void (*store_tcs_outputs)(struct ac_shader_abi *abi, LLVMValueRef vertex_index, LLVMValueRef param_index, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 0ec4f8c42f8..3e91215a627 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1490,6 +1490,30 @@ static bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key) return progress; } +static unsigned si_map_io_driver_location(unsigned semantic) +{ + return si_shader_io_get_unique_index(semantic, false); +} + +static bool si_lower_io_to_mem(const union si_shader_key *key, + nir_shader *nir, + uint64_t tcs_vgpr_only_inputs) +{ + if (nir->info.stage == MESA_SHADER_VERTEX) { + if (key->ge.as_ls) { + NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, si_map_io_driver_location, + key->ge.opt.same_patch_vertices, tcs_vgpr_only_inputs); + return true; + } + } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location, + key->ge.opt.same_patch_vertices); + return true; + } + + return false; +} + struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel, const union si_shader_key *key, bool *free_nir, @@ -1603,10 +1627,22 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel, * this should be done after that. */ progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level); - if (progress2) + + bool opt_offsets = si_lower_io_to_mem(key, nir, tcs_vgpr_only_inputs); + + if (progress2 || opt_offsets) si_nir_opts(sel->screen, nir, false); - if (progress || progress2) + if (opt_offsets) { + static const nir_opt_offsets_options offset_options = { + .uniform_max = 0, + .buffer_max = ~0, + .shared_max = ~0, + }; + NIR_PASS_V(nir, nir_opt_offsets, &offset_options); + } + + if (progress || progress2 || opt_offsets) si_nir_late_opts(nir); NIR_PASS_V(nir, nir_divergence_analysis); diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 907dc04b796..ee04b3708d7 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -39,7 +39,6 @@ struct si_shader_output_values { struct si_shader_context { struct ac_llvm_context ac; struct si_shader *shader; - struct si_shader_selector *next_shader_sel; struct si_screen *screen; struct pipe_stream_output_info so; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 2a15e71086e..a5b1389f24c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -743,10 +743,10 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin } case nir_intrinsic_load_tess_level_outer: - return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true, false); + return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true); case nir_intrinsic_load_tess_level_inner: - return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true, false); + return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true); case nir_intrinsic_load_tess_level_outer_default: case nir_intrinsic_load_tess_level_inner_default: { @@ -1241,9 +1241,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler * si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key); parts[3] = ctx.main_fn; - /* VS as LS main part */ - ctx.next_shader_sel = ctx.shader->selector; - struct si_shader shader_ls = {}; shader_ls.selector = ls; shader_ls.key.ge.part.vs.prolog = shader->key.ge.part.tcs.ls_prolog; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 72c355378bb..24e1858e3a2 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -384,44 +384,33 @@ void si_llvm_preload_tes_rings(struct si_shader_context *ctx) static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type, LLVMValueRef vertex_index, LLVMValueRef param_index, unsigned driver_location, unsigned component, - unsigned num_components, bool load_input, - bool vertex_index_is_invoc_id) + unsigned num_components, bool load_input) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader_info *info = &ctx->shader->selector->info; - LLVMValueRef dw_addr, stride; - ubyte semantic; + LLVMValueRef value[4]; if (load_input) { - semantic = info->input[driver_location].semantic; - } else { - semantic = info->output_semantic[driver_location]; - } + assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index); - /* Load the TCS input from a VGPR if possible. */ - if (ctx->shader->key.ge.opt.same_patch_vertices && - load_input && vertex_index_is_invoc_id && !param_index) { + ubyte semantic = info->input[driver_location].semantic; + /* Load the TCS input from a VGPR. */ unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 + - si_shader_io_get_unique_index(semantic, false) * 4; - LLVMValueRef value[4]; + si_shader_io_get_unique_index(semantic, false) * 4; for (unsigned i = component; i < component + num_components; i++) { value[i] = LLVMGetParam(ctx->main_fn, func_param + i); value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, ""); } - - return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); - } - - bool is_patch = vertex_index == NULL; - assert((semantic >= VARYING_SLOT_PATCH0 || - semantic == VARYING_SLOT_TESS_LEVEL_INNER || - semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch); - - if (load_input) { - stride = si_get_tcs_in_vertex_dw_stride(ctx); - dw_addr = get_tcs_in_current_patch_offset(ctx); } else { + ubyte semantic = info->output_semantic[driver_location]; + + bool is_patch = vertex_index == NULL; + assert((semantic >= VARYING_SLOT_PATCH0 || + semantic == VARYING_SLOT_TESS_LEVEL_INNER || + semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch); + + LLVMValueRef dw_addr, stride; if (is_patch) { stride = NULL; dw_addr = get_tcs_out_current_patch_data_offset(ctx); @@ -429,23 +418,21 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType stride = get_tcs_out_vertex_dw_stride(ctx); dw_addr = get_tcs_out_current_patch_offset(ctx); } + + dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, + param_index, semantic); + + for (unsigned i = component; i < component + num_components; i++) + value[i] = lshs_lds_load(ctx, type, i, dw_addr); } - dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index, - semantic); - - LLVMValueRef value[4]; - for (unsigned i = component; i < component + num_components; i++) - value[i] = lshs_lds_load(ctx, type, i, dw_addr); - return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); } static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type, LLVMValueRef vertex_index, LLVMValueRef param_index, unsigned driver_location, unsigned component, - unsigned num_components, - bool load_input, bool vertex_index_is_invoc_id) + unsigned num_components, bool load_input) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader_info *info = &ctx->shader->selector->info; @@ -877,58 +864,20 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx) { struct si_shader *shader = ctx->shader; struct si_shader_info *info = &shader->selector->info; - unsigned i, chan; - LLVMValueRef vertex_id; - if (ctx->screen->info.gfx_level >= GFX11) { - vertex_id = ac_build_imad(&ctx->ac, si_unpack_param(ctx, ctx->args.tcs_wave_id, 0, 5), - LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, 0), - ac_get_thread_id(&ctx->ac)); - } else { - vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id); - } - LLVMValueRef vertex_dw_stride = si_get_tcs_in_vertex_dw_stride(ctx); - LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, ""); LLVMValueRef *addrs = ctx->abi.outputs; unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2; - /* Write outputs to LDS. The next shader (TCS aka HS) will read - * its inputs from it. */ - for (i = 0; i < info->num_outputs; i++) { - unsigned semantic = info->output_semantic[i]; + if (shader->key.ge.opt.same_patch_vertices) { + for (unsigned i = 0; i < info->num_outputs; i++) { + unsigned semantic = info->output_semantic[i]; + int param = si_shader_io_get_unique_index(semantic, false); - /* The ARB_shader_viewport_layer_array spec contains the - * following issue: - * - * 2) What happens if gl_ViewportIndex or gl_Layer is - * written in the vertex shader and a geometry shader is - * present? - * - * RESOLVED: The value written by the last vertex processing - * stage is used. If the last vertex processing stage - * (vertex, tessellation evaluation or geometry) does not - * statically assign to gl_ViewportIndex or gl_Layer, index - * or layer zero is assumed. - * - * So writes to those outputs in VS-as-LS are simply ignored. - */ - if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT) - continue; + for (unsigned chan = 0; chan < 4; chan++) { + if (!(info->output_usagemask[i] & (1 << chan))) + continue; - int param = si_shader_io_get_unique_index(semantic, false); - LLVMValueRef dw_addr = - LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), ""); + LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""); - for (chan = 0; chan < 4; chan++) { - if (!(info->output_usagemask[i] & (1 << chan))) - continue; - - LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""); - - if (!shader->key.ge.opt.same_patch_vertices || - !(ctx->next_shader_sel->info.tcs_vgpr_only_inputs & (1ull << semantic))) - lshs_lds_store(ctx, chan, dw_addr, value); - - if (shader->key.ge.opt.same_patch_vertices) { ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value, value, ret_offset + param * 4 + chan, ""); }