From 7598bfd768f02d1d77007ebc07990db9c83a6fb4 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Sat, 28 May 2022 17:52:35 +0800 Subject: [PATCH] radeonsi: replace llvm tcs output with nir lower pass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the store_tcs_outputs ABI callback; the common output ABI can be used instead to pass the tess factors as VGPRs. Reviewed-by: Marek Olšák Acked-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Qiang Yu Part-of: --- src/amd/llvm/ac_nir_to_llvm.c | 20 +-- src/amd/llvm/ac_shader_abi.h | 6 - src/gallium/drivers/radeonsi/si_shader.c | 24 ++- .../drivers/radeonsi/si_shader_internal.h | 1 - src/gallium/drivers/radeonsi/si_shader_llvm.c | 5 - .../drivers/radeonsi/si_shader_llvm_tess.c | 156 ++++-------------- 6 files changed, 51 insertions(+), 161 deletions(-) diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 7fc61f40936..00511ab487f 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -2367,12 +2367,9 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr * unsigned component = nir_intrinsic_component(instr); LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); nir_src offset = *nir_get_io_offset_src(instr); - LLVMValueRef indir_index = NULL; - if (nir_src_is_const(offset)) - assert(nir_src_as_uint(offset) == 0); - else - indir_index = get_src(ctx, offset); + /* No indirect indexing is allowed here. */ + assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0); switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) { case 16: @@ -2388,19 +2385,6 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr * writemask <<= component; - if (ctx->stage == MESA_SHADER_TESS_CTRL) { - nir_src *vertex_index_src = nir_get_io_arrayed_index_src(instr); - LLVMValueRef vertex_index = vertex_index_src ? 
get_src(ctx, *vertex_index_src) : NULL; - unsigned location = nir_intrinsic_io_semantics(instr).location; - - ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, src, - writemask, component, location, base); - return; - } - - /* No indirect indexing is allowed after this point. */ - assert(!indir_index); - for (unsigned chan = 0; chan < 8; chan++) { if (!(writemask & (1 << chan))) continue; diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index 4584d52d120..ee9e17a88d0 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -69,12 +69,6 @@ struct ac_shader_abi { unsigned driver_location, unsigned component, unsigned num_components, bool load_inputs); - void (*store_tcs_outputs)(struct ac_shader_abi *abi, - LLVMValueRef vertex_index, LLVMValueRef param_index, - LLVMValueRef src, unsigned writemask, - unsigned component, unsigned location, unsigned driver_location); - - LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index); /** diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b65cec6b978..5e102df047d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1491,13 +1491,20 @@ static bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key) static unsigned si_map_io_driver_location(unsigned semantic) { + if ((semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX) || + semantic == VARYING_SLOT_TESS_LEVEL_INNER || + semantic == VARYING_SLOT_TESS_LEVEL_OUTER) + return si_shader_io_get_unique_index_patch(semantic); + return si_shader_io_get_unique_index(semantic, false); } -static bool si_lower_io_to_mem(const union si_shader_key *key, - nir_shader *nir, +static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir, uint64_t tcs_vgpr_only_inputs) { + struct si_shader_selector *sel = shader->selector; + const union si_shader_key *key = &shader->key; + if 
(nir->info.stage == MESA_SHADER_VERTEX) { if (key->ge.as_ls) { NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, si_map_io_driver_location, @@ -1507,6 +1514,17 @@ static bool si_lower_io_to_mem(const union si_shader_key *key, } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location, key->ge.opt.same_patch_vertices); + NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, si_map_io_driver_location, + sel->screen->info.gfx_level, + false, /* does not matter as we disabled final tess factor write */ + ~0ULL, ~0ULL, /* no TES inputs filter */ + util_last_bit64(sel->info.outputs_written), + util_last_bit64(sel->info.patch_outputs_written), + shader->wave_size, + /* ALL TCS inputs are passed by register. */ + key->ge.opt.same_patch_vertices && + !(sel->info.base.inputs_read & ~sel->info.tcs_vgpr_only_inputs), + sel->info.tessfactors_are_def_in_all_invocs, false); return true; } @@ -1633,7 +1651,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir, */ progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level); - bool opt_offsets = si_lower_io_to_mem(key, nir, tcs_vgpr_only_inputs); + bool opt_offsets = si_lower_io_to_mem(shader, nir, tcs_vgpr_only_inputs); if (progress2 || opt_offsets) si_nir_opts(sel->screen, nir, false); diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index cbb4a510912..6bda67d5b7b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -144,7 +144,6 @@ struct si_shader_context { LLVMValueRef gsvs_ring[4]; LLVMValueRef tess_offchip_ring; - LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */ LLVMValueRef gs_next_vertex[4]; LLVMValueRef gs_curprim_verts[4]; LLVMValueRef gs_generated_prims[4]; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 
8f3263ab4b0..a1de3d85a35 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -834,11 +834,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad case MESA_SHADER_TESS_CTRL: si_llvm_init_tcs_callbacks(ctx); si_llvm_preload_tess_rings(ctx); - - if (sel->info.tessfactors_are_def_in_all_invocs) { - for (unsigned i = 0; i < 6; i++) - ctx->invoc0_tess_factors[i] = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, ""); - } break; case MESA_SHADER_TESS_EVAL: diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 70ab6e310a7..4d75f291ccb 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -383,42 +383,18 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType { struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct si_shader_info *info = &ctx->shader->selector->info; + + assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index); + + ubyte semantic = info->input[driver_location].semantic; + /* Load the TCS input from a VGPR. */ + unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 + + si_shader_io_get_unique_index(semantic, false) * 4; + LLVMValueRef value[4]; - - if (load_input) { - assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index); - - ubyte semantic = info->input[driver_location].semantic; - /* Load the TCS input from a VGPR. 
*/ - unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 + - si_shader_io_get_unique_index(semantic, false) * 4; - - for (unsigned i = component; i < component + num_components; i++) { - value[i] = LLVMGetParam(ctx->main_fn, func_param + i); - value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, ""); - } - } else { - ubyte semantic = info->output_semantic[driver_location]; - - bool is_patch = vertex_index == NULL; - assert((semantic >= VARYING_SLOT_PATCH0 || - semantic == VARYING_SLOT_TESS_LEVEL_INNER || - semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch); - - LLVMValueRef dw_addr, stride; - if (is_patch) { - stride = NULL; - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - } else { - stride = get_tcs_out_vertex_dw_stride(ctx); - dw_addr = get_tcs_out_current_patch_offset(ctx); - } - - dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, - param_index, semantic); - - for (unsigned i = component; i < component + num_components; i++) - value[i] = lshs_lds_load(ctx, type, i, dw_addr); + for (unsigned i = component; i < component + num_components; i++) { + value[i] = LLVMGetParam(ctx->main_fn, func_param + i); + value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, ""); } return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); @@ -455,96 +431,6 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); } -static void si_nir_store_output_tcs(struct ac_shader_abi *abi, - LLVMValueRef vertex_index, LLVMValueRef param_index, - LLVMValueRef src, unsigned writemask, - unsigned component, unsigned location, unsigned driver_location) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct si_shader_info *info = &ctx->shader->selector->info; - LLVMValueRef dw_addr, stride; - LLVMValueRef buffer, base, addr; - LLVMValueRef values[8]; - bool is_tess_factor = 
false, is_tess_inner = false; - - ubyte semantic = info->output_semantic[driver_location]; - - const bool is_const = !param_index; - const bool is_patch = vertex_index == NULL; - - /* Invalid SPIR-V can cause this. */ - if ((semantic >= VARYING_SLOT_PATCH0 || semantic == VARYING_SLOT_TESS_LEVEL_INNER || - semantic == VARYING_SLOT_TESS_LEVEL_OUTER) != is_patch) - return; - - if (!is_patch) { - stride = get_tcs_out_vertex_dw_stride(ctx); - dw_addr = get_tcs_out_current_patch_offset(ctx); - dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index, - semantic); - } else { - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr, vertex_index, param_index, - semantic); - - if (is_const) { - int semantic = info->output_semantic[driver_location]; - - /* Always write tess factors into LDS for the TCS epilog. */ - if (semantic == VARYING_SLOT_TESS_LEVEL_INNER || - semantic == VARYING_SLOT_TESS_LEVEL_OUTER) { - is_tess_factor = true; - is_tess_inner = semantic == VARYING_SLOT_TESS_LEVEL_INNER; - } - } - } - - buffer = ctx->tess_offchip_ring; - - base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset); - - addr = - get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic); - - for (unsigned chan = component; chan < 4; chan++) { - if (!(writemask & (1 << chan))) - continue; - LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); - - /* Skip LDS stores if there is no LDS read of this output. */ - if (info->output_readmask[driver_location] & (1 << chan) || - /* The epilog reads LDS if invocation 0 doesn't define tess factors. 
*/ - (is_tess_factor && - !ctx->shader->selector->info.tessfactors_are_def_in_all_invocs)) - lshs_lds_store(ctx, chan, dw_addr, value); - - value = ac_to_integer(&ctx->ac, value); - values[chan] = value; - - if (writemask != 0xF && !is_tess_factor) { - LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr, - LLVMConstInt(ctx->ac.i32, 4 * chan, 0), ""); - ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, voffset, base, ac_glc); - } - - /* Write tess factors into VGPRs for the epilog. */ - if (is_tess_factor && ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) { - if (!is_tess_inner) { - LLVMBuildStore(ctx->ac.builder, value, /* outer */ - ctx->invoc0_tess_factors[chan]); - } else if (chan < 2) { - LLVMBuildStore(ctx->ac.builder, value, /* inner */ - ctx->invoc0_tess_factors[4 + chan]); - } - } - } - - if (writemask == 0xF && !is_tess_factor) { - LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4); - ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, ac_glc); - } -} - static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key, LLVMValueRef rel_patch_id, LLVMValueRef invocation_id, LLVMValueRef tcs_out_current_patch_data_offset, @@ -769,10 +655,25 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx) ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, ""); ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, ""); - if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) { + struct si_shader_info *info = &ctx->shader->selector->info; + if (info->tessfactors_are_def_in_all_invocs) { vgpr++; /* skip the tess factor LDS offset */ + + /* get tess factor driver location */ + int outer_loc = -1; + int inner_loc = -1; + for (int i = 0; i < info->num_outputs; i++) { + unsigned semantic = info->output_semantic[i]; + if (semantic == VARYING_SLOT_TESS_LEVEL_OUTER) + outer_loc = i; + else if (semantic == VARYING_SLOT_TESS_LEVEL_INNER) + inner_loc = 
i; + } + for (unsigned i = 0; i < 6; i++) { - LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], ""); + int loc = i < 4 ? outer_loc : inner_loc; + LLVMValueRef value = loc < 0 ? LLVMGetUndef(ctx->ac.f32) : + LLVMBuildLoad(builder, ctx->abi.outputs[loc * 4 + i % 4], ""); value = ac_to_float(&ctx->ac, value); ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, ""); } @@ -920,7 +821,6 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx) { ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings; - ctx->abi.store_tcs_outputs = si_nir_store_output_tcs; } void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)