From c68f9ed02084bc10a8f5a39e18975450c225a8b0 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 20 Oct 2022 13:21:54 +0100 Subject: [PATCH] radv/llvm: use the ring_offsets shader arg Besides being nicer, this also fixes load_sample_positions_amd with LLVM. Signed-off-by: Rhys Perry Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_nir_lower_abi.c | 29 +----- src/amd/vulkan/radv_nir_to_llvm.c | 131 +--------------------------- src/amd/vulkan/radv_pipeline.c | 4 +- src/amd/vulkan/radv_shader.h | 3 +- src/amd/vulkan/radv_shader_args.c | 4 +- 5 files changed, 8 insertions(+), 163 deletions(-) diff --git a/src/amd/vulkan/radv_nir_lower_abi.c b/src/amd/vulkan/radv_nir_lower_abi.c index 5ca6a86fc45..ad9eb7281e6 100644 --- a/src/amd/vulkan/radv_nir_lower_abi.c +++ b/src/amd/vulkan/radv_nir_lower_abi.c @@ -34,7 +34,6 @@ typedef struct { const struct radv_shader_args *args; const struct radv_shader_info *info; const struct radv_pipeline_key *pl_key; - bool use_llvm; uint32_t address32_hi; nir_ssa_def *gsvs_ring[4]; } lower_abi_state; @@ -128,22 +127,12 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) switch (intrin->intrinsic) { case nir_intrinsic_load_ring_tess_factors_amd: - if (s->use_llvm) { - progress = false; - break; - } - replacement = load_ring(b, RING_HS_TESS_FACTOR, s); break; case nir_intrinsic_load_ring_tess_factors_offset_amd: replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset); break; case nir_intrinsic_load_ring_tess_offchip_amd: - if (s->use_llvm) { - progress = false; - break; - } - replacement = load_ring(b, RING_HS_TESS_OFFCHIP, s); break; case nir_intrinsic_load_ring_tess_offchip_offset_amd: @@ -162,19 +151,9 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) } break; case nir_intrinsic_load_ring_esgs_amd: - if (s->use_llvm) { - progress = false; - break; - } - replacement = load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s); break; case nir_intrinsic_load_ring_gsvs_amd: - if (s->use_llvm) { - progress = false; - break; - } - if (stage == MESA_SHADER_VERTEX) replacement = load_ring(b, RING_GSVS_VS, s); else @@ -188,11 +167,6 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state) break; case nir_intrinsic_load_ring_attr_amd: - if (s->use_llvm) { - progress = false; - break; - } - replacement = load_ring(b, RING_PS_ATTR, s); nir_ssa_def *dword1 = nir_channel(b, replacement, 1); @@ -550,14 +524,13 @@ load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id) void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_info *info, const struct radv_shader_args *args, - const struct radv_pipeline_key *pl_key, bool use_llvm, uint32_t address32_hi) + const struct radv_pipeline_key *pl_key, uint32_t address32_hi) { lower_abi_state state = { .gfx_level = gfx_level, .info = info, .args = args, .pl_key = pl_key, - .use_llvm = use_llvm, .address32_hi = address32_hi, }; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 47cbd220cae..c017bc87098 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -56,18 +56,10 @@ struct radv_shader_context { LLVMValueRef descriptor_sets[MAX_SETS]; - LLVMValueRef ring_offsets; - LLVMValueRef vs_rel_patch_id; LLVMValueRef gs_wave_id; - LLVMValueRef esgs_ring; - LLVMValueRef gsvs_ring[4]; - LLVMValueRef hs_ring_tess_offchip; - LLVMValueRef hs_ring_tess_factor; - LLVMValueRef attr_ring; - uint64_t output_mask; }; @@ -167,11 +159,6 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has get_llvm_calling_convention(ctx->main_function.value, stage), ctx->max_workgroup_size, ctx->options); - ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", - LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), NULL, 0, 0); - ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets, - ac_array_in_const_addr_space(ctx->ac.v4i32), ""); - load_descriptor_sets(ctx); if (stage == MESA_SHADER_TESS_CTRL || @@ -801,99 +788,6 @@ ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef pass ac_llvm_context_dispose(&ctx->ac); } -static void -ac_setup_rings(struct radv_shader_context *ctx) -{ - struct ac_llvm_pointer ring_offsets = { .t = ctx->ac.v4i32, .v = ctx->ring_offsets }; - - if (ctx->options->gfx_level <= GFX8 && - (ctx->stage == MESA_SHADER_GEOMETRY || - (ctx->stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_es) || - (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->shader_info->tes.as_es))) { - unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS; - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false); - - ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ring_offsets, offset); - } - - if (ctx->args->is_gs_copy_shader) { - ctx->gsvs_ring[0] = ac_build_load_to_sgpr(&ctx->ac, ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false)); - } - - if (ctx->stage == MESA_SHADER_GEOMETRY) { - /* The conceptual layout of the GSVS ring is - * v0c0 .. vLv0 v0c1 .. vLc1 .. - * but the real memory layout is swizzled across - * threads: - * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL - * t16v0c0 .. - * Override the buffer descriptor accordingly. - */ - LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2); - uint64_t stream_offset = 0; - unsigned num_records = ctx->ac.wave_size; - LLVMValueRef base_ring; - - base_ring = ac_build_load_to_sgpr(&ctx->ac, ring_offsets, - LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false)); - - for (unsigned stream = 0; stream < 4; stream++) { - unsigned num_components, stride; - LLVMValueRef ring, tmp; - - num_components = ctx->shader_info->gs.num_stream_output_components[stream]; - - if (!num_components) - continue; - - stride = 4 * num_components * ctx->shader->info.gs.vertices_out; - - /* Limit on the stride field for <= GFX7. */ - assert(stride < (1 << 14)); - - ring = LLVMBuildBitCast(ctx->ac.builder, base_ring, v2i64, ""); - tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, ctx->ac.i32_0, ""); - tmp = LLVMBuildAdd(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i64, stream_offset, 0), ""); - ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, ctx->ac.i32_0, ""); - - stream_offset += stride * ctx->ac.wave_size; - - ring = LLVMBuildBitCast(ctx->ac.builder, ring, ctx->ac.v4i32, ""); - - tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, ctx->ac.i32_1, ""); - tmp = LLVMBuildOr(ctx->ac.builder, tmp, - LLVMConstInt(ctx->ac.i32, S_008F04_STRIDE(stride), false), ""); - ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, ctx->ac.i32_1, ""); - - ring = LLVMBuildInsertElement(ctx->ac.builder, ring, - LLVMConstInt(ctx->ac.i32, num_records, false), - LLVMConstInt(ctx->ac.i32, 2, false), ""); - - ctx->gsvs_ring[stream] = ring; - } - } - - if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) { - ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr( - &ctx->ac, ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false)); - ctx->hs_ring_tess_factor = ac_build_load_to_sgpr( - &ctx->ac, ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false)); - } - - if (ctx->options->gfx_level >= GFX11 && - ((ctx->stage == MESA_SHADER_VERTEX && !ctx->shader_info->vs.as_es && !ctx->shader_info->vs.as_ls) || - (ctx->stage == MESA_SHADER_TESS_EVAL && !ctx->shader_info->tes.as_es) || - (ctx->stage == MESA_SHADER_GEOMETRY))) { - ctx->attr_ring = ac_build_load_to_sgpr(&ctx->ac, ring_offsets, - LLVMConstInt(ctx->ac.i32, RING_PS_ATTR, false)); - - LLVMValueRef tmp = LLVMBuildExtractElement(ctx->ac.builder, ctx->attr_ring, ctx->ac.i32_1, ""); - uint32_t stride = S_008F04_STRIDE(16 * ctx->shader_info->outinfo.param_exports); - tmp = LLVMBuildOr(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i32, stride, false), ""); - ctx->attr_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->attr_ring, tmp, ctx->ac.i32_1, ""); - } -} - /* Fixup the HW not emitting the TCS regs if there are no HS threads. */ static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx) @@ -931,35 +825,20 @@ prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged) static void declare_esgs_ring(struct radv_shader_context *ctx) { - if (ctx->esgs_ring) - return; - assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring")); - ctx->esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), - "esgs_ring", AC_ADDR_SPACE_LDS); - LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage); - LLVMSetAlignment(ctx->esgs_ring, 64 * 1024); + LLVMValueRef esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0), + "esgs_ring", AC_ADDR_SPACE_LDS); + LLVMSetLinkage(esgs_ring, LLVMExternalLinkage); + LLVMSetAlignment(esgs_ring, 64 * 1024); } static LLVMValueRef radv_intrinsic_load(struct ac_shader_abi *abi, nir_intrinsic_instr *intrin) { - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - switch (intrin->intrinsic) { case nir_intrinsic_load_base_vertex: case nir_intrinsic_load_first_vertex: return radv_load_base_vertex(abi, intrin->intrinsic == nir_intrinsic_load_base_vertex); - case nir_intrinsic_load_ring_tess_factors_amd: - return ctx->hs_ring_tess_factor; - case nir_intrinsic_load_ring_tess_offchip_amd: - return ctx->hs_ring_tess_offchip; - case nir_intrinsic_load_ring_esgs_amd: - return ctx->esgs_ring; - case nir_intrinsic_load_ring_attr_amd: - return ctx->attr_ring; - case nir_intrinsic_load_ring_gsvs_amd: - return ctx->gsvs_ring[nir_intrinsic_stream_id(intrin)]; default: return NULL; } @@ -1107,8 +986,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, nir_foreach_shader_out_variable(variable, shaders[shader_idx]) scan_shader_output_decl( &ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage); - ac_setup_rings(&ctx); - bool check_merged_wave_info = shader_count >= 2 && !(is_ngg && shader_idx == 1); LLVMBasicBlockRef merge_block = NULL; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 0083ba337dd..fd452f56843 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3069,8 +3069,7 @@ radv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline, info.inline_push_constant_mask = gs_copy_args.ac.inline_push_const_mask; NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->rad_info.gfx_level, &info, - &gs_copy_args, pipeline_key, radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX), - device->physical_device->rad_info.address32_hi); + &gs_copy_args, pipeline_key, device->physical_device->rad_info.address32_hi); return radv_create_gs_copy_shader(device, nir, &info, &gs_copy_args, gs_copy_binary, keep_executable_info, keep_statistic_info, @@ -3332,7 +3331,6 @@ radv_postprocess_nir(struct radv_pipeline *pipeline, NIR_PASS(_, stage->nir, ac_nir_lower_global_access); NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, &stage->info, &stage->args, pipeline_key, - radv_use_llvm_for_stage(device, stage->stage), device->physical_device->rad_info.address32_hi); radv_optimize_nir_algebraic( stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 3069b69159f..d517143d0e5 100644 --- a/src/amd/vulkan/radv_shader.h +++ b/src/amd/vulkan/radv_shader.h @@ -550,8 +550,7 @@ nir_shader *radv_shader_spirv_to_nir(struct radv_device *device, void radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_info *info, const struct radv_shader_args *args, - const struct radv_pipeline_key *pl_key, bool use_llvm, - uint32_t address32_hi); + const struct radv_pipeline_key *pl_key, uint32_t address32_hi); void radv_init_shader_arenas(struct radv_device *device); void radv_destroy_shader_arenas(struct radv_device *device); diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 74232013d59..259a3da211a 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -583,9 +583,7 @@ radv_declare_shader_args(enum amd_gfx_level gfx_level, const struct radv_pipelin allocate_user_sgprs(gfx_level, info, args, stage, has_previous_stage, previous_stage, needs_view_index, has_ngg_query, has_ngg_provoking_vtx, key, &user_sgpr_info); - if (args->explicit_scratch_args) { - ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets); - } + ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ac.ring_offsets); if (stage == MESA_SHADER_TASK) { ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->task_ring_offsets); }