From 22ca8c8561c4ad6c33cfa98d0849e700449f2db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 10 Feb 2023 23:44:05 +0100 Subject: [PATCH] ac/llvm: Implement typed buffer load intrinsic. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Timur Kristóf Reviewed-by: Qiang Yu Reviewed-by: Rhys Perry Acked-by: Konstantin Seurer Part-of: --- src/amd/llvm/ac_llvm_build.c | 45 +++++++++++++++++++++++++++++++++++ src/amd/llvm/ac_llvm_build.h | 11 +++++++++ src/amd/llvm/ac_nir_to_llvm.c | 21 +++++++++++++--- 3 files changed, 74 insertions(+), 3 deletions(-) diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c index b30cea10a12..f4567a32fab 100644 --- a/src/amd/llvm/ac_llvm_build.c +++ b/src/amd/llvm/ac_llvm_build.c @@ -1473,6 +1473,51 @@ LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue ctx->i32, cache_policy, can_speculate); } +LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vidx, LLVMValueRef base_voffset, + LLVMValueRef soffset, LLVMTypeRef channel_type, + const struct ac_vtx_format_info *vtx_info, + unsigned const_offset, + unsigned align_offset, + unsigned align_mul, + unsigned num_channels, + unsigned cache_policy, + bool can_speculate) +{ + const unsigned max_channels = vtx_info->num_channels; + LLVMValueRef voffset_plus_const = + LLVMBuildAdd(ctx->builder, base_voffset, LLVMConstInt(ctx->i32, const_offset, 0), ""); + + /* Split the specified load into several MTBUF instructions, + * according to a safe fetch size determined by alignment information. + */ + LLVMValueRef result = NULL; + for (unsigned i = 0, fetch_num_channels; i < num_channels; i += fetch_num_channels) { + /* Packed formats (determined here by chan_byte_size == 0) should never be split.
*/ + assert(i == 0 || vtx_info->chan_byte_size); + + const unsigned fetch_const_offset = const_offset + i * vtx_info->chan_byte_size; + const unsigned fetch_align_offset = (align_offset + i * vtx_info->chan_byte_size) % align_mul; + const unsigned fetch_alignment = fetch_align_offset ? 1 << (ffs(fetch_align_offset) - 1) : align_mul; + + fetch_num_channels = + ac_get_safe_fetch_size(ctx->gfx_level, vtx_info, fetch_const_offset, + max_channels - i, fetch_alignment, num_channels - i); + const unsigned fetch_format = vtx_info->hw_format[fetch_num_channels - 1]; + LLVMValueRef fetch_voffset = + LLVMBuildAdd(ctx->builder, voffset_plus_const, + LLVMConstInt(ctx->i32, i * vtx_info->chan_byte_size, 0), ""); + LLVMValueRef item = + ac_build_tbuffer_load(ctx, rsrc, vidx, fetch_voffset, soffset, + fetch_num_channels, fetch_format, channel_type, + cache_policy, can_speculate); + result = ac_build_concat(ctx, result, item); + } + + return result; +} + + LLVMValueRef ac_build_buffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef voffset, LLVMValueRef soffset, unsigned cache_policy) diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h index 0c11f4942a9..5b1c1bbcd41 100644 --- a/src/amd/llvm/ac_llvm_build.h +++ b/src/amd/llvm/ac_llvm_build.h @@ -310,6 +310,17 @@ LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue unsigned dfmt, unsigned nfmt, unsigned cache_policy, bool can_speculate); +LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, + LLVMValueRef vindex, LLVMValueRef voffset, + LLVMValueRef soffset, LLVMTypeRef channel_type, + const struct ac_vtx_format_info *vtx_info, + unsigned const_offset, + unsigned align_offset, + unsigned align_mul, + unsigned num_channels, + unsigned cache_policy, + bool can_speculate); + LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size, unsigned num_channels, unsigned format, bool reverse, bool
known_aligned, LLVMValueRef rsrc, diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 58b4db20793..1d12ba91c4c 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3966,6 +3966,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_set_vertex_and_primitive_count: /* Currently ignored. */ break; + case nir_intrinsic_load_typed_buffer_amd: case nir_intrinsic_load_buffer_amd: case nir_intrinsic_store_buffer_amd: { unsigned src_base = instr->intrinsic == nir_intrinsic_store_buffer_amd ? 1 : 0; @@ -4004,7 +4005,8 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins } else if (instr->intrinsic == nir_intrinsic_store_buffer_amd && uses_format) { assert(instr->src[0].ssa->bit_size == 16 || instr->src[0].ssa->bit_size == 32); ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, voffset, cache_policy); - } else if (instr->intrinsic == nir_intrinsic_load_buffer_amd) { + } else if (instr->intrinsic == nir_intrinsic_load_buffer_amd || + instr->intrinsic == nir_intrinsic_load_typed_buffer_amd) { /* LLVM is unable to select instructions for larger than 32-bit channel types. * Workaround by using i32 and casting to the correct type later. 
*/ @@ -4013,8 +4015,21 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins LLVMTypeRef channel_type = LLVMIntTypeInContext(ctx->ac.context, MIN2(32, instr->dest.ssa.bit_size)); - result = ac_build_buffer_load(&ctx->ac, descriptor, fetch_num_components, vidx, voffset, - addr_soffset, channel_type, cache_policy, reorder, false); + if (instr->intrinsic == nir_intrinsic_load_buffer_amd) { + result = ac_build_buffer_load(&ctx->ac, descriptor, fetch_num_components, vidx, voffset, + addr_soffset, channel_type, cache_policy, reorder, false); + } else { + const unsigned align_offset = nir_intrinsic_align_offset(instr); + const unsigned align_mul = nir_intrinsic_align_mul(instr); + const enum pipe_format format = nir_intrinsic_format(instr); + const struct ac_vtx_format_info *vtx_info = + ac_get_vtx_format_info(ctx->ac.gfx_level, ctx->ac.family, format); + + result = + ac_build_safe_tbuffer_load(&ctx->ac, descriptor, vidx, addr_voffset, addr_soffset, + channel_type, vtx_info, const_offset, align_offset, + align_mul, fetch_num_components, cache_policy, reorder); + } /* Trim to needed vector components. */ result = ac_trim_vector(&ctx->ac, result, fetch_num_components);