ac/llvm: Implement typed buffer load intrinsic.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Acked-by: Konstantin Seurer <konstantin.seurer@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16805>
This commit is contained in:
Timur Kristóf
2023-02-10 23:44:05 +01:00
committed by Marge Bot
parent 477cb943f6
commit 22ca8c8561
3 changed files with 74 additions and 3 deletions

View File

@@ -1473,6 +1473,51 @@ LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
ctx->i32, cache_policy, can_speculate);
}
/* Build a typed buffer load, split into as many MTBUF loads as the known
 * alignment requires, and return the concatenated result.
 *
 * ctx            - build context; its builder, i32 type and gfx_level are used.
 * rsrc, vidx,
 * soffset        - forwarded unchanged to every ac_build_tbuffer_load call.
 * base_voffset   - i32 value; const_offset and the byte offset of the first
 *                  channel of each fetch are added to it.
 * channel_type   - forwarded unchanged to every ac_build_tbuffer_load call.
 * vtx_info       - format description; num_channels, chan_byte_size and
 *                  hw_format[] are read from it.
 * const_offset   - constant byte offset added to base_voffset and also given
 *                  to ac_get_safe_fetch_size.
 * align_offset,
 * align_mul      - alignment information; must satisfy align_mul != 0 because
 *                  it is used as a modulus.
 * num_channels   - number of channels the caller wants loaded.
 * cache_policy,
 * can_speculate  - forwarded unchanged to every ac_build_tbuffer_load call.
 *
 * Returns the per-fetch results joined with ac_build_concat, or NULL when
 * num_channels is 0 (no fetch is emitted in that case).
 */
LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
                                        LLVMValueRef vidx, LLVMValueRef base_voffset,
                                        LLVMValueRef soffset, LLVMTypeRef channel_type,
                                        const struct ac_vtx_format_info *vtx_info,
                                        unsigned const_offset,
                                        unsigned align_offset,
                                        unsigned align_mul,
                                        unsigned num_channels,
                                        unsigned cache_policy,
                                        bool can_speculate)
{
   const unsigned max_channels = vtx_info->num_channels;
   const unsigned chan_byte_size = vtx_info->chan_byte_size;
   LLVMValueRef voffset_plus_const =
      LLVMBuildAdd(ctx->builder, base_voffset, LLVMConstInt(ctx->i32, const_offset, 0), "");

   /* align_mul is used as a modulus below; zero would be a division by zero. */
   assert(align_mul != 0);

   /* Split the specified load into several MTBUF instructions,
    * according to a safe fetch size determined by alignment information.
    */
   LLVMValueRef result = NULL;
   for (unsigned i = 0, fetch_num_channels; i < num_channels; i += fetch_num_channels) {
      /* Packed formats (determined here by chan_byte_size == 0) should never be split. */
      assert(i == 0 || chan_byte_size);

      /* Byte offset of channel i relative to the start of the element. */
      const unsigned chan_offset = i * chan_byte_size;
      const unsigned fetch_const_offset = const_offset + chan_offset;
      const unsigned fetch_align_offset = (align_offset + chan_offset) % align_mul;

      /* Alignment of this fetch: the lowest set bit of its offset within the
       * align_mul period, or align_mul itself when that offset is zero.
       * The shift is done on an unsigned 1 so that it is well defined for
       * every bit position.
       */
      const unsigned fetch_alignment =
         fetch_align_offset ? 1u << (ffs(fetch_align_offset) - 1) : align_mul;

      fetch_num_channels =
         ac_get_safe_fetch_size(ctx->gfx_level, vtx_info, fetch_const_offset,
                                max_channels - i, fetch_alignment, num_channels - i);

      /* NOTE(review): presumably ac_get_safe_fetch_size always returns at
       * least 1; a zero would index hw_format[] out of bounds and the loop
       * would never advance.
       */
      assert(fetch_num_channels >= 1);

      const unsigned fetch_format = vtx_info->hw_format[fetch_num_channels - 1];
      LLVMValueRef fetch_voffset =
         LLVMBuildAdd(ctx->builder, voffset_plus_const,
                      LLVMConstInt(ctx->i32, chan_offset, 0), "");
      LLVMValueRef item =
         ac_build_tbuffer_load(ctx, rsrc, vidx, fetch_voffset, soffset,
                               fetch_num_channels, fetch_format, channel_type,
                               cache_policy, can_speculate);
      result = ac_build_concat(ctx, result, item);
   }

   return result;
}
LLVMValueRef ac_build_buffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
LLVMValueRef voffset, LLVMValueRef soffset,
unsigned cache_policy)

View File

@@ -310,6 +310,17 @@ LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValue
unsigned dfmt, unsigned nfmt, unsigned cache_policy,
bool can_speculate);
/* Build a typed buffer load that is split into several MTBUF loads, each
 * sized by ac_get_safe_fetch_size from the alignment information
 * (align_offset, align_mul), and return the concatenated result.
 *
 * const_offset is added to voffset; vindex, soffset, channel_type,
 * cache_policy and can_speculate are forwarded to each individual load.
 * vtx_info supplies the channel count, per-channel byte size and hardware
 * formats. num_channels is the number of channels requested.
 */
LLVMValueRef ac_build_safe_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
LLVMValueRef vindex, LLVMValueRef voffset,
LLVMValueRef soffset, LLVMTypeRef channel_type,
const struct ac_vtx_format_info *vtx_info,
unsigned const_offset,
unsigned align_offset,
unsigned align_mul,
unsigned num_channels,
unsigned cache_policy,
bool can_speculate);
LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size,
unsigned num_channels, unsigned format, bool reverse,
bool known_aligned, LLVMValueRef rsrc,

View File

@@ -3966,6 +3966,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
case nir_intrinsic_set_vertex_and_primitive_count:
/* Currently ignored. */
break;
case nir_intrinsic_load_typed_buffer_amd:
case nir_intrinsic_load_buffer_amd:
case nir_intrinsic_store_buffer_amd: {
unsigned src_base = instr->intrinsic == nir_intrinsic_store_buffer_amd ? 1 : 0;
@@ -4004,7 +4005,8 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
} else if (instr->intrinsic == nir_intrinsic_store_buffer_amd && uses_format) {
assert(instr->src[0].ssa->bit_size == 16 || instr->src[0].ssa->bit_size == 32);
ac_build_buffer_store_format(&ctx->ac, descriptor, store_data, vidx, voffset, cache_policy);
} else if (instr->intrinsic == nir_intrinsic_load_buffer_amd) {
} else if (instr->intrinsic == nir_intrinsic_load_buffer_amd ||
instr->intrinsic == nir_intrinsic_load_typed_buffer_amd) {
/* LLVM is unable to select instructions for larger than 32-bit channel types.
* Workaround by using i32 and casting to the correct type later.
*/
@@ -4013,8 +4015,21 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
LLVMTypeRef channel_type =
LLVMIntTypeInContext(ctx->ac.context, MIN2(32, instr->dest.ssa.bit_size));
result = ac_build_buffer_load(&ctx->ac, descriptor, fetch_num_components, vidx, voffset,
addr_soffset, channel_type, cache_policy, reorder, false);
if (instr->intrinsic == nir_intrinsic_load_buffer_amd) {
result = ac_build_buffer_load(&ctx->ac, descriptor, fetch_num_components, vidx, voffset,
addr_soffset, channel_type, cache_policy, reorder, false);
} else {
const unsigned align_offset = nir_intrinsic_align_offset(instr);
const unsigned align_mul = nir_intrinsic_align_mul(instr);
const enum pipe_format format = nir_intrinsic_format(instr);
const struct ac_vtx_format_info *vtx_info =
ac_get_vtx_format_info(ctx->ac.gfx_level, ctx->ac.family, format);
result =
ac_build_safe_tbuffer_load(&ctx->ac, descriptor, vidx, addr_voffset, addr_soffset,
channel_type, vtx_info, const_offset, align_offset,
align_mul, fetch_num_components, cache_policy, reorder);
}
/* Trim to needed vector components. */
result = ac_trim_vector(&ctx->ac, result, fetch_num_components);