diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 23140cb5d93..f67baa13c8f 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -4403,6 +4403,15 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); break; } + case nir_intrinsic_load_smem_buffer_amd: { /* Loads num_components values from the buffer described by src[0], at the offset given by src[1]. */ + LLVMValueRef descriptor = get_src(ctx, instr->src[0]); + LLVMValueRef offset = get_src(ctx, instr->src[1]); + unsigned num_components = instr->dest.ssa.num_components; + + result = ac_build_buffer_load(&ctx->ac, descriptor, num_components, NULL, offset, NULL, + ctx->ac.i32, 0, true, true); /* NOTE(review): the NULL/0/true/true arguments are positional; their meaning is defined by ac_build_buffer_load's prototype, which is not visible here -- confirm before changing. ctx->ac.i32 is presumably the per-channel type (the intrinsic is declared with bit_sizes=[32]). */ + break; + } case nir_intrinsic_ordered_xfb_counter_add_amd: { /* must be called in a single lane of a workgroup. */ LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS); diff --git a/src/compiler/nir/nir_divergence_analysis.c b/src/compiler/nir/nir_divergence_analysis.c index 20d361c4ba4..40d6f1666ad 100644 --- a/src/compiler/nir/nir_divergence_analysis.c +++ b/src/compiler/nir/nir_divergence_analysis.c @@ -185,6 +185,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) case nir_intrinsic_load_tess_level_outer_default: case nir_intrinsic_load_scalar_arg_amd: case nir_intrinsic_load_smem_amd: + case nir_intrinsic_load_smem_buffer_amd: case nir_intrinsic_load_rt_dynamic_callable_stack_base_amd: case nir_intrinsic_load_global_const_block_intel: case nir_intrinsic_load_reloc_const_intel: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 64e8ae0dfa3..937185ce306 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1482,6 +1482,11 @@ intrinsic("load_smem_amd", src_comp=[1, 1], dest_comp=0, bit_sizes=[32], indices=[ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CAN_REORDER]) +# src[] = { descriptor, offset } +intrinsic("load_smem_buffer_amd", src_comp=[4, 1], 
dest_comp=0, bit_sizes=[32], + indices=[ALIGN_MUL, ALIGN_OFFSET], + flags=[CAN_ELIMINATE, CAN_REORDER]) + # src[] = { offset }. intrinsic("load_shared2_amd", [1], dest_comp=2, indices=[OFFSET0, OFFSET1, ST64], flags=[CAN_ELIMINATE])